Compare commits


60 Commits

| Author | SHA1 | Message | Date |
| --- | --- | --- | --- |
| Harrison Chase | 4dbd61e03b | turn off serializable for some things | 2023-12-07 22:15:58 -08:00 |
| Bagatur | b46802ad88 | key | 2023-12-07 20:20:48 -08:00 |
| Bagatur | 247950ebaa | hash | 2023-12-07 20:12:22 -08:00 |
| Bagatur | 7051e83ae0 | mv more | 2023-12-07 17:13:38 -08:00 |
| Bagatur | 76fb19f205 | telegrame | 2023-12-07 17:10:47 -08:00 |
| Bagatur | d1a0140659 | lint | 2023-12-07 17:08:33 -08:00 |
| Bagatur | 9e745c8e91 | examples | 2023-12-07 16:59:41 -08:00 |
| Bagatur | 0de6e09659 | examples | 2023-12-07 16:59:08 -08:00 |
| Bagatur | 3e0c5a331c | mypy cache | 2023-12-07 16:54:08 -08:00 |
| Bagatur | 34709ec34f | examples | 2023-12-07 16:42:15 -08:00 |
| Bagatur | 279155f02d | switch cache | 2023-12-07 16:33:44 -08:00 |
| Bagatur | a2c447938c | nit | 2023-12-07 16:32:07 -08:00 |
| Bagatur | f541e0235c | poetry | 2023-12-07 16:28:27 -08:00 |
| Bagatur | 89f74bf808 | rm mypy test cache | 2023-12-07 16:24:18 -08:00 |
| Bagatur | dee5fdfe2b | extra | 2023-12-07 16:19:47 -08:00 |
| Bagatur | a7cbdccb8a | namespaces | 2023-12-07 16:02:59 -08:00 |
| Bagatur | 91dd47b12a | ignore | 2023-12-07 15:54:22 -08:00 |
| Bagatur | af2ad3bb4f | conftest | 2023-12-07 15:43:49 -08:00 |
| Bagatur | 60fc995c3b | test | 2023-12-07 15:42:21 -08:00 |
| Bagatur | c3232a8aa3 | make | 2023-12-07 15:31:55 -08:00 |
| Bagatur | 243465da3e | poetry | 2023-12-07 15:31:03 -08:00 |
| Bagatur | 32499164ab | fmt | 2023-12-07 15:25:33 -08:00 |
| Bagatur | 9a5a52dd99 | nit | 2023-12-07 15:23:47 -08:00 |
| Bagatur | deab168da5 | poetry | 2023-12-07 15:21:36 -08:00 |
| Bagatur | d7793a0a66 | merge | 2023-12-07 15:19:06 -08:00 |
| Erick Friis | f0304a8d9e | Merge branch 'bagatur/community' of github.com:langchain-ai/langchain into bagatur/community | 2023-12-07 11:02:06 -08:00 |
| Erick Friis | bbc795b752 | override llm config | 2023-12-07 11:01:59 -08:00 |
| Bagatur | 6a0a7a7f66 | poetry | 2023-12-07 10:50:12 -08:00 |
| Bagatur | de690b081e | stubs | 2023-12-07 10:38:34 -08:00 |
| Bagatur | cea3d61bf4 | more | 2023-12-07 10:35:00 -08:00 |
| Bagatur | 5631e7e397 | more | 2023-12-07 10:33:01 -08:00 |
| Bagatur | a66df25a89 | more | 2023-12-06 20:19:32 -08:00 |
| Bagatur | 8226b81fb3 | more | 2023-12-06 20:19:13 -08:00 |
| Bagatur | 280aec4e1d | more | 2023-12-06 19:37:00 -08:00 |
| Bagatur | 12039a0057 | more | 2023-12-06 19:36:49 -08:00 |
| Bagatur | 85eae95764 | anyio | 2023-12-06 18:53:09 -08:00 |
| Bagatur | 6b1af2d19c | more | 2023-12-06 17:49:52 -08:00 |
| Bagatur | fd2fe4aacf | poetry | 2023-12-06 17:48:10 -08:00 |
| Bagatur | d6403c6696 | poetry | 2023-12-06 17:40:38 -08:00 |
| Bagatur | 11fc0a5004 | poetry | 2023-12-06 17:38:20 -08:00 |
| Bagatur | ce884f801f | more | 2023-12-06 17:35:45 -08:00 |
| Bagatur | 75ade6116d | fmt | 2023-12-06 16:13:04 -08:00 |
| Bagatur | 8ccc18fd73 | more | 2023-12-06 15:37:04 -08:00 |
| Bagatur | f62331a20c | script | 2023-12-06 13:53:52 -08:00 |
| Bagatur | f922f9fca3 | more | 2023-12-06 13:53:32 -08:00 |
| Bagatur | c6c9d93283 | ci | 2023-12-06 11:51:17 -08:00 |
| Bagatur | e08017fcf7 | merge | 2023-12-06 11:50:12 -08:00 |
| Bagatur | a2129713c3 | more | 2023-12-06 11:48:35 -08:00 |
| Bagatur | 960faa6ebe | more | 2023-12-06 11:38:11 -08:00 |
| Bagatur | c4f32cb0e9 | tests | 2023-12-06 10:51:35 -08:00 |
| Bagatur | f56c0359dc | latest | 2023-12-06 10:40:31 -08:00 |
| Bagatur | 98c4f2a5fe | merge | 2023-12-05 23:28:47 -08:00 |
| Bagatur | 31c9081246 | merge | 2023-12-05 23:25:50 -08:00 |
| Bagatur | f5ed74d56f | wip | 2023-12-05 23:23:27 -08:00 |
| Bagatur | 5535c7851a | deps | 2023-12-05 15:44:29 -08:00 |
| Bagatur | ee1478b6e1 | update | 2023-12-05 15:32:20 -08:00 |
| Bagatur | bf7b59ec44 | ci | 2023-12-05 09:50:22 -08:00 |
| Bagatur | 4965f9a10e | rm | 2023-12-04 16:52:33 -08:00 |
| Bagatur | 2d18c65ff7 | wip | 2023-12-04 16:50:47 -08:00 |
| Bagatur | 88d39708a9 | scripts | 2023-12-04 12:03:39 -08:00 |
7529 changed files with 811818 additions and 353415 deletions


@@ -5,31 +5,26 @@ This project includes a [dev container](https://containers.dev/), which lets you
You can use the dev container configuration in this folder to build and run the app without needing to install any of its tools locally! You can use it in [GitHub Codespaces](https://github.com/features/codespaces) or the [VS Code Dev Containers extension](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.remote-containers).
## GitHub Codespaces
[![Open in GitHub Codespaces](https://github.com/codespaces/badge.svg)](https://codespaces.new/langchain-ai/langchain)
You may use the button above, or follow these steps to open this repo in a Codespace:
1. Click the **Code** drop-down menu at the top of <https://github.com/langchain-ai/langchain>.
1. Click on the **Codespaces** tab.
1. Click **Create codespace on master**.
For more info, check out the [GitHub documentation](https://docs.github.com/en/free-pro-team@latest/github/developing-online-with-codespaces/creating-a-codespace#creating-a-codespace).
## VS Code Dev Containers
[![Open in Dev Containers](https://img.shields.io/static/v1?label=Dev%20Containers&message=Open&color=blue&logo=visualstudiocode)](https://vscode.dev/redirect?url=vscode://ms-vscode-remote.remote-containers/cloneInVolume?url=https://github.com/langchain-ai/langchain)
> [!NOTE]
> If you click the link above you will open the main repo (`langchain-ai/langchain`) and *not* your local cloned repo. This is fine if you only want to run and test the library, but if you want to contribute you can use the link below and replace with your username and cloned repo name:

```txt
https://vscode.dev/redirect?url=vscode://ms-vscode-remote.remote-containers/cloneInVolume?url=https://github.com/<YOUR_USERNAME>/<YOUR_CLONED_REPO_NAME>
```
Then you will have a local cloned repo where you can contribute and then create pull requests.
If you already have VS Code and Docker installed, you can use the button above to get started. This will cause VS Code to automatically install the Dev Containers extension if needed, clone the source code into a container volume, and spin up a dev container for use.
Alternatively you can also follow these steps to open this repo in a container using the VS Code Dev Containers extension:
@@ -45,5 +40,5 @@ You can learn more in the [Dev Containers documentation](https://code.visualstud
## Tips and tricks
- If you are working with the same repository folder in a container and Windows, you'll want consistent line endings (otherwise you may see hundreds of changes in the SCM view). The `.gitattributes` file in the root of this repo will disable line ending conversion and should prevent this. See [tips and tricks](https://code.visualstudio.com/docs/devcontainers/tips-and-tricks#_resolving-git-line-ending-issues-in-containers-resulting-in-many-modified-files) for more info.
- If you'd like to review the contents of the image used in this dev container, you can check it out in the [devcontainers/images](https://github.com/devcontainers/images/tree/main/src/python) repo.


@@ -1,58 +1,36 @@
// For format details, see https://aka.ms/devcontainer.json. For config options, see the
// README at: https://github.com/devcontainers/templates/tree/main/src/docker-existing-docker-compose
{
// Name for the dev container
"name": "langchain",
// Point to a Docker Compose file
"dockerComposeFile": "./docker-compose.yaml",
// Required when using Docker Compose. The name of the service to connect to once running
"service": "langchain",
// The optional 'workspaceFolder' property is the path VS Code should open by default when
// connected. This is typically a file mount in .devcontainer/docker-compose.yml
"workspaceFolder": "/workspaces/langchain",
"mounts": [
"source=langchain-workspaces,target=/workspaces/langchain,type=volume"
],
// Prevent the container from shutting down
"overrideCommand": true,
// Features to add to the dev container. More info: https://containers.dev/features
"features": {
"ghcr.io/devcontainers/features/git:1": {},
"ghcr.io/devcontainers/features/github-cli:1": {}
},
"containerEnv": {
"UV_LINK_MODE": "copy"
},
// Use 'forwardPorts' to make a list of ports inside the container available locally.
// "forwardPorts": [],
// Run commands after the container is created
"postCreateCommand": "uv sync && echo 'LangChain (Python) dev environment ready!'",
// Configure tool-specific properties.
"customizations": {
"vscode": {
"extensions": [
"ms-python.python",
"ms-python.debugpy",
"ms-python.mypy-type-checker",
"ms-python.isort",
"unifiedjs.vscode-mdx",
"davidanson.vscode-markdownlint",
"ms-toolsai.jupyter",
"GitHub.copilot",
"GitHub.copilot-chat"
],
"settings": {
"python.defaultInterpreterPath": ".venv/bin/python",
"python.formatting.provider": "none",
"[python]": {
"editor.formatOnSave": true,
"editor.codeActionsOnSave": {
"source.organizeImports": true
}
}
}
}
}
// Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root.
// "remoteUser": "root"
// Name for the dev container
"name": "langchain",
// Point to a Docker Compose file
"dockerComposeFile": "./docker-compose.yaml",
// Required when using Docker Compose. The name of the service to connect to once running
"service": "langchain",
// The optional 'workspaceFolder' property is the path VS Code should open by default when
// connected. This is typically a file mount in .devcontainer/docker-compose.yml
"workspaceFolder": "/workspaces/${localWorkspaceFolderBasename}",
// Prevent the container from shutting down
"overrideCommand": true
// Features to add to the dev container. More info: https://containers.dev/features
// "features": {
// "ghcr.io/devcontainers-contrib/features/poetry:2": {}
// }
// Use 'forwardPorts' to make a list of ports inside the container available locally.
// "forwardPorts": [],
// Uncomment the next line to run commands after the container is created.
// "postCreateCommand": "cat /etc/os-release",
// Configure tool-specific properties.
// "customizations": {},
// Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root.
// "remoteUser": "root"
}


@@ -4,10 +4,29 @@ services:
build:
dockerfile: libs/langchain/dev.Dockerfile
context: ..
volumes:
# Update this to wherever you want VS Code to mount the folder of your project
- ..:/workspaces:cached
networks:
- langchain-network
# environment:
# MONGO_ROOT_USERNAME: root
# MONGO_ROOT_PASSWORD: example123
# depends_on:
# - mongo
# mongo:
# image: mongo
# restart: unless-stopped
# environment:
# MONGO_INITDB_ROOT_USERNAME: root
# MONGO_INITDB_ROOT_PASSWORD: example123
# ports:
# - "27017:27017"
# networks:
# - langchain-network
networks:
langchain-network:
driver: bridge


@@ -1,52 +0,0 @@
# top-most EditorConfig file
root = true
# All files
[*]
charset = utf-8
end_of_line = lf
insert_final_newline = true
trim_trailing_whitespace = true
# Python files
[*.py]
indent_style = space
indent_size = 4
max_line_length = 88
# JSON files
[*.json]
indent_style = space
indent_size = 2
# YAML files
[*.{yml,yaml}]
indent_style = space
indent_size = 2
# Markdown files
[*.md]
indent_style = space
indent_size = 2
trim_trailing_whitespace = false
# Configuration files
[*.{toml,ini,cfg}]
indent_style = space
indent_size = 4
# Shell scripts
[*.sh]
indent_style = space
indent_size = 2
# Makefile
[Makefile]
indent_style = tab
indent_size = 4
# Jupyter notebooks
[*.ipynb]
# Jupyter may include trailing whitespace in cell
# outputs that's semantically meaningful
trim_trailing_whitespace = false

.github/CODEOWNERS

@@ -1,3 +0,0 @@
/.github/ @baskaryan @ccurme @eyurtsev
/libs/core/ @eyurtsev
/libs/partners/ @ccurme @mdrxy


@@ -129,4 +129,4 @@ For answers to common questions about this code of conduct, see the FAQ at
[v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html
[Mozilla CoC]: https://github.com/mozilla/diversity
[FAQ]: https://www.contributor-covenant.org/faq
[translations]: https://www.contributor-covenant.org/translations


@@ -3,4 +3,342 @@
Hi there! Thank you for even being interested in contributing to LangChain.
As an open-source project in a rapidly developing field, we are extremely open to contributions, whether they involve new features, improved infrastructure, better documentation, or bug fixes.
To learn how to contribute to LangChain, please follow the [contribution guide here](https://docs.langchain.com/oss/python/contributing).
## 🗺️ Guidelines
### 👩‍💻 Contributing Code
To contribute to this project, please follow the ["fork and pull request"](https://docs.github.com/en/get-started/quickstart/contributing-to-projects) workflow.
Please do not try to push directly to this repo unless you are a maintainer.
Please follow the checked-in pull request template when opening pull requests. Note related issues and tag relevant
maintainers.
Pull requests cannot land without passing the formatting, linting, and testing checks first. See [Testing](#testing) and
[Formatting and Linting](#formatting-and-linting) for how to run these checks locally.
It's essential that we maintain great documentation and testing. If you:
- Fix a bug
- Add a relevant unit or integration test when possible. These live in `tests/unit_tests` and `tests/integration_tests`.
- Make an improvement
- Update any affected example notebooks and documentation. These live in `docs`.
- Update unit and integration tests when relevant.
- Add a feature
- Add a demo notebook in `docs/docs/`.
- Add unit and integration tests.
We are a small, progress-oriented team. If there's something you'd like to add or change, opening a pull request is the
best way to get our attention.
### 🚩 GitHub Issues
Our [issues](https://github.com/langchain-ai/langchain/issues) page is kept up to date with bugs, improvements, and feature requests.
There is a taxonomy of labels to help with sorting and discovery of issues of interest. Please use these to help organize issues.
If you start working on an issue, please assign it to yourself.
If you are adding an issue, please try to keep it focused on a single, modular bug/improvement/feature.
If two issues are related, or blocking, please link them rather than combining them.
We will try to keep these issues as up-to-date as possible, though
with the rapid rate of development in this field some may get out of date.
If you notice this happening, please let us know.
### 🙋 Getting Help
Our goal is to have the simplest developer setup possible. Should you experience any difficulty getting set up, please
contact a maintainer! Not only do we want to help get you unblocked, but we also want to make sure that the process is
smooth for future contributors.
In a similar vein, we do enforce certain linting, formatting, and documentation standards in the codebase.
If you are finding these difficult (or even just annoying) to work with, feel free to contact a maintainer for help -
we do not want these to get in the way of getting good code into the codebase.
## 🚀 Quick Start
This quick start guide explains how to run the repository locally.
For a [development container](https://containers.dev/), see the [.devcontainer folder](https://github.com/langchain-ai/langchain/tree/master/.devcontainer).
### Dependency Management: Poetry and other env/dependency managers
This project utilizes [Poetry](https://python-poetry.org/) v1.6.1+ as a dependency manager.
❗Note: *Before installing Poetry*, if you use `Conda`, create and activate a new Conda env (e.g. `conda create -n langchain python=3.9`)
Install Poetry: **[documentation on how to install it](https://python-poetry.org/docs/#installation)**.
❗Note: If you use `Conda` or `Pyenv` as your environment/package manager, after installing Poetry,
tell Poetry to use the virtualenv python environment (`poetry config virtualenvs.prefer-active-python true`)
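Putting these notes together, a typical Conda-based setup might look like the following sketch (adjust the env name and Python version to your needs):

```bash
# Create and activate a Conda env before installing Poetry (per the note above)
conda create -n langchain python=3.9
conda activate langchain

# After installing Poetry, tell it to use the active Conda environment
poetry config virtualenvs.prefer-active-python true
```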
### Core vs. Experimental
This repository contains three separate projects:
- `langchain`: core langchain code, abstractions, and use cases.
- `langchain_core`: contains interfaces for key abstractions as well as logic for combining them in chains (LCEL).
- `langchain_experimental`: see the [Experimental README](https://github.com/langchain-ai/langchain/tree/master/libs/experimental/README.md) for more information.
Each of these has its own development environment. Docs are run from the top-level makefile, but development
is split across separate test & release flows.
For this quickstart, start with the main `langchain` package:
```bash
cd libs/langchain
```
### Local Development Dependencies
Install langchain development requirements (for running langchain, running examples, linting, formatting, tests, and coverage):
```bash
poetry install --with test
```
Then verify dependency installation:
```bash
make test
```
If the tests don't pass, you may need to pip install additional dependencies, such as `numexpr` and `openapi_schema_pydantic`.
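For example, if `make test` fails on missing imports, one quick fix is to install the packages named above into the Poetry environment (a sketch):

```bash
poetry run pip install numexpr openapi_schema_pydantic
```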
If during installation you receive a `WheelFileValidationError` for `debugpy`, please make sure you are running
Poetry v1.6.1+. This bug was present in older versions of Poetry (e.g. 1.4.1) and has been resolved in newer releases.
If you are still seeing this bug on v1.6.1, you may also try disabling "modern installation"
(`poetry config installer.modern-installation false`) and re-installing requirements.
See [this `debugpy` issue](https://github.com/microsoft/debugpy/issues/1246) for more details.
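Concretely, that workaround looks like this (a sketch; only needed if the `debugpy` error persists):

```bash
poetry config installer.modern-installation false
poetry install --with test
```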
### Testing
_Some test dependencies are optional; see the section about optional dependencies._
Unit tests cover modular logic that does not require calls to outside APIs.
If you add new logic, please add a unit test.
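For instance, a minimal, network-free unit test might look like the sketch below (the file path and function names are hypothetical):

```python
# tests/unit_tests/test_example.py (hypothetical path and names)
def reverse_words(text: str) -> str:
    """Toy function standing in for the new logic under test."""
    return " ".join(reversed(text.split()))


def test_reverse_words() -> None:
    assert reverse_words("hello world") == "world hello"
```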
To run unit tests:
```bash
make test
```
To run unit tests in Docker:
```bash
make docker_tests
```
There are also [integration tests and code-coverage](https://github.com/langchain-ai/langchain/tree/master/libs/langchain/tests/README.md) available.
### Developing only langchain_core or langchain_experimental
If you are only developing `langchain_core` or `langchain_experimental`, you can simply install the dependencies for the respective projects and run tests:
```bash
cd libs/core
poetry install --with test
make test
```
Or:
```bash
cd libs/experimental
poetry install --with test
make test
```
### Formatting and Linting
Run these locally before submitting a PR; the CI system will also check them.
#### Code Formatting
Formatting for this project is done via [ruff](https://docs.astral.sh/ruff/rules/).
To run formatting for docs, cookbook and templates:
```bash
make format
```
To run formatting for a library, run the same command from the relevant library directory:
```bash
cd libs/{LIBRARY}
make format
```
Additionally, you can run the formatter only on the files that have been modified in your current branch as compared to the master branch using the `format_diff` command:
```bash
make format_diff
```
This is especially useful when you have made changes to a subset of the project and want to ensure your changes are properly formatted without affecting the rest of the codebase.
#### Linting
Linting for this project is done via a combination of [ruff](https://docs.astral.sh/ruff/rules/) and [mypy](http://mypy-lang.org/).
To run linting for docs, cookbook and templates:
```bash
make lint
```
To run linting for a library, run the same command from the relevant library directory:
```bash
cd libs/{LIBRARY}
make lint
```
In addition, you can run the linter only on the files that have been modified in your current branch as compared to the master branch using the `lint_diff` command:
```bash
make lint_diff
```
This can be very helpful when you've made changes to only certain parts of the project and want to ensure your changes meet the linting standards without having to check the entire codebase.
We recognize linting can be annoying - if you do not want to do it, please contact a project maintainer, and they can help you with it. We do not want this to be a blocker for good code getting contributed.
#### Spellcheck
Spellchecking for this project is done via [codespell](https://github.com/codespell-project/codespell).
Note that `codespell` finds common typos, so it can produce false positives (correctly spelled but rarely used words) and false negatives (misspellings it fails to catch).
To check spelling for this project:
```bash
make spell_check
```
To fix spelling in place:
```bash
make spell_fix
```
If codespell is incorrectly flagging a word, you can skip spellcheck for that word by adding it to the codespell config in the `pyproject.toml` file.
```toml
[tool.codespell]
...
# Add here:
ignore-words-list = 'momento,collison,ned,foor,reworkd,parth,whats,aapply,mysogyny,unsecure'
```
## Working with Optional Dependencies
LangChain relies heavily on optional dependencies to keep the LangChain package lightweight.
You only need to add a new dependency if a **unit test** relies on the package.
If your package is only required for **integration tests**, then you can skip these
steps and leave all pyproject.toml and poetry.lock files alone.
If you're adding a new dependency to LangChain, assume that it will be an optional dependency, and
that most users won't have it installed.
Users who do not have the dependency installed should be able to **import** your code without
any side effects (no warnings, no errors, no exceptions).
To introduce the dependency to the pyproject.toml file correctly, please do the following:
1. Add the dependency to the main group as an optional dependency
```bash
poetry add --optional [package_name]
```
2. Open pyproject.toml and add the dependency to the `extended_testing` extra
3. Relock the poetry file to update the extra.
```bash
poetry lock --no-update
```
4. Add a unit test that at the very least attempts to import the new code. Ideally, the unit
test makes use of lightweight fixtures to test the logic of the code.
5. Please use the `@pytest.mark.requires(package_name)` decorator for any tests that require the dependency.
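For example, a guarded unit test might look like this sketch (`numexpr` stands in for your new optional package):

```python
import pytest


@pytest.mark.requires("numexpr")
def test_numexpr_importable() -> None:
    """At the very least, attempt to import the new code path."""
    import numexpr  # noqa: F401
```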
## Adding a Jupyter Notebook
If you are adding a Jupyter Notebook example, you'll want to install the optional `dev` dependencies.
To install dev dependencies:
```bash
poetry install --with dev
```
Launch a notebook:
```bash
poetry run jupyter notebook
```
When you run `poetry install`, the `langchain` package is installed as editable in the virtualenv, so your new logic can be imported into the notebook.
## Documentation
While the code is split between `langchain` and `langchain_experimental`, the documentation is one holistic thing.
This covers how to get started contributing to documentation.
From the top-level of this repo, install documentation dependencies:
```bash
poetry install
```
### Contribute Documentation
The `docs` directory contains the Documentation and the API Reference.
Documentation is built using [Docusaurus 2](https://docusaurus.io/).
The API Reference is largely autogenerated by [sphinx](https://www.sphinx-doc.org/en/master/) from the code.
For that reason, we ask that you add good documentation to all classes and methods.
Similar to linting, we recognize documentation can be annoying. If you do not want to do it, please contact a project maintainer, and they can help you with it. We do not want this to be a blocker for good code getting contributed.
### Build Documentation Locally
In the following commands, the prefix `api_` indicates that those are operations for the API Reference.
Before building the documentation, it is always a good idea to clean the build directory:
```bash
make docs_clean
make api_docs_clean
```
Next, you can build the documentation as outlined below:
```bash
make docs_build
make api_docs_build
```
Finally, run the link checker to ensure all links are valid:
```bash
make docs_linkcheck
make api_docs_linkcheck
```
### Verify Documentation changes
After pushing documentation changes to the repository, you can preview and verify that the changes are
what you wanted by clicking the `View deployment` or `Visit Preview` buttons on the pull request `Conversation` page.
This will take you to a preview of the documentation changes.
This preview is created by [Vercel](https://vercel.com/docs/getting-started-with-vercel).
## 🏭 Release Process
As of now, LangChain has an ad hoc release process: releases are cut with high frequency by
a developer and published to [PyPI](https://pypi.org/project/langchain/).
LangChain follows the [semver](https://semver.org/) versioning standard. However, as pre-1.0 software,
even patch releases may contain [non-backwards-compatible changes](https://semver.org/#spec-item-4).
### 🌟 Recognition
If your contribution has made its way into a release, we will want to give you credit on Twitter (only if you want though)!
If you have a Twitter account you would like us to mention, please let us know in the PR or through another means.


@@ -1,121 +1,106 @@
name: "\U0001F41B Bug Report"
description: Report a bug in LangChain. To report a security issue, please instead use the security option below. For questions, please use the LangChain forum.
labels: ["bug"]
type: bug
description: Submit a bug report to help us improve LangChain. To report a security issue, please instead use the security option below.
labels: ["02 Bug Report"]
body:
- type: markdown
attributes:
value: |
Thank you for taking the time to file a bug report.
value: >
Thank you for taking the time to file a bug report. Before creating a new
issue, please make sure to take a few moments to check the issue tracker
for existing issues about the bug.
Use this to report BUGS in LangChain. For usage questions, feature requests and general design questions, please use the [LangChain Forum](https://forum.langchain.com/).
Relevant links to check before filing a bug report to see if your issue has already been reported, fixed or
if there's another way to solve your problem:
* [LangChain Forum](https://forum.langchain.com/),
* [LangChain documentation with the integrated search](https://docs.langchain.com/oss/python/langchain/overview),
* [API Reference](https://reference.langchain.com/python/),
* [LangChain ChatBot](https://chat.langchain.com/)
* [GitHub search](https://github.com/langchain-ai/langchain),
- type: checkboxes
id: checks
- type: textarea
id: system-info
attributes:
label: Checked other resources
description: Please confirm and check all the following options.
label: System Info
description: Please share your system info with us.
placeholder: LangChain version, platform, python version, ...
validations:
required: true
- type: textarea
id: who-can-help
attributes:
label: Who can help?
description: |
Your issue will be replied to more quickly if you can figure out the right person to tag with @
If you know how to use git blame, that is the easiest way, otherwise, here is a rough guide of **who to tag**.
The core maintainers strive to read all issues, but tagging them will help them prioritize.
Please tag fewer than 3 people.
@hwchase17 - project lead
Tracing / Callbacks
- @agola11
Async
- @agola11
DataLoader Abstractions
- @eyurtsev
LLM/Chat Wrappers
- @hwchase17
- @agola11
Tools / Toolkits
- ...
placeholder: "@Username ..."
- type: checkboxes
id: information-scripts-examples
attributes:
label: Information
description: "The problem arises when using:"
options:
- label: This is a bug, not a usage question.
required: true
- label: I added a clear and descriptive title that summarizes this issue.
required: true
- label: I used the GitHub search to find a similar question and didn't find it.
required: true
- label: I am sure that this is a bug in LangChain rather than my code.
required: true
- label: The bug is not resolved by updating to the latest stable version of LangChain (or the specific integration package).
required: true
- label: This is not related to the langchain-community package.
required: true
- label: I read what a minimal reproducible example is (https://stackoverflow.com/help/minimal-reproducible-example).
required: true
- label: I posted a self-contained, minimal, reproducible example. A maintainer can copy it and run it AS IS.
required: true
- label: "The official example notebooks/scripts"
- label: "My own modified scripts"
- type: checkboxes
id: related-components
attributes:
label: Related Components
description: "Select the components related to the issue (if applicable):"
options:
- label: "LLMs/Chat Models"
- label: "Embedding Models"
- label: "Prompts / Prompt Templates / Prompt Selectors"
- label: "Output Parsers"
- label: "Document Loaders"
- label: "Vector Stores / Retrievers"
- label: "Memory"
- label: "Agents / Agent Executors"
- label: "Tools / Toolkits"
- label: "Chains"
- label: "Callbacks/Tracing"
- label: "Async"
- type: textarea
id: reproduction
validations:
required: true
attributes:
label: Example Code
label: Reproduction
description: |
Please add a self-contained, [minimal, reproducible, example](https://stackoverflow.com/help/minimal-reproducible-example) with your use case.
If a maintainer can copy it, run it, and see it right away, there's a much higher chance that you'll be able to get help.
**Important!**
* Avoid screenshots when possible, as they are hard to read and (more importantly) don't allow others to copy-and-paste your code.
* Reduce your code to the minimum required to reproduce the issue if possible. This makes it much easier for others to help you.
* Use code tags (e.g., ```python ... ```) to correctly [format your code](https://help.github.com/en/github/writing-on-github/creating-and-highlighting-code-blocks#syntax-highlighting).
* INCLUDE the language label (e.g. `python`) after the first three backticks to enable syntax highlighting. (e.g., ```python rather than ```).
Please provide a [code sample](https://stackoverflow.com/help/minimal-reproducible-example) that reproduces the problem you ran into. It can be a Colab link or just a code snippet.
If you have code snippets, error messages, stack traces please provide them here as well.
Important! Use code tags to correctly format your code. See https://help.github.com/en/github/writing-on-github/creating-and-highlighting-code-blocks#syntax-highlighting
Avoid screenshots when possible, as they are hard to read and (more importantly) don't allow others to copy-and-paste your code.
placeholder: |
The following code:
```python
from langchain_core.runnables import RunnableLambda

def bad_code(inputs) -> int:
    raise NotImplementedError('For demo purpose')

chain = RunnableLambda(bad_code)
chain.invoke('Hello!')
```
Steps to reproduce the behavior:
1.
2.
3.
- type: textarea
id: error
validations:
required: false
attributes:
label: Error Message and Stack Trace (if applicable)
description: |
If you are reporting an error, please include the full error message and stack trace.
placeholder: |
Exception + full stack trace
- type: textarea
id: description
attributes:
label: Description
description: |
What is the problem, question, or error?
Write a short description telling what you are doing, what you expect to happen, and what is currently happening.
placeholder: |
* I'm trying to use the `langchain` library to do X.
* I expect to see Y.
* Instead, it does Z.
id: expected-behavior
validations:
required: true
- type: textarea
id: system-info
attributes:
label: System Info
description: |
Please share your system info with us. Do NOT skip this step and please don't trim
the output. Most users don't include enough information here and it makes it harder
for us to help you.
Run the following command in your terminal and paste the output here:
`python -m langchain_core.sys_info`
or if you have an existing python interpreter running:
```python
from langchain_core import sys_info
sys_info.print_sys_info()
```
alternatively, put the entire output of `pip freeze` here.
placeholder: |
python -m langchain_core.sys_info
validations:
required: true
label: Expected behavior
description: "A clear and concise description of what you would expect to happen."


@@ -1,9 +1,6 @@
blank_issues_enabled: false
blank_issues_enabled: true
version: 2.1
contact_links:
- name: 📚 Documentation
url: https://github.com/langchain-ai/docs/issues/new?template=langchain.yml
about: Report an issue related to the LangChain documentation
- name: 💬 LangChain Forum
url: https://forum.langchain.com/
about: General community discussions and support
- name: Discord
url: https://discord.gg/6adMQxSpJS
about: General community discussions


@@ -0,0 +1,19 @@
name: Documentation
description: Report an issue related to the LangChain documentation.
title: "DOC: <Please write a comprehensive title after the 'DOC: ' prefix>"
labels: [03 - Documentation]
body:
- type: textarea
attributes:
label: "Issue with current documentation:"
description: >
Please make sure to leave a reference to the document/code you're
referring to.
- type: textarea
attributes:
label: "Idea or request for content:"
description: >
Please describe as clearly as possible what topics you think are missing
from the current documentation.


@@ -1,118 +1,30 @@
name: " Feature Request"
description: Request a new feature or enhancement for LangChain. For questions, please use the LangChain forum.
labels: ["feature request"]
type: feature
name: "\U0001F680 Feature request"
description: Submit a proposal/request for a new LangChain feature
labels: ["02 Feature Request"]
body:
- type: markdown
attributes:
value: |
Thank you for taking the time to request a new feature.
Use this to request NEW FEATURES or ENHANCEMENTS in LangChain. For bug reports, please use the bug report template. For usage questions and general design questions, please use the [LangChain Forum](https://forum.langchain.com/).
Relevant links to check before filing a feature request to see if your request has already been made or
if there's another way to achieve what you want:
* [LangChain Forum](https://forum.langchain.com/),
* [LangChain documentation with the integrated search](https://docs.langchain.com/oss/python/langchain/overview),
* [API Reference](https://reference.langchain.com/python/),
* [LangChain ChatBot](https://chat.langchain.com/)
* [GitHub search](https://github.com/langchain-ai/langchain),
- type: checkboxes
id: checks
attributes:
label: Checked other resources
description: Please confirm and check all the following options.
options:
- label: This is a feature request, not a bug report or usage question.
required: true
- label: I added a clear and descriptive title that summarizes the feature request.
required: true
- label: I used the GitHub search to find a similar feature request and didn't find it.
required: true
- label: I checked the LangChain documentation and API reference to see if this feature already exists.
required: true
- label: This is not related to the langchain-community package.
required: true
- type: textarea
id: feature-description
id: feature-request
validations:
required: true
attributes:
label: Feature Description
label: Feature request
description: |
Please provide a clear and concise description of the feature you would like to see added to LangChain.
A clear and concise description of the feature proposal. Please provide links to any relevant GitHub repos, papers, or other resources if relevant.
What specific functionality are you requesting? Be as detailed as possible.
placeholder: |
I would like LangChain to support...
This feature would allow users to...
- type: textarea
id: use-case
id: motivation
validations:
required: true
attributes:
label: Use Case
label: Motivation
description: |
Describe the specific use case or problem this feature would solve.
Please outline the motivation for the proposal. Is your feature request related to a problem? e.g., I'm always frustrated when [...]. If this is related to another GitHub issue, please link here too.
Why do you need this feature? What problem does it solve for you or other users?
placeholder: |
I'm trying to build an application that...
Currently, I have to work around this by...
This feature would help me/users to...
- type: textarea
id: proposed-solution
id: contribution
validations:
required: false
required: true
attributes:
label: Proposed Solution
label: Your contribution
description: |
If you have ideas about how this feature could be implemented, please describe them here.
This is optional but can be helpful for maintainers to understand your vision.
placeholder: |
I think this could be implemented by...
The API could look like...
```python
# Example of how the feature might work
```
- type: textarea
id: alternatives
validations:
required: false
attributes:
label: Alternatives Considered
description: |
Have you considered any alternative solutions or workarounds?
What other approaches have you tried or considered?
placeholder: |
I've tried using...
Alternative approaches I considered:
1. ...
2. ...
But these don't work because...
- type: textarea
id: additional-context
validations:
required: false
attributes:
label: Additional Context
description: |
Add any other context, screenshots, examples, or references that would help explain your feature request.
placeholder: |
Related issues: #...
Similar features in other libraries:
- ...
Additional context or examples:
- ...
Is there any way that you could help, e.g. by submitting a PR? Make sure to read the CONTRIBUTING.MD [readme](https://github.com/langchain-ai/langchain/blob/master/.github/CONTRIBUTING.md)

.github/ISSUE_TEMPLATE/other.yml

@@ -0,0 +1,18 @@
name: Other Issue
description: Raise an issue that wouldn't be covered by the other templates.
title: "Issue: <Please write a comprehensive title after the 'Issue: ' prefix>"
labels: [04 - Other]
body:
- type: textarea
attributes:
label: "Issue you'd like to raise."
description: >
Please describe the issue you'd like to raise as clearly as possible.
Make sure to include any relevant links or references.
- type: textarea
attributes:
label: "Suggestion:"
description: >
Please outline a suggestion to improve the issue here.


@@ -1,20 +0,0 @@
name: 🔒 Privileged
description: You are a LangChain maintainer, or were asked directly by a maintainer to create an issue here. If not, check the other options.
body:
- type: markdown
attributes:
value: |
If you are not a LangChain maintainer, employee, or were not asked directly by a maintainer to create an issue, then please start the conversation on the [LangChain Forum](https://forum.langchain.com/) instead.
- type: checkboxes
id: privileged
attributes:
label: Privileged issue
description: Confirm that you are allowed to create an issue here.
options:
- label: I am a LangChain maintainer, or was asked directly by a LangChain maintainer to create an issue here.
required: true
- type: textarea
id: content
attributes:
label: Issue Content
description: Add the content of the issue here.


@@ -1,91 +0,0 @@
name: "📋 Task"
description: Create a task for project management and tracking by LangChain maintainers. If you are not a maintainer, please use other templates or the forum.
labels: ["task"]
type: task
body:
- type: markdown
attributes:
value: |
Thanks for creating a task to help organize LangChain development.
This template is for **maintainer tasks** such as project management, development planning, refactoring, documentation updates, and other organizational work.
If you are not a LangChain maintainer or were not asked directly by a maintainer to create a task, then please start the conversation on the [LangChain Forum](https://forum.langchain.com/) instead or use the appropriate bug report or feature request templates on the previous page.
- type: checkboxes
id: maintainer
attributes:
label: Maintainer task
description: Confirm that you are allowed to create a task here.
options:
- label: I am a LangChain maintainer, or was asked directly by a LangChain maintainer to create a task here.
required: true
- type: textarea
id: task-description
attributes:
label: Task Description
description: |
Provide a clear and detailed description of the task.
What needs to be done? Be specific about the scope and requirements.
placeholder: |
This task involves...
The goal is to...
Specific requirements:
- ...
- ...
validations:
required: true
- type: textarea
id: acceptance-criteria
attributes:
label: Acceptance Criteria
description: |
Define the criteria that must be met for this task to be considered complete.
What are the specific deliverables or outcomes expected?
placeholder: |
This task will be complete when:
- [ ] ...
- [ ] ...
- [ ] ...
validations:
required: true
- type: textarea
id: context
attributes:
label: Context and Background
description: |
Provide any relevant context, background information, or links to related issues/PRs.
Why is this task needed? What problem does it solve?
placeholder: |
Background:
- ...
Related issues/PRs:
- #...
Additional context:
- ...
validations:
required: false
- type: textarea
id: dependencies
attributes:
label: Dependencies
description: |
List any dependencies or blockers for this task.
Are there other tasks, issues, or external factors that need to be completed first?
placeholder: |
This task depends on:
- [ ] Issue #...
- [ ] PR #...
- [ ] External dependency: ...
Blocked by:
- ...
validations:
required: false


@@ -1,28 +1,20 @@
(Replace this entire block of text)
<!-- Thank you for contributing to LangChain!
Thank you for contributing to LangChain! Follow these steps to mark your pull request as ready for review. **If any of these steps are not completed, your PR will not be considered for review.**
Replace this entire comment with:
- **Description:** a description of the change,
- **Issue:** the issue # it fixes (if applicable),
- **Dependencies:** any dependencies required for this change,
- **Tag maintainer:** for a quicker response, tag the relevant maintainer (see below),
- **Twitter handle:** we announce bigger features on Twitter. If your PR gets announced, and you'd like a mention, we'll gladly shout you out!
- [ ] **PR title**: Follows the format: {TYPE}({SCOPE}): {DESCRIPTION}
- Examples:
- feat(core): add multi-tenant support
- fix(cli): resolve flag parsing error
- docs(openai): update API usage examples
- Allowed `{TYPE}` values:
- feat, fix, docs, style, refactor, perf, test, build, ci, chore, revert, release
- Allowed `{SCOPE}` values (optional):
- core, cli, langchain, standard-tests, text-splitters, docs, anthropic, chroma, deepseek, exa, fireworks, groq, huggingface, mistralai, nomic, ollama, openai, perplexity, prompty, qdrant, xai, infra
- Once you've written the title, please delete this checklist item; do not include it in the PR.
Please make sure your PR is passing linting and testing before submitting. Run `make format`, `make lint` and `make test` to check this locally.
- [ ] **PR message**: ***Delete this entire checklist*** and replace with
- **Description:** a description of the change. Include a [closing keyword](https://docs.github.com/en/issues/tracking-your-work-with-issues/using-issues/linking-a-pull-request-to-an-issue#linking-a-pull-request-to-an-issue-using-a-keyword) if applicable to a relevant issue.
- **Issue:** the issue # it fixes, if applicable (e.g. Fixes #123)
- **Dependencies:** any dependencies required for this change
See contribution guidelines for more information on how to write/run tests, lint, etc:
https://github.com/langchain-ai/langchain/blob/master/.github/CONTRIBUTING.md
- [ ] **Lint and test**: Run `make format`, `make lint` and `make test` from the root of the package(s) you've modified. **We will not consider a PR unless these three are passing in CI.** See [contribution guidelines](https://docs.langchain.com/oss/python/contributing) for more.
If you're adding a new integration, please include:
1. a test for the integration, preferably unit tests that do not rely on network access,
2. an example notebook showing its use. It lives in `docs/extras` directory.
Additional guidelines:
- Most PRs should not touch more than one package.
- Please do not add dependencies to `pyproject.toml` files (even optional ones) unless they are **required** for unit tests. Likewise, please do not update the `uv.lock` files unless you are adding a required dependency.
- Changes should be backwards compatible.
- Make sure optional dependencies are imported within a function.
If no one reviews your PR within a few days, please @-mention one of @baskaryan, @eyurtsev, @hwchase17.
-->


@@ -26,13 +26,12 @@ inputs:
runs:
using: composite
steps:
- uses: actions/setup-python@v5
- uses: actions/setup-python@v4
name: Setup python ${{ inputs.python-version }}
id: setup-python
with:
python-version: ${{ inputs.python-version }}
- uses: actions/cache@v4
- uses: actions/cache@v3
id: cache-bin-poetry
name: Cache Poetry binary - Python ${{ inputs.python-version }}
env:
@@ -75,11 +74,10 @@ runs:
env:
POETRY_VERSION: ${{ inputs.poetry-version }}
PYTHON_VERSION: ${{ inputs.python-version }}
# Install poetry using the python version installed by setup-python step.
run: pipx install "poetry==$POETRY_VERSION" --python '${{ steps.setup-python.outputs.python-path }}' --verbose
run: pipx install "poetry==$POETRY_VERSION" --python "python$PYTHON_VERSION" --verbose
- name: Restore pip and poetry cached dependencies
uses: actions/cache@v4
uses: actions/cache@v3
env:
SEGMENT_DOWNLOAD_TIMEOUT_MIN: "4"
WORKDIR: ${{ inputs.working-directory == '' && '.' || inputs.working-directory }}


@@ -1,39 +0,0 @@
# Helper to set up Python and uv with caching
name: uv-install
description: Set up Python and uv with caching
inputs:
python-version:
description: Python version, supporting MAJOR.MINOR only
required: true
enable-cache:
description: Enable caching for uv dependencies
required: false
default: "true"
cache-suffix:
description: Custom cache key suffix for cache invalidation
required: false
default: ""
working-directory:
description: Working directory for cache glob scoping
required: false
default: "**"
env:
UV_VERSION: "0.5.25"
runs:
using: composite
steps:
- name: Install uv and set the python version
uses: astral-sh/setup-uv@v6
with:
version: ${{ env.UV_VERSION }}
python-version: ${{ inputs.python-version }}
enable-cache: ${{ inputs.enable-cache }}
cache-dependency-glob: |
${{ inputs.working-directory }}/pyproject.toml
${{ inputs.working-directory }}/uv.lock
${{ inputs.working-directory }}/requirements*.txt
cache-suffix: ${{ inputs.cache-suffix }}


@@ -1,330 +0,0 @@
# Global Development Guidelines for LangChain Projects
## Core Development Principles
### 1. Maintain Stable Public Interfaces ⚠️ CRITICAL
**Always attempt to preserve function signatures, argument positions, and names for exported/public methods.**
**Bad - Breaking Change:**
```python
def get_user(id, verbose=False):  # Changed from `user_id`
    pass
```
**Good - Stable Interface:**
```python
def get_user(user_id: str, verbose: bool = False) -> User:
    """Retrieve user by ID with optional verbose output."""
    pass
```
**Before making ANY changes to public APIs:**
- Check if the function/class is exported in `__init__.py`
- Look for existing usage patterns in tests and examples
- Use keyword-only arguments for new parameters: `*, new_param: str = "default"`
- Mark experimental features clearly with docstring admonitions (using MkDocs Material, like `!!! warning`)
🧠 *Ask yourself:* "Would this change break someone's code if they used it last week?"
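For instance, extending `get_user` above with a keyword-only parameter keeps existing call sites working (a sketch; `include_profile` is a hypothetical new parameter):

```python
def get_user(user_id: str, verbose: bool = False, *, include_profile: bool = False) -> User:
    """Retrieve user by ID with optional verbose output.

    The keyword-only `include_profile` (hypothetical) defaults to the
    old behavior, so positional callers are unaffected.
    """
    pass
```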
### 2. Code Quality Standards
**All Python code MUST include type hints and return types.**
**Bad:**
```python
def p(u, d):
    return [x for x in u if x not in d]
```
**Good:**
```python
def filter_unknown_users(users: list[str], known_users: set[str]) -> list[str]:
    """Filter out users that are not in the known users set.

    Args:
        users: List of user identifiers to filter.
        known_users: Set of known/valid user identifiers.

    Returns:
        List of users that are not in the known_users set.
    """
    return [user for user in users if user not in known_users]
```
**Style Requirements:**
- Use descriptive, **self-explanatory variable names**. Avoid overly short or cryptic identifiers.
- Attempt to break up complex functions (>20 lines) into smaller, focused functions where it makes sense
- Avoid unnecessary abstraction or premature optimization
- Follow existing patterns in the codebase you're modifying
### 3. Testing Requirements
**Every new feature or bugfix MUST be covered by unit tests.**
**Test Organization:**
- Unit tests: `tests/unit_tests/` (no network calls allowed)
- Integration tests: `tests/integration_tests/` (network calls permitted)
- Use `pytest` as the testing framework
**Test Quality Checklist:**
- [ ] Tests fail when your new logic is broken
- [ ] Happy path is covered
- [ ] Edge cases and error conditions are tested
- [ ] Use fixtures/mocks for external dependencies
- [ ] Tests are deterministic (no flaky tests)
Checklist questions:
- [ ] Does the test suite fail if your new logic is broken?
- [ ] Are all expected behaviors exercised (happy path, invalid input, etc)?
- [ ] Do tests use fixtures or mocks where needed?
```python
def test_filter_unknown_users():
    """Test filtering unknown users from a list."""
    users = ["alice", "bob", "charlie"]
    known_users = {"alice", "bob"}

    result = filter_unknown_users(users, known_users)

    assert result == ["charlie"]
    assert len(result) == 1
```
### 4. Security and Risk Assessment
**Security Checklist:**
- No `eval()`, `exec()`, or `pickle` on user-controlled input
- Proper exception handling (no bare `except:`); build error messages in a `msg` variable
- Remove unreachable/commented code before committing
- Check for race conditions and resource leaks (file handles, sockets, threads)
- Ensure proper resource cleanup (file handles, connections)
**Bad:**
```python
def load_config(path):
    with open(path) as f:
        return eval(f.read())  # ⚠️ Never eval config
```
**Good:**
```python
import json

def load_config(path: str) -> dict:
    with open(path) as f:
        return json.load(f)
```
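Combining the checklist items above (no bare `except:`, error messages built in a `msg` variable), a small sketch:

```python
import json


def load_config_strict(path: str) -> dict:
    """Load a JSON config, raising a clear error on malformed input."""
    try:
        with open(path) as f:
            return json.load(f)
    except json.JSONDecodeError as e:
        # Build the message in a `msg` variable, then raise a specific error.
        msg = f"Config file {path!r} is not valid JSON: {e}"
        raise ValueError(msg) from e
```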
### 5. Documentation Standards
**Use Google-style docstrings with Args and Returns sections for all public functions.**
**Insufficient Documentation:**
```python
def send_email(to, msg):
    """Send an email to a recipient."""
```
**Complete Documentation:**
```python
def send_email(to: str, msg: str, *, priority: str = "normal") -> bool:
    """Send an email to a recipient with specified priority.

    Args:
        to: The email address of the recipient.
        msg: The message body to send.
        priority: Email priority level.

    Returns:
        True if email was sent successfully, False otherwise.

    Raises:
        InvalidEmailError: If the email address format is invalid.
        SMTPConnectionError: If unable to connect to email server.
    """
```
**Documentation Guidelines:**
- Types go in function signatures, NOT in docstrings
- Focus on "why" rather than "what" in descriptions
- Document all parameters, return values, and exceptions
- Keep descriptions concise but clear
📌 *Tip:* Keep descriptions concise but clear. Only document return values if non-obvious.
### 6. Architectural Improvements
**When you encounter code that could be improved, suggest better designs:**
**Poor Design:**
```python
def process_data(data, db_conn, email_client, logger):
    # Function doing too many things
    validated = validate_data(data)
    result = db_conn.save(validated)
    email_client.send_notification(result)
    logger.log(f"Processed {len(data)} items")
    return result
```
**Better Design:**
```python
from dataclasses import dataclass, field
from typing import List


@dataclass
class ProcessingResult:
    """Result of data processing operation."""

    items_processed: int
    success: bool
    errors: List[str] = field(default_factory=list)


class DataProcessor:
    """Handles data validation, storage, and notification."""

    def __init__(self, db_conn: Database, email_client: EmailClient):
        self.db = db_conn
        self.email = email_client

    def process(self, data: List[dict]) -> ProcessingResult:
        """Process and store data with notifications.

        Args:
            data: List of data items to process.

        Returns:
            ProcessingResult with details of the operation.
        """
        validated = self._validate_data(data)
        result = self.db.save(validated)
        self._notify_completion(result)
        return result
```
**Design Improvement Areas:**
If there's a **cleaner**, **more scalable**, or **simpler** design, highlight it and suggest improvements that would:
- Reduce code duplication through shared utilities
- Improve separation of concerns (single responsibility)
- Make unit testing easier through dependency injection
- Add clarity without adding complexity
- Prefer dataclasses for structured data
## Development Tools & Commands
### Package Management
```bash
# Add package
uv add package-name
# Sync project dependencies
uv sync
uv lock
```
### Testing
```bash
# Run unit tests (no network)
make test
# Don't run integration tests, as API keys must be set
# Run specific test file
uv run --group test pytest tests/unit_tests/test_specific.py
```
### Code Quality
```bash
# Lint code
make lint
# Format code
make format
# Type checking
uv run --group lint mypy .
```
### Dependency Management Patterns
**Local Development Dependencies:**
```toml
[tool.uv.sources]
langchain-core = { path = "../core", editable = true }
langchain-tests = { path = "../standard-tests", editable = true }
```
**For tools, use the `@tool` decorator from `langchain_core.tools`:**
```python
from langchain_core.tools import tool

@tool
def search_database(query: str) -> str:
    """Search the database for relevant information.

    Args:
        query: The search query string.
    """
    # Implementation here (placeholder so the sketch runs)
    results = f"Results for: {query}"
    return results
```
## Commit Standards
**Use Conventional Commits format for PR titles:**
- `feat(core): add multi-tenant support`
- `fix(cli)!: resolve flag parsing error` (a breaking change adds an exclamation mark before the colon)
- `docs: update API usage examples`
- `docs(openai): update API usage examples`
## Framework-Specific Guidelines
- Follow the existing patterns in `langchain_core` for base abstractions
- Implement proper streaming support where applicable
- Avoid deprecated components
### Partner Integrations
- Follow the established patterns in existing partner libraries
- Implement standard interfaces (`BaseChatModel`, `BaseEmbeddings`, etc.)
- Include comprehensive integration tests
- Document API key requirements and authentication
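As an illustration of implementing a standard interface, here is a toy `BaseChatModel` subclass (a minimal sketch, not a template from this repo; a real integration adds streaming, auth, and integration tests):

```python
from typing import Any, Optional

from langchain_core.language_models.chat_models import BaseChatModel
from langchain_core.messages import AIMessage, BaseMessage
from langchain_core.outputs import ChatGeneration, ChatResult


class ParrotChatModel(BaseChatModel):
    """Toy chat model that echoes the last message back."""

    @property
    def _llm_type(self) -> str:
        return "parrot"

    def _generate(
        self,
        messages: list[BaseMessage],
        stop: Optional[list[str]] = None,
        run_manager: Optional[Any] = None,
        **kwargs: Any,
    ) -> ChatResult:
        text = messages[-1].content if messages else ""
        message = AIMessage(content=str(text))
        return ChatResult(generations=[ChatGeneration(message=message)])
```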
---
## Quick Reference Checklist
Before submitting code changes:
- [ ] **Breaking Changes**: Verified no public API changes
- [ ] **Type Hints**: All functions have complete type annotations
- [ ] **Tests**: New functionality is fully tested
- [ ] **Security**: No dangerous patterns (eval, silent failures, etc.)
- [ ] **Documentation**: Google-style docstrings for public functions
- [ ] **Code Quality**: `make lint` and `make format` pass
- [ ] **Architecture**: Suggested improvements where applicable
- [ ] **Commit Message**: Follows Conventional Commits format


@@ -1,11 +0,0 @@
# Please see the documentation for all configuration options:
# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
# and
# https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file
version: 2
updates:
- package-ecosystem: "github-actions"
directory: "/"
schedule:
interval: "weekly"


@@ -1,25 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<svg id="Layer_1" data-name="Layer 1" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 1584.81 250">
<defs>
<style>
.cls-1 {
fill: #1c3c3c;
stroke-width: 0px;
}
</style>
</defs>
<g id="LanChain-logo">
<g id="LangChain-logotype">
<polygon class="cls-1" points="596.33 49.07 596.33 200.67 700.76 200.67 700.76 177.78 620.04 177.78 620.04 49.07 596.33 49.07"/>
<path class="cls-1" d="M1126.83,49.07c-20.53,0-37.95,7.4-50.38,21.41-12.32,13.88-18.82,33.36-18.82,56.33,0,47.23,27.25,77.75,69.41,77.75,29.71,0,52.71-15.54,61.54-41.56l2.14-6.31-23.53-8.94-2.17,7.03c-5.26,17.01-18.75,26.38-37.99,26.38-27.48,0-44.55-20.82-44.55-54.34s17.23-54.34,44.97-54.34c19.23,0,30.31,7.54,35.95,24.44l2.46,7.37,22.91-10.75-2.1-5.9c-8.96-25.22-29.65-38.56-59.85-38.56Z"/>
<path class="cls-1" d="M756.43,85.05c-22.76,0-39.78,10.67-46.69,29.27-.44,1.19-1.77,4.78-1.77,4.78l19.51,12.62,2.65-6.91c4.52-11.78,12.88-17.27,26.3-17.27s21.1,6.51,20.96,19.33c0,.52-.04,2.09-.04,2.09,0,0-17.76,2.88-25.08,4.43-31.23,6.6-44.31,18.52-44.31,38.02,0,10.39,5.77,21.64,16.3,27.95,6.32,3.78,14.57,5.21,23.68,5.21,5.99,0,11.81-.89,17.2-2.53,12.25-4.07,15.67-12.07,15.67-12.07v10.46h20.29v-74.78c0-25.42-16.7-40.6-44.67-40.6ZM777.46,164.85c0,7.86-8.56,18.93-28.5,18.93-5.63,0-9.62-1.49-12.28-3.71-3.56-2.97-4.73-7.24-4.24-11.01.21-1.64,1.2-5.17,4.87-8.23,3.75-3.13,10.38-5.37,20.62-7.6,8.42-1.83,19.54-3.85,19.54-3.85v15.48Z"/>
<path class="cls-1" d="M876.11,85.04c-2.82,0-5.57.2-8.24.57-18.17,2.73-23.49,11.96-23.49,11.96l.02-9.31h-22.74s0,112.19,0,112.19h23.71v-62.18c0-21.13,15.41-30.75,29.73-30.75,15.48,0,23,8.32,23,25.45v67.48h23.71v-70.74c0-27.56-17.51-44.67-45.69-44.67Z"/>
<path class="cls-1" d="M1539.12,85.04c-2.82,0-5.57.2-8.24.57-18.17,2.73-23.49,11.96-23.49,11.96v-9.32h-22.72v112.2h23.71v-62.18c0-21.13,15.41-30.75,29.73-30.75,15.48,0,23,8.32,23,25.45v67.48h23.71v-70.74c0-27.56-17.51-44.67-45.69-44.67Z"/>
<path class="cls-1" d="M1020.76,88.26v11.55s-5.81-14.77-32.24-14.77c-32.84,0-53.24,22.66-53.24,59.15,0,20.59,6.58,36.8,18.19,47.04,9.03,7.96,21.09,12.04,35.45,12.32,9.99.19,16.46-2.53,20.5-5.1,7.76-4.94,10.64-9.63,10.64-9.63,0,0-.33,3.67-.93,8.64-.43,3.6-1.24,6.13-1.24,6.13h0c-3.61,12.85-14.17,20.28-29.57,20.28s-24.73-5.07-26.58-15.06l-23.05,6.88c3.98,19.2,22,30.66,48.2,30.66,17.81,0,31.77-4.84,41.5-14.4,9.81-9.64,14.79-23.53,14.79-41.29v-102.41h-22.42ZM1019.26,145.21c0,22.44-10.96,35.84-29.32,35.84-19.67,0-30.95-13.44-30.95-36.86s11.28-36.66,30.95-36.66c17.92,0,29.15,13.34,29.32,34.82v2.86Z"/>
<path class="cls-1" d="M1259.01,85.04c-2.6,0-5.13.17-7.59.49-17.88,2.79-23.14,11.9-23.14,11.9v-2.67h-.01s0-45.69,0-45.69h-23.71v151.39h23.71v-62.18c0-21.27,15.41-30.95,29.73-30.95,15.48,0,23,8.32,23,25.45v67.68h23.71v-70.94c0-27.01-17.94-44.47-45.69-44.47Z"/>
<circle class="cls-1" cx="1450.93" cy="64.47" r="15.37"/>
<path class="cls-1" d="M1439.14,88.2v56.94h0c-6.75-5.56-14.6-9.75-23.5-12.26v-7.23c0-25.42-16.7-40.6-44.67-40.6-22.76,0-39.78,10.67-46.69,29.27-.44,1.19-1.77,4.78-1.77,4.78l19.51,12.62,2.65-6.91c4.52-11.78,12.88-17.27,26.3-17.27s21.1,6.51,20.96,19.33c0,.08,0,1.15,0,2.86-10.04-.28-19.38.69-27.77,2.66,0,0,0,0,0,0-11.06,2.5-31.6,8.85-38.94,25.36-.05.11-1.13,2.96-1.13,2.96-1.06,3.28-1.59,6.84-1.59,10.7,0,10.39,5.77,21.64,16.3,27.95,6.32,3.78,14.57,5.21,23.68,5.21,5.88,0,11.6-.86,16.91-2.44,12.49-4.04,15.96-12.16,15.96-12.16v10.47h20.29v-34.27c-5.7-3.56-14.26-5.66-23.65-5.64,0,2.65,0,4.33,0,4.33,0,7.86-8.56,18.93-28.5,18.93-5.63,0-9.62-1.49-12.28-3.71-3.56-2.97-4.73-7.24-4.24-11.01.21-1.64,1.2-5.17,4.87-8.23l-.04-.11c8.42-6.89,24.97-9.64,40.17-9.04v.03c12.94.47,22.62,3.01,29.53,7.77,1.88,1.19,3.65,2.52,5.28,3.98,6.94,6.23,9.73,13.9,10.93,18.38,1.95,7.31,1.43,18.57,1.43,18.57h23.59v-112.2h-23.59Z"/>
</g>
<path id="LangChain-symbol" class="cls-1" d="M393.52,75.2c9.66,9.66,9.66,25.38,0,35.04l-21.64,21.29-.22-1.22c-1.58-8.75-5.74-16.69-12.02-22.97-4.73-4.72-10.32-8.21-16.62-10.37-3.91,3.93-6.06,9.08-6.06,14.5,0,1.1.1,2.24.3,3.38,3.47,1.25,6.54,3.18,9.12,5.76,9.66,9.66,9.66,25.38,0,35.04l-18.84,18.84c-4.83,4.83-11.17,7.24-17.52,7.24s-12.69-2.41-17.52-7.24c-9.66-9.66-9.66-25.38,0-35.04l21.64-21.28.22,1.22c1.57,8.73,5.73,16.67,12.03,22.96,4.74,4.74,9.99,7.89,16.28,10.04l1.16-1.16c3.52-3.52,5.45-8.2,5.45-13.19,0-1.11-.1-2.22-.29-3.31-3.63-1.2-6.62-2.91-9.34-5.63-3.92-3.92-6.36-8.93-7.04-14.48-.05-.4-.08-.79-.12-1.19-.54-7.23,2.07-14.29,7.16-19.37l18.84-18.84c4.67-4.67,10.89-7.25,17.52-7.25s12.85,2.57,17.52,7.25ZM491.9,125c0,68.93-56.08,125-125,125H125C56.08,250,0,193.93,0,125S56.08,0,125,0h241.9c68.93,0,125,56.08,125,125ZM240.9,187.69c1.97-2.39-7.13-9.12-8.99-11.59-3.78-4.1-3.8-10-6.35-14.79-6.24-14.46-13.41-28.81-23.44-41.05-10.6-13.39-23.68-24.47-35.17-37.04-8.53-8.77-10.81-21.26-18.34-30.69-10.38-15.33-43.2-19.51-48.01,2.14.02.68-.19,1.11-.78,1.54-2.66,1.93-5.03,4.14-7.02,6.81-4.87,6.78-5.62,18.28.46,24.37.2-3.21.31-6.24,2.85-8.54,4.7,4.03,11.8,5.46,17.25,2.45,12.04,17.19,9.04,40.97,18.6,59.49,2.64,4.38,5.3,8.85,8.69,12.69,2.75,4.28,12.25,9.33,12.81,13.29.1,6.8-.7,14.23,3.76,19.92,2.1,4.26-3.06,8.54-7.22,8.01-5.4.74-11.99-3.63-16.72-.94-1.67,1.81-4.94-.19-6.38,2.32-.5,1.3-3.2,3.13-1.59,4.38,1.79-1.36,3.45-2.78,5.86-1.97-.36,1.96,1.19,2.24,2.42,2.81-.04,1.33-.82,2.69.2,3.82,1.19-1.2,1.9-2.9,3.79-3.4,6.28,8.37,12.67-8.47,26.26-.89-2.76-.14-5.21.21-7.07,2.48-.46.51-.85,1.11-.04,1.77,7.33-4.73,7.29,1.62,12.05-.33,3.66-1.91,7.3-4.3,11.65-3.62-4.23,1.22-4.4,4.62-6.88,7.49-.42.44-.62.94-.13,1.67,8.78-.74,9.5-3.66,16.59-7.24,5.29-3.23,10.56,4.6,15.14.14,1.01-.97,2.39-.64,3.64-.77-1.6-8.53-19.19,1.56-18.91-9.88,5.66-3.85,4.36-11.22,4.74-17.17,6.51,3.61,13.75,5.71,20.13,9.16,3.22,5.2,8.27,12.07,15,11.62.18-.52.34-.98.53-1.51,2.04.35,4.66,1.7,5.78-.88,3.05,3.19,7.53,3.03,11.52,2.21,2.95-2.4-5.55-5.82-6.69-8.29ZM419.51,92.72c0-11.64-4.52-22.57-12.73-30.78-8.21-8.21-19.14-12.73-30.79-12.73s-22.58,4.52-30.79,12.73l-18.84,18.84c-4.4,4.4-7.74,9.57-9.93,15.36l-.13.33-.34.1c-6.84,2.11-12.87,5.73-17.92,10.78l-18.84,18.84c-16.97,16.98-16.97,44.6,0,61.57,8.21,8.21,19.14,12.73,30.78,12.73h0c11.64,0,22.58-4.52,30.79-12.73l18.84-18.84c4.38-4.38,7.7-9.53,9.89-15.31l.13-.33.34-.11c6.72-2.06,12.92-5.8,17.95-10.82l18.84-18.84c8.21-8.21,12.73-19.14,12.73-30.79ZM172.38,173.6c-1.62,6.32-2.15,17.09-10.37,17.4-.68,3.65,2.53,5.02,5.44,3.85,2.89-1.33,4.26,1.05,5.23,3.42,4.46.65,11.06-1.49,11.31-6.77-6.66-3.84-8.72-11.14-11.62-17.9Z"/>
</g>
</svg>


@@ -1,25 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<svg id="Layer_1" data-name="Layer 1" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 1584.81 250">
<defs>
<style>
.cls-1 {
fill: #fff;
stroke-width: 0px;
}
</style>
</defs>
<g id="LanChain-logo">
<g id="LangChain-logotype">
<polygon class="cls-1" points="596.33 49.07 596.33 200.67 700.76 200.67 700.76 177.78 620.04 177.78 620.04 49.07 596.33 49.07"/>
<path class="cls-1" d="M1126.83,49.07c-20.53,0-37.95,7.4-50.38,21.41-12.32,13.88-18.82,33.36-18.82,56.33,0,47.23,27.25,77.75,69.41,77.75,29.71,0,52.71-15.54,61.54-41.56l2.14-6.31-23.53-8.94-2.17,7.03c-5.26,17.01-18.75,26.38-37.99,26.38-27.48,0-44.55-20.82-44.55-54.34s17.23-54.34,44.97-54.34c19.23,0,30.31,7.54,35.95,24.44l2.46,7.37,22.91-10.75-2.1-5.9c-8.96-25.22-29.65-38.56-59.85-38.56Z"/>
<path class="cls-1" d="M756.43,85.05c-22.76,0-39.78,10.67-46.69,29.27-.44,1.19-1.77,4.78-1.77,4.78l19.51,12.62,2.65-6.91c4.52-11.78,12.88-17.27,26.3-17.27s21.1,6.51,20.96,19.33c0,.52-.04,2.09-.04,2.09,0,0-17.76,2.88-25.08,4.43-31.23,6.6-44.31,18.52-44.31,38.02,0,10.39,5.77,21.64,16.3,27.95,6.32,3.78,14.57,5.21,23.68,5.21,5.99,0,11.81-.89,17.2-2.53,12.25-4.07,15.67-12.07,15.67-12.07v10.46h20.29v-74.78c0-25.42-16.7-40.6-44.67-40.6ZM777.46,164.85c0,7.86-8.56,18.93-28.5,18.93-5.63,0-9.62-1.49-12.28-3.71-3.56-2.97-4.73-7.24-4.24-11.01.21-1.64,1.2-5.17,4.87-8.23,3.75-3.13,10.38-5.37,20.62-7.6,8.42-1.83,19.54-3.85,19.54-3.85v15.48Z"/>
<path class="cls-1" d="M876.11,85.04c-2.82,0-5.57.2-8.24.57-18.17,2.73-23.49,11.96-23.49,11.96l.02-9.31h-22.74s0,112.19,0,112.19h23.71v-62.18c0-21.13,15.41-30.75,29.73-30.75,15.48,0,23,8.32,23,25.45v67.48h23.71v-70.74c0-27.56-17.51-44.67-45.69-44.67Z"/>
<path class="cls-1" d="M1539.12,85.04c-2.82,0-5.57.2-8.24.57-18.17,2.73-23.49,11.96-23.49,11.96v-9.32h-22.72v112.2h23.71v-62.18c0-21.13,15.41-30.75,29.73-30.75,15.48,0,23,8.32,23,25.45v67.48h23.71v-70.74c0-27.56-17.51-44.67-45.69-44.67Z"/>
<path class="cls-1" d="M1020.76,88.26v11.55s-5.81-14.77-32.24-14.77c-32.84,0-53.24,22.66-53.24,59.15,0,20.59,6.58,36.8,18.19,47.04,9.03,7.96,21.09,12.04,35.45,12.32,9.99.19,16.46-2.53,20.5-5.1,7.76-4.94,10.64-9.63,10.64-9.63,0,0-.33,3.67-.93,8.64-.43,3.6-1.24,6.13-1.24,6.13h0c-3.61,12.85-14.17,20.28-29.57,20.28s-24.73-5.07-26.58-15.06l-23.05,6.88c3.98,19.2,22,30.66,48.2,30.66,17.81,0,31.77-4.84,41.5-14.4,9.81-9.64,14.79-23.53,14.79-41.29v-102.41h-22.42ZM1019.26,145.21c0,22.44-10.96,35.84-29.32,35.84-19.67,0-30.95-13.44-30.95-36.86s11.28-36.66,30.95-36.66c17.92,0,29.15,13.34,29.32,34.82v2.86Z"/>
<path class="cls-1" d="M1259.01,85.04c-2.6,0-5.13.17-7.59.49-17.88,2.79-23.14,11.9-23.14,11.9v-2.67h-.01s0-45.69,0-45.69h-23.71v151.39h23.71v-62.18c0-21.27,15.41-30.95,29.73-30.95,15.48,0,23,8.32,23,25.45v67.68h23.71v-70.94c0-27.01-17.94-44.47-45.69-44.47Z"/>
<circle class="cls-1" cx="1450.93" cy="64.47" r="15.37"/>
<path class="cls-1" d="M1439.14,88.2v56.94h0c-6.75-5.56-14.6-9.75-23.5-12.26v-7.23c0-25.42-16.7-40.6-44.67-40.6-22.76,0-39.78,10.67-46.69,29.27-.44,1.19-1.77,4.78-1.77,4.78l19.51,12.62,2.65-6.91c4.52-11.78,12.88-17.27,26.3-17.27s21.1,6.51,20.96,19.33c0,.08,0,1.15,0,2.86-10.04-.28-19.38.69-27.77,2.66,0,0,0,0,0,0-11.06,2.5-31.6,8.85-38.94,25.36-.05.11-1.13,2.96-1.13,2.96-1.06,3.28-1.59,6.84-1.59,10.7,0,10.39,5.77,21.64,16.3,27.95,6.32,3.78,14.57,5.21,23.68,5.21,5.88,0,11.6-.86,16.91-2.44,12.49-4.04,15.96-12.16,15.96-12.16v10.47h20.29v-34.27c-5.7-3.56-14.26-5.66-23.65-5.64,0,2.65,0,4.33,0,4.33,0,7.86-8.56,18.93-28.5,18.93-5.63,0-9.62-1.49-12.28-3.71-3.56-2.97-4.73-7.24-4.24-11.01.21-1.64,1.2-5.17,4.87-8.23l-.04-.11c8.42-6.89,24.97-9.64,40.17-9.04v.03c12.94.47,22.62,3.01,29.53,7.77,1.88,1.19,3.65,2.52,5.28,3.98,6.94,6.23,9.73,13.9,10.93,18.38,1.95,7.31,1.43,18.57,1.43,18.57h23.59v-112.2h-23.59Z"/>
</g>
<path id="LangChain-symbol" class="cls-1" d="M393.52,75.2c9.66,9.66,9.66,25.38,0,35.04l-21.64,21.29-.22-1.22c-1.58-8.75-5.74-16.69-12.02-22.97-4.73-4.72-10.32-8.21-16.62-10.37-3.91,3.93-6.06,9.08-6.06,14.5,0,1.1.1,2.24.3,3.38,3.47,1.25,6.54,3.18,9.12,5.76,9.66,9.66,9.66,25.38,0,35.04l-18.84,18.84c-4.83,4.83-11.17,7.24-17.52,7.24s-12.69-2.41-17.52-7.24c-9.66-9.66-9.66-25.38,0-35.04l21.64-21.28.22,1.22c1.57,8.73,5.73,16.67,12.03,22.96,4.74,4.74,9.99,7.89,16.28,10.04l1.16-1.16c3.52-3.52,5.45-8.2,5.45-13.19,0-1.11-.1-2.22-.29-3.31-3.63-1.2-6.62-2.91-9.34-5.63-3.92-3.92-6.36-8.93-7.04-14.48-.05-.4-.08-.79-.12-1.19-.54-7.23,2.07-14.29,7.16-19.37l18.84-18.84c4.67-4.67,10.89-7.25,17.52-7.25s12.85,2.57,17.52,7.25ZM491.9,125c0,68.93-56.08,125-125,125H125C56.08,250,0,193.93,0,125S56.08,0,125,0h241.9C435.82,0,491.9,56.08,491.9,125ZM240.9,187.69c1.97-2.39-7.13-9.12-8.99-11.59-3.78-4.1-3.8-10-6.35-14.79-6.24-14.46-13.41-28.81-23.44-41.05-10.6-13.39-23.68-24.47-35.17-37.04-8.53-8.77-10.81-21.26-18.34-30.69-10.38-15.33-43.2-19.51-48.01,2.14.02.68-.19,1.11-.78,1.54-2.66,1.93-5.03,4.14-7.02,6.81-4.87,6.78-5.62,18.28.46,24.37.2-3.21.31-6.24,2.85-8.54,4.7,4.03,11.8,5.46,17.25,2.45,12.04,17.19,9.04,40.97,18.6,59.49,2.64,4.38,5.3,8.85,8.69,12.69,2.75,4.28,12.25,9.33,12.81,13.29.1,6.8-.7,14.23,3.76,19.92,2.1,4.26-3.06,8.54-7.22,8.01-5.4.74-11.99-3.63-16.72-.94-1.67,1.81-4.94-.19-6.38,2.32-.5,1.3-3.2,3.13-1.59,4.38,1.79-1.36,3.45-2.78,5.86-1.97-.36,1.96,1.19,2.24,2.42,2.81-.04,1.33-.82,2.69.2,3.82,1.19-1.2,1.9-2.9,3.79-3.4,6.28,8.37,12.67-8.47,26.26-.89-2.76-.14-5.21.21-7.07,2.48-.46.51-.85,1.11-.04,1.77,7.33-4.73,7.29,1.62,12.05-.33,3.66-1.91,7.3-4.3,11.65-3.62-4.23,1.22-4.4,4.62-6.88,7.49-.42.44-.62.94-.13,1.67,8.78-.74,9.5-3.66,16.59-7.24,5.29-3.23,10.56,4.6,15.14.14,1.01-.97,2.39-.64,3.64-.77-1.6-8.53-19.19,1.56-18.91-9.88,5.66-3.85,4.36-11.22,4.74-17.17,6.51,3.61,13.75,5.71,20.13,9.16,3.22,5.2,8.27,12.07,15,11.62.18-.52.34-.98.53-1.51,2.04.35,4.66,1.7,5.78-.88,3.05,3.19,7.53,3.03,11.52,2.21,2.95-2.4-5.55-5.82-6.69-8.29ZM419.51,92.72c0-11.64-4.52-22.57-12.73-30.78-8.21-8.21-19.14-12.73-30.79-12.73s-22.58,4.52-30.79,12.73l-18.84,18.84c-4.4,4.4-7.74,9.57-9.93,15.36l-.13.33-.34.1c-6.84,2.11-12.87,5.73-17.92,10.78l-18.84,18.84c-16.97,16.98-16.97,44.6,0,61.57,8.21,8.21,19.14,12.73,30.78,12.73h0c11.64,0,22.58-4.52,30.79-12.73l18.84-18.84c4.38-4.38,7.7-9.53,9.89-15.31l.13-.33.34-.11c6.72-2.06,12.92-5.8,17.95-10.82l18.84-18.84c8.21-8.21,12.73-19.14,12.73-30.79ZM172.38,173.6c-1.62,6.32-2.15,17.09-10.37,17.4-.68,3.65,2.53,5.02,5.44,3.85,2.89-1.33,4.26,1.05,5.23,3.42,4.46.65,11.06-1.49,11.31-6.77-6.66-3.84-8.72-11.14-11.62-17.9Z"/>
</g>
</svg>


@@ -1,84 +0,0 @@
# Label PRs (config)
# Automatically applies labels based on changed files and branch patterns
# Core packages
core:
- changed-files:
- any-glob-to-any-file:
- "libs/core/**/*"
langchain:
- changed-files:
- any-glob-to-any-file:
- "libs/langchain/**/*"
- "libs/langchain_v1/**/*"
v1:
- changed-files:
- any-glob-to-any-file:
- "libs/langchain_v1/**/*"
cli:
- changed-files:
- any-glob-to-any-file:
- "libs/cli/**/*"
standard-tests:
- changed-files:
- any-glob-to-any-file:
- "libs/standard-tests/**/*"
text-splitters:
- changed-files:
- any-glob-to-any-file:
- "libs/text-splitters/**/*"
# Partner integrations
integration:
- changed-files:
- any-glob-to-any-file:
- "libs/partners/**/*"
# Infrastructure and DevOps
infra:
- changed-files:
- any-glob-to-any-file:
- ".github/**/*"
- "Makefile"
- ".pre-commit-config.yaml"
- "scripts/**/*"
- "docker/**/*"
- "Dockerfile*"
github_actions:
- changed-files:
- any-glob-to-any-file:
- ".github/workflows/**/*"
- ".github/actions/**/*"
dependencies:
- changed-files:
- any-glob-to-any-file:
- "**/pyproject.toml"
- "uv.lock"
- "**/requirements*.txt"
- "**/poetry.lock"
# Documentation
documentation:
- changed-files:
- any-glob-to-any-file:
- "**/*.md"
- "**/*.rst"
- "**/README*"
# Security related changes
security:
- changed-files:
- any-glob-to-any-file:
- "**/*security*"
- "**/*auth*"
- "**/*credential*"
- "**/*secret*"
- "**/*token*"
- ".github/workflows/security*"


@@ -1,41 +0,0 @@
# PR title labeler config
#
# Labels PRs based on conventional commit patterns in titles
#
# Format: type(scope): description or type!: description (breaking)
add-missing-labels: true
clear-prexisting: false
include-commits: false
include-title: true
label-for-breaking-changes: breaking
label-mapping:
documentation: ["docs"]
feature: ["feat"]
fix: ["fix"]
infra: ["build", "ci", "chore"]
integration:
[
"anthropic",
"chroma",
"deepseek",
"exa",
"fireworks",
"groq",
"huggingface",
"mistralai",
"nomic",
"ollama",
"openai",
"perplexity",
"prompty",
"qdrant",
"xai",
]
linting: ["style"]
performance: ["perf"]
refactor: ["refactor"]
release: ["release"]
revert: ["revert"]
tests: ["test"]


@@ -1,251 +1,17 @@
"""Analyze git diffs to determine which directories need to be tested.
Intelligently determines which LangChain packages and directories need to be tested,
linted, or built based on the changes. Handles dependency relationships between
packages, maps file changes to appropriate CI job configurations, and outputs JSON
configurations for GitHub Actions.
- Maps changed files to affected package directories (libs/core, libs/partners/*, etc.)
- Builds dependency graph to include dependent packages when core components change
- Generates test matrix configurations with appropriate Python versions
- Handles special cases for Pydantic version testing and performance benchmarks
Used as part of the check_diffs workflow.
"""
import glob
import json
import os
import sys
from collections import defaultdict
from pathlib import Path
from typing import Dict, List, Set
import tomllib
from get_min_versions import get_min_version_from_toml
from packaging.requirements import Requirement
LANGCHAIN_DIRS = [
ALL_DIRS = {
"libs/core",
"libs/text-splitters",
"libs/langchain",
"libs/langchain_v1",
]
# When set to True, we are ignoring core dependents
# in order to be able to get CI to pass for each individual
# package that depends on core
# e.g. if you touch core, we don't then add textsplitters/etc to CI
IGNORE_CORE_DEPENDENTS = False
# ignored partners are removed from dependents
# but still run if directly edited
IGNORED_PARTNERS = [
# remove huggingface from dependents because of CI instability
# specifically in huggingface jobs
# https://github.com/langchain-ai/langchain/issues/25558
"huggingface",
# prompty exhibiting issues with numpy for Python 3.13
# https://github.com/langchain-ai/langchain/actions/runs/12651104685/job/35251034969?pr=29065
"prompty",
]
def all_package_dirs() -> Set[str]:
return {
"/".join(path.split("/")[:-1]).lstrip("./")
for path in glob.glob("./libs/**/pyproject.toml", recursive=True)
if "libs/cli" not in path and "libs/standard-tests" not in path
}
def dependents_graph() -> dict:
"""Construct a mapping of package -> dependents
Done such that we can run tests on all dependents of a package when a change is made.
"""
dependents = defaultdict(set)
for path in glob.glob("./libs/**/pyproject.toml", recursive=True):
if "template" in path:
continue
# load regular and test deps from pyproject.toml
with open(path, "rb") as f:
pyproject = tomllib.load(f)
pkg_dir = "libs" + "/".join(path.split("libs")[1].split("/")[:-1])
for dep in [
*pyproject["project"]["dependencies"],
*pyproject["dependency-groups"]["test"],
]:
requirement = Requirement(dep)
package_name = requirement.name
if "langchain" in dep:
dependents[package_name].add(pkg_dir)
continue
# load extended deps from extended_testing_deps.txt
package_path = Path(path).parent
extended_requirement_path = package_path / "extended_testing_deps.txt"
if extended_requirement_path.exists():
with open(extended_requirement_path, "r") as f:
extended_deps = f.read().splitlines()
for depline in extended_deps:
if depline.startswith("-e "):
# editable dependency
assert depline.startswith("-e ../partners/"), (
"Extended test deps should only editable install partner packages"
)
partner = depline.split("partners/")[1]
dep = f"langchain-{partner}"
else:
dep = depline.split("==")[0]
if "langchain" in dep:
dependents[dep].add(pkg_dir)
for k in dependents:
for partner in IGNORED_PARTNERS:
if f"libs/partners/{partner}" in dependents[k]:
dependents[k].remove(f"libs/partners/{partner}")
return dependents
def add_dependents(dirs_to_eval: Set[str], dependents: dict) -> List[str]:
updated = set()
for dir_ in dirs_to_eval:
# handle core manually because it has so many dependents
if "core" in dir_:
updated.add(dir_)
continue
pkg = "langchain-" + dir_.split("/")[-1]
updated.update(dependents[pkg])
updated.add(dir_)
return list(updated)
def _get_configs_for_single_dir(job: str, dir_: str) -> List[Dict[str, str]]:
if job == "test-pydantic":
return _get_pydantic_test_configs(dir_)
if job == "codspeed":
py_versions = ["3.12"] # 3.13 is not yet supported
elif dir_ == "libs/core":
py_versions = ["3.10", "3.11", "3.12", "3.13"]
# custom logic for specific directories
elif dir_ == "libs/langchain" and job == "extended-tests":
py_versions = ["3.10", "3.13"]
elif dir_ == "libs/langchain_v1":
py_versions = ["3.10", "3.13"]
elif dir_ in {"libs/cli"}:
py_versions = ["3.10", "3.13"]
elif dir_ == ".":
# unable to install with 3.13 because tokenizers doesn't support 3.13 yet
py_versions = ["3.10", "3.12"]
else:
py_versions = ["3.10", "3.13"]
return [{"working-directory": dir_, "python-version": py_v} for py_v in py_versions]
def _get_pydantic_test_configs(
dir_: str, *, python_version: str = "3.11"
) -> List[Dict[str, str]]:
with open("./libs/core/uv.lock", "rb") as f:
core_uv_lock_data = tomllib.load(f)
for package in core_uv_lock_data["package"]:
if package["name"] == "pydantic":
core_max_pydantic_minor = package["version"].split(".")[1]
break
with open(f"./{dir_}/uv.lock", "rb") as f:
dir_uv_lock_data = tomllib.load(f)
for package in dir_uv_lock_data["package"]:
if package["name"] == "pydantic":
dir_max_pydantic_minor = package["version"].split(".")[1]
break
core_min_pydantic_version = get_min_version_from_toml(
"./libs/core/pyproject.toml", "release", python_version, include=["pydantic"]
)["pydantic"]
core_min_pydantic_minor = (
core_min_pydantic_version.split(".")[1]
if "." in core_min_pydantic_version
else "0"
)
dir_min_pydantic_version = get_min_version_from_toml(
f"./{dir_}/pyproject.toml", "release", python_version, include=["pydantic"]
).get("pydantic", "0.0.0")
dir_min_pydantic_minor = (
dir_min_pydantic_version.split(".")[1]
if "." in dir_min_pydantic_version
else "0"
)
max_pydantic_minor = min(
int(dir_max_pydantic_minor),
int(core_max_pydantic_minor),
)
min_pydantic_minor = max(
int(dir_min_pydantic_minor),
int(core_min_pydantic_minor),
)
configs = [
{
"working-directory": dir_,
"pydantic-version": f"2.{v}.0",
"python-version": python_version,
}
for v in range(min_pydantic_minor, max_pydantic_minor + 1)
]
return configs
def _get_configs_for_multi_dirs(
job: str, dirs_to_run: Dict[str, Set[str]], dependents: dict
) -> List[Dict[str, str]]:
if job == "lint":
dirs = add_dependents(
dirs_to_run["lint"] | dirs_to_run["test"] | dirs_to_run["extended-test"],
dependents,
)
elif job in ["test", "compile-integration-tests", "dependencies", "test-pydantic"]:
dirs = add_dependents(
dirs_to_run["test"] | dirs_to_run["extended-test"], dependents
)
elif job == "extended-tests":
dirs = list(dirs_to_run["extended-test"])
elif job == "codspeed":
dirs = list(dirs_to_run["codspeed"])
else:
raise ValueError(f"Unknown job: {job}")
return [
config for dir_ in dirs for config in _get_configs_for_single_dir(job, dir_)
]
"libs/experimental",
"libs/community",
"libs/partners/openai",
}
if __name__ == "__main__":
files = sys.argv[1:]
dirs_to_run: Dict[str, set] = {
"lint": set(),
"test": set(),
"extended-test": set(),
"codspeed": set(),
}
docs_edited = False
if len(files) >= 300:
# max diff length is 300 files - there are likely files missing
dirs_to_run["lint"] = all_package_dirs()
dirs_to_run["test"] = all_package_dirs()
dirs_to_run["extended-test"] = set(LANGCHAIN_DIRS)
dirs_to_run = set()
for file in files:
if any(
@@ -254,93 +20,28 @@ if __name__ == "__main__":
".github/workflows",
".github/tools",
".github/actions",
"libs/core",
".github/scripts/check_diff.py",
)
):
# Infrastructure changes (workflows, actions, CI scripts) trigger tests on
# all core packages as a safety measure. This ensures that changes to CI/CD
# infrastructure don't inadvertently break package testing, even if the change
# appears unrelated (e.g., documentation build workflows). This is intentionally
# conservative to catch unexpected side effects from workflow modifications.
#
# Example: A PR modifying .github/workflows/api_doc_build.yml will trigger
# lint/test jobs for libs/core, libs/text-splitters, libs/langchain, and
# libs/langchain_v1, even though the workflow may only affect documentation.
dirs_to_run["extended-test"].update(LANGCHAIN_DIRS)
if file.startswith("libs/core"):
dirs_to_run["codspeed"].add("libs/core")
if any(file.startswith(dir_) for dir_ in LANGCHAIN_DIRS):
# add that dir and all dirs after in LANGCHAIN_DIRS
# for extended testing
found = False
for dir_ in LANGCHAIN_DIRS:
if dir_ == "libs/core" and IGNORE_CORE_DEPENDENTS:
dirs_to_run["extended-test"].add(dir_)
continue
if file.startswith(dir_):
found = True
if found:
dirs_to_run["extended-test"].add(dir_)
elif file.startswith("libs/standard-tests"):
# TODO: update to include all packages that rely on standard-tests (all partner packages)
# Note: won't run on external repo partners
dirs_to_run["lint"].add("libs/standard-tests")
dirs_to_run["test"].add("libs/standard-tests")
dirs_to_run["test"].add("libs/partners/mistralai")
dirs_to_run["test"].add("libs/partners/openai")
dirs_to_run["test"].add("libs/partners/anthropic")
dirs_to_run["test"].add("libs/partners/fireworks")
dirs_to_run["test"].add("libs/partners/groq")
elif file.startswith("libs/cli"):
dirs_to_run["lint"].add("libs/cli")
dirs_to_run["test"].add("libs/cli")
elif file.startswith("libs/partners"):
partner_dir = file.split("/")[2]
if os.path.isdir(f"libs/partners/{partner_dir}") and [
filename
for filename in os.listdir(f"libs/partners/{partner_dir}")
if not filename.startswith(".")
] != ["README.md"]:
dirs_to_run["test"].add(f"libs/partners/{partner_dir}")
dirs_to_run["codspeed"].add(f"libs/partners/{partner_dir}")
# Skip if the directory was deleted or is just a tombstone readme
elif file.startswith("libs/"):
# Check if this is a root-level file in libs/ (e.g., libs/README.md)
file_parts = file.split("/")
if len(file_parts) == 2:
# Root-level file in libs/, skip it (no tests needed)
continue
raise ValueError(
f"Unknown lib: {file}. check_diff.py likely needs "
"an update for this new library!"
dirs_to_run = ALL_DIRS
break
elif "libs/community" in file:
dirs_to_run.update(
("libs/community", "libs/langchain", "libs/experimental")
)
elif file in [
"pyproject.toml",
"uv.lock",
]: # root uv files
docs_edited = True
dependents = dependents_graph()
# we now have dirs_by_job
# todo: clean this up
map_job_to_configs = {
job: _get_configs_for_multi_dirs(job, dirs_to_run, dependents)
for job in [
"lint",
"test",
"extended-tests",
"compile-integration-tests",
"dependencies",
"test-pydantic",
"codspeed",
]
}
for key, value in map_job_to_configs.items():
json_output = json.dumps(value)
print(f"{key}={json_output}")
elif "libs/partners" in file:
partner_dir = file.split("/")[2]
dirs_to_run.update(
(f"libs/partners/{partner_dir}", "libs/langchain", "libs/experimental")
)
elif "libs/langchain" in file:
dirs_to_run.update(("libs/langchain", "libs/experimental"))
elif "libs/experimental" in file:
dirs_to_run.add("libs/experimental")
elif file.startswith("libs/"):
dirs_to_run = ALL_DIRS
break
else:
pass
print(json.dumps(list(dirs_to_run)))


@@ -1,36 +0,0 @@
"""Check that no dependencies allow prereleases unless we're releasing a prerelease."""
import sys
import tomllib
if __name__ == "__main__":
# Get the TOML file path from the command line argument
toml_file = sys.argv[1]
with open(toml_file, "rb") as file:
toml_data = tomllib.load(file)
# See if we're releasing an rc or dev version
version = toml_data["project"]["version"]
releasing_rc = "rc" in version or "dev" in version
# If not, iterate through dependencies and make sure none allow prereleases
if not releasing_rc:
dependencies = toml_data["project"]["dependencies"]
for dep_version in dependencies:
dep_version_string = (
dep_version["version"] if isinstance(dep_version, dict) else dep_version
)
if "rc" in dep_version_string:
raise ValueError(
f"Dependency {dep_version} has a prerelease version. Please remove this."
)
if isinstance(dep_version, dict) and dep_version.get(
"allow-prereleases", False
):
raise ValueError(
f"Dependency {dep_version} has allow-prereleases set to true. Please remove this."
)


@@ -1,199 +0,0 @@
"""Get minimum versions of dependencies from a pyproject.toml file."""
import sys
from collections import defaultdict
if sys.version_info >= (3, 11):
import tomllib
else:
    # For Python 3.10 and below, which doesn't have stdlib tomllib
import tomli as tomllib
import re
from typing import List
import requests
from packaging.requirements import Requirement
from packaging.specifiers import SpecifierSet
from packaging.version import Version, parse
MIN_VERSION_LIBS = [
"langchain-core",
"langchain",
"langchain-text-splitters",
"numpy",
"SQLAlchemy",
]
# some libs only get checked on release because of simultaneous changes in
# multiple libs
SKIP_IF_PULL_REQUEST = [
"langchain-core",
"langchain-text-splitters",
"langchain",
]
def get_pypi_versions(package_name: str) -> List[str]:
"""Fetch all available versions for a package from PyPI.
Args:
package_name: Name of the package
Returns:
List of all available versions
Raises:
requests.exceptions.RequestException: If PyPI API request fails
KeyError: If package not found or response format unexpected
"""
pypi_url = f"https://pypi.org/pypi/{package_name}/json"
response = requests.get(pypi_url)
response.raise_for_status()
return list(response.json()["releases"].keys())
def get_minimum_version(package_name: str, spec_string: str) -> str | None:
"""Find the minimum published version that satisfies the given constraints.
Args:
package_name: Name of the package
spec_string: Version specification string (e.g., ">=0.2.43,<0.4.0,!=0.3.0")
Returns:
Minimum compatible version or None if no compatible version found
"""
# Rewrite occurrences of ^0.0.z to 0.0.z (can be anywhere in constraint string)
spec_string = re.sub(r"\^0\.0\.(\d+)", r"0.0.\1", spec_string)
# Rewrite occurrences of ^0.y.z to >=0.y.z,<0.y+1 (can be anywhere in constraint string)
for y in range(1, 10):
spec_string = re.sub(
rf"\^0\.{y}\.(\d+)", rf">=0.{y}.\1,<0.{y + 1}", spec_string
)
# Rewrite occurrences of ^x.y.z to >=x.y.z,<x+1.0.0 (can be anywhere in constraint string)
for x in range(1, 10):
spec_string = re.sub(
rf"\^{x}\.(\d+)\.(\d+)", rf">={x}.\1.\2,<{x + 1}", spec_string
)
spec_set = SpecifierSet(spec_string)
all_versions = get_pypi_versions(package_name)
valid_versions = []
for version_str in all_versions:
try:
version = parse(version_str)
if spec_set.contains(version):
valid_versions.append(version)
except ValueError:
continue
return str(min(valid_versions)) if valid_versions else None
def _check_python_version_from_requirement(
requirement: Requirement, python_version: str
) -> bool:
if not requirement.marker:
return True
else:
marker_str = str(requirement.marker)
if "python_version" or "python_full_version" in marker_str:
python_version_str = "".join(
char
for char in marker_str
if char.isdigit() or char in (".", "<", ">", "=", ",")
)
return check_python_version(python_version, python_version_str)
return True
def get_min_version_from_toml(
toml_path: str,
versions_for: str,
python_version: str,
*,
include: list | None = None,
):
# Parse the TOML file
with open(toml_path, "rb") as file:
toml_data = tomllib.load(file)
dependencies = defaultdict(list)
for dep in toml_data["project"]["dependencies"]:
requirement = Requirement(dep)
dependencies[requirement.name].append(requirement)
# Initialize a dictionary to store the minimum versions
min_versions = {}
# Iterate over the libs in MIN_VERSION_LIBS
for lib in set(MIN_VERSION_LIBS + (include or [])):
if versions_for == "pull_request" and lib in SKIP_IF_PULL_REQUEST:
# some libs only get checked on release because of simultaneous
# changes in multiple libs
continue
# Check if the lib is present in the dependencies
if lib in dependencies:
if include and lib not in include:
continue
requirements = dependencies[lib]
for requirement in requirements:
if _check_python_version_from_requirement(requirement, python_version):
version_string = str(requirement.specifier)
break
# Use parse_version to get the minimum supported version from version_string
min_version = get_minimum_version(lib, version_string)
# Store the minimum version in the min_versions dictionary
min_versions[lib] = min_version
return min_versions
def check_python_version(version_string, constraint_string):
"""Check if the given Python version matches the given constraints.
Args:
version_string: A string representing the Python version (e.g. "3.8.5").
constraint_string: A string representing the package's Python version
constraints (e.g. ">=3.6, <4.0").
Returns:
True if the version matches the constraints
"""
# Rewrite occurrences of ^0.0.z to 0.0.z (can be anywhere in constraint string)
constraint_string = re.sub(r"\^0\.0\.(\d+)", r"0.0.\1", constraint_string)
# Rewrite occurrences of ^0.y.z to >=0.y.z,<0.y+1.0 (can be anywhere in constraint string)
for y in range(1, 10):
constraint_string = re.sub(
rf"\^0\.{y}\.(\d+)", rf">=0.{y}.\1,<0.{y + 1}.0", constraint_string
)
# Rewrite occurrences of ^x.y.z to >=x.y.z,<x+1.0.0 (can be anywhere in constraint string)
for x in range(1, 10):
constraint_string = re.sub(
rf"\^{x}\.0\.(\d+)", rf">={x}.0.\1,<{x + 1}.0.0", constraint_string
)
try:
version = Version(version_string)
constraints = SpecifierSet(constraint_string)
return version in constraints
except Exception as e:
print(f"Error: {e}")
return False
if __name__ == "__main__":
# Get the TOML file path from the command line argument
toml_file = sys.argv[1]
versions_for = sys.argv[2]
python_version = sys.argv[3]
assert versions_for in ["release", "pull_request"]
# Call the function to get the minimum versions
min_versions = get_min_version_from_toml(toml_file, versions_for, python_version)
print(" ".join([f"{lib}=={version}" for lib, version in min_versions.items()]))


@@ -81,93 +81,56 @@ import time
__version__ = "2022.12+dev"
# Update symlinks only if the platform supports not following them
UPDATE_SYMLINKS = bool(os.utime in getattr(os, "supports_follow_symlinks", []))
UPDATE_SYMLINKS = bool(os.utime in getattr(os, 'supports_follow_symlinks', []))
# Call os.path.normpath() only if not in a POSIX platform (Windows)
NORMALIZE_PATHS = os.path.sep != "/"
NORMALIZE_PATHS = (os.path.sep != '/')
# How many files to process in each batch when re-trying merge commits
STEPMISSING = 100
# (Extra) keywords for the os.utime() call performed by touch()
UTIME_KWS = {} if not UPDATE_SYMLINKS else {"follow_symlinks": False}
UTIME_KWS = {} if not UPDATE_SYMLINKS else {'follow_symlinks': False}
# Command-line interface ######################################################
def parse_args():
parser = argparse.ArgumentParser(description=__doc__.split("\n---")[0])
parser = argparse.ArgumentParser(
description=__doc__.split('\n---')[0])
group = parser.add_mutually_exclusive_group()
group.add_argument(
"--quiet",
"-q",
dest="loglevel",
action="store_const",
const=logging.WARNING,
default=logging.INFO,
help="Suppress informative messages and summary statistics.",
)
group.add_argument(
"--verbose",
"-v",
action="count",
help="""
group.add_argument('--quiet', '-q', dest='loglevel',
action="store_const", const=logging.WARNING, default=logging.INFO,
help="Suppress informative messages and summary statistics.")
group.add_argument('--verbose', '-v', action="count", help="""
Print additional information for each processed file.
Specify twice to further increase verbosity.
""",
)
""")
parser.add_argument(
"--cwd",
"-C",
metavar="DIRECTORY",
help="""
parser.add_argument('--cwd', '-C', metavar="DIRECTORY", help="""
Run as if %(prog)s was started in directory %(metavar)s.
This affects how --work-tree, --git-dir and PATHSPEC arguments are handled.
See 'man 1 git' or 'git --help' for more information.
""",
)
""")
parser.add_argument(
"--git-dir",
dest="gitdir",
metavar="GITDIR",
help="""
parser.add_argument('--git-dir', dest='gitdir', metavar="GITDIR", help="""
Path to the git repository, by default auto-discovered by searching
the current directory and its parents for a .git/ subdirectory.
""",
)
""")
parser.add_argument(
"--work-tree",
dest="workdir",
metavar="WORKTREE",
help="""
parser.add_argument('--work-tree', dest='workdir', metavar="WORKTREE", help="""
Path to the work tree root, by default the parent of GITDIR if it's
automatically discovered, or the current directory if GITDIR is set.
""",
)
""")
parser.add_argument(
"--force",
"-f",
default=False,
action="store_true",
help="""
parser.add_argument('--force', '-f', default=False, action="store_true", help="""
Force updating files with uncommitted modifications.
Untracked files and uncommitted deletions, renames and additions are
always ignored.
""",
)
""")
parser.add_argument(
"--merge",
"-m",
default=False,
action="store_true",
help="""
parser.add_argument('--merge', '-m', default=False, action="store_true", help="""
Include merge commits.
Leads to more recent times and more files per commit, thus with the same
time, which may or may not be what you want.
@@ -175,130 +138,71 @@ def parse_args():
are found sooner, which can improve performance, sometimes substantially.
But as merge commits are usually huge, processing them may also take longer.
By default, merge commits are only used for files missing from regular commits.
""",
)
""")
parser.add_argument(
"--first-parent",
default=False,
action="store_true",
help="""
parser.add_argument('--first-parent', default=False, action="store_true", help="""
Consider only the first parent, the "main branch", when evaluating merge commits.
Only effective when merge commits are processed, either when --merge is
used or when finding missing files after the first regular log search.
See --skip-missing.
""",
)
""")
parser.add_argument(
"--skip-missing",
"-s",
dest="missing",
default=True,
action="store_false",
help="""
parser.add_argument('--skip-missing', '-s', dest="missing", default=True,
action="store_false", help="""
Do not try to find missing files.
If merge commits were not evaluated with --merge and some files were
not found in regular commits, by default %(prog)s searches for these
files again in the merge commits.
This option disables this retry, so files found only in merge commits
will not have their timestamp updated.
""",
)
""")
parser.add_argument(
"--no-directories",
"-D",
dest="dirs",
default=True,
action="store_false",
help="""
parser.add_argument('--no-directories', '-D', dest='dirs', default=True,
action="store_false", help="""
Do not update directory timestamps.
By default, use the time of its most recently created, renamed or deleted file.
Note that just modifying a file will NOT update its directory time.
""",
)
""")
parser.add_argument(
"--test",
"-t",
default=False,
action="store_true",
help="Test run: do not actually update any file timestamp.",
)
parser.add_argument('--test', '-t', default=False, action="store_true",
help="Test run: do not actually update any file timestamp.")
parser.add_argument(
"--commit-time",
"-c",
dest="commit_time",
default=False,
action="store_true",
help="Use commit time instead of author time.",
)
parser.add_argument('--commit-time', '-c', dest='commit_time', default=False,
action='store_true', help="Use commit time instead of author time.")
parser.add_argument(
"--oldest-time",
"-o",
dest="reverse_order",
default=False,
action="store_true",
help="""
parser.add_argument('--oldest-time', '-o', dest='reverse_order', default=False,
action='store_true', help="""
Update times based on the oldest, instead of the most recent commit of a file.
This reverses the order in which the git log is processed to emulate a
file "creation" date. Note this will be inaccurate for files deleted and
re-created at later dates.
""",
)
""")
parser.add_argument(
"--skip-older-than",
metavar="SECONDS",
type=int,
help="""
parser.add_argument('--skip-older-than', metavar='SECONDS', type=int, help="""
Ignore files that are currently older than %(metavar)s.
Useful in workflows that assume such files already have a correct timestamp,
as it may improve performance by processing fewer files.
""",
)
""")
parser.add_argument(
"--skip-older-than-commit",
"-N",
default=False,
action="store_true",
help="""
parser.add_argument('--skip-older-than-commit', '-N', default=False,
action='store_true', help="""
Ignore files older than the timestamp it would be updated to.
Such files may be considered "original", likely in the author's repository.
""",
)
""")
parser.add_argument(
"--unique-times",
default=False,
action="store_true",
help="""
parser.add_argument('--unique-times', default=False, action="store_true", help="""
Set the microseconds to a unique value per commit.
Allows telling apart changes that would otherwise have identical timestamps,
as git's time accuracy is in seconds.
""",
)
""")
parser.add_argument(
"pathspec",
nargs="*",
metavar="PATHSPEC",
help="""
parser.add_argument('pathspec', nargs='*', metavar='PATHSPEC', help="""
Only modify paths matching %(metavar)s, relative to current directory.
By default, update all but untracked files and submodules.
""",
)
""")
parser.add_argument(
"--version",
"-V",
action="version",
version="%(prog)s version {version}".format(version=get_version()),
)
parser.add_argument('--version', '-V', action='version',
version='%(prog)s version {version}'.format(version=get_version()))
args_ = parser.parse_args()
if args_.verbose:
@@ -308,18 +212,17 @@ def parse_args():
def get_version(version=__version__):
if not version.endswith("+dev"):
if not version.endswith('+dev'):
return version
try:
cwd = os.path.dirname(os.path.realpath(__file__))
return Git(cwd=cwd, errors=False).describe().lstrip("v")
return Git(cwd=cwd, errors=False).describe().lstrip('v')
except Git.Error:
return "-".join((version, "unknown"))
return '-'.join((version, "unknown"))
# Helper functions ############################################################
def setup_logging():
"""Add TRACE logging level and corresponding method, return the root logger"""
logging.TRACE = TRACE = logging.DEBUG // 2
@@ -352,13 +255,11 @@ def normalize(path):
if path and path[0] == '"':
# Python 2: path = path[1:-1].decode("string-escape")
# Python 3: https://stackoverflow.com/a/46650050/624066
path = (
path[1:-1] # Remove enclosing double quotes
.encode("latin1") # Convert to bytes, required by 'unicode-escape'
.decode("unicode-escape") # Perform the actual octal-escaping decode
.encode("latin1") # 1:1 mapping to bytes, UTF-8 encoded
.decode("utf8", "surrogateescape")
) # Decode from UTF-8
path = (path[1:-1] # Remove enclosing double quotes
.encode('latin1') # Convert to bytes, required by 'unicode-escape'
.decode('unicode-escape') # Perform the actual octal-escaping decode
.encode('latin1') # 1:1 mapping to bytes, UTF-8 encoded
.decode('utf8', 'surrogateescape')) # Decode from UTF-8
if NORMALIZE_PATHS:
# Make sure the slash matches the OS; for Windows we need a backslash
path = os.path.normpath(path)
@@ -381,12 +282,12 @@ def touch_ns(path, mtime_ns):
def isodate(secs: int):
# time.localtime() accepts floats, but discards fractional part
return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(secs))
return time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(secs))
def isodate_ns(ns: int):
# for integers fromtimestamp() is equivalent and ~16% slower than isodate()
return datetime.datetime.fromtimestamp(ns / 1000000000).isoformat(sep=" ")
return datetime.datetime.fromtimestamp(ns / 1000000000).isoformat(sep=' ')
def get_mtime_ns(secs: int, idx: int):
@@ -404,49 +305,35 @@ def get_mtime_path(path):
# Git class and parse_log(), the heart of the script ##########################
class Git:
def __init__(self, workdir=None, gitdir=None, cwd=None, errors=True):
self.gitcmd = ["git"]
self.gitcmd = ['git']
self.errors = errors
self._proc = None
if workdir:
self.gitcmd.extend(("--work-tree", workdir))
if gitdir:
self.gitcmd.extend(("--git-dir", gitdir))
if cwd:
self.gitcmd.extend(("-C", cwd))
if workdir: self.gitcmd.extend(('--work-tree', workdir))
if gitdir: self.gitcmd.extend(('--git-dir', gitdir))
if cwd: self.gitcmd.extend(('-C', cwd))
self.workdir, self.gitdir = self._get_repo_dirs()
def ls_files(self, paths: list = None):
return (normalize(_) for _ in self._run("ls-files --full-name", paths))
return (normalize(_) for _ in self._run('ls-files --full-name', paths))
def ls_dirty(self, force=False):
return (
normalize(_[3:].split(" -> ", 1)[-1])
for _ in self._run("status --porcelain")
if _[:2] != "??" and (not force or (_[0] in ("R", "A") or _[1] == "D"))
)
return (normalize(_[3:].split(' -> ', 1)[-1])
for _ in self._run('status --porcelain')
if _[:2] != '??' and (not force or (_[0] in ('R', 'A')
or _[1] == 'D')))
def log(
self,
merge=False,
first_parent=False,
commit_time=False,
reverse_order=False,
paths: list = None,
):
cmd = "whatchanged --pretty={}".format("%ct" if commit_time else "%at")
if merge:
cmd += " -m"
if first_parent:
cmd += " --first-parent"
if reverse_order:
cmd += " --reverse"
def log(self, merge=False, first_parent=False, commit_time=False,
reverse_order=False, paths: list = None):
cmd = 'whatchanged --pretty={}'.format('%ct' if commit_time else '%at')
if merge: cmd += ' -m'
if first_parent: cmd += ' --first-parent'
if reverse_order: cmd += ' --reverse'
return self._run(cmd, paths)
def describe(self):
return self._run("describe --tags", check=True)[0]
return self._run('describe --tags', check=True)[0]
def terminate(self):
if self._proc is None:
@@ -458,22 +345,18 @@ class Git:
pass
def _get_repo_dirs(self):
return (
os.path.normpath(_)
for _ in self._run(
"rev-parse --show-toplevel --absolute-git-dir", check=True
)
)
return (os.path.normpath(_) for _ in
self._run('rev-parse --show-toplevel --absolute-git-dir', check=True))
def _run(self, cmdstr: str, paths: list = None, output=True, check=False):
cmdlist = self.gitcmd + shlex.split(cmdstr)
if paths:
cmdlist.append("--")
cmdlist.append('--')
cmdlist.extend(paths)
popen_args = dict(universal_newlines=True, encoding="utf8")
popen_args = dict(universal_newlines=True, encoding='utf8')
if not self.errors:
popen_args["stderr"] = subprocess.DEVNULL
log.trace("Executing: %s", " ".join(cmdlist))
popen_args['stderr'] = subprocess.DEVNULL
log.trace("Executing: %s", ' '.join(cmdlist))
if not output:
return subprocess.call(cmdlist, **popen_args)
if check:
@@ -496,26 +379,30 @@ def parse_log(filelist, dirlist, stats, git, merge=False, filterlist=None):
mtime = 0
datestr = isodate(0)
for line in git.log(
merge, args.first_parent, args.commit_time, args.reverse_order, filterlist
merge,
args.first_parent,
args.commit_time,
args.reverse_order,
filterlist
):
stats["loglines"] += 1
stats['loglines'] += 1
# Blank line between Date and list of files
if not line:
continue
# Date line
if line[0] != ":": # Faster than `not line.startswith(':')`
stats["commits"] += 1
if line[0] != ':': # Faster than `not line.startswith(':')`
stats['commits'] += 1
mtime = int(line)
if args.unique_times:
mtime = get_mtime_ns(mtime, stats["commits"])
mtime = get_mtime_ns(mtime, stats['commits'])
if args.debug:
datestr = isodate(mtime)
continue
# File line: three tokens if it describes a renaming, otherwise two
tokens = line.split("\t")
tokens = line.split('\t')
# Possible statuses:
# M: Modified (content changed)
@@ -524,7 +411,7 @@ def parse_log(filelist, dirlist, stats, git, merge=False, filterlist=None):
# T: Type changed: to/from regular file, symlinks, submodules
# R099: Renamed (moved), with % of unchanged content. 100 = pure rename
# Not possible in log: C=Copied, U=Unmerged, X=Unknown, B=pairing Broken
status = tokens[0].split(" ")[-1]
status = tokens[0].split(' ')[-1]
file = tokens[-1]
# Handles non-ASCII chars and OS path separator
@@ -532,76 +419,56 @@ def parse_log(filelist, dirlist, stats, git, merge=False, filterlist=None):
def do_file():
if args.skip_older_than_commit and get_mtime_path(file) <= mtime:
stats["skip"] += 1
stats['skip'] += 1
return
if args.debug:
log.debug(
"%d\t%d\t%d\t%s\t%s",
stats["loglines"],
stats["commits"],
stats["files"],
datestr,
file,
)
log.debug("%d\t%d\t%d\t%s\t%s",
stats['loglines'], stats['commits'], stats['files'],
datestr, file)
try:
touch(os.path.join(git.workdir, file), mtime)
stats["touches"] += 1
stats['touches'] += 1
except Exception as e:
log.error("ERROR: %s: %s", e, file)
stats["errors"] += 1
stats['errors'] += 1
def do_dir():
if args.debug:
log.debug(
"%d\t%d\t-\t%s\t%s",
stats["loglines"],
stats["commits"],
datestr,
"{}/".format(dirname or "."),
)
log.debug("%d\t%d\t-\t%s\t%s",
stats['loglines'], stats['commits'],
datestr, "{}/".format(dirname or '.'))
try:
touch(os.path.join(git.workdir, dirname), mtime)
stats["dirtouches"] += 1
stats['dirtouches'] += 1
except Exception as e:
log.error("ERROR: %s: %s", e, dirname)
stats["direrrors"] += 1
stats['direrrors'] += 1
if file in filelist:
stats["files"] -= 1
stats['files'] -= 1
filelist.remove(file)
do_file()
if args.dirs and status in ("A", "D"):
if args.dirs and status in ('A', 'D'):
dirname = os.path.dirname(file)
if dirname in dirlist:
dirlist.remove(dirname)
do_dir()
# All files done?
if not stats["files"]:
if not stats['files']:
git.terminate()
return
# Main Logic ##################################################################
def main():
start = time.time() # yes, Wall time. CPU time is not realistic for users.
stats = {
_: 0
for _ in (
"loglines",
"commits",
"touches",
"skip",
"errors",
"dirtouches",
"direrrors",
)
}
stats = {_: 0 for _ in ('loglines', 'commits', 'touches', 'skip', 'errors',
'dirtouches', 'direrrors')}
logging.basicConfig(level=args.loglevel, format="%(message)s")
logging.basicConfig(level=args.loglevel, format='%(message)s')
log.trace("Arguments: %s", args)
# First things first: Where and Who are we?
@@ -632,16 +499,13 @@ def main():
# Symlink (to file, to dir or broken - git handles the same way)
if not UPDATE_SYMLINKS and os.path.islink(fullpath):
log.warning(
"WARNING: Skipping symlink, no OS support for updates: %s", path
)
log.warning("WARNING: Skipping symlink, no OS support for updates: %s",
path)
continue
# skip files which are older than given threshold
if (
args.skip_older_than
and start - get_mtime_path(fullpath) > args.skip_older_than
):
if (args.skip_older_than
and start - get_mtime_path(fullpath) > args.skip_older_than):
continue
# Always add files relative to worktree root
@@ -655,17 +519,15 @@ def main():
else:
dirty = set(git.ls_dirty())
if dirty:
log.warning(
"WARNING: Modified files in the working directory were ignored."
"\nTo include such files, commit your changes or use --force."
)
log.warning("WARNING: Modified files in the working directory were ignored."
"\nTo include such files, commit your changes or use --force.")
filelist -= dirty
# Build dir list to be processed
dirlist = set(os.path.dirname(_) for _ in filelist) if args.dirs else set()
stats["totalfiles"] = stats["files"] = len(filelist)
log.info("{0:,} files to be processed in work dir".format(stats["totalfiles"]))
stats['totalfiles'] = stats['files'] = len(filelist)
log.info("{0:,} files to be processed in work dir".format(stats['totalfiles']))
if not filelist:
# Nothing to do. Exit silently and without errors, just like git does
@@ -682,18 +544,10 @@ def main():
if args.missing and not args.merge:
filterlist = list(filelist)
missing = len(filterlist)
log.info(
"{0:,} files not found in log, trying merge commits".format(missing)
)
log.info("{0:,} files not found in log, trying merge commits".format(missing))
for i in range(0, missing, STEPMISSING):
parse_log(
filelist,
dirlist,
stats,
git,
merge=True,
filterlist=filterlist[i : i + STEPMISSING],
)
parse_log(filelist, dirlist, stats, git,
merge=True, filterlist=filterlist[i:i + STEPMISSING])
# Still missing some?
for file in filelist:
@@ -702,33 +556,29 @@ def main():
# Final statistics
# Suggestion: use git-log --before=mtime to brag about skipped log entries
def log_info(msg, *a, width=13):
ifmt = "{:%d,}" % (width,) # not using 'n' for consistency with ffmt
ffmt = "{:%d,.2f}" % (width,)
ifmt = '{:%d,}' % (width,) # not using 'n' for consistency with ffmt
ffmt = '{:%d,.2f}' % (width,)
# %-formatting lacks a thousand separator, must pre-render with .format()
log.info(msg.replace("%d", ifmt).replace("%f", ffmt).format(*a))
log.info(msg.replace('%d', ifmt).replace('%f', ffmt).format(*a))
log_info(
"Statistics:\n%f seconds\n%d log lines processed\n%d commits evaluated",
time.time() - start,
stats["loglines"],
stats["commits"],
)
"Statistics:\n"
"%f seconds\n"
"%d log lines processed\n"
"%d commits evaluated",
time.time() - start, stats['loglines'], stats['commits'])
if args.dirs:
if stats["direrrors"]:
log_info("%d directory update errors", stats["direrrors"])
log_info("%d directories updated", stats["dirtouches"])
if stats['direrrors']: log_info("%d directory update errors", stats['direrrors'])
log_info("%d directories updated", stats['dirtouches'])
if stats["touches"] != stats["totalfiles"]:
log_info("%d files", stats["totalfiles"])
if stats["skip"]:
log_info("%d files skipped", stats["skip"])
if stats["files"]:
log_info("%d files missing", stats["files"])
if stats["errors"]:
log_info("%d file update errors", stats["errors"])
if stats['touches'] != stats['totalfiles']:
log_info("%d files", stats['totalfiles'])
if stats['skip']: log_info("%d files skipped", stats['skip'])
if stats['files']: log_info("%d files missing", stats['files'])
if stats['errors']: log_info("%d file update errors", stats['errors'])
log_info("%d files updated", stats["touches"])
log_info("%d files updated", stats['touches'])
if args.test:
log.info("TEST RUN - No files modified!")

.github/workflows/_all_ci.yml (new file, 104 lines)

@@ -0,0 +1,104 @@
---
name: langchain CI
on:
workflow_call:
inputs:
working-directory:
required: true
type: string
description: "From which folder this pipeline executes"
workflow_dispatch:
inputs:
working-directory:
required: true
type: choice
default: 'libs/langchain'
options:
- libs/langchain
- libs/core
- libs/experimental
# If another push to the same PR or branch happens while this workflow is still running,
# cancel the earlier run in favor of the next run.
#
# There's no point in testing an outdated version of the code. GitHub only allows
# a limited number of job runners to be active at the same time, so it's better to cancel
# pointless jobs early so that more useful jobs can run sooner.
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}-${{ inputs.working-directory }}
cancel-in-progress: true
env:
POETRY_VERSION: "1.6.1"
jobs:
lint:
uses: ./.github/workflows/_lint.yml
with:
working-directory: ${{ inputs.working-directory }}
secrets: inherit
test:
uses: ./.github/workflows/_test.yml
with:
working-directory: ${{ inputs.working-directory }}
secrets: inherit
compile-integration-tests:
uses: ./.github/workflows/_compile_integration_test.yml
with:
working-directory: ${{ inputs.working-directory }}
secrets: inherit
pydantic-compatibility:
uses: ./.github/workflows/_pydantic_compatibility.yml
with:
working-directory: ${{ inputs.working-directory }}
secrets: inherit
extended-tests:
runs-on: ubuntu-latest
strategy:
matrix:
python-version:
- "3.8"
- "3.9"
- "3.10"
- "3.11"
name: Python ${{ matrix.python-version }} extended tests
defaults:
run:
working-directory: ${{ inputs.working-directory }}
steps:
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }} + Poetry ${{ env.POETRY_VERSION }}
uses: "./.github/actions/poetry_setup"
with:
python-version: ${{ matrix.python-version }}
poetry-version: ${{ env.POETRY_VERSION }}
working-directory: ${{ inputs.working-directory }}
cache-key: extended
- name: Install dependencies
shell: bash
run: |
echo "Running extended tests, installing dependencies with poetry..."
poetry install -E extended_testing --with test
- name: Run extended tests
run: make extended_tests
- name: Ensure the tests did not create any additional files
shell: bash
run: |
set -eu
STATUS="$(git status)"
echo "$STATUS"
# grep will exit non-zero if the target message isn't found,
# and `set -e` above will cause the step to fail.
echo "$STATUS" | grep 'nothing to commit, working tree clean'


@@ -1,12 +1,4 @@
# Validates that a package's integration tests compile without syntax or import errors.
#
# (If an integration test fails to compile, it won't run.)
#
# Called as part of check_diffs.yml workflow
#
# Runs pytest with compile marker to check syntax/imports.
name: "🔗 Compile Integration Tests"
name: compile-integration-test
on:
workflow_call:
@@ -15,16 +7,9 @@ on:
required: true
type: string
description: "From which folder this pipeline executes"
python-version:
required: true
type: string
description: "Python version to use"
permissions:
contents: read
env:
UV_FROZEN: "true"
POETRY_VERSION: "1.6.1"
jobs:
build:
@@ -32,27 +17,34 @@ jobs:
run:
working-directory: ${{ inputs.working-directory }}
runs-on: ubuntu-latest
timeout-minutes: 20
name: "Python ${{ inputs.python-version }}"
strategy:
matrix:
python-version:
- "3.8"
- "3.9"
- "3.10"
- "3.11"
name: Python ${{ matrix.python-version }}
steps:
- uses: actions/checkout@v5
- uses: actions/checkout@v4
- name: "🐍 Set up Python ${{ inputs.python-version }} + UV"
uses: "./.github/actions/uv_setup"
- name: Set up Python ${{ matrix.python-version }} + Poetry ${{ env.POETRY_VERSION }}
uses: "./.github/actions/poetry_setup"
with:
python-version: ${{ inputs.python-version }}
cache-suffix: compile-integration-tests-${{ inputs.working-directory }}
python-version: ${{ matrix.python-version }}
poetry-version: ${{ env.POETRY_VERSION }}
working-directory: ${{ inputs.working-directory }}
cache-key: compile-integration
- name: "📦 Install Integration Dependencies"
- name: Install integration dependencies
shell: bash
run: uv sync --group test --group test_integration
run: poetry install --with=test_integration,test
- name: "🔗 Check Integration Tests Compile"
- name: Check integration tests compile
shell: bash
run: uv run pytest -m compile tests/integration_tests
run: poetry run pytest -m compile tests/integration_tests
- name: "🧹 Verify Clean Working Directory"
- name: Ensure the tests did not create any additional files
shell: bash
run: |
set -eu


@@ -1,11 +1,4 @@
# Runs linting.
#
# Uses the package's Makefile to run the checks, specifically the
# `lint_package` and `lint_tests` targets.
#
# Called as part of check_diffs.yml workflow.
name: "🧹 Linting"
name: lint
on:
workflow_call:
@@ -14,62 +7,115 @@ on:
required: true
type: string
description: "From which folder this pipeline executes"
python-version:
required: true
langchain-location:
required: false
type: string
description: "Python version to use"
permissions:
contents: read
description: "Relative path to the langchain library folder"
env:
POETRY_VERSION: "1.6.1"
WORKDIR: ${{ inputs.working-directory == '' && '.' || inputs.working-directory }}
# This env var allows us to get inline annotations when ruff has complaints.
RUFF_OUTPUT_FORMAT: github
UV_FROZEN: "true"
jobs:
# Linting job - runs quality checks on package and test code
build:
name: "Python ${{ inputs.python-version }}"
runs-on: ubuntu-latest
timeout-minutes: 20
strategy:
matrix:
# Only lint on the min and max supported Python versions.
# It's extremely unlikely that there's a lint issue on any version in between
# that doesn't show up on the min or max versions.
#
# GitHub rate-limits how many jobs can be running at any one time.
# Starting new jobs is also relatively slow,
# so linting on fewer versions makes CI faster.
python-version:
- "3.8"
- "3.11"
steps:
- name: "📋 Checkout Code"
uses: actions/checkout@v5
- uses: actions/checkout@v4
- name: "🐍 Set up Python ${{ inputs.python-version }} + UV"
uses: "./.github/actions/uv_setup"
- name: Set up Python ${{ matrix.python-version }} + Poetry ${{ env.POETRY_VERSION }}
uses: "./.github/actions/poetry_setup"
with:
python-version: ${{ inputs.python-version }}
cache-suffix: lint-${{ inputs.working-directory }}
python-version: ${{ matrix.python-version }}
poetry-version: ${{ env.POETRY_VERSION }}
working-directory: ${{ inputs.working-directory }}
cache-key: lint-with-extras
- name: "📦 Install Lint & Typing Dependencies"
- name: Check Poetry File
shell: bash
working-directory: ${{ inputs.working-directory }}
run: |
uv sync --group lint --group typing
poetry check
- name: "🔍 Analyze Package Code with Linters"
- name: Check lock file
shell: bash
working-directory: ${{ inputs.working-directory }}
run: |
poetry lock --check
- name: Install dependencies
# Also installs dev/lint/test/typing dependencies, to ensure we have
# type hints for as many of our libraries as possible.
# This helps catch errors that require dependencies to be spotted, for example:
# https://github.com/langchain-ai/langchain/pull/10249/files#diff-935185cd488d015f026dcd9e19616ff62863e8cde8c0bee70318d3ccbca98341
#
# If you change this configuration, make sure to change the `cache-key`
# in the `poetry_setup` action above to stop using the old cache.
# It doesn't matter how you change it, any change will cause a cache-bust.
working-directory: ${{ inputs.working-directory }}
run: |
poetry install --with lint,typing
- name: Install langchain editable
working-directory: ${{ inputs.working-directory }}
if: ${{ inputs.langchain-location }}
env:
LANGCHAIN_LOCATION: ${{ inputs.langchain-location }}
run: |
poetry run pip install -e "$LANGCHAIN_LOCATION"
- name: Get .mypy_cache to speed up mypy
uses: actions/cache@v3
env:
SEGMENT_DOWNLOAD_TIMEOUT_MIN: "2"
with:
path: |
${{ env.WORKDIR }}/.mypy_cache
key: mypy-lint-${{ runner.os }}-${{ runner.arch }}-py${{ matrix.python-version }}-${{ inputs.working-directory }}-${{ hashFiles(format('{0}/poetry.lock', env.WORKDIR)) }}
- name: Analysing the code with our lint
working-directory: ${{ inputs.working-directory }}
run: |
make lint_package
- name: "📦 Install Test Dependencies (non-partners)"
# (For directories NOT starting with libs/partners/)
if: ${{ ! startsWith(inputs.working-directory, 'libs/partners/') }}
- name: Install test dependencies
# Also installs dev/lint/test/typing dependencies, to ensure we have
# type hints for as many of our libraries as possible.
# This helps catch errors that require dependencies to be spotted, for example:
# https://github.com/langchain-ai/langchain/pull/10249/files#diff-935185cd488d015f026dcd9e19616ff62863e8cde8c0bee70318d3ccbca98341
#
# If you change this configuration, make sure to change the `cache-key`
# in the `poetry_setup` action above to stop using the old cache.
# It doesn't matter how you change it, any change will cause a cache-bust.
working-directory: ${{ inputs.working-directory }}
run: |
uv sync --inexact --group test
- name: "📦 Install Test Dependencies"
if: ${{ startsWith(inputs.working-directory, 'libs/partners/') }}
working-directory: ${{ inputs.working-directory }}
run: |
uv sync --inexact --group test --group test_integration
poetry install --with test
- name: "🔍 Analyze Test Code with Linters"
- name: Get .mypy_cache_test to speed up mypy
uses: actions/cache@v3
env:
SEGMENT_DOWNLOAD_TIMEOUT_MIN: "2"
with:
path: |
${{ env.WORKDIR }}/.mypy_cache_test
key: mypy-test-${{ runner.os }}-${{ runner.arch }}-py${{ matrix.python-version }}-${{ inputs.working-directory }}-${{ hashFiles(format('{0}/poetry.lock', env.WORKDIR)) }}
- name: Analysing the code with our lint
working-directory: ${{ inputs.working-directory }}
run: |
make lint_tests


@@ -0,0 +1,105 @@
name: pydantic v1/v2 compatibility
on:
workflow_call:
inputs:
working-directory:
required: true
type: string
description: "From which folder this pipeline executes"
langchain-location:
required: false
type: string
description: "Relative path to the langchain library folder"
env:
POETRY_VERSION: "1.6.1"
jobs:
build:
defaults:
run:
working-directory: ${{ inputs.working-directory }}
runs-on: ubuntu-latest
strategy:
matrix:
python-version:
- "3.8"
- "3.9"
- "3.10"
- "3.11"
name: Pydantic v1/v2 compatibility - Python ${{ matrix.python-version }}
steps:
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }} + Poetry ${{ env.POETRY_VERSION }}
uses: "./.github/actions/poetry_setup"
with:
python-version: ${{ matrix.python-version }}
poetry-version: ${{ env.POETRY_VERSION }}
working-directory: ${{ inputs.working-directory }}
cache-key: pydantic-cross-compat
- name: Install dependencies
shell: bash
run: poetry install --with test
- name: Install langchain editable
working-directory: ${{ inputs.working-directory }}
if: ${{ inputs.langchain-location }}
env:
LANGCHAIN_LOCATION: ${{ inputs.langchain-location }}
run: |
poetry run pip install -e "$LANGCHAIN_LOCATION"
- name: Install the opposite major version of pydantic
# If normal tests use pydantic v1, here we'll use v2, and vice versa.
shell: bash
run: |
# Determine the major part of pydantic version
REGULAR_VERSION=$(poetry run python -c "import pydantic; print(pydantic.__version__)" | cut -d. -f1)
if [[ "$REGULAR_VERSION" == "1" ]]; then
PYDANTIC_DEP=">=2.1,<3"
TEST_WITH_VERSION="2"
elif [[ "$REGULAR_VERSION" == "2" ]]; then
PYDANTIC_DEP="<2"
TEST_WITH_VERSION="1"
else
echo "Unexpected pydantic major version '$REGULAR_VERSION', cannot determine which version to use for cross-compatibility test."
exit 1
fi
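          # Illustrative (hypothetical lockfile state): if the environment
          # resolved pydantic 1.10.13, REGULAR_VERSION would be "1", so
          # PYDANTIC_DEP=">=2.1,<3" and the suite gets re-run against
          # pydantic v2, and vice versa.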
# Install via `pip` instead of `poetry add` to avoid changing lockfile,
# which would prevent caching from working: the cache would get saved
# to a different key than where it gets loaded from.
poetry run pip install "pydantic${PYDANTIC_DEP}"
# Ensure that the correct pydantic is installed now.
echo "Checking pydantic version... Expecting ${TEST_WITH_VERSION}"
# Determine the major part of pydantic version
CURRENT_VERSION=$(poetry run python -c "import pydantic; print(pydantic.__version__)" | cut -d. -f1)
# Check that the major part of pydantic version is as expected, if not
# raise an error
if [[ "$CURRENT_VERSION" != "$TEST_WITH_VERSION" ]]; then
echo "Error: expected pydantic version ${CURRENT_VERSION} to have been installed, but found: ${TEST_WITH_VERSION}"
exit 1
fi
echo "Found pydantic version ${CURRENT_VERSION}, as expected"
- name: Run pydantic compatibility tests
shell: bash
run: make test
- name: Ensure the tests did not create any additional files
shell: bash
run: |
set -eu
STATUS="$(git status)"
echo "$STATUS"
# grep will exit non-zero if the target message isn't found,
# and `set -e` above will cause the step to fail.
echo "$STATUS" | grep 'nothing to commit, working tree clean'


@@ -1,11 +1,5 @@
# Builds and publishes LangChain packages to PyPI.
#
# Manually triggered, though can be used as a reusable workflow (workflow_call).
#
# Handles version bumping, building, and publishing to PyPI with authentication.
name: release
name: "🚀 Package Release"
run-name: "Release ${{ inputs.working-directory }} ${{ inputs.release-version }}"
on:
workflow_call:
inputs:
@@ -13,58 +7,35 @@ on:
required: true
type: string
description: "From which folder this pipeline executes"
workflow_dispatch:
inputs:
working-directory:
required: true
type: string
description: "From which folder this pipeline executes"
default: "libs/langchain"
release-version:
required: true
type: string
default: "0.1.0"
description: "New version of package being released"
dangerous-nonmaster-release:
required: false
type: boolean
default: false
description: "Release from a non-master branch (danger!) - Only use for hotfixes"
env:
PYTHON_VERSION: "3.11"
UV_FROZEN: "true"
UV_NO_SYNC: "true"
permissions:
contents: write # Required for creating GitHub releases
PYTHON_VERSION: "3.10"
POETRY_VERSION: "1.6.1"
jobs:
# Build the distribution package and extract version info
# Runs in isolated environment with minimal permissions for security
build:
if: github.ref == 'refs/heads/master' || inputs.dangerous-nonmaster-release
environment: Scheduled testing
if: github.ref == 'refs/heads/master'
runs-on: ubuntu-latest
permissions:
contents: read
outputs:
pkg-name: ${{ steps.check-version.outputs.pkg-name }}
version: ${{ steps.check-version.outputs.version }}
steps:
- uses: actions/checkout@v5
- uses: actions/checkout@v4
- name: Set up Python + uv
uses: "./.github/actions/uv_setup"
- name: Set up Python + Poetry ${{ env.POETRY_VERSION }}
uses: "./.github/actions/poetry_setup"
with:
python-version: ${{ env.PYTHON_VERSION }}
poetry-version: ${{ env.POETRY_VERSION }}
working-directory: ${{ inputs.working-directory }}
cache-key: release
# We want to keep this build stage *separate* from the release stage,
# so that there's no sharing of permissions between them.
# (Release stage has trusted publishing and GitHub repo contents write access,
#
# The release stage has trusted publishing and GitHub repo contents write access,
# and we want to keep the scope of that access limited just to the release job.
# Otherwise, a malicious `build` step (e.g. via a compromised dependency)
# could get access to our GitHub or PyPI credentials.
#
@@ -73,172 +44,38 @@ jobs:
# > from the publish job.
# https://github.com/pypa/gh-action-pypi-publish#non-goals
- name: Build project for distribution
run: uv build
run: poetry build
working-directory: ${{ inputs.working-directory }}
- name: Upload build
uses: actions/upload-artifact@v4
uses: actions/upload-artifact@v3
with:
name: dist
path: ${{ inputs.working-directory }}/dist/
- name: Check version
- name: Check Version
id: check-version
shell: python
shell: bash
working-directory: ${{ inputs.working-directory }}
run: |
import os
import tomllib
with open("pyproject.toml", "rb") as f:
data = tomllib.load(f)
pkg_name = data["project"]["name"]
version = data["project"]["version"]
with open(os.environ["GITHUB_OUTPUT"], "a") as f:
f.write(f"pkg-name={pkg_name}\n")
f.write(f"version={version}\n")
release-notes:
needs:
- build
runs-on: ubuntu-latest
permissions:
contents: read
outputs:
release-body: ${{ steps.generate-release-body.outputs.release-body }}
steps:
- uses: actions/checkout@v5
with:
repository: langchain-ai/langchain
path: langchain
sparse-checkout: | # this only grabs files for relevant dir
${{ inputs.working-directory }}
ref: ${{ github.ref }} # this scopes to just ref'd branch
fetch-depth: 0 # this fetches entire commit history
- name: Check tags
id: check-tags
shell: bash
working-directory: langchain/${{ inputs.working-directory }}
env:
PKG_NAME: ${{ needs.build.outputs.pkg-name }}
VERSION: ${{ needs.build.outputs.version }}
run: |
# Handle regular versions and pre-release versions differently
if [[ "$VERSION" == *"-"* ]]; then
# This is a pre-release version (contains a hyphen)
# Extract the base version without the pre-release suffix
BASE_VERSION=${VERSION%%-*}
# Look for the latest release of the same base version
REGEX="^$PKG_NAME==$BASE_VERSION\$"
PREV_TAG=$(git tag --sort=-creatordate | (grep -P "$REGEX" || true) | head -1)
# If no exact base version match, look for the latest release of any kind
if [ -z "$PREV_TAG" ]; then
REGEX="^$PKG_NAME==\\d+\\.\\d+\\.\\d+\$"
PREV_TAG=$(git tag --sort=-creatordate | (grep -P "$REGEX" || true) | head -1)
fi
else
# Regular version handling
PREV_TAG="$PKG_NAME==${VERSION%.*}.$(( ${VERSION##*.} - 1 ))"; [[ "${VERSION##*.}" -eq 0 ]] && PREV_TAG=""
# Backup case when releasing e.g. 0.3.0: look up the last release.
# Note: if the last release (chronologically) was e.g. 0.1.47, it will be
# picked up instead of the last 0.2 release.
if [ -z "$PREV_TAG" ]; then
REGEX="^$PKG_NAME==\\d+\\.\\d+\\.\\d+\$"
echo $REGEX
PREV_TAG=$(git tag --sort=-creatordate | (grep -P $REGEX || true) | head -1)
fi
fi
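          # Worked example (hypothetical values): with PKG_NAME=langchain-core
          # and VERSION=0.3.5, ${VERSION%.*} yields "0.3" and ${VERSION##*.}
          # yields "5", so PREV_TAG="langchain-core==0.3.4". For VERSION=0.3.0
          # the patch part is 0, PREV_TAG is blanked, and the regex fallback
          # above picks the most recently created x.y.z tag instead.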
# if PREV_TAG is empty, let it be empty
if [ -z "$PREV_TAG" ]; then
echo "No previous tag found - first release"
else
# confirm prev-tag actually exists in git repo with git tag
GIT_TAG_RESULT=$(git tag -l "$PREV_TAG")
if [ -z "$GIT_TAG_RESULT" ]; then
echo "Previous tag $PREV_TAG not found in git repo"
exit 1
fi
fi
TAG="${PKG_NAME}==${VERSION}"
if [ "$TAG" == "$PREV_TAG" ]; then
echo "No new version to release"
exit 1
fi
echo tag="$TAG" >> $GITHUB_OUTPUT
echo prev-tag="$PREV_TAG" >> $GITHUB_OUTPUT
- name: Generate release body
id: generate-release-body
working-directory: langchain
env:
WORKING_DIR: ${{ inputs.working-directory }}
PKG_NAME: ${{ needs.build.outputs.pkg-name }}
TAG: ${{ steps.check-tags.outputs.tag }}
PREV_TAG: ${{ steps.check-tags.outputs.prev-tag }}
run: |
PREAMBLE="Changes since $PREV_TAG"
# if PREV_TAG is empty, then we are releasing the first version
if [ -z "$PREV_TAG" ]; then
PREAMBLE="Initial release"
PREV_TAG=$(git rev-list --max-parents=0 HEAD)
fi
{
echo 'release-body<<EOF'
echo $PREAMBLE
echo
git log --format="%s" "$PREV_TAG"..HEAD -- $WORKING_DIR
echo EOF
} >> "$GITHUB_OUTPUT"
echo pkg-name="$(poetry version | cut -d ' ' -f 1)" >> $GITHUB_OUTPUT
echo version="$(poetry version --short)" >> $GITHUB_OUTPUT
test-pypi-publish:
needs:
- build
- release-notes
runs-on: ubuntu-latest
permissions:
# This permission is used for trusted publishing:
# https://blog.pypi.org/posts/2023-04-20-introducing-trusted-publishers/
#
# Trusted publishing has to also be configured on PyPI for each package:
# https://docs.pypi.org/trusted-publishers/adding-a-publisher/
id-token: write
steps:
- uses: actions/checkout@v5
- uses: actions/download-artifact@v5
with:
name: dist
path: ${{ inputs.working-directory }}/dist/
- name: Publish to test PyPI
uses: pypa/gh-action-pypi-publish@release/v1
with:
packages-dir: ${{ inputs.working-directory }}/dist/
verbose: true
print-hash: true
repository-url: https://test.pypi.org/legacy/
# We overwrite any existing distributions with the same name and version.
# This is *only for CI use* and is *extremely dangerous* otherwise!
# https://github.com/pypa/gh-action-pypi-publish#tolerating-release-package-file-duplicates
skip-existing: true
# Temp workaround since attestations are on by default as of gh-action-pypi-publish v1.11.0
attestations: false
uses:
./.github/workflows/_test_release.yml
with:
working-directory: ${{ inputs.working-directory }}
secrets: inherit
pre-release-checks:
needs:
- build
- release-notes
- test-pypi-publish
runs-on: ubuntu-latest
permissions:
contents: read
timeout-minutes: 20
steps:
- uses: actions/checkout@v5
# We explicitly *don't* set up caching here. This ensures our tests are
# maximally sensitive to catching breakage.
#
@@ -251,231 +88,40 @@ jobs:
# - Tests pass, because the dependency is present even though it wasn't specified.
# - The package is published, and it breaks on the missing dependency when
# used in the real world.
- name: Set up Python + uv
uses: "./.github/actions/uv_setup"
id: setup-python
- uses: actions/setup-python@v4
with:
python-version: ${{ env.PYTHON_VERSION }}
- uses: actions/download-artifact@v5
with:
name: dist
path: ${{ inputs.working-directory }}/dist/
- name: Import dist package
- name: Test published package
shell: bash
working-directory: ${{ inputs.working-directory }}
env:
PKG_NAME: ${{ needs.build.outputs.pkg-name }}
VERSION: ${{ needs.build.outputs.version }}
# Here we use:
# - The default regular PyPI index as the *primary* index, meaning
# that it takes priority (https://pypi.org/simple)
# - The test PyPI index as an extra index, so that any dependencies that
# are not found on test PyPI can be resolved and installed anyway.
# (https://test.pypi.org/simple). This will include the PKG_NAME==VERSION
# package because VERSION will not have been uploaded to regular PyPI yet.
# - attempt install again after 5 seconds if it fails because there is
# sometimes a delay in availability on test pypi
#
# TODO: add more in-depth pre-publish tests after testing that importing works
run: |
uv venv
VIRTUAL_ENV=.venv uv pip install dist/*.whl
pip install \
--extra-index-url https://test.pypi.org/simple/ \
"$PKG_NAME==$VERSION"
# Replace all dashes in the package name with underscores,
# since that's how Python imports packages with dashes in the name.
# Also remove the _official suffix
IMPORT_NAME="$(echo "$PKG_NAME" | sed s/-/_/g | sed s/_official//g)"
IMPORT_NAME="$(echo "$PKG_NAME" | sed s/-/_/g)"
uv run python -c "import $IMPORT_NAME; print(dir($IMPORT_NAME))"
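          # e.g. (hypothetical): PKG_NAME=langchain-mongodb-official becomes
          # langchain_mongodb_official after the dash rewrite, and
          # langchain_mongodb once the _official suffix is also stripped.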
- name: Import test dependencies
run: uv sync --group test
working-directory: ${{ inputs.working-directory }}
# Overwrite the local version of the package with the built version
- name: Import published package (again)
working-directory: ${{ inputs.working-directory }}
shell: bash
env:
PKG_NAME: ${{ needs.build.outputs.pkg-name }}
VERSION: ${{ needs.build.outputs.version }}
run: |
VIRTUAL_ENV=.venv uv pip install dist/*.whl
- name: Check for prerelease versions
# Block release if any dependencies allow prerelease versions
# (unless this is itself a prerelease version)
working-directory: ${{ inputs.working-directory }}
run: |
uv run python $GITHUB_WORKSPACE/.github/scripts/check_prerelease_dependencies.py pyproject.toml
- name: Run unit tests
run: make tests
working-directory: ${{ inputs.working-directory }}
- name: Get minimum versions
# Find the minimum published versions that satisfies the given constraints
working-directory: ${{ inputs.working-directory }}
id: min-version
run: |
VIRTUAL_ENV=.venv uv pip install packaging requests
python_version="$(uv run python --version | awk '{print $2}')"
min_versions="$(uv run python $GITHUB_WORKSPACE/.github/scripts/get_min_versions.py pyproject.toml release $python_version)"
echo "min-versions=$min_versions" >> "$GITHUB_OUTPUT"
echo "min-versions=$min_versions"
- name: Run unit tests with minimum dependency versions
if: ${{ steps.min-version.outputs.min-versions != '' }}
env:
MIN_VERSIONS: ${{ steps.min-version.outputs.min-versions }}
run: |
VIRTUAL_ENV=.venv uv pip install --force-reinstall --editable .
VIRTUAL_ENV=.venv uv pip install --force-reinstall $MIN_VERSIONS
make tests
working-directory: ${{ inputs.working-directory }}
- name: Import integration test dependencies
run: uv sync --group test --group test_integration
working-directory: ${{ inputs.working-directory }}
- name: Run integration tests
# Uses the Makefile's `integration_tests` target for the specified package
if: ${{ startsWith(inputs.working-directory, 'libs/partners/') }}
env:
AI21_API_KEY: ${{ secrets.AI21_API_KEY }}
GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
MISTRAL_API_KEY: ${{ secrets.MISTRAL_API_KEY }}
TOGETHER_API_KEY: ${{ secrets.TOGETHER_API_KEY }}
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
AZURE_OPENAI_API_VERSION: ${{ secrets.AZURE_OPENAI_API_VERSION }}
AZURE_OPENAI_API_BASE: ${{ secrets.AZURE_OPENAI_API_BASE }}
AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }}
AZURE_OPENAI_CHAT_DEPLOYMENT_NAME: ${{ secrets.AZURE_OPENAI_CHAT_DEPLOYMENT_NAME }}
AZURE_OPENAI_LEGACY_CHAT_DEPLOYMENT_NAME: ${{ secrets.AZURE_OPENAI_LEGACY_CHAT_DEPLOYMENT_NAME }}
AZURE_OPENAI_LLM_DEPLOYMENT_NAME: ${{ secrets.AZURE_OPENAI_LLM_DEPLOYMENT_NAME }}
AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT_NAME: ${{ secrets.AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT_NAME }}
NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }}
GOOGLE_SEARCH_API_KEY: ${{ secrets.GOOGLE_SEARCH_API_KEY }}
GOOGLE_CSE_ID: ${{ secrets.GOOGLE_CSE_ID }}
GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
EXA_API_KEY: ${{ secrets.EXA_API_KEY }}
NOMIC_API_KEY: ${{ secrets.NOMIC_API_KEY }}
WATSONX_APIKEY: ${{ secrets.WATSONX_APIKEY }}
WATSONX_PROJECT_ID: ${{ secrets.WATSONX_PROJECT_ID }}
ASTRA_DB_API_ENDPOINT: ${{ secrets.ASTRA_DB_API_ENDPOINT }}
ASTRA_DB_APPLICATION_TOKEN: ${{ secrets.ASTRA_DB_APPLICATION_TOKEN }}
ASTRA_DB_KEYSPACE: ${{ secrets.ASTRA_DB_KEYSPACE }}
ES_URL: ${{ secrets.ES_URL }}
ES_CLOUD_ID: ${{ secrets.ES_CLOUD_ID }}
ES_API_KEY: ${{ secrets.ES_API_KEY }}
MONGODB_ATLAS_URI: ${{ secrets.MONGODB_ATLAS_URI }}
UPSTAGE_API_KEY: ${{ secrets.UPSTAGE_API_KEY }}
FIREWORKS_API_KEY: ${{ secrets.FIREWORKS_API_KEY }}
XAI_API_KEY: ${{ secrets.XAI_API_KEY }}
DEEPSEEK_API_KEY: ${{ secrets.DEEPSEEK_API_KEY }}
PPLX_API_KEY: ${{ secrets.PPLX_API_KEY }}
run: make integration_tests
working-directory: ${{ inputs.working-directory }}
# Test select published packages against new core
# Done when code changes are made to langchain-core
test-prior-published-packages-against-new-core:
# Installs the new core with old partners: Installs the new unreleased core
# alongside the previously published partner packages and runs integration tests
needs:
- build
- release-notes
- test-pypi-publish
- pre-release-checks
runs-on: ubuntu-latest
permissions:
contents: read
strategy:
matrix:
partner: [openai, anthropic]
fail-fast: false # Continue testing other partners if one fails
env:
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
ANTHROPIC_FILES_API_IMAGE_ID: ${{ secrets.ANTHROPIC_FILES_API_IMAGE_ID }}
ANTHROPIC_FILES_API_PDF_ID: ${{ secrets.ANTHROPIC_FILES_API_PDF_ID }}
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
AZURE_OPENAI_API_VERSION: ${{ secrets.AZURE_OPENAI_API_VERSION }}
AZURE_OPENAI_API_BASE: ${{ secrets.AZURE_OPENAI_API_BASE }}
AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }}
AZURE_OPENAI_CHAT_DEPLOYMENT_NAME: ${{ secrets.AZURE_OPENAI_CHAT_DEPLOYMENT_NAME }}
AZURE_OPENAI_LEGACY_CHAT_DEPLOYMENT_NAME: ${{ secrets.AZURE_OPENAI_LEGACY_CHAT_DEPLOYMENT_NAME }}
AZURE_OPENAI_LLM_DEPLOYMENT_NAME: ${{ secrets.AZURE_OPENAI_LLM_DEPLOYMENT_NAME }}
AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT_NAME: ${{ secrets.AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT_NAME }}
steps:
- uses: actions/checkout@v5
# We implement this conditional as GitHub Actions does not have good support
# for conditionally needing steps. https://github.com/actions/runner/issues/491
# TODO: this seems to be resolved upstream, so we can probably remove this workaround
- name: Check if libs/core
run: |
if [ "${{ startsWith(inputs.working-directory, 'libs/core') }}" != "true" ]; then
echo "Not in libs/core. Exiting successfully."
exit 0
fi
- name: Set up Python + uv
if: startsWith(inputs.working-directory, 'libs/core')
uses: "./.github/actions/uv_setup"
with:
python-version: ${{ env.PYTHON_VERSION }}
- uses: actions/download-artifact@v5
if: startsWith(inputs.working-directory, 'libs/core')
with:
name: dist
path: ${{ inputs.working-directory }}/dist/
- name: Test against ${{ matrix.partner }}
if: startsWith(inputs.working-directory, 'libs/core')
run: |
# Identify latest tag, excluding pre-releases
LATEST_PACKAGE_TAG="$(
git ls-remote --tags origin "langchain-${{ matrix.partner }}*" \
| awk '{print $2}' \
| sed 's|refs/tags/||' \
| grep -E '[0-9]+\.[0-9]+\.[0-9]+([a-zA-Z]+[0-9]+)?$' \
| sort -Vr \
| head -n 1
)"
echo "Latest package tag: $LATEST_PACKAGE_TAG"
# Shallow-fetch just that single tag
git fetch --depth=1 origin tag "$LATEST_PACKAGE_TAG"
# Checkout the latest package files
rm -rf $GITHUB_WORKSPACE/libs/partners/${{ matrix.partner }}/*
rm -rf $GITHUB_WORKSPACE/libs/standard-tests/*
cd $GITHUB_WORKSPACE/libs/
git checkout "$LATEST_PACKAGE_TAG" -- standard-tests/
git checkout "$LATEST_PACKAGE_TAG" -- partners/${{ matrix.partner }}/
cd partners/${{ matrix.partner }}
# Print as a sanity check
echo "Version number from pyproject.toml: "
cat pyproject.toml | grep "version = "
# Run tests
uv sync --group test --group test_integration
uv pip install ../../core/dist/*.whl
make integration_tests
python -c "import $IMPORT_NAME; print(dir($IMPORT_NAME))"
publish:
# Publishes the package to PyPI
needs:
- build
- release-notes
- test-pypi-publish
- pre-release-checks
- test-prior-published-packages-against-new-core
runs-on: ubuntu-latest
permissions:
# This permission is used for trusted publishing:
@@ -490,14 +136,17 @@ jobs:
working-directory: ${{ inputs.working-directory }}
steps:
- uses: actions/checkout@v5
- uses: actions/checkout@v4
- name: Set up Python + uv
uses: "./.github/actions/uv_setup"
- name: Set up Python + Poetry ${{ env.POETRY_VERSION }}
uses: "./.github/actions/poetry_setup"
with:
python-version: ${{ env.PYTHON_VERSION }}
poetry-version: ${{ env.POETRY_VERSION }}
working-directory: ${{ inputs.working-directory }}
cache-key: release
- uses: actions/download-artifact@v5
- uses: actions/download-artifact@v3
with:
name: dist
path: ${{ inputs.working-directory }}/dist/
@@ -508,21 +157,17 @@ jobs:
packages-dir: ${{ inputs.working-directory }}/dist/
verbose: true
print-hash: true
# Temp workaround since attestations are on by default as of gh-action-pypi-publish v1.11.0
attestations: false
mark-release:
# Marks the GitHub release with the new version tag
needs:
- build
- release-notes
- test-pypi-publish
- pre-release-checks
- publish
runs-on: ubuntu-latest
permissions:
# This permission is needed by `ncipollo/release-action` to
# create the GitHub release/tag
# create the GitHub release.
contents: write
defaults:
@@ -530,25 +175,28 @@ jobs:
working-directory: ${{ inputs.working-directory }}
steps:
- uses: actions/checkout@v5
- uses: actions/checkout@v4
- name: Set up Python + uv
uses: "./.github/actions/uv_setup"
- name: Set up Python + Poetry ${{ env.POETRY_VERSION }}
uses: "./.github/actions/poetry_setup"
with:
python-version: ${{ env.PYTHON_VERSION }}
poetry-version: ${{ env.POETRY_VERSION }}
working-directory: ${{ inputs.working-directory }}
cache-key: release
- uses: actions/download-artifact@v5
- uses: actions/download-artifact@v3
with:
name: dist
path: ${{ inputs.working-directory }}/dist/
- name: Create Tag
- name: Create Release
uses: ncipollo/release-action@v1
if: ${{ inputs.working-directory == 'libs/langchain' }}
with:
artifacts: "dist/*"
token: ${{ secrets.GITHUB_TOKEN }}
generateReleaseNotes: false
tag: ${{needs.build.outputs.pkg-name}}==${{ needs.build.outputs.version }}
body: ${{ needs.release-notes.outputs.release-body }}
commit: ${{ github.sha }}
makeLatest: ${{ needs.build.outputs.pkg-name == 'langchain-core'}}
draft: false
generateReleaseNotes: true
tag: v${{ needs.build.outputs.version }}
commit: master

.github/workflows/_release_docker.yml (new file)

@@ -0,0 +1,62 @@
name: release_docker
on:
workflow_call:
inputs:
dockerfile:
required: true
type: string
description: "Path to the Dockerfile to build"
image:
required: true
type: string
description: "Name of the image to build"
env:
TEST_TAG: ${{ inputs.image }}:test
LATEST_TAG: ${{ inputs.image }}:latest
jobs:
docker:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Get git tag
uses: actions-ecosystem/action-get-latest-tag@v1
id: get-latest-tag
- name: Set docker tag
env:
VERSION: ${{ steps.get-latest-tag.outputs.tag }}
run: |
echo "VERSION_TAG=${{ inputs.image }}:${VERSION#v}" >> $GITHUB_ENV
- name: Set up QEMU
uses: docker/setup-qemu-action@v3
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
- name: Build for Test
uses: docker/build-push-action@v5
with:
context: .
file: ${{ inputs.dockerfile }}
load: true
tags: ${{ env.TEST_TAG }}
- name: Test
run: |
docker run --rm ${{ env.TEST_TAG }} python -c "import langchain"
- name: Build and Push to Docker Hub
uses: docker/build-push-action@v5
with:
context: .
file: ${{ inputs.dockerfile }}
# We can only build for the intersection of platforms supported by
# QEMU and base python image, for now build only for
# linux/amd64 and linux/arm64
platforms: linux/amd64,linux/arm64
tags: ${{ env.LATEST_TAG }},${{ env.VERSION_TAG }}
push: true


@@ -1,7 +1,4 @@
# Runs unit tests with both current and minimum supported dependency versions
# to ensure compatibility across the supported range.
name: "🧪 Unit Testing"
name: test
on:
workflow_call:
@@ -10,69 +7,57 @@ on:
required: true
type: string
description: "From which folder this pipeline executes"
python-version:
required: true
langchain-location:
required: false
type: string
description: "Python version to use"
permissions:
contents: read
description: "Relative path to the langchain library folder"
env:
UV_FROZEN: "true"
UV_NO_SYNC: "true"
POETRY_VERSION: "1.6.1"
jobs:
# Main test job - runs unit tests with current deps, then retests with minimum versions
build:
defaults:
run:
working-directory: ${{ inputs.working-directory }}
runs-on: ubuntu-latest
timeout-minutes: 20
name: "Python ${{ inputs.python-version }}"
strategy:
matrix:
python-version:
- "3.8"
- "3.9"
- "3.10"
- "3.11"
name: Python ${{ matrix.python-version }}
steps:
- name: "📋 Checkout Code"
uses: actions/checkout@v5
- uses: actions/checkout@v4
- name: "🐍 Set up Python ${{ inputs.python-version }} + UV"
uses: "./.github/actions/uv_setup"
id: setup-python
- name: Set up Python ${{ matrix.python-version }} + Poetry ${{ env.POETRY_VERSION }}
uses: "./.github/actions/poetry_setup"
with:
python-version: ${{ inputs.python-version }}
cache-suffix: test-${{ inputs.working-directory }}
python-version: ${{ matrix.python-version }}
poetry-version: ${{ env.POETRY_VERSION }}
working-directory: ${{ inputs.working-directory }}
cache-key: core
- name: "📦 Install Test Dependencies"
- name: Install dependencies
shell: bash
run: uv sync --group test --dev
run: poetry install --with test
- name: "🧪 Run Core Unit Tests"
- name: Install langchain editable
working-directory: ${{ inputs.working-directory }}
if: ${{ inputs.langchain-location }}
env:
LANGCHAIN_LOCATION: ${{ inputs.langchain-location }}
run: |
poetry run pip install -e "$LANGCHAIN_LOCATION"
- name: Run core tests
shell: bash
run: |
make test
- name: "🔍 Calculate Minimum Dependency Versions"
working-directory: ${{ inputs.working-directory }}
id: min-version
shell: bash
run: |
VIRTUAL_ENV=.venv uv pip install packaging tomli requests
python_version="$(uv run python --version | awk '{print $2}')"
min_versions="$(uv run python $GITHUB_WORKSPACE/.github/scripts/get_min_versions.py pyproject.toml pull_request $python_version)"
echo "min-versions=$min_versions" >> "$GITHUB_OUTPUT"
echo "min-versions=$min_versions"
- name: "🧪 Run Tests with Minimum Dependencies"
if: ${{ steps.min-version.outputs.min-versions != '' }}
env:
MIN_VERSIONS: ${{ steps.min-version.outputs.min-versions }}
run: |
VIRTUAL_ENV=.venv uv pip install $MIN_VERSIONS
make tests
working-directory: ${{ inputs.working-directory }}
- name: "🧹 Verify Clean Working Directory"
- name: Ensure the tests did not create any additional files
shell: bash
run: |
set -eu


@@ -1,71 +0,0 @@
# Facilitate unit testing against different Pydantic versions for a provided package.
name: "🐍 Pydantic Version Testing"
on:
workflow_call:
inputs:
working-directory:
required: true
type: string
description: "From which folder this pipeline executes"
python-version:
required: false
type: string
description: "Python version to use"
default: "3.11"
pydantic-version:
required: true
type: string
description: "Pydantic version to test."
permissions:
contents: read
env:
UV_FROZEN: "true"
UV_NO_SYNC: "true"
jobs:
build:
defaults:
run:
working-directory: ${{ inputs.working-directory }}
runs-on: ubuntu-latest
timeout-minutes: 20
name: "Pydantic ~=${{ inputs.pydantic-version }}"
steps:
- name: "📋 Checkout Code"
uses: actions/checkout@v5
- name: "🐍 Set up Python ${{ inputs.python-version }} + UV"
uses: "./.github/actions/uv_setup"
with:
python-version: ${{ inputs.python-version }}
cache-suffix: test-pydantic-${{ inputs.working-directory }}
working-directory: ${{ inputs.working-directory }}
- name: "📦 Install Test Dependencies"
shell: bash
run: uv sync --group test
- name: "🔄 Install Specific Pydantic Version"
shell: bash
run: VIRTUAL_ENV=.venv uv pip install pydantic~=${{ inputs.pydantic-version }}
- name: "🧪 Run Core Tests"
shell: bash
run: |
make test
- name: "🧹 Verify Clean Working Directory"
shell: bash
run: |
set -eu
STATUS="$(git status)"
echo "$STATUS"
# grep will exit non-zero if the target message isn't found,
# and `set -e` above will cause the step to fail.
echo "$STATUS" | grep 'nothing to commit, working tree clean'

.github/workflows/_test_release.yml (new file)

@@ -0,0 +1,95 @@
name: test-release
on:
workflow_call:
inputs:
working-directory:
required: true
type: string
description: "From which folder this pipeline executes"
env:
POETRY_VERSION: "1.6.1"
PYTHON_VERSION: "3.10"
jobs:
build:
if: github.ref == 'refs/heads/master'
runs-on: ubuntu-latest
outputs:
pkg-name: ${{ steps.check-version.outputs.pkg-name }}
version: ${{ steps.check-version.outputs.version }}
steps:
- uses: actions/checkout@v4
- name: Set up Python + Poetry ${{ env.POETRY_VERSION }}
uses: "./.github/actions/poetry_setup"
with:
python-version: ${{ env.PYTHON_VERSION }}
poetry-version: ${{ env.POETRY_VERSION }}
working-directory: ${{ inputs.working-directory }}
cache-key: release
# We want to keep this build stage *separate* from the release stage,
# so that there's no sharing of permissions between them.
# The release stage has trusted publishing and GitHub repo contents write access,
# and we want to keep the scope of that access limited just to the release job.
# Otherwise, a malicious `build` step (e.g. via a compromised dependency)
# could get access to our GitHub or PyPI credentials.
#
# Per the trusted publishing GitHub Action:
# > It is strongly advised to separate jobs for building [...]
# > from the publish job.
# https://github.com/pypa/gh-action-pypi-publish#non-goals
- name: Build project for distribution
run: poetry build
working-directory: ${{ inputs.working-directory }}
- name: Upload build
uses: actions/upload-artifact@v3
with:
name: test-dist
path: ${{ inputs.working-directory }}/dist/
- name: Check Version
id: check-version
shell: bash
working-directory: ${{ inputs.working-directory }}
run: |
echo pkg-name="$(poetry version | cut -d ' ' -f 1)" >> $GITHUB_OUTPUT
echo version="$(poetry version --short)" >> $GITHUB_OUTPUT
publish:
needs:
- build
runs-on: ubuntu-latest
permissions:
# This permission is used for trusted publishing:
# https://blog.pypi.org/posts/2023-04-20-introducing-trusted-publishers/
#
# Trusted publishing has to also be configured on PyPI for each package:
# https://docs.pypi.org/trusted-publishers/adding-a-publisher/
id-token: write
steps:
- uses: actions/checkout@v4
- uses: actions/download-artifact@v3
with:
name: test-dist
path: ${{ inputs.working-directory }}/dist/
- name: Publish to test PyPI
uses: pypa/gh-action-pypi-publish@release/v1
with:
packages-dir: ${{ inputs.working-directory }}/dist/
verbose: true
print-hash: true
repository-url: https://test.pypi.org/legacy/
# We overwrite any existing distributions with the same name and version.
# This is *only for CI use* and is *extremely dangerous* otherwise!
# https://github.com/pypa/gh-action-pypi-publish#tolerating-release-package-file-duplicates
skip-existing: true


@@ -1,51 +0,0 @@
# Ensures version numbers in pyproject.toml and version.py stay in sync.
#
# (Prevents releases with mismatched version numbers)
name: "🔍 Check Version Equality"
on:
pull_request:
paths:
- "libs/core/pyproject.toml"
- "libs/core/langchain_core/version.py"
permissions:
contents: read
jobs:
check_version_equality:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v5
- name: "✅ Verify pyproject.toml & version.py Match"
run: |
# Check core versions
CORE_PYPROJECT_VERSION=$(grep -Po '(?<=^version = ")[^"]*' libs/core/pyproject.toml)
CORE_VERSION_PY_VERSION=$(grep -Po '(?<=^VERSION = ")[^"]*' libs/core/langchain_core/version.py)
# Compare core versions
if [ "$CORE_PYPROJECT_VERSION" != "$CORE_VERSION_PY_VERSION" ]; then
echo "langchain-core versions in pyproject.toml and version.py do not match!"
echo "pyproject.toml version: $CORE_PYPROJECT_VERSION"
echo "version.py version: $CORE_VERSION_PY_VERSION"
exit 1
else
echo "Core versions match: $CORE_PYPROJECT_VERSION"
fi
# Check langchain_v1 versions
LANGCHAIN_PYPROJECT_VERSION=$(grep -Po '(?<=^version = ")[^"]*' libs/langchain_v1/pyproject.toml)
LANGCHAIN_INIT_PY_VERSION=$(grep -Po '(?<=^__version__ = ")[^"]*' libs/langchain_v1/langchain/__init__.py)
# Compare langchain_v1 versions
if [ "$LANGCHAIN_PYPROJECT_VERSION" != "$LANGCHAIN_INIT_PY_VERSION" ]; then
echo "langchain_v1 versions in pyproject.toml and __init__.py do not match!"
echo "pyproject.toml version: $LANGCHAIN_PYPROJECT_VERSION"
echo "version.py version: $LANGCHAIN_INIT_PY_VERSION"
exit 1
else
echo "Langchain v1 versions match: $LANGCHAIN_PYPROJECT_VERSION"
fi


@@ -1,262 +1,47 @@
# Primary CI workflow.
#
# Only runs against packages that have changed files.
#
# Runs:
# - Linting (_lint.yml)
# - Unit Tests (_test.yml)
# - Pydantic compatibility tests (_test_pydantic.yml)
# - Integration test compilation checks (_compile_integration_test.yml)
# - Extended test suites that require additional dependencies
# - Codspeed benchmarks (if not labeled 'codspeed-ignore')
#
# Reports status to GitHub checks and PR status.
name: "🔧 CI"
---
name: Check library diffs
on:
push:
branches: [master]
pull_request:
merge_group:
paths:
- ".github/actions/**"
- ".github/tools/**"
- ".github/workflows/**"
- "libs/**"
# Optimizes CI performance by canceling redundant workflow runs
# If another push to the same PR or branch happens while this workflow is still running,
# cancel the earlier run in favor of the next run.
#
# There's no point in testing an outdated version of the code. GitHub only allows
# a limited number of job runners to be active at the same time, so it's better to
# cancel pointless jobs early so that more useful jobs can run sooner.
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
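# e.g. for a pull request, the group key resolves to something like
# "<workflow name>-refs/pull/1234/merge" (hypothetical PR number), so a
# newer push to the same PR cancels the still-running earlier run.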
permissions:
contents: read
env:
UV_FROZEN: "true"
UV_NO_SYNC: "true"
jobs:
# This job analyzes which files changed and creates a dynamic test matrix
# to only run tests/lints for the affected packages, improving CI efficiency
build:
name: "Detect Changes & Set Matrix"
runs-on: ubuntu-latest
if: ${{ !contains(github.event.pull_request.labels.*.name, 'ci-ignore') }}
steps:
- name: "📋 Checkout Code"
uses: actions/checkout@v5
- name: "🐍 Setup Python 3.11"
uses: actions/setup-python@v6
- uses: actions/checkout@v4
- uses: actions/setup-python@v4
with:
python-version: "3.11"
- name: "📂 Get Changed Files"
id: files
uses: Ana06/get-changed-files@v2.3.0
- name: "🔍 Analyze Changed Files & Generate Build Matrix"
id: set-matrix
run: |
python -m pip install packaging requests
python .github/scripts/check_diff.py ${{ steps.files.outputs.all }} >> $GITHUB_OUTPUT
python-version: '3.10'
- id: files
uses: Ana06/get-changed-files@v2.2.0
- id: set-matrix
run: echo "dirs-to-run=$(python .github/scripts/check_diff.py ${{ steps.files.outputs.all }})" >> $GITHUB_OUTPUT
outputs:
lint: ${{ steps.set-matrix.outputs.lint }}
test: ${{ steps.set-matrix.outputs.test }}
extended-tests: ${{ steps.set-matrix.outputs.extended-tests }}
compile-integration-tests: ${{ steps.set-matrix.outputs.compile-integration-tests }}
dependencies: ${{ steps.set-matrix.outputs.dependencies }}
test-pydantic: ${{ steps.set-matrix.outputs.test-pydantic }}
codspeed: ${{ steps.set-matrix.outputs.codspeed }}
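# Each output above is a JSON list consumed via fromJson below; a
# hypothetical shape for `lint` would be:
#   [{"working-directory": "libs/core", "python-version": "3.9"}]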
# Run linting only on packages that have changed files
lint:
needs: [build]
if: ${{ needs.build.outputs.lint != '[]' }}
dirs-to-run: ${{ steps.set-matrix.outputs.dirs-to-run }}
ci:
needs: [ build ]
strategy:
matrix:
job-configs: ${{ fromJson(needs.build.outputs.lint) }}
fail-fast: false
uses: ./.github/workflows/_lint.yml
working-directory: ${{ fromJson(needs.build.outputs.dirs-to-run) }}
uses: ./.github/workflows/_all_ci.yml
with:
working-directory: ${{ matrix.job-configs.working-directory }}
python-version: ${{ matrix.job-configs.python-version }}
secrets: inherit
working-directory: ${{ matrix.working-directory }}
# Run unit tests only on packages that have changed files
test:
needs: [build]
if: ${{ needs.build.outputs.test != '[]' }}
strategy:
matrix:
job-configs: ${{ fromJson(needs.build.outputs.test) }}
fail-fast: false
uses: ./.github/workflows/_test.yml
with:
working-directory: ${{ matrix.job-configs.working-directory }}
python-version: ${{ matrix.job-configs.python-version }}
secrets: inherit
# Test compatibility with different Pydantic versions for affected packages
test-pydantic:
needs: [build]
if: ${{ needs.build.outputs.test-pydantic != '[]' }}
strategy:
matrix:
job-configs: ${{ fromJson(needs.build.outputs.test-pydantic) }}
fail-fast: false
uses: ./.github/workflows/_test_pydantic.yml
with:
working-directory: ${{ matrix.job-configs.working-directory }}
pydantic-version: ${{ matrix.job-configs.pydantic-version }}
secrets: inherit
# Verify integration tests compile without actually running them (faster feedback)
compile-integration-tests:
name: "Compile Integration Tests"
needs: [build]
if: ${{ needs.build.outputs.compile-integration-tests != '[]' }}
strategy:
matrix:
job-configs: ${{ fromJson(needs.build.outputs.compile-integration-tests) }}
fail-fast: false
uses: ./.github/workflows/_compile_integration_test.yml
with:
working-directory: ${{ matrix.job-configs.working-directory }}
python-version: ${{ matrix.job-configs.python-version }}
secrets: inherit
# Run extended test suites that require additional dependencies
extended-tests:
name: "Extended Tests"
needs: [build]
if: ${{ needs.build.outputs.extended-tests != '[]' }}
strategy:
matrix:
# note different variable for extended test dirs
job-configs: ${{ fromJson(needs.build.outputs.extended-tests) }}
fail-fast: false
runs-on: ubuntu-latest
timeout-minutes: 20
defaults:
run:
working-directory: ${{ matrix.job-configs.working-directory }}
steps:
- uses: actions/checkout@v5
- name: "🐍 Set up Python ${{ matrix.job-configs.python-version }} + UV"
uses: "./.github/actions/uv_setup"
with:
python-version: ${{ matrix.job-configs.python-version }}
cache-suffix: extended-tests-${{ matrix.job-configs.working-directory }}
working-directory: ${{ matrix.job-configs.working-directory }}
- name: "📦 Install Dependencies & Run Extended Tests"
shell: bash
run: |
echo "Running extended tests, installing dependencies with uv..."
uv venv
uv sync --group test
VIRTUAL_ENV=.venv uv pip install -r extended_testing_deps.txt
VIRTUAL_ENV=.venv make extended_tests
- name: "🧹 Verify Clean Working Directory"
shell: bash
run: |
set -eu
STATUS="$(git status)"
echo "$STATUS"
# grep will exit non-zero if the target message isn't found,
# and `set -e` above will cause the step to fail.
echo "$STATUS" | grep 'nothing to commit, working tree clean'
# Run codspeed benchmarks only on packages that have changed files
codspeed:
name: "⚡ CodSpeed Benchmarks"
needs: [build]
if: ${{ needs.build.outputs.codspeed != '[]' && !contains(github.event.pull_request.labels.*.name, 'codspeed-ignore') }}
runs-on: ubuntu-latest
strategy:
matrix:
job-configs: ${{ fromJson(needs.build.outputs.codspeed) }}
fail-fast: false
steps:
- uses: actions/checkout@v5
# We have to use 3.12 as 3.13 is not yet supported
- name: "📦 Install UV Package Manager"
uses: astral-sh/setup-uv@v7
with:
python-version: "3.12"
- uses: actions/setup-python@v6
with:
python-version: "3.12"
- name: "📦 Install Test Dependencies"
run: uv sync --group test
working-directory: ${{ matrix.job-configs.working-directory }}
- name: "⚡ Run Benchmarks: ${{ matrix.job-configs.working-directory }}"
uses: CodSpeedHQ/action@v4
env:
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
ANTHROPIC_FILES_API_IMAGE_ID: ${{ secrets.ANTHROPIC_FILES_API_IMAGE_ID }}
ANTHROPIC_FILES_API_PDF_ID: ${{ secrets.ANTHROPIC_FILES_API_PDF_ID }}
AZURE_OPENAI_API_VERSION: ${{ secrets.AZURE_OPENAI_API_VERSION }}
AZURE_OPENAI_API_BASE: ${{ secrets.AZURE_OPENAI_API_BASE }}
AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }}
AZURE_OPENAI_CHAT_DEPLOYMENT_NAME: ${{ secrets.AZURE_OPENAI_CHAT_DEPLOYMENT_NAME }}
AZURE_OPENAI_LEGACY_CHAT_DEPLOYMENT_NAME: ${{ secrets.AZURE_OPENAI_LEGACY_CHAT_DEPLOYMENT_NAME }}
AZURE_OPENAI_LLM_DEPLOYMENT_NAME: ${{ secrets.AZURE_OPENAI_LLM_DEPLOYMENT_NAME }}
AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT_NAME: ${{ secrets.AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT_NAME }}
COHERE_API_KEY: ${{ secrets.COHERE_API_KEY }}
DEEPSEEK_API_KEY: ${{ secrets.DEEPSEEK_API_KEY }}
EXA_API_KEY: ${{ secrets.EXA_API_KEY }}
FIREWORKS_API_KEY: ${{ secrets.FIREWORKS_API_KEY }}
GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
MISTRAL_API_KEY: ${{ secrets.MISTRAL_API_KEY }}
NOMIC_API_KEY: ${{ secrets.NOMIC_API_KEY }}
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
PPLX_API_KEY: ${{ secrets.PPLX_API_KEY }}
XAI_API_KEY: ${{ secrets.XAI_API_KEY }}
with:
token: ${{ secrets.CODSPEED_TOKEN }}
run: |
cd ${{ matrix.job-configs.working-directory }}
if [ "${{ matrix.job-configs.working-directory }}" = "libs/core" ]; then
uv run --no-sync pytest ./tests/benchmarks --codspeed
else
uv run --no-sync pytest ./tests/ --codspeed
fi
mode: ${{ matrix.job-configs.working-directory == 'libs/core' && 'walltime' || 'instrumentation' }}
# Final status check - ensures all required jobs passed before allowing merge
ci_success:
name: "✅ CI Success"
needs:
[
build,
lint,
test,
compile-integration-tests,
extended-tests,
test-pydantic,
codspeed,
]
if: |
always()
runs-on: ubuntu-latest
env:
JOBS_JSON: ${{ toJSON(needs) }}
RESULTS_JSON: ${{ toJSON(needs.*.result) }}
EXIT_CODE: ${{!contains(needs.*.result, 'failure') && !contains(needs.*.result, 'cancelled') && '0' || '1'}}
steps:
- name: "🎉 All Checks Passed"
run: |
echo $JOBS_JSON
echo $RESULTS_JSON
echo "Exiting with $EXIT_CODE"
exit $EXIT_CODE

.github/workflows/codespell.yml (new file)

@@ -0,0 +1,36 @@
---
name: Codespell
on:
push:
branches: [master]
pull_request:
branches: [master]
permissions:
contents: read
jobs:
codespell:
name: Check for spelling errors
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Install Dependencies
run: |
pip install toml
- name: Extract Ignore Words List
run: |
# Use a Python script to extract the ignore words list from pyproject.toml
python .github/workflows/extract_ignored_words_list.py
id: extract_ignore_words
- name: Codespell
uses: codespell-project/actions-codespell@v2
with:
skip: guide_imports.json
ignore_words_list: ${{ steps.extract_ignore_words.outputs.ignore_words_list }}

.github/workflows/doc_lint.yml (new file)

@@ -0,0 +1,35 @@
---
name: Docs, templates, cookbook lint
on:
push:
branches: [ master ]
pull_request:
paths:
- 'docs/**'
- 'templates/**'
- 'cookbook/**'
- '.github/workflows/_lint.yml'
- '.github/workflows/doc_lint.yml'
workflow_dispatch:
jobs:
check:
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Run import check
run: |
# We should not encourage imports directly from the main init file,
# except for hub
git grep 'from langchain import' {docs/docs,templates,cookbook} | grep -vE 'from langchain import (hub)' && exit 1 || exit 0
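          # Illustrative: a doc line `from langchain import OpenAI`
          # (hypothetical) matches the first grep, survives the inverted
          # filter, and fails the step; `from langchain import hub` is
          # filtered out and allowed.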
lint:
uses:
./.github/workflows/_lint.yml
with:
working-directory: "."
secrets: inherit


@@ -0,0 +1,10 @@
import toml
pyproject_toml = toml.load("pyproject.toml")
# Extract the ignore words list (adjust the key as per your TOML structure)
ignore_words_list = (
pyproject_toml.get("tool", {}).get("codespell", {}).get("ignore-words-list")
)
print(f"::set-output name=ignore_words_list::{ignore_words_list}")


@@ -1,180 +0,0 @@
# Routine integration tests against partner libraries with live API credentials.
#
# Uses `make integration_tests` for each library in the matrix.
#
# Runs daily. Can also be triggered manually for immediate updates.
name: "⏰ Integration Tests"
run-name: "Run Integration Tests - ${{ inputs.working-directory-force || 'all libs' }} (Python ${{ inputs.python-version-force || '3.10, 3.13' }})"
on:
workflow_dispatch:
inputs:
working-directory-force:
type: string
description: "From which folder this pipeline executes - defaults to all in matrix - example value: libs/partners/anthropic"
python-version-force:
type: string
description: "Python version to use - defaults to 3.10 and 3.13 in matrix - example value: 3.11"
schedule:
- cron: "0 13 * * *" # Runs daily at 1PM UTC (9AM EDT/6AM PDT)
permissions:
contents: read
env:
UV_FROZEN: "true"
DEFAULT_LIBS: '["libs/partners/openai", "libs/partners/anthropic", "libs/partners/fireworks", "libs/partners/groq", "libs/partners/mistralai", "libs/partners/xai", "libs/partners/google-vertexai", "libs/partners/google-genai", "libs/partners/aws"]'
jobs:
# Generate dynamic test matrix based on input parameters or defaults
# Only runs on the main repo (for scheduled runs) or when manually triggered
compute-matrix:
if: github.repository_owner == 'langchain-ai' || github.event_name != 'schedule'
runs-on: ubuntu-latest
name: "📋 Compute Test Matrix"
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
steps:
- name: "🔢 Generate Python & Library Matrix"
id: set-matrix
env:
DEFAULT_LIBS: ${{ env.DEFAULT_LIBS }}
WORKING_DIRECTORY_FORCE: ${{ github.event.inputs.working-directory-force || '' }}
PYTHON_VERSION_FORCE: ${{ github.event.inputs.python-version-force || '' }}
run: |
# echo "matrix=..." where matrix is a json formatted str with keys python-version and working-directory
# python-version should default to 3.10 and 3.13, but is overridden to [PYTHON_VERSION_FORCE] if set
# working-directory should default to DEFAULT_LIBS, but is overridden to [WORKING_DIRECTORY_FORCE] if set
python_version='["3.10", "3.13"]'
working_directory="$DEFAULT_LIBS"
if [ -n "$PYTHON_VERSION_FORCE" ]; then
python_version="[\"$PYTHON_VERSION_FORCE\"]"
fi
if [ -n "$WORKING_DIRECTORY_FORCE" ]; then
working_directory="[\"$WORKING_DIRECTORY_FORCE\"]"
fi
matrix="{\"python-version\": $python_version, \"working-directory\": $working_directory}"
echo $matrix
echo "matrix=$matrix" >> $GITHUB_OUTPUT
# Run integration tests against partner libraries with live API credentials
build:
if: github.repository_owner == 'langchain-ai' || github.event_name != 'schedule'
name: "🐍 Python ${{ matrix.python-version }}: ${{ matrix.working-directory }}"
runs-on: ubuntu-latest
needs: [compute-matrix]
timeout-minutes: 30
strategy:
fail-fast: false
matrix:
python-version: ${{ fromJSON(needs.compute-matrix.outputs.matrix).python-version }}
working-directory: ${{ fromJSON(needs.compute-matrix.outputs.matrix).working-directory }}
steps:
- uses: actions/checkout@v5
with:
path: langchain
- uses: actions/checkout@v5
with:
repository: langchain-ai/langchain-google
path: langchain-google
- uses: actions/checkout@v5
with:
repository: langchain-ai/langchain-aws
path: langchain-aws
- name: "📦 Organize External Libraries"
run: |
rm -rf \
langchain/libs/partners/google-genai \
langchain/libs/partners/google-vertexai
mv langchain-google/libs/genai langchain/libs/partners/google-genai
mv langchain-google/libs/vertexai langchain/libs/partners/google-vertexai
mv langchain-aws/libs/aws langchain/libs/partners/aws
- name: "🐍 Set up Python ${{ matrix.python-version }} + UV"
uses: "./langchain/.github/actions/uv_setup"
with:
python-version: ${{ matrix.python-version }}
- name: "🔐 Authenticate to Google Cloud"
id: "auth"
uses: google-github-actions/auth@v3
with:
credentials_json: "${{ secrets.GOOGLE_CREDENTIALS }}"
- name: "🔐 Configure AWS Credentials"
uses: aws-actions/configure-aws-credentials@v5
with:
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
aws-region: ${{ secrets.AWS_REGION }}
- name: "📦 Install Dependencies"
run: |
echo "Running scheduled tests, installing dependencies with uv..."
cd langchain/${{ matrix.working-directory }}
uv sync --group test --group test_integration
- name: "🚀 Run Integration Tests"
env:
AI21_API_KEY: ${{ secrets.AI21_API_KEY }}
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
ANTHROPIC_FILES_API_IMAGE_ID: ${{ secrets.ANTHROPIC_FILES_API_IMAGE_ID }}
ANTHROPIC_FILES_API_PDF_ID: ${{ secrets.ANTHROPIC_FILES_API_PDF_ID }}
ASTRA_DB_API_ENDPOINT: ${{ secrets.ASTRA_DB_API_ENDPOINT }}
ASTRA_DB_APPLICATION_TOKEN: ${{ secrets.ASTRA_DB_APPLICATION_TOKEN }}
ASTRA_DB_KEYSPACE: ${{ secrets.ASTRA_DB_KEYSPACE }}
AZURE_OPENAI_API_VERSION: ${{ secrets.AZURE_OPENAI_API_VERSION }}
AZURE_OPENAI_API_BASE: ${{ secrets.AZURE_OPENAI_API_BASE }}
AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }}
AZURE_OPENAI_CHAT_DEPLOYMENT_NAME: ${{ secrets.AZURE_OPENAI_CHAT_DEPLOYMENT_NAME }}
AZURE_OPENAI_LEGACY_CHAT_DEPLOYMENT_NAME: ${{ secrets.AZURE_OPENAI_LEGACY_CHAT_DEPLOYMENT_NAME }}
AZURE_OPENAI_LLM_DEPLOYMENT_NAME: ${{ secrets.AZURE_OPENAI_LLM_DEPLOYMENT_NAME }}
AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT_NAME: ${{ secrets.AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT_NAME }}
COHERE_API_KEY: ${{ secrets.COHERE_API_KEY }}
DEEPSEEK_API_KEY: ${{ secrets.DEEPSEEK_API_KEY }}
ES_URL: ${{ secrets.ES_URL }}
ES_CLOUD_ID: ${{ secrets.ES_CLOUD_ID }}
ES_API_KEY: ${{ secrets.ES_API_KEY }}
EXA_API_KEY: ${{ secrets.EXA_API_KEY }}
FIREWORKS_API_KEY: ${{ secrets.FIREWORKS_API_KEY }}
GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
GOOGLE_SEARCH_API_KEY: ${{ secrets.GOOGLE_SEARCH_API_KEY }}
GOOGLE_CSE_ID: ${{ secrets.GOOGLE_CSE_ID }}
GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
MISTRAL_API_KEY: ${{ secrets.MISTRAL_API_KEY }}
MONGODB_ATLAS_URI: ${{ secrets.MONGODB_ATLAS_URI }}
NOMIC_API_KEY: ${{ secrets.NOMIC_API_KEY }}
NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }}
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
PPLX_API_KEY: ${{ secrets.PPLX_API_KEY }}
TOGETHER_API_KEY: ${{ secrets.TOGETHER_API_KEY }}
UPSTAGE_API_KEY: ${{ secrets.UPSTAGE_API_KEY }}
WATSONX_APIKEY: ${{ secrets.WATSONX_APIKEY }}
WATSONX_PROJECT_ID: ${{ secrets.WATSONX_PROJECT_ID }}
XAI_API_KEY: ${{ secrets.XAI_API_KEY }}
run: |
cd langchain/${{ matrix.working-directory }}
make integration_tests
- name: "🧹 Clean up External Libraries"
# Clean up external libraries to avoid affecting the following git status check
run: |
rm -rf \
langchain/libs/partners/google-genai \
langchain/libs/partners/google-vertexai \
langchain/libs/partners/aws
- name: "🧹 Verify Clean Working Directory"
working-directory: langchain
run: |
set -eu
STATUS="$(git status)"
echo "$STATUS"
# grep will exit non-zero if the target message isn't found,
# and `set -e` above will cause the step to fail.
echo "$STATUS" | grep 'nothing to commit, working tree clean'


@@ -0,0 +1,13 @@
---
name: libs/cli Release
on:
workflow_dispatch: # Allows triggering the workflow manually in the GitHub UI
jobs:
release:
uses:
./.github/workflows/_release.yml
with:
working-directory: libs/cli
secrets: inherit


@@ -0,0 +1,13 @@
---
name: libs/community Release
on:
workflow_dispatch: # Allows triggering the workflow manually in the GitHub UI
jobs:
release:
uses:
./.github/workflows/_release.yml
with:
working-directory: libs/community
secrets: inherit


@@ -0,0 +1,13 @@
---
name: libs/core Release
on:
workflow_dispatch: # Allows triggering the workflow manually in the GitHub UI
jobs:
release:
uses:
./.github/workflows/_release.yml
with:
working-directory: libs/core
secrets: inherit


@@ -0,0 +1,13 @@
---
name: libs/experimental Release
on:
workflow_dispatch: # Allows triggering the workflow manually in the GitHub UI
jobs:
release:
uses:
./.github/workflows/_release.yml
with:
working-directory: libs/experimental
secrets: inherit


@@ -0,0 +1,13 @@
---
name: Experimental Test Release
on:
workflow_dispatch: # Allows triggering the workflow manually in the GitHub UI
jobs:
release:
uses:
./.github/workflows/_test_release.yml
with:
working-directory: libs/experimental
secrets: inherit


@@ -0,0 +1,13 @@
---
name: libs/core Release
on:
workflow_dispatch: # Allows triggering the workflow manually in the GitHub UI
jobs:
release:
uses:
./.github/workflows/_release.yml
with:
working-directory: libs/core
secrets: inherit

27
.github/workflows/langchain_release.yml vendored Normal file

@@ -0,0 +1,27 @@
---
name: libs/langchain Release
on:
workflow_dispatch: # Allows triggering the workflow manually in the GitHub UI
jobs:
release:
uses:
./.github/workflows/_release.yml
with:
working-directory: libs/langchain
secrets: inherit
# N.B.: It's possible that PyPI doesn't make the new release visible / available
# immediately after publishing. If that happens, the docker build might not
# create a new docker image for the new release, since it won't see it.
#
# If this ends up being a problem, add a check to the end of the `_release.yml`
# workflow that prevents the workflow from finishing until the new release
# is visible and installable on PyPI.
release-docker:
needs:
- release
uses:
./.github/workflows/langchain_release_docker.yml
secrets: inherit
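Should such a gate ever be added, a minimal sketch is to poll PyPI's JSON API until the new version is indexed. The package name, version, and timeout below are hypothetical placeholders; this is not part of the current workflow:

import time
import urllib.error
import urllib.request

def wait_for_pypi(package: str, version: str, timeout: float = 300.0) -> None:
    """Block until package==version is visible on PyPI or timeout expires."""
    url = f"https://pypi.org/pypi/{package}/{version}/json"
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        try:
            with urllib.request.urlopen(url) as resp:
                if resp.status == 200:
                    return
        except urllib.error.HTTPError:
            pass  # 404 until the release is indexed
        time.sleep(10)
    raise TimeoutError(f"{package}=={version} not visible on PyPI after {timeout}s")

wait_for_pypi("langchain", "0.0.350")  # hypothetical version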


@@ -0,0 +1,14 @@
---
name: docker/langchain/langchain Release
on:
workflow_dispatch: # Allows triggering the workflow manually in the GitHub UI
workflow_call: # Allows triggering from another workflow
jobs:
release:
uses: ./.github/workflows/_release_docker.yml
with:
dockerfile: docker/Dockerfile.base
image: langchain/langchain
secrets: inherit


@@ -0,0 +1,13 @@
---
name: Test Release
on:
workflow_dispatch: # Allows triggering the workflow manually in the GitHub UI
jobs:
release:
uses:
./.github/workflows/_test_release.yml
with:
working-directory: libs/langchain
secrets: inherit


@@ -1,28 +0,0 @@
# Label PRs based on changed files.
#
# See `.github/pr-file-labeler.yml` for the rules for each label/directory.
name: "🏷️ Pull Request Labeler"
on:
# Safe since we're not checking out or running the PR's code
# Never check out the PR's head in a pull_request_target job
pull_request_target:
types: [opened, synchronize, reopened, edited]
jobs:
labeler:
name: "label"
permissions:
contents: read
pull-requests: write
issues: write
runs-on: ubuntu-latest
steps:
- name: Label Pull Request
uses: actions/labeler@v6
with:
repo-token: "${{ secrets.GITHUB_TOKEN }}"
configuration-path: .github/pr-file-labeler.yml
sync-labels: false


@@ -1,44 +0,0 @@
# Label PRs based on their titles.
#
# Uses conventional commit types from PR titles to apply labels.
# Note: Scope-based labeling (e.g., integration labels) is handled by pr_labeler_file.yml
name: "🏷️ PR Title Labeler"
on:
# Safe since we're not checking out or running the PR's code
# Never check out the PR's head in a pull_request_target job
pull_request_target:
types: [opened, edited]
jobs:
pr-title-labeler:
name: "label"
permissions:
contents: read
pull-requests: write
issues: write
runs-on: ubuntu-latest
steps:
- name: Label PR based on title
uses: bcoe/conventional-release-labels@v1
with:
token: ${{ secrets.GITHUB_TOKEN }}
type_labels: >-
{
"feat": "feature",
"fix": "fix",
"docs": "documentation",
"style": "linting",
"refactor": "refactor",
"perf": "performance",
"test": "tests",
"build": "infra",
"ci": "infra",
"chore": "infra",
"revert": "revert",
"release": "release",
"breaking": "breaking"
}
ignored_types: '[]'


@@ -1,108 +0,0 @@
# PR title linting.
#
# FORMAT (Conventional Commits 1.0.0):
#
# <type>[optional scope]: <description>
# [optional body]
# [optional footer(s)]
#
# Examples:
# feat(core): add multitenant support
# fix(cli): resolve flag parsing error
# docs: update API usage examples
# docs(openai): update API usage examples
#
# Allowed Types:
# * feat — a new feature (MINOR)
# * fix — a bug fix (PATCH)
# * docs — documentation only changes
# * style — formatting, linting, etc.; no code change or typing refactors
# * refactor — code change that neither fixes a bug nor adds a feature
# * perf — code change that improves performance
# * test — adding missing tests or correcting existing tests
# * build — changes that affect the build system/external dependencies
# * ci — continuous integration/configuration changes
# * chore — other changes that don't modify source or test files
# * revert — reverts a previous commit
# * release — prepare a new release
#
# Allowed Scopes (optional):
# core, cli, langchain, langchain_v1, langchain_legacy, standard-tests,
# text-splitters, docs, anthropic, chroma, deepseek, exa, fireworks, groq,
# huggingface, mistralai, nomic, ollama, openai, perplexity, prompty, qdrant,
# xai, infra
#
# Rules:
# 1. The 'Type' must start with a lowercase letter.
# 2. Breaking changes: append "!" after type/scope (e.g., feat!: drop x support)
# 3. When releasing (updating the pyproject.toml and uv.lock), the commit message
# should be: `release(scope): x.y.z` (e.g., `release(core): 1.2.0` with no
# body, footer, or preceding/trailing text).
#
# Enforces Conventional Commits format for pull request titles to maintain a clear and
# machine-readable change history.
name: "🏷️ PR Title Lint"
permissions:
pull-requests: read
on:
pull_request:
types: [opened, edited, synchronize]
jobs:
# Validates that PR title follows Conventional Commits 1.0.0 specification
lint-pr-title:
name: "validate format"
runs-on: ubuntu-latest
steps:
- name: "✅ Validate Conventional Commits Format"
uses: amannn/action-semantic-pull-request@v6
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
with:
types: |
feat
fix
docs
style
refactor
perf
test
build
ci
chore
revert
release
scopes: |
core
cli
langchain
langchain_v1
langchain_legacy
standard-tests
text-splitters
docs
anthropic
chroma
deepseek
exa
fireworks
groq
huggingface
mistralai
nomic
ollama
openai
perplexity
prompty
qdrant
xai
infra
requireScope: false
disallowScopes: |
release
[A-Z]+
ignoreLabels: |
ignore-lint-pr-title

.github/workflows/scheduled_test.yml vendored Normal file

@@ -0,0 +1,86 @@
name: Scheduled tests
on:
workflow_dispatch: # Allows triggering the workflow manually in the GitHub UI
schedule:
- cron: '0 13 * * *'
env:
POETRY_VERSION: "1.6.1"
jobs:
build:
defaults:
run:
working-directory: libs/langchain
runs-on: ubuntu-latest
environment: Scheduled testing
strategy:
matrix:
python-version:
- "3.8"
- "3.9"
- "3.10"
- "3.11"
name: Python ${{ matrix.python-version }}
steps:
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
uses: "./.github/actions/poetry_setup"
with:
python-version: ${{ matrix.python-version }}
poetry-version: ${{ env.POETRY_VERSION }}
working-directory: libs/langchain
cache-key: scheduled
- name: 'Authenticate to Google Cloud'
id: 'auth'
uses: 'google-github-actions/auth@v1'
with:
credentials_json: '${{ secrets.GOOGLE_CREDENTIALS }}'
- name: Configure AWS Credentials
uses: aws-actions/configure-aws-credentials@v4
with:
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
aws-region: ${{ vars.AWS_REGION }}
- name: Install dependencies
working-directory: libs/langchain
shell: bash
run: |
echo "Running scheduled tests, installing dependencies with poetry..."
poetry install --with=test_integration
poetry run pip install google-cloud-aiplatform
poetry run pip install "boto3>=1.28.57"
if [[ ${{ matrix.python-version }} != "3.8" ]]
then
poetry run pip install fireworks-ai
fi
- name: Run tests
shell: bash
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
AZURE_OPENAI_API_VERSION: ${{ secrets.AZURE_OPENAI_API_VERSION }}
AZURE_OPENAI_API_BASE: ${{ secrets.AZURE_OPENAI_API_BASE }}
AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }}
AZURE_OPENAI_DEPLOYMENT_NAME: ${{ secrets.AZURE_OPENAI_DEPLOYMENT_NAME }}
FIREWORKS_API_KEY: ${{ secrets.FIREWORKS_API_KEY }}
run: |
make scheduled_tests
- name: Ensure the tests did not create any additional files
shell: bash
run: |
set -eu
STATUS="$(git status)"
echo "$STATUS"
# grep will exit non-zero if the target message isn't found,
# and `set -e` above will cause the step to fail.
echo "$STATUS" | grep 'nothing to commit, working tree clean'

.github/workflows/templates_ci.yml vendored Normal file

@@ -0,0 +1,36 @@
---
name: templates CI
on:
push:
branches: [ master ]
pull_request:
paths:
- '.github/actions/poetry_setup/action.yml'
- '.github/tools/**'
- '.github/workflows/_lint.yml'
- '.github/workflows/templates_ci.yml'
- 'templates/**'
workflow_dispatch: # Allows triggering the workflow manually in the GitHub UI
# If another push to the same PR or branch happens while this workflow is still running,
# cancel the earlier run in favor of the next run.
#
# There's no point in testing an outdated version of the code. GitHub only allows
# a limited number of job runners to be active at the same time, so it's better to cancel
# pointless jobs early so that more useful jobs can run sooner.
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
env:
POETRY_VERSION: "1.6.1"
WORKDIR: "templates"
jobs:
lint:
uses:
./.github/workflows/_lint.yml
with:
working-directory: templates
secrets: inherit


@@ -1,164 +0,0 @@
# Build the API reference documentation for v0.3 branch.
#
# Manual trigger only.
#
# Built HTML pushed to langchain-ai/langchain-api-docs-html.
#
# Looks for langchain-ai org repos in packages.yml and checks them out.
# Calls prep_api_docs_build.py.
name: "📚 API Docs (v0.3)"
run-name: "Build & Deploy API Reference (v0.3)"
on:
workflow_dispatch:
env:
PYTHON_VERSION: "3.11"
jobs:
build:
if: github.repository == 'langchain-ai/langchain' || github.event_name != 'schedule'
runs-on: ubuntu-latest
permissions:
contents: read
steps:
- uses: actions/checkout@v5
with:
ref: v0.3
path: langchain
- uses: actions/checkout@v5
with:
repository: langchain-ai/langchain-api-docs-html
path: langchain-api-docs-html
token: ${{ secrets.TOKEN_GITHUB_API_DOCS_HTML }}
- name: "📋 Extract Repository List with yq"
id: get-unsorted-repos
uses: mikefarah/yq@master
with:
cmd: |
# Extract repos from packages.yml that are in the langchain-ai org
# (excluding 'langchain' itself)
yq '
.packages[]
| select(
(
(.repo | test("^langchain-ai/"))
and
(.repo != "langchain-ai/langchain")
)
or
(.include_in_api_ref // false)
)
| .repo
' langchain/libs/packages.yml
- name: "📋 Parse YAML & Checkout Repositories"
env:
REPOS_UNSORTED: ${{ steps.get-unsorted-repos.outputs.result }}
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
# Get unique repositories
REPOS=$(echo "$REPOS_UNSORTED" | sort -u)
# Checkout each unique repository
for repo in $REPOS; do
# Validate repository format (allow any org with proper format)
if [[ ! "$repo" =~ ^[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+$ ]]; then
echo "Error: Invalid repository format: $repo"
exit 1
fi
REPO_NAME=$(echo $repo | cut -d'/' -f2)
# Additional validation for repo name
if [[ ! "$REPO_NAME" =~ ^[a-zA-Z0-9_.-]+$ ]]; then
echo "Error: Invalid repository name: $REPO_NAME"
exit 1
fi
echo "Checking out $repo to $REPO_NAME"
# Special handling for langchain-tavily: checkout by commit hash
if [[ "$REPO_NAME" == "langchain-tavily" ]]; then
git clone https://github.com/$repo.git $REPO_NAME
cd $REPO_NAME
git checkout f3515654724a9e87bdfe2c2f509d6cdde646e563
cd ..
else
git clone --depth 1 --branch v0.3 https://github.com/$repo.git $REPO_NAME
fi
done
- name: "🐍 Setup Python ${{ env.PYTHON_VERSION }}"
uses: actions/setup-python@v6
id: setup-python
with:
python-version: ${{ env.PYTHON_VERSION }}
- name: "📦 Install Initial Python Dependencies using uv"
working-directory: langchain
run: |
python -m pip install -U uv
python -m uv pip install --upgrade --no-cache-dir pip setuptools pyyaml
- name: "📦 Organize Library Directories"
# Places cloned partner packages into libs/partners structure
run: python langchain/.github/scripts/prep_api_docs_build.py
- name: "🧹 Clear Prior Build"
run: |
  # Remove artifacts from prior docs build
  rm -rf langchain-api-docs-html/api_reference_build/html
- name: "📦 Install Documentation Dependencies using uv"
working-directory: langchain
run: |
# Install all partner packages in editable mode with overrides
python -m uv pip install $(ls ./libs/partners | grep -v azure-ai | xargs -I {} echo "./libs/partners/{}") --overrides ./docs/vercel_overrides.txt --prerelease=allow
# Install langchain-azure-ai with tools extra
python -m uv pip install "./libs/partners/azure-ai[tools]" --overrides ./docs/vercel_overrides.txt --prerelease=allow
# Install core langchain and other main packages
python -m uv pip install libs/core libs/langchain libs/text-splitters libs/community libs/experimental libs/standard-tests
# Install Sphinx and related packages for building docs
python -m uv pip install -r docs/api_reference/requirements.txt
- name: "🔧 Configure Git Settings"
working-directory: langchain
run: |
git config --local user.email "actions@github.com"
git config --local user.name "Github Actions"
- name: "📚 Build API Documentation"
working-directory: langchain
run: |
# Generate the API reference RST files
python docs/api_reference/create_api_rst.py
# Build the HTML documentation using Sphinx
# -T: show full traceback on exception
# -E: don't use cached environment (force rebuild, ignore cached doctrees)
# -b html: build HTML docs (vs. PDF, etc.)
# -d: path for the cached environment (parsed document trees / doctrees)
# - Separate from output dir for faster incremental builds
# -c: path to conf.py
# -j auto: parallel build using all available CPU cores
python -m sphinx -T -E -b html -d ../langchain-api-docs-html/_build/doctrees -c docs/api_reference docs/api_reference ../langchain-api-docs-html/api_reference_build/html -j auto
# Post-process the generated HTML
python docs/api_reference/scripts/custom_formatter.py ../langchain-api-docs-html/api_reference_build/html
# Default index page is blank so we copy in the actual home page.
cp ../langchain-api-docs-html/api_reference_build/html/{reference,index}.html
# Removes Sphinx's intermediate build artifacts after the build is complete.
rm -rf ../langchain-api-docs-html/_build/
# Commit and push changes to langchain-api-docs-html repo
- uses: EndBug/add-and-commit@v9
with:
cwd: langchain-api-docs-html
message: "Update API docs build from v0.3 branch"


@@ -1,8 +0,0 @@
With the deprecation of v0 docs, the following files will need to be migrated/supported
in the new docs repo:
- run_notebooks.yml: new repo should run integration tests on code snippets?
- people.yml: Need to fix and somehow display on the new docs site
- Subsequently, `.github/actions/people/`
- _test_doc_imports.yml
- check-broken-links.yml

.gitignore vendored

@@ -1,5 +1,5 @@
.vs/
.claude/
.vscode/
.idea/
# Byte-compiled / optimized / DLL files
__pycache__/
@@ -59,7 +59,6 @@ coverage.xml
*.py,cover
.hypothesis/
.pytest_cache/
.codspeed/
# Translations
*.mo
@@ -78,6 +77,10 @@ instance/
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
docs/docs/_build/
# PyBuilder
target/
@@ -112,11 +115,13 @@ celerybeat.pid
# Environments
.env
.envrc
.venv*
venv*
.venv
.venvs
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
@@ -130,7 +135,6 @@ env.bak/
# mypy
.mypy_cache/
.mypy_cache_test/
.dmypy.json
dmypy.json
@@ -158,6 +162,19 @@ data_map*
*replit*
node_modules
prof
virtualenv/
docs/.yarn/
docs/node_modules/
docs/.docusaurus/
docs/.cache-loader/
docs/_dist
docs/api_reference/*api_reference.rst
docs/api_reference/_build
docs/api_reference/*/
!docs/api_reference/_static/
!docs/api_reference/templates/
!docs/api_reference/themes/
docs/docs/build
docs/docs/node_modules
docs/docs/yarn.lock
_dist
docs/docs/templates


@@ -1,14 +0,0 @@
{
"MD013": false,
"MD024": {
"siblings_only": true
},
"MD025": false,
"MD033": false,
"MD034": false,
"MD036": false,
"MD041": false,
"MD046": {
"style": "fenced"
}
}


@@ -1,99 +0,0 @@
repos:
- repo: local
hooks:
- id: core
name: format and lint core
language: system
entry: make -C libs/core format lint
files: ^libs/core/
pass_filenames: false
- id: langchain
name: format and lint langchain
language: system
entry: make -C libs/langchain format lint
files: ^libs/langchain/
pass_filenames: false
- id: standard-tests
name: format and lint standard-tests
language: system
entry: make -C libs/standard-tests format lint
files: ^libs/standard-tests/
pass_filenames: false
- id: text-splitters
name: format and lint text-splitters
language: system
entry: make -C libs/text-splitters format lint
files: ^libs/text-splitters/
pass_filenames: false
- id: anthropic
name: format and lint partners/anthropic
language: system
entry: make -C libs/partners/anthropic format lint
files: ^libs/partners/anthropic/
pass_filenames: false
- id: chroma
name: format and lint partners/chroma
language: system
entry: make -C libs/partners/chroma format lint
files: ^libs/partners/chroma/
pass_filenames: false
- id: exa
name: format and lint partners/exa
language: system
entry: make -C libs/partners/exa format lint
files: ^libs/partners/exa/
pass_filenames: false
- id: fireworks
name: format and lint partners/fireworks
language: system
entry: make -C libs/partners/fireworks format lint
files: ^libs/partners/fireworks/
pass_filenames: false
- id: groq
name: format and lint partners/groq
language: system
entry: make -C libs/partners/groq format lint
files: ^libs/partners/groq/
pass_filenames: false
- id: huggingface
name: format and lint partners/huggingface
language: system
entry: make -C libs/partners/huggingface format lint
files: ^libs/partners/huggingface/
pass_filenames: false
- id: mistralai
name: format and lint partners/mistralai
language: system
entry: make -C libs/partners/mistralai format lint
files: ^libs/partners/mistralai/
pass_filenames: false
- id: nomic
name: format and lint partners/nomic
language: system
entry: make -C libs/partners/nomic format lint
files: ^libs/partners/nomic/
pass_filenames: false
- id: ollama
name: format and lint partners/ollama
language: system
entry: make -C libs/partners/ollama format lint
files: ^libs/partners/ollama/
pass_filenames: false
- id: openai
name: format and lint partners/openai
language: system
entry: make -C libs/partners/openai format lint
files: ^libs/partners/openai/
pass_filenames: false
- id: prompty
name: format and lint partners/prompty
language: system
entry: make -C libs/partners/prompty format lint
files: ^libs/partners/prompty/
pass_filenames: false
- id: qdrant
name: format and lint partners/qdrant
language: system
entry: make -C libs/partners/qdrant format lint
files: ^libs/partners/qdrant/
pass_filenames: false

.readthedocs.yaml Normal file

@@ -0,0 +1,32 @@
# Read the Docs configuration file
# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
# Required
version: 2
# Set the version of Python and other tools you might need
build:
os: ubuntu-22.04
tools:
python: "3.11"
commands:
- python -mvirtualenv $READTHEDOCS_VIRTUALENV_PATH
- python -m pip install --upgrade --no-cache-dir pip setuptools
- python -m pip install --upgrade --no-cache-dir sphinx readthedocs-sphinx-ext
- python -m pip install --exists-action=w --no-cache-dir -r docs/api_reference/requirements.txt
- python docs/api_reference/create_api_rst.py
- cat docs/api_reference/conf.py
- python -m sphinx -T -E -b html -d _build/doctrees -c docs/api_reference docs/api_reference $READTHEDOCS_OUTPUT/html -j auto
# Build documentation in the docs/ directory with Sphinx
sphinx:
configuration: docs/api_reference/conf.py
# If using Sphinx, optionally build your docs in additional formats such as PDF
# formats:
# - pdf
# Optionally declare the Python requirements required to build your docs
python:
install:
- requirements: docs/api_reference/requirements.txt


@@ -0,0 +1,9 @@
"""Main entrypoint into package."""
from importlib import metadata
try:
__version__ = metadata.version(__package__)
except metadata.PackageNotFoundError:
# Case where package metadata is not available.
__version__ = ""
del metadata # optional, avoids polluting the results of dir(__package__)


@@ -0,0 +1,123 @@
"""Agent toolkits contain integrations with various resources and services.
LangChain has a large ecosystem of integrations with various external resources
like local and remote file systems, APIs and databases.
These integrations allow developers to create versatile applications that combine the
power of LLMs with the ability to access, interact with and manipulate external
resources.
When developing an application, developers should inspect the capabilities and
permissions of the tools that underlie the given agent toolkit, and determine
whether permissions of the given toolkit are appropriate for the application.
See [Security](https://python.langchain.com/docs/security) for more information.
"""
from pathlib import Path
from typing import Any
from langchain_core._api.path import as_import_path
from langchain_community.agent_toolkits.ainetwork.toolkit import AINetworkToolkit
from langchain_community.agent_toolkits.amadeus.toolkit import AmadeusToolkit
from langchain_community.agent_toolkits.azure_cognitive_services import (
AzureCognitiveServicesToolkit,
)
from langchain_community.agent_toolkits.conversational_retrieval.openai_functions import ( # noqa: E501
create_conversational_retrieval_agent,
)
from langchain_community.agent_toolkits.file_management.toolkit import (
FileManagementToolkit,
)
from langchain_community.agent_toolkits.gmail.toolkit import GmailToolkit
from langchain_community.agent_toolkits.jira.toolkit import JiraToolkit
from langchain_community.agent_toolkits.json.base import create_json_agent
from langchain_community.agent_toolkits.json.toolkit import JsonToolkit
from langchain_community.agent_toolkits.multion.toolkit import MultionToolkit
from langchain_community.agent_toolkits.nasa.toolkit import NasaToolkit
from langchain_community.agent_toolkits.nla.toolkit import NLAToolkit
from langchain_community.agent_toolkits.office365.toolkit import O365Toolkit
from langchain_community.agent_toolkits.openapi.base import create_openapi_agent
from langchain_community.agent_toolkits.openapi.toolkit import OpenAPIToolkit
from langchain_community.agent_toolkits.playwright.toolkit import (
PlayWrightBrowserToolkit,
)
from langchain_community.agent_toolkits.powerbi.base import create_pbi_agent
from langchain_community.agent_toolkits.powerbi.chat_base import create_pbi_chat_agent
from langchain_community.agent_toolkits.powerbi.toolkit import PowerBIToolkit
from langchain_community.agent_toolkits.slack.toolkit import SlackToolkit
from langchain_community.agent_toolkits.spark_sql.base import create_spark_sql_agent
from langchain_community.agent_toolkits.spark_sql.toolkit import SparkSQLToolkit
from langchain_community.agent_toolkits.sql.base import create_sql_agent
from langchain_community.agent_toolkits.sql.toolkit import SQLDatabaseToolkit
from langchain_community.agent_toolkits.steam.toolkit import SteamToolkit
from langchain_community.agent_toolkits.vectorstore.base import (
create_vectorstore_agent,
create_vectorstore_router_agent,
)
from langchain_community.agent_toolkits.vectorstore.toolkit import (
VectorStoreInfo,
VectorStoreRouterToolkit,
VectorStoreToolkit,
)
from langchain_community.agent_toolkits.zapier.toolkit import ZapierToolkit
from langchain_community.tools.retriever import create_retriever_tool
DEPRECATED_AGENTS = [
"create_csv_agent",
"create_pandas_dataframe_agent",
"create_xorbits_agent",
"create_python_agent",
"create_spark_dataframe_agent",
]
def __getattr__(name: str) -> Any:
"""Get attr name."""
if name in DEPRECATED_AGENTS:
relative_path = as_import_path(Path(__file__).parent, suffix=name)
old_path = "langchain." + relative_path
new_path = "langchain_experimental." + relative_path
raise ImportError(
f"{name} has been moved to langchain experimental. "
"See https://github.com/langchain-ai/langchain/discussions/11680"
"for more information.\n"
f"Please update your import statement from: `{old_path}` to `{new_path}`."
)
raise AttributeError(f"{name} does not exist")
__all__ = [
"AINetworkToolkit",
"AmadeusToolkit",
"AzureCognitiveServicesToolkit",
"FileManagementToolkit",
"GmailToolkit",
"JiraToolkit",
"JsonToolkit",
"MultionToolkit",
"NasaToolkit",
"NLAToolkit",
"O365Toolkit",
"OpenAPIToolkit",
"PlayWrightBrowserToolkit",
"PowerBIToolkit",
"SlackToolkit",
"SteamToolkit",
"SQLDatabaseToolkit",
"SparkSQLToolkit",
"VectorStoreInfo",
"VectorStoreRouterToolkit",
"VectorStoreToolkit",
"ZapierToolkit",
"create_json_agent",
"create_openapi_agent",
"create_pbi_agent",
"create_pbi_chat_agent",
"create_spark_sql_agent",
"create_sql_agent",
"create_vectorstore_agent",
"create_vectorstore_router_agent",
"create_conversational_retrieval_agent",
"create_retriever_tool",
]
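For context, the module-level `__getattr__` above is a PEP 562 hook: any attribute lookup that falls through the explicit imports lands there, so touching a moved agent produces a descriptive ImportError instead of a bare AttributeError. A minimal sketch of what a caller sees (assuming this module is importable as langchain.agents.agent_toolkits):

# Hypothetical caller; `from ... import name` also triggers the module __getattr__.
try:
    from langchain.agents.agent_toolkits import create_python_agent
except ImportError as err:
    print(err)  # points at langchain_experimental and the new import path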


@@ -0,0 +1,88 @@
from __future__ import annotations
from typing import Any, List, Optional, TYPE_CHECKING
from langchain_core.language_models import BaseLanguageModel
from langchain_core.memory import BaseMemory
from langchain_core.messages import SystemMessage
from langchain_core.prompts.chat import MessagesPlaceholder
from langchain_core.tools import BaseTool
if TYPE_CHECKING:
from langchain.agents.agent import AgentExecutor
def _get_default_system_message() -> SystemMessage:
return SystemMessage(
content=(
"Do your best to answer the questions. "
"Feel free to use any tools available to look up "
"relevant information, only if necessary"
)
)
def create_conversational_retrieval_agent(
llm: BaseLanguageModel,
tools: List[BaseTool],
remember_intermediate_steps: bool = True,
memory_key: str = "chat_history",
system_message: Optional[SystemMessage] = None,
verbose: bool = False,
max_token_limit: int = 2000,
**kwargs: Any,
) -> AgentExecutor:
"""A convenience method for creating a conversational retrieval agent.
Args:
llm: The language model to use, should be ChatOpenAI
tools: A list of tools the agent has access to
remember_intermediate_steps: Whether the agent should remember intermediate
steps or not. Intermediate steps refer to prior action/observation
pairs from previous questions. The benefit of remembering these is if
there is relevant information in there, the agent can use it to answer
follow up questions. The downside is it will take up more tokens.
memory_key: The name of the memory key in the prompt.
system_message: The system message to use. By default, a basic one will
be used.
verbose: Whether the final AgentExecutor should be verbose.
Defaults to False.
max_token_limit: The max number of tokens to keep around in memory.
Defaults to 2000.
Returns:
An agent executor initialized appropriately
"""
from langchain.agents.agent import AgentExecutor
from langchain.agents.openai_functions_agent.agent_token_buffer_memory import (
AgentTokenBufferMemory,
)
from langchain.agents.openai_functions_agent.base import OpenAIFunctionsAgent
from langchain.memory.token_buffer import ConversationTokenBufferMemory
if remember_intermediate_steps:
memory: BaseMemory = AgentTokenBufferMemory(
memory_key=memory_key, llm=llm, max_token_limit=max_token_limit
)
else:
memory = ConversationTokenBufferMemory(
memory_key=memory_key,
return_messages=True,
output_key="output",
llm=llm,
max_token_limit=max_token_limit,
)
_system_message = system_message or _get_default_system_message()
prompt = OpenAIFunctionsAgent.create_prompt(
system_message=_system_message,
extra_prompt_messages=[MessagesPlaceholder(variable_name=memory_key)],
)
agent = OpenAIFunctionsAgent(llm=llm, tools=tools, prompt=prompt)
return AgentExecutor(
agent=agent,
tools=tools,
memory=memory,
verbose=verbose,
return_intermediate_steps=remember_intermediate_steps,
**kwargs,
)
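A minimal usage sketch of this constructor; the chat model and tool below are illustrative assumptions (an OpenAI API key must be set in the environment):

from langchain_core.tools import tool
from langchain_openai.chat_models import ChatOpenAI  # assumed available in this snapshot

@tool
def word_length(word: str) -> int:
    """Return the number of characters in a word."""
    return len(word)

llm = ChatOpenAI(temperature=0)
executor = create_conversational_retrieval_agent(llm, [word_length], verbose=True)
print(executor.invoke({"input": "How long is the word 'intermediate'?"})["output"])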


@@ -0,0 +1,53 @@
"""Json agent."""
from __future__ import annotations
from typing import Any, Dict, List, Optional, TYPE_CHECKING
from langchain_core.callbacks import BaseCallbackManager
from langchain_core.language_models import BaseLanguageModel
from langchain_community.agent_toolkits.json.prompt import JSON_PREFIX, JSON_SUFFIX
from langchain_community.agent_toolkits.json.toolkit import JsonToolkit
if TYPE_CHECKING:
from langchain.agents.agent import AgentExecutor
def create_json_agent(
llm: BaseLanguageModel,
toolkit: JsonToolkit,
callback_manager: Optional[BaseCallbackManager] = None,
prefix: str = JSON_PREFIX,
suffix: str = JSON_SUFFIX,
format_instructions: Optional[str] = None,
input_variables: Optional[List[str]] = None,
verbose: bool = False,
agent_executor_kwargs: Optional[Dict[str, Any]] = None,
**kwargs: Any,
) -> AgentExecutor:
"""Construct a json agent from an LLM and tools."""
from langchain.agents.agent import AgentExecutor
from langchain.agents.mrkl.base import ZeroShotAgent
from langchain.chains.llm import LLMChain
tools = toolkit.get_tools()
prompt_params = {"format_instructions": format_instructions} if format_instructions is not None else {}
prompt = ZeroShotAgent.create_prompt(
tools,
prefix=prefix,
suffix=suffix,
input_variables=input_variables,
**prompt_params,
)
llm_chain = LLMChain(
llm=llm,
prompt=prompt,
callback_manager=callback_manager,
)
tool_names = [tool.name for tool in tools]
agent = ZeroShotAgent(llm_chain=llm_chain, allowed_tools=tool_names, **kwargs)
return AgentExecutor.from_agent_and_tools(
agent=agent,
tools=tools,
callback_manager=callback_manager,
verbose=verbose,
**(agent_executor_kwargs or {}),
)
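A minimal usage sketch, assuming a local openapi.yml spec and an already-constructed LLM bound to `llm` (both placeholders):

import yaml

from langchain_community.agent_toolkits.json.toolkit import JsonToolkit
from langchain_community.tools.json.tool import JsonSpec

with open("openapi.yml") as f:  # hypothetical spec file
    data = yaml.safe_load(f)

toolkit = JsonToolkit(spec=JsonSpec(dict_=data, max_value_length=4000))
executor = create_json_agent(llm=llm, toolkit=toolkit, verbose=True)
executor.run("What are the required parameters of the /search endpoint?")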


@@ -0,0 +1,57 @@
"""Tool for interacting with a single API with natural language definition."""
from __future__ import annotations
from typing import Any, Optional, TYPE_CHECKING
from langchain_core.language_models import BaseLanguageModel
from langchain_core.tools import Tool
from langchain_community.tools.openapi.utils.api_models import APIOperation
from langchain_community.tools.openapi.utils.openapi_utils import OpenAPISpec
from langchain_community.utilities.requests import Requests
if TYPE_CHECKING:
from langchain.chains.api.openapi.chain import OpenAPIEndpointChain
class NLATool(Tool):
"""Natural Language API Tool."""
@classmethod
def from_open_api_endpoint_chain(
cls, chain: OpenAPIEndpointChain, api_title: str
) -> "NLATool":
"""Convert an endpoint chain to an API endpoint tool."""
expanded_name = (
f'{api_title.replace(" ", "_")}.{chain.api_operation.operation_id}'
)
description = (
f"I'm an AI from {api_title}. Instruct what you want,"
" and I'll assist via an API with description:"
f" {chain.api_operation.description}"
)
return cls(name=expanded_name, func=chain.run, description=description)
@classmethod
def from_llm_and_method(
cls,
llm: BaseLanguageModel,
path: str,
method: str,
spec: OpenAPISpec,
requests: Optional[Requests] = None,
verbose: bool = False,
return_intermediate_steps: bool = False,
**kwargs: Any,
) -> "NLATool":
"""Instantiate the tool from the specified path and method."""
api_operation = APIOperation.from_openapi_spec(spec, path, method)
chain = OpenAPIEndpointChain.from_api_operation(
api_operation,
llm,
requests=requests,
verbose=verbose,
return_intermediate_steps=return_intermediate_steps,
**kwargs,
)
return cls.from_open_api_endpoint_chain(chain, spec.info.title)


@@ -0,0 +1,77 @@
"""OpenAPI spec agent."""
from __future__ import annotations
from typing import Any, Dict, List, Optional, TYPE_CHECKING
from langchain_core.callbacks import BaseCallbackManager
from langchain_core.language_models import BaseLanguageModel
from langchain_community.agent_toolkits.openapi.prompt import (
OPENAPI_PREFIX,
OPENAPI_SUFFIX,
)
from langchain_community.agent_toolkits.openapi.toolkit import OpenAPIToolkit
if TYPE_CHECKING:
from langchain.agents.agent import AgentExecutor
def create_openapi_agent(
llm: BaseLanguageModel,
toolkit: OpenAPIToolkit,
callback_manager: Optional[BaseCallbackManager] = None,
prefix: str = OPENAPI_PREFIX,
suffix: str = OPENAPI_SUFFIX,
format_instructions: Optional[str] = None,
input_variables: Optional[List[str]] = None,
max_iterations: Optional[int] = 15,
max_execution_time: Optional[float] = None,
early_stopping_method: str = "force",
verbose: bool = False,
return_intermediate_steps: bool = False,
agent_executor_kwargs: Optional[Dict[str, Any]] = None,
**kwargs: Any,
) -> AgentExecutor:
"""Construct an OpenAPI agent from an LLM and tools.
*Security Note*: When creating an OpenAPI agent, check the permissions
and capabilities of the underlying toolkit.
For example, the default implementation of OpenAPIToolkit
uses the RequestsToolkit, which contains tools to make arbitrary
network requests against any URL (e.g., GET, POST, PATCH, PUT, DELETE).
Control who is allowed to make requests with this toolkit and
what network access it has.
See https://python.langchain.com/docs/security for more information.
"""
from langchain.agents.agent import AgentExecutor
from langchain.agents.mrkl.base import ZeroShotAgent
from langchain.chains.llm import LLMChain
tools = toolkit.get_tools()
prompt_params = {"format_instructions": format_instructions} if format_instructions is not None else {}
prompt = ZeroShotAgent.create_prompt(
tools,
prefix=prefix,
suffix=suffix,
input_variables=input_variables,
**prompt_params
)
llm_chain = LLMChain(
llm=llm,
prompt=prompt,
callback_manager=callback_manager,
)
tool_names = [tool.name for tool in tools]
agent = ZeroShotAgent(llm_chain=llm_chain, allowed_tools=tool_names, **kwargs)
return AgentExecutor.from_agent_and_tools(
agent=agent,
tools=tools,
callback_manager=callback_manager,
verbose=verbose,
return_intermediate_steps=return_intermediate_steps,
max_iterations=max_iterations,
max_execution_time=max_execution_time,
early_stopping_method=early_stopping_method,
**(agent_executor_kwargs or {}),
)


@@ -0,0 +1,366 @@
"""Agent that interacts with OpenAPI APIs via a hierarchical planning approach."""
import json
import re
from functools import partial
from typing import Any, Callable, Dict, List, Optional, TYPE_CHECKING
import yaml
from langchain_core.callbacks import BaseCallbackManager
from langchain_core.language_models import BaseLanguageModel
from langchain_core.prompts import BasePromptTemplate, PromptTemplate
from langchain_core.pydantic_v1 import Field
from langchain_core.tools import BaseTool, Tool
from langchain_openai.llms import OpenAI
from langchain_community.agent_toolkits.openapi.planner_prompt import (
API_CONTROLLER_PROMPT,
API_CONTROLLER_TOOL_DESCRIPTION,
API_CONTROLLER_TOOL_NAME,
API_ORCHESTRATOR_PROMPT,
API_PLANNER_PROMPT,
API_PLANNER_TOOL_DESCRIPTION,
API_PLANNER_TOOL_NAME,
PARSING_DELETE_PROMPT,
PARSING_GET_PROMPT,
PARSING_PATCH_PROMPT,
PARSING_POST_PROMPT,
PARSING_PUT_PROMPT,
REQUESTS_DELETE_TOOL_DESCRIPTION,
REQUESTS_GET_TOOL_DESCRIPTION,
REQUESTS_PATCH_TOOL_DESCRIPTION,
REQUESTS_POST_TOOL_DESCRIPTION,
REQUESTS_PUT_TOOL_DESCRIPTION,
)
from langchain_community.agent_toolkits.openapi.spec import ReducedOpenAPISpec
from langchain_community.output_parsers.json import parse_json_markdown
from langchain_community.tools.requests.tool import BaseRequestsTool
from langchain_community.utilities.requests import RequestsWrapper
if TYPE_CHECKING:
from langchain.agents.agent import AgentExecutor
from langchain.chains.llm import LLMChain
from langchain.memory import ReadOnlySharedMemory
#
# Requests tools with LLM-instructed extraction of truncated responses.
#
# Of course, truncating so bluntly may lose a lot of valuable
# information in the response.
# However, the goal for now is to have only a single inference step.
MAX_RESPONSE_LENGTH = 5000
"""Maximum length of the response to be returned."""
def _get_default_llm_chain(prompt: BasePromptTemplate) -> LLMChain:
from langchain.chains.llm import LLMChain
return LLMChain(
llm=OpenAI(),
prompt=prompt,
)
def _get_default_llm_chain_factory(
prompt: BasePromptTemplate,
) -> Callable[[], LLMChain]:
"""Returns a default LLMChain factory."""
return partial(_get_default_llm_chain, prompt)
class RequestsGetToolWithParsing(BaseRequestsTool, BaseTool):
"""Requests GET tool with LLM-instructed extraction of truncated responses."""
name: str = "requests_get"
"""Tool name."""
description = REQUESTS_GET_TOOL_DESCRIPTION
"""Tool description."""
response_length: Optional[int] = MAX_RESPONSE_LENGTH
"""Maximum length of the response to be returned."""
llm_chain: Any = Field(
default_factory=_get_default_llm_chain_factory(PARSING_GET_PROMPT)
)
"""LLMChain used to extract the response."""
def _run(self, text: str) -> str:
try:
data = parse_json_markdown(text)
except json.JSONDecodeError as e:
raise e
data_params = data.get("params")
response = self.requests_wrapper.get(data["url"], params=data_params)
response = response[: self.response_length]
return self.llm_chain.predict(
response=response, instructions=data["output_instructions"]
).strip()
async def _arun(self, text: str) -> str:
raise NotImplementedError()
class RequestsPostToolWithParsing(BaseRequestsTool, BaseTool):
"""Requests POST tool with LLM-instructed extraction of truncated responses."""
name: str = "requests_post"
"""Tool name."""
description = REQUESTS_POST_TOOL_DESCRIPTION
"""Tool description."""
response_length: Optional[int] = MAX_RESPONSE_LENGTH
"""Maximum length of the response to be returned."""
llm_chain: Any = Field(
default_factory=_get_default_llm_chain_factory(PARSING_POST_PROMPT)
)
"""LLMChain used to extract the response."""
def _run(self, text: str) -> str:
try:
data = parse_json_markdown(text)
except json.JSONDecodeError as e:
raise e
response = self.requests_wrapper.post(data["url"], data["data"])
response = response[: self.response_length]
return self.llm_chain.predict(
response=response, instructions=data["output_instructions"]
).strip()
async def _arun(self, text: str) -> str:
raise NotImplementedError()
class RequestsPatchToolWithParsing(BaseRequestsTool, BaseTool):
"""Requests PATCH tool with LLM-instructed extraction of truncated responses."""
name: str = "requests_patch"
"""Tool name."""
description = REQUESTS_PATCH_TOOL_DESCRIPTION
"""Tool description."""
response_length: Optional[int] = MAX_RESPONSE_LENGTH
"""Maximum length of the response to be returned."""
llm_chain: Any = Field(
default_factory=_get_default_llm_chain_factory(PARSING_PATCH_PROMPT)
)
"""LLMChain used to extract the response."""
def _run(self, text: str) -> str:
try:
data = parse_json_markdown(text)
except json.JSONDecodeError as e:
raise e
response = self.requests_wrapper.patch(data["url"], data["data"])
response = response[: self.response_length]
return self.llm_chain.predict(
response=response, instructions=data["output_instructions"]
).strip()
async def _arun(self, text: str) -> str:
raise NotImplementedError()
class RequestsPutToolWithParsing(BaseRequestsTool, BaseTool):
"""Requests PUT tool with LLM-instructed extraction of truncated responses."""
name: str = "requests_put"
"""Tool name."""
description = REQUESTS_PUT_TOOL_DESCRIPTION
"""Tool description."""
response_length: Optional[int] = MAX_RESPONSE_LENGTH
"""Maximum length of the response to be returned."""
llm_chain: Any = Field(
default_factory=_get_default_llm_chain_factory(PARSING_PUT_PROMPT)
)
"""LLMChain used to extract the response."""
def _run(self, text: str) -> str:
try:
data = parse_json_markdown(text)
except json.JSONDecodeError as e:
raise e
response = self.requests_wrapper.put(data["url"], data["data"])
response = response[: self.response_length]
return self.llm_chain.predict(
response=response, instructions=data["output_instructions"]
).strip()
async def _arun(self, text: str) -> str:
raise NotImplementedError()
class RequestsDeleteToolWithParsing(BaseRequestsTool, BaseTool):
"""A tool that sends a DELETE request and parses the response."""
name: str = "requests_delete"
"""The name of the tool."""
description = REQUESTS_DELETE_TOOL_DESCRIPTION
"""The description of the tool."""
response_length: Optional[int] = MAX_RESPONSE_LENGTH
"""The maximum length of the response."""
llm_chain: Any = Field(
default_factory=_get_default_llm_chain_factory(PARSING_DELETE_PROMPT)
)
"""The LLM chain used to parse the response."""
def _run(self, text: str) -> str:
try:
data = parse_json_markdown(text)
except json.JSONDecodeError as e:
raise e
response = self.requests_wrapper.delete(data["url"])
response = response[: self.response_length]
return self.llm_chain.predict(
response=response, instructions=data["output_instructions"]
).strip()
async def _arun(self, text: str) -> str:
raise NotImplementedError()
#
# Orchestrator, planner, controller.
#
def _create_api_planner_tool(
api_spec: ReducedOpenAPISpec, llm: BaseLanguageModel
) -> Tool:
from langchain.chains.llm import LLMChain
endpoint_descriptions = [
f"{name} {description}" for name, description, _ in api_spec.endpoints
]
prompt = PromptTemplate(
template=API_PLANNER_PROMPT,
input_variables=["query"],
partial_variables={"endpoints": "- " + "- ".join(endpoint_descriptions)},
)
chain = LLMChain(llm=llm, prompt=prompt)
tool = Tool(
name=API_PLANNER_TOOL_NAME,
description=API_PLANNER_TOOL_DESCRIPTION,
func=chain.run,
)
return tool
def _create_api_controller_agent(
api_url: str,
api_docs: str,
requests_wrapper: RequestsWrapper,
llm: BaseLanguageModel,
) -> AgentExecutor:
from langchain.agents.mrkl.base import ZeroShotAgent
from langchain.agents.agent import AgentExecutor
from langchain.chains.llm import LLMChain
get_llm_chain = LLMChain(llm=llm, prompt=PARSING_GET_PROMPT)
post_llm_chain = LLMChain(llm=llm, prompt=PARSING_POST_PROMPT)
tools: List[BaseTool] = [
RequestsGetToolWithParsing(
requests_wrapper=requests_wrapper, llm_chain=get_llm_chain
),
RequestsPostToolWithParsing(
requests_wrapper=requests_wrapper, llm_chain=post_llm_chain
),
]
prompt = PromptTemplate(
template=API_CONTROLLER_PROMPT,
input_variables=["input", "agent_scratchpad"],
partial_variables={
"api_url": api_url,
"api_docs": api_docs,
"tool_names": ", ".join([tool.name for tool in tools]),
"tool_descriptions": "\n".join(
[f"{tool.name}: {tool.description}" for tool in tools]
),
},
)
agent = ZeroShotAgent(
llm_chain=LLMChain(llm=llm, prompt=prompt),
allowed_tools=[tool.name for tool in tools],
)
return AgentExecutor.from_agent_and_tools(agent=agent, tools=tools, verbose=True)
def _create_api_controller_tool(
api_spec: ReducedOpenAPISpec,
requests_wrapper: RequestsWrapper,
llm: BaseLanguageModel,
) -> Tool:
"""Expose controller as a tool.
The tool is invoked with a plan from the planner, and dynamically
creates a controller agent with only the relevant endpoint
documentation, to constrain the context.
"""
base_url = api_spec.servers[0]["url"] # TODO: do better.
def _create_and_run_api_controller_agent(plan_str: str) -> str:
pattern = r"\b(GET|POST|PATCH|DELETE)\s+(/\S+)*"
matches = re.findall(pattern, plan_str)
endpoint_names = [
"{method} {route}".format(method=method, route=route.split("?")[0])
for method, route in matches
]
docs_str = ""
for endpoint_name in endpoint_names:
found_match = False
for name, _, docs in api_spec.endpoints:
regex_name = re.compile(re.sub(r"\{.*?\}", ".*", name))
if regex_name.match(endpoint_name):
found_match = True
docs_str += f"== Docs for {endpoint_name} == \n{yaml.dump(docs)}\n"
if not found_match:
raise ValueError(f"{endpoint_name} endpoint does not exist.")
agent = _create_api_controller_agent(base_url, docs_str, requests_wrapper, llm)
return agent.run(plan_str)
return Tool(
name=API_CONTROLLER_TOOL_NAME,
func=_create_and_run_api_controller_agent,
description=API_CONTROLLER_TOOL_DESCRIPTION,
)
def create_openapi_agent(
api_spec: ReducedOpenAPISpec,
requests_wrapper: RequestsWrapper,
llm: BaseLanguageModel,
shared_memory: Optional[ReadOnlySharedMemory] = None,
callback_manager: Optional[BaseCallbackManager] = None,
verbose: bool = True,
agent_executor_kwargs: Optional[Dict[str, Any]] = None,
**kwargs: Any,
) -> AgentExecutor:
"""Instantiate OpenAI API planner and controller for a given spec.
Inject credentials via requests_wrapper.
We use a top-level "orchestrator" agent to invoke the planner and controller,
rather than a top-level planner
that invokes a controller with its plan. This is to keep the planner simple.
"""
from langchain.agents.mrkl.base import ZeroShotAgent
from langchain.agents.agent import AgentExecutor
from langchain.chains.llm import LLMChain
tools = [
_create_api_planner_tool(api_spec, llm),
_create_api_controller_tool(api_spec, requests_wrapper, llm),
]
prompt = PromptTemplate(
template=API_ORCHESTRATOR_PROMPT,
input_variables=["input", "agent_scratchpad"],
partial_variables={
"tool_names": ", ".join([tool.name for tool in tools]),
"tool_descriptions": "\n".join(
[f"{tool.name}: {tool.description}" for tool in tools]
),
},
)
agent = ZeroShotAgent(
llm_chain=LLMChain(llm=llm, prompt=prompt, memory=shared_memory),
allowed_tools=[tool.name for tool in tools],
**kwargs,
)
return AgentExecutor.from_agent_and_tools(
agent=agent,
tools=tools,
callback_manager=callback_manager,
verbose=verbose,
**(agent_executor_kwargs or {}),
)
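For illustration, this is how the endpoint-extraction regex inside `_create_and_run_api_controller_agent` behaves on a typical plan string (a standalone sketch, not part of the module):

import re

pattern = r"\b(GET|POST|PATCH|DELETE)\s+(/\S+)*"
plan = "1. GET /users to list user ids\n2. POST /users/{id}/messages to send a message"
# findall returns one (method, route) tuple per step; query strings are
# stripped later via route.split("?")[0].
print(re.findall(pattern, plan))
# [('GET', '/users'), ('POST', '/users/{id}/messages')]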


@@ -0,0 +1,90 @@
"""Requests toolkit."""
from __future__ import annotations
from typing import Any, List
from langchain_core.language_models import BaseLanguageModel
from langchain_core.tools import Tool
from langchain_community.agent_toolkits.base import BaseToolkit
from langchain_community.agent_toolkits.json.base import create_json_agent
from langchain_community.agent_toolkits.json.toolkit import JsonToolkit
from langchain_community.agent_toolkits.openapi.prompt import DESCRIPTION
from langchain_community.tools import BaseTool
from langchain_community.tools.json.tool import JsonSpec
from langchain_community.tools.requests.tool import (
RequestsDeleteTool,
RequestsGetTool,
RequestsPatchTool,
RequestsPostTool,
RequestsPutTool,
)
from langchain_community.utilities.requests import TextRequestsWrapper
class RequestsToolkit(BaseToolkit):
"""Toolkit for making REST requests.
*Security Note*: This toolkit contains tools to make GET, POST, PATCH, PUT,
and DELETE requests to an API.
Exercise care in who is allowed to use this toolkit. If exposing
to end users, consider that users will be able to make arbitrary
requests on behalf of the server hosting the code. For example,
users could ask the server to make a request to a private API
that is only accessible from the server.
Control who is allowed to make requests with this toolkit and
what network access it has.
See https://python.langchain.com/docs/security for more information.
"""
requests_wrapper: TextRequestsWrapper
def get_tools(self) -> List[BaseTool]:
"""Return a list of tools."""
return [
RequestsGetTool(requests_wrapper=self.requests_wrapper),
RequestsPostTool(requests_wrapper=self.requests_wrapper),
RequestsPatchTool(requests_wrapper=self.requests_wrapper),
RequestsPutTool(requests_wrapper=self.requests_wrapper),
RequestsDeleteTool(requests_wrapper=self.requests_wrapper),
]
class OpenAPIToolkit(BaseToolkit):
"""Toolkit for interacting with an OpenAPI API.
*Security Note*: This toolkit contains tools that can read and modify
the state of a service; e.g., by creating, deleting, or updating,
reading underlying data.
For example, this toolkit can be used to delete data exposed via
an OpenAPI compliant API.
"""
json_agent: Any
requests_wrapper: TextRequestsWrapper
def get_tools(self) -> List[BaseTool]:
"""Get the tools in the toolkit."""
json_agent_tool = Tool(
name="json_explorer",
func=self.json_agent.run,
description=DESCRIPTION,
)
request_toolkit = RequestsToolkit(requests_wrapper=self.requests_wrapper)
return [*request_toolkit.get_tools(), json_agent_tool]
@classmethod
def from_llm(
cls,
llm: BaseLanguageModel,
json_spec: JsonSpec,
requests_wrapper: TextRequestsWrapper,
**kwargs: Any,
) -> OpenAPIToolkit:
"""Create json agent from llm, then initialize."""
json_agent = create_json_agent(llm, JsonToolkit(spec=json_spec), **kwargs)
return cls(json_agent=json_agent, requests_wrapper=requests_wrapper)
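A minimal construction sketch; the spec file, auth header, and `llm` are placeholder assumptions:

import yaml

from langchain_community.tools.json.tool import JsonSpec
from langchain_community.utilities.requests import TextRequestsWrapper

with open("openapi.yml") as f:  # hypothetical spec file
    json_spec = JsonSpec(dict_=yaml.safe_load(f), max_value_length=4000)

requests_wrapper = TextRequestsWrapper(headers={"Authorization": "Bearer <token>"})
toolkit = OpenAPIToolkit.from_llm(llm, json_spec, requests_wrapper, verbose=True)
tools = toolkit.get_tools()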


@@ -0,0 +1,68 @@
"""Power BI agent."""
from __future__ import annotations
from typing import Any, Dict, List, Optional, TYPE_CHECKING
from langchain_core.callbacks import BaseCallbackManager
from langchain_core.language_models import BaseLanguageModel
from langchain_community.agent_toolkits.powerbi.prompt import (
POWERBI_PREFIX,
POWERBI_SUFFIX,
)
from langchain_community.agent_toolkits.powerbi.toolkit import PowerBIToolkit
from langchain_community.utilities.powerbi import PowerBIDataset
if TYPE_CHECKING:
from langchain.agents import AgentExecutor
def create_pbi_agent(
llm: BaseLanguageModel,
toolkit: Optional[PowerBIToolkit] = None,
powerbi: Optional[PowerBIDataset] = None,
callback_manager: Optional[BaseCallbackManager] = None,
prefix: str = POWERBI_PREFIX,
suffix: str = POWERBI_SUFFIX,
format_instructions: Optional[str] = None,
examples: Optional[str] = None,
input_variables: Optional[List[str]] = None,
top_k: int = 10,
verbose: bool = False,
agent_executor_kwargs: Optional[Dict[str, Any]] = None,
**kwargs: Any,
) -> AgentExecutor:
"""Construct a Power BI agent from an LLM and tools."""
from langchain.agents.mrkl.base import ZeroShotAgent
from langchain.agents import AgentExecutor
from langchain.chains.llm import LLMChain
if toolkit is None:
if powerbi is None:
raise ValueError("Must provide either a toolkit or powerbi dataset")
toolkit = PowerBIToolkit(powerbi=powerbi, llm=llm, examples=examples)
tools = toolkit.get_tools()
tables = powerbi.table_names if powerbi else toolkit.powerbi.table_names
prompt_params = {"format_instructions": format_instructions} if format_instructions is not None else {}
agent = ZeroShotAgent(
llm_chain=LLMChain(
llm=llm,
prompt=ZeroShotAgent.create_prompt(
tools,
prefix=prefix.format(top_k=top_k).format(tables=tables),
suffix=suffix,
input_variables=input_variables,
**prompt_params,
),
callback_manager=callback_manager, # type: ignore
verbose=verbose,
),
allowed_tools=[tool.name for tool in tools],
**kwargs,
)
return AgentExecutor.from_agent_and_tools(
agent=agent,
tools=tools,
callback_manager=callback_manager,
verbose=verbose,
**(agent_executor_kwargs or {}),
)


@@ -0,0 +1,69 @@
"""Power BI agent."""
from __future__ import annotations
from typing import Any, Dict, List, Optional, TYPE_CHECKING
from langchain_core.callbacks import BaseCallbackManager
from langchain_core.language_models.chat_models import BaseChatModel
from langchain_community.agent_toolkits.powerbi.prompt import (
POWERBI_CHAT_PREFIX,
POWERBI_CHAT_SUFFIX,
)
from langchain_community.agent_toolkits.powerbi.toolkit import PowerBIToolkit
from langchain_community.utilities.powerbi import PowerBIDataset
if TYPE_CHECKING:
from langchain.agents import AgentExecutor
from langchain.agents.agent import AgentOutputParser
from langchain.memory.chat_memory import BaseChatMemory
def create_pbi_chat_agent(
llm: BaseChatModel,
toolkit: Optional[PowerBIToolkit] = None,
powerbi: Optional[PowerBIDataset] = None,
callback_manager: Optional[BaseCallbackManager] = None,
output_parser: Optional[AgentOutputParser] = None,
prefix: str = POWERBI_CHAT_PREFIX,
suffix: str = POWERBI_CHAT_SUFFIX,
examples: Optional[str] = None,
input_variables: Optional[List[str]] = None,
memory: Optional[BaseChatMemory] = None,
top_k: int = 10,
verbose: bool = False,
agent_executor_kwargs: Optional[Dict[str, Any]] = None,
**kwargs: Any,
) -> AgentExecutor:
"""Construct a Power BI agent from a Chat LLM and tools.
If you supply only a toolkit and no Power BI dataset, the same LLM is used for both the agent and the toolkit.
"""
from langchain.agents import AgentExecutor
from langchain.agents.conversational_chat.base import ConversationalChatAgent
from langchain.memory import ConversationBufferMemory
if toolkit is None:
if powerbi is None:
raise ValueError("Must provide either a toolkit or powerbi dataset")
toolkit = PowerBIToolkit(powerbi=powerbi, llm=llm, examples=examples)
tools = toolkit.get_tools()
tables = powerbi.table_names if powerbi else toolkit.powerbi.table_names
agent = ConversationalChatAgent.from_llm_and_tools(
llm=llm,
tools=tools,
system_message=prefix.format(top_k=top_k).format(tables=tables),
human_message=suffix,
input_variables=input_variables,
callback_manager=callback_manager,
output_parser=output_parser,
verbose=verbose,
**kwargs,
)
return AgentExecutor.from_agent_and_tools(
agent=agent,
tools=tools,
callback_manager=callback_manager,
memory=memory
or ConversationBufferMemory(memory_key="chat_history", return_messages=True),
verbose=verbose,
**(agent_executor_kwargs or {}),
)


@@ -0,0 +1,106 @@
"""Toolkit for interacting with a Power BI dataset."""
from __future__ import annotations
from typing import List, Optional, Union, TYPE_CHECKING
from langchain_core.callbacks import BaseCallbackManager
from langchain_core.language_models import BaseLanguageModel
from langchain_core.language_models.chat_models import BaseChatModel
from langchain_core.prompts import PromptTemplate
from langchain_core.prompts.chat import (
ChatPromptTemplate,
HumanMessagePromptTemplate,
SystemMessagePromptTemplate,
)
from langchain_core.pydantic_v1 import Field
from langchain_community.agent_toolkits.base import BaseToolkit
from langchain_community.tools import BaseTool
from langchain_community.tools.powerbi.prompt import (
QUESTION_TO_QUERY_BASE,
SINGLE_QUESTION_TO_QUERY,
USER_INPUT,
)
from langchain_community.tools.powerbi.tool import (
InfoPowerBITool,
ListPowerBITool,
QueryPowerBITool,
)
from langchain_community.utilities.powerbi import PowerBIDataset
if TYPE_CHECKING:
from langchain.chains.llm import LLMChain
class PowerBIToolkit(BaseToolkit):
"""Toolkit for interacting with Power BI dataset.
*Security Note*: This toolkit interacts with an external service.
Control access to who can use this toolkit.
Make sure that the capabilities given by this toolkit to the calling
code are appropriately scoped to the application.
See https://python.langchain.com/docs/security for more information.
"""
powerbi: PowerBIDataset = Field(exclude=True)
llm: Union[BaseLanguageModel, BaseChatModel] = Field(exclude=True)
examples: Optional[str] = None
max_iterations: int = 5
callback_manager: Optional[BaseCallbackManager] = None
output_token_limit: Optional[int] = None
tiktoken_model_name: Optional[str] = None
class Config:
"""Configuration for this pydantic object."""
arbitrary_types_allowed = True
def get_tools(self) -> List[BaseTool]:
"""Get the tools in the toolkit."""
return [
QueryPowerBITool(
llm_chain=self._get_chain(),
powerbi=self.powerbi,
examples=self.examples,
max_iterations=self.max_iterations,
output_token_limit=self.output_token_limit,
tiktoken_model_name=self.tiktoken_model_name,
),
InfoPowerBITool(powerbi=self.powerbi),
ListPowerBITool(powerbi=self.powerbi),
]
def _get_chain(self) -> LLMChain:
"""Construct the chain based on the callback manager and model type."""
from langchain.chains.llm import LLMChain
if isinstance(self.llm, BaseLanguageModel):
return LLMChain(
llm=self.llm,
callback_manager=self.callback_manager
if self.callback_manager
else None,
prompt=PromptTemplate(
template=SINGLE_QUESTION_TO_QUERY,
input_variables=["tool_input", "tables", "schemas", "examples"],
),
)
system_prompt = SystemMessagePromptTemplate(
prompt=PromptTemplate(
template=QUESTION_TO_QUERY_BASE,
input_variables=["tables", "schemas", "examples"],
)
)
human_prompt = HumanMessagePromptTemplate(
prompt=PromptTemplate(
template=USER_INPUT,
input_variables=["tool_input"],
)
)
return LLMChain(
llm=self.llm,
callback_manager=self.callback_manager if self.callback_manager else None,
prompt=ChatPromptTemplate.from_messages([system_prompt, human_prompt]),
)


@@ -0,0 +1,64 @@
"""Spark SQL agent."""
from __future__ import annotations
from typing import Any, Dict, List, Optional, TYPE_CHECKING
from langchain_core.callbacks import BaseCallbackManager, Callbacks
from langchain_core.language_models import BaseLanguageModel
from langchain_community.agent_toolkits.spark_sql.prompt import SQL_PREFIX, SQL_SUFFIX
from langchain_community.agent_toolkits.spark_sql.toolkit import SparkSQLToolkit
if TYPE_CHECKING:
from langchain.agents.agent import AgentExecutor
def create_spark_sql_agent(
llm: BaseLanguageModel,
toolkit: SparkSQLToolkit,
callback_manager: Optional[BaseCallbackManager] = None,
callbacks: Callbacks = None,
prefix: str = SQL_PREFIX,
suffix: str = SQL_SUFFIX,
format_instructions: Optional[str] = None,
input_variables: Optional[List[str]] = None,
top_k: int = 10,
max_iterations: Optional[int] = 15,
max_execution_time: Optional[float] = None,
early_stopping_method: str = "force",
verbose: bool = False,
agent_executor_kwargs: Optional[Dict[str, Any]] = None,
**kwargs: Any,
) -> AgentExecutor:
"""Construct a Spark SQL agent from an LLM and tools."""
from langchain.agents.agent import AgentExecutor
from langchain.agents.mrkl.base import ZeroShotAgent
from langchain.chains.llm import LLMChain
tools = toolkit.get_tools()
prefix = prefix.format(top_k=top_k)
prompt_params = {"format_instructions": format_instructions} if format_instructions is not None else {}
prompt = ZeroShotAgent.create_prompt(
tools,
prefix=prefix,
suffix=suffix,
input_variables=input_variables,
**prompt_params,
)
llm_chain = LLMChain(
llm=llm,
prompt=prompt,
callback_manager=callback_manager,
callbacks=callbacks,
)
tool_names = [tool.name for tool in tools]
agent = ZeroShotAgent(llm_chain=llm_chain, allowed_tools=tool_names, **kwargs)
return AgentExecutor.from_agent_and_tools(
agent=agent,
tools=tools,
callback_manager=callback_manager,
callbacks=callbacks,
verbose=verbose,
max_iterations=max_iterations,
max_execution_time=max_execution_time,
early_stopping_method=early_stopping_method,
**(agent_executor_kwargs or {}),
)
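# Illustrative usage (a minimal sketch; assumes a running Spark session with a
# "titanic" table in the given schema and an OpenAI API key in the environment):
from langchain_community.chat_models import ChatOpenAI
from langchain_community.utilities.spark_sql import SparkSQL

spark_sql = SparkSQL(schema="langchain_example")
llm = ChatOpenAI(temperature=0)
toolkit = SparkSQLToolkit(db=spark_sql, llm=llm)
agent_executor = create_spark_sql_agent(llm=llm, toolkit=toolkit, verbose=True)
agent_executor.run("Describe the titanic table")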

View File

@@ -0,0 +1,102 @@
"""SQL agent."""
from __future__ import annotations
from typing import Any, Dict, List, Optional, Sequence, TYPE_CHECKING
from langchain_core.callbacks import BaseCallbackManager
from langchain_core.language_models import BaseLanguageModel
from langchain_core.messages import AIMessage, SystemMessage
from langchain_core.prompts.chat import (
ChatPromptTemplate,
HumanMessagePromptTemplate,
MessagesPlaceholder,
)
from langchain_community.agent_toolkits.sql.prompt import (
SQL_FUNCTIONS_SUFFIX,
SQL_PREFIX,
SQL_SUFFIX,
)
from langchain_community.agent_toolkits.sql.toolkit import SQLDatabaseToolkit
from langchain_community.tools import BaseTool
if TYPE_CHECKING:
from langchain.agents.agent import AgentExecutor
from langchain.agents.agent_types import AgentType
def create_sql_agent(
llm: BaseLanguageModel,
toolkit: SQLDatabaseToolkit,
agent_type: Optional[AgentType] = None,
callback_manager: Optional[BaseCallbackManager] = None,
prefix: str = SQL_PREFIX,
suffix: Optional[str] = None,
format_instructions: Optional[str] = None,
input_variables: Optional[List[str]] = None,
top_k: int = 10,
max_iterations: Optional[int] = 15,
max_execution_time: Optional[float] = None,
early_stopping_method: str = "force",
verbose: bool = False,
agent_executor_kwargs: Optional[Dict[str, Any]] = None,
extra_tools: Sequence[BaseTool] = (),
**kwargs: Any,
) -> AgentExecutor:
"""Construct an SQL agent from an LLM and tools."""
from langchain.agents.agent import AgentExecutor, BaseSingleActionAgent
from langchain.agents.agent_types import AgentType
from langchain.agents.mrkl.base import ZeroShotAgent
from langchain.agents.openai_functions_agent.base import OpenAIFunctionsAgent
from langchain.chains.llm import LLMChain
agent_type = agent_type or AgentType.ZERO_SHOT_REACT_DESCRIPTION
tools = toolkit.get_tools() + list(extra_tools)
prefix = prefix.format(dialect=toolkit.dialect, top_k=top_k)
agent: BaseSingleActionAgent
if agent_type == AgentType.ZERO_SHOT_REACT_DESCRIPTION:
prompt_params = {"format_instructions": format_instructions} if format_instructions is not None else {}
prompt = ZeroShotAgent.create_prompt(
tools,
prefix=prefix,
suffix=suffix or SQL_SUFFIX,
input_variables=input_variables,
**prompt_params,
)
llm_chain = LLMChain(
llm=llm,
prompt=prompt,
callback_manager=callback_manager,
)
tool_names = [tool.name for tool in tools]
agent = ZeroShotAgent(llm_chain=llm_chain, allowed_tools=tool_names, **kwargs)
elif agent_type == AgentType.OPENAI_FUNCTIONS:
messages = [
SystemMessage(content=prefix),
HumanMessagePromptTemplate.from_template("{input}"),
AIMessage(content=suffix or SQL_FUNCTIONS_SUFFIX),
MessagesPlaceholder(variable_name="agent_scratchpad"),
]
input_variables = ["input", "agent_scratchpad"]
_prompt = ChatPromptTemplate(input_variables=input_variables, messages=messages)
agent = OpenAIFunctionsAgent(
llm=llm,
prompt=_prompt,
tools=tools,
callback_manager=callback_manager,
**kwargs,
)
else:
raise ValueError(f"Agent type {agent_type} not supported at the moment.")
return AgentExecutor.from_agent_and_tools(
agent=agent,
tools=tools,
callback_manager=callback_manager,
verbose=verbose,
max_iterations=max_iterations,
max_execution_time=max_execution_time,
early_stopping_method=early_stopping_method,
**(agent_executor_kwargs or {}),
)
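# Illustrative usage (a minimal sketch; assumes a local SQLite file such as the
# Chinook sample database and an OpenAI API key in the environment):
from langchain.agents.agent_types import AgentType
from langchain_community.chat_models import ChatOpenAI
from langchain_community.utilities import SQLDatabase

db = SQLDatabase.from_uri("sqlite:///Chinook.db")
llm = ChatOpenAI(temperature=0)
toolkit = SQLDatabaseToolkit(db=db, llm=llm)
agent_executor = create_sql_agent(
    llm=llm,
    toolkit=toolkit,
    agent_type=AgentType.OPENAI_FUNCTIONS,
    verbose=True,
)
agent_executor.run("How many tables are in the database?")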

View File

@@ -0,0 +1,103 @@
"""VectorStore agent."""
from __future__ import annotations
from typing import Any, Dict, Optional, TYPE_CHECKING
from langchain_core.callbacks import BaseCallbackManager
from langchain_core.language_models import BaseLanguageModel
from langchain_community.agent_toolkits.vectorstore.prompt import PREFIX, ROUTER_PREFIX
from langchain_community.agent_toolkits.vectorstore.toolkit import (
VectorStoreRouterToolkit,
VectorStoreToolkit,
)
if TYPE_CHECKING:
from langchain.agents.agent import AgentExecutor
def create_vectorstore_agent(
llm: BaseLanguageModel,
toolkit: VectorStoreToolkit,
callback_manager: Optional[BaseCallbackManager] = None,
prefix: str = PREFIX,
verbose: bool = False,
agent_executor_kwargs: Optional[Dict[str, Any]] = None,
**kwargs: Any,
) -> AgentExecutor:
"""Construct a VectorStore agent from an LLM and tools.
Args:
        llm (BaseLanguageModel): LLM that will be used by the agent.
        toolkit (VectorStoreToolkit): Set of tools for the agent.
        callback_manager (Optional[BaseCallbackManager], optional): Callback
            manager to use. Defaults to None.
        prefix (str, optional): The prefix prompt for the agent. Defaults to PREFIX.
        verbose (bool, optional): Whether to log the agent's intermediate
            scratchpad output. Defaults to False.
        agent_executor_kwargs (Optional[Dict[str, Any]], optional): Additional
            keyword arguments to pass to the AgentExecutor. Defaults to None.
        **kwargs: Additional named parameters to pass to the ZeroShotAgent.
    Returns:
        AgentExecutor: A callable AgentExecutor object. Call it directly or use
            its `run` method with a query to get a response.
""" # noqa: E501
from langchain.agents.agent import AgentExecutor
from langchain.agents.mrkl.base import ZeroShotAgent
from langchain.chains.llm import LLMChain
tools = toolkit.get_tools()
prompt = ZeroShotAgent.create_prompt(tools, prefix=prefix)
llm_chain = LLMChain(
llm=llm,
prompt=prompt,
callback_manager=callback_manager,
)
tool_names = [tool.name for tool in tools]
agent = ZeroShotAgent(llm_chain=llm_chain, allowed_tools=tool_names, **kwargs)
return AgentExecutor.from_agent_and_tools(
agent=agent,
tools=tools,
callback_manager=callback_manager,
verbose=verbose,
**(agent_executor_kwargs or {}),
)
def create_vectorstore_router_agent(
llm: BaseLanguageModel,
toolkit: VectorStoreRouterToolkit,
callback_manager: Optional[BaseCallbackManager] = None,
prefix: str = ROUTER_PREFIX,
verbose: bool = False,
agent_executor_kwargs: Optional[Dict[str, Any]] = None,
**kwargs: Any,
) -> AgentExecutor:
"""Construct a VectorStore router agent from an LLM and tools.
Args:
        llm (BaseLanguageModel): LLM that will be used by the agent.
        toolkit (VectorStoreRouterToolkit): Set of tools for the agent that can
            route between multiple vector stores.
        callback_manager (Optional[BaseCallbackManager], optional): Callback
            manager to use. Defaults to None.
        prefix (str, optional): The prefix prompt for the router agent.
            Defaults to ROUTER_PREFIX.
        verbose (bool, optional): Whether to log the agent's intermediate
            scratchpad output. Defaults to False.
        agent_executor_kwargs (Optional[Dict[str, Any]], optional): Additional
            keyword arguments to pass to the AgentExecutor. Defaults to None.
        **kwargs: Additional named parameters to pass to the ZeroShotAgent.
    Returns:
        AgentExecutor: A callable AgentExecutor object. Call it directly or use
            its `run` method with a query to get a response.
""" # noqa: E501
from langchain.agents.agent import AgentExecutor
from langchain.agents.mrkl.base import ZeroShotAgent
from langchain.chains.llm import LLMChain
tools = toolkit.get_tools()
prompt = ZeroShotAgent.create_prompt(tools, prefix=prefix)
llm_chain = LLMChain(
llm=llm,
prompt=prompt,
callback_manager=callback_manager,
)
tool_names = [tool.name for tool in tools]
agent = ZeroShotAgent(llm_chain=llm_chain, allowed_tools=tool_names, **kwargs)
return AgentExecutor.from_agent_and_tools(
agent=agent,
tools=tools,
callback_manager=callback_manager,
verbose=verbose,
**(agent_executor_kwargs or {}),
)
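# Illustrative usage (a minimal sketch; FakeEmbeddings keeps the vector store
# runnable without an embeddings API, but the agent still needs a configured LLM):
from langchain_community.agent_toolkits.vectorstore.toolkit import VectorStoreInfo
from langchain_community.chat_models import ChatOpenAI
from langchain_community.embeddings import FakeEmbeddings
from langchain_community.vectorstores import FAISS

store = FAISS.from_texts(["LangChain ships agent toolkits."], FakeEmbeddings(size=32))
vectorstore_info = VectorStoreInfo(
    name="docs",
    description="Internal documentation snippets",
    vectorstore=store,
)
llm = ChatOpenAI(temperature=0)
toolkit = VectorStoreToolkit(vectorstore_info=vectorstore_info, llm=llm)
agent_executor = create_vectorstore_agent(llm=llm, toolkit=toolkit, verbose=True)
agent_executor.run("What do the docs say about agent toolkits?")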

View File

@@ -0,0 +1,83 @@
"""**Callback handlers** allow listening to events in LangChain.
**Class hierarchy:**
.. code-block::
BaseCallbackHandler --> <name>CallbackHandler # Example: AimCallbackHandler
"""
from langchain_core.callbacks import (
StdOutCallbackHandler,
StreamingStdOutCallbackHandler,
)
from langchain_core.tracers.langchain import LangChainTracer
from langchain_community.callbacks.aim_callback import AimCallbackHandler
from langchain_community.callbacks.argilla_callback import ArgillaCallbackHandler
from langchain_community.callbacks.arize_callback import ArizeCallbackHandler
from langchain_community.callbacks.arthur_callback import ArthurCallbackHandler
from langchain_community.callbacks.clearml_callback import ClearMLCallbackHandler
from langchain_community.callbacks.comet_ml_callback import CometCallbackHandler
from langchain_community.callbacks.context_callback import ContextCallbackHandler
from langchain_community.callbacks.file import FileCallbackHandler
from langchain_community.callbacks.flyte_callback import FlyteCallbackHandler
from langchain_community.callbacks.human import HumanApprovalCallbackHandler
from langchain_community.callbacks.infino_callback import InfinoCallbackHandler
from langchain_community.callbacks.labelstudio_callback import (
LabelStudioCallbackHandler,
)
from langchain_community.callbacks.llmonitor_callback import LLMonitorCallbackHandler
from langchain_community.callbacks.manager import (
get_openai_callback,
wandb_tracing_enabled,
)
from langchain_community.callbacks.mlflow_callback import MlflowCallbackHandler
from langchain_community.callbacks.openai_info import OpenAICallbackHandler
from langchain_community.callbacks.promptlayer_callback import (
PromptLayerCallbackHandler,
)
from langchain_community.callbacks.sagemaker_callback import SageMakerCallbackHandler
from langchain_community.callbacks.streaming_aiter import AsyncIteratorCallbackHandler
from langchain_community.callbacks.streaming_stdout_final_only import (
FinalStreamingStdOutCallbackHandler,
)
from langchain_community.callbacks.streamlit import (
LLMThoughtLabeler,
StreamlitCallbackHandler,
)
from langchain_community.callbacks.trubrics_callback import TrubricsCallbackHandler
from langchain_community.callbacks.wandb_callback import WandbCallbackHandler
from langchain_community.callbacks.whylabs_callback import WhyLabsCallbackHandler
__all__ = [
"AimCallbackHandler",
"ArgillaCallbackHandler",
"ArizeCallbackHandler",
"PromptLayerCallbackHandler",
"ArthurCallbackHandler",
"ClearMLCallbackHandler",
"CometCallbackHandler",
"ContextCallbackHandler",
"FileCallbackHandler",
"HumanApprovalCallbackHandler",
"InfinoCallbackHandler",
"MlflowCallbackHandler",
"LLMonitorCallbackHandler",
"OpenAICallbackHandler",
"StdOutCallbackHandler",
"AsyncIteratorCallbackHandler",
"StreamingStdOutCallbackHandler",
"FinalStreamingStdOutCallbackHandler",
"LLMThoughtLabeler",
"LangChainTracer",
"StreamlitCallbackHandler",
"WandbCallbackHandler",
"WhyLabsCallbackHandler",
"get_openai_callback",
"wandb_tracing_enabled",
"FlyteCallbackHandler",
"SageMakerCallbackHandler",
"LabelStudioCallbackHandler",
"TrubricsCallbackHandler",
]

View File

@@ -0,0 +1,69 @@
from __future__ import annotations
import logging
from contextlib import contextmanager
from contextvars import ContextVar
from typing import (
Generator,
Optional,
)
from langchain_core.tracers.context import register_configure_hook
from langchain_community.callbacks.openai_info import OpenAICallbackHandler
from langchain_community.callbacks.tracers.wandb import WandbTracer
logger = logging.getLogger(__name__)
openai_callback_var: ContextVar[Optional[OpenAICallbackHandler]] = ContextVar(
"openai_callback", default=None
)
wandb_tracing_callback_var: ContextVar[Optional[WandbTracer]] = ContextVar( # noqa: E501
"tracing_wandb_callback", default=None
)
register_configure_hook(openai_callback_var, True)
register_configure_hook(
wandb_tracing_callback_var, True, WandbTracer, "LANGCHAIN_WANDB_TRACING"
)
@contextmanager
def get_openai_callback() -> Generator[OpenAICallbackHandler, None, None]:
"""Get the OpenAI callback handler in a context manager.
which conveniently exposes token and cost information.
Returns:
OpenAICallbackHandler: The OpenAI callback handler.
Example:
>>> with get_openai_callback() as cb:
... # Use the OpenAI callback handler
"""
    cb = OpenAICallbackHandler()
    openai_callback_var.set(cb)
    try:
        yield cb
    finally:
        # Always reset the context var, even if the body raised.
        openai_callback_var.set(None)
@contextmanager
def wandb_tracing_enabled(
session_name: str = "default",
) -> Generator[None, None, None]:
"""Get the WandbTracer in a context manager.
Args:
session_name (str, optional): The name of the session.
Defaults to "default".
Returns:
None
Example:
>>> with wandb_tracing_enabled() as session:
... # Use the WandbTracer session
"""
    cb = WandbTracer()
    wandb_tracing_callback_var.set(cb)
    try:
        yield None
    finally:
        # Always reset the context var, even if the body raised.
        wandb_tracing_callback_var.set(None)
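# Illustrative usage of get_openai_callback (a minimal sketch; assumes an
# OpenAI API key in the environment and the community OpenAI LLM wrapper):
from langchain_community.llms import OpenAI

llm = OpenAI(temperature=0)
with get_openai_callback() as cb:
    llm.invoke("Tell me a joke")
print(cb.total_tokens, cb.prompt_tokens, cb.completion_tokens, cb.total_cost)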

View File

@@ -0,0 +1,20 @@
"""Tracers that record execution of LangChain runs."""
from langchain_core.tracers.langchain import LangChainTracer
from langchain_core.tracers.langchain_v1 import LangChainTracerV1
from langchain_core.tracers.stdout import (
ConsoleCallbackHandler,
FunctionCallbackHandler,
)
from langchain_community.callbacks.tracers.logging import LoggingCallbackHandler
from langchain_community.callbacks.tracers.wandb import WandbTracer
__all__ = [
"ConsoleCallbackHandler",
"FunctionCallbackHandler",
"LoggingCallbackHandler",
"LangChainTracer",
"LangChainTracerV1",
"WandbTracer",
]

View File

@@ -0,0 +1,78 @@
"""**Chat Models** are a variation on language models.
While Chat Models use language models under the hood, the interface they expose
is a bit different. Rather than expose a "text in, text out" API, they expose
an interface where "chat messages" are the inputs and outputs.
**Class hierarchy:**
.. code-block::
BaseLanguageModel --> BaseChatModel --> <name> # Examples: ChatOpenAI, ChatGooglePalm
**Main helpers:**
.. code-block::
AIMessage, BaseMessage, HumanMessage
""" # noqa: E501
from langchain_community.chat_models.anthropic import ChatAnthropic
from langchain_community.chat_models.anyscale import ChatAnyscale
from langchain_community.chat_models.baichuan import ChatBaichuan
from langchain_community.chat_models.baidu_qianfan_endpoint import QianfanChatEndpoint
from langchain_community.chat_models.bedrock import BedrockChat
from langchain_community.chat_models.cohere import ChatCohere
from langchain_community.chat_models.databricks import ChatDatabricks
from langchain_community.chat_models.ernie import ErnieBotChat
from langchain_community.chat_models.everlyai import ChatEverlyAI
from langchain_community.chat_models.fake import FakeListChatModel
from langchain_community.chat_models.fireworks import ChatFireworks
from langchain_community.chat_models.gigachat import GigaChat
from langchain_community.chat_models.google_palm import ChatGooglePalm
from langchain_community.chat_models.human import HumanInputChatModel
from langchain_community.chat_models.hunyuan import ChatHunyuan
from langchain_community.chat_models.javelin_ai_gateway import ChatJavelinAIGateway
from langchain_community.chat_models.jinachat import JinaChat
from langchain_community.chat_models.konko import ChatKonko
from langchain_community.chat_models.litellm import ChatLiteLLM
from langchain_community.chat_models.minimax import MiniMaxChat
from langchain_community.chat_models.mlflow import ChatMlflow
from langchain_community.chat_models.mlflow_ai_gateway import ChatMLflowAIGateway
from langchain_community.chat_models.ollama import ChatOllama
from langchain_community.chat_models.pai_eas_endpoint import PaiEasChatEndpoint
from langchain_community.chat_models.promptlayer_openai import PromptLayerChatOpenAI
from langchain_community.chat_models.vertexai import ChatVertexAI
from langchain_community.chat_models.volcengine_maas import VolcEngineMaasChat
from langchain_community.chat_models.yandex import ChatYandexGPT
__all__ = [
"BedrockChat",
"FakeListChatModel",
"PromptLayerChatOpenAI",
"ChatDatabricks",
"ChatEverlyAI",
"ChatAnthropic",
"ChatCohere",
"ChatGooglePalm",
"ChatMlflow",
"ChatMLflowAIGateway",
"ChatOllama",
"ChatVertexAI",
"JinaChat",
"HumanInputChatModel",
"MiniMaxChat",
"ChatAnyscale",
"ChatLiteLLM",
"ErnieBotChat",
"ChatJavelinAIGateway",
"ChatKonko",
"PaiEasChatEndpoint",
"QianfanChatEndpoint",
"ChatFireworks",
"ChatYandexGPT",
"ChatBaichuan",
"ChatHunyuan",
"GigaChat",
"VolcEngineMaasChat",
]
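# Illustrative usage (a minimal sketch; FakeListChatModel makes no network
# calls, so this runs without any API keys):
from langchain_core.messages import HumanMessage

chat = FakeListChatModel(responses=["Hello from a canned response!"])
result = chat.invoke([HumanMessage(content="Say hello")])
print(result.content)  # -> "Hello from a canned response!"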

View File

@@ -0,0 +1,101 @@
"""Abstract interface for document loader implementations."""
from __future__ import annotations
from abc import ABC, abstractmethod
from typing import Iterator, List, Optional, TYPE_CHECKING
from langchain_core.documents import Document
from langchain_community.document_loaders.blob_loaders import Blob
if TYPE_CHECKING:
from langchain.text_splitter import TextSplitter
class BaseLoader(ABC):
"""Interface for Document Loader.
Implementations should implement the lazy-loading method using generators
to avoid loading all Documents into memory at once.
The `load` method will remain as is for backwards compatibility, but its
implementation should be just `list(self.lazy_load())`.
"""
# Sub-classes should implement this method
# as return list(self.lazy_load()).
# This method returns a List which is materialized in memory.
@abstractmethod
def load(self) -> List[Document]:
"""Load data into Document objects."""
def load_and_split(
self, text_splitter: Optional[TextSplitter] = None
) -> List[Document]:
"""Load Documents and split into chunks. Chunks are returned as Documents.
Args:
text_splitter: TextSplitter instance to use for splitting documents.
Defaults to RecursiveCharacterTextSplitter.
Returns:
List of Documents.
"""
from langchain.text_splitter import RecursiveCharacterTextSplitter
if text_splitter is None:
_text_splitter: TextSplitter = RecursiveCharacterTextSplitter()
else:
_text_splitter = text_splitter
docs = self.load()
return _text_splitter.split_documents(docs)
# Attention: This method will be upgraded into an abstractmethod once it's
# implemented in all the existing subclasses.
def lazy_load(
self,
) -> Iterator[Document]:
"""A lazy loader for Documents."""
raise NotImplementedError(
f"{self.__class__.__name__} does not implement lazy_load()"
)
class BaseBlobParser(ABC):
"""Abstract interface for blob parsers.
A blob parser provides a way to parse raw data stored in a blob into one
or more documents.
The parser can be composed with blob loaders, making it easy to reuse
a parser independent of how the blob was originally loaded.
"""
@abstractmethod
def lazy_parse(self, blob: Blob) -> Iterator[Document]:
"""Lazy parsing interface.
Subclasses are required to implement this method.
Args:
blob: Blob instance
Returns:
Generator of documents
"""
def parse(self, blob: Blob) -> List[Document]:
"""Eagerly parse the blob into a document or documents.
        This is a convenience method for interactive development environments.
        Production applications should favor the lazy_parse method instead.
        Subclasses should generally not override this parse method.
Args:
blob: Blob instance
Returns:
List of documents
"""
return list(self.lazy_parse(blob))
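# Illustrative sketch of the loader interface: a minimal in-memory loader
# (InMemoryLoader is hypothetical, purely for demonstration):
class InMemoryLoader(BaseLoader):
    """Wrap a list of strings as Documents."""

    def __init__(self, texts: List[str]) -> None:
        self.texts = texts

    def lazy_load(self) -> Iterator[Document]:
        for text in self.texts:
            yield Document(page_content=text)

    def load(self) -> List[Document]:
        # As recommended above, load() is just the materialized lazy_load().
        return list(self.lazy_load())

docs = InMemoryLoader(["hello", "world"]).load()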

View File

@@ -0,0 +1,147 @@
"""Use to load blobs from the local file system."""
from pathlib import Path
from typing import Callable, Iterable, Iterator, Optional, Sequence, TypeVar, Union
from langchain_community.document_loaders.blob_loaders.schema import Blob, BlobLoader
T = TypeVar("T")
def _make_iterator(
length_func: Callable[[], int], show_progress: bool = False
) -> Callable[[Iterable[T]], Iterator[T]]:
"""Create a function that optionally wraps an iterable in tqdm."""
if show_progress:
try:
from tqdm.auto import tqdm
except ImportError:
raise ImportError(
"You must install tqdm to use show_progress=True."
"You can install tqdm with `pip install tqdm`."
)
# Make sure to provide `total` here so that tqdm can show
# a progress bar that takes into account the total number of files.
def _with_tqdm(iterable: Iterable[T]) -> Iterator[T]:
"""Wrap an iterable in a tqdm progress bar."""
return tqdm(iterable, total=length_func())
iterator = _with_tqdm
else:
iterator = iter # type: ignore
return iterator
# PUBLIC API
class FileSystemBlobLoader(BlobLoader):
"""Load blobs in the local file system.
Example:
.. code-block:: python
from langchain_community.document_loaders.blob_loaders import FileSystemBlobLoader
loader = FileSystemBlobLoader("/path/to/directory")
for blob in loader.yield_blobs():
print(blob)
""" # noqa: E501
def __init__(
self,
path: Union[str, Path],
*,
glob: str = "**/[!.]*",
exclude: Sequence[str] = (),
suffixes: Optional[Sequence[str]] = None,
show_progress: bool = False,
) -> None:
"""Initialize with a path to directory and how to glob over it.
Args:
path: Path to directory to load from or path to file to load.
If a path to a file is provided, glob/exclude/suffixes are ignored.
glob: Glob pattern relative to the specified path
by default set to pick up all non-hidden files
exclude: patterns to exclude from results, use glob syntax
            suffixes: Provide to keep only files with these suffixes.
                Useful when you want to restrict loading to certain file types.
                Suffixes must include the dot, e.g. ".txt".
show_progress: If true, will show a progress bar as the files are loaded.
This forces an iteration through all matching files
to count them prior to loading them.
Examples:
.. code-block:: python
from langchain_community.document_loaders.blob_loaders import FileSystemBlobLoader
# Load a single file.
loader = FileSystemBlobLoader("/path/to/file.txt")
# Recursively load all text files in a directory.
loader = FileSystemBlobLoader("/path/to/directory", glob="**/*.txt")
# Recursively load all non-hidden files in a directory.
loader = FileSystemBlobLoader("/path/to/directory", glob="**/[!.]*")
# Load all files in a directory without recursion.
loader = FileSystemBlobLoader("/path/to/directory", glob="*")
# Recursively load all files in a directory, except for py or pyc files.
loader = FileSystemBlobLoader(
"/path/to/directory",
glob="**/*.txt",
exclude=["**/*.py", "**/*.pyc"]
)
""" # noqa: E501
if isinstance(path, Path):
_path = path
elif isinstance(path, str):
_path = Path(path)
else:
raise TypeError(f"Expected str or Path, got {type(path)}")
self.path = _path.expanduser() # Expand user to handle ~
self.glob = glob
self.suffixes = set(suffixes or [])
self.show_progress = show_progress
self.exclude = exclude
def yield_blobs(
self,
) -> Iterable[Blob]:
"""Yield blobs that match the requested pattern."""
iterator = _make_iterator(
length_func=self.count_matching_files, show_progress=self.show_progress
)
for path in iterator(self._yield_paths()):
yield Blob.from_path(path)
def _yield_paths(self) -> Iterable[Path]:
"""Yield paths that match the requested pattern."""
if self.path.is_file():
yield self.path
return
paths = self.path.glob(self.glob)
for path in paths:
if self.exclude:
if any(path.match(glob) for glob in self.exclude):
continue
if path.is_file():
if self.suffixes and path.suffix not in self.suffixes:
continue
yield path
def count_matching_files(self) -> int:
"""Count files that match the pattern without loading them."""
# Carry out a full iteration to count the files without
# materializing anything expensive in memory.
num = 0
for _ in self._yield_paths():
num += 1
return num
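# Illustrative usage (a minimal sketch; the paths and patterns are placeholders):
loader = FileSystemBlobLoader(
    "/path/to/project",
    glob="**/*",
    suffixes=[".md", ".txt"],
    exclude=["**/node_modules/**"],
)
print(loader.count_matching_files())  # counts without reading file contents
for blob in loader.yield_blobs():
    print(blob.path, len(blob.as_string()))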

View File

@@ -0,0 +1,190 @@
from __future__ import annotations
from pathlib import Path
from typing import (
TYPE_CHECKING,
Any,
Iterator,
List,
Literal,
Optional,
Sequence,
Union,
)
from langchain_core.documents import Document
from langchain_community.document_loaders.base import BaseBlobParser, BaseLoader
from langchain_community.document_loaders.blob_loaders import (
BlobLoader,
FileSystemBlobLoader,
)
from langchain_community.document_loaders.parsers.registry import get_parser
if TYPE_CHECKING:
from langchain.text_splitter import TextSplitter
_PathLike = Union[str, Path]
DEFAULT = Literal["default"]
class GenericLoader(BaseLoader):
"""Generic Document Loader.
A generic document loader that allows combining an arbitrary blob loader with
a blob parser.
Examples:
Parse a specific PDF file:
.. code-block:: python
from langchain_community.document_loaders import GenericLoader
from langchain_community.document_loaders.parsers.pdf import PyPDFParser
            # Load a specific PDF file.
loader = GenericLoader.from_filesystem(
"my_lovely_pdf.pdf",
parser=PyPDFParser()
)
.. code-block:: python
from langchain_community.document_loaders import GenericLoader
from langchain_community.document_loaders.blob_loaders import FileSystemBlobLoader
loader = GenericLoader.from_filesystem(
path="path/to/directory",
glob="**/[!.]*",
suffixes=[".pdf"],
show_progress=True,
)
docs = loader.lazy_load()
next(docs)
Example instantiations to change which files are loaded:
.. code-block:: python
# Recursively load all text files in a directory.
loader = GenericLoader.from_filesystem("/path/to/dir", glob="**/*.txt")
# Recursively load all non-hidden files in a directory.
loader = GenericLoader.from_filesystem("/path/to/dir", glob="**/[!.]*")
# Load all files in a directory without recursion.
loader = GenericLoader.from_filesystem("/path/to/dir", glob="*")
Example instantiations to change which parser is used:
.. code-block:: python
from langchain_community.document_loaders.parsers.pdf import PyPDFParser
            # Recursively load all PDF files in a directory.
loader = GenericLoader.from_filesystem(
"/path/to/dir",
glob="**/*.pdf",
parser=PyPDFParser()
)
""" # noqa: E501
def __init__(
self,
blob_loader: BlobLoader,
blob_parser: BaseBlobParser,
) -> None:
"""A generic document loader.
Args:
blob_loader: A blob loader which knows how to yield blobs
blob_parser: A blob parser which knows how to parse blobs into documents
"""
self.blob_loader = blob_loader
self.blob_parser = blob_parser
def lazy_load(
self,
) -> Iterator[Document]:
"""Load documents lazily. Use this when working at a large scale."""
for blob in self.blob_loader.yield_blobs():
yield from self.blob_parser.lazy_parse(blob)
def load(self) -> List[Document]:
"""Load all documents."""
return list(self.lazy_load())
def load_and_split(
self, text_splitter: Optional[TextSplitter] = None
) -> List[Document]:
"""Load all documents and split them into sentences."""
raise NotImplementedError(
"Loading and splitting is not yet implemented for generic loaders. "
"When they will be implemented they will be added via the initializer. "
"This method should not be used going forward."
)
@classmethod
def from_filesystem(
cls,
path: _PathLike,
*,
glob: str = "**/[!.]*",
exclude: Sequence[str] = (),
suffixes: Optional[Sequence[str]] = None,
show_progress: bool = False,
parser: Union[DEFAULT, BaseBlobParser] = "default",
parser_kwargs: Optional[dict] = None,
) -> GenericLoader:
"""Create a generic document loader using a filesystem blob loader.
Args:
path: The path to the directory to load documents from OR the path to a
single file to load. If this is a file, glob, exclude, suffixes
will be ignored.
glob: The glob pattern to use to find documents.
suffixes: The suffixes to use to filter documents. If None, all files
matching the glob will be loaded.
exclude: A list of patterns to exclude from the loader.
show_progress: Whether to show a progress bar or not (requires tqdm).
Proxies to the file system loader.
            parser: A blob parser which knows how to parse blobs into documents;
                a default parser is instantiated if not provided. The default can
                be overridden either by passing a parser here or by overriding
                the `get_parser` static method in a subclass.
parser_kwargs: Keyword arguments to pass to the parser.
Returns:
A generic document loader.
"""
blob_loader = FileSystemBlobLoader(
path,
glob=glob,
exclude=exclude,
suffixes=suffixes,
show_progress=show_progress,
)
if isinstance(parser, str):
if parser == "default":
try:
# If there is an implementation of get_parser on the class, use it.
blob_parser = cls.get_parser(**(parser_kwargs or {}))
except NotImplementedError:
# if not then use the global registry.
blob_parser = get_parser(parser)
else:
blob_parser = get_parser(parser)
else:
blob_parser = parser
return cls(blob_loader, blob_parser)
@staticmethod
def get_parser(**kwargs: Any) -> BaseBlobParser:
"""Override this method to associate a default parser with the class."""
raise NotImplementedError()
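# Illustrative sketch: composing GenericLoader with a custom parser
# (UpperCaseParser is hypothetical, purely for demonstration):
from langchain_community.document_loaders.blob_loaders import Blob

class UpperCaseParser(BaseBlobParser):
    def lazy_parse(self, blob: Blob) -> Iterator[Document]:
        yield Document(
            page_content=blob.as_string().upper(),
            metadata={"source": blob.source},
        )

loader = GenericLoader.from_filesystem(
    "/path/to/notes", glob="**/*.txt", parser=UpperCaseParser()
)
for doc in loader.lazy_load():
    print(doc.metadata["source"])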

View File

@@ -0,0 +1,70 @@
"""Code for generic / auxiliary parsers.
This module contains some logic to help assemble more sophisticated parsers.
"""
from typing import Iterator, Mapping, Optional
from langchain_core.documents import Document
from langchain_community.document_loaders.base import BaseBlobParser
from langchain_community.document_loaders.blob_loaders.schema import Blob
class MimeTypeBasedParser(BaseBlobParser):
"""Parser that uses `mime`-types to parse a blob.
This parser is useful for simple pipelines where the mime-type is sufficient
to determine how to parse a blob.
To use, configure handlers based on mime-types and pass them to the initializer.
Example:
.. code-block:: python
from langchain_community.document_loaders.parsers.generic import MimeTypeBasedParser
parser = MimeTypeBasedParser(
handlers={
"application/pdf": ...,
},
fallback_parser=...,
)
""" # noqa: E501
def __init__(
self,
handlers: Mapping[str, BaseBlobParser],
*,
fallback_parser: Optional[BaseBlobParser] = None,
) -> None:
"""Define a parser that uses mime-types to determine how to parse a blob.
Args:
            handlers: A mapping from mime-types to parsers that know how to turn
                a blob with that mime-type into documents.
fallback_parser: A fallback_parser parser to use if the mime-type is not
found in the handlers. If provided, this parser will be
used to parse blobs with all mime-types not found in
the handlers.
If not provided, a ValueError will be raised if the
mime-type is not found in the handlers.
"""
self.handlers = handlers
self.fallback_parser = fallback_parser
def lazy_parse(self, blob: Blob) -> Iterator[Document]:
"""Load documents from a blob."""
mimetype = blob.mimetype
if mimetype is None:
raise ValueError(f"{blob} does not have a mimetype.")
if mimetype in self.handlers:
handler = self.handlers[mimetype]
yield from handler.lazy_parse(blob)
else:
if self.fallback_parser is not None:
yield from self.fallback_parser.lazy_parse(blob)
else:
raise ValueError(f"Unsupported mime type: {mimetype}")

View File

@@ -0,0 +1,157 @@
from __future__ import annotations
from typing import Any, Dict, Iterator, Optional, TYPE_CHECKING
from langchain_core.documents import Document
from langchain_community.document_loaders.base import BaseBlobParser
from langchain_community.document_loaders.blob_loaders import Blob
from langchain_community.document_loaders.parsers.language.cobol import CobolSegmenter
from langchain_community.document_loaders.parsers.language.javascript import (
JavaScriptSegmenter,
)
from langchain_community.document_loaders.parsers.language.python import PythonSegmenter
if TYPE_CHECKING:
from langchain.text_splitter import Language
try:
from langchain.text_splitter import Language
LANGUAGE_EXTENSIONS: Dict[str, str] = {
"py": Language.PYTHON,
"js": Language.JS,
"cobol": Language.COBOL,
}
LANGUAGE_SEGMENTERS: Dict[str, Any] = {
Language.PYTHON: PythonSegmenter,
Language.JS: JavaScriptSegmenter,
Language.COBOL: CobolSegmenter,
}
except ImportError:
LANGUAGE_EXTENSIONS = {}
LANGUAGE_SEGMENTERS = {}
class LanguageParser(BaseBlobParser):
"""Parse using the respective programming language syntax.
Each top-level function and class in the code is loaded into separate documents.
Furthermore, an extra document is generated, containing the remaining top-level code
that excludes the already segmented functions and classes.
This approach can potentially improve the accuracy of QA models over source code.
    Currently, the supported languages for code parsing are Python, JavaScript,
    and COBOL.
The language used for parsing can be configured, along with the minimum number of
lines required to activate the splitting based on syntax.
Examples:
.. code-block:: python
            from langchain.text_splitter import Language
from langchain_community.document_loaders.generic import GenericLoader
from langchain_community.document_loaders.parsers import LanguageParser
loader = GenericLoader.from_filesystem(
"./code",
glob="**/*",
suffixes=[".py", ".js"],
parser=LanguageParser()
)
docs = loader.load()
Example instantiations to manually select the language:
.. code-block:: python
from langchain.text_splitter import Language
loader = GenericLoader.from_filesystem(
"./code",
glob="**/*",
suffixes=[".py"],
parser=LanguageParser(language=Language.PYTHON)
)
Example instantiations to set number of lines threshold:
.. code-block:: python
loader = GenericLoader.from_filesystem(
"./code",
glob="**/*",
suffixes=[".py"],
parser=LanguageParser(parser_threshold=200)
)
"""
def __init__(self, language: Optional[Language] = None, parser_threshold: int = 0):
"""
Language parser that split code using the respective language syntax.
Args:
language: If None (default), it will try to infer language from source.
parser_threshold: Minimum lines needed to activate parsing (0 by default).
"""
self.language = language
self.parser_threshold = parser_threshold
def lazy_parse(self, blob: Blob) -> Iterator[Document]:
code = blob.as_string()
language = self.language or (
LANGUAGE_EXTENSIONS.get(blob.source.rsplit(".", 1)[-1])
if isinstance(blob.source, str)
else None
)
if language is None:
yield Document(
page_content=code,
metadata={
"source": blob.source,
},
)
return
if self.parser_threshold >= len(code.splitlines()):
yield Document(
page_content=code,
metadata={
"source": blob.source,
"language": language,
},
)
return
        segmenter_class = LANGUAGE_SEGMENTERS[language]
        segmenter = segmenter_class(code)
if not segmenter.is_valid():
yield Document(
page_content=code,
metadata={
"source": blob.source,
},
)
return
for functions_classes in segmenter.extract_functions_classes():
yield Document(
page_content=functions_classes,
metadata={
"source": blob.source,
"content_type": "functions_classes",
"language": language,
},
)
yield Document(
page_content=segmenter.simplify_code(),
metadata={
"source": blob.source,
"content_type": "simplified_code",
"language": language,
},
)
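# Illustrative sketch: the language is inferred from the blob's file extension,
# so a ".py" source is segmented with the Python segmenter:
code_blob = Blob.from_data("def greet():\n    return 'hi'\n", path="example.py")
for doc in LanguageParser().lazy_parse(code_blob):
    print(doc.metadata.get("content_type"), "->", doc.page_content[:30])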

View File

@@ -0,0 +1,262 @@
from __future__ import annotations
import asyncio
import json
from pathlib import Path
from typing import TYPE_CHECKING, Dict, List, Optional, Union
from langchain_core.documents import Document
from langchain_community.document_loaders.base import BaseLoader
if TYPE_CHECKING:
import pandas as pd
from telethon.hints import EntityLike
def concatenate_rows(row: dict) -> str:
"""Combine message information in a readable format ready to be used."""
date = row["date"]
sender = row["from"]
text = row["text"]
return f"{sender} on {date}: {text}\n\n"
class TelegramChatFileLoader(BaseLoader):
"""Load from `Telegram chat` dump."""
def __init__(self, path: str):
"""Initialize with a path."""
self.file_path = path
def load(self) -> List[Document]:
"""Load documents."""
p = Path(self.file_path)
with open(p, encoding="utf8") as f:
d = json.load(f)
text = "".join(
concatenate_rows(message)
for message in d["messages"]
if message["type"] == "message" and isinstance(message["text"], str)
)
metadata = {"source": str(p)}
return [Document(page_content=text, metadata=metadata)]
def text_to_docs(text: Union[str, List[str]]) -> List[Document]:
"""Convert a string or list of strings to a list of Documents with metadata."""
from langchain.text_splitter import RecursiveCharacterTextSplitter
if isinstance(text, str):
# Take a single string as one page
text = [text]
page_docs = [Document(page_content=page) for page in text]
# Add page numbers as metadata
for i, doc in enumerate(page_docs):
doc.metadata["page"] = i + 1
# Split pages into chunks
doc_chunks = []
for doc in page_docs:
text_splitter = RecursiveCharacterTextSplitter(
chunk_size=800,
separators=["\n\n", "\n", ".", "!", "?", ",", " ", ""],
chunk_overlap=20,
)
chunks = text_splitter.split_text(doc.page_content)
for i, chunk in enumerate(chunks):
doc = Document(
page_content=chunk, metadata={"page": doc.metadata["page"], "chunk": i}
)
            # Add source as metadata
doc.metadata["source"] = f"{doc.metadata['page']}-{doc.metadata['chunk']}"
doc_chunks.append(doc)
return doc_chunks
class TelegramChatApiLoader(BaseLoader):
"""Load `Telegram` chat json directory dump."""
def __init__(
self,
chat_entity: Optional[EntityLike] = None,
api_id: Optional[int] = None,
api_hash: Optional[str] = None,
username: Optional[str] = None,
file_path: str = "telegram_data.json",
):
"""Initialize with API parameters.
Args:
chat_entity: The chat entity to fetch data from.
api_id: The API ID.
api_hash: The API hash.
username: The username.
file_path: The file path to save the data to. Defaults to
"telegram_data.json".
"""
self.chat_entity = chat_entity
self.api_id = api_id
self.api_hash = api_hash
self.username = username
self.file_path = file_path
async def fetch_data_from_telegram(self) -> None:
"""Fetch data from Telegram API and save it as a JSON file."""
from telethon.sync import TelegramClient
data = []
async with TelegramClient(self.username, self.api_id, self.api_hash) as client:
async for message in client.iter_messages(self.chat_entity):
is_reply = message.reply_to is not None
reply_to_id = message.reply_to.reply_to_msg_id if is_reply else None
data.append(
{
"sender_id": message.sender_id,
"text": message.text,
"date": message.date.isoformat(),
"message.id": message.id,
"is_reply": is_reply,
"reply_to_id": reply_to_id,
}
)
with open(self.file_path, "w", encoding="utf-8") as f:
json.dump(data, f, ensure_ascii=False, indent=4)
def _get_message_threads(self, data: pd.DataFrame) -> dict:
"""Create a dictionary of message threads from the given data.
Args:
data (pd.DataFrame): A DataFrame containing the conversation \
data with columns:
- message.sender_id
- text
- date
- message.id
- is_reply
- reply_to_id
Returns:
dict: A dictionary where the key is the parent message ID and \
the value is a list of message IDs in ascending order.
"""
def find_replies(parent_id: int, reply_data: pd.DataFrame) -> List[int]:
"""
Recursively find all replies to a given parent message ID.
Args:
parent_id (int): The parent message ID.
reply_data (pd.DataFrame): A DataFrame containing reply messages.
Returns:
list: A list of message IDs that are replies to the parent message ID.
"""
# Find direct replies to the parent message ID
direct_replies = reply_data[reply_data["reply_to_id"] == parent_id][
"message.id"
].tolist()
# Recursively find replies to the direct replies
all_replies = []
for reply_id in direct_replies:
all_replies += [reply_id] + find_replies(reply_id, reply_data)
return all_replies
# Filter out parent messages
parent_messages = data[~data["is_reply"]]
# Filter out reply messages and drop rows with NaN in 'reply_to_id'
reply_messages = data[data["is_reply"]].dropna(subset=["reply_to_id"])
# Convert 'reply_to_id' to integer
reply_messages["reply_to_id"] = reply_messages["reply_to_id"].astype(int)
# Create a dictionary of message threads with parent message IDs as keys and \
# lists of reply message IDs as values
message_threads = {
parent_id: [parent_id] + find_replies(parent_id, reply_messages)
for parent_id in parent_messages["message.id"]
}
return message_threads
def _combine_message_texts(
self, message_threads: Dict[int, List[int]], data: pd.DataFrame
) -> str:
"""
Combine the message texts for each parent message ID based \
on the list of message threads.
Args:
message_threads (dict): A dictionary where the key is the parent message \
ID and the value is a list of message IDs in ascending order.
data (pd.DataFrame): A DataFrame containing the conversation data:
- message.sender_id
- text
- date
- message.id
- is_reply
- reply_to_id
Returns:
str: A combined string of message texts sorted by date.
"""
combined_text = ""
# Iterate through sorted parent message IDs
for parent_id, message_ids in message_threads.items():
# Get the message texts for the message IDs and sort them by date
message_texts = (
data[data["message.id"].isin(message_ids)]
.sort_values(by="date")["text"]
.tolist()
)
message_texts = [str(elem) for elem in message_texts]
# Combine the message texts
combined_text += " ".join(message_texts) + ".\n"
return combined_text.strip()
def load(self) -> List[Document]:
"""Load documents."""
if self.chat_entity is not None:
try:
import nest_asyncio
nest_asyncio.apply()
asyncio.run(self.fetch_data_from_telegram())
except ImportError:
raise ImportError(
"""`nest_asyncio` package not found.
please install with `pip install nest_asyncio`
"""
)
p = Path(self.file_path)
with open(p, encoding="utf8") as f:
d = json.load(f)
try:
import pandas as pd
except ImportError:
raise ImportError(
"""`pandas` package not found.
please install with `pip install pandas`
"""
)
normalized_messages = pd.json_normalize(d)
df = pd.DataFrame(normalized_messages)
message_threads = self._get_message_threads(df)
combined_texts = self._combine_message_texts(message_threads, df)
return text_to_docs(combined_texts)
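# Illustrative usage (a minimal sketch; "result.json" stands in for a file
# produced by Telegram's "Export chat history" feature):
loader = TelegramChatFileLoader("result.json")
docs = loader.load()
print(docs[0].page_content[:200])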

View File

@@ -0,0 +1,149 @@
from typing import Any, Iterator, List, Sequence, cast
from langchain_core.documents import BaseDocumentTransformer, Document
class BeautifulSoupTransformer(BaseDocumentTransformer):
"""Transform HTML content by extracting specific tags and removing unwanted ones.
Example:
.. code-block:: python
from langchain_community.document_transformers import BeautifulSoupTransformer
bs4_transformer = BeautifulSoupTransformer()
docs_transformed = bs4_transformer.transform_documents(docs)
""" # noqa: E501
def __init__(self) -> None:
"""
Initialize the transformer.
This checks if the BeautifulSoup4 package is installed.
If not, it raises an ImportError.
"""
try:
import bs4 # noqa:F401
except ImportError:
raise ImportError(
"BeautifulSoup4 is required for BeautifulSoupTransformer. "
"Please install it with `pip install beautifulsoup4`."
)
def transform_documents(
self,
documents: Sequence[Document],
unwanted_tags: List[str] = ["script", "style"],
tags_to_extract: List[str] = ["p", "li", "div", "a"],
remove_lines: bool = True,
**kwargs: Any,
) -> Sequence[Document]:
"""
Transform a list of Document objects by cleaning their HTML content.
Args:
documents: A sequence of Document objects containing HTML content.
unwanted_tags: A list of tags to be removed from the HTML.
tags_to_extract: A list of tags whose content will be extracted.
remove_lines: If set to True, unnecessary lines will be
removed from the HTML content.
Returns:
A sequence of Document objects with transformed content.
"""
for doc in documents:
cleaned_content = doc.page_content
cleaned_content = self.remove_unwanted_tags(cleaned_content, unwanted_tags)
cleaned_content = self.extract_tags(cleaned_content, tags_to_extract)
if remove_lines:
cleaned_content = self.remove_unnecessary_lines(cleaned_content)
doc.page_content = cleaned_content
return documents
@staticmethod
def remove_unwanted_tags(html_content: str, unwanted_tags: List[str]) -> str:
"""
Remove unwanted tags from a given HTML content.
Args:
html_content: The original HTML content string.
unwanted_tags: A list of tags to be removed from the HTML.
Returns:
A cleaned HTML string with unwanted tags removed.
"""
from bs4 import BeautifulSoup
soup = BeautifulSoup(html_content, "html.parser")
for tag in unwanted_tags:
for element in soup.find_all(tag):
element.decompose()
return str(soup)
@staticmethod
def extract_tags(html_content: str, tags: List[str]) -> str:
"""
Extract specific tags from a given HTML content.
Args:
html_content: The original HTML content string.
tags: A list of tags to be extracted from the HTML.
Returns:
A string combining the content of the extracted tags.
"""
from bs4 import BeautifulSoup
soup = BeautifulSoup(html_content, "html.parser")
text_parts: List[str] = []
for element in soup.find_all():
if element.name in tags:
# Extract all navigable strings recursively from this element.
text_parts += get_navigable_strings(element)
# To avoid duplicate text, remove all descendants from the soup.
element.decompose()
return " ".join(text_parts)
@staticmethod
def remove_unnecessary_lines(content: str) -> str:
"""
Clean up the content by removing unnecessary lines.
Args:
content: A string, which may contain unnecessary lines or spaces.
Returns:
A cleaned string with unnecessary lines removed.
"""
lines = content.split("\n")
stripped_lines = [line.strip() for line in lines]
non_empty_lines = [line for line in stripped_lines if line]
cleaned_content = " ".join(non_empty_lines)
return cleaned_content
async def atransform_documents(
self,
documents: Sequence[Document],
**kwargs: Any,
) -> Sequence[Document]:
raise NotImplementedError
def get_navigable_strings(element: Any) -> Iterator[str]:
from bs4 import NavigableString, Tag
for child in cast(Tag, element).children:
if isinstance(child, Tag):
yield from get_navigable_strings(child)
elif isinstance(child, NavigableString):
if (element.name == "a") and (href := element.get("href")):
yield f"{child.strip()} ({href})"
else:
yield child.strip()
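# Illustrative usage (a minimal sketch; requires the beautifulsoup4 package):
html_doc = Document(
    page_content="<html><script>x()</script><p>Keep me.</p><div>And me.</div></html>"
)
[cleaned] = BeautifulSoupTransformer().transform_documents([html_doc])
print(cleaned.page_content)  # -> "Keep me. And me."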

View File

@@ -0,0 +1,140 @@
"""Document transformers that use OpenAI Functions models"""
from typing import Any, Dict, Optional, Sequence, Type, Union
from langchain_core.documents import BaseDocumentTransformer, Document
from langchain_core.language_models import BaseLanguageModel
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.pydantic_v1 import BaseModel
class OpenAIMetadataTagger(BaseDocumentTransformer, BaseModel):
"""Extract metadata tags from document contents using OpenAI functions.
Example:
.. code-block:: python
            from langchain.chains import create_tagging_chain
            from langchain_community.chat_models import ChatOpenAI
            from langchain_community.document_transformers import OpenAIMetadataTagger
            from langchain_core.documents import Document
schema = {
"properties": {
"movie_title": { "type": "string" },
"critic": { "type": "string" },
"tone": {
"type": "string",
"enum": ["positive", "negative"]
},
"rating": {
"type": "integer",
"description": "The number of stars the critic rated the movie"
}
},
"required": ["movie_title", "critic", "tone"]
}
# Must be an OpenAI model that supports functions
llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo-0613")
tagging_chain = create_tagging_chain(schema, llm)
document_transformer = OpenAIMetadataTagger(tagging_chain=tagging_chain)
original_documents = [
Document(page_content="Review of The Bee Movie\nBy Roger Ebert\n\nThis is the greatest movie ever made. 4 out of 5 stars."),
Document(page_content="Review of The Godfather\nBy Anonymous\n\nThis movie was super boring. 1 out of 5 stars.", metadata={"reliable": False}),
]
enhanced_documents = document_transformer.transform_documents(original_documents)
""" # noqa: E501
tagging_chain: Any
"""The chain used to extract metadata from each document."""
def transform_documents(
self, documents: Sequence[Document], **kwargs: Any
) -> Sequence[Document]:
"""Automatically extract and populate metadata
for each document according to the provided schema."""
new_documents = []
for document in documents:
extracted_metadata: Dict = self.tagging_chain.run(document.page_content) # type: ignore[assignment] # noqa: E501
new_document = Document(
page_content=document.page_content,
metadata={**extracted_metadata, **document.metadata},
)
new_documents.append(new_document)
return new_documents
async def atransform_documents(
self, documents: Sequence[Document], **kwargs: Any
) -> Sequence[Document]:
raise NotImplementedError
def create_metadata_tagger(
metadata_schema: Union[Dict[str, Any], Type[BaseModel]],
llm: BaseLanguageModel,
prompt: Optional[ChatPromptTemplate] = None,
*,
tagging_chain_kwargs: Optional[Dict] = None,
) -> OpenAIMetadataTagger:
"""Create a DocumentTransformer that uses an OpenAI function chain to automatically
tag documents with metadata based on their content and an input schema.
Args:
metadata_schema: Either a dictionary or pydantic.BaseModel class. If a dictionary
is passed in, it's assumed to already be a valid JsonSchema.
For best results, pydantic.BaseModels should have docstrings describing what
the schema represents and descriptions for the parameters.
        llm: Language model to use, assumed to support the OpenAI
            function-calling API, e.g. ChatOpenAI with "gpt-3.5-turbo-0613".
prompt: BasePromptTemplate to pass to the model.
Returns:
        An OpenAIMetadataTagger instance that will tag documents according to the
        given schema.
Example:
.. code-block:: python
from langchain_community.chat_models import ChatOpenAI
from langchain_community.document_transformers import create_metadata_tagger
from langchain_core.documents import Document
schema = {
"properties": {
"movie_title": { "type": "string" },
"critic": { "type": "string" },
"tone": {
"type": "string",
"enum": ["positive", "negative"]
},
"rating": {
"type": "integer",
"description": "The number of stars the critic rated the movie"
}
},
"required": ["movie_title", "critic", "tone"]
}
# Must be an OpenAI model that supports functions
llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo-0613")
document_transformer = create_metadata_tagger(schema, llm)
original_documents = [
Document(page_content="Review of The Bee Movie\nBy Roger Ebert\n\nThis is the greatest movie ever made. 4 out of 5 stars."),
Document(page_content="Review of The Godfather\nBy Anonymous\n\nThis movie was super boring. 1 out of 5 stars.", metadata={"reliable": False}),
]
enhanced_documents = document_transformer.transform_documents(original_documents)
""" # noqa: E501
from langchain.chains.openai_functions import create_tagging_chain
metadata_schema = (
metadata_schema
if isinstance(metadata_schema, dict)
else metadata_schema.schema()
)
_tagging_chain_kwargs = tagging_chain_kwargs or {}
tagging_chain = create_tagging_chain(
metadata_schema, llm, prompt=prompt, **_tagging_chain_kwargs
)
return OpenAIMetadataTagger(tagging_chain=tagging_chain)
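# Illustrative alternative (a sketch): the schema can also be supplied as a
# pydantic model rather than a raw dict; field names become metadata keys.
from langchain_core.pydantic_v1 import Field

class ReviewTags(BaseModel):
    """Tags describing a movie review."""

    movie_title: str
    critic: str
    tone: str = Field(description="positive or negative")

# With an OpenAI functions-capable `llm` as in the docstring above:
# document_transformer = create_metadata_tagger(ReviewTags, llm)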

View File

@@ -0,0 +1,160 @@
"""**Embedding models** are wrappers around embedding models
from different APIs and services.
**Embedding models** can be LLMs or not.
**Class hierarchy:**
.. code-block::
Embeddings --> <name>Embeddings # Examples: CohereEmbeddings, HuggingFaceEmbeddings
"""
import logging
from typing import Any
from langchain_community.embeddings.aleph_alpha import (
AlephAlphaAsymmetricSemanticEmbedding,
AlephAlphaSymmetricSemanticEmbedding,
)
from langchain_community.embeddings.awa import AwaEmbeddings
from langchain_community.embeddings.baidu_qianfan_endpoint import (
QianfanEmbeddingsEndpoint,
)
from langchain_community.embeddings.bedrock import BedrockEmbeddings
from langchain_community.embeddings.bookend import BookendEmbeddings
from langchain_community.embeddings.cache import CacheBackedEmbeddings
from langchain_community.embeddings.clarifai import ClarifaiEmbeddings
from langchain_community.embeddings.cohere import CohereEmbeddings
from langchain_community.embeddings.dashscope import DashScopeEmbeddings
from langchain_community.embeddings.databricks import DatabricksEmbeddings
from langchain_community.embeddings.deepinfra import DeepInfraEmbeddings
from langchain_community.embeddings.edenai import EdenAiEmbeddings
from langchain_community.embeddings.elasticsearch import ElasticsearchEmbeddings
from langchain_community.embeddings.embaas import EmbaasEmbeddings
from langchain_community.embeddings.ernie import ErnieEmbeddings
from langchain_community.embeddings.fake import (
DeterministicFakeEmbedding,
FakeEmbeddings,
)
from langchain_community.embeddings.fastembed import FastEmbedEmbeddings
from langchain_community.embeddings.google_palm import GooglePalmEmbeddings
from langchain_community.embeddings.gpt4all import GPT4AllEmbeddings
from langchain_community.embeddings.gradient_ai import GradientEmbeddings
from langchain_community.embeddings.huggingface import (
HuggingFaceBgeEmbeddings,
HuggingFaceEmbeddings,
HuggingFaceInferenceAPIEmbeddings,
HuggingFaceInstructEmbeddings,
)
from langchain_community.embeddings.huggingface_hub import HuggingFaceHubEmbeddings
from langchain_community.embeddings.infinity import InfinityEmbeddings
from langchain_community.embeddings.javelin_ai_gateway import JavelinAIGatewayEmbeddings
from langchain_community.embeddings.jina import JinaEmbeddings
from langchain_community.embeddings.johnsnowlabs import JohnSnowLabsEmbeddings
from langchain_community.embeddings.llamacpp import LlamaCppEmbeddings
from langchain_community.embeddings.localai import LocalAIEmbeddings
from langchain_community.embeddings.minimax import MiniMaxEmbeddings
from langchain_community.embeddings.mlflow import MlflowEmbeddings
from langchain_community.embeddings.mlflow_gateway import MlflowAIGatewayEmbeddings
from langchain_community.embeddings.modelscope_hub import ModelScopeEmbeddings
from langchain_community.embeddings.mosaicml import MosaicMLInstructorEmbeddings
from langchain_community.embeddings.nlpcloud import NLPCloudEmbeddings
from langchain_community.embeddings.octoai_embeddings import OctoAIEmbeddings
from langchain_community.embeddings.ollama import OllamaEmbeddings
from langchain_community.embeddings.sagemaker_endpoint import (
SagemakerEndpointEmbeddings,
)
from langchain_community.embeddings.self_hosted import SelfHostedEmbeddings
from langchain_community.embeddings.self_hosted_hugging_face import (
SelfHostedHuggingFaceEmbeddings,
SelfHostedHuggingFaceInstructEmbeddings,
)
from langchain_community.embeddings.sentence_transformer import (
SentenceTransformerEmbeddings,
)
from langchain_community.embeddings.spacy_embeddings import SpacyEmbeddings
from langchain_community.embeddings.tensorflow_hub import TensorflowHubEmbeddings
from langchain_community.embeddings.vertexai import VertexAIEmbeddings
from langchain_community.embeddings.voyageai import VoyageEmbeddings
from langchain_community.embeddings.xinference import XinferenceEmbeddings
logger = logging.getLogger(__name__)
__all__ = [
"CacheBackedEmbeddings",
"ClarifaiEmbeddings",
"CohereEmbeddings",
"DatabricksEmbeddings",
"ElasticsearchEmbeddings",
"FastEmbedEmbeddings",
"HuggingFaceEmbeddings",
"HuggingFaceInferenceAPIEmbeddings",
"InfinityEmbeddings",
"GradientEmbeddings",
"JinaEmbeddings",
"LlamaCppEmbeddings",
"HuggingFaceHubEmbeddings",
"MlflowEmbeddings",
"MlflowAIGatewayEmbeddings",
"ModelScopeEmbeddings",
"TensorflowHubEmbeddings",
"SagemakerEndpointEmbeddings",
"HuggingFaceInstructEmbeddings",
"MosaicMLInstructorEmbeddings",
"SelfHostedEmbeddings",
"SelfHostedHuggingFaceEmbeddings",
"SelfHostedHuggingFaceInstructEmbeddings",
"FakeEmbeddings",
"DeterministicFakeEmbedding",
"AlephAlphaAsymmetricSemanticEmbedding",
"AlephAlphaSymmetricSemanticEmbedding",
"SentenceTransformerEmbeddings",
"GooglePalmEmbeddings",
"MiniMaxEmbeddings",
"VertexAIEmbeddings",
"BedrockEmbeddings",
"DeepInfraEmbeddings",
"EdenAiEmbeddings",
"DashScopeEmbeddings",
"EmbaasEmbeddings",
"OctoAIEmbeddings",
"SpacyEmbeddings",
"NLPCloudEmbeddings",
"GPT4AllEmbeddings",
"XinferenceEmbeddings",
"LocalAIEmbeddings",
"AwaEmbeddings",
"HuggingFaceBgeEmbeddings",
"ErnieEmbeddings",
"JavelinAIGatewayEmbeddings",
"OllamaEmbeddings",
"QianfanEmbeddingsEndpoint",
"JohnSnowLabsEmbeddings",
"VoyageEmbeddings",
"BookendEmbeddings",
]
# TODO: this is in here to maintain backwards compatibility
class HypotheticalDocumentEmbedder:
    def __new__(cls, *args: Any, **kwargs: Any) -> Any:
        # __init__ cannot return a value, so construct and return the
        # replacement class from __new__ instead.
        logger.warning(
            "Using a deprecated class. Please use "
            "`from langchain.chains import HypotheticalDocumentEmbedder` instead"
        )
        from langchain.chains.hyde.base import HypotheticalDocumentEmbedder as H
        return H(*args, **kwargs)
@classmethod
def from_llm(cls, *args: Any, **kwargs: Any) -> Any:
logger.warning(
"Using a deprecated class. Please use "
"`from langchain.chains import HypotheticalDocumentEmbedder` instead"
)
from langchain.chains.hyde.base import HypotheticalDocumentEmbedder as H
return H.from_llm(*args, **kwargs)

View File

@@ -0,0 +1,176 @@
"""Module contains code for a cache backed embedder.
The cache backed embedder is a wrapper around an embedder that caches
embeddings in a key-value store. The cache is used to avoid recomputing
embeddings for the same text.
The text is hashed and the hash is used as the key in the cache.
"""
from __future__ import annotations
import hashlib
import json
import uuid
from functools import partial
from typing import Callable, List, Sequence, Union, cast
from langchain_core.embeddings import Embeddings
from langchain_core.stores import BaseStore, ByteStore
from langchain_community.storage.encoder_backed import EncoderBackedStore
NAMESPACE_UUID = uuid.UUID(int=1985)
def _hash_string_to_uuid(input_string: str) -> uuid.UUID:
"""Hash a string and returns the corresponding UUID."""
hash_value = hashlib.sha1(input_string.encode("utf-8")).hexdigest()
return uuid.uuid5(NAMESPACE_UUID, hash_value)
def _key_encoder(key: str, namespace: str) -> str:
"""Encode a key."""
return namespace + str(_hash_string_to_uuid(key))
def _create_key_encoder(namespace: str) -> Callable[[str], str]:
"""Create an encoder for a key."""
return partial(_key_encoder, namespace=namespace)
def _value_serializer(value: Sequence[float]) -> bytes:
"""Serialize a value."""
return json.dumps(value).encode()
def _value_deserializer(serialized_value: bytes) -> List[float]:
"""Deserialize a value."""
return cast(List[float], json.loads(serialized_value.decode()))
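# Illustrative check of the key scheme (deterministic, so safe to run):
# the same text always maps to the same namespaced key, and different
# namespaces (e.g. different model names) never collide.
assert _key_encoder("hello", namespace="model-a") == _key_encoder(
    "hello", namespace="model-a"
)
assert _key_encoder("hello", namespace="model-a") != _key_encoder(
    "hello", namespace="model-b"
)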
class CacheBackedEmbeddings(Embeddings):
"""Interface for caching results from embedding models.
    The interface works with any store that implements the abstract store
    interface, accepting keys of type str and values that are lists of floats.
If need be, the interface can be extended to accept other implementations
of the value serializer and deserializer, as well as the key encoder.
Examples:
        .. code-block:: python
from langchain_community.embeddings import CacheBackedEmbeddings, OpenAIEmbeddings
from langchain_community.storage import LocalFileStore
store = LocalFileStore('./my_cache')
underlying_embedder = OpenAIEmbeddings()
embedder = CacheBackedEmbeddings.from_bytes_store(
underlying_embedder, store, namespace=underlying_embedder.model
)
# Embedding is computed and cached
embeddings = embedder.embed_documents(["hello", "goodbye"])
# Embeddings are retrieved from the cache, no computation is done
embeddings = embedder.embed_documents(["hello", "goodbye"])
""" # noqa: E501
def __init__(
self,
underlying_embeddings: Embeddings,
document_embedding_store: BaseStore[str, List[float]],
) -> None:
"""Initialize the embedder.
Args:
underlying_embeddings: the embedder to use for computing embeddings.
document_embedding_store: The store to use for caching document embeddings.
"""
super().__init__()
self.document_embedding_store = document_embedding_store
self.underlying_embeddings = underlying_embeddings
def embed_documents(self, texts: List[str]) -> List[List[float]]:
"""Embed a list of texts.
The method first checks the cache for the embeddings.
If the embeddings are not found, the method uses the underlying embedder
to embed the documents and stores the results in the cache.
Args:
texts: A list of texts to embed.
Returns:
A list of embeddings for the given texts.
"""
vectors: List[Union[List[float], None]] = self.document_embedding_store.mget(
texts
)
missing_indices: List[int] = [
i for i, vector in enumerate(vectors) if vector is None
]
missing_texts = [texts[i] for i in missing_indices]
if missing_texts:
missing_vectors = self.underlying_embeddings.embed_documents(missing_texts)
self.document_embedding_store.mset(
list(zip(missing_texts, missing_vectors))
)
for index, updated_vector in zip(missing_indices, missing_vectors):
vectors[index] = updated_vector
return cast(
List[List[float]], vectors
) # Nones should have been resolved by now
def embed_query(self, text: str) -> List[float]:
"""Embed query text.
This method does not support caching at the moment.
        Support for caching queries would be easy to implement, but it may make
        sense to hold off until the most common usage patterns are clear.
If the cache has an eviction policy, we may need to be a bit more careful
about sharing the cache between documents and queries. Generally,
one is OK evicting query caches, but document caches should be kept.
Args:
text: The text to embed.
Returns:
The embedding for the given text.
"""
return self.underlying_embeddings.embed_query(text)
@classmethod
def from_bytes_store(
cls,
underlying_embeddings: Embeddings,
document_embedding_cache: ByteStore,
*,
namespace: str = "",
) -> CacheBackedEmbeddings:
"""On-ramp that adds the necessary serialization and encoding to the store.
Args:
underlying_embeddings: The embedder to use for embedding.
document_embedding_cache: The cache to use for storing document embeddings.
            namespace: The namespace to use for the document cache.
This namespace is used to avoid collisions with other caches.
For example, set it to the name of the embedding model used.
"""
key_encoder = _create_key_encoder(namespace)
encoder_backed_store = EncoderBackedStore[str, List[float]](
document_embedding_cache,
key_encoder,
_value_serializer,
_value_deserializer,
)
return cls(underlying_embeddings, encoder_backed_store)
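To make the wiring above concrete, here is a self-contained sketch (my illustration, not part of the diff) that pairs `CacheBackedEmbeddings.from_bytes_store` with a minimal dict-backed byte store and a deterministic fake embedder, so the cache-hit path can be observed without any external model. `DictByteStore`, `CountingEmbedder`, and the `"fake-model:"` namespace are all invented for the example:

```python
from typing import Iterator, List, Optional, Sequence, Tuple

from langchain_core.embeddings import Embeddings
from langchain_core.stores import BaseStore

from langchain_community.embeddings import CacheBackedEmbeddings


class DictByteStore(BaseStore[str, bytes]):
    """Minimal in-memory ByteStore, just for the demonstration."""

    def __init__(self) -> None:
        self._data: dict = {}

    def mget(self, keys: Sequence[str]) -> List[Optional[bytes]]:
        return [self._data.get(k) for k in keys]

    def mset(self, key_value_pairs: Sequence[Tuple[str, bytes]]) -> None:
        self._data.update(key_value_pairs)

    def mdelete(self, keys: Sequence[str]) -> None:
        for k in keys:
            self._data.pop(k, None)

    def yield_keys(self, *, prefix: Optional[str] = None) -> Iterator[str]:
        yield from (k for k in self._data if prefix is None or k.startswith(prefix))


class CountingEmbedder(Embeddings):
    """Fake embedder that counts how often it is actually invoked."""

    def __init__(self) -> None:
        self.calls = 0

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        self.calls += 1
        return [[float(len(t))] for t in texts]

    def embed_query(self, text: str) -> List[float]:
        return self.embed_documents([text])[0]


underlying = CountingEmbedder()
embedder = CacheBackedEmbeddings.from_bytes_store(
    underlying, DictByteStore(), namespace="fake-model:"
)
embedder.embed_documents(["hello", "goodbye"])  # computed; calls == 1
embedder.embed_documents(["hello", "goodbye"])  # served from cache; still 1
assert underlying.calls == 1
```

Because keys are hashed and prefixed with the namespace, two embedders with different namespaces can safely share the same underlying byte store.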

View File

@@ -0,0 +1,343 @@
from typing import Any, Dict, List, Optional
import requests
from langchain_core.embeddings import Embeddings
from langchain_core.pydantic_v1 import BaseModel, Extra, Field
DEFAULT_MODEL_NAME = "sentence-transformers/all-mpnet-base-v2"
DEFAULT_INSTRUCT_MODEL = "hkunlp/instructor-large"
DEFAULT_BGE_MODEL = "BAAI/bge-large-en"
DEFAULT_EMBED_INSTRUCTION = "Represent the document for retrieval: "
DEFAULT_QUERY_INSTRUCTION = (
"Represent the question for retrieving supporting documents: "
)
DEFAULT_QUERY_BGE_INSTRUCTION_EN = (
"Represent this question for searching relevant passages: "
)
DEFAULT_QUERY_BGE_INSTRUCTION_ZH = "为这个句子生成表示以用于检索相关文章:"
class HuggingFaceEmbeddings(BaseModel, Embeddings):
"""HuggingFace sentence_transformers embedding models.
To use, you should have the ``sentence_transformers`` python package installed.
Example:
.. code-block:: python
from langchain_community.embeddings import HuggingFaceEmbeddings
model_name = "sentence-transformers/all-mpnet-base-v2"
model_kwargs = {'device': 'cpu'}
encode_kwargs = {'normalize_embeddings': False}
hf = HuggingFaceEmbeddings(
model_name=model_name,
model_kwargs=model_kwargs,
encode_kwargs=encode_kwargs
)
"""
client: Any #: :meta private:
model_name: str = DEFAULT_MODEL_NAME
"""Model name to use."""
cache_folder: Optional[str] = None
"""Path to store models.
Can be also set by SENTENCE_TRANSFORMERS_HOME environment variable."""
model_kwargs: Dict[str, Any] = Field(default_factory=dict)
"""Keyword arguments to pass to the model."""
encode_kwargs: Dict[str, Any] = Field(default_factory=dict)
"""Keyword arguments to pass when calling the `encode` method of the model."""
multi_process: bool = False
"""Run encode() on multiple GPUs."""
def __init__(self, **kwargs: Any):
"""Initialize the sentence_transformer."""
super().__init__(**kwargs)
try:
import sentence_transformers
except ImportError as exc:
raise ImportError(
"Could not import sentence_transformers python package. "
"Please install it with `pip install sentence-transformers`."
) from exc
self.client = sentence_transformers.SentenceTransformer(
self.model_name, cache_folder=self.cache_folder, **self.model_kwargs
)
class Config:
"""Configuration for this pydantic object."""
extra = Extra.forbid
def embed_documents(self, texts: List[str]) -> List[List[float]]:
"""Compute doc embeddings using a HuggingFace transformer model.
Args:
texts: The list of texts to embed.
Returns:
List of embeddings, one for each text.
"""
import sentence_transformers
texts = list(map(lambda x: x.replace("\n", " "), texts))
if self.multi_process:
pool = self.client.start_multi_process_pool()
embeddings = self.client.encode_multi_process(texts, pool)
sentence_transformers.SentenceTransformer.stop_multi_process_pool(pool)
else:
embeddings = self.client.encode(texts, **self.encode_kwargs)
return embeddings.tolist()
def embed_query(self, text: str) -> List[float]:
"""Compute query embeddings using a HuggingFace transformer model.
Args:
text: The text to embed.
Returns:
Embeddings for the text.
"""
return self.embed_documents([text])[0]
class HuggingFaceInstructEmbeddings(BaseModel, Embeddings):
"""Wrapper around sentence_transformers embedding models.
To use, you should have the ``sentence_transformers``
and ``InstructorEmbedding`` python packages installed.
Example:
.. code-block:: python
from langchain_community.embeddings import HuggingFaceInstructEmbeddings
model_name = "hkunlp/instructor-large"
model_kwargs = {'device': 'cpu'}
encode_kwargs = {'normalize_embeddings': True}
hf = HuggingFaceInstructEmbeddings(
model_name=model_name,
model_kwargs=model_kwargs,
encode_kwargs=encode_kwargs
)
"""
client: Any #: :meta private:
model_name: str = DEFAULT_INSTRUCT_MODEL
"""Model name to use."""
cache_folder: Optional[str] = None
"""Path to store models.
Can be also set by SENTENCE_TRANSFORMERS_HOME environment variable."""
model_kwargs: Dict[str, Any] = Field(default_factory=dict)
"""Keyword arguments to pass to the model."""
encode_kwargs: Dict[str, Any] = Field(default_factory=dict)
"""Keyword arguments to pass when calling the `encode` method of the model."""
embed_instruction: str = DEFAULT_EMBED_INSTRUCTION
"""Instruction to use for embedding documents."""
query_instruction: str = DEFAULT_QUERY_INSTRUCTION
"""Instruction to use for embedding query."""
def __init__(self, **kwargs: Any):
"""Initialize the sentence_transformer."""
super().__init__(**kwargs)
try:
from InstructorEmbedding import INSTRUCTOR
self.client = INSTRUCTOR(
self.model_name, cache_folder=self.cache_folder, **self.model_kwargs
)
except ImportError as e:
raise ImportError("Dependencies for InstructorEmbedding not found.") from e
class Config:
"""Configuration for this pydantic object."""
extra = Extra.forbid
def embed_documents(self, texts: List[str]) -> List[List[float]]:
"""Compute doc embeddings using a HuggingFace instruct model.
Args:
texts: The list of texts to embed.
Returns:
List of embeddings, one for each text.
"""
instruction_pairs = [[self.embed_instruction, text] for text in texts]
embeddings = self.client.encode(instruction_pairs, **self.encode_kwargs)
return embeddings.tolist()
def embed_query(self, text: str) -> List[float]:
"""Compute query embeddings using a HuggingFace instruct model.
Args:
text: The text to embed.
Returns:
Embeddings for the text.
"""
instruction_pair = [self.query_instruction, text]
embedding = self.client.encode([instruction_pair], **self.encode_kwargs)[0]
return embedding.tolist()
class HuggingFaceBgeEmbeddings(BaseModel, Embeddings):
"""HuggingFace BGE sentence_transformers embedding models.
To use, you should have the ``sentence_transformers`` python package installed.
Example:
.. code-block:: python
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
model_name = "BAAI/bge-large-en"
model_kwargs = {'device': 'cpu'}
encode_kwargs = {'normalize_embeddings': True}
hf = HuggingFaceBgeEmbeddings(
model_name=model_name,
model_kwargs=model_kwargs,
encode_kwargs=encode_kwargs
)
"""
client: Any #: :meta private:
model_name: str = DEFAULT_BGE_MODEL
"""Model name to use."""
cache_folder: Optional[str] = None
"""Path to store models.
Can be also set by SENTENCE_TRANSFORMERS_HOME environment variable."""
model_kwargs: Dict[str, Any] = Field(default_factory=dict)
"""Keyword arguments to pass to the model."""
encode_kwargs: Dict[str, Any] = Field(default_factory=dict)
"""Keyword arguments to pass when calling the `encode` method of the model."""
query_instruction: str = DEFAULT_QUERY_BGE_INSTRUCTION_EN
"""Instruction to use for embedding query."""
def __init__(self, **kwargs: Any):
"""Initialize the sentence_transformer."""
super().__init__(**kwargs)
try:
import sentence_transformers
except ImportError as exc:
raise ImportError(
"Could not import sentence_transformers python package. "
"Please install it with `pip install sentence_transformers`."
) from exc
self.client = sentence_transformers.SentenceTransformer(
self.model_name, cache_folder=self.cache_folder, **self.model_kwargs
)
if "-zh" in self.model_name:
self.query_instruction = DEFAULT_QUERY_BGE_INSTRUCTION_ZH
class Config:
"""Configuration for this pydantic object."""
extra = Extra.forbid
def embed_documents(self, texts: List[str]) -> List[List[float]]:
"""Compute doc embeddings using a HuggingFace transformer model.
Args:
texts: The list of texts to embed.
Returns:
List of embeddings, one for each text.
"""
texts = [t.replace("\n", " ") for t in texts]
embeddings = self.client.encode(texts, **self.encode_kwargs)
return embeddings.tolist()
def embed_query(self, text: str) -> List[float]:
"""Compute query embeddings using a HuggingFace transformer model.
Args:
text: The text to embed.
Returns:
Embeddings for the text.
"""
text = text.replace("\n", " ")
embedding = self.client.encode(
self.query_instruction + text, **self.encode_kwargs
)
return embedding.tolist()
class HuggingFaceInferenceAPIEmbeddings(BaseModel, Embeddings):
"""Embed texts using the HuggingFace API.
Requires a HuggingFace Inference API key and a model name.
"""
api_key: str
"""Your API key for the HuggingFace Inference API."""
model_name: str = "sentence-transformers/all-MiniLM-L6-v2"
"""The name of the model to use for text embeddings."""
api_url: Optional[str] = None
"""Custom inference endpoint url. None for using default public url."""
@property
def _api_url(self) -> str:
return self.api_url or self._default_api_url
@property
def _default_api_url(self) -> str:
return (
"https://api-inference.huggingface.co"
"/pipeline"
"/feature-extraction"
f"/{self.model_name}"
)
@property
def _headers(self) -> dict:
return {"Authorization": f"Bearer {self.api_key}"}
def embed_documents(self, texts: List[str]) -> List[List[float]]:
"""Get the embeddings for a list of texts.
Args:
texts (Documents): A list of texts to get embeddings for.
Returns:
Embedded texts as List[List[float]], where each inner List[float]
corresponds to a single input text.
Example:
.. code-block:: python
from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings
hf_embeddings = HuggingFaceInferenceAPIEmbeddings(
api_key="your_api_key",
model_name="sentence-transformers/all-MiniLM-l6-v2"
)
texts = ["Hello, world!", "How are you?"]
hf_embeddings.embed_documents(texts)
""" # noqa: E501
response = requests.post(
self._api_url,
headers=self._headers,
json={
"inputs": texts,
"options": {"wait_for_model": True, "use_cache": True},
},
)
return response.json()
def embed_query(self, text: str) -> List[float]:
"""Compute query embeddings using a HuggingFace transformer model.
Args:
text: The text to embed.
Returns:
Embeddings for the text.
"""
return self.embed_documents([text])[0]
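The inference-API class above is a thin layer over a single HTTP call. The hedged sketch below (my illustration, not part of the diff) shows the equivalent raw request, which can help when debugging authentication or cold-start issues; the URL layout mirrors `_default_api_url` above, and the token is a placeholder:

```python
import requests

API_KEY = "hf_..."  # placeholder token
MODEL = "sentence-transformers/all-MiniLM-L6-v2"
url = f"https://api-inference.huggingface.co/pipeline/feature-extraction/{MODEL}"

response = requests.post(
    url,
    headers={"Authorization": f"Bearer {API_KEY}"},
    json={
        "inputs": ["Hello, world!", "How are you?"],
        # wait_for_model blocks while the model is cold-starting;
        # use_cache lets the API reuse results for identical inputs.
        "options": {"wait_for_model": True, "use_cache": True},
    },
)
embeddings = response.json()  # List[List[float]], one vector per input
```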

View File

@@ -0,0 +1,92 @@
import os
import sys
from typing import Any, List
from langchain_core.embeddings import Embeddings
from langchain_core.pydantic_v1 import BaseModel, Extra
class JohnSnowLabsEmbeddings(BaseModel, Embeddings):
"""JohnSnowLabs embedding models
To use, you should have the ``johnsnowlabs`` python package installed.
Example:
.. code-block:: python
from langchain_community.embeddings.johnsnowlabs import JohnSnowLabsEmbeddings
embedding = JohnSnowLabsEmbeddings(model='embed_sentence.bert')
output = embedding.embed_query("foo bar")
""" # noqa: E501
model: Any = "embed_sentence.bert"
def __init__(
self,
model: Any = "embed_sentence.bert",
hardware_target: str = "cpu",
**kwargs: Any,
):
"""Initialize the johnsnowlabs model."""
super().__init__(**kwargs)
# 1) Check imports
try:
from johnsnowlabs import nlp
from nlu.pipe.pipeline import NLUPipeline
except ImportError as exc:
raise ImportError(
"Could not import johnsnowlabs python package. "
"Please install it with `pip install johnsnowlabs`."
) from exc
# 2) Start a Spark Session
try:
os.environ["PYSPARK_PYTHON"] = sys.executable
os.environ["PYSPARK_DRIVER_PYTHON"] = sys.executable
nlp.start(hardware_target=hardware_target)
except Exception as exc:
raise Exception("Failure starting Spark Session") from exc
# 3) Load the model
try:
if isinstance(model, str):
self.model = nlp.load(model)
elif isinstance(model, NLUPipeline):
self.model = model
else:
self.model = nlp.to_nlu_pipe(model)
except Exception as exc:
raise Exception("Failure loading model") from exc
class Config:
"""Configuration for this pydantic object."""
extra = Extra.forbid
def embed_documents(self, texts: List[str]) -> List[List[float]]:
"""Compute doc embeddings using a JohnSnowLabs transformer model.
Args:
texts: The list of texts to embed.
Returns:
List of embeddings, one for each text.
"""
        df = self.model.predict(texts, output_level="document")
        emb_col = None
        for c in df.columns:
            if "embedding" in c:
                emb_col = c
        if emb_col is None:
            # Guard against a confusing KeyError if the pipeline produced
            # no embedding column.
            raise ValueError("No embedding column found in prediction output.")
        return [vec.tolist() for vec in df[emb_col].tolist()]
def embed_query(self, text: str) -> List[float]:
"""Compute query embeddings using a JohnSnowLabs transformer model.
Args:
text: The text to embed.
Returns:
Embeddings for the text.
"""
return self.embed_documents([text])[0]

View File

@@ -0,0 +1,168 @@
import importlib
import logging
from typing import Any, Callable, List, Optional
from langchain_community.embeddings.self_hosted import SelfHostedEmbeddings
DEFAULT_MODEL_NAME = "sentence-transformers/all-mpnet-base-v2"
DEFAULT_INSTRUCT_MODEL = "hkunlp/instructor-large"
DEFAULT_EMBED_INSTRUCTION = "Represent the document for retrieval: "
DEFAULT_QUERY_INSTRUCTION = (
"Represent the question for retrieving supporting documents: "
)
logger = logging.getLogger(__name__)
def _embed_documents(client: Any, *args: Any, **kwargs: Any) -> List[List[float]]:
"""Inference function to send to the remote hardware.
    Accepts a loaded sentence_transformers (or INSTRUCTOR) client and
    returns a list of embeddings for each document in the batch.
"""
return client.encode(*args, **kwargs)
def load_embedding_model(model_id: str, instruct: bool = False, device: int = 0) -> Any:
"""Load the embedding model."""
if not instruct:
import sentence_transformers
client = sentence_transformers.SentenceTransformer(model_id)
else:
from InstructorEmbedding import INSTRUCTOR
client = INSTRUCTOR(model_id)
if importlib.util.find_spec("torch") is not None:
import torch
cuda_device_count = torch.cuda.device_count()
if device < -1 or (device >= cuda_device_count):
raise ValueError(
f"Got device=={device}, "
f"device is required to be within [-1, {cuda_device_count})"
)
if device < 0 and cuda_device_count > 0:
logger.warning(
"Device has %d GPUs available. "
"Provide device={deviceId} to `from_model_id` to use available"
"GPUs for execution. deviceId is -1 for CPU and "
"can be a positive integer associated with CUDA device id.",
cuda_device_count,
)
        if device >= 0:
            # device == -1 selects CPU; only move the model for CUDA ids,
            # since .to(-1) is not a valid device specifier.
            client = client.to(device)
return client
class SelfHostedHuggingFaceEmbeddings(SelfHostedEmbeddings):
"""HuggingFace embedding models on self-hosted remote hardware.
Supported hardware includes auto-launched instances on AWS, GCP, Azure,
and Lambda, as well as servers specified
by IP address and SSH credentials (such as on-prem, or another cloud
like Paperspace, Coreweave, etc.).
To use, you should have the ``runhouse`` python package installed.
Example:
.. code-block:: python
from langchain_community.embeddings import SelfHostedHuggingFaceEmbeddings
import runhouse as rh
model_name = "sentence-transformers/all-mpnet-base-v2"
gpu = rh.cluster(name="rh-a10x", instance_type="A100:1")
hf = SelfHostedHuggingFaceEmbeddings(model_name=model_name, hardware=gpu)
"""
client: Any #: :meta private:
model_id: str = DEFAULT_MODEL_NAME
"""Model name to use."""
model_reqs: List[str] = ["./", "sentence_transformers", "torch"]
"""Requirements to install on hardware to inference the model."""
hardware: Any
"""Remote hardware to send the inference function to."""
model_load_fn: Callable = load_embedding_model
"""Function to load the model remotely on the server."""
load_fn_kwargs: Optional[dict] = None
"""Keyword arguments to pass to the model load function."""
inference_fn: Callable = _embed_documents
"""Inference function to extract the embeddings."""
def __init__(self, **kwargs: Any):
"""Initialize the remote inference function."""
load_fn_kwargs = kwargs.pop("load_fn_kwargs", {})
load_fn_kwargs["model_id"] = load_fn_kwargs.get("model_id", DEFAULT_MODEL_NAME)
load_fn_kwargs["instruct"] = load_fn_kwargs.get("instruct", False)
load_fn_kwargs["device"] = load_fn_kwargs.get("device", 0)
super().__init__(load_fn_kwargs=load_fn_kwargs, **kwargs)
class SelfHostedHuggingFaceInstructEmbeddings(SelfHostedHuggingFaceEmbeddings):
"""HuggingFace InstructEmbedding models on self-hosted remote hardware.
Supported hardware includes auto-launched instances on AWS, GCP, Azure,
and Lambda, as well as servers specified
by IP address and SSH credentials (such as on-prem, or another
cloud like Paperspace, Coreweave, etc.).
To use, you should have the ``runhouse`` python package installed.
Example:
.. code-block:: python
from langchain_community.embeddings import SelfHostedHuggingFaceInstructEmbeddings
import runhouse as rh
model_name = "hkunlp/instructor-large"
gpu = rh.cluster(name='rh-a10x', instance_type='A100:1')
hf = SelfHostedHuggingFaceInstructEmbeddings(
model_name=model_name, hardware=gpu)
""" # noqa: E501
model_id: str = DEFAULT_INSTRUCT_MODEL
"""Model name to use."""
embed_instruction: str = DEFAULT_EMBED_INSTRUCTION
"""Instruction to use for embedding documents."""
query_instruction: str = DEFAULT_QUERY_INSTRUCTION
"""Instruction to use for embedding query."""
model_reqs: List[str] = ["./", "InstructorEmbedding", "torch"]
"""Requirements to install on hardware to inference the model."""
def __init__(self, **kwargs: Any):
"""Initialize the remote inference function."""
load_fn_kwargs = kwargs.pop("load_fn_kwargs", {})
load_fn_kwargs["model_id"] = load_fn_kwargs.get(
"model_id", DEFAULT_INSTRUCT_MODEL
)
load_fn_kwargs["instruct"] = load_fn_kwargs.get("instruct", True)
load_fn_kwargs["device"] = load_fn_kwargs.get("device", 0)
super().__init__(load_fn_kwargs=load_fn_kwargs, **kwargs)
def embed_documents(self, texts: List[str]) -> List[List[float]]:
"""Compute doc embeddings using a HuggingFace instruct model.
Args:
texts: The list of texts to embed.
Returns:
List of embeddings, one for each text.
"""
instruction_pairs = []
for text in texts:
instruction_pairs.append([self.embed_instruction, text])
embeddings = self.client(self.pipeline_ref, instruction_pairs)
return embeddings.tolist()
def embed_query(self, text: str) -> List[float]:
"""Compute query embeddings using a HuggingFace instruct model.
Args:
text: The text to embed.
Returns:
Embeddings for the text.
"""
instruction_pair = [self.query_instruction, text]
embedding = self.client(self.pipeline_ref, [instruction_pair])[0]
return embedding.tolist()
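As a hedged illustration (not part of the diff), the module-level loader and inference function above can be exercised locally exactly the way the remote hardware would run them; only `sentence-transformers` is needed for this path, and the model id is the module's own default:

```python
# Mirrors what SelfHostedHuggingFaceEmbeddings ships to the remote box:
# load_embedding_model builds the client, _embed_documents runs inference.
client = load_embedding_model(
    "sentence-transformers/all-mpnet-base-v2",
    instruct=False,
    device=-1,  # -1 selects CPU in the validation logic above
)
vectors = _embed_documents(client, ["self-hosted embeddings, run locally"])
print(len(vectors), len(vectors[0]))  # 1 vector; dimensionality is model-dependent
```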

View File

@@ -0,0 +1,853 @@
"""
**LLM** classes provide
access to the large language model (**LLM**) APIs and services.
**Class hierarchy:**
.. code-block::
BaseLanguageModel --> BaseLLM --> LLM --> <name> # Examples: AI21, HuggingFaceHub
**Main helpers:**
.. code-block::
LLMResult, PromptValue,
CallbackManagerForLLMRun, AsyncCallbackManagerForLLMRun,
CallbackManager, AsyncCallbackManager,
AIMessage, BaseMessage
""" # noqa: E501
from typing import Any, Callable, Dict, Type
from langchain_core.language_models.llms import BaseLLM
def _import_ai21() -> Any:
from langchain_community.llms.ai21 import AI21
return AI21
def _import_aleph_alpha() -> Any:
from langchain_community.llms.aleph_alpha import AlephAlpha
return AlephAlpha
def _import_amazon_api_gateway() -> Any:
from langchain_community.llms.amazon_api_gateway import AmazonAPIGateway
return AmazonAPIGateway
def _import_anthropic() -> Any:
from langchain_community.llms.anthropic import Anthropic
return Anthropic
def _import_anyscale() -> Any:
from langchain_community.llms.anyscale import Anyscale
return Anyscale
def _import_arcee() -> Any:
from langchain_community.llms.arcee import Arcee
return Arcee
def _import_aviary() -> Any:
from langchain_community.llms.aviary import Aviary
return Aviary
def _import_azureml_endpoint() -> Any:
from langchain_community.llms.azureml_endpoint import AzureMLOnlineEndpoint
return AzureMLOnlineEndpoint
def _import_baidu_qianfan_endpoint() -> Any:
from langchain_community.llms.baidu_qianfan_endpoint import QianfanLLMEndpoint
return QianfanLLMEndpoint
def _import_bananadev() -> Any:
from langchain_community.llms.bananadev import Banana
return Banana
def _import_baseten() -> Any:
from langchain_community.llms.baseten import Baseten
return Baseten
def _import_beam() -> Any:
from langchain_community.llms.beam import Beam
return Beam
def _import_bedrock() -> Any:
from langchain_community.llms.bedrock import Bedrock
return Bedrock
def _import_bittensor() -> Any:
from langchain_community.llms.bittensor import NIBittensorLLM
return NIBittensorLLM
def _import_cerebriumai() -> Any:
from langchain_community.llms.cerebriumai import CerebriumAI
return CerebriumAI
def _import_chatglm() -> Any:
from langchain_community.llms.chatglm import ChatGLM
return ChatGLM
def _import_clarifai() -> Any:
from langchain_community.llms.clarifai import Clarifai
return Clarifai
def _import_cohere() -> Any:
from langchain_community.llms.cohere import Cohere
return Cohere
def _import_ctransformers() -> Any:
from langchain_community.llms.ctransformers import CTransformers
return CTransformers
def _import_ctranslate2() -> Any:
from langchain_community.llms.ctranslate2 import CTranslate2
return CTranslate2
def _import_databricks() -> Any:
from langchain_community.llms.databricks import Databricks
return Databricks
def _import_databricks_chat() -> Any:
from langchain_community.chat_models.databricks import ChatDatabricks
return ChatDatabricks
def _import_deepinfra() -> Any:
from langchain_community.llms.deepinfra import DeepInfra
return DeepInfra
def _import_deepsparse() -> Any:
from langchain_community.llms.deepsparse import DeepSparse
return DeepSparse
def _import_edenai() -> Any:
from langchain_community.llms.edenai import EdenAI
return EdenAI
def _import_fake() -> Any:
from langchain_community.llms.fake import FakeListLLM
return FakeListLLM
def _import_fireworks() -> Any:
from langchain_community.llms.fireworks import Fireworks
return Fireworks
def _import_forefrontai() -> Any:
from langchain_community.llms.forefrontai import ForefrontAI
return ForefrontAI
def _import_gigachat() -> Any:
from langchain_community.llms.gigachat import GigaChat
return GigaChat
def _import_google_palm() -> Any:
from langchain_community.llms.google_palm import GooglePalm
return GooglePalm
def _import_gooseai() -> Any:
from langchain_community.llms.gooseai import GooseAI
return GooseAI
def _import_gpt4all() -> Any:
from langchain_community.llms.gpt4all import GPT4All
return GPT4All
def _import_gradient_ai() -> Any:
from langchain_community.llms.gradient_ai import GradientLLM
return GradientLLM
def _import_huggingface_endpoint() -> Any:
from langchain_community.llms.huggingface_endpoint import HuggingFaceEndpoint
return HuggingFaceEndpoint
def _import_huggingface_hub() -> Any:
from langchain_community.llms.huggingface_hub import HuggingFaceHub
return HuggingFaceHub
def _import_huggingface_pipeline() -> Any:
from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline
return HuggingFacePipeline
def _import_huggingface_text_gen_inference() -> Any:
from langchain_community.llms.huggingface_text_gen_inference import (
HuggingFaceTextGenInference,
)
return HuggingFaceTextGenInference
def _import_human() -> Any:
from langchain_community.llms.human import HumanInputLLM
return HumanInputLLM
def _import_javelin_ai_gateway() -> Any:
from langchain_community.llms.javelin_ai_gateway import JavelinAIGateway
return JavelinAIGateway
def _import_koboldai() -> Any:
from langchain_community.llms.koboldai import KoboldApiLLM
return KoboldApiLLM
def _import_llamacpp() -> Any:
from langchain_community.llms.llamacpp import LlamaCpp
return LlamaCpp
def _import_manifest() -> Any:
from langchain_community.llms.manifest import ManifestWrapper
return ManifestWrapper
def _import_minimax() -> Any:
from langchain_community.llms.minimax import Minimax
return Minimax
def _import_mlflow() -> Any:
from langchain_community.llms.mlflow import Mlflow
return Mlflow
def _import_mlflow_chat() -> Any:
from langchain_community.chat_models.mlflow import ChatMlflow
return ChatMlflow
def _import_mlflow_ai_gateway() -> Any:
from langchain_community.llms.mlflow_ai_gateway import MlflowAIGateway
return MlflowAIGateway
def _import_modal() -> Any:
from langchain_community.llms.modal import Modal
return Modal
def _import_mosaicml() -> Any:
from langchain_community.llms.mosaicml import MosaicML
return MosaicML
def _import_nlpcloud() -> Any:
from langchain_community.llms.nlpcloud import NLPCloud
return NLPCloud
def _import_octoai_endpoint() -> Any:
from langchain_community.llms.octoai_endpoint import OctoAIEndpoint
return OctoAIEndpoint
def _import_ollama() -> Any:
from langchain_community.llms.ollama import Ollama
return Ollama
def _import_opaqueprompts() -> Any:
from langchain_community.llms.opaqueprompts import OpaquePrompts
return OpaquePrompts
def _import_openllm() -> Any:
from langchain_community.llms.openllm import OpenLLM
return OpenLLM
def _import_openlm() -> Any:
from langchain_community.llms.openlm import OpenLM
return OpenLM
def _import_pai_eas_endpoint() -> Any:
from langchain_community.llms.pai_eas_endpoint import PaiEasEndpoint
return PaiEasEndpoint
def _import_petals() -> Any:
from langchain_community.llms.petals import Petals
return Petals
def _import_pipelineai() -> Any:
from langchain_community.llms.pipelineai import PipelineAI
return PipelineAI
def _import_predibase() -> Any:
from langchain_community.llms.predibase import Predibase
return Predibase
def _import_predictionguard() -> Any:
from langchain_community.llms.predictionguard import PredictionGuard
return PredictionGuard
def _import_promptlayer() -> Any:
from langchain_community.llms.promptlayer_openai import PromptLayerOpenAI
return PromptLayerOpenAI
def _import_promptlayer_chat() -> Any:
from langchain_community.llms.promptlayer_openai import PromptLayerOpenAIChat
return PromptLayerOpenAIChat
def _import_replicate() -> Any:
from langchain_community.llms.replicate import Replicate
return Replicate
def _import_rwkv() -> Any:
from langchain_community.llms.rwkv import RWKV
return RWKV
def _import_sagemaker_endpoint() -> Any:
from langchain_community.llms.sagemaker_endpoint import SagemakerEndpoint
return SagemakerEndpoint
def _import_self_hosted() -> Any:
from langchain_community.llms.self_hosted import SelfHostedPipeline
return SelfHostedPipeline
def _import_self_hosted_hugging_face() -> Any:
from langchain_community.llms.self_hosted_hugging_face import (
SelfHostedHuggingFaceLLM,
)
return SelfHostedHuggingFaceLLM
def _import_stochasticai() -> Any:
from langchain_community.llms.stochasticai import StochasticAI
return StochasticAI
def _import_symblai_nebula() -> Any:
from langchain_community.llms.symblai_nebula import Nebula
return Nebula
def _import_textgen() -> Any:
from langchain_community.llms.textgen import TextGen
return TextGen
def _import_titan_takeoff() -> Any:
from langchain_community.llms.titan_takeoff import TitanTakeoff
return TitanTakeoff
def _import_titan_takeoff_pro() -> Any:
from langchain_community.llms.titan_takeoff_pro import TitanTakeoffPro
return TitanTakeoffPro
def _import_together() -> Any:
from langchain_community.llms.together import Together
return Together
def _import_tongyi() -> Any:
from langchain_community.llms.tongyi import Tongyi
return Tongyi
def _import_vertex() -> Any:
from langchain_community.llms.vertexai import VertexAI
return VertexAI
def _import_vertex_model_garden() -> Any:
from langchain_community.llms.vertexai import VertexAIModelGarden
return VertexAIModelGarden
def _import_vllm() -> Any:
from langchain_community.llms.vllm import VLLM
return VLLM
def _import_vllm_openai() -> Any:
from langchain_community.llms.vllm import VLLMOpenAI
return VLLMOpenAI
def _import_watsonxllm() -> Any:
from langchain_community.llms.watsonxllm import WatsonxLLM
return WatsonxLLM
def _import_writer() -> Any:
from langchain_community.llms.writer import Writer
return Writer
def _import_xinference() -> Any:
from langchain_community.llms.xinference import Xinference
return Xinference
def _import_yandex_gpt() -> Any:
from langchain_community.llms.yandex import YandexGPT
return YandexGPT
def _import_volcengine_maas() -> Any:
from langchain_community.llms.volcengine_maas import VolcEngineMaasLLM
return VolcEngineMaasLLM
def __getattr__(name: str) -> Any:
if name == "AI21":
return _import_ai21()
elif name == "AlephAlpha":
return _import_aleph_alpha()
elif name == "AmazonAPIGateway":
return _import_amazon_api_gateway()
elif name == "Anthropic":
return _import_anthropic()
elif name == "Anyscale":
return _import_anyscale()
elif name == "Arcee":
return _import_arcee()
elif name == "Aviary":
return _import_aviary()
elif name == "AzureMLOnlineEndpoint":
return _import_azureml_endpoint()
elif name == "QianfanLLMEndpoint":
return _import_baidu_qianfan_endpoint()
elif name == "Banana":
return _import_bananadev()
elif name == "Baseten":
return _import_baseten()
elif name == "Beam":
return _import_beam()
elif name == "Bedrock":
return _import_bedrock()
elif name == "NIBittensorLLM":
return _import_bittensor()
elif name == "CerebriumAI":
return _import_cerebriumai()
elif name == "ChatGLM":
return _import_chatglm()
elif name == "Clarifai":
return _import_clarifai()
elif name == "Cohere":
return _import_cohere()
elif name == "CTransformers":
return _import_ctransformers()
elif name == "CTranslate2":
return _import_ctranslate2()
elif name == "Databricks":
return _import_databricks()
elif name == "DeepInfra":
return _import_deepinfra()
elif name == "DeepSparse":
return _import_deepsparse()
elif name == "EdenAI":
return _import_edenai()
elif name == "FakeListLLM":
return _import_fake()
elif name == "Fireworks":
return _import_fireworks()
elif name == "ForefrontAI":
return _import_forefrontai()
elif name == "GigaChat":
return _import_gigachat()
elif name == "GooglePalm":
return _import_google_palm()
elif name == "GooseAI":
return _import_gooseai()
elif name == "GPT4All":
return _import_gpt4all()
elif name == "GradientLLM":
return _import_gradient_ai()
elif name == "HuggingFaceEndpoint":
return _import_huggingface_endpoint()
elif name == "HuggingFaceHub":
return _import_huggingface_hub()
elif name == "HuggingFacePipeline":
return _import_huggingface_pipeline()
elif name == "HuggingFaceTextGenInference":
return _import_huggingface_text_gen_inference()
elif name == "HumanInputLLM":
return _import_human()
elif name == "JavelinAIGateway":
return _import_javelin_ai_gateway()
elif name == "KoboldApiLLM":
return _import_koboldai()
elif name == "LlamaCpp":
return _import_llamacpp()
elif name == "ManifestWrapper":
return _import_manifest()
elif name == "Minimax":
return _import_minimax()
elif name == "Mlflow":
return _import_mlflow()
elif name == "MlflowAIGateway":
return _import_mlflow_ai_gateway()
elif name == "Modal":
return _import_modal()
elif name == "MosaicML":
return _import_mosaicml()
elif name == "NLPCloud":
return _import_nlpcloud()
elif name == "OctoAIEndpoint":
return _import_octoai_endpoint()
elif name == "Ollama":
return _import_ollama()
elif name == "OpaquePrompts":
return _import_opaqueprompts()
elif name == "OpenLLM":
return _import_openllm()
elif name == "OpenLM":
return _import_openlm()
elif name == "PaiEasEndpoint":
return _import_pai_eas_endpoint()
elif name == "Petals":
return _import_petals()
elif name == "PipelineAI":
return _import_pipelineai()
elif name == "Predibase":
return _import_predibase()
elif name == "PredictionGuard":
return _import_predictionguard()
elif name == "PromptLayerOpenAI":
return _import_promptlayer()
elif name == "PromptLayerOpenAIChat":
return _import_promptlayer_chat()
elif name == "Replicate":
return _import_replicate()
elif name == "RWKV":
return _import_rwkv()
elif name == "SagemakerEndpoint":
return _import_sagemaker_endpoint()
elif name == "SelfHostedPipeline":
return _import_self_hosted()
elif name == "SelfHostedHuggingFaceLLM":
return _import_self_hosted_hugging_face()
elif name == "StochasticAI":
return _import_stochasticai()
elif name == "Nebula":
return _import_symblai_nebula()
elif name == "TextGen":
return _import_textgen()
elif name == "TitanTakeoff":
return _import_titan_takeoff()
elif name == "TitanTakeoffPro":
return _import_titan_takeoff_pro()
elif name == "Together":
return _import_together()
elif name == "Tongyi":
return _import_tongyi()
elif name == "VertexAI":
return _import_vertex()
elif name == "VertexAIModelGarden":
return _import_vertex_model_garden()
elif name == "VLLM":
return _import_vllm()
elif name == "VLLMOpenAI":
return _import_vllm_openai()
elif name == "WatsonxLLM":
return _import_watsonxllm()
elif name == "Writer":
return _import_writer()
elif name == "Xinference":
return _import_xinference()
elif name == "YandexGPT":
return _import_yandex_gpt()
elif name == "VolcEngineMaasLLM":
return _import_volcengine_maas()
elif name == "type_to_cls_dict":
# for backwards compatibility
type_to_cls_dict: Dict[str, Type[BaseLLM]] = {
k: v() for k, v in get_type_to_cls_dict().items()
}
return type_to_cls_dict
else:
raise AttributeError(f"Could not find: {name}")
__all__ = [
"AI21",
"AlephAlpha",
"AmazonAPIGateway",
"Anthropic",
"Anyscale",
"Arcee",
"Aviary",
"AzureMLOnlineEndpoint",
"Banana",
"Baseten",
"Beam",
"Bedrock",
"CTransformers",
"CTranslate2",
"CerebriumAI",
"ChatGLM",
"Clarifai",
"Cohere",
"Databricks",
"DeepInfra",
"DeepSparse",
"EdenAI",
"FakeListLLM",
"Fireworks",
"ForefrontAI",
"GigaChat",
"GPT4All",
"GooglePalm",
"GooseAI",
"GradientLLM",
"HuggingFaceEndpoint",
"HuggingFaceHub",
"HuggingFacePipeline",
"HuggingFaceTextGenInference",
"HumanInputLLM",
"KoboldApiLLM",
"LlamaCpp",
"TextGen",
"ManifestWrapper",
"Minimax",
"MlflowAIGateway",
"Modal",
"MosaicML",
"Nebula",
"NIBittensorLLM",
"NLPCloud",
"Ollama",
"OpenLLM",
"OpenLM",
"PaiEasEndpoint",
"Petals",
"PipelineAI",
"Predibase",
"PredictionGuard",
"PromptLayerOpenAI",
"PromptLayerOpenAIChat",
"OpaquePrompts",
"RWKV",
"Replicate",
"SagemakerEndpoint",
"SelfHostedHuggingFaceLLM",
"SelfHostedPipeline",
"StochasticAI",
"TitanTakeoff",
"TitanTakeoffPro",
"Tongyi",
"VertexAI",
"VertexAIModelGarden",
"VLLM",
"VLLMOpenAI",
"WatsonxLLM",
"Writer",
"OctoAIEndpoint",
"Xinference",
"JavelinAIGateway",
"QianfanLLMEndpoint",
"YandexGPT",
"VolcEngineMaasLLM",
]
def get_type_to_cls_dict() -> Dict[str, Callable[[], Type[BaseLLM]]]:
return {
"ai21": _import_ai21,
"aleph_alpha": _import_aleph_alpha,
"amazon_api_gateway": _import_amazon_api_gateway,
"amazon_bedrock": _import_bedrock,
"anthropic": _import_anthropic,
"anyscale": _import_anyscale,
"arcee": _import_arcee,
"aviary": _import_aviary,
"azureml_endpoint": _import_azureml_endpoint,
"bananadev": _import_bananadev,
"baseten": _import_baseten,
"beam": _import_beam,
"cerebriumai": _import_cerebriumai,
"chat_glm": _import_chatglm,
"clarifai": _import_clarifai,
"cohere": _import_cohere,
"ctransformers": _import_ctransformers,
"ctranslate2": _import_ctranslate2,
"databricks": _import_databricks,
"databricks-chat": _import_databricks_chat,
"deepinfra": _import_deepinfra,
"deepsparse": _import_deepsparse,
"edenai": _import_edenai,
"fake-list": _import_fake,
"forefrontai": _import_forefrontai,
"giga-chat-model": _import_gigachat,
"google_palm": _import_google_palm,
"gooseai": _import_gooseai,
"gradient": _import_gradient_ai,
"gpt4all": _import_gpt4all,
"huggingface_endpoint": _import_huggingface_endpoint,
"huggingface_hub": _import_huggingface_hub,
"huggingface_pipeline": _import_huggingface_pipeline,
"huggingface_textgen_inference": _import_huggingface_text_gen_inference,
"human-input": _import_human,
"koboldai": _import_koboldai,
"llamacpp": _import_llamacpp,
"textgen": _import_textgen,
"minimax": _import_minimax,
"mlflow": _import_mlflow,
"mlflow-chat": _import_mlflow_chat,
"mlflow-ai-gateway": _import_mlflow_ai_gateway,
"modal": _import_modal,
"mosaic": _import_mosaicml,
"nebula": _import_symblai_nebula,
"nibittensor": _import_bittensor,
"nlpcloud": _import_nlpcloud,
"ollama": _import_ollama,
"openlm": _import_openlm,
"pai_eas_endpoint": _import_pai_eas_endpoint,
"petals": _import_petals,
"pipelineai": _import_pipelineai,
"predibase": _import_predibase,
"opaqueprompts": _import_opaqueprompts,
"replicate": _import_replicate,
"rwkv": _import_rwkv,
"sagemaker_endpoint": _import_sagemaker_endpoint,
"self_hosted": _import_self_hosted,
"self_hosted_hugging_face": _import_self_hosted_hugging_face,
"stochasticai": _import_stochasticai,
"together": _import_together,
"tongyi": _import_tongyi,
"titan_takeoff": _import_titan_takeoff,
"titan_takeoff_pro": _import_titan_takeoff_pro,
"vertexai": _import_vertex,
"vertexai_model_garden": _import_vertex_model_garden,
"openllm": _import_openllm,
"openllm_client": _import_openllm,
"vllm": _import_vllm,
"vllm_openai": _import_vllm_openai,
"watsonxllm": _import_watsonxllm,
"writer": _import_writer,
"xinference": _import_xinference,
"javelin-ai-gateway": _import_javelin_ai_gateway,
"qianfan_endpoint": _import_baidu_qianfan_endpoint,
"yandex_gpt": _import_yandex_gpt,
"VolcEngineMaasLLM": _import_volcengine_maas,
}
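The module above combines two lookup mechanisms: PEP 562's module-level `__getattr__` for lazy attribute imports, and `get_type_to_cls_dict` as a string-keyed registry whose values are zero-argument importers. A hedged sketch of driving both directly (illustration only; it assumes a locally running Ollama server, and the `model` value is an example):

```python
from langchain_community import llms
from langchain_community.llms import get_type_to_cls_dict

# Lazy attribute access: the ollama module is only imported on first touch,
# via the module-level __getattr__ defined above.
Ollama = llms.Ollama

# Registry access: resolve a provider key to its class, then instantiate.
registry = get_type_to_cls_dict()
llm_cls = registry["ollama"]()  # calling the importer returns the class
llm = llm_cls(model="llama2")   # model name is an assumption for the example
```

Keeping the importers behind functions means none of the ~80 provider modules (and their optional dependencies) load until something actually asks for them.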

View File

@@ -0,0 +1,351 @@
import re
import warnings
from typing import (
Any,
AsyncIterator,
Callable,
Dict,
Iterator,
List,
Mapping,
Optional,
)
from langchain_core.callbacks import (
AsyncCallbackManagerForLLMRun,
CallbackManagerForLLMRun,
)
from langchain_core.language_models import BaseLanguageModel
from langchain_core.language_models.llms import LLM
from langchain_core.outputs import GenerationChunk
from langchain_core.prompt_values import PromptValue
from langchain_core.pydantic_v1 import Field, SecretStr, root_validator
from langchain_core.utils import (
check_package_version,
get_from_dict_or_env,
get_pydantic_field_names,
)
from langchain_core.utils.utils import build_extra_kwargs, convert_to_secret_str
class _AnthropicCommon(BaseLanguageModel):
client: Any = None #: :meta private:
async_client: Any = None #: :meta private:
model: str = Field(default="claude-2", alias="model_name")
"""Model name to use."""
max_tokens_to_sample: int = Field(default=256, alias="max_tokens")
"""Denotes the number of tokens to predict per generation."""
temperature: Optional[float] = None
"""A non-negative float that tunes the degree of randomness in generation."""
top_k: Optional[int] = None
"""Number of most likely tokens to consider at each step."""
top_p: Optional[float] = None
"""Total probability mass of tokens to consider at each step."""
streaming: bool = False
"""Whether to stream the results."""
default_request_timeout: Optional[float] = None
"""Timeout for requests to Anthropic Completion API. Default is 600 seconds."""
anthropic_api_url: Optional[str] = None
anthropic_api_key: Optional[SecretStr] = None
HUMAN_PROMPT: Optional[str] = None
AI_PROMPT: Optional[str] = None
count_tokens: Optional[Callable[[str], int]] = None
model_kwargs: Dict[str, Any] = Field(default_factory=dict)
@root_validator(pre=True)
def build_extra(cls, values: Dict) -> Dict:
extra = values.get("model_kwargs", {})
all_required_field_names = get_pydantic_field_names(cls)
values["model_kwargs"] = build_extra_kwargs(
extra, values, all_required_field_names
)
return values
@root_validator()
def validate_environment(cls, values: Dict) -> Dict:
"""Validate that api key and python package exists in environment."""
values["anthropic_api_key"] = convert_to_secret_str(
get_from_dict_or_env(values, "anthropic_api_key", "ANTHROPIC_API_KEY")
)
# Get custom api url from environment.
values["anthropic_api_url"] = get_from_dict_or_env(
values,
"anthropic_api_url",
"ANTHROPIC_API_URL",
default="https://api.anthropic.com",
)
try:
import anthropic
check_package_version("anthropic", gte_version="0.3")
values["client"] = anthropic.Anthropic(
base_url=values["anthropic_api_url"],
api_key=values["anthropic_api_key"].get_secret_value(),
timeout=values["default_request_timeout"],
)
values["async_client"] = anthropic.AsyncAnthropic(
base_url=values["anthropic_api_url"],
api_key=values["anthropic_api_key"].get_secret_value(),
timeout=values["default_request_timeout"],
)
values["HUMAN_PROMPT"] = anthropic.HUMAN_PROMPT
values["AI_PROMPT"] = anthropic.AI_PROMPT
values["count_tokens"] = values["client"].count_tokens
except ImportError:
raise ImportError(
"Could not import anthropic python package. "
"Please it install it with `pip install anthropic`."
)
return values
@property
def _default_params(self) -> Mapping[str, Any]:
"""Get the default parameters for calling Anthropic API."""
d = {
"max_tokens_to_sample": self.max_tokens_to_sample,
"model": self.model,
}
if self.temperature is not None:
d["temperature"] = self.temperature
if self.top_k is not None:
d["top_k"] = self.top_k
if self.top_p is not None:
d["top_p"] = self.top_p
return {**d, **self.model_kwargs}
@property
def _identifying_params(self) -> Mapping[str, Any]:
"""Get the identifying parameters."""
        return {**self._default_params}
def _get_anthropic_stop(self, stop: Optional[List[str]] = None) -> List[str]:
if not self.HUMAN_PROMPT or not self.AI_PROMPT:
raise NameError("Please ensure the anthropic package is loaded")
if stop is None:
stop = []
# Never want model to invent new turns of Human / Assistant dialog.
stop.extend([self.HUMAN_PROMPT])
return stop
class Anthropic(LLM, _AnthropicCommon):
"""Anthropic large language models.
To use, you should have the ``anthropic`` python package installed, and the
environment variable ``ANTHROPIC_API_KEY`` set with your API key, or pass
it as a named parameter to the constructor.
Example:
.. code-block:: python
import anthropic
from langchain_community.llms import Anthropic
model = Anthropic(model="<model_name>", anthropic_api_key="my-api-key")
# Simplest invocation, automatically wrapped with HUMAN_PROMPT
# and AI_PROMPT.
response = model("What are the biggest risks facing humanity?")
# Or if you want to use the chat mode, build a few-shot-prompt, or
# put words in the Assistant's mouth, use HUMAN_PROMPT and AI_PROMPT:
raw_prompt = "What are the biggest risks facing humanity?"
prompt = f"{anthropic.HUMAN_PROMPT} {prompt}{anthropic.AI_PROMPT}"
response = model(prompt)
"""
class Config:
"""Configuration for this pydantic object."""
allow_population_by_field_name = True
arbitrary_types_allowed = True
@root_validator()
def raise_warning(cls, values: Dict) -> Dict:
"""Raise warning that this class is deprecated."""
warnings.warn(
"This Anthropic LLM is deprecated. "
"Please use `from langchain_community.chat_models import ChatAnthropic` "
"instead"
)
return values
@property
def _llm_type(self) -> str:
"""Return type of llm."""
return "anthropic-llm"
def _wrap_prompt(self, prompt: str) -> str:
if not self.HUMAN_PROMPT or not self.AI_PROMPT:
raise NameError("Please ensure the anthropic package is loaded")
if prompt.startswith(self.HUMAN_PROMPT):
return prompt # Already wrapped.
# Guard against common errors in specifying wrong number of newlines.
corrected_prompt, n_subs = re.subn(r"^\n*Human:", self.HUMAN_PROMPT, prompt)
if n_subs == 1:
return corrected_prompt
# As a last resort, wrap the prompt ourselves to emulate instruct-style.
return f"{self.HUMAN_PROMPT} {prompt}{self.AI_PROMPT} Sure, here you go:\n"
def _call(
self,
prompt: str,
stop: Optional[List[str]] = None,
run_manager: Optional[CallbackManagerForLLMRun] = None,
**kwargs: Any,
) -> str:
r"""Call out to Anthropic's completion endpoint.
Args:
prompt: The prompt to pass into the model.
stop: Optional list of stop words to use when generating.
Returns:
The string generated by the model.
Example:
.. code-block:: python
prompt = "What are the biggest risks facing humanity?"
prompt = f"\n\nHuman: {prompt}\n\nAssistant:"
response = model(prompt)
"""
if self.streaming:
completion = ""
for chunk in self._stream(
prompt=prompt, stop=stop, run_manager=run_manager, **kwargs
):
completion += chunk.text
return completion
stop = self._get_anthropic_stop(stop)
params = {**self._default_params, **kwargs}
response = self.client.completions.create(
prompt=self._wrap_prompt(prompt),
stop_sequences=stop,
**params,
)
return response.completion
def convert_prompt(self, prompt: PromptValue) -> str:
return self._wrap_prompt(prompt.to_string())
async def _acall(
self,
prompt: str,
stop: Optional[List[str]] = None,
run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
**kwargs: Any,
) -> str:
"""Call out to Anthropic's completion endpoint asynchronously."""
if self.streaming:
completion = ""
async for chunk in self._astream(
prompt=prompt, stop=stop, run_manager=run_manager, **kwargs
):
completion += chunk.text
return completion
stop = self._get_anthropic_stop(stop)
params = {**self._default_params, **kwargs}
response = await self.async_client.completions.create(
prompt=self._wrap_prompt(prompt),
stop_sequences=stop,
**params,
)
return response.completion
def _stream(
self,
prompt: str,
stop: Optional[List[str]] = None,
run_manager: Optional[CallbackManagerForLLMRun] = None,
**kwargs: Any,
) -> Iterator[GenerationChunk]:
r"""Call Anthropic completion_stream and return the resulting generator.
Args:
prompt: The prompt to pass into the model.
stop: Optional list of stop words to use when generating.
Returns:
A generator representing the stream of tokens from Anthropic.
Example:
.. code-block:: python
prompt = "Write a poem about a stream."
prompt = f"\n\nHuman: {prompt}\n\nAssistant:"
generator = anthropic.stream(prompt)
for token in generator:
yield token
"""
stop = self._get_anthropic_stop(stop)
params = {**self._default_params, **kwargs}
for token in self.client.completions.create(
prompt=self._wrap_prompt(prompt), stop_sequences=stop, stream=True, **params
):
chunk = GenerationChunk(text=token.completion)
yield chunk
if run_manager:
run_manager.on_llm_new_token(chunk.text, chunk=chunk)
async def _astream(
self,
prompt: str,
stop: Optional[List[str]] = None,
run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
**kwargs: Any,
) -> AsyncIterator[GenerationChunk]:
r"""Call Anthropic completion_stream and return the resulting generator.
Args:
prompt: The prompt to pass into the model.
stop: Optional list of stop words to use when generating.
Returns:
A generator representing the stream of tokens from Anthropic.
Example:
.. code-block:: python
prompt = "Write a poem about a stream."
prompt = f"\n\nHuman: {prompt}\n\nAssistant:"
generator = anthropic.stream(prompt)
for token in generator:
yield token
"""
stop = self._get_anthropic_stop(stop)
params = {**self._default_params, **kwargs}
async for token in await self.async_client.completions.create(
prompt=self._wrap_prompt(prompt),
stop_sequences=stop,
stream=True,
**params,
):
chunk = GenerationChunk(text=token.completion)
yield chunk
if run_manager:
await run_manager.on_llm_new_token(chunk.text, chunk=chunk)
def get_num_tokens(self, text: str) -> int:
"""Calculate number of tokens."""
if not self.count_tokens:
raise NameError("Please ensure the anthropic package is loaded")
return self.count_tokens(text)
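A hedged end-to-end sketch of the (deprecated) completion-style class above — it assumes the `anthropic` package is installed and `ANTHROPic_API_KEY`-style credentials are available via `ANTHROPIC_API_KEY`; `claude-2` is the class's own default model, and the prompts are examples:

```python
from langchain_community.llms import Anthropic

model = Anthropic(model="claude-2", max_tokens_to_sample=256)

# Plain call: the prompt is wrapped with HUMAN_PROMPT / AI_PROMPT by
# _wrap_prompt before hitting the completions endpoint.
print(model("What are the biggest risks facing humanity?"))

# Streaming: _stream yields GenerationChunk objects, surfaced here as
# incremental text chunks.
for chunk in model.stream("Write a haiku about streams."):
    print(chunk, end="", flush=True)
```

Note that instantiation emits the deprecation warning from `raise_warning` above, pointing to `ChatAnthropic` as the replacement.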

View File

@@ -0,0 +1,126 @@
import json
import logging
from typing import Any, Dict, Iterator, List, Optional
import requests
from langchain_core.callbacks import CallbackManagerForLLMRun
from langchain_core.language_models.llms import LLM
from langchain_core.outputs import GenerationChunk
logger = logging.getLogger(__name__)
class CloudflareWorkersAI(LLM):
"""Langchain LLM class to help to access Cloudflare Workers AI service.
To use, you must provide an API token and
account ID to access Cloudflare Workers AI, and
pass it as a named parameter to the constructor.
Example:
.. code-block:: python
from langchain_community.llms.cloudflare_workersai import CloudflareWorkersAI
my_account_id = "my_account_id"
my_api_token = "my_secret_api_token"
llm_model = "@cf/meta/llama-2-7b-chat-int8"
cf_ai = CloudflareWorkersAI(
account_id=my_account_id,
api_token=my_api_token,
model=llm_model
)
""" # noqa: E501
account_id: str
api_token: str
model: str = "@cf/meta/llama-2-7b-chat-int8"
base_url: str = "https://api.cloudflare.com/client/v4/accounts"
streaming: bool = False
endpoint_url: str = ""
def __init__(self, **kwargs: Any) -> None:
"""Initialize the Cloudflare Workers AI class."""
super().__init__(**kwargs)
self.endpoint_url = f"{self.base_url}/{self.account_id}/ai/run/{self.model}"
@property
def _llm_type(self) -> str:
"""Return type of LLM."""
return "cloudflare"
@property
def _default_params(self) -> Dict[str, Any]:
"""Default parameters"""
return {}
@property
def _identifying_params(self) -> Dict[str, Any]:
"""Identifying parameters"""
return {
"account_id": self.account_id,
"api_token": self.api_token,
"model": self.model,
"base_url": self.base_url,
}
def _call_api(self, prompt: str, params: Dict[str, Any]) -> requests.Response:
"""Call Cloudflare Workers API"""
headers = {"Authorization": f"Bearer {self.api_token}"}
data = {"prompt": prompt, "stream": self.streaming, **params}
        response = requests.post(
            self.endpoint_url,
            headers=headers,
            json=data,
            # stream=True is required for iter_lines() to consume the
            # server-sent events incrementally instead of buffering.
            stream=self.streaming,
        )
return response
def _process_response(self, response: requests.Response) -> str:
"""Process API response"""
if response.ok:
data = response.json()
return data["result"]["response"]
else:
raise ValueError(f"Request failed with status {response.status_code}")
def _stream(
self,
prompt: str,
stop: Optional[List[str]] = None,
run_manager: Optional[CallbackManagerForLLMRun] = None,
**kwargs: Any,
) -> Iterator[GenerationChunk]:
"""Streaming prediction"""
original_steaming: bool = self.streaming
self.streaming = True
_response_prefix_count = len("data: ")
_response_stream_end = b"data: [DONE]"
for chunk in self._call_api(prompt, kwargs).iter_lines():
if chunk == _response_stream_end:
break
if len(chunk) > _response_prefix_count:
try:
data = json.loads(chunk[_response_prefix_count:])
except Exception as e:
logger.debug(chunk)
raise e
if data is not None and "response" in data:
yield GenerationChunk(text=data["response"])
if run_manager:
run_manager.on_llm_new_token(data["response"])
logger.debug("stream end")
self.streaming = original_steaming
def _call(
self,
prompt: str,
stop: Optional[List[str]] = None,
run_manager: Optional[CallbackManagerForLLMRun] = None,
**kwargs: Any,
) -> str:
"""Regular prediction"""
if self.streaming:
return "".join(
[c.text for c in self._stream(prompt, stop, run_manager, **kwargs)]
)
else:
response = self._call_api(prompt, kwargs)
return self._process_response(response)
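For reference, the endpoint the class targets is assembled in `__init__` as `{base_url}/{account_id}/ai/run/{model}`. A hedged sketch of the equivalent raw call (placeholder credentials; not part of the diff) that mirrors `_call_api` and `_process_response`:

```python
import requests

ACCOUNT_ID = "my_account_id"        # placeholder
API_TOKEN = "my_secret_api_token"   # placeholder
MODEL = "@cf/meta/llama-2-7b-chat-int8"
url = f"https://api.cloudflare.com/client/v4/accounts/{ACCOUNT_ID}/ai/run/{MODEL}"

resp = requests.post(
    url,
    headers={"Authorization": f"Bearer {API_TOKEN}"},
    json={"prompt": "Say hello", "stream": False},
)
# Non-streaming responses mirror _process_response: the generated text
# lives at result.response in the JSON body.
print(resp.json()["result"]["response"])
```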

View File

@@ -0,0 +1,50 @@
from typing import Optional, Type
from langchain_core.callbacks import CallbackManagerForToolRun
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_openai.chat_models import ChatOpenAI
from langchain_community.tools.amadeus.base import AmadeusBaseTool
class ClosestAirportSchema(BaseModel):
"""Schema for the AmadeusClosestAirport tool."""
location: str = Field(
description=(
" The location for which you would like to find the nearest airport "
" along with optional details such as country, state, region, or "
" province, allowing for easy processing and identification of "
" the closest airport. Examples of the format are the following:\n"
" Cali, Colombia\n "
" Lincoln, Nebraska, United States\n"
" New York, United States\n"
" Sydney, New South Wales, Australia\n"
" Rome, Lazio, Italy\n"
" Toronto, Ontario, Canada\n"
)
)
class AmadeusClosestAirport(AmadeusBaseTool):
"""Tool for finding the closest airport to a particular location."""
name: str = "closest_airport"
description: str = (
"Use this tool to find the closest airport to a particular location."
)
args_schema: Type[ClosestAirportSchema] = ClosestAirportSchema
def _run(
self,
location: str,
run_manager: Optional[CallbackManagerForToolRun] = None,
) -> str:
content = (
f" What is the nearest airport to {location}? Please respond with the "
" airport's International Air Transport Association (IATA) Location "
' Identifier in the following JSON format. JSON: "iataCode": "IATA '
' Location Identifier" '
)
        # .invoke() on a chat model returns an AIMessage; the tool is
        # declared to return str, so unwrap the message content.
        return str(ChatOpenAI(temperature=0).invoke(content).content)

View File

@@ -0,0 +1,42 @@
"""
This tool allows agents to interact with the clickup library
and operate on a Clickup instance.
To use this tool, you must first set the following environment variables:
client_secret
client_id
code
Below is a sample script that uses the Clickup tool:
```python
from langchain_community.agent_toolkits.clickup.toolkit import ClickupToolkit
from langchain_community.utilities.clickup import ClickupAPIWrapper
clickup = ClickupAPIWrapper()
toolkit = ClickupToolkit.from_clickup_api_wrapper(clickup)
```
"""
from typing import Optional
from langchain_core.callbacks import CallbackManagerForToolRun
from langchain_core.pydantic_v1 import Field
from langchain_core.tools import BaseTool
from langchain_community.utilities.clickup import ClickupAPIWrapper
class ClickupAction(BaseTool):
"""Tool that queries the Clickup API."""
api_wrapper: ClickupAPIWrapper = Field(default_factory=ClickupAPIWrapper)
mode: str
name: str = ""
description: str = ""
def _run(
self,
instructions: str,
run_manager: Optional[CallbackManagerForToolRun] = None,
) -> str:
"""Use the Clickup API to run an operation."""
return self.api_wrapper.run(self.mode, instructions)

View File

@@ -0,0 +1,44 @@
"""
This tool allows agents to interact with the atlassian-python-api library
and operate on a Jira instance. For more information on the
atlassian-python-api library, see https://atlassian-python-api.readthedocs.io/jira.html
To use this tool, you must first set the following environment variables:
JIRA_API_TOKEN
JIRA_USERNAME
JIRA_INSTANCE_URL
Below is a sample script that uses the Jira tool:
```python
from langchain_community.agent_toolkits.jira.toolkit import JiraToolkit
from langchain_community.utilities.jira import JiraAPIWrapper
jira = JiraAPIWrapper()
toolkit = JiraToolkit.from_jira_api_wrapper(jira)
```
"""
from typing import Optional
from langchain_core.callbacks import CallbackManagerForToolRun
from langchain_core.pydantic_v1 import Field
from langchain_core.tools import BaseTool
from langchain_community.utilities.jira import JiraAPIWrapper


class JiraAction(BaseTool):
    """Tool that queries the Atlassian Jira API."""

    api_wrapper: JiraAPIWrapper = Field(default_factory=JiraAPIWrapper)
    mode: str
    name: str = ""
    description: str = ""

    def _run(
        self,
        instructions: str,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> str:
        """Use the Atlassian Jira API to run an operation."""
        return self.api_wrapper.run(self.mode, instructions)
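A minimal sketch of the same pattern for Jira, assuming the three environment variables above are set; `"jql"` as a mode name is an assumption based on the toolkit and should be checked against `JiraAPIWrapper.run`.

```python
from langchain_community.tools.jira.tool import JiraAction
from langchain_community.utilities.jira import JiraAPIWrapper

search = JiraAction(
    api_wrapper=JiraAPIWrapper(),
    mode="jql",  # assumed mode name; JiraAPIWrapper.run dispatches on it
    name="JQL search",
    description="Search Jira issues with a JQL query",
)
print(search.run("project = TEST AND status = 'In Progress'"))
```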


@@ -0,0 +1,276 @@
"""Tools for interacting with a Power BI dataset."""
import logging
from time import perf_counter
from typing import Any, Dict, Optional, Tuple
from langchain_core.callbacks import (
AsyncCallbackManagerForToolRun,
CallbackManagerForToolRun,
)
from langchain_core.pydantic_v1 import Field, validator
from langchain_core.tools import BaseTool
from langchain_openai.chat_models import _import_tiktoken
from langchain_community.tools.powerbi.prompt import (
BAD_REQUEST_RESPONSE,
DEFAULT_FEWSHOT_EXAMPLES,
RETRY_RESPONSE,
)
from langchain_community.utilities.powerbi import PowerBIDataset, json_to_md
logger = logging.getLogger(__name__)


class QueryPowerBITool(BaseTool):
    """Tool for querying a Power BI Dataset."""

    name: str = "query_powerbi"
    description: str = """
    Input to this tool is a detailed question about the dataset, output is a result from the dataset. It will try to answer the question using the dataset, and if it cannot, it will ask for clarification.

    Example Input: "How many rows are in table1?"
    """  # noqa: E501
    llm_chain: Any
    powerbi: PowerBIDataset = Field(exclude=True)
    examples: Optional[str] = DEFAULT_FEWSHOT_EXAMPLES
    session_cache: Dict[str, Any] = Field(default_factory=dict, exclude=True)
    max_iterations: int = 5
    output_token_limit: int = 4000
    tiktoken_model_name: Optional[str] = None  # e.g. "cl100k_base"

    class Config:
        """Configuration for this pydantic object."""

        arbitrary_types_allowed = True

    @validator("llm_chain")
    def validate_llm_chain_input_variables(  # pylint: disable=E0213
        cls, llm_chain: Any
    ) -> Any:
        """Make sure the LLM chain has the correct input variables."""
        for var in llm_chain.prompt.input_variables:
            if var not in ["tool_input", "tables", "schemas", "examples"]:
                # Use an f-string so the found variables actually appear in the
                # message; the old %s-style extra argument was never interpolated.
                raise ValueError(
                    "LLM chain for QueryPowerBITool must have input variables "
                    "['tool_input', 'tables', 'schemas', 'examples'], found "
                    f"{llm_chain.prompt.input_variables}"
                )
        return llm_chain

    def _check_cache(self, tool_input: str) -> Optional[str]:
        """Return the cached result for this input if present, otherwise None."""
        if tool_input not in self.session_cache:
            return None
        return self.session_cache[tool_input]

    def _run(
self,
tool_input: str,
run_manager: Optional[CallbackManagerForToolRun] = None,
**kwargs: Any,
) -> str:
"""Execute the query, return the results or an error message."""
if cache := self._check_cache(tool_input):
logger.debug("Found cached result for %s: %s", tool_input, cache)
return cache
try:
logger.info("Running PBI Query Tool with input: %s", tool_input)
query = self.llm_chain.predict(
tool_input=tool_input,
tables=self.powerbi.get_table_names(),
schemas=self.powerbi.get_schemas(),
examples=self.examples,
callbacks=run_manager.get_child() if run_manager else None,
)
except Exception as exc: # pylint: disable=broad-except
self.session_cache[tool_input] = f"Error on call to LLM: {exc}"
return self.session_cache[tool_input]
if query == "I cannot answer this":
self.session_cache[tool_input] = query
return self.session_cache[tool_input]
logger.info("PBI Query:\n%s", query)
start_time = perf_counter()
pbi_result = self.powerbi.run(command=query)
end_time = perf_counter()
logger.debug("PBI Result: %s", pbi_result)
logger.debug(f"PBI Query duration: {end_time - start_time:0.6f}")
result, error = self._parse_output(pbi_result)
        if error is not None and "TokenExpired" in error:
            self.session_cache[tool_input] = (
                "Authentication token expired or invalid, please try to reauthenticate."
            )
            return self.session_cache[tool_input]
iterations = kwargs.get("iterations", 0)
if error and iterations < self.max_iterations:
return self._run(
tool_input=RETRY_RESPONSE.format(
tool_input=tool_input, query=query, error=error
),
run_manager=run_manager,
iterations=iterations + 1,
)
self.session_cache[tool_input] = (
result if result else BAD_REQUEST_RESPONSE.format(error=error)
)
return self.session_cache[tool_input]

    async def _arun(
self,
tool_input: str,
run_manager: Optional[AsyncCallbackManagerForToolRun] = None,
**kwargs: Any,
) -> str:
"""Execute the query, return the results or an error message."""
if cache := self._check_cache(tool_input):
logger.debug("Found cached result for %s: %s", tool_input, cache)
return f"{cache}, from cache, you have already asked this question."
try:
logger.info("Running PBI Query Tool with input: %s", tool_input)
query = await self.llm_chain.apredict(
tool_input=tool_input,
tables=self.powerbi.get_table_names(),
schemas=self.powerbi.get_schemas(),
examples=self.examples,
callbacks=run_manager.get_child() if run_manager else None,
)
except Exception as exc: # pylint: disable=broad-except
self.session_cache[tool_input] = f"Error on call to LLM: {exc}"
return self.session_cache[tool_input]
if query == "I cannot answer this":
self.session_cache[tool_input] = query
return self.session_cache[tool_input]
logger.info("PBI Query: %s", query)
start_time = perf_counter()
pbi_result = await self.powerbi.arun(command=query)
end_time = perf_counter()
logger.debug("PBI Result: %s", pbi_result)
logger.debug(f"PBI Query duration: {end_time - start_time:0.6f}")
result, error = self._parse_output(pbi_result)
        if error is not None and ("TokenExpired" in error or "TokenError" in error):
            self.session_cache[tool_input] = (
                "Authentication token expired or invalid, please try to"
                " reauthenticate or check the scope of the credential."
            )
            return self.session_cache[tool_input]
iterations = kwargs.get("iterations", 0)
if error and iterations < self.max_iterations:
return await self._arun(
tool_input=RETRY_RESPONSE.format(
tool_input=tool_input, query=query, error=error
),
run_manager=run_manager,
iterations=iterations + 1,
)
self.session_cache[tool_input] = (
result if result else BAD_REQUEST_RESPONSE.format(error=error)
)
return self.session_cache[tool_input]

    def _parse_output(
self, pbi_result: Dict[str, Any]
) -> Tuple[Optional[str], Optional[Any]]:
"""Parse the output of the query to a markdown table."""
if "results" in pbi_result:
rows = pbi_result["results"][0]["tables"][0]["rows"]
if len(rows) == 0:
logger.info("0 records in result, query was valid.")
                return (
                    None,
                    "0 rows returned; this might be correct, but please validate that all filter values were correct.",  # noqa: E501
                )
result = json_to_md(rows)
too_long, length = self._result_too_large(result)
if too_long:
return (
f"Result too large, please try to be more specific or use the `TOPN` function. The result is {length} tokens long, the limit is {self.output_token_limit} tokens.", # noqa: E501
None,
)
return result, None
if "error" in pbi_result:
if (
"pbi.error" in pbi_result["error"]
and "details" in pbi_result["error"]["pbi.error"]
):
return None, pbi_result["error"]["pbi.error"]["details"][0]["detail"]
return None, pbi_result["error"]
return None, pbi_result

    def _result_too_large(self, result: str) -> Tuple[bool, int]:
"""Tokenize the output of the query."""
if self.tiktoken_model_name:
tiktoken_ = _import_tiktoken()
encoding = tiktoken_.encoding_for_model(self.tiktoken_model_name)
length = len(encoding.encode(result))
logger.info("Result length: %s", length)
return length > self.output_token_limit, length
return False, 0


class InfoPowerBITool(BaseTool):
    """Tool for getting metadata about a Power BI Dataset."""

    name: str = "schema_powerbi"
    description: str = """
    Input to this tool is a comma-separated list of tables, output is the schema and sample rows for those tables.
    Be sure that the tables actually exist by calling list_tables_powerbi first!

    Example Input: "table1, table2, table3"
    """  # noqa: E501
    powerbi: PowerBIDataset = Field(exclude=True)

    class Config:
        """Configuration for this pydantic object."""

        arbitrary_types_allowed = True

    def _run(
        self,
        tool_input: str,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> str:
        """Get the schema for tables in a comma-separated list."""
        return self.powerbi.get_table_info(tool_input.split(", "))

    async def _arun(
        self,
        tool_input: str,
        run_manager: Optional[AsyncCallbackManagerForToolRun] = None,
    ) -> str:
        """Get the schema for tables in a comma-separated list."""
        return await self.powerbi.aget_table_info(tool_input.split(", "))


class ListPowerBITool(BaseTool):
    """Tool for getting table names."""

    name: str = "list_tables_powerbi"
    description: str = "Input is an empty string, output is a comma-separated list of tables in the database."  # noqa: E501 # pylint: disable=C0301
    powerbi: PowerBIDataset = Field(exclude=True)

    class Config:
        """Configuration for this pydantic object."""

        arbitrary_types_allowed = True

    def _run(
        self,
        tool_input: Optional[str] = None,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> str:
        """Get the names of the tables."""
        return ", ".join(self.powerbi.get_table_names())

    async def _arun(
        self,
        tool_input: Optional[str] = None,
        run_manager: Optional[AsyncCallbackManagerForToolRun] = None,
    ) -> str:
        """Get the names of the tables."""
        return ", ".join(self.powerbi.get_table_names())

Some files were not shown because too many files have changed in this diff.