mirror of
https://github.com/hwchase17/langchain.git
synced 2026-02-04 00:00:34 +00:00
Compare commits
189 Commits
sr/typing-
...
langchain-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
2e0bed6a21 | ||
|
|
5ec0fa69de | ||
|
|
6a416c6186 | ||
|
|
3dcafac79b | ||
|
|
d3e9c4d29d | ||
|
|
1cc4dc7cc9 | ||
|
|
398c067f30 | ||
|
|
d84eef667a | ||
|
|
8d93720c70 | ||
|
|
85c401f648 | ||
|
|
04ec6cacaf | ||
|
|
ed9bd6e3ad | ||
|
|
c739afd45b | ||
|
|
4fbeffcfee | ||
|
|
72f1d79022 | ||
|
|
f6297ced67 | ||
|
|
4804bd6ec2 | ||
|
|
10087ac024 | ||
|
|
f752c1a07f | ||
|
|
7902fa3238 | ||
|
|
4be9407b09 | ||
|
|
9225bff326 | ||
|
|
d4cb740e0c | ||
|
|
e5c9912a89 | ||
|
|
8bca31f8c4 | ||
|
|
c5baa3ac27 | ||
|
|
795e746ca7 | ||
|
|
6519a5675b | ||
|
|
e9f7cd3e0e | ||
|
|
5c94e47d14 | ||
|
|
e0950f29b7 | ||
|
|
71778cb721 | ||
|
|
37d8666276 | ||
|
|
c286c06f16 | ||
|
|
b83e9b1056 | ||
|
|
c1f66611fc | ||
|
|
f93bc48915 | ||
|
|
516d74b6df | ||
|
|
c85f7b6061 | ||
|
|
f167c35243 | ||
|
|
b8a76cb6e9 | ||
|
|
dbcdf0b702 | ||
|
|
beb2ee6edf | ||
|
|
9f61ed8b81 | ||
|
|
6cff82d02e | ||
|
|
0cd72b50fb | ||
|
|
1a3cd46d88 | ||
|
|
470160cf81 | ||
|
|
20b8342fdf | ||
|
|
2f8af61218 | ||
|
|
81758e22f3 | ||
|
|
54241f4d06 | ||
|
|
7c9223d2b2 | ||
|
|
3342e4d62d | ||
|
|
5842110dbc | ||
|
|
62db04c43a | ||
|
|
fb892ee50a | ||
|
|
8ad0e9f267 | ||
|
|
d0b13e926d | ||
|
|
6fa4a45311 | ||
|
|
97dd5f2cb8 | ||
|
|
2a82fbc0ff | ||
|
|
0e5e33ba03 | ||
|
|
fc35544e0d | ||
|
|
15cc090e52 | ||
|
|
0f940d74b2 | ||
|
|
7829b722b1 | ||
|
|
914730cf8d | ||
|
|
c3738ea376 | ||
|
|
cd124a0949 | ||
|
|
57ff48e62e | ||
|
|
bc232e6d03 | ||
|
|
be32382d92 | ||
|
|
16c984ef0a | ||
|
|
13dd115d1d | ||
|
|
75d365418b | ||
|
|
2cff369cdc | ||
|
|
f5b6eecf72 | ||
|
|
a528ea1796 | ||
|
|
bf6a5eb122 | ||
|
|
5720dea41b | ||
|
|
087107557f | ||
|
|
05ba853548 | ||
|
|
3fb90666be | ||
|
|
6a2a149f89 | ||
|
|
bbc1d46efe | ||
|
|
d6b5f05f33 | ||
|
|
10377a7373 | ||
|
|
373ad8ac2c | ||
|
|
5eec11e2db | ||
|
|
badc0cf1b6 | ||
|
|
3b7abdff96 | ||
|
|
4aebfbad59 | ||
|
|
ae1f03fbe0 | ||
|
|
46dbb3967e | ||
|
|
dd0b990ba5 | ||
|
|
5aa46501cf | ||
|
|
92df109dd5 | ||
|
|
d27fb0c432 | ||
|
|
69dd39c461 | ||
|
|
41cebfe4fb | ||
|
|
5350967ddc | ||
|
|
7542278997 | ||
|
|
ff6e3558d7 | ||
|
|
585e12e53b | ||
|
|
73ba156a7d | ||
|
|
395c8d0bd4 | ||
|
|
34d31b8394 | ||
|
|
2aa0555941 | ||
|
|
dff229d018 | ||
|
|
b009ca4d23 | ||
|
|
0254c12cb0 | ||
|
|
2faed37ff1 | ||
|
|
d886dcfba5 | ||
|
|
91d5ca275d | ||
|
|
dcb670f395 | ||
|
|
85012ae601 | ||
|
|
aa0f4fb927 | ||
|
|
d18cdc6f32 | ||
|
|
8a5f46322b | ||
|
|
a0e86b18bf | ||
|
|
6affec92ce | ||
|
|
a64aee310c | ||
|
|
ba6c2590ae | ||
|
|
bb71f53585 | ||
|
|
9875ffbabc | ||
|
|
b5efafe80c | ||
|
|
ff3353f02f | ||
|
|
3ace4e3680 | ||
|
|
80c397019f | ||
|
|
4a42158e6c | ||
|
|
7ba3e80057 | ||
|
|
50e27a447b | ||
|
|
78c10f8790 | ||
|
|
ccfc9f795a | ||
|
|
b21926fe6c | ||
|
|
f1ad0da8f5 | ||
|
|
f67af34ea0 | ||
|
|
3030ffc248 | ||
|
|
1ad9de4b45 | ||
|
|
b95cb770e8 | ||
|
|
1867521d1a | ||
|
|
8e3ca21bd3 | ||
|
|
e92c817518 | ||
|
|
28727618b3 | ||
|
|
3108b14164 | ||
|
|
1922adc092 | ||
|
|
4a242a8a4f | ||
|
|
064b37f90e | ||
|
|
062678fa18 | ||
|
|
5d3e3d3f31 | ||
|
|
5a7cf87626 | ||
|
|
c63f23d233 | ||
|
|
b7091d391d | ||
|
|
7a2952210e | ||
|
|
7549845d82 | ||
|
|
878f033ed7 | ||
|
|
4065106c2e | ||
|
|
12df938ace | ||
|
|
65ee43cc10 | ||
|
|
fe7c000fc1 | ||
|
|
dad50e5624 | ||
|
|
0a6d01e61d | ||
|
|
c6f8b0875a | ||
|
|
4c3800d743 | ||
|
|
7fe1c4b78f | ||
|
|
c375732396 | ||
|
|
9c21f83e82 | ||
|
|
880652b713 | ||
|
|
4ab94579ad | ||
|
|
eb0545a173 | ||
|
|
a2e389de9f | ||
|
|
01573c1375 | ||
|
|
2ba3ce81a6 | ||
|
|
4e4e5d7337 | ||
|
|
2a863727f9 | ||
|
|
30e2260e26 | ||
|
|
cbaea351b2 | ||
|
|
f070217c3b | ||
|
|
0915682c12 | ||
|
|
68ab9a1e56 | ||
|
|
47b79c30c0 | ||
|
|
5899f980aa | ||
|
|
b0bf4afe81 | ||
|
|
33e5d01f7c | ||
|
|
ee3373afc2 | ||
|
|
b296f103a9 | ||
|
|
525d5c0169 | ||
|
|
c4b6ba254e |
132
.github/CODE_OF_CONDUCT.md
vendored
132
.github/CODE_OF_CONDUCT.md
vendored
@@ -1,132 +0,0 @@
|
||||
# Contributor Covenant Code of Conduct
|
||||
|
||||
## Our Pledge
|
||||
|
||||
We as members, contributors, and leaders pledge to make participation in our
|
||||
community a harassment-free experience for everyone, regardless of age, body
|
||||
size, visible or invisible disability, ethnicity, sex characteristics, gender
|
||||
identity and expression, level of experience, education, socio-economic status,
|
||||
nationality, personal appearance, race, caste, color, religion, or sexual
|
||||
identity and orientation.
|
||||
|
||||
We pledge to act and interact in ways that contribute to an open, welcoming,
|
||||
diverse, inclusive, and healthy community.
|
||||
|
||||
## Our Standards
|
||||
|
||||
Examples of behavior that contributes to a positive environment for our
|
||||
community include:
|
||||
|
||||
* Demonstrating empathy and kindness toward other people
|
||||
* Being respectful of differing opinions, viewpoints, and experiences
|
||||
* Giving and gracefully accepting constructive feedback
|
||||
* Accepting responsibility and apologizing to those affected by our mistakes,
|
||||
and learning from the experience
|
||||
* Focusing on what is best not just for us as individuals, but for the overall
|
||||
community
|
||||
|
||||
Examples of unacceptable behavior include:
|
||||
|
||||
* The use of sexualized language or imagery, and sexual attention or advances of
|
||||
any kind
|
||||
* Trolling, insulting or derogatory comments, and personal or political attacks
|
||||
* Public or private harassment
|
||||
* Publishing others' private information, such as a physical or email address,
|
||||
without their explicit permission
|
||||
* Other conduct which could reasonably be considered inappropriate in a
|
||||
professional setting
|
||||
|
||||
## Enforcement Responsibilities
|
||||
|
||||
Community leaders are responsible for clarifying and enforcing our standards of
|
||||
acceptable behavior and will take appropriate and fair corrective action in
|
||||
response to any behavior that they deem inappropriate, threatening, offensive,
|
||||
or harmful.
|
||||
|
||||
Community leaders have the right and responsibility to remove, edit, or reject
|
||||
comments, commits, code, wiki edits, issues, and other contributions that are
|
||||
not aligned to this Code of Conduct, and will communicate reasons for moderation
|
||||
decisions when appropriate.
|
||||
|
||||
## Scope
|
||||
|
||||
This Code of Conduct applies within all community spaces, and also applies when
|
||||
an individual is officially representing the community in public spaces.
|
||||
Examples of representing our community include using an official e-mail address,
|
||||
posting via an official social media account, or acting as an appointed
|
||||
representative at an online or offline event.
|
||||
|
||||
## Enforcement
|
||||
|
||||
Instances of abusive, harassing, or otherwise unacceptable behavior may be
|
||||
reported to the community leaders responsible for enforcement at
|
||||
conduct@langchain.dev.
|
||||
All complaints will be reviewed and investigated promptly and fairly.
|
||||
|
||||
All community leaders are obligated to respect the privacy and security of the
|
||||
reporter of any incident.
|
||||
|
||||
## Enforcement Guidelines
|
||||
|
||||
Community leaders will follow these Community Impact Guidelines in determining
|
||||
the consequences for any action they deem in violation of this Code of Conduct:
|
||||
|
||||
### 1. Correction
|
||||
|
||||
**Community Impact**: Use of inappropriate language or other behavior deemed
|
||||
unprofessional or unwelcome in the community.
|
||||
|
||||
**Consequence**: A private, written warning from community leaders, providing
|
||||
clarity around the nature of the violation and an explanation of why the
|
||||
behavior was inappropriate. A public apology may be requested.
|
||||
|
||||
### 2. Warning
|
||||
|
||||
**Community Impact**: A violation through a single incident or series of
|
||||
actions.
|
||||
|
||||
**Consequence**: A warning with consequences for continued behavior. No
|
||||
interaction with the people involved, including unsolicited interaction with
|
||||
those enforcing the Code of Conduct, for a specified period of time. This
|
||||
includes avoiding interactions in community spaces as well as external channels
|
||||
like social media. Violating these terms may lead to a temporary or permanent
|
||||
ban.
|
||||
|
||||
### 3. Temporary Ban
|
||||
|
||||
**Community Impact**: A serious violation of community standards, including
|
||||
sustained inappropriate behavior.
|
||||
|
||||
**Consequence**: A temporary ban from any sort of interaction or public
|
||||
communication with the community for a specified period of time. No public or
|
||||
private interaction with the people involved, including unsolicited interaction
|
||||
with those enforcing the Code of Conduct, is allowed during this period.
|
||||
Violating these terms may lead to a permanent ban.
|
||||
|
||||
### 4. Permanent Ban
|
||||
|
||||
**Community Impact**: Demonstrating a pattern of violation of community
|
||||
standards, including sustained inappropriate behavior, harassment of an
|
||||
individual, or aggression toward or disparagement of classes of individuals.
|
||||
|
||||
**Consequence**: A permanent ban from any sort of public interaction within the
|
||||
community.
|
||||
|
||||
## Attribution
|
||||
|
||||
This Code of Conduct is adapted from the [Contributor Covenant][homepage],
|
||||
version 2.1, available at
|
||||
[https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1].
|
||||
|
||||
Community Impact Guidelines were inspired by
|
||||
[Mozilla's code of conduct enforcement ladder][Mozilla CoC].
|
||||
|
||||
For answers to common questions about this code of conduct, see the FAQ at
|
||||
[https://www.contributor-covenant.org/faq][FAQ]. Translations are available at
|
||||
[https://www.contributor-covenant.org/translations][translations].
|
||||
|
||||
[homepage]: https://www.contributor-covenant.org
|
||||
[v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html
|
||||
[Mozilla CoC]: https://github.com/mozilla/diversity
|
||||
[FAQ]: https://www.contributor-covenant.org/faq
|
||||
[translations]: https://www.contributor-covenant.org/translations
|
||||
6
.github/CONTRIBUTING.md
vendored
6
.github/CONTRIBUTING.md
vendored
@@ -1,6 +0,0 @@
|
||||
# Contributing to LangChain
|
||||
|
||||
Hi there! Thank you for even being interested in contributing to LangChain.
|
||||
As an open-source project in a rapidly developing field, we are extremely open to contributions, whether they involve new features, improved infrastructure, better documentation, or bug fixes.
|
||||
|
||||
To learn how to contribute to LangChain, please follow the [contribution guide here](https://docs.langchain.com/oss/python/contributing).
|
||||
3
.github/ISSUE_TEMPLATE/config.yml
vendored
3
.github/ISSUE_TEMPLATE/config.yml
vendored
@@ -13,6 +13,3 @@ contact_links:
|
||||
- name: 📚 API Reference Documentation
|
||||
url: https://reference.langchain.com/python/
|
||||
about: View the official LangChain API reference documentation
|
||||
- name: 💬 LangChain Forum
|
||||
url: https://forum.langchain.com/
|
||||
about: Ask questions and get help from the community
|
||||
|
||||
38
.github/PULL_REQUEST_TEMPLATE.md
vendored
38
.github/PULL_REQUEST_TEMPLATE.md
vendored
@@ -1,28 +1,30 @@
|
||||
(Replace this entire block of text)
|
||||
|
||||
Thank you for contributing to LangChain! Follow these steps to mark your pull request as ready for review. **If any of these steps are not completed, your PR will not be considered for review.**
|
||||
Read the full contributing guidelines: https://docs.langchain.com/oss/python/contributing/overview
|
||||
|
||||
Thank you for contributing to LangChain! Follow these steps to have your pull request considered as ready for review.
|
||||
|
||||
1. PR title: Should follow the format: TYPE(SCOPE): DESCRIPTION
|
||||
|
||||
- [ ] **PR title**: Follows the format: {TYPE}({SCOPE}): {DESCRIPTION}
|
||||
- Examples:
|
||||
- fix(anthropic): resolve flag parsing error
|
||||
- feat(core): add multi-tenant support
|
||||
- fix(cli): resolve flag parsing error
|
||||
- docs(openai): update API usage examples
|
||||
- Allowed `{TYPE}` values:
|
||||
- feat, fix, docs, style, refactor, perf, test, build, ci, chore, revert, release
|
||||
- Allowed `{SCOPE}` values (optional):
|
||||
- core, cli, langchain, standard-tests, text-splitters, docs, anthropic, chroma, deepseek, exa, fireworks, groq, huggingface, mistralai, nomic, ollama, openai, perplexity, prompty, qdrant, xai, infra
|
||||
- Once you've written the title, please delete this checklist item; do not include it in the PR.
|
||||
- test(openai): update API usage tests
|
||||
- Allowed TYPE and SCOPE values: https://github.com/langchain-ai/langchain/blob/master/.github/workflows/pr_lint.yml#L15-L33
|
||||
|
||||
- [ ] **PR message**: ***Delete this entire checklist*** and replace with
|
||||
- **Description:** a description of the change. Include a [closing keyword](https://docs.github.com/en/issues/tracking-your-work-with-issues/using-issues/linking-a-pull-request-to-an-issue#linking-a-pull-request-to-an-issue-using-a-keyword) if applicable to a relevant issue.
|
||||
- **Issue:** the issue # it fixes, if applicable (e.g. Fixes #123)
|
||||
- **Dependencies:** any dependencies required for this change
|
||||
2. PR description:
|
||||
|
||||
- [ ] **Lint and test**: Run `make format`, `make lint` and `make test` from the root of the package(s) you've modified. **We will not consider a PR unless these three are passing in CI.** See [contribution guidelines](https://docs.langchain.com/oss/python/contributing) for more.
|
||||
- Write 1-2 sentences summarizing the change.
|
||||
- If this PR addresses a specific issue, please include "Fixes #ISSUE_NUMBER" in the description to automatically close the issue when the PR is merged.
|
||||
- If there are any breaking changes, please clearly describe them.
|
||||
- If this PR depends on another PR being merged first, please include "Depends on #PR_NUMBER" in the description.
|
||||
|
||||
3. Run `make format`, `make lint` and `make test` from the root of the package(s) you've modified.
|
||||
|
||||
- We will not consider a PR unless these three are passing in CI.
|
||||
|
||||
Additional guidelines:
|
||||
|
||||
- Most PRs should not touch more than one package.
|
||||
- Please do not add dependencies to `pyproject.toml` files (even optional ones) unless they are **required** for unit tests. Likewise, please do not update the `uv.lock` files unless you are adding a required dependency.
|
||||
- Changes should be backwards compatible.
|
||||
- Make sure optional dependencies are imported within a function.
|
||||
- We ask that if you use generative AI for your contribution, you include a disclaimer.
|
||||
- PRs should not touch more than one package unless absolutely necessary.
|
||||
- Do not update the `uv.lock` files or add dependencies to `pyproject.toml` files (even optional ones) unless you have explicit permission from a maintainer to do so.
|
||||
|
||||
330
.github/copilot-instructions.md
vendored
330
.github/copilot-instructions.md
vendored
@@ -1,330 +0,0 @@
|
||||
# Global Development Guidelines for LangChain Projects
|
||||
|
||||
## Core Development Principles
|
||||
|
||||
### 1. Maintain Stable Public Interfaces ⚠️ CRITICAL
|
||||
|
||||
**Always attempt to preserve function signatures, argument positions, and names for exported/public methods.**
|
||||
|
||||
❌ **Bad - Breaking Change:**
|
||||
|
||||
```python
|
||||
def get_user(id, verbose=False): # Changed from `user_id`
|
||||
pass
|
||||
```
|
||||
|
||||
✅ **Good - Stable Interface:**
|
||||
|
||||
```python
|
||||
def get_user(user_id: str, verbose: bool = False) -> User:
|
||||
"""Retrieve user by ID with optional verbose output."""
|
||||
pass
|
||||
```
|
||||
|
||||
**Before making ANY changes to public APIs:**
|
||||
|
||||
- Check if the function/class is exported in `__init__.py`
|
||||
- Look for existing usage patterns in tests and examples
|
||||
- Use keyword-only arguments for new parameters: `*, new_param: str = "default"`
|
||||
- Mark experimental features clearly with docstring admonitions (using MkDocs Material, like `!!! warning`)
|
||||
|
||||
🧠 *Ask yourself:* "Would this change break someone's code if they used it last week?"
|
||||
|
||||
### 2. Code Quality Standards
|
||||
|
||||
**All Python code MUST include type hints and return types.**
|
||||
|
||||
❌ **Bad:**
|
||||
|
||||
```python
|
||||
def p(u, d):
|
||||
return [x for x in u if x not in d]
|
||||
```
|
||||
|
||||
✅ **Good:**
|
||||
|
||||
```python
|
||||
def filter_unknown_users(users: list[str], known_users: set[str]) -> list[str]:
|
||||
"""Filter out users that are not in the known users set.
|
||||
|
||||
Args:
|
||||
users: List of user identifiers to filter.
|
||||
known_users: Set of known/valid user identifiers.
|
||||
|
||||
Returns:
|
||||
List of users that are not in the known_users set.
|
||||
"""
|
||||
return [user for user in users if user not in known_users]
|
||||
```
|
||||
|
||||
**Style Requirements:**
|
||||
|
||||
- Use descriptive, **self-explanatory variable names**. Avoid overly short or cryptic identifiers.
|
||||
- Attempt to break up complex functions (>20 lines) into smaller, focused functions where it makes sense
|
||||
- Avoid unnecessary abstraction or premature optimization
|
||||
- Follow existing patterns in the codebase you're modifying
|
||||
|
||||
### 3. Testing Requirements
|
||||
|
||||
**Every new feature or bugfix MUST be covered by unit tests.**
|
||||
|
||||
**Test Organization:**
|
||||
|
||||
- Unit tests: `tests/unit_tests/` (no network calls allowed)
|
||||
- Integration tests: `tests/integration_tests/` (network calls permitted)
|
||||
- Use `pytest` as the testing framework
|
||||
|
||||
**Test Quality Checklist:**
|
||||
|
||||
- [ ] Tests fail when your new logic is broken
|
||||
- [ ] Happy path is covered
|
||||
- [ ] Edge cases and error conditions are tested
|
||||
- [ ] Use fixtures/mocks for external dependencies
|
||||
- [ ] Tests are deterministic (no flaky tests)
|
||||
|
||||
Checklist questions:
|
||||
|
||||
- [ ] Does the test suite fail if your new logic is broken?
|
||||
- [ ] Are all expected behaviors exercised (happy path, invalid input, etc)?
|
||||
- [ ] Do tests use fixtures or mocks where needed?
|
||||
|
||||
```python
|
||||
def test_filter_unknown_users():
|
||||
"""Test filtering unknown users from a list."""
|
||||
users = ["alice", "bob", "charlie"]
|
||||
known_users = {"alice", "bob"}
|
||||
|
||||
result = filter_unknown_users(users, known_users)
|
||||
|
||||
assert result == ["charlie"]
|
||||
assert len(result) == 1
|
||||
```
|
||||
|
||||
### 4. Security and Risk Assessment
|
||||
|
||||
**Security Checklist:**
|
||||
|
||||
- No `eval()`, `exec()`, or `pickle` on user-controlled input
|
||||
- Proper exception handling (no bare `except:`) and use a `msg` variable for error messages
|
||||
- Remove unreachable/commented code before committing
|
||||
- Check for race conditions and resource leaks (file handles, sockets, threads)
|
||||
- Ensure proper resource cleanup (file handles, connections)
|
||||
|
||||
❌ **Bad:**
|
||||
|
||||
```python
|
||||
def load_config(path):
|
||||
with open(path) as f:
|
||||
return eval(f.read()) # ⚠️ Never eval config
|
||||
```
|
||||
|
||||
✅ **Good:**
|
||||
|
||||
```python
|
||||
import json
|
||||
|
||||
def load_config(path: str) -> dict:
|
||||
with open(path) as f:
|
||||
return json.load(f)
|
||||
```
|
||||
|
||||
### 5. Documentation Standards
|
||||
|
||||
**Use Google-style docstrings with Args and Returns sections for all public functions.**
|
||||
|
||||
❌ **Insufficient Documentation:**
|
||||
|
||||
```python
|
||||
def send_email(to, msg):
|
||||
"""Send an email to a recipient."""
|
||||
```
|
||||
|
||||
✅ **Complete Documentation:**
|
||||
|
||||
```python
|
||||
def send_email(to: str, msg: str, *, priority: str = "normal") -> bool:
|
||||
"""
|
||||
Send an email to a recipient with specified priority.
|
||||
|
||||
Args:
|
||||
to: The email address of the recipient.
|
||||
msg: The message body to send.
|
||||
priority: Email priority level.
|
||||
|
||||
Returns:
|
||||
True if email was sent successfully, False otherwise.
|
||||
|
||||
Raises:
|
||||
InvalidEmailError: If the email address format is invalid.
|
||||
SMTPConnectionError: If unable to connect to email server.
|
||||
"""
|
||||
```
|
||||
|
||||
**Documentation Guidelines:**
|
||||
|
||||
- Types go in function signatures, NOT in docstrings
|
||||
- Focus on "why" rather than "what" in descriptions
|
||||
- Document all parameters, return values, and exceptions
|
||||
- Keep descriptions concise but clear
|
||||
|
||||
📌 *Tip:* Keep descriptions concise but clear. Only document return values if non-obvious.
|
||||
|
||||
### 6. Architectural Improvements
|
||||
|
||||
**When you encounter code that could be improved, suggest better designs:**
|
||||
|
||||
❌ **Poor Design:**
|
||||
|
||||
```python
|
||||
def process_data(data, db_conn, email_client, logger):
|
||||
# Function doing too many things
|
||||
validated = validate_data(data)
|
||||
result = db_conn.save(validated)
|
||||
email_client.send_notification(result)
|
||||
logger.log(f"Processed {len(data)} items")
|
||||
return result
|
||||
```
|
||||
|
||||
✅ **Better Design:**
|
||||
|
||||
```python
|
||||
@dataclass
|
||||
class ProcessingResult:
|
||||
"""Result of data processing operation."""
|
||||
items_processed: int
|
||||
success: bool
|
||||
errors: List[str] = field(default_factory=list)
|
||||
|
||||
class DataProcessor:
|
||||
"""Handles data validation, storage, and notification."""
|
||||
|
||||
def __init__(self, db_conn: Database, email_client: EmailClient):
|
||||
self.db = db_conn
|
||||
self.email = email_client
|
||||
|
||||
def process(self, data: List[dict]) -> ProcessingResult:
|
||||
"""Process and store data with notifications.
|
||||
|
||||
Args:
|
||||
data: List of data items to process.
|
||||
|
||||
Returns:
|
||||
ProcessingResult with details of the operation.
|
||||
"""
|
||||
validated = self._validate_data(data)
|
||||
result = self.db.save(validated)
|
||||
self._notify_completion(result)
|
||||
return result
|
||||
```
|
||||
|
||||
**Design Improvement Areas:**
|
||||
|
||||
If there's a **cleaner**, **more scalable**, or **simpler** design, highlight it and suggest improvements that would:
|
||||
|
||||
- Reduce code duplication through shared utilities
|
||||
- Make unit testing easier
|
||||
- Improve separation of concerns (single responsibility)
|
||||
- Make unit testing easier through dependency injection
|
||||
- Add clarity without adding complexity
|
||||
- Prefer dataclasses for structured data
|
||||
|
||||
## Development Tools & Commands
|
||||
|
||||
### Package Management
|
||||
|
||||
```bash
|
||||
# Add package
|
||||
uv add package-name
|
||||
|
||||
# Sync project dependencies
|
||||
uv sync
|
||||
uv lock
|
||||
```
|
||||
|
||||
### Testing
|
||||
|
||||
```bash
|
||||
# Run unit tests (no network)
|
||||
make test
|
||||
|
||||
# Don't run integration tests, as API keys must be set
|
||||
|
||||
# Run specific test file
|
||||
uv run --group test pytest tests/unit_tests/test_specific.py
|
||||
```
|
||||
|
||||
### Code Quality
|
||||
|
||||
```bash
|
||||
# Lint code
|
||||
make lint
|
||||
|
||||
# Format code
|
||||
make format
|
||||
|
||||
# Type checking
|
||||
uv run --group lint mypy .
|
||||
```
|
||||
|
||||
### Dependency Management Patterns
|
||||
|
||||
**Local Development Dependencies:**
|
||||
|
||||
```toml
|
||||
[tool.uv.sources]
|
||||
langchain-core = { path = "../core", editable = true }
|
||||
langchain-tests = { path = "../standard-tests", editable = true }
|
||||
```
|
||||
|
||||
**For tools, use the `@tool` decorator from `langchain_core.tools`:**
|
||||
|
||||
```python
|
||||
from langchain_core.tools import tool
|
||||
|
||||
@tool
|
||||
def search_database(query: str) -> str:
|
||||
"""Search the database for relevant information.
|
||||
|
||||
Args:
|
||||
query: The search query string.
|
||||
"""
|
||||
# Implementation here
|
||||
return results
|
||||
```
|
||||
|
||||
## Commit Standards
|
||||
|
||||
**Use Conventional Commits format for PR titles:**
|
||||
|
||||
- `feat(core): add multi-tenant support`
|
||||
- `fix(cli)!: resolve flag parsing error` (a breaking change is marked with an exclamation mark immediately before the colon)
|
||||
- `docs: update API usage examples`
|
||||
- `docs(openai): update API usage examples`
|
||||
|
||||
## Framework-Specific Guidelines
|
||||
|
||||
- Follow the existing patterns in `langchain_core` for base abstractions
|
||||
- Implement proper streaming support where applicable
|
||||
- Avoid deprecated components
|
||||
|
||||
### Partner Integrations
|
||||
|
||||
- Follow the established patterns in existing partner libraries
|
||||
- Implement standard interfaces (`BaseChatModel`, `BaseEmbeddings`, etc.)
|
||||
- Include comprehensive integration tests
|
||||
- Document API key requirements and authentication
|
||||
|
||||
---
|
||||
|
||||
## Quick Reference Checklist
|
||||
|
||||
Before submitting code changes:
|
||||
|
||||
- [ ] **Breaking Changes**: Verified no public API changes
|
||||
- [ ] **Type Hints**: All functions have complete type annotations
|
||||
- [ ] **Tests**: New functionality is fully tested
|
||||
- [ ] **Security**: No dangerous patterns (eval, silent failures, etc.)
|
||||
- [ ] **Documentation**: Google-style docstrings for public functions
|
||||
- [ ] **Code Quality**: `make lint` and `make format` pass
|
||||
- [ ] **Architecture**: Suggested improvements where applicable
|
||||
- [ ] **Commit Message**: Follows Conventional Commits format
|
||||
11
.github/pr-file-labeler.yml
vendored
11
.github/pr-file-labeler.yml
vendored
@@ -148,16 +148,5 @@ documentation:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
- "**/*.md"
|
||||
- "**/*.rst"
|
||||
- "**/README*"
|
||||
|
||||
# Security related changes
|
||||
security:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
- "**/*security*"
|
||||
- "**/*auth*"
|
||||
- "**/*credential*"
|
||||
- "**/*secret*"
|
||||
- "**/*token*"
|
||||
- ".github/workflows/security*"
|
||||
|
||||
@@ -35,7 +35,7 @@ jobs:
|
||||
timeout-minutes: 20
|
||||
name: "Python ${{ inputs.python-version }}"
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: "🐍 Set up Python ${{ inputs.python-version }} + UV"
|
||||
uses: "./.github/actions/uv_setup"
|
||||
|
||||
8
.github/workflows/_lint.yml
vendored
8
.github/workflows/_lint.yml
vendored
@@ -38,7 +38,7 @@ jobs:
|
||||
timeout-minutes: 20
|
||||
steps:
|
||||
- name: "📋 Checkout Code"
|
||||
uses: actions/checkout@v5
|
||||
uses: actions/checkout@v6
|
||||
|
||||
- name: "🐍 Set up Python ${{ inputs.python-version }} + UV"
|
||||
uses: "./.github/actions/uv_setup"
|
||||
@@ -47,6 +47,12 @@ jobs:
|
||||
cache-suffix: lint-${{ inputs.working-directory }}
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
|
||||
# - name: "🔒 Verify Lockfile is Up-to-Date"
|
||||
# working-directory: ${{ inputs.working-directory }}
|
||||
# run: |
|
||||
# unset UV_FROZEN
|
||||
# uv lock --check
|
||||
|
||||
- name: "📦 Install Lint & Typing Dependencies"
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
run: |
|
||||
|
||||
30
.github/workflows/_release.yml
vendored
30
.github/workflows/_release.yml
vendored
@@ -19,7 +19,7 @@ on:
|
||||
required: true
|
||||
type: string
|
||||
description: "From which folder this pipeline executes"
|
||||
default: "libs/langchain"
|
||||
default: "libs/langchain_v1"
|
||||
release-version:
|
||||
required: true
|
||||
type: string
|
||||
@@ -54,7 +54,7 @@ jobs:
|
||||
version: ${{ steps.check-version.outputs.version }}
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Set up Python + uv
|
||||
uses: "./.github/actions/uv_setup"
|
||||
@@ -77,7 +77,7 @@ jobs:
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
|
||||
- name: Upload build
|
||||
uses: actions/upload-artifact@v5
|
||||
uses: actions/upload-artifact@v6
|
||||
with:
|
||||
name: dist
|
||||
path: ${{ inputs.working-directory }}/dist/
|
||||
@@ -105,7 +105,7 @@ jobs:
|
||||
outputs:
|
||||
release-body: ${{ steps.generate-release-body.outputs.release-body }}
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- uses: actions/checkout@v6
|
||||
with:
|
||||
repository: langchain-ai/langchain
|
||||
path: langchain
|
||||
@@ -206,9 +206,9 @@ jobs:
|
||||
id-token: write
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- uses: actions/download-artifact@v6
|
||||
- uses: actions/download-artifact@v7
|
||||
with:
|
||||
name: dist
|
||||
path: ${{ inputs.working-directory }}/dist/
|
||||
@@ -237,7 +237,7 @@ jobs:
|
||||
contents: read
|
||||
timeout-minutes: 20
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
# We explicitly *don't* set up caching here. This ensures our tests are
|
||||
# maximally sensitive to catching breakage.
|
||||
@@ -258,7 +258,7 @@ jobs:
|
||||
with:
|
||||
python-version: ${{ env.PYTHON_VERSION }}
|
||||
|
||||
- uses: actions/download-artifact@v6
|
||||
- uses: actions/download-artifact@v7
|
||||
with:
|
||||
name: dist
|
||||
path: ${{ inputs.working-directory }}/dist/
|
||||
@@ -396,7 +396,7 @@ jobs:
|
||||
contents: read
|
||||
strategy:
|
||||
matrix:
|
||||
partner: [anthropic]
|
||||
partner: [openai, anthropic]
|
||||
fail-fast: false # Continue testing other partners if one fails
|
||||
env:
|
||||
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
|
||||
@@ -412,7 +412,7 @@ jobs:
|
||||
AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT_NAME: ${{ secrets.AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT_NAME }}
|
||||
LANGCHAIN_TESTS_USER_AGENT: ${{ secrets.LANGCHAIN_TESTS_USER_AGENT }}
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
# We implement this conditional as Github Actions does not have good support
|
||||
# for conditionally needing steps. https://github.com/actions/runner/issues/491
|
||||
@@ -430,7 +430,7 @@ jobs:
|
||||
with:
|
||||
python-version: ${{ env.PYTHON_VERSION }}
|
||||
|
||||
- uses: actions/download-artifact@v6
|
||||
- uses: actions/download-artifact@v7
|
||||
if: startsWith(inputs.working-directory, 'libs/core')
|
||||
with:
|
||||
name: dist
|
||||
@@ -492,14 +492,14 @@ jobs:
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Set up Python + uv
|
||||
uses: "./.github/actions/uv_setup"
|
||||
with:
|
||||
python-version: ${{ env.PYTHON_VERSION }}
|
||||
|
||||
- uses: actions/download-artifact@v6
|
||||
- uses: actions/download-artifact@v7
|
||||
with:
|
||||
name: dist
|
||||
path: ${{ inputs.working-directory }}/dist/
|
||||
@@ -532,14 +532,14 @@ jobs:
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Set up Python + uv
|
||||
uses: "./.github/actions/uv_setup"
|
||||
with:
|
||||
python-version: ${{ env.PYTHON_VERSION }}
|
||||
|
||||
- uses: actions/download-artifact@v6
|
||||
- uses: actions/download-artifact@v7
|
||||
with:
|
||||
name: dist
|
||||
path: ${{ inputs.working-directory }}/dist/
|
||||
|
||||
2
.github/workflows/_test.yml
vendored
2
.github/workflows/_test.yml
vendored
@@ -33,7 +33,7 @@ jobs:
|
||||
name: "Python ${{ inputs.python-version }}"
|
||||
steps:
|
||||
- name: "📋 Checkout Code"
|
||||
uses: actions/checkout@v5
|
||||
uses: actions/checkout@v6
|
||||
|
||||
- name: "🐍 Set up Python ${{ inputs.python-version }} + UV"
|
||||
uses: "./.github/actions/uv_setup"
|
||||
|
||||
2
.github/workflows/_test_pydantic.yml
vendored
2
.github/workflows/_test_pydantic.yml
vendored
@@ -36,7 +36,7 @@ jobs:
|
||||
name: "Pydantic ~=${{ inputs.pydantic-version }}"
|
||||
steps:
|
||||
- name: "📋 Checkout Code"
|
||||
uses: actions/checkout@v5
|
||||
uses: actions/checkout@v6
|
||||
|
||||
- name: "🐍 Set up Python ${{ inputs.python-version }} + UV"
|
||||
uses: "./.github/actions/uv_setup"
|
||||
|
||||
2
.github/workflows/auto-label-by-package.yml
vendored
2
.github/workflows/auto-label-by-package.yml
vendored
@@ -12,7 +12,7 @@ jobs:
|
||||
|
||||
steps:
|
||||
- name: Sync package labels
|
||||
uses: actions/github-script@v6
|
||||
uses: actions/github-script@v8
|
||||
with:
|
||||
script: |
|
||||
const body = context.payload.issue.body || "";
|
||||
|
||||
2
.github/workflows/check_core_versions.yml
vendored
2
.github/workflows/check_core_versions.yml
vendored
@@ -18,7 +18,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: "✅ Verify pyproject.toml & version.py Match"
|
||||
run: |
|
||||
|
||||
6
.github/workflows/check_diffs.yml
vendored
6
.github/workflows/check_diffs.yml
vendored
@@ -47,7 +47,7 @@ jobs:
|
||||
if: ${{ !contains(github.event.pull_request.labels.*.name, 'ci-ignore') }}
|
||||
steps:
|
||||
- name: "📋 Checkout Code"
|
||||
uses: actions/checkout@v5
|
||||
uses: actions/checkout@v6
|
||||
- name: "🐍 Setup Python 3.11"
|
||||
uses: actions/setup-python@v6
|
||||
with:
|
||||
@@ -141,7 +141,7 @@ jobs:
|
||||
run:
|
||||
working-directory: ${{ matrix.job-configs.working-directory }}
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: "🐍 Set up Python ${{ matrix.job-configs.python-version }} + UV"
|
||||
uses: "./.github/actions/uv_setup"
|
||||
@@ -182,7 +182,7 @@ jobs:
|
||||
job-configs: ${{ fromJson(needs.build.outputs.codspeed) }}
|
||||
fail-fast: false
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: "📦 Install UV Package Manager"
|
||||
uses: astral-sh/setup-uv@v7
|
||||
|
||||
6
.github/workflows/integration_tests.yml
vendored
6
.github/workflows/integration_tests.yml
vendored
@@ -71,14 +71,14 @@ jobs:
|
||||
working-directory: ${{ fromJSON(needs.compute-matrix.outputs.matrix).working-directory }}
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- uses: actions/checkout@v6
|
||||
with:
|
||||
path: langchain
|
||||
- uses: actions/checkout@v5
|
||||
- uses: actions/checkout@v6
|
||||
with:
|
||||
repository: langchain-ai/langchain-google
|
||||
path: langchain-google
|
||||
- uses: actions/checkout@v5
|
||||
- uses: actions/checkout@v6
|
||||
with:
|
||||
repository: langchain-ai/langchain-aws
|
||||
path: langchain-aws
|
||||
|
||||
9
.github/workflows/pr_lint.yml
vendored
9
.github/workflows/pr_lint.yml
vendored
@@ -27,10 +27,10 @@
|
||||
# * release — prepare a new release
|
||||
#
|
||||
# Allowed Scope(s) (optional):
|
||||
# core, cli, langchain, langchain_v1, langchain-classic, standard-tests,
|
||||
# text-splitters, docs, anthropic, chroma, deepseek, exa, fireworks, groq,
|
||||
# huggingface, mistralai, nomic, ollama, openai, perplexity, prompty, qdrant,
|
||||
# xai, infra, deps
|
||||
# core, cli, langchain, langchain_v1, langchain-classic, model-profiles,
|
||||
# standard-tests, text-splitters, docs, anthropic, chroma, deepseek, exa,
|
||||
# fireworks, groq, huggingface, mistralai, nomic, ollama, openai,
|
||||
# perplexity, prompty, qdrant, xai, infra, deps
|
||||
#
|
||||
# Multiple scopes can be used by separating them with a comma.
|
||||
#
|
||||
@@ -102,6 +102,7 @@ jobs:
|
||||
qdrant
|
||||
xai
|
||||
infra
|
||||
deps
|
||||
requireScope: false
|
||||
disallowScopes: |
|
||||
release
|
||||
|
||||
4
.github/workflows/v03_api_doc_build.yml
vendored
4
.github/workflows/v03_api_doc_build.yml
vendored
@@ -23,12 +23,12 @@ jobs:
|
||||
permissions:
|
||||
contents: read
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- uses: actions/checkout@v6
|
||||
with:
|
||||
ref: v0.3
|
||||
path: langchain
|
||||
|
||||
- uses: actions/checkout@v5
|
||||
- uses: actions/checkout@v6
|
||||
with:
|
||||
repository: langchain-ai/langchain-api-docs-html
|
||||
path: langchain-api-docs-html
|
||||
|
||||
8
.github/workflows/v1_changes.md
vendored
8
.github/workflows/v1_changes.md
vendored
@@ -1,8 +0,0 @@
|
||||
With the deprecation of v0 docs, the following files will need to be migrated/supported
|
||||
in the new docs repo:
|
||||
|
||||
- run_notebooks.yml: New repo should run Integration tests on code snippets?
|
||||
- people.yml: Need to fix and somehow display on the new docs site
|
||||
- Subsequently, `.github/actions/people/`
|
||||
- _test_doc_imports.yml
|
||||
- check-broken-links.yml
|
||||
2
.vscode/extensions.json
vendored
2
.vscode/extensions.json
vendored
@@ -6,8 +6,6 @@
|
||||
"ms-toolsai.jupyter",
|
||||
"ms-toolsai.jupyter-keymap",
|
||||
"ms-toolsai.jupyter-renderers",
|
||||
"ms-toolsai.vscode-jupyter-cell-tags",
|
||||
"ms-toolsai.vscode-jupyter-slideshow",
|
||||
"yzhang.markdown-all-in-one",
|
||||
"davidanson.vscode-markdownlint",
|
||||
"bierner.markdown-mermaid",
|
||||
|
||||
405
AGENTS.md
405
AGENTS.md
@@ -1,255 +1,58 @@
|
||||
# Global Development Guidelines for LangChain Projects
|
||||
# Global development guidelines for the LangChain monorepo
|
||||
|
||||
## Core Development Principles
|
||||
This document provides context to understand the LangChain Python project and assist with development.
|
||||
|
||||
### 1. Maintain Stable Public Interfaces ⚠️ CRITICAL
|
||||
## Project architecture and context
|
||||
|
||||
**Always attempt to preserve function signatures, argument positions, and names for exported/public methods.**
|
||||
### Monorepo structure
|
||||
|
||||
❌ **Bad - Breaking Change:**
|
||||
This is a Python monorepo with multiple independently versioned packages that use `uv`.
|
||||
|
||||
```python
|
||||
def get_user(id, verbose=False): # Changed from `user_id`
|
||||
pass
|
||||
```txt
|
||||
langchain/
|
||||
├── libs/
|
||||
│ ├── core/ # `langchain-core` primitives and base abstractions
|
||||
│ ├── langchain/ # `langchain-classic` (legacy, no new features)
|
||||
│ ├── langchain_v1/ # Actively maintained `langchain` package
|
||||
│ ├── partners/ # Third-party integrations
|
||||
│ │ ├── openai/ # OpenAI models and embeddings
|
||||
│ │ ├── anthropic/ # Anthropic (Claude) integration
|
||||
│ │ ├── ollama/ # Local model support
|
||||
│ │ └── ... (other integrations maintained by the LangChain team)
|
||||
│ ├── text-splitters/ # Document chunking utilities
|
||||
│ ├── standard-tests/ # Shared test suite for integrations
|
||||
│ ├── model-profiles/ # Model configuration profiles
|
||||
│ └── cli/ # Command-line interface tools
|
||||
├── .github/ # CI/CD workflows and templates
|
||||
├── .vscode/ # VSCode IDE standard settings and recommended extensions
|
||||
└── README.md # Information about LangChain
|
||||
```
|
||||
|
||||
✅ **Good - Stable Interface:**
|
||||
- **Core layer** (`langchain-core`): Base abstractions, interfaces, and protocols. Users should not need to know about this layer directly.
|
||||
- **Implementation layer** (`langchain`): Concrete implementations and high-level public utilities
|
||||
- **Integration layer** (`partners/`): Third-party service integrations. Note that this monorepo is not exhaustive of all LangChain integrations; some are maintained in separate repos, such as `langchain-ai/langchain-google` and `langchain-ai/langchain-aws`. Usually these repos are cloned at the same level as this monorepo, so if needed, you can refer to their code directly by navigating to `../langchain-google/` from this monorepo.
|
||||
- **Testing layer** (`standard-tests/`): Standardized integration tests for partner integrations
|
||||
|
||||
```python
|
||||
def get_user(user_id: str, verbose: bool = False) -> User:
|
||||
"""Retrieve user by ID with optional verbose output."""
|
||||
pass
|
||||
```
|
||||
### Development tools & commands
|
||||
|
||||
**Before making ANY changes to public APIs:**
|
||||
- `uv` – Fast Python package installer and resolver (replaces pip/poetry)
|
||||
- `make` – Task runner for common development commands. Feel free to look at the `Makefile` for available commands and usage patterns.
|
||||
- `ruff` – Fast Python linter and formatter
|
||||
- `mypy` – Static type checking
|
||||
- `pytest` – Testing framework
|
||||
|
||||
- Check if the function/class is exported in `__init__.py`
|
||||
- Look for existing usage patterns in tests and examples
|
||||
- Use keyword-only arguments for new parameters: `*, new_param: str = "default"`
|
||||
- Mark experimental features clearly with docstring warnings (using MkDocs Material admonitions, like `!!! warning`)
|
||||
This monorepo uses `uv` for dependency management. Local development uses editable installs: `[tool.uv.sources]`
|
||||
|
||||
🧠 *Ask yourself:* "Would this change break someone's code if they used it last week?"
|
||||
|
||||
### 2. Code Quality Standards
|
||||
|
||||
**All Python code MUST include type hints and return types.**
|
||||
|
||||
❌ **Bad:**
|
||||
|
||||
```python
|
||||
def p(u, d):
|
||||
return [x for x in u if x not in d]
|
||||
```
|
||||
|
||||
✅ **Good:**
|
||||
|
||||
```python
|
||||
def filter_unknown_users(users: list[str], known_users: set[str]) -> list[str]:
|
||||
"""Filter out users that are not in the known users set.
|
||||
|
||||
Args:
|
||||
users: List of user identifiers to filter.
|
||||
known_users: Set of known/valid user identifiers.
|
||||
|
||||
Returns:
|
||||
List of users that are not in the known_users set.
|
||||
"""
|
||||
return [user for user in users if user not in known_users]
|
||||
```
|
||||
|
||||
**Style Requirements:**
|
||||
|
||||
- Use descriptive, **self-explanatory variable names**. Avoid overly short or cryptic identifiers.
|
||||
- Attempt to break up complex functions (>20 lines) into smaller, focused functions where it makes sense
|
||||
- Avoid unnecessary abstraction or premature optimization
|
||||
- Follow existing patterns in the codebase you're modifying
|
||||
|
||||
### 3. Testing Requirements
|
||||
|
||||
**Every new feature or bugfix MUST be covered by unit tests.**
|
||||
|
||||
**Test Organization:**
|
||||
|
||||
- Unit tests: `tests/unit_tests/` (no network calls allowed)
|
||||
- Integration tests: `tests/integration_tests/` (network calls permitted)
|
||||
- Use `pytest` as the testing framework
|
||||
|
||||
**Test Quality Checklist:**
|
||||
|
||||
- [ ] Tests fail when your new logic is broken
|
||||
- [ ] Happy path is covered
|
||||
- [ ] Edge cases and error conditions are tested
|
||||
- [ ] Use fixtures/mocks for external dependencies
|
||||
- [ ] Tests are deterministic (no flaky tests)
|
||||
|
||||
Checklist questions:
|
||||
|
||||
- [ ] Does the test suite fail if your new logic is broken?
|
||||
- [ ] Are all expected behaviors exercised (happy path, invalid input, etc)?
|
||||
- [ ] Do tests use fixtures or mocks where needed?
|
||||
|
||||
```python
|
||||
def test_filter_unknown_users():
|
||||
"""Test filtering unknown users from a list."""
|
||||
users = ["alice", "bob", "charlie"]
|
||||
known_users = {"alice", "bob"}
|
||||
|
||||
result = filter_unknown_users(users, known_users)
|
||||
|
||||
assert result == ["charlie"]
|
||||
assert len(result) == 1
|
||||
```
|
||||
|
||||
### 4. Security and Risk Assessment
|
||||
|
||||
**Security Checklist:**
|
||||
|
||||
- No `eval()`, `exec()`, or `pickle` on user-controlled input
|
||||
- Proper exception handling (no bare `except:`) and use a `msg` variable for error messages
|
||||
- Remove unreachable/commented code before committing
|
||||
- Race conditions or resource leaks (file handles, sockets, threads).
|
||||
- Ensure proper resource cleanup (file handles, connections)
|
||||
|
||||
❌ **Bad:**
|
||||
|
||||
```python
|
||||
def load_config(path):
|
||||
with open(path) as f:
|
||||
return eval(f.read()) # ⚠️ Never eval config
|
||||
```
|
||||
|
||||
✅ **Good:**
|
||||
|
||||
```python
|
||||
import json
|
||||
|
||||
def load_config(path: str) -> dict:
|
||||
with open(path) as f:
|
||||
return json.load(f)
|
||||
```
|
||||
|
||||
### 5. Documentation Standards
|
||||
|
||||
**Use Google-style docstrings with Args section for all public functions.**
|
||||
|
||||
❌ **Insufficient Documentation:**
|
||||
|
||||
```python
|
||||
def send_email(to, msg):
|
||||
"""Send an email to a recipient."""
|
||||
```
|
||||
|
||||
✅ **Complete Documentation:**
|
||||
|
||||
```python
|
||||
def send_email(to: str, msg: str, *, priority: str = "normal") -> bool:
|
||||
"""
|
||||
Send an email to a recipient with specified priority.
|
||||
|
||||
Args:
|
||||
to: The email address of the recipient.
|
||||
msg: The message body to send.
|
||||
priority: Email priority level (`'low'`, `'normal'`, `'high'`).
|
||||
|
||||
Returns:
|
||||
`True` if email was sent successfully, `False` otherwise.
|
||||
|
||||
Raises:
|
||||
`InvalidEmailError`: If the email address format is invalid.
|
||||
`SMTPConnectionError`: If unable to connect to email server.
|
||||
"""
|
||||
```
|
||||
|
||||
**Documentation Guidelines:**
|
||||
|
||||
- Types go in function signatures, NOT in docstrings
|
||||
- If a default is present, DO NOT repeat it in the docstring unless there is post-processing or it is set conditionally.
|
||||
- Focus on "why" rather than "what" in descriptions
|
||||
- Document all parameters, return values, and exceptions
|
||||
- Keep descriptions concise but clear
|
||||
- Ensure American English spelling (e.g., "behavior", not "behaviour")
|
||||
|
||||
📌 *Tip:* Keep descriptions concise but clear. Only document return values if non-obvious.
|
||||
|
||||
### 6. Architectural Improvements
|
||||
|
||||
**When you encounter code that could be improved, suggest better designs:**
|
||||
|
||||
❌ **Poor Design:**
|
||||
|
||||
```python
|
||||
def process_data(data, db_conn, email_client, logger):
|
||||
# Function doing too many things
|
||||
validated = validate_data(data)
|
||||
result = db_conn.save(validated)
|
||||
email_client.send_notification(result)
|
||||
logger.log(f"Processed {len(data)} items")
|
||||
return result
|
||||
```
|
||||
|
||||
✅ **Better Design:**
|
||||
|
||||
```python
|
||||
@dataclass
|
||||
class ProcessingResult:
|
||||
"""Result of data processing operation."""
|
||||
items_processed: int
|
||||
success: bool
|
||||
errors: List[str] = field(default_factory=list)
|
||||
|
||||
class DataProcessor:
|
||||
"""Handles data validation, storage, and notification."""
|
||||
|
||||
def __init__(self, db_conn: Database, email_client: EmailClient):
|
||||
self.db = db_conn
|
||||
self.email = email_client
|
||||
|
||||
def process(self, data: List[dict]) -> ProcessingResult:
|
||||
"""Process and store data with notifications."""
|
||||
validated = self._validate_data(data)
|
||||
result = self.db.save(validated)
|
||||
self._notify_completion(result)
|
||||
return result
|
||||
```
|
||||
|
||||
**Design Improvement Areas:**
|
||||
|
||||
If there's a **cleaner**, **more scalable**, or **simpler** design, highlight it and suggest improvements that would:
|
||||
|
||||
- Reduce code duplication through shared utilities
|
||||
- Make unit testing easier
|
||||
- Improve separation of concerns (single responsibility)
|
||||
- Make unit testing easier through dependency injection
|
||||
- Add clarity without adding complexity
|
||||
- Prefer dataclasses for structured data
|
||||
|
||||
## Development Tools & Commands
|
||||
|
||||
### Package Management
|
||||
|
||||
```bash
|
||||
# Add package
|
||||
uv add package-name
|
||||
|
||||
# Sync project dependencies
|
||||
uv sync
|
||||
uv lock
|
||||
```
|
||||
|
||||
### Testing
|
||||
Each package in `libs/` has its own `pyproject.toml` and `uv.lock`.
|
||||
|
||||
```bash
|
||||
# Run unit tests (no network)
|
||||
make test
|
||||
|
||||
# Don't run integration tests, as API keys must be set
|
||||
|
||||
# Run specific test file
|
||||
uv run --group test pytest tests/unit_tests/test_specific.py
|
||||
```
|
||||
|
||||
### Code Quality
|
||||
|
||||
```bash
|
||||
# Lint code
|
||||
make lint
|
||||
@@ -261,66 +64,118 @@ make format
|
||||
uv run --group lint mypy .
|
||||
```
|
||||
|
||||
### Dependency Management Patterns
|
||||
#### Key config files
|
||||
|
||||
**Local Development Dependencies:**
|
||||
- pyproject.toml: Main workspace configuration with dependency groups
|
||||
- uv.lock: Locked dependencies for reproducible builds
|
||||
- Makefile: Development tasks
|
||||
|
||||
```toml
|
||||
[tool.uv.sources]
|
||||
langchain-core = { path = "../core", editable = true }
|
||||
langchain-tests = { path = "../standard-tests", editable = true }
|
||||
```
|
||||
#### Commit standards
|
||||
|
||||
**For tools, use the `@tool` decorator from `langchain_core.tools`:**
|
||||
Suggest PR titles that follow the Conventional Commits format. Refer to `.github/workflows/pr_lint.yml` for the allowed types and scopes.
|
||||
|
||||
```python
|
||||
from langchain_core.tools import tool
|
||||
#### Pull request guidelines
|
||||
|
||||
@tool
|
||||
def search_database(query: str) -> str:
|
||||
"""Search the database for relevant information.
|
||||
- Always add a disclaimer to the PR description mentioning how AI agents are involved with the contribution.
|
||||
- Describe the "why" of the changes, why the proposed solution is the right one. Limit prose.
|
||||
- Highlight areas of the proposed changes that require careful review.
|
||||
|
||||
## Core development principles
|
||||
|
||||
### Maintain stable public interfaces
|
||||
|
||||
CRITICAL: Always attempt to preserve function signatures, argument positions, and names for exported/public methods. Do not make breaking changes.
|
||||
|
||||
**Before making ANY changes to public APIs:**
|
||||
|
||||
- Check if the function/class is exported in `__init__.py`
|
||||
- Look for existing usage patterns in tests and examples
|
||||
- Use keyword-only arguments for new parameters: `*, new_param: str = "default"`
|
||||
- Mark experimental features clearly with docstring warnings (using MkDocs Material admonitions, like `!!! warning`)
|
||||
|
||||
Ask: "Would this change break someone's code if they used it last week?"
|
||||
|
||||
### Code quality standards
|
||||
|
||||
All Python code MUST include type hints and return types.
|
||||
|
||||
```python title="Example"
|
||||
def filter_unknown_users(users: list[str], known_users: set[str]) -> list[str]:
|
||||
"""Single line description of the function.
|
||||
|
||||
Any additional context about the function can go here.
|
||||
|
||||
Args:
|
||||
query: The search query string.
|
||||
users: List of user identifiers to filter.
|
||||
known_users: Set of known/valid user identifiers.
|
||||
|
||||
Returns:
|
||||
List of users that are not in the known_users set.
|
||||
"""
|
||||
# Implementation here
|
||||
return results
|
||||
```
|
||||
|
||||
## Commit Standards
|
||||
- Use descriptive, self-explanatory variable names.
|
||||
- Follow existing patterns in the codebase you're modifying
|
||||
- Attempt to break up complex functions (>20 lines) into smaller, focused functions where it makes sense
|
||||
|
||||
**Use Conventional Commits format for PR titles:**
|
||||
### Testing requirements
|
||||
|
||||
- `feat(core): add multi-tenant support`
|
||||
- `fix(cli): resolve flag parsing error`
|
||||
- `docs: update API usage examples`
|
||||
- `docs(openai): update API usage examples`
|
||||
Every new feature or bugfix MUST be covered by unit tests.
|
||||
|
||||
## Framework-Specific Guidelines
|
||||
- Unit tests: `tests/unit_tests/` (no network calls allowed)
|
||||
- Integration tests: `tests/integration_tests/` (network calls permitted)
|
||||
- We use `pytest` as the testing framework; if in doubt, check other existing tests for examples.
|
||||
- The testing file structure should mirror the source code structure.
|
||||
|
||||
- Follow the existing patterns in `langchain-core` for base abstractions
|
||||
- Use `langchain_core.callbacks` for execution tracking
|
||||
- Implement proper streaming support where applicable
|
||||
- Avoid deprecated components like legacy `LLMChain`
|
||||
**Checklist:**
|
||||
|
||||
### Partner Integrations
|
||||
- [ ] Tests fail when your new logic is broken
|
||||
- [ ] Happy path is covered
|
||||
- [ ] Edge cases and error conditions are tested
|
||||
- [ ] Use fixtures/mocks for external dependencies
|
||||
- [ ] Tests are deterministic (no flaky tests)
|
||||
- [ ] Does the test suite fail if your new logic is broken?
|
||||
|
||||
- Follow the established patterns in existing partner libraries
|
||||
- Implement standard interfaces (`BaseChatModel`, `BaseEmbeddings`, etc.)
|
||||
- Include comprehensive integration tests
|
||||
- Document API key requirements and authentication
|
||||
### Security and risk assessment
|
||||
|
||||
---
|
||||
- No `eval()`, `exec()`, or `pickle` on user-controlled input
|
||||
- Proper exception handling (no bare `except:`) and use a `msg` variable for error messages
|
||||
- Remove unreachable/commented code before committing
|
||||
- Race conditions or resource leaks (file handles, sockets, threads).
|
||||
- Ensure proper resource cleanup (file handles, connections)
|
||||
|
||||
## Quick Reference Checklist
|
||||
### Documentation standards
|
||||
|
||||
Before submitting code changes:
|
||||
Use Google-style docstrings with Args section for all public functions.
|
||||
|
||||
- [ ] **Breaking Changes**: Verified no public API changes
|
||||
- [ ] **Type Hints**: All functions have complete type annotations
|
||||
- [ ] **Tests**: New functionality is fully tested
|
||||
- [ ] **Security**: No dangerous patterns (eval, silent failures, etc.)
|
||||
- [ ] **Documentation**: Google-style docstrings for public functions
|
||||
- [ ] **Code Quality**: `make lint` and `make format` pass
|
||||
- [ ] **Architecture**: Suggested improvements where applicable
|
||||
- [ ] **Commit Message**: Follows Conventional Commits format
|
||||
```python title="Example"
|
||||
def send_email(to: str, msg: str, *, priority: str = "normal") -> bool:
|
||||
"""Send an email to a recipient with specified priority.
|
||||
|
||||
Any additional context about the function can go here.
|
||||
|
||||
Args:
|
||||
to: The email address of the recipient.
|
||||
msg: The message body to send.
|
||||
priority: Email priority level.
|
||||
|
||||
Returns:
|
||||
`True` if email was sent successfully, `False` otherwise.
|
||||
|
||||
Raises:
|
||||
InvalidEmailError: If the email address format is invalid.
|
||||
SMTPConnectionError: If unable to connect to email server.
|
||||
"""
|
||||
```
|
||||
|
||||
- Types go in function signatures, NOT in docstrings
|
||||
- If a default is present, DO NOT repeat it in the docstring unless there is post-processing or it is set conditionally.
|
||||
- Focus on "why" rather than "what" in descriptions
|
||||
- Document all parameters, return values, and exceptions
|
||||
- Keep descriptions concise but clear
|
||||
- Ensure American English spelling (e.g., "behavior", not "behaviour")
|
||||
|
||||
## Additional resources
|
||||
|
||||
- **Documentation:** https://docs.langchain.com/oss/python/langchain/overview and source at https://github.com/langchain-ai/docs or `../docs/`. Prefer the local install and use file search tools for best results. If needed, use the docs MCP server as defined in `.mcp.json` for programmatic access.
|
||||
- **Contributing Guide:** [`.github/CONTRIBUTING.md`](https://docs.langchain.com/oss/python/contributing/overview)
|
||||
|
||||
405
CLAUDE.md
405
CLAUDE.md
@@ -1,255 +1,58 @@
|
||||
# Global Development Guidelines for LangChain Projects
|
||||
# Global development guidelines for the LangChain monorepo
|
||||
|
||||
## Core Development Principles
|
||||
This document provides context to understand the LangChain Python project and assist with development.
|
||||
|
||||
### 1. Maintain Stable Public Interfaces ⚠️ CRITICAL
|
||||
## Project architecture and context
|
||||
|
||||
**Always attempt to preserve function signatures, argument positions, and names for exported/public methods.**
|
||||
### Monorepo structure
|
||||
|
||||
❌ **Bad - Breaking Change:**
|
||||
This is a Python monorepo with multiple independently versioned packages that use `uv`.
|
||||
|
||||
```python
|
||||
def get_user(id, verbose=False): # Changed from `user_id`
|
||||
pass
|
||||
```txt
|
||||
langchain/
|
||||
├── libs/
|
||||
│ ├── core/ # `langchain-core` primitives and base abstractions
|
||||
│ ├── langchain/ # `langchain-classic` (legacy, no new features)
|
||||
│ ├── langchain_v1/ # Actively maintained `langchain` package
|
||||
│ ├── partners/ # Third-party integrations
|
||||
│ │ ├── openai/ # OpenAI models and embeddings
|
||||
│ │ ├── anthropic/ # Anthropic (Claude) integration
|
||||
│ │ ├── ollama/ # Local model support
|
||||
│ │ └── ... (other integrations maintained by the LangChain team)
|
||||
│ ├── text-splitters/ # Document chunking utilities
|
||||
│ ├── standard-tests/ # Shared test suite for integrations
|
||||
│ ├── model-profiles/ # Model configuration profiles
|
||||
│ └── cli/ # Command-line interface tools
|
||||
├── .github/ # CI/CD workflows and templates
|
||||
├── .vscode/ # VSCode IDE standard settings and recommended extensions
|
||||
└── README.md # Information about LangChain
|
||||
```
|
||||
|
||||
✅ **Good - Stable Interface:**
|
||||
- **Core layer** (`langchain-core`): Base abstractions, interfaces, and protocols. Users should not need to know about this layer directly.
|
||||
- **Implementation layer** (`langchain`): Concrete implementations and high-level public utilities
|
||||
- **Integration layer** (`partners/`): Third-party service integrations. Note that this monorepo is not exhaustive of all LangChain integrations; some are maintained in separate repos, such as `langchain-ai/langchain-google` and `langchain-ai/langchain-aws`. Usually these repos are cloned at the same level as this monorepo, so if needed, you can refer to their code directly by navigating to `../langchain-google/` from this monorepo.
|
||||
- **Testing layer** (`standard-tests/`): Standardized integration tests for partner integrations
|
||||
|
||||
```python
|
||||
def get_user(user_id: str, verbose: bool = False) -> User:
|
||||
"""Retrieve user by ID with optional verbose output."""
|
||||
pass
|
||||
```
|
||||
### Development tools & commands
|
||||
|
||||
**Before making ANY changes to public APIs:**
|
||||
- `uv` – Fast Python package installer and resolver (replaces pip/poetry)
|
||||
- `make` – Task runner for common development commands. Feel free to look at the `Makefile` for available commands and usage patterns.
|
||||
- `ruff` – Fast Python linter and formatter
|
||||
- `mypy` – Static type checking
|
||||
- `pytest` – Testing framework
|
||||
|
||||
- Check if the function/class is exported in `__init__.py`
|
||||
- Look for existing usage patterns in tests and examples
|
||||
- Use keyword-only arguments for new parameters: `*, new_param: str = "default"`
|
||||
- Mark experimental features clearly with docstring warnings (using MkDocs Material admonitions, like `!!! warning`)
|
||||
This monorepo uses `uv` for dependency management. Local development uses editable installs: `[tool.uv.sources]`
|
||||
|
||||
🧠 *Ask yourself:* "Would this change break someone's code if they used it last week?"
|
||||
|
||||
### 2. Code Quality Standards
|
||||
|
||||
**All Python code MUST include type hints and return types.**
|
||||
|
||||
❌ **Bad:**
|
||||
|
||||
```python
|
||||
def p(u, d):
|
||||
return [x for x in u if x not in d]
|
||||
```
|
||||
|
||||
✅ **Good:**
|
||||
|
||||
```python
|
||||
def filter_unknown_users(users: list[str], known_users: set[str]) -> list[str]:
|
||||
"""Filter out users that are not in the known users set.
|
||||
|
||||
Args:
|
||||
users: List of user identifiers to filter.
|
||||
known_users: Set of known/valid user identifiers.
|
||||
|
||||
Returns:
|
||||
List of users that are not in the known_users set.
|
||||
"""
|
||||
return [user for user in users if user not in known_users]
|
||||
```
|
||||
|
||||
**Style Requirements:**
|
||||
|
||||
- Use descriptive, **self-explanatory variable names**. Avoid overly short or cryptic identifiers.
|
||||
- Attempt to break up complex functions (>20 lines) into smaller, focused functions where it makes sense
|
||||
- Avoid unnecessary abstraction or premature optimization
|
||||
- Follow existing patterns in the codebase you're modifying
|
||||
|
||||
### 3. Testing Requirements
|
||||
|
||||
**Every new feature or bugfix MUST be covered by unit tests.**
|
||||
|
||||
**Test Organization:**
|
||||
|
||||
- Unit tests: `tests/unit_tests/` (no network calls allowed)
|
||||
- Integration tests: `tests/integration_tests/` (network calls permitted)
|
||||
- Use `pytest` as the testing framework
|
||||
|
||||
**Test Quality Checklist:**
|
||||
|
||||
- [ ] Tests fail when your new logic is broken
|
||||
- [ ] Happy path is covered
|
||||
- [ ] Edge cases and error conditions are tested
|
||||
- [ ] Use fixtures/mocks for external dependencies
|
||||
- [ ] Tests are deterministic (no flaky tests)
|
||||
|
||||
Checklist questions:
|
||||
|
||||
- [ ] Does the test suite fail if your new logic is broken?
|
||||
- [ ] Are all expected behaviors exercised (happy path, invalid input, etc)?
|
||||
- [ ] Do tests use fixtures or mocks where needed?
|
||||
|
||||
```python
|
||||
def test_filter_unknown_users():
|
||||
"""Test filtering unknown users from a list."""
|
||||
users = ["alice", "bob", "charlie"]
|
||||
known_users = {"alice", "bob"}
|
||||
|
||||
result = filter_unknown_users(users, known_users)
|
||||
|
||||
assert result == ["charlie"]
|
||||
assert len(result) == 1
|
||||
```
|
||||
|
||||
### 4. Security and Risk Assessment
|
||||
|
||||
**Security Checklist:**
|
||||
|
||||
- No `eval()`, `exec()`, or `pickle` on user-controlled input
|
||||
- Proper exception handling (no bare `except:`) and use a `msg` variable for error messages
|
||||
- Remove unreachable/commented code before committing
|
||||
- Race conditions or resource leaks (file handles, sockets, threads).
|
||||
- Ensure proper resource cleanup (file handles, connections)
|
||||
|
||||
❌ **Bad:**
|
||||
|
||||
```python
|
||||
def load_config(path):
|
||||
with open(path) as f:
|
||||
return eval(f.read()) # ⚠️ Never eval config
|
||||
```
|
||||
|
||||
✅ **Good:**
|
||||
|
||||
```python
|
||||
import json
|
||||
|
||||
def load_config(path: str) -> dict:
|
||||
with open(path) as f:
|
||||
return json.load(f)
|
||||
```
|
||||
|
||||
### 5. Documentation Standards
|
||||
|
||||
**Use Google-style docstrings with Args section for all public functions.**
|
||||
|
||||
❌ **Insufficient Documentation:**
|
||||
|
||||
```python
|
||||
def send_email(to, msg):
|
||||
"""Send an email to a recipient."""
|
||||
```
|
||||
|
||||
✅ **Complete Documentation:**
|
||||
|
||||
```python
|
||||
def send_email(to: str, msg: str, *, priority: str = "normal") -> bool:
|
||||
"""
|
||||
Send an email to a recipient with specified priority.
|
||||
|
||||
Args:
|
||||
to: The email address of the recipient.
|
||||
msg: The message body to send.
|
||||
priority: Email priority level (`'low'`, `'normal'`, `'high'`).
|
||||
|
||||
Returns:
|
||||
`True` if email was sent successfully, `False` otherwise.
|
||||
|
||||
Raises:
|
||||
`InvalidEmailError`: If the email address format is invalid.
|
||||
`SMTPConnectionError`: If unable to connect to email server.
|
||||
"""
|
||||
```
|
||||
|
||||
**Documentation Guidelines:**
|
||||
|
||||
- Types go in function signatures, NOT in docstrings
|
||||
- If a default is present, DO NOT repeat it in the docstring unless there is post-processing or it is set conditionally.
|
||||
- Focus on "why" rather than "what" in descriptions
|
||||
- Document all parameters, return values, and exceptions
|
||||
- Keep descriptions concise but clear
|
||||
- Ensure American English spelling (e.g., "behavior", not "behaviour")
|
||||
|
||||
📌 *Tip:* Keep descriptions concise but clear. Only document return values if non-obvious.
|
||||
|
||||
### 6. Architectural Improvements
|
||||
|
||||
**When you encounter code that could be improved, suggest better designs:**
|
||||
|
||||
❌ **Poor Design:**
|
||||
|
||||
```python
|
||||
def process_data(data, db_conn, email_client, logger):
|
||||
# Function doing too many things
|
||||
validated = validate_data(data)
|
||||
result = db_conn.save(validated)
|
||||
email_client.send_notification(result)
|
||||
logger.log(f"Processed {len(data)} items")
|
||||
return result
|
||||
```
|
||||
|
||||
✅ **Better Design:**
|
||||
|
||||
```python
|
||||
@dataclass
|
||||
class ProcessingResult:
|
||||
"""Result of data processing operation."""
|
||||
items_processed: int
|
||||
success: bool
|
||||
errors: List[str] = field(default_factory=list)
|
||||
|
||||
class DataProcessor:
|
||||
"""Handles data validation, storage, and notification."""
|
||||
|
||||
def __init__(self, db_conn: Database, email_client: EmailClient):
|
||||
self.db = db_conn
|
||||
self.email = email_client
|
||||
|
||||
def process(self, data: List[dict]) -> ProcessingResult:
|
||||
"""Process and store data with notifications."""
|
||||
validated = self._validate_data(data)
|
||||
result = self.db.save(validated)
|
||||
self._notify_completion(result)
|
||||
return result
|
||||
```
|
||||
|
||||
**Design Improvement Areas:**
|
||||
|
||||
If there's a **cleaner**, **more scalable**, or **simpler** design, highlight it and suggest improvements that would:
|
||||
|
||||
- Reduce code duplication through shared utilities
|
||||
- Make unit testing easier
|
||||
- Improve separation of concerns (single responsibility)
|
||||
- Make unit testing easier through dependency injection
|
||||
- Add clarity without adding complexity
|
||||
- Prefer dataclasses for structured data
|
||||
|
||||
## Development Tools & Commands
|
||||
|
||||
### Package Management
|
||||
|
||||
```bash
|
||||
# Add package
|
||||
uv add package-name
|
||||
|
||||
# Sync project dependencies
|
||||
uv sync
|
||||
uv lock
|
||||
```
|
||||
|
||||
### Testing
|
||||
Each package in `libs/` has its own `pyproject.toml` and `uv.lock`.
|
||||
|
||||
```bash
|
||||
# Run unit tests (no network)
|
||||
make test
|
||||
|
||||
# Don't run integration tests, as API keys must be set
|
||||
|
||||
# Run specific test file
|
||||
uv run --group test pytest tests/unit_tests/test_specific.py
|
||||
```
|
||||
|
||||
### Code Quality
|
||||
|
||||
```bash
|
||||
# Lint code
|
||||
make lint
|
||||
@@ -261,66 +64,118 @@ make format
|
||||
uv run --group lint mypy .
|
||||
```
|
||||
|
||||
### Dependency Management Patterns
|
||||
#### Key config files
|
||||
|
||||
**Local Development Dependencies:**
|
||||
- pyproject.toml: Main workspace configuration with dependency groups
|
||||
- uv.lock: Locked dependencies for reproducible builds
|
||||
- Makefile: Development tasks
|
||||
|
||||
```toml
|
||||
[tool.uv.sources]
|
||||
langchain-core = { path = "../core", editable = true }
|
||||
langchain-tests = { path = "../standard-tests", editable = true }
|
||||
```
|
||||
#### Commit standards
|
||||
|
||||
**For tools, use the `@tool` decorator from `langchain_core.tools`:**
|
||||
Suggest PR titles that follow Conventional Commits format. Refer to .github/workflows/pr_lint for allowed types and scopes.
|
||||
|
||||
```python
|
||||
from langchain_core.tools import tool
|
||||
#### Pull request guidelines
|
||||
|
||||
@tool
|
||||
def search_database(query: str) -> str:
|
||||
"""Search the database for relevant information.
|
||||
- Always add a disclaimer to the PR description mentioning how AI agents are involved with the contribution.
|
||||
- Describe the "why" of the changes, why the proposed solution is the right one. Limit prose.
|
||||
- Highlight areas of the proposed changes that require careful review.
|
||||
|
||||
## Core development principles
|
||||
|
||||
### Maintain stable public interfaces
|
||||
|
||||
CRITICAL: Always attempt to preserve function signatures, argument positions, and names for exported/public methods. Do not make breaking changes.
|
||||
|
||||
**Before making ANY changes to public APIs:**
|
||||
|
||||
- Check if the function/class is exported in `__init__.py`
|
||||
- Look for existing usage patterns in tests and examples
|
||||
- Use keyword-only arguments for new parameters: `*, new_param: str = "default"`
|
||||
- Mark experimental features clearly with docstring warnings (using MkDocs Material admonitions, like `!!! warning`)
|
||||
|
||||
Ask: "Would this change break someone's code if they used it last week?"
|
||||
|
||||
### Code quality standards
|
||||
|
||||
All Python code MUST include type hints and return types.
|
||||
|
||||
```python title="Example"
|
||||
def filter_unknown_users(users: list[str], known_users: set[str]) -> list[str]:
|
||||
"""Single line description of the function.
|
||||
|
||||
Any additional context about the function can go here.
|
||||
|
||||
Args:
|
||||
query: The search query string.
|
||||
users: List of user identifiers to filter.
|
||||
known_users: Set of known/valid user identifiers.
|
||||
|
||||
Returns:
|
||||
List of users that are not in the known_users set.
|
||||
"""
|
||||
# Implementation here
|
||||
return results
|
||||
```
|
||||
|
||||
## Commit Standards
|
||||
- Use descriptive, self-explanatory variable names.
|
||||
- Follow existing patterns in the codebase you're modifying
|
||||
- Attempt to break up complex functions (>20 lines) into smaller, focused functions where it makes sense
|
||||
|
||||
**Use Conventional Commits format for PR titles:**
|
||||
### Testing requirements
|
||||
|
||||
- `feat(core): add multi-tenant support`
|
||||
- `fix(cli): resolve flag parsing error`
|
||||
- `docs: update API usage examples`
|
||||
- `docs(openai): update API usage examples`
|
||||
Every new feature or bugfix MUST be covered by unit tests.
|
||||
|
||||
## Framework-Specific Guidelines
|
||||
- Unit tests: `tests/unit_tests/` (no network calls allowed)
|
||||
- Integration tests: `tests/integration_tests/` (network calls permitted)
|
||||
- We use `pytest` as the testing framework; if in doubt, check other existing tests for examples.
|
||||
- The testing file structure should mirror the source code structure.
|
||||
|
||||
- Follow the existing patterns in `langchain-core` for base abstractions
|
||||
- Use `langchain_core.callbacks` for execution tracking
|
||||
- Implement proper streaming support where applicable
|
||||
- Avoid deprecated components like legacy `LLMChain`
|
||||
**Checklist:**
|
||||
|
||||
### Partner Integrations
|
||||
- [ ] Tests fail when your new logic is broken
|
||||
- [ ] Happy path is covered
|
||||
- [ ] Edge cases and error conditions are tested
|
||||
- [ ] Use fixtures/mocks for external dependencies
|
||||
- [ ] Tests are deterministic (no flaky tests)
|
||||
- [ ] Does the test suite fail if your new logic is broken?
|
||||
|
||||
- Follow the established patterns in existing partner libraries
|
||||
- Implement standard interfaces (`BaseChatModel`, `BaseEmbeddings`, etc.)
|
||||
- Include comprehensive integration tests
|
||||
- Document API key requirements and authentication
|
||||
### Security and risk assessment
|
||||
|
||||
---
|
||||
- No `eval()`, `exec()`, or `pickle` on user-controlled input
|
||||
- Use proper exception handling (no bare `except:`) and a `msg` variable for error messages
|
||||
- Remove unreachable/commented code before committing
|
||||
- Check for race conditions or resource leaks (file handles, sockets, threads).
|
||||
- Ensure proper resource cleanup (file handles, connections)
|
||||
|
||||
## Quick Reference Checklist
|
||||
### Documentation standards
|
||||
|
||||
Before submitting code changes:
|
||||
Use Google-style docstrings with Args section for all public functions.
|
||||
|
||||
- [ ] **Breaking Changes**: Verified no public API changes
|
||||
- [ ] **Type Hints**: All functions have complete type annotations
|
||||
- [ ] **Tests**: New functionality is fully tested
|
||||
- [ ] **Security**: No dangerous patterns (eval, silent failures, etc.)
|
||||
- [ ] **Documentation**: Google-style docstrings for public functions
|
||||
- [ ] **Code Quality**: `make lint` and `make format` pass
|
||||
- [ ] **Architecture**: Suggested improvements where applicable
|
||||
- [ ] **Commit Message**: Follows Conventional Commits format
|
||||
```python title="Example"
|
||||
def send_email(to: str, msg: str, *, priority: str = "normal") -> bool:
|
||||
"""Send an email to a recipient with specified priority.
|
||||
|
||||
Any additional context about the function can go here.
|
||||
|
||||
Args:
|
||||
to: The email address of the recipient.
|
||||
msg: The message body to send.
|
||||
priority: Email priority level.
|
||||
|
||||
Returns:
|
||||
`True` if email was sent successfully, `False` otherwise.
|
||||
|
||||
Raises:
|
||||
InvalidEmailError: If the email address format is invalid.
|
||||
SMTPConnectionError: If unable to connect to email server.
|
||||
"""
|
||||
```
|
||||
|
||||
- Types go in function signatures, NOT in docstrings
|
||||
- If a default is present, DO NOT repeat it in the docstring unless there is post-processing or it is set conditionally.
|
||||
- Focus on "why" rather than "what" in descriptions
|
||||
- Document all parameters, return values, and exceptions
|
||||
- Keep descriptions concise but clear
|
||||
- Ensure American English spelling (e.g., "behavior", not "behaviour")
|
||||
|
||||
## Additional resources
|
||||
|
||||
- **Documentation:** https://docs.langchain.com/oss/python/langchain/overview and source at https://github.com/langchain-ai/docs or `../docs/`. Prefer the local install and use file search tools for best results. If needed, use the docs MCP server as defined in `.mcp.json` for programmatic access.
|
||||
- **Contributing Guide:** [`.github/CONTRIBUTING.md`](https://docs.langchain.com/oss/python/contributing/overview)
|
||||
|
||||
@@ -1,9 +0,0 @@
|
||||
# Migrating
|
||||
|
||||
Please see the following guides for migrating LangChain code:
|
||||
|
||||
* Migrate to [LangChain v1.0](https://docs.langchain.com/oss/python/migrate/langchain-v1)
|
||||
* Migrate to [LangChain v0.3](https://python.langchain.com/docs/versions/v0_3/)
|
||||
* Migrate to [LangChain v0.2](https://python.langchain.com/docs/versions/v0_2/)
|
||||
* Migrating from [LangChain 0.0.x Chains](https://python.langchain.com/docs/versions/migrating_chains/)
|
||||
* Upgrade to [LangGraph Memory](https://python.langchain.com/docs/versions/migrating_memory/)
|
||||
80
SECURITY.md
80
SECURITY.md
@@ -1,80 +0,0 @@
|
||||
# Security Policy
|
||||
|
||||
LangChain has a large ecosystem of integrations with various external resources like local and remote file systems, APIs and databases. These integrations allow developers to create versatile applications that combine the power of LLMs with the ability to access, interact with and manipulate external resources.
|
||||
|
||||
## Best practices
|
||||
|
||||
When building such applications, developers should remember to follow good security practices:
|
||||
|
||||
* [**Limit Permissions**](https://en.wikipedia.org/wiki/Principle_of_least_privilege): Scope permissions specifically to the application's need. Granting broad or excessive permissions can introduce significant security vulnerabilities. To avoid such vulnerabilities, consider using read-only credentials, disallowing access to sensitive resources, using sandboxing techniques (such as running inside a container), specifying proxy configurations to control external requests, etc., as appropriate for your application.
|
||||
* **Anticipate Potential Misuse**: Just as humans can err, so can Large Language Models (LLMs). Always assume that any system access or credentials may be used in any way allowed by the permissions they are assigned. For example, if a pair of database credentials allows deleting data, it's safest to assume that any LLM able to use those credentials may in fact delete data.
|
||||
* [**Defense in Depth**](https://en.wikipedia.org/wiki/Defense_in_depth_(computing)): No security technique is perfect. Fine-tuning and good chain design can reduce, but not eliminate, the odds that a Large Language Model (LLM) may make a mistake. It's best to combine multiple layered security approaches rather than relying on any single layer of defense to ensure security. For example: use both read-only permissions and sandboxing to ensure that LLMs are only able to access data that is explicitly meant for them to use.
|
||||
|
||||
Risks of not doing so include, but are not limited to:
|
||||
|
||||
* Data corruption or loss.
|
||||
* Unauthorized access to confidential information.
|
||||
* Compromised performance or availability of critical resources.
|
||||
|
||||
Example scenarios with mitigation strategies:
|
||||
|
||||
* A user may ask an agent with access to the file system to delete files that should not be deleted or read the content of files that contain sensitive information. To mitigate, limit the agent to only use a specific directory and only allow it to read or write files that are safe to read or write. Consider further sandboxing the agent by running it in a container.
|
||||
* A user may ask an agent with write access to an external API to write malicious data to the API, or delete data from that API. To mitigate, give the agent read-only API keys, or limit it to only use endpoints that are already resistant to such misuse.
|
||||
* A user may ask an agent with access to a database to drop a table or mutate the schema. To mitigate, scope the credentials to only the tables that the agent needs to access and consider issuing READ-ONLY credentials.
|
||||
|
||||
If you're building applications that access external resources like file systems, APIs or databases, consider speaking with your company's security team to determine how to best design and secure your applications.
|
||||
|
||||
## Reporting OSS Vulnerabilities
|
||||
|
||||
LangChain is partnered with [huntr by Protect AI](https://huntr.com/) to provide
|
||||
a bounty program for our open source projects.
|
||||
|
||||
Please report security vulnerabilities associated with the LangChain
|
||||
open source projects at [huntr](https://huntr.com/bounties/disclose/?target=https%3A%2F%2Fgithub.com%2Flangchain-ai%2Flangchain&validSearch=true).
|
||||
|
||||
Before reporting a vulnerability, please review:
|
||||
|
||||
1) In-Scope Targets and Out-of-Scope Targets below.
|
||||
2) The [langchain-ai/langchain](https://docs.langchain.com/oss/python/contributing/code#repository-structure) monorepo structure.
|
||||
3) The [Best Practices](#best-practices) above to understand what we consider to be a security vulnerability vs. developer responsibility.
|
||||
|
||||
### In-Scope Targets
|
||||
|
||||
The following packages and repositories are eligible for bug bounties:
|
||||
|
||||
* langchain-core
|
||||
* langchain (see exceptions)
|
||||
* langchain-community (see exceptions)
|
||||
* langgraph
|
||||
* langserve
|
||||
|
||||
### Out of Scope Targets
|
||||
|
||||
All out of scope targets defined by huntr as well as:
|
||||
|
||||
* **langchain-experimental**: This repository is for experimental code and is not
|
||||
eligible for bug bounties (see [package warning](https://pypi.org/project/langchain-experimental/)); bug reports to it will be marked as interesting or a waste of
|
||||
time and published with no bounty attached.
|
||||
* **tools**: Tools in either `langchain` or `langchain-community` are not eligible for bug
|
||||
bounties. This includes the following directories:
|
||||
* `libs/langchain/langchain/tools`
|
||||
* `libs/community/langchain_community/tools`
|
||||
* Please review the [Best Practices](#best-practices)
|
||||
for more details, but generally tools interact with the real world. Developers are
|
||||
expected to understand the security implications of their code and are responsible
|
||||
for the security of their tools.
|
||||
* Code documented with security notices. This will be decided on a case-by-case basis, but likely will not be eligible for a bounty as the code is already
|
||||
documented with guidelines for developers that should be followed for making their
|
||||
application secure.
|
||||
* Any LangSmith related repositories or APIs (see [Reporting LangSmith Vulnerabilities](#reporting-langsmith-vulnerabilities)).
|
||||
|
||||
## Reporting LangSmith Vulnerabilities
|
||||
|
||||
Please report security vulnerabilities associated with LangSmith by email to `security@langchain.dev`.
|
||||
|
||||
* LangSmith site: [https://smith.langchain.com](https://smith.langchain.com)
|
||||
* SDK client: [https://github.com/langchain-ai/langsmith-sdk](https://github.com/langchain-ai/langsmith-sdk)
|
||||
|
||||
### Other Security Concerns
|
||||
|
||||
For any other security concerns, please contact us at `security@langchain.dev`.
|
||||
20
libs/Makefile
Normal file
20
libs/Makefile
Normal file
@@ -0,0 +1,20 @@
|
||||
# Makefile for libs/ directory
|
||||
# Contains targets that operate across multiple packages
|
||||
|
||||
LANGCHAIN_DIRS = core text-splitters langchain langchain_v1 model-profiles
|
||||
|
||||
.PHONY: lock check-lock
|
||||
|
||||
# Regenerate lockfiles for all core packages
|
||||
lock:
|
||||
@for dir in $(LANGCHAIN_DIRS); do \
|
||||
echo "=== Locking $$dir ==="; \
|
||||
(cd $$dir && uv lock); \
|
||||
done
|
||||
|
||||
# Verify all lockfiles are up-to-date
|
||||
check-lock:
|
||||
@for dir in $(LANGCHAIN_DIRS); do \
|
||||
echo "=== Checking $$dir ==="; \
|
||||
(cd $$dir && uv lock --check) || exit 1; \
|
||||
done
|
||||
@@ -28,6 +28,27 @@ from pydantic.v1.fields import FieldInfo as FieldInfoV1
|
||||
from langchain_core._api.internal import is_caller_internal
|
||||
|
||||
|
||||
def _build_deprecation_message(
|
||||
*,
|
||||
alternative: str = "",
|
||||
alternative_import: str = "",
|
||||
) -> str:
|
||||
"""Build a simple deprecation message for `__deprecated__` attribute.
|
||||
|
||||
Args:
|
||||
alternative: An alternative API name.
|
||||
alternative_import: A fully qualified import path for the alternative.
|
||||
|
||||
Returns:
|
||||
A deprecation message string for IDE/type checker display.
|
||||
"""
|
||||
if alternative_import:
|
||||
return f"Use {alternative_import} instead."
|
||||
if alternative:
|
||||
return f"Use {alternative} instead."
|
||||
return "Deprecated."
|
||||
|
||||
|
||||
class LangChainDeprecationWarning(DeprecationWarning):
|
||||
"""A class for issuing deprecation warnings for LangChain users."""
|
||||
|
||||
@@ -81,60 +102,57 @@ def deprecated(
|
||||
) -> Callable[[T], T]:
|
||||
"""Decorator to mark a function, a class, or a property as deprecated.
|
||||
|
||||
When deprecating a classmethod, a staticmethod, or a property, the
|
||||
`@deprecated` decorator should go *under* `@classmethod` and
|
||||
`@staticmethod` (i.e., `deprecated` should directly decorate the
|
||||
underlying callable), but *over* `@property`.
|
||||
When deprecating a classmethod, a staticmethod, or a property, the `@deprecated`
|
||||
decorator should go *under* `@classmethod` and `@staticmethod` (i.e., `deprecated`
|
||||
should directly decorate the underlying callable), but *over* `@property`.
|
||||
|
||||
When deprecating a class `C` intended to be used as a base class in a
|
||||
multiple inheritance hierarchy, `C` *must* define an `__init__` method
|
||||
(if `C` instead inherited its `__init__` from its own base class, then
|
||||
`@deprecated` would mess up `__init__` inheritance when installing its
|
||||
own (deprecation-emitting) `C.__init__`).
|
||||
When deprecating a class `C` intended to be used as a base class in a multiple
|
||||
inheritance hierarchy, `C` *must* define an `__init__` method (if `C` instead
|
||||
inherited its `__init__` from its own base class, then `@deprecated` would mess up
|
||||
`__init__` inheritance when installing its own (deprecation-emitting) `C.__init__`).
|
||||
|
||||
Parameters are the same as for `warn_deprecated`, except that *obj_type*
|
||||
defaults to 'class' if decorating a class, 'attribute' if decorating a
|
||||
property, and 'function' otherwise.
|
||||
Parameters are the same as for `warn_deprecated`, except that *obj_type* defaults to
|
||||
'class' if decorating a class, 'attribute' if decorating a property, and 'function'
|
||||
otherwise.
|
||||
|
||||
Args:
|
||||
since:
|
||||
The release at which this API became deprecated.
|
||||
message:
|
||||
Override the default deprecation message. The %(since)s,
|
||||
%(name)s, %(alternative)s, %(obj_type)s, %(addendum)s,
|
||||
and %(removal)s format specifiers will be replaced by the
|
||||
since: The release at which this API became deprecated.
|
||||
message: Override the default deprecation message.
|
||||
|
||||
The `%(since)s`, `%(name)s`, `%(alternative)s`, `%(obj_type)s`,
|
||||
`%(addendum)s`, and `%(removal)s` format specifiers will be replaced by the
|
||||
values of the respective arguments passed to this function.
|
||||
name:
|
||||
The name of the deprecated object.
|
||||
alternative:
|
||||
An alternative API that the user may use in place of the
|
||||
deprecated API. The deprecation warning will tell the user
|
||||
about this alternative if provided.
|
||||
alternative_import:
|
||||
An alternative import that the user may use instead.
|
||||
pending:
|
||||
If `True`, uses a `PendingDeprecationWarning` instead of a
|
||||
DeprecationWarning. Cannot be used together with removal.
|
||||
obj_type:
|
||||
The object type being deprecated.
|
||||
addendum:
|
||||
Additional text appended directly to the final message.
|
||||
removal:
|
||||
The expected removal version. With the default (an empty
|
||||
string), a removal version is automatically computed from
|
||||
since. Set to other Falsy values to not schedule a removal
|
||||
date. Cannot be used together with pending.
|
||||
package:
|
||||
The package of the deprecated object.
|
||||
name: The name of the deprecated object.
|
||||
alternative: An alternative API that the user may use in place of the deprecated
|
||||
API.
|
||||
|
||||
The deprecation warning will tell the user about this alternative if
|
||||
provided.
|
||||
alternative_import: An alternative import that the user may use instead.
|
||||
pending: If `True`, uses a `PendingDeprecationWarning` instead of a
|
||||
`DeprecationWarning`.
|
||||
|
||||
Cannot be used together with removal.
|
||||
obj_type: The object type being deprecated.
|
||||
addendum: Additional text appended directly to the final message.
|
||||
removal: The expected removal version.
|
||||
|
||||
With the default (an empty string), a removal version is automatically
|
||||
computed from `since`. Set to other falsy values to not schedule a removal
|
||||
date.
|
||||
|
||||
Cannot be used together with pending.
|
||||
package: The package of the deprecated object.
|
||||
|
||||
Returns:
|
||||
A decorator to mark a function or class as deprecated.
|
||||
|
||||
```python
|
||||
@deprecated("1.4.0")
|
||||
def the_function_to_deprecate():
|
||||
pass
|
||||
```
|
||||
Example:
|
||||
```python
|
||||
@deprecated("1.4.0")
|
||||
def the_function_to_deprecate():
|
||||
pass
|
||||
```
|
||||
"""
|
||||
_validate_deprecation_params(
|
||||
removal, alternative, alternative_import, pending=pending
|
||||
@@ -223,6 +241,11 @@ def deprecated(
|
||||
obj.__init__ = functools.wraps(obj.__init__)( # type: ignore[misc]
|
||||
warn_if_direct_instance
|
||||
)
|
||||
# Set __deprecated__ for PEP 702 (IDE/type checker support)
|
||||
obj.__deprecated__ = _build_deprecation_message( # type: ignore[attr-defined]
|
||||
alternative=alternative,
|
||||
alternative_import=alternative_import,
|
||||
)
|
||||
return obj
|
||||
|
||||
elif isinstance(obj, FieldInfoV1):
|
||||
@@ -315,12 +338,15 @@ def deprecated(
|
||||
|
||||
def finalize(wrapper: Callable[..., Any], new_doc: str) -> T: # noqa: ARG001
|
||||
"""Finalize the property."""
|
||||
return cast(
|
||||
"T",
|
||||
_DeprecatedProperty(
|
||||
fget=obj.fget, fset=obj.fset, fdel=obj.fdel, doc=new_doc
|
||||
),
|
||||
prop = _DeprecatedProperty(
|
||||
fget=obj.fget, fset=obj.fset, fdel=obj.fdel, doc=new_doc
|
||||
)
|
||||
# Set __deprecated__ for PEP 702 (IDE/type checker support)
|
||||
prop.__deprecated__ = _build_deprecation_message( # type: ignore[attr-defined]
|
||||
alternative=alternative,
|
||||
alternative_import=alternative_import,
|
||||
)
|
||||
return cast("T", prop)
|
||||
|
||||
else:
|
||||
_name = _name or cast("type | Callable", obj).__qualname__
|
||||
@@ -343,6 +369,11 @@ def deprecated(
|
||||
"""
|
||||
wrapper = functools.wraps(wrapped)(wrapper)
|
||||
wrapper.__doc__ = new_doc
|
||||
# Set __deprecated__ for PEP 702 (IDE/type checker support)
|
||||
wrapper.__deprecated__ = _build_deprecation_message( # type: ignore[attr-defined]
|
||||
alternative=alternative,
|
||||
alternative_import=alternative_import,
|
||||
)
|
||||
return cast("T", wrapper)
|
||||
|
||||
old_doc = inspect.cleandoc(old_doc or "").strip("\n")
|
||||
@@ -398,7 +429,7 @@ def deprecated(
|
||||
|
||||
@contextlib.contextmanager
|
||||
def suppress_langchain_deprecation_warning() -> Generator[None, None, None]:
|
||||
"""Context manager to suppress LangChainDeprecationWarning."""
|
||||
"""Context manager to suppress `LangChainDeprecationWarning`."""
|
||||
with warnings.catch_warnings():
|
||||
warnings.simplefilter("ignore", LangChainDeprecationWarning)
|
||||
warnings.simplefilter("ignore", LangChainPendingDeprecationWarning)
|
||||
@@ -421,35 +452,33 @@ def warn_deprecated(
|
||||
"""Display a standardized deprecation.
|
||||
|
||||
Args:
|
||||
since:
|
||||
The release at which this API became deprecated.
|
||||
message:
|
||||
Override the default deprecation message. The %(since)s,
|
||||
%(name)s, %(alternative)s, %(obj_type)s, %(addendum)s,
|
||||
and %(removal)s format specifiers will be replaced by the
|
||||
since: The release at which this API became deprecated.
|
||||
message: Override the default deprecation message.
|
||||
|
||||
The `%(since)s`, `%(name)s`, `%(alternative)s`, `%(obj_type)s`,
|
||||
`%(addendum)s`, and `%(removal)s` format specifiers will be replaced by the
|
||||
values of the respective arguments passed to this function.
|
||||
name:
|
||||
The name of the deprecated object.
|
||||
alternative:
|
||||
An alternative API that the user may use in place of the
|
||||
deprecated API. The deprecation warning will tell the user
|
||||
about this alternative if provided.
|
||||
alternative_import:
|
||||
An alternative import that the user may use instead.
|
||||
pending:
|
||||
If `True`, uses a `PendingDeprecationWarning` instead of a
|
||||
DeprecationWarning. Cannot be used together with removal.
|
||||
obj_type:
|
||||
The object type being deprecated.
|
||||
addendum:
|
||||
Additional text appended directly to the final message.
|
||||
removal:
|
||||
The expected removal version. With the default (an empty
|
||||
string), a removal version is automatically computed from
|
||||
since. Set to other Falsy values to not schedule a removal
|
||||
date. Cannot be used together with pending.
|
||||
package:
|
||||
The package of the deprecated object.
|
||||
name: The name of the deprecated object.
|
||||
alternative: An alternative API that the user may use in place of the
|
||||
deprecated API.
|
||||
|
||||
The deprecation warning will tell the user about this alternative if
|
||||
provided.
|
||||
alternative_import: An alternative import that the user may use instead.
|
||||
pending: If `True`, uses a `PendingDeprecationWarning` instead of a
|
||||
`DeprecationWarning`.
|
||||
|
||||
Cannot be used together with removal.
|
||||
obj_type: The object type being deprecated.
|
||||
addendum: Additional text appended directly to the final message.
|
||||
removal: The expected removal version.
|
||||
|
||||
With the default (an empty string), a removal version is automatically
|
||||
computed from `since`. Set to other falsy values to not schedule a removal
|
||||
date.
|
||||
|
||||
Cannot be used together with pending.
|
||||
package: The package of the deprecated object.
|
||||
"""
|
||||
if not pending:
|
||||
if not removal:
|
||||
@@ -534,8 +563,8 @@ def rename_parameter(
|
||||
"""Decorator indicating that parameter *old* of *func* is renamed to *new*.
|
||||
|
||||
The actual implementation of *func* should use *new*, not *old*. If *old* is passed
|
||||
to *func*, a DeprecationWarning is emitted, and its value is used, even if *new* is
|
||||
also passed by keyword.
|
||||
to *func*, a `DeprecationWarning` is emitted, and its value is used, even if *new*
|
||||
is also passed by keyword.
|
||||
|
||||
Args:
|
||||
since: The version in which the parameter was renamed.
|
||||
|
||||
@@ -6,7 +6,6 @@ import asyncio
|
||||
import atexit
|
||||
import functools
|
||||
import logging
|
||||
import uuid
|
||||
from abc import ABC, abstractmethod
|
||||
from collections.abc import Callable
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
@@ -41,6 +40,7 @@ from langchain_core.tracers.context import (
|
||||
from langchain_core.tracers.langchain import LangChainTracer
|
||||
from langchain_core.tracers.stdout import ConsoleCallbackHandler
|
||||
from langchain_core.utils.env import env_var_is_set
|
||||
from langchain_core.utils.uuid import uuid7
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from collections.abc import AsyncGenerator, Coroutine, Generator, Sequence
|
||||
@@ -504,7 +504,7 @@ class BaseRunManager(RunManagerMixin):
|
||||
|
||||
"""
|
||||
return cls(
|
||||
run_id=uuid.uuid4(),
|
||||
run_id=uuid7(),
|
||||
handlers=[],
|
||||
inheritable_handlers=[],
|
||||
tags=[],
|
||||
@@ -1330,7 +1330,7 @@ class CallbackManager(BaseCallbackManager):
|
||||
managers = []
|
||||
for i, prompt in enumerate(prompts):
|
||||
# Can't have duplicate runs with the same run ID (if provided)
|
||||
run_id_ = run_id if i == 0 and run_id is not None else uuid.uuid4()
|
||||
run_id_ = run_id if i == 0 and run_id is not None else uuid7()
|
||||
handle_event(
|
||||
self.handlers,
|
||||
"on_llm_start",
|
||||
@@ -1384,7 +1384,7 @@ class CallbackManager(BaseCallbackManager):
|
||||
run_id_ = run_id
|
||||
run_id = None
|
||||
else:
|
||||
run_id_ = uuid.uuid4()
|
||||
run_id_ = uuid7()
|
||||
handle_event(
|
||||
self.handlers,
|
||||
"on_chat_model_start",
|
||||
@@ -1433,7 +1433,7 @@ class CallbackManager(BaseCallbackManager):
|
||||
|
||||
"""
|
||||
if run_id is None:
|
||||
run_id = uuid.uuid4()
|
||||
run_id = uuid7()
|
||||
handle_event(
|
||||
self.handlers,
|
||||
"on_chain_start",
|
||||
@@ -1488,7 +1488,7 @@ class CallbackManager(BaseCallbackManager):
|
||||
|
||||
"""
|
||||
if run_id is None:
|
||||
run_id = uuid.uuid4()
|
||||
run_id = uuid7()
|
||||
|
||||
handle_event(
|
||||
self.handlers,
|
||||
@@ -1537,7 +1537,7 @@ class CallbackManager(BaseCallbackManager):
|
||||
The callback manager for the retriever run.
|
||||
"""
|
||||
if run_id is None:
|
||||
run_id = uuid.uuid4()
|
||||
run_id = uuid7()
|
||||
|
||||
handle_event(
|
||||
self.handlers,
|
||||
@@ -1594,7 +1594,7 @@ class CallbackManager(BaseCallbackManager):
|
||||
)
|
||||
raise ValueError(msg)
|
||||
if run_id is None:
|
||||
run_id = uuid.uuid4()
|
||||
run_id = uuid7()
|
||||
|
||||
handle_event(
|
||||
self.handlers,
|
||||
@@ -1816,7 +1816,7 @@ class AsyncCallbackManager(BaseCallbackManager):
|
||||
run_id_ = run_id
|
||||
run_id = None
|
||||
else:
|
||||
run_id_ = uuid.uuid4()
|
||||
run_id_ = uuid7()
|
||||
|
||||
if inline_handlers:
|
||||
inline_tasks.append(
|
||||
@@ -1900,7 +1900,7 @@ class AsyncCallbackManager(BaseCallbackManager):
|
||||
run_id_ = run_id
|
||||
run_id = None
|
||||
else:
|
||||
run_id_ = uuid.uuid4()
|
||||
run_id_ = uuid7()
|
||||
|
||||
for handler in self.handlers:
|
||||
task = ahandle_event(
|
||||
@@ -1962,7 +1962,7 @@ class AsyncCallbackManager(BaseCallbackManager):
|
||||
The async callback manager for the chain run.
|
||||
"""
|
||||
if run_id is None:
|
||||
run_id = uuid.uuid4()
|
||||
run_id = uuid7()
|
||||
|
||||
await ahandle_event(
|
||||
self.handlers,
|
||||
@@ -2010,7 +2010,7 @@ class AsyncCallbackManager(BaseCallbackManager):
|
||||
The async callback manager for the tool run.
|
||||
"""
|
||||
if run_id is None:
|
||||
run_id = uuid.uuid4()
|
||||
run_id = uuid7()
|
||||
|
||||
await ahandle_event(
|
||||
self.handlers,
|
||||
@@ -2060,7 +2060,7 @@ class AsyncCallbackManager(BaseCallbackManager):
|
||||
if not self.handlers:
|
||||
return
|
||||
if run_id is None:
|
||||
run_id = uuid.uuid4()
|
||||
run_id = uuid7()
|
||||
|
||||
if kwargs:
|
||||
msg = (
|
||||
@@ -2102,7 +2102,7 @@ class AsyncCallbackManager(BaseCallbackManager):
|
||||
The async callback manager for the retriever run.
|
||||
"""
|
||||
if run_id is None:
|
||||
run_id = uuid.uuid4()
|
||||
run_id = uuid7()
|
||||
|
||||
await ahandle_event(
|
||||
self.handlers,
|
||||
|
||||
@@ -95,7 +95,7 @@ def get_usage_metadata_callback(
|
||||
"""Get usage metadata callback.
|
||||
|
||||
Get context manager for tracking usage metadata across chat model calls using
|
||||
`AIMessage.usage_metadata`.
|
||||
[`AIMessage.usage_metadata`][langchain.messages.AIMessage.usage_metadata].
|
||||
|
||||
Args:
|
||||
name: The name of the context variable.
|
||||
|
||||
@@ -11,7 +11,7 @@ from langchain_core.prompts.prompt import PromptTemplate
|
||||
|
||||
|
||||
def _get_length_based(text: str) -> int:
|
||||
return len(re.split("\n| ", text))
|
||||
return len(re.split(r"\n| ", text))
|
||||
|
||||
|
||||
class LengthBasedExampleSelector(BaseExampleSelector, BaseModel):
|
||||
|
||||
@@ -302,6 +302,7 @@ def index(
|
||||
are not able to specify the uid of the document.
|
||||
|
||||
!!! warning "Behavior changed in `langchain-core` 0.3.25"
|
||||
|
||||
Added `scoped_full` cleanup mode.
|
||||
|
||||
!!! warning
|
||||
@@ -640,6 +641,7 @@ async def aindex(
|
||||
are not able to specify the uid of the document.
|
||||
|
||||
!!! warning "Behavior changed in `langchain-core` 0.3.25"
|
||||
|
||||
Added `scoped_full` cleanup mode.
|
||||
|
||||
!!! warning
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
"""Language models.
|
||||
"""Core language model abstractions.
|
||||
|
||||
LangChain has two main classes to work with language models: chat models and
|
||||
"old-fashioned" LLMs.
|
||||
"old-fashioned" LLMs (string-in, string-out).
|
||||
|
||||
**Chat models**
|
||||
|
||||
@@ -11,14 +11,16 @@ as outputs (as opposed to using plain text).
|
||||
Chat models support the assignment of distinct roles to conversation messages, helping
|
||||
to distinguish messages from the AI, users, and instructions such as system messages.
|
||||
|
||||
The key abstraction for chat models is `BaseChatModel`. Implementations should inherit
|
||||
from this class.
|
||||
The key abstraction for chat models is
|
||||
[`BaseChatModel`][langchain_core.language_models.BaseChatModel]. Implementations should
|
||||
inherit from this class.
|
||||
|
||||
See existing [chat model integrations](https://docs.langchain.com/oss/python/integrations/chat).
|
||||
|
||||
**LLMs**
|
||||
**LLMs (legacy)**
|
||||
|
||||
Language models that take a string as input and return a string.
|
||||
|
||||
These are traditionally older models (newer models generally are chat models).
|
||||
|
||||
Although the underlying models are string in, string out, the LangChain wrappers also
|
||||
@@ -53,6 +55,10 @@ if TYPE_CHECKING:
|
||||
ParrotFakeChatModel,
|
||||
)
|
||||
from langchain_core.language_models.llms import LLM, BaseLLM
|
||||
from langchain_core.language_models.model_profile import (
|
||||
ModelProfile,
|
||||
ModelProfileRegistry,
|
||||
)
|
||||
|
||||
__all__ = (
|
||||
"LLM",
|
||||
@@ -68,6 +74,8 @@ __all__ = (
|
||||
"LanguageModelInput",
|
||||
"LanguageModelLike",
|
||||
"LanguageModelOutput",
|
||||
"ModelProfile",
|
||||
"ModelProfileRegistry",
|
||||
"ParrotFakeChatModel",
|
||||
"SimpleChatModel",
|
||||
"get_tokenizer",
|
||||
@@ -90,6 +98,8 @@ _dynamic_imports = {
|
||||
"GenericFakeChatModel": "fake_chat_models",
|
||||
"ParrotFakeChatModel": "fake_chat_models",
|
||||
"LLM": "llms",
|
||||
"ModelProfile": "model_profile",
|
||||
"ModelProfileRegistry": "model_profile",
|
||||
"BaseLLM": "llms",
|
||||
"is_openai_data_block": "_utils",
|
||||
}
|
||||
|
||||
@@ -140,6 +140,7 @@ def _normalize_messages(
|
||||
- LangChain v0 standard content blocks for backward compatibility
|
||||
|
||||
!!! warning "Behavior changed in `langchain-core` 1.0.0"
|
||||
|
||||
In previous versions, this function returned messages in LangChain v0 format.
|
||||
Now, it returns messages in LangChain v1 format, which upgraded chat models now
|
||||
expect to receive when passing back in message history. For backward
|
||||
|
||||
@@ -15,7 +15,6 @@ from typing import TYPE_CHECKING, Any, Literal, cast
|
||||
from pydantic import BaseModel, ConfigDict, Field
|
||||
from typing_extensions import override
|
||||
|
||||
from langchain_core._api.beta_decorator import beta
|
||||
from langchain_core.caches import BaseCache
|
||||
from langchain_core.callbacks import (
|
||||
AsyncCallbackManager,
|
||||
@@ -34,6 +33,7 @@ from langchain_core.language_models.base import (
|
||||
LangSmithParams,
|
||||
LanguageModelInput,
|
||||
)
|
||||
from langchain_core.language_models.model_profile import ModelProfile
|
||||
from langchain_core.load import dumpd, dumps
|
||||
from langchain_core.messages import (
|
||||
AIMessage,
|
||||
@@ -76,8 +76,6 @@ from langchain_core.utils.utils import LC_ID_PREFIX, from_env
|
||||
if TYPE_CHECKING:
|
||||
import uuid
|
||||
|
||||
from langchain_model_profiles import ModelProfile # type: ignore[import-untyped]
|
||||
|
||||
from langchain_core.output_parsers.base import OutputParserLike
|
||||
from langchain_core.runnables import Runnable, RunnableConfig
|
||||
from langchain_core.tools import BaseTool
|
||||
@@ -335,10 +333,25 @@ class BaseChatModel(BaseLanguageModel[AIMessage], ABC):
|
||||
[`langchain-openai`](https://pypi.org/project/langchain-openai)) can also use this
|
||||
field to roll out new content formats in a backward-compatible way.
|
||||
|
||||
!!! version-added "Added in `langchain-core` 1.0"
|
||||
!!! version-added "Added in `langchain-core` 1.0.0"
|
||||
|
||||
"""
|
||||
|
||||
profile: ModelProfile | None = Field(default=None, exclude=True)
|
||||
"""Profile detailing model capabilities.
|
||||
|
||||
!!! warning "Beta feature"
|
||||
This is a beta feature. The format of model profiles is subject to change.
|
||||
|
||||
If not specified, automatically loaded from the provider package on initialization
|
||||
if data is available.
|
||||
|
||||
Example profile data includes context window sizes, supported modalities, or support
|
||||
for tool calling, structured output, and other features.
|
||||
|
||||
!!! version-added "Added in `langchain-core` 1.1.0"
|
||||
"""
|
||||
|
||||
model_config = ConfigDict(
|
||||
arbitrary_types_allowed=True,
|
||||
)
|
||||
@@ -534,7 +547,7 @@ class BaseChatModel(BaseLanguageModel[AIMessage], ABC):
|
||||
):
|
||||
if block["type"] != index_type:
|
||||
index_type = block["type"]
|
||||
index = index + 1
|
||||
index += 1
|
||||
if "index" not in block:
|
||||
block["index"] = index
|
||||
run_manager.on_llm_new_token(
|
||||
@@ -666,7 +679,7 @@ class BaseChatModel(BaseLanguageModel[AIMessage], ABC):
|
||||
):
|
||||
if block["type"] != index_type:
|
||||
index_type = block["type"]
|
||||
index = index + 1
|
||||
index += 1
|
||||
if "index" not in block:
|
||||
block["index"] = index
|
||||
await run_manager.on_llm_new_token(
|
||||
@@ -717,7 +730,7 @@ class BaseChatModel(BaseLanguageModel[AIMessage], ABC):
|
||||
|
||||
# --- Custom methods ---
|
||||
|
||||
def _combine_llm_outputs(self, llm_outputs: list[dict | None]) -> dict: # noqa: ARG002
|
||||
def _combine_llm_outputs(self, _llm_outputs: list[dict | None], /) -> dict:
|
||||
return {}
|
||||
|
||||
def _convert_cached_generations(self, cache_val: list) -> list[ChatGeneration]:
|
||||
@@ -1174,7 +1187,7 @@ class BaseChatModel(BaseLanguageModel[AIMessage], ABC):
|
||||
):
|
||||
if block["type"] != index_type:
|
||||
index_type = block["type"]
|
||||
index = index + 1
|
||||
index += 1
|
||||
if "index" not in block:
|
||||
block["index"] = index
|
||||
if run_manager:
|
||||
@@ -1292,7 +1305,7 @@ class BaseChatModel(BaseLanguageModel[AIMessage], ABC):
|
||||
):
|
||||
if block["type"] != index_type:
|
||||
index_type = block["type"]
|
||||
index = index + 1
|
||||
index += 1
|
||||
if "index" not in block:
|
||||
block["index"] = index
|
||||
if run_manager:
|
||||
@@ -1565,88 +1578,89 @@ class BaseChatModel(BaseLanguageModel[AIMessage], ABC):
|
||||
depends on the `schema` as described above.
|
||||
- `'parsing_error'`: `BaseException | None`
|
||||
|
||||
Example: Pydantic schema (`include_raw=False`):
|
||||
???+ example "Pydantic schema (`include_raw=False`)"
|
||||
|
||||
```python
|
||||
from pydantic import BaseModel
|
||||
```python
|
||||
from pydantic import BaseModel
|
||||
|
||||
|
||||
class AnswerWithJustification(BaseModel):
|
||||
'''An answer to the user question along with justification for the answer.'''
|
||||
class AnswerWithJustification(BaseModel):
|
||||
'''An answer to the user question along with justification for the answer.'''
|
||||
|
||||
answer: str
|
||||
justification: str
|
||||
answer: str
|
||||
justification: str
|
||||
|
||||
|
||||
model = ChatModel(model="model-name", temperature=0)
|
||||
structured_model = model.with_structured_output(AnswerWithJustification)
|
||||
model = ChatModel(model="model-name", temperature=0)
|
||||
structured_model = model.with_structured_output(AnswerWithJustification)
|
||||
|
||||
structured_model.invoke(
|
||||
"What weighs more a pound of bricks or a pound of feathers"
|
||||
)
|
||||
structured_model.invoke(
|
||||
"What weighs more a pound of bricks or a pound of feathers"
|
||||
)
|
||||
|
||||
# -> AnswerWithJustification(
|
||||
# answer='They weigh the same',
|
||||
# justification='Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume or density of the objects may differ.'
|
||||
# )
|
||||
```
|
||||
# -> AnswerWithJustification(
|
||||
# answer='They weigh the same',
|
||||
# justification='Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume or density of the objects may differ.'
|
||||
# )
|
||||
```
|
||||
|
||||
Example: Pydantic schema (`include_raw=True`):
|
||||
??? example "Pydantic schema (`include_raw=True`)"
|
||||
|
||||
```python
|
||||
from pydantic import BaseModel
|
||||
```python
|
||||
from pydantic import BaseModel
|
||||
|
||||
|
||||
class AnswerWithJustification(BaseModel):
|
||||
'''An answer to the user question along with justification for the answer.'''
|
||||
class AnswerWithJustification(BaseModel):
|
||||
'''An answer to the user question along with justification for the answer.'''
|
||||
|
||||
answer: str
|
||||
justification: str
|
||||
answer: str
|
||||
justification: str
|
||||
|
||||
|
||||
model = ChatModel(model="model-name", temperature=0)
|
||||
structured_model = model.with_structured_output(
|
||||
AnswerWithJustification, include_raw=True
|
||||
)
|
||||
model = ChatModel(model="model-name", temperature=0)
|
||||
structured_model = model.with_structured_output(
|
||||
AnswerWithJustification, include_raw=True
|
||||
)
|
||||
|
||||
structured_model.invoke(
|
||||
"What weighs more a pound of bricks or a pound of feathers"
|
||||
)
|
||||
# -> {
|
||||
# 'raw': AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_Ao02pnFYXD6GN1yzc0uXPsvF', 'function': {'arguments': '{"answer":"They weigh the same.","justification":"Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume or density of the objects may differ."}', 'name': 'AnswerWithJustification'}, 'type': 'function'}]}),
|
||||
# 'parsed': AnswerWithJustification(answer='They weigh the same.', justification='Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume or density of the objects may differ.'),
|
||||
# 'parsing_error': None
|
||||
# }
|
||||
```
|
||||
structured_model.invoke(
|
||||
"What weighs more a pound of bricks or a pound of feathers"
|
||||
)
|
||||
# -> {
|
||||
# 'raw': AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_Ao02pnFYXD6GN1yzc0uXPsvF', 'function': {'arguments': '{"answer":"They weigh the same.","justification":"Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume or density of the objects may differ."}', 'name': 'AnswerWithJustification'}, 'type': 'function'}]}),
|
||||
# 'parsed': AnswerWithJustification(answer='They weigh the same.', justification='Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume or density of the objects may differ.'),
|
||||
# 'parsing_error': None
|
||||
# }
|
||||
```
|
||||
|
||||
Example: `dict` schema (`include_raw=False`):
|
||||
??? example "Dictionary schema (`include_raw=False`)"
|
||||
|
||||
```python
|
||||
from pydantic import BaseModel
|
||||
from langchain_core.utils.function_calling import convert_to_openai_tool
|
||||
```python
|
||||
from pydantic import BaseModel
|
||||
from langchain_core.utils.function_calling import convert_to_openai_tool
|
||||
|
||||
|
||||
class AnswerWithJustification(BaseModel):
|
||||
'''An answer to the user question along with justification for the answer.'''
|
||||
class AnswerWithJustification(BaseModel):
|
||||
'''An answer to the user question along with justification for the answer.'''
|
||||
|
||||
answer: str
|
||||
justification: str
|
||||
answer: str
|
||||
justification: str
|
||||
|
||||
|
||||
dict_schema = convert_to_openai_tool(AnswerWithJustification)
|
||||
model = ChatModel(model="model-name", temperature=0)
|
||||
structured_model = model.with_structured_output(dict_schema)
|
||||
dict_schema = convert_to_openai_tool(AnswerWithJustification)
|
||||
model = ChatModel(model="model-name", temperature=0)
|
||||
structured_model = model.with_structured_output(dict_schema)
|
||||
|
||||
structured_model.invoke(
|
||||
"What weighs more a pound of bricks or a pound of feathers"
|
||||
)
|
||||
# -> {
|
||||
# 'answer': 'They weigh the same',
|
||||
# 'justification': 'Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume and density of the two substances differ.'
|
||||
# }
|
||||
```
|
||||
structured_model.invoke(
|
||||
"What weighs more a pound of bricks or a pound of feathers"
|
||||
)
|
||||
# -> {
|
||||
# 'answer': 'They weigh the same',
|
||||
# 'justification': 'Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume and density of the two substances differ.'
|
||||
# }
|
||||
```
|
||||
|
||||
!!! warning "Behavior changed in `langchain-core` 0.2.26"
|
||||
|
||||
Added support for `TypedDict` class.
|
||||
|
||||
""" # noqa: E501
|
||||
@@ -1688,40 +1702,6 @@ class BaseChatModel(BaseLanguageModel[AIMessage], ABC):
|
||||
return RunnableMap(raw=llm) | parser_with_fallback
|
||||
return llm | output_parser
|
||||
|
||||
@property
|
||||
@beta()
|
||||
def profile(self) -> ModelProfile:
|
||||
"""Return profiling information for the model.
|
||||
|
||||
This property relies on the `langchain-model-profiles` package to retrieve chat
|
||||
model capabilities, such as context window sizes and supported features.
|
||||
|
||||
Raises:
|
||||
ImportError: If `langchain-model-profiles` is not installed.
|
||||
|
||||
Returns:
|
||||
A `ModelProfile` object containing profiling information for the model.
|
||||
"""
|
||||
try:
|
||||
from langchain_model_profiles import get_model_profile # noqa: PLC0415
|
||||
except ImportError as err:
|
||||
informative_error_message = (
|
||||
"To access model profiling information, please install the "
|
||||
"`langchain-model-profiles` package: "
|
||||
"`pip install langchain-model-profiles`."
|
||||
)
|
||||
raise ImportError(informative_error_message) from err
|
||||
|
||||
provider_id = self._llm_type
|
||||
model_name = (
|
||||
# Model name is not standardized across integrations. New integrations
|
||||
# should prefer `model`.
|
||||
getattr(self, "model", None)
|
||||
or getattr(self, "model_name", None)
|
||||
or getattr(self, "model_id", "")
|
||||
)
|
||||
return get_model_profile(provider_id, model_name) or {}
|
||||
|
||||
|
||||
class SimpleChatModel(BaseChatModel):
|
||||
"""Simplified implementation for a chat model to inherit from.
|
||||
|
||||
@@ -61,6 +61,8 @@ if TYPE_CHECKING:
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_background_tasks: set[asyncio.Task] = set()
|
||||
|
||||
|
||||
@functools.lru_cache
|
||||
def _log_error_once(msg: str) -> None:
|
||||
@@ -100,9 +102,9 @@ def create_base_retry_decorator(
|
||||
asyncio.run(coro)
|
||||
else:
|
||||
if loop.is_running():
|
||||
# TODO: Fix RUF006 - this task should have a reference
|
||||
# and be awaited somewhere
|
||||
loop.create_task(coro) # noqa: RUF006
|
||||
task = loop.create_task(coro)
|
||||
_background_tasks.add(task)
|
||||
task.add_done_callback(_background_tasks.discard)
|
||||
else:
|
||||
asyncio.run(coro)
|
||||
except Exception as e:
|
||||
|
||||
84
libs/core/langchain_core/language_models/model_profile.py
Normal file
84
libs/core/langchain_core/language_models/model_profile.py
Normal file
@@ -0,0 +1,84 @@
|
||||
"""Model profile types and utilities."""
|
||||
|
||||
from typing_extensions import TypedDict
|
||||
|
||||
|
||||
class ModelProfile(TypedDict, total=False):
    """Model profile.

    !!! warning "Beta feature"
        This is a beta feature. The format of model profiles is subject to change.

    Provides information about chat model capabilities, such as context window sizes
    and supported features.

    The class is declared with `total=False`, so every key is optional: a profile
    may carry any subset of the fields below, and readers must not assume a key is
    present. Fields are grouped into input constraints, output constraints, tool
    calling, and structured output.
    """

    # --- Input constraints ---

    max_input_tokens: int
    """Maximum context window (tokens)"""

    image_inputs: bool
    """Whether image inputs are supported."""
    # TODO: add more detail about formats?

    image_url_inputs: bool
    """Whether [image URL inputs](https://docs.langchain.com/oss/python/langchain/models#multimodal)
    are supported."""

    pdf_inputs: bool
    """Whether [PDF inputs](https://docs.langchain.com/oss/python/langchain/models#multimodal)
    are supported."""
    # TODO: add more detail about formats? e.g. bytes or base64

    audio_inputs: bool
    """Whether [audio inputs](https://docs.langchain.com/oss/python/langchain/models#multimodal)
    are supported."""
    # TODO: add more detail about formats? e.g. bytes or base64

    video_inputs: bool
    """Whether [video inputs](https://docs.langchain.com/oss/python/langchain/models#multimodal)
    are supported."""
    # TODO: add more detail about formats? e.g. bytes or base64

    image_tool_message: bool
    """Whether images can be included in tool messages."""

    pdf_tool_message: bool
    """Whether PDFs can be included in tool messages."""

    # --- Output constraints ---

    max_output_tokens: int
    """Maximum output tokens"""

    reasoning_output: bool
    """Whether the model supports [reasoning / chain-of-thought](https://docs.langchain.com/oss/python/langchain/models#reasoning)"""

    image_outputs: bool
    """Whether [image outputs](https://docs.langchain.com/oss/python/langchain/models#multimodal)
    are supported."""

    audio_outputs: bool
    """Whether [audio outputs](https://docs.langchain.com/oss/python/langchain/models#multimodal)
    are supported."""

    video_outputs: bool
    """Whether [video outputs](https://docs.langchain.com/oss/python/langchain/models#multimodal)
    are supported."""

    # --- Tool calling ---
    tool_calling: bool
    """Whether the model supports [tool calling](https://docs.langchain.com/oss/python/langchain/models#tool-calling)"""

    tool_choice: bool
    """Whether the model supports [tool choice](https://docs.langchain.com/oss/python/langchain/models#forcing-tool-calls)"""

    # --- Structured output ---
    structured_output: bool
    """Whether the model supports a native [structured output](https://docs.langchain.com/oss/python/langchain/models#structured-outputs)
    feature"""
|
||||
|
||||
|
||||
ModelProfileRegistry = dict[str, ModelProfile]
|
||||
"""Registry mapping model identifiers or names to their ModelProfile."""
|
||||
@@ -6,7 +6,7 @@ from langchain_core._import_utils import import_attr
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from langchain_core.load.dump import dumpd, dumps
|
||||
from langchain_core.load.load import loads
|
||||
from langchain_core.load.load import InitValidator, loads
|
||||
from langchain_core.load.serializable import Serializable
|
||||
|
||||
# Unfortunately, we have to eagerly import load from langchain_core/load/load.py
|
||||
@@ -15,11 +15,19 @@ if TYPE_CHECKING:
|
||||
# the `from langchain_core.load.load import load` absolute import should also work.
|
||||
from langchain_core.load.load import load
|
||||
|
||||
__all__ = ("Serializable", "dumpd", "dumps", "load", "loads")
|
||||
__all__ = (
|
||||
"InitValidator",
|
||||
"Serializable",
|
||||
"dumpd",
|
||||
"dumps",
|
||||
"load",
|
||||
"loads",
|
||||
)
|
||||
|
||||
_dynamic_imports = {
|
||||
"dumpd": "dump",
|
||||
"dumps": "dump",
|
||||
"InitValidator": "load",
|
||||
"loads": "load",
|
||||
"Serializable": "serializable",
|
||||
}
|
||||
|
||||
176
libs/core/langchain_core/load/_validation.py
Normal file
176
libs/core/langchain_core/load/_validation.py
Normal file
@@ -0,0 +1,176 @@
|
||||
"""Validation utilities for LangChain serialization.
|
||||
|
||||
Provides escape-based protection against injection attacks in serialized objects. The
|
||||
approach uses an allowlist design: only dicts explicitly produced by
|
||||
`Serializable.to_json()` are treated as LC objects during deserialization.
|
||||
|
||||
## How escaping works
|
||||
|
||||
During serialization, plain dicts (user data) that contain an `'lc'` key are wrapped:
|
||||
|
||||
```python
|
||||
{"lc": 1, ...} # user data that looks like LC object
|
||||
# becomes:
|
||||
{"__lc_escaped__": {"lc": 1, ...}}
|
||||
```
|
||||
|
||||
During deserialization, escaped dicts are unwrapped and returned as plain dicts,
|
||||
NOT instantiated as LC objects.
|
||||
"""
|
||||
|
||||
from typing import Any
|
||||
|
||||
_LC_ESCAPED_KEY = "__lc_escaped__"
|
||||
"""Sentinel key used to mark escaped user dicts during serialization.
|
||||
|
||||
When a plain dict contains an 'lc' key (which could be confused with LC objects),
|
||||
we wrap it as {"__lc_escaped__": {...original...}}.
|
||||
"""
|
||||
|
||||
|
||||
def _needs_escaping(obj: dict[str, Any]) -> bool:
    """Report whether a plain dict must be wrapped in the escape marker.

    Two shapes of user dict are ambiguous once serialized:

    1. A dict holding an `'lc'` key, which resembles the LC serialization format.
    2. A dict whose single key is the escape key, which resembles an already
       escaped dict.

    Args:
        obj: The plain (user data) dict to inspect.

    Returns:
        `True` if the dict matches either shape above, `False` otherwise.
    """
    if "lc" in obj:
        return True
    # Only a dict made up of exactly the escape key is ambiguous; the escape key
    # sitting next to other keys is left alone.
    return len(obj) == 1 and _LC_ESCAPED_KEY in obj
|
||||
|
||||
|
||||
def _escape_dict(obj: dict[str, Any]) -> dict[str, Any]:
    """Return a new single-key dict that wraps `obj` under the escape marker.

    The wrapped dict is stored by reference; it is neither copied nor modified.

    Example:
        ```python
        {"key": "value"}  # becomes {"__lc_escaped__": {"key": "value"}}
        ```

    Args:
        obj: The dict to wrap.

    Returns:
        A dict with the escape key as its only key and `obj` as the value.
    """
    wrapper: dict[str, Any] = {}
    wrapper[_LC_ESCAPED_KEY] = obj
    return wrapper
|
||||
|
||||
|
||||
def _is_escaped_dict(obj: dict[str, Any]) -> bool:
    """Report whether a dict is an escape wrapper around user data.

    Example:
        ```python
        {"__lc_escaped__": {...}}  # is an escaped dict
        ```

    Args:
        obj: The dict to inspect.

    Returns:
        `True` only when the dict has exactly one key and that key is the escape
        key.
    """
    if len(obj) != 1:
        return False
    return _LC_ESCAPED_KEY in obj
|
||||
|
||||
|
||||
def _serialize_value(obj: Any) -> Any:
    """Convert a value to its serialized form, escaping ambiguous user dicts.

    Applied recursively to kwarg values so that any plain dict that could be
    mistaken for an LC object ends up wrapped in the escape marker.

    Args:
        obj: The value to serialize.

    Returns:
        The serialized value with user dicts escaped as needed. Lists and tuples
        both come back as lists; values that cannot be represented come back as
        the result of `to_json_not_implemented`.
    """
    from langchain_core.load.serializable import (  # noqa: PLC0415
        Serializable,
        to_json_not_implemented,
    )

    json_scalars = (str, int, float, bool, type(None))

    # Genuine LC objects are serialized through their own path, never escaped.
    if isinstance(obj, Serializable):
        return _serialize_lc_object(obj)

    if isinstance(obj, dict):
        # A dict with any key outside the scalar types cannot be emitted as JSON.
        if any(not isinstance(key, json_scalars) for key in obj):
            return to_json_not_implemented(obj)
        # Decide on escaping BEFORE descending: an escaped dict is wrapped
        # untouched because it is handed back verbatim on deserialization, and
        # descending first would re-escape already-escaped nested content.
        if _needs_escaping(obj):
            return _escape_dict(obj)
        # No 'lc' key, so it is safe to process each value in turn.
        converted: dict[Any, Any] = {}
        for key, value in obj.items():
            converted[key] = _serialize_value(value)
        return converted

    if isinstance(obj, (list, tuple)):
        return list(map(_serialize_value, obj))

    if isinstance(obj, json_scalars):
        return obj

    # Anything else (datetime, custom objects, etc.) has no JSON form.
    return to_json_not_implemented(obj)
|
||||
|
||||
|
||||
def _is_lc_secret(obj: Any) -> bool:
|
||||
"""Check if an object is a LangChain secret marker."""
|
||||
expected_num_keys = 3
|
||||
return (
|
||||
isinstance(obj, dict)
|
||||
and obj.get("lc") == 1
|
||||
and obj.get("type") == "secret"
|
||||
and "id" in obj
|
||||
and len(obj) == expected_num_keys
|
||||
)
|
||||
|
||||
|
||||
def _serialize_lc_object(obj: Any) -> dict[str, Any]:
    """Serialize a `Serializable`, escaping user data found in its kwargs.

    Args:
        obj: The `Serializable` object to serialize.

    Returns:
        A shallow copy of `obj.to_json()`. For constructor-type output, the
        `kwargs` entry is replaced by a new dict whose values have been escaped.

    Raises:
        TypeError: If `obj` is not a `Serializable` instance.

    Note:
        Each kwarg value goes through `_serialize_value` so that user data (like
        metadata) containing `'lc'` keys is escaped. Values that are already
        secret markers are passed through untouched, since `to_json()` has
        already substituted them for the secret fields.
    """
    from langchain_core.load.serializable import Serializable  # noqa: PLC0415

    if not isinstance(obj, Serializable):
        msg = f"Expected Serializable, got {type(obj)}"
        raise TypeError(msg)

    serialized: dict[str, Any] = dict(obj.to_json())

    # Only constructor-type output carries kwargs that can hold user data.
    if serialized.get("type") != "constructor" or "kwargs" not in serialized:
        return serialized

    escaped_kwargs: dict[Any, Any] = {}
    for name, value in serialized["kwargs"].items():
        if _is_lc_secret(value):
            # Secret markers are produced by to_json(); keep them verbatim.
            escaped_kwargs[name] = value
        else:
            escaped_kwargs[name] = _serialize_value(value)
    serialized["kwargs"] = escaped_kwargs

    return serialized
|
||||
|
||||
|
||||
def _unescape_value(obj: Any) -> Any:
    """Strip escape markers from a value, descending into dicts and lists.

    An escaped dict (`{"__lc_escaped__": ...}`) is replaced by its payload, which
    is returned AS-IS: the payload is user data, so any escape keys inside it
    belong to the user and are not processed further.

    Regular dicts and lists are rebuilt with each element unescaped in turn. Any
    other value is returned unchanged.

    Args:
        obj: The value to unescape.

    Returns:
        The unescaped value.
    """
    if isinstance(obj, list):
        return [_unescape_value(element) for element in obj]

    if not isinstance(obj, dict):
        return obj

    if _is_escaped_dict(obj):
        # Hand back the wrapped user data untouched (no further unescaping).
        return obj[_LC_ESCAPED_KEY]

    # Ordinary dict: look for escape markers nested in its values.
    unescaped: dict[Any, Any] = {}
    for key, value in obj.items():
        unescaped[key] = _unescape_value(value)
    return unescaped
|
||||
@@ -1,10 +1,26 @@
|
||||
"""Dump objects to json."""
|
||||
"""Serialize LangChain objects to JSON.
|
||||
|
||||
Provides `dumps` (to JSON string) and `dumpd` (to dict) for serializing
|
||||
`Serializable` objects.
|
||||
|
||||
## Escaping
|
||||
|
||||
During serialization, plain dicts (user data) that contain an `'lc'` key are escaped
|
||||
by wrapping them: `{"__lc_escaped__": {...original...}}`. This prevents injection
|
||||
attacks where malicious data could trick the deserializer into instantiating
|
||||
arbitrary classes. The escape marker is removed during deserialization.
|
||||
|
||||
This is an allowlist approach: only dicts explicitly produced by
|
||||
`Serializable.to_json()` are treated as LC objects; everything else is escaped if it
|
||||
could be confused with the LC format.
|
||||
"""
|
||||
|
||||
import json
|
||||
from typing import Any
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
from langchain_core.load._validation import _serialize_value
|
||||
from langchain_core.load.serializable import Serializable, to_json_not_implemented
|
||||
from langchain_core.messages import AIMessage
|
||||
from langchain_core.outputs import ChatGeneration
|
||||
@@ -25,6 +41,20 @@ def default(obj: Any) -> Any:
|
||||
|
||||
|
||||
def _dump_pydantic_models(obj: Any) -> Any:
|
||||
"""Convert nested Pydantic models to dicts for JSON serialization.
|
||||
|
||||
Handles the special case where a `ChatGeneration` contains an `AIMessage`
|
||||
with a parsed Pydantic model in `additional_kwargs["parsed"]`. Since
|
||||
Pydantic models aren't directly JSON serializable, this converts them to
|
||||
dicts.
|
||||
|
||||
Args:
|
||||
obj: The object to process.
|
||||
|
||||
Returns:
|
||||
A copy of the object with nested Pydantic models converted to dicts, or
|
||||
the original object unchanged if no conversion was needed.
|
||||
"""
|
||||
if (
|
||||
isinstance(obj, ChatGeneration)
|
||||
and isinstance(obj.message, AIMessage)
|
||||
@@ -40,10 +70,17 @@ def _dump_pydantic_models(obj: Any) -> Any:
|
||||
def dumps(obj: Any, *, pretty: bool = False, **kwargs: Any) -> str:
|
||||
"""Return a JSON string representation of an object.
|
||||
|
||||
Note:
|
||||
Plain dicts containing an `'lc'` key are automatically escaped to prevent
|
||||
confusion with LC serialization format. The escape marker is removed during
|
||||
deserialization.
|
||||
|
||||
Args:
|
||||
obj: The object to dump.
|
||||
pretty: Whether to pretty print the json. If `True`, the json will be
|
||||
indented with 2 spaces (if no indent is provided as part of `kwargs`).
|
||||
pretty: Whether to pretty print the json.
|
||||
|
||||
If `True`, the json will be indented by either 2 spaces or the amount
|
||||
provided in the `indent` kwarg.
|
||||
**kwargs: Additional arguments to pass to `json.dumps`
|
||||
|
||||
Returns:
|
||||
@@ -55,28 +92,29 @@ def dumps(obj: Any, *, pretty: bool = False, **kwargs: Any) -> str:
|
||||
if "default" in kwargs:
|
||||
msg = "`default` should not be passed to dumps"
|
||||
raise ValueError(msg)
|
||||
try:
|
||||
obj = _dump_pydantic_models(obj)
|
||||
if pretty:
|
||||
indent = kwargs.pop("indent", 2)
|
||||
return json.dumps(obj, default=default, indent=indent, **kwargs)
|
||||
return json.dumps(obj, default=default, **kwargs)
|
||||
except TypeError:
|
||||
if pretty:
|
||||
indent = kwargs.pop("indent", 2)
|
||||
return json.dumps(to_json_not_implemented(obj), indent=indent, **kwargs)
|
||||
return json.dumps(to_json_not_implemented(obj), **kwargs)
|
||||
|
||||
obj = _dump_pydantic_models(obj)
|
||||
serialized = _serialize_value(obj)
|
||||
|
||||
if pretty:
|
||||
indent = kwargs.pop("indent", 2)
|
||||
return json.dumps(serialized, indent=indent, **kwargs)
|
||||
return json.dumps(serialized, **kwargs)
|
||||
|
||||
|
||||
def dumpd(obj: Any) -> Any:
|
||||
"""Return a dict representation of an object.
|
||||
|
||||
Note:
|
||||
Plain dicts containing an `'lc'` key are automatically escaped to prevent
|
||||
confusion with LC serialization format. The escape marker is removed during
|
||||
deserialization.
|
||||
|
||||
Args:
|
||||
obj: The object to dump.
|
||||
|
||||
Returns:
|
||||
Dictionary that can be serialized to json using `json.dumps`.
|
||||
"""
|
||||
# Unfortunately this function is not as efficient as it could be because it first
|
||||
# dumps the object to a json string and then loads it back into a dictionary.
|
||||
return json.loads(dumps(obj))
|
||||
obj = _dump_pydantic_models(obj)
|
||||
return _serialize_value(obj)
|
||||
|
||||
@@ -1,11 +1,83 @@
|
||||
"""Load LangChain objects from JSON strings or objects."""
|
||||
"""Load LangChain objects from JSON strings or objects.
|
||||
|
||||
## How it works
|
||||
|
||||
Each `Serializable` LangChain object has a unique identifier (its "class path"), which
|
||||
is a list of strings representing the module path and class name. For example:
|
||||
|
||||
- `AIMessage` -> `["langchain_core", "messages", "ai", "AIMessage"]`
|
||||
- `ChatPromptTemplate` -> `["langchain_core", "prompts", "chat", "ChatPromptTemplate"]`
|
||||
|
||||
When deserializing, the class path from the JSON `'id'` field is checked against an
|
||||
allowlist. If the class is not in the allowlist, deserialization raises a `ValueError`.
|
||||
|
||||
## Security model
|
||||
|
||||
The `allowed_objects` parameter controls which classes can be deserialized:
|
||||
|
||||
- **`'core'` (default)**: Allow classes defined in the serialization mappings for
|
||||
langchain_core.
|
||||
- **`'all'`**: Allow classes defined in the serialization mappings. This
|
||||
includes core LangChain types (messages, prompts, documents, etc.) and trusted
|
||||
partner integrations. See `langchain_core.load.mapping` for the full list.
|
||||
- **Explicit list of classes**: Only those specific classes are allowed.
|
||||
|
||||
For simple data types like messages and documents, the default allowlist is safe to use.
|
||||
These classes do not perform side effects during initialization.
|
||||
|
||||
!!! note "Side effects in allowed classes"
|
||||
|
||||
Deserialization calls `__init__` on allowed classes. If those classes perform side
|
||||
effects during initialization (network calls, file operations, etc.), those side
|
||||
effects will occur. The allowlist prevents instantiation of classes outside the
|
||||
allowlist, but does not sandbox the allowed classes themselves.
|
||||
|
||||
Import paths are also validated against trusted namespaces before any module is
|
||||
imported.
|
||||
|
||||
### Injection protection (escape-based)
|
||||
|
||||
During serialization, plain dicts that contain an `'lc'` key are escaped by wrapping
|
||||
them: `{"__lc_escaped__": {...}}`. During deserialization, escaped dicts are unwrapped
|
||||
and returned as plain dicts, NOT instantiated as LC objects.
|
||||
|
||||
This is an allowlist approach: only dicts explicitly produced by
|
||||
`Serializable.to_json()` (which are NOT escaped) are treated as LC objects;
|
||||
everything else is user data.
|
||||
|
||||
Even if an attacker's payload includes `__lc_escaped__` wrappers, it will be unwrapped
|
||||
to plain dicts and NOT instantiated as malicious objects.
|
||||
|
||||
## Examples
|
||||
|
||||
```python
|
||||
from langchain_core.load import load
|
||||
from langchain_core.prompts import ChatPromptTemplate
|
||||
from langchain_core.messages import AIMessage, HumanMessage
|
||||
|
||||
# Use default allowlist (classes from mappings) - recommended
|
||||
obj = load(data)
|
||||
|
||||
# Allow only specific classes (most restrictive)
|
||||
obj = load(
|
||||
data,
|
||||
allowed_objects=[
|
||||
ChatPromptTemplate,
|
||||
AIMessage,
|
||||
HumanMessage,
|
||||
],
|
||||
)
|
||||
```
|
||||
"""
|
||||
|
||||
import importlib
|
||||
import json
|
||||
import os
|
||||
from typing import Any
|
||||
from collections.abc import Callable, Iterable
|
||||
from typing import Any, Literal, cast
|
||||
|
||||
from langchain_core._api import beta
|
||||
from langchain_core.load._validation import _is_escaped_dict, _unescape_value
|
||||
from langchain_core.load.mapping import (
|
||||
_JS_SERIALIZABLE_MAPPING,
|
||||
_OG_SERIALIZABLE_MAPPING,
|
||||
@@ -44,34 +116,209 @@ ALL_SERIALIZABLE_MAPPINGS = {
|
||||
**_JS_SERIALIZABLE_MAPPING,
|
||||
}
|
||||
|
||||
# Cache for the default allowed class paths computed from mappings
|
||||
# Maps mode ("all" or "core") to the cached set of paths
|
||||
_default_class_paths_cache: dict[str, set[tuple[str, ...]]] = {}
|
||||
|
||||
|
||||
def _get_default_allowed_class_paths(
|
||||
allowed_object_mode: Literal["all", "core"],
|
||||
) -> set[tuple[str, ...]]:
|
||||
"""Get the default allowed class paths from the serialization mappings.
|
||||
|
||||
This uses the mappings as the source of truth for what classes are allowed
|
||||
by default. Both the legacy paths (keys) and current paths (values) are included.
|
||||
|
||||
Args:
|
||||
allowed_object_mode: either `'all'` or `'core'`.
|
||||
|
||||
Returns:
|
||||
Set of class path tuples that are allowed by default.
|
||||
"""
|
||||
if allowed_object_mode in _default_class_paths_cache:
|
||||
return _default_class_paths_cache[allowed_object_mode]
|
||||
|
||||
allowed_paths: set[tuple[str, ...]] = set()
|
||||
for key, value in ALL_SERIALIZABLE_MAPPINGS.items():
|
||||
if allowed_object_mode == "core" and value[0] != "langchain_core":
|
||||
continue
|
||||
allowed_paths.add(key)
|
||||
allowed_paths.add(value)
|
||||
|
||||
_default_class_paths_cache[allowed_object_mode] = allowed_paths
|
||||
return _default_class_paths_cache[allowed_object_mode]
|
||||
|
||||
|
||||
def _block_jinja2_templates(
|
||||
class_path: tuple[str, ...],
|
||||
kwargs: dict[str, Any],
|
||||
) -> None:
|
||||
"""Block jinja2 templates during deserialization for security.
|
||||
|
||||
Jinja2 templates can execute arbitrary code, so they are blocked by default when
|
||||
deserializing objects with `template_format='jinja2'`.
|
||||
|
||||
Note:
|
||||
We intentionally do NOT check the `class_path` here to keep this simple and
|
||||
future-proof. If any new class is added that accepts `template_format='jinja2'`,
|
||||
it will be automatically blocked without needing to update this function.
|
||||
|
||||
Args:
|
||||
class_path: The class path tuple being deserialized (unused).
|
||||
kwargs: The kwargs dict for the class constructor.
|
||||
|
||||
Raises:
|
||||
ValueError: If `template_format` is `'jinja2'`.
|
||||
"""
|
||||
_ = class_path # Unused - see docstring for rationale. Kept to satisfy signature.
|
||||
if kwargs.get("template_format") == "jinja2":
|
||||
msg = (
|
||||
"Jinja2 templates are not allowed during deserialization for security "
|
||||
"reasons. Use 'f-string' template format instead, or explicitly allow "
|
||||
"jinja2 by providing a custom init_validator."
|
||||
)
|
||||
raise ValueError(msg)
|
||||
|
||||
|
||||
def default_init_validator(
|
||||
class_path: tuple[str, ...],
|
||||
kwargs: dict[str, Any],
|
||||
) -> None:
|
||||
"""Default init validator that blocks jinja2 templates.
|
||||
|
||||
This is the default validator used by `load()` and `loads()` when no custom
|
||||
validator is provided.
|
||||
|
||||
Args:
|
||||
class_path: The class path tuple being deserialized.
|
||||
kwargs: The kwargs dict for the class constructor.
|
||||
|
||||
Raises:
|
||||
ValueError: If template_format is `'jinja2'`.
|
||||
"""
|
||||
_block_jinja2_templates(class_path, kwargs)
|
||||
|
||||
|
||||
AllowedObject = type[Serializable]
|
||||
"""Type alias for classes that can be included in the `allowed_objects` parameter.
|
||||
|
||||
Must be a `Serializable` subclass (the class itself, not an instance).
|
||||
"""
|
||||
|
||||
InitValidator = Callable[[tuple[str, ...], dict[str, Any]], None]
|
||||
"""Type alias for a callable that validates kwargs during deserialization.
|
||||
|
||||
The callable receives:
|
||||
|
||||
- `class_path`: A tuple of strings identifying the class being instantiated
|
||||
(e.g., `('langchain', 'schema', 'messages', 'AIMessage')`).
|
||||
- `kwargs`: The kwargs dict that will be passed to the constructor.
|
||||
|
||||
The validator should raise an exception if the object should not be deserialized.
|
||||
"""
|
||||
|
||||
|
||||
def _compute_allowed_class_paths(
|
||||
allowed_objects: Iterable[AllowedObject],
|
||||
import_mappings: dict[tuple[str, ...], tuple[str, ...]],
|
||||
) -> set[tuple[str, ...]]:
|
||||
"""Return allowed class paths from an explicit list of classes.
|
||||
|
||||
A class path is a tuple of strings identifying a serializable class, derived from
|
||||
`Serializable.lc_id()`. For example: `('langchain_core', 'messages', 'AIMessage')`.
|
||||
|
||||
Args:
|
||||
allowed_objects: Iterable of `Serializable` subclasses to allow.
|
||||
import_mappings: Mapping of legacy class paths to current class paths.
|
||||
|
||||
Returns:
|
||||
Set of allowed class paths.
|
||||
|
||||
Example:
|
||||
```python
|
||||
# Allow a specific class
|
||||
_compute_allowed_class_paths([MyPrompt], {}) ->
|
||||
{("langchain_core", "prompts", "MyPrompt")}
|
||||
|
||||
# Include legacy paths that map to the same class
|
||||
import_mappings = {("old", "Prompt"): ("langchain_core", "prompts", "MyPrompt")}
|
||||
_compute_allowed_class_paths([MyPrompt], import_mappings) ->
|
||||
{("langchain_core", "prompts", "MyPrompt"), ("old", "Prompt")}
|
||||
```
|
||||
"""
|
||||
allowed_objects_list = list(allowed_objects)
|
||||
|
||||
allowed_class_paths: set[tuple[str, ...]] = set()
|
||||
for allowed_obj in allowed_objects_list:
|
||||
if not isinstance(allowed_obj, type) or not issubclass(
|
||||
allowed_obj, Serializable
|
||||
):
|
||||
msg = "allowed_objects must contain Serializable subclasses."
|
||||
raise TypeError(msg)
|
||||
|
||||
class_path = tuple(allowed_obj.lc_id())
|
||||
allowed_class_paths.add(class_path)
|
||||
# Add legacy paths that map to the same class.
|
||||
for mapping_key, mapping_value in import_mappings.items():
|
||||
if tuple(mapping_value) == class_path:
|
||||
allowed_class_paths.add(mapping_key)
|
||||
return allowed_class_paths
|
||||
|
||||
|
||||
class Reviver:
|
||||
"""Reviver for JSON objects."""
|
||||
"""Reviver for JSON objects.
|
||||
|
||||
Used as the `object_hook` for `json.loads` to reconstruct LangChain objects from
|
||||
their serialized JSON representation.
|
||||
|
||||
Only classes in the allowlist can be instantiated.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
allowed_objects: Iterable[AllowedObject] | Literal["all", "core"] = "core",
|
||||
secrets_map: dict[str, str] | None = None,
|
||||
valid_namespaces: list[str] | None = None,
|
||||
secrets_from_env: bool = True, # noqa: FBT001,FBT002
|
||||
secrets_from_env: bool = False, # noqa: FBT001,FBT002
|
||||
additional_import_mappings: dict[tuple[str, ...], tuple[str, ...]]
|
||||
| None = None,
|
||||
*,
|
||||
ignore_unserializable_fields: bool = False,
|
||||
init_validator: InitValidator | None = default_init_validator,
|
||||
) -> None:
|
||||
"""Initialize the reviver.
|
||||
|
||||
Args:
|
||||
secrets_map: A map of secrets to load.
|
||||
allowed_objects: Allowlist of classes that can be deserialized.
|
||||
- `'core'` (default): Allow classes defined in the serialization
|
||||
mappings for `langchain_core`.
|
||||
- `'all'`: Allow classes defined in the serialization mappings.
|
||||
|
||||
This includes core LangChain types (messages, prompts, documents,
|
||||
etc.) and trusted partner integrations. See
|
||||
`langchain_core.load.mapping` for the full list.
|
||||
- Explicit list of classes: Only those specific classes are allowed.
|
||||
secrets_map: A map of secrets to load.
|
||||
If a secret is not found in the map, it will be loaded from the
|
||||
environment if `secrets_from_env` is `True`.
|
||||
valid_namespaces: A list of additional namespaces (modules)
|
||||
to allow to be deserialized.
|
||||
valid_namespaces: Additional namespaces (modules) to allow during
|
||||
deserialization, beyond the default trusted namespaces.
|
||||
secrets_from_env: Whether to load secrets from the environment.
|
||||
additional_import_mappings: A dictionary of additional namespace mappings
|
||||
additional_import_mappings: A dictionary of additional namespace mappings.
|
||||
|
||||
You can use this to override default mappings or add new mappings.
|
||||
|
||||
When `allowed_objects` is `'core'` or `'all'` (using defaults), paths from these
|
||||
mappings are also added to the allowed class paths.
|
||||
ignore_unserializable_fields: Whether to ignore unserializable fields.
|
||||
init_validator: Optional callable to validate kwargs before instantiation.
|
||||
|
||||
If provided, this function is called with `(class_path, kwargs)` where
|
||||
`class_path` is the class path tuple and `kwargs` is the kwargs dict.
|
||||
The validator should raise an exception if the object should not be
|
||||
deserialized, otherwise return `None`.
|
||||
|
||||
Defaults to `default_init_validator` which blocks jinja2 templates.
|
||||
"""
|
||||
self.secrets_from_env = secrets_from_env
|
||||
self.secrets_map = secrets_map or {}
|
||||
@@ -90,7 +337,26 @@ class Reviver:
|
||||
if self.additional_import_mappings
|
||||
else ALL_SERIALIZABLE_MAPPINGS
|
||||
)
|
||||
# Compute allowed class paths:
|
||||
# - "all" / "core" -> use default paths from mappings (+ additional_import_mappings)
|
||||
# - Explicit list -> compute from those classes
|
||||
if allowed_objects in ("all", "core"):
|
||||
self.allowed_class_paths: set[tuple[str, ...]] | None = (
|
||||
_get_default_allowed_class_paths(
|
||||
cast("Literal['all', 'core']", allowed_objects)
|
||||
).copy()
|
||||
)
|
||||
# Add paths from additional_import_mappings to the defaults
|
||||
if self.additional_import_mappings:
|
||||
for key, value in self.additional_import_mappings.items():
|
||||
self.allowed_class_paths.add(key)
|
||||
self.allowed_class_paths.add(value)
|
||||
else:
|
||||
self.allowed_class_paths = _compute_allowed_class_paths(
|
||||
cast("Iterable[AllowedObject]", allowed_objects), self.import_mappings
|
||||
)
|
||||
self.ignore_unserializable_fields = ignore_unserializable_fields
|
||||
self.init_validator = init_validator
|
||||
|
||||
def __call__(self, value: dict[str, Any]) -> Any:
|
||||
"""Revive the value.
|
||||
@@ -141,6 +407,20 @@ class Reviver:
|
||||
[*namespace, name] = value["id"]
|
||||
mapping_key = tuple(value["id"])
|
||||
|
||||
if (
|
||||
self.allowed_class_paths is not None
|
||||
and mapping_key not in self.allowed_class_paths
|
||||
):
|
||||
msg = (
|
||||
f"Deserialization of {mapping_key!r} is not allowed. "
|
||||
"The default (allowed_objects='core') only permits core "
|
||||
"langchain-core classes. To allow trusted partner integrations, "
|
||||
"use allowed_objects='all'. Alternatively, pass an explicit list "
|
||||
"of allowed classes via allowed_objects=[...]. "
|
||||
"See langchain_core.load.mapping for the full allowlist."
|
||||
)
|
||||
raise ValueError(msg)
|
||||
|
||||
if (
|
||||
namespace[0] not in self.valid_namespaces
|
||||
# The root namespace ["langchain"] is not a valid identifier.
|
||||
@@ -148,13 +428,11 @@ class Reviver:
|
||||
):
|
||||
msg = f"Invalid namespace: {value}"
|
||||
raise ValueError(msg)
|
||||
# Has explicit import path.
|
||||
# Determine explicit import path
|
||||
if mapping_key in self.import_mappings:
|
||||
import_path = self.import_mappings[mapping_key]
|
||||
# Split into module and name
|
||||
import_dir, name = import_path[:-1], import_path[-1]
|
||||
# Import module
|
||||
mod = importlib.import_module(".".join(import_dir))
|
||||
elif namespace[0] in DISALLOW_LOAD_FROM_PATH:
|
||||
msg = (
|
||||
"Trying to deserialize something that cannot "
|
||||
@@ -162,9 +440,16 @@ class Reviver:
|
||||
f"{mapping_key}."
|
||||
)
|
||||
raise ValueError(msg)
|
||||
# Otherwise, treat namespace as path.
|
||||
else:
|
||||
mod = importlib.import_module(".".join(namespace))
|
||||
# Otherwise, treat namespace as path.
|
||||
import_dir = namespace
|
||||
|
||||
# Validate import path is in trusted namespaces before importing
|
||||
if import_dir[0] not in self.valid_namespaces:
|
||||
msg = f"Invalid namespace: {value}"
|
||||
raise ValueError(msg)
|
||||
|
||||
mod = importlib.import_module(".".join(import_dir))
|
||||
|
||||
cls = getattr(mod, name)
|
||||
|
||||
@@ -176,6 +461,10 @@ class Reviver:
|
||||
# We don't need to recurse on kwargs
|
||||
# as json.loads will do that for us.
|
||||
kwargs = value.get("kwargs", {})
|
||||
|
||||
if self.init_validator is not None:
|
||||
self.init_validator(mapping_key, kwargs)
|
||||
|
||||
return cls(**kwargs)
|
||||
|
||||
return value
|
||||
@@ -185,42 +474,74 @@ class Reviver:
|
||||
def loads(
|
||||
text: str,
|
||||
*,
|
||||
allowed_objects: Iterable[AllowedObject] | Literal["all", "core"] = "core",
|
||||
secrets_map: dict[str, str] | None = None,
|
||||
valid_namespaces: list[str] | None = None,
|
||||
secrets_from_env: bool = True,
|
||||
secrets_from_env: bool = False,
|
||||
additional_import_mappings: dict[tuple[str, ...], tuple[str, ...]] | None = None,
|
||||
ignore_unserializable_fields: bool = False,
|
||||
init_validator: InitValidator | None = default_init_validator,
|
||||
) -> Any:
|
||||
"""Revive a LangChain class from a JSON string.
|
||||
|
||||
Equivalent to `load(json.loads(text))`.
|
||||
|
||||
Only classes in the allowlist can be instantiated. The default allowlist includes
|
||||
core LangChain types (messages, prompts, documents, etc.). See
|
||||
`langchain_core.load.mapping` for the full list.
|
||||
|
||||
Args:
|
||||
text: The string to load.
|
||||
allowed_objects: Allowlist of classes that can be deserialized.
|
||||
|
||||
- `'core'` (default): Allow classes defined in the serialization mappings
|
||||
for langchain_core.
|
||||
- `'all'`: Allow classes defined in the serialization mappings.
|
||||
|
||||
This includes core LangChain types (messages, prompts, documents, etc.)
|
||||
and trusted partner integrations. See `langchain_core.load.mapping` for
|
||||
the full list.
|
||||
- Explicit list of classes: Only those specific classes are allowed.
|
||||
- `[]`: Disallow all deserialization (will raise on any object).
|
||||
secrets_map: A map of secrets to load.
|
||||
|
||||
If a secret is not found in the map, it will be loaded from the environment
|
||||
if `secrets_from_env` is `True`.
|
||||
valid_namespaces: A list of additional namespaces (modules)
|
||||
to allow to be deserialized.
|
||||
valid_namespaces: Additional namespaces (modules) to allow during
|
||||
deserialization, beyond the default trusted namespaces.
|
||||
secrets_from_env: Whether to load secrets from the environment.
|
||||
additional_import_mappings: A dictionary of additional namespace mappings
|
||||
additional_import_mappings: A dictionary of additional namespace mappings.
|
||||
|
||||
You can use this to override default mappings or add new mappings.
|
||||
|
||||
When `allowed_objects` is `'core'` or `'all'` (using defaults), paths from these
|
||||
mappings are also added to the allowed class paths.
|
||||
ignore_unserializable_fields: Whether to ignore unserializable fields.
|
||||
init_validator: Optional callable to validate kwargs before instantiation.
|
||||
|
||||
If provided, this function is called with `(class_path, kwargs)` where
|
||||
`class_path` is the class path tuple and `kwargs` is the kwargs dict.
|
||||
The validator should raise an exception if the object should not be
|
||||
deserialized, otherwise return `None`. Defaults to
|
||||
`default_init_validator` which blocks jinja2 templates.
|
||||
|
||||
Returns:
|
||||
Revived LangChain objects.
|
||||
|
||||
Raises:
|
||||
ValueError: If an object's class path is not in the `allowed_objects` allowlist.
|
||||
"""
|
||||
return json.loads(
|
||||
text,
|
||||
object_hook=Reviver(
|
||||
secrets_map,
|
||||
valid_namespaces,
|
||||
secrets_from_env,
|
||||
additional_import_mappings,
|
||||
ignore_unserializable_fields=ignore_unserializable_fields,
|
||||
),
|
||||
# Parse JSON and delegate to load() for proper escape handling
|
||||
raw_obj = json.loads(text)
|
||||
return load(
|
||||
raw_obj,
|
||||
allowed_objects=allowed_objects,
|
||||
secrets_map=secrets_map,
|
||||
valid_namespaces=valid_namespaces,
|
||||
secrets_from_env=secrets_from_env,
|
||||
additional_import_mappings=additional_import_mappings,
|
||||
ignore_unserializable_fields=ignore_unserializable_fields,
|
||||
init_validator=init_validator,
|
||||
)
|
||||
|
||||
|
||||
@@ -228,45 +549,105 @@ def loads(
|
||||
def load(
|
||||
obj: Any,
|
||||
*,
|
||||
allowed_objects: Iterable[AllowedObject] | Literal["all", "core"] = "core",
|
||||
secrets_map: dict[str, str] | None = None,
|
||||
valid_namespaces: list[str] | None = None,
|
||||
secrets_from_env: bool = True,
|
||||
secrets_from_env: bool = False,
|
||||
additional_import_mappings: dict[tuple[str, ...], tuple[str, ...]] | None = None,
|
||||
ignore_unserializable_fields: bool = False,
|
||||
init_validator: InitValidator | None = default_init_validator,
|
||||
) -> Any:
|
||||
"""Revive a LangChain class from a JSON object.
|
||||
|
||||
Use this if you already have a parsed JSON object,
|
||||
eg. from `json.load` or `orjson.loads`.
|
||||
Use this if you already have a parsed JSON object, e.g. from `json.load` or
|
||||
`orjson.loads`.
|
||||
|
||||
Only classes in the allowlist can be instantiated. The default allowlist includes
|
||||
core LangChain types (messages, prompts, documents, etc.). See
|
||||
`langchain_core.load.mapping` for the full list.
|
||||
|
||||
Args:
|
||||
obj: The object to load.
|
||||
allowed_objects: Allowlist of classes that can be deserialized.
|
||||
|
||||
- `'core'` (default): Allow classes defined in the serialization mappings
|
||||
for langchain_core.
|
||||
- `'all'`: Allow classes defined in the serialization mappings.
|
||||
|
||||
This includes core LangChain types (messages, prompts, documents, etc.)
|
||||
and trusted partner integrations. See `langchain_core.load.mapping` for
|
||||
the full list.
|
||||
- Explicit list of classes: Only those specific classes are allowed.
|
||||
- `[]`: Disallow all deserialization (will raise on any object).
|
||||
secrets_map: A map of secrets to load.
|
||||
|
||||
If a secret is not found in the map, it will be loaded from the environment
|
||||
if `secrets_from_env` is `True`.
|
||||
valid_namespaces: A list of additional namespaces (modules)
|
||||
to allow to be deserialized.
|
||||
valid_namespaces: Additional namespaces (modules) to allow during
|
||||
deserialization, beyond the default trusted namespaces.
|
||||
secrets_from_env: Whether to load secrets from the environment.
|
||||
additional_import_mappings: A dictionary of additional namespace mappings
|
||||
additional_import_mappings: A dictionary of additional namespace mappings.
|
||||
|
||||
You can use this to override default mappings or add new mappings.
|
||||
|
||||
When `allowed_objects` is `'core'` or `'all'` (using defaults), paths from these
|
||||
mappings are also added to the allowed class paths.
|
||||
ignore_unserializable_fields: Whether to ignore unserializable fields.
|
||||
init_validator: Optional callable to validate kwargs before instantiation.
|
||||
|
||||
If provided, this function is called with `(class_path, kwargs)` where
|
||||
`class_path` is the class path tuple and `kwargs` is the kwargs dict.
|
||||
The validator should raise an exception if the object should not be
|
||||
deserialized, otherwise return `None`. Defaults to
|
||||
`default_init_validator` which blocks jinja2 templates.
|
||||
|
||||
Returns:
|
||||
Revived LangChain objects.
|
||||
|
||||
Raises:
|
||||
ValueError: If an object's class path is not in the `allowed_objects` allowlist.
|
||||
|
||||
Example:
|
||||
```python
|
||||
from langchain_core.load import load, dumpd
|
||||
from langchain_core.messages import AIMessage
|
||||
|
||||
msg = AIMessage(content="Hello")
|
||||
data = dumpd(msg)
|
||||
|
||||
# Deserialize using default allowlist
|
||||
loaded = load(data)
|
||||
|
||||
# Or with explicit allowlist
|
||||
loaded = load(data, allowed_objects=[AIMessage])
|
||||
|
||||
# Or extend defaults with additional mappings
|
||||
loaded = load(
|
||||
data,
|
||||
additional_import_mappings={
|
||||
("my_pkg", "MyClass"): ("my_pkg", "module", "MyClass"),
|
||||
},
|
||||
)
|
||||
```
|
||||
"""
|
||||
reviver = Reviver(
|
||||
allowed_objects,
|
||||
secrets_map,
|
||||
valid_namespaces,
|
||||
secrets_from_env,
|
||||
additional_import_mappings,
|
||||
ignore_unserializable_fields=ignore_unserializable_fields,
|
||||
init_validator=init_validator,
|
||||
)
|
||||
|
||||
def _load(obj: Any) -> Any:
|
||||
if isinstance(obj, dict):
|
||||
# Need to revive leaf nodes before reviving this node
|
||||
# Check for escaped dict FIRST (before recursing).
|
||||
# Escaped dicts are user data that should NOT be processed as LC objects.
|
||||
if _is_escaped_dict(obj):
|
||||
return _unescape_value(obj)
|
||||
|
||||
# Not escaped - recurse into children then apply reviver
|
||||
loaded_obj = {k: _load(v) for k, v in obj.items()}
|
||||
return reviver(loaded_obj)
|
||||
if isinstance(obj, list):
|
||||
|
||||
@@ -1,21 +1,19 @@
|
||||
"""Serialization mapping.
|
||||
|
||||
This file contains a mapping between the lc_namespace path for a given
|
||||
subclass that implements from Serializable to the namespace
|
||||
This file contains a mapping between the `lc_namespace` path for a given
|
||||
subclass that inherits from `Serializable` to the namespace
|
||||
where that class is actually located.
|
||||
|
||||
This mapping helps maintain the ability to serialize and deserialize
|
||||
well-known LangChain objects even if they are moved around in the codebase
|
||||
across different LangChain versions.
|
||||
|
||||
For example,
|
||||
For example, the code for the `AIMessage` class is located in
|
||||
`langchain_core.messages.ai.AIMessage`. This message is associated with the
|
||||
`lc_namespace` of `["langchain", "schema", "messages", "AIMessage"]`,
|
||||
because this code was originally in `langchain.schema.messages.AIMessage`.
|
||||
|
||||
The code for AIMessage class is located in langchain_core.messages.ai.AIMessage,
|
||||
This message is associated with the lc_namespace
|
||||
["langchain", "schema", "messages", "AIMessage"],
|
||||
because this code was originally in langchain.schema.messages.AIMessage.
|
||||
|
||||
The mapping allows us to deserialize an AIMessage created with an older
|
||||
The mapping allows us to deserialize an `AIMessage` created with an older
|
||||
version of LangChain where the code was in a different location.
|
||||
"""
|
||||
|
||||
@@ -275,6 +273,11 @@ SERIALIZABLE_MAPPING: dict[tuple[str, ...], tuple[str, ...]] = {
|
||||
"chat_models",
|
||||
"ChatGroq",
|
||||
),
|
||||
("langchain_xai", "chat_models", "ChatXAI"): (
|
||||
"langchain_xai",
|
||||
"chat_models",
|
||||
"ChatXAI",
|
||||
),
|
||||
("langchain", "chat_models", "fireworks", "ChatFireworks"): (
|
||||
"langchain_fireworks",
|
||||
"chat_models",
|
||||
@@ -529,16 +532,6 @@ SERIALIZABLE_MAPPING: dict[tuple[str, ...], tuple[str, ...]] = {
|
||||
"structured",
|
||||
"StructuredPrompt",
|
||||
),
|
||||
("langchain_sambanova", "chat_models", "ChatSambaNovaCloud"): (
|
||||
"langchain_sambanova",
|
||||
"chat_models",
|
||||
"ChatSambaNovaCloud",
|
||||
),
|
||||
("langchain_sambanova", "chat_models", "ChatSambaStudio"): (
|
||||
"langchain_sambanova",
|
||||
"chat_models",
|
||||
"ChatSambaStudio",
|
||||
),
|
||||
("langchain_core", "prompts", "message", "_DictMessagePromptTemplate"): (
|
||||
"langchain_core",
|
||||
"prompts",
|
||||
|
||||
@@ -51,22 +51,22 @@ class InputTokenDetails(TypedDict, total=False):
|
||||
May also hold extra provider-specific keys.
|
||||
|
||||
!!! version-added "Added in `langchain-core` 0.3.9"
|
||||
|
||||
"""
|
||||
|
||||
audio: int
|
||||
"""Audio input tokens."""
|
||||
|
||||
cache_creation: int
|
||||
"""Input tokens that were cached and there was a cache miss.
|
||||
|
||||
Since there was a cache miss, the cache was created from these tokens.
|
||||
"""
|
||||
|
||||
cache_read: int
|
||||
"""Input tokens that were cached and there was a cache hit.
|
||||
|
||||
Since there was a cache hit, the tokens were read from the cache. More precisely,
|
||||
the model state given these tokens was read from the cache.
|
||||
|
||||
"""
|
||||
|
||||
|
||||
@@ -91,12 +91,12 @@ class OutputTokenDetails(TypedDict, total=False):
|
||||
|
||||
audio: int
|
||||
"""Audio output tokens."""
|
||||
|
||||
reasoning: int
|
||||
"""Reasoning output tokens.
|
||||
|
||||
Tokens generated by the model in a chain of thought process (e.g. by OpenAI's o1
|
||||
models) that are not returned as part of model output.
|
||||
|
||||
"""
|
||||
|
||||
|
||||
@@ -124,9 +124,11 @@ class UsageMetadata(TypedDict):
|
||||
```
|
||||
|
||||
!!! warning "Behavior changed in `langchain-core` 0.3.9"
|
||||
|
||||
Added `input_token_details` and `output_token_details`.
|
||||
|
||||
!!! note "LangSmith SDK"
|
||||
|
||||
The LangSmith SDK also has a `UsageMetadata` class. While the two share fields,
|
||||
LangSmith's `UsageMetadata` has additional fields to capture cost information
|
||||
used by the LangSmith platform.
|
||||
@@ -134,15 +136,19 @@ class UsageMetadata(TypedDict):
|
||||
|
||||
input_tokens: int
|
||||
"""Count of input (or prompt) tokens. Sum of all input token types."""
|
||||
|
||||
output_tokens: int
|
||||
"""Count of output (or completion) tokens. Sum of all output token types."""
|
||||
|
||||
total_tokens: int
|
||||
"""Total token count. Sum of `input_tokens` + `output_tokens`."""
|
||||
|
||||
input_token_details: NotRequired[InputTokenDetails]
|
||||
"""Breakdown of input token counts.
|
||||
|
||||
Does *not* need to sum to full input token count. Does *not* need to have all keys.
|
||||
"""
|
||||
|
||||
output_token_details: NotRequired[OutputTokenDetails]
|
||||
"""Breakdown of output token counts.
|
||||
|
||||
@@ -162,8 +168,10 @@ class AIMessage(BaseMessage):
|
||||
|
||||
tool_calls: list[ToolCall] = []
|
||||
"""If present, tool calls associated with the message."""
|
||||
|
||||
invalid_tool_calls: list[InvalidToolCall] = []
|
||||
"""If present, tool calls with parsing errors associated with the message."""
|
||||
|
||||
usage_metadata: UsageMetadata | None = None
|
||||
"""If present, usage metadata for a message, such as token counts.
|
||||
|
||||
@@ -318,7 +326,7 @@ class AIMessage(BaseMessage):
|
||||
if tool_calls := values.get("tool_calls"):
|
||||
values["tool_calls"] = [
|
||||
create_tool_call(
|
||||
**{k: v for k, v in tc.items() if k not in ("type", "extras")}
|
||||
**{k: v for k, v in tc.items() if k not in {"type", "extras"}}
|
||||
)
|
||||
for tc in tool_calls
|
||||
]
|
||||
@@ -434,7 +442,7 @@ class AIMessageChunk(AIMessage, BaseMessageChunk):
|
||||
blocks = [
|
||||
block
|
||||
for block in blocks
|
||||
if block["type"] not in ("tool_call", "invalid_tool_call")
|
||||
if block["type"] not in {"tool_call", "invalid_tool_call"}
|
||||
]
|
||||
for tool_call_chunk in self.tool_call_chunks:
|
||||
tc: types.ToolCallChunk = {
|
||||
@@ -555,7 +563,7 @@ class AIMessageChunk(AIMessage, BaseMessageChunk):
|
||||
|
||||
@model_validator(mode="after")
|
||||
def init_server_tool_calls(self) -> Self:
|
||||
"""Parse `server_tool_call_chunks`."""
|
||||
"""Parse `server_tool_call_chunks` from [`ServerToolCallChunk`][langchain.messages.ServerToolCallChunk] objects.""" # noqa: E501
|
||||
if (
|
||||
self.chunk_position == "last"
|
||||
and self.response_metadata.get("output_version") == "v1"
|
||||
@@ -565,7 +573,7 @@ class AIMessageChunk(AIMessage, BaseMessageChunk):
|
||||
if (
|
||||
isinstance(block, dict)
|
||||
and block.get("type")
|
||||
in ("server_tool_call", "server_tool_call_chunk")
|
||||
in {"server_tool_call", "server_tool_call_chunk"}
|
||||
and (args_str := block.get("args"))
|
||||
and isinstance(args_str, str)
|
||||
):
|
||||
|
||||
@@ -391,12 +391,12 @@ class BaseMessageChunk(BaseMessage):
|
||||
Raises:
|
||||
TypeError: If the other object is not a message chunk.
|
||||
|
||||
For example,
|
||||
|
||||
`AIMessageChunk(content="Hello") + AIMessageChunk(content=" World")`
|
||||
|
||||
will give `AIMessageChunk(content="Hello World")`
|
||||
|
||||
Example:
|
||||
```txt
|
||||
AIMessageChunk(content="Hello", ...)
|
||||
+ AIMessageChunk(content=" World", ...)
|
||||
= AIMessageChunk(content="Hello World", ...)
|
||||
```
|
||||
"""
|
||||
if isinstance(other, BaseMessageChunk):
|
||||
# If both are (subclasses of) BaseMessageChunk,
|
||||
|
||||
@@ -159,12 +159,12 @@ def _convert_citation_to_v1(citation: dict[str, Any]) -> types.Annotation:
|
||||
|
||||
return url_citation
|
||||
|
||||
if citation_type in (
|
||||
if citation_type in {
|
||||
"char_location",
|
||||
"content_block_location",
|
||||
"page_location",
|
||||
"search_result_location",
|
||||
):
|
||||
}:
|
||||
document_citation: types.Citation = {
|
||||
"type": "citation",
|
||||
"cited_text": citation["cited_text"],
|
||||
@@ -173,8 +173,6 @@ def _convert_citation_to_v1(citation: dict[str, Any]) -> types.Annotation:
|
||||
document_citation["title"] = citation["document_title"]
|
||||
elif title := citation.get("title"):
|
||||
document_citation["title"] = title
|
||||
else:
|
||||
pass
|
||||
known_fields = {
|
||||
"type",
|
||||
"cited_text",
|
||||
@@ -245,11 +243,20 @@ def _convert_to_v1_from_anthropic(message: AIMessage) -> list[types.ContentBlock
|
||||
and message.chunk_position != "last"
|
||||
):
|
||||
# Isolated chunk
|
||||
tool_call_chunk: types.ToolCallChunk = (
|
||||
message.tool_call_chunks[0].copy() # type: ignore[assignment]
|
||||
chunk = message.tool_call_chunks[0]
|
||||
|
||||
tool_call_chunk = types.ToolCallChunk(
|
||||
name=chunk.get("name"),
|
||||
id=chunk.get("id"),
|
||||
args=chunk.get("args"),
|
||||
type="tool_call_chunk",
|
||||
)
|
||||
if "type" not in tool_call_chunk:
|
||||
tool_call_chunk["type"] = "tool_call_chunk"
|
||||
if "caller" in block:
|
||||
tool_call_chunk["extras"] = {"caller": block["caller"]}
|
||||
|
||||
index = chunk.get("index")
|
||||
if index is not None:
|
||||
tool_call_chunk["index"] = index
|
||||
yield tool_call_chunk
|
||||
else:
|
||||
tool_call_block: types.ToolCall | None = None
|
||||
@@ -271,8 +278,6 @@ def _convert_to_v1_from_anthropic(message: AIMessage) -> list[types.ContentBlock
|
||||
"id": tc.get("id"),
|
||||
}
|
||||
break
|
||||
else:
|
||||
pass
|
||||
if not tool_call_block:
|
||||
tool_call_block = {
|
||||
"type": "tool_call",
|
||||
@@ -282,17 +287,27 @@ def _convert_to_v1_from_anthropic(message: AIMessage) -> list[types.ContentBlock
|
||||
}
|
||||
if "index" in block:
|
||||
tool_call_block["index"] = block["index"]
|
||||
if "caller" in block:
|
||||
if "extras" not in tool_call_block:
|
||||
tool_call_block["extras"] = {}
|
||||
tool_call_block["extras"]["caller"] = block["caller"]
|
||||
|
||||
yield tool_call_block
|
||||
|
||||
elif block_type == "input_json_delta" and isinstance(
|
||||
message, AIMessageChunk
|
||||
):
|
||||
if len(message.tool_call_chunks) == 1:
|
||||
tool_call_chunk = (
|
||||
message.tool_call_chunks[0].copy() # type: ignore[assignment]
|
||||
chunk = message.tool_call_chunks[0]
|
||||
tool_call_chunk = types.ToolCallChunk(
|
||||
name=chunk.get("name"),
|
||||
id=chunk.get("id"),
|
||||
args=chunk.get("args"),
|
||||
type="tool_call_chunk",
|
||||
)
|
||||
if "type" not in tool_call_chunk:
|
||||
tool_call_chunk["type"] = "tool_call_chunk"
|
||||
index = chunk.get("index")
|
||||
if index is not None:
|
||||
tool_call_chunk["index"] = index
|
||||
yield tool_call_chunk
|
||||
|
||||
else:
|
||||
@@ -446,12 +461,26 @@ def _convert_to_v1_from_anthropic(message: AIMessage) -> list[types.ContentBlock
|
||||
|
||||
|
||||
def translate_content(message: AIMessage) -> list[types.ContentBlock]:
|
||||
"""Derive standard content blocks from a message with Anthropic content."""
|
||||
"""Derive standard content blocks from a message with Anthropic content.
|
||||
|
||||
Args:
|
||||
message: The message to translate.
|
||||
|
||||
Returns:
|
||||
The derived content blocks.
|
||||
"""
|
||||
return _convert_to_v1_from_anthropic(message)
|
||||
|
||||
|
||||
def translate_content_chunk(message: AIMessageChunk) -> list[types.ContentBlock]:
|
||||
"""Derive standard content blocks from a message chunk with Anthropic content."""
|
||||
"""Derive standard content blocks from a message chunk with Anthropic content.
|
||||
|
||||
Args:
|
||||
message: The message chunk to translate.
|
||||
|
||||
Returns:
|
||||
The derived content blocks.
|
||||
"""
|
||||
return _convert_to_v1_from_anthropic(message)
|
||||
|
||||
|
||||
|
||||
@@ -65,14 +65,28 @@ def _convert_to_v1_from_bedrock_chunk(
|
||||
|
||||
|
||||
def translate_content(message: AIMessage) -> list[types.ContentBlock]:
|
||||
"""Derive standard content blocks from a message with Bedrock content."""
|
||||
"""Derive standard content blocks from a message with Bedrock content.
|
||||
|
||||
Args:
|
||||
message: The message to translate.
|
||||
|
||||
Returns:
|
||||
The derived content blocks.
|
||||
"""
|
||||
if "claude" not in message.response_metadata.get("model_name", "").lower():
|
||||
raise NotImplementedError # fall back to best-effort parsing
|
||||
return _convert_to_v1_from_bedrock(message)
|
||||
|
||||
|
||||
def translate_content_chunk(message: AIMessageChunk) -> list[types.ContentBlock]:
|
||||
"""Derive standard content blocks from a message chunk with Bedrock content."""
|
||||
"""Derive standard content blocks from a message chunk with Bedrock content.
|
||||
|
||||
Args:
|
||||
message: The message chunk to translate.
|
||||
|
||||
Returns:
|
||||
The derived content blocks.
|
||||
"""
|
||||
# TODO: add model_name to all Bedrock chunks and update core merging logic
|
||||
# to not append during aggregation. Then raise NotImplementedError here if
|
||||
# not an Anthropic model to fall back to best-effort parsing.
|
||||
|
||||
@@ -209,11 +209,16 @@ def _convert_to_v1_from_converse(message: AIMessage) -> list[types.ContentBlock]
|
||||
and message.chunk_position != "last"
|
||||
):
|
||||
# Isolated chunk
|
||||
tool_call_chunk: types.ToolCallChunk = (
|
||||
message.tool_call_chunks[0].copy() # type: ignore[assignment]
|
||||
chunk = message.tool_call_chunks[0]
|
||||
tool_call_chunk = types.ToolCallChunk(
|
||||
name=chunk.get("name"),
|
||||
id=chunk.get("id"),
|
||||
args=chunk.get("args"),
|
||||
type="tool_call_chunk",
|
||||
)
|
||||
if "type" not in tool_call_chunk:
|
||||
tool_call_chunk["type"] = "tool_call_chunk"
|
||||
index = chunk.get("index")
|
||||
if index is not None:
|
||||
tool_call_chunk["index"] = index
|
||||
yield tool_call_chunk
|
||||
else:
|
||||
tool_call_block: types.ToolCall | None = None
|
||||
@@ -235,8 +240,6 @@ def _convert_to_v1_from_converse(message: AIMessage) -> list[types.ContentBlock]
|
||||
"id": tc.get("id"),
|
||||
}
|
||||
break
|
||||
else:
|
||||
pass
|
||||
if not tool_call_block:
|
||||
tool_call_block = {
|
||||
"type": "tool_call",
|
||||
@@ -253,11 +256,16 @@ def _convert_to_v1_from_converse(message: AIMessage) -> list[types.ContentBlock]
|
||||
and isinstance(message, AIMessageChunk)
|
||||
and len(message.tool_call_chunks) == 1
|
||||
):
|
||||
tool_call_chunk = (
|
||||
message.tool_call_chunks[0].copy() # type: ignore[assignment]
|
||||
chunk = message.tool_call_chunks[0]
|
||||
tool_call_chunk = types.ToolCallChunk(
|
||||
name=chunk.get("name"),
|
||||
id=chunk.get("id"),
|
||||
args=chunk.get("args"),
|
||||
type="tool_call_chunk",
|
||||
)
|
||||
if "type" not in tool_call_chunk:
|
||||
tool_call_chunk["type"] = "tool_call_chunk"
|
||||
index = chunk.get("index")
|
||||
if index is not None:
|
||||
tool_call_chunk["index"] = index
|
||||
yield tool_call_chunk
|
||||
|
||||
else:
|
||||
@@ -273,12 +281,26 @@ def _convert_to_v1_from_converse(message: AIMessage) -> list[types.ContentBlock]
|
||||
|
||||
|
||||
def translate_content(message: AIMessage) -> list[types.ContentBlock]:
|
||||
"""Derive standard content blocks from a message with Bedrock Converse content."""
|
||||
"""Derive standard content blocks from a message with Bedrock Converse content.
|
||||
|
||||
Args:
|
||||
message: The message to translate.
|
||||
|
||||
Returns:
|
||||
The derived content blocks.
|
||||
"""
|
||||
return _convert_to_v1_from_converse(message)
|
||||
|
||||
|
||||
def translate_content_chunk(message: AIMessageChunk) -> list[types.ContentBlock]:
|
||||
"""Derive standard content blocks from a chunk with Bedrock Converse content."""
|
||||
"""Derive standard content blocks from a chunk with Bedrock Converse content.
|
||||
|
||||
Args:
|
||||
message: The message chunk to translate.
|
||||
|
||||
Returns:
|
||||
The derived content blocks.
|
||||
"""
|
||||
return _convert_to_v1_from_converse(message)
|
||||
|
||||
|
||||
|
||||
@@ -76,21 +76,36 @@ def translate_grounding_metadata_to_citations(
|
||||
for chunk_index in chunk_indices:
|
||||
if chunk_index < len(grounding_chunks):
|
||||
chunk = grounding_chunks[chunk_index]
|
||||
web_info = chunk.get("web", {})
|
||||
|
||||
# Handle web and maps grounding
|
||||
web_info = chunk.get("web") or {}
|
||||
maps_info = chunk.get("maps") or {}
|
||||
|
||||
# Extract citation info depending on source
|
||||
url = maps_info.get("uri") or web_info.get("uri")
|
||||
title = maps_info.get("title") or web_info.get("title")
|
||||
|
||||
# Note: confidence_scores is a legacy field from Gemini 2.0 and earlier
|
||||
# that indicated confidence (0.0-1.0) for each grounding chunk.
|
||||
#
|
||||
# In Gemini 2.5+, this field is always None/empty and should be ignored.
|
||||
extras_metadata = {
|
||||
"web_search_queries": web_search_queries,
|
||||
"grounding_chunk_index": chunk_index,
|
||||
"confidence_scores": support.get("confidence_scores") or [],
|
||||
}
|
||||
|
||||
# Add maps-specific metadata if present
|
||||
if maps_info.get("placeId"):
|
||||
extras_metadata["place_id"] = maps_info["placeId"]
|
||||
|
||||
citation = create_citation(
|
||||
url=web_info.get("uri"),
|
||||
title=web_info.get("title"),
|
||||
url=url,
|
||||
title=title,
|
||||
start_index=start_index,
|
||||
end_index=end_index,
|
||||
cited_text=cited_text,
|
||||
extras={
|
||||
"google_ai_metadata": {
|
||||
"web_search_queries": web_search_queries,
|
||||
"grounding_chunk_index": chunk_index,
|
||||
"confidence_scores": support.get("confidence_scores", []),
|
||||
}
|
||||
},
|
||||
google_ai_metadata=extras_metadata,
|
||||
)
|
||||
citations.append(citation)
|
||||
|
||||
@@ -396,7 +411,10 @@ def _convert_to_v1_from_genai(message: AIMessage) -> list[types.ContentBlock]:
|
||||
except Exception:
|
||||
# Not valid base64, treat as non-standard
|
||||
converted_blocks.append(
|
||||
{"type": "non_standard", "value": item}
|
||||
{
|
||||
"type": "non_standard",
|
||||
"value": item,
|
||||
}
|
||||
)
|
||||
else:
|
||||
# This likely won't be reached according to previous implementations
|
||||
@@ -508,12 +526,26 @@ def _convert_to_v1_from_genai(message: AIMessage) -> list[types.ContentBlock]:
|
||||
|
||||
|
||||
def translate_content(message: AIMessage) -> list[types.ContentBlock]:
|
||||
"""Derive standard content blocks from a message with Google (GenAI) content."""
|
||||
"""Derive standard content blocks from a message with Google (GenAI) content.
|
||||
|
||||
Args:
|
||||
message: The message to translate.
|
||||
|
||||
Returns:
|
||||
The derived content blocks.
|
||||
"""
|
||||
return _convert_to_v1_from_genai(message)
|
||||
|
||||
|
||||
def translate_content_chunk(message: AIMessageChunk) -> list[types.ContentBlock]:
|
||||
"""Derive standard content blocks from a chunk with Google (GenAI) content."""
|
||||
"""Derive standard content blocks from a chunk with Google (GenAI) content.
|
||||
|
||||
Args:
|
||||
message: The message chunk to translate.
|
||||
|
||||
Returns:
|
||||
The derived content blocks.
|
||||
"""
|
||||
return _convert_to_v1_from_genai(message)
|
||||
|
||||
|
||||
|
||||
@@ -119,12 +119,26 @@ def _convert_to_v1_from_groq(message: AIMessage) -> list[types.ContentBlock]:
|
||||
|
||||
|
||||
def translate_content(message: AIMessage) -> list[types.ContentBlock]:
|
||||
"""Derive standard content blocks from a message with groq content."""
|
||||
"""Derive standard content blocks from a message with groq content.
|
||||
|
||||
Args:
|
||||
message: The message to translate.
|
||||
|
||||
Returns:
|
||||
The derived content blocks.
|
||||
"""
|
||||
return _convert_to_v1_from_groq(message)
|
||||
|
||||
|
||||
def translate_content_chunk(message: AIMessageChunk) -> list[types.ContentBlock]:
|
||||
"""Derive standard content blocks from a message chunk with groq content."""
|
||||
"""Derive standard content blocks from a message chunk with groq content.
|
||||
|
||||
Args:
|
||||
message: The message chunk to translate.
|
||||
|
||||
Returns:
|
||||
The derived content blocks.
|
||||
"""
|
||||
return _convert_to_v1_from_groq(message)
|
||||
|
||||
|
||||
|
||||
@@ -19,7 +19,18 @@ if TYPE_CHECKING:
|
||||
|
||||
|
||||
def convert_to_openai_image_block(block: dict[str, Any]) -> dict:
|
||||
"""Convert `ImageContentBlock` to format expected by OpenAI Chat Completions."""
|
||||
"""Convert `ImageContentBlock` to format expected by OpenAI Chat Completions.
|
||||
|
||||
Args:
|
||||
block: The image content block to convert.
|
||||
|
||||
Raises:
|
||||
ValueError: If required keys are missing.
|
||||
ValueError: If source type is unsupported.
|
||||
|
||||
Returns:
|
||||
The formatted image content block.
|
||||
"""
|
||||
if "url" in block:
|
||||
return {
|
||||
"type": "image_url",
|
||||
@@ -50,6 +61,18 @@ def convert_to_openai_data_block(
|
||||
|
||||
"Standard data content block" can include old-style LangChain v0 blocks
|
||||
(URLContentBlock, Base64ContentBlock, IDContentBlock) or new ones.
|
||||
|
||||
Args:
|
||||
block: The content block to convert.
|
||||
api: The OpenAI API being targeted. Either "chat/completions" or "responses".
|
||||
|
||||
Raises:
|
||||
ValueError: If required keys are missing.
|
||||
ValueError: If file URLs are used with Chat Completions API.
|
||||
ValueError: If block type is unsupported.
|
||||
|
||||
Returns:
|
||||
The formatted content block.
|
||||
"""
|
||||
if block["type"] == "image":
|
||||
chat_completions_block = convert_to_openai_image_block(block)
|
||||
@@ -248,7 +271,7 @@ def _convert_from_v1_to_chat_completions(message: AIMessage) -> AIMessage:
|
||||
if block_type == "text":
|
||||
# Strip annotations
|
||||
new_content.append({"type": "text", "text": block["text"]})
|
||||
elif block_type in ("reasoning", "tool_call"):
|
||||
elif block_type in {"reasoning", "tool_call"}:
|
||||
pass
|
||||
else:
|
||||
new_content.append(block)
|
||||
@@ -706,8 +729,6 @@ def _convert_to_v1_from_responses(message: AIMessage) -> list[types.ContentBlock
|
||||
if invalid_tool_call.get("id") == call_id:
|
||||
tool_call_block = invalid_tool_call.copy()
|
||||
break
|
||||
else:
|
||||
pass
|
||||
if tool_call_block:
|
||||
if "id" in block:
|
||||
if "extras" not in tool_call_block:
|
||||
@@ -735,7 +756,7 @@ def _convert_to_v1_from_responses(message: AIMessage) -> list[types.ContentBlock
|
||||
k: v for k, v in block["action"].items() if k != "sources"
|
||||
}
|
||||
for key in block:
|
||||
if key not in ("type", "id", "action", "status", "index"):
|
||||
if key not in {"type", "id", "action", "status", "index"}:
|
||||
web_search_call[key] = block[key]
|
||||
|
||||
yield cast("types.ServerToolCall", web_search_call)
|
||||
@@ -761,8 +782,6 @@ def _convert_to_v1_from_responses(message: AIMessage) -> list[types.ContentBlock
|
||||
web_search_result["status"] = "success"
|
||||
elif status:
|
||||
web_search_result["extras"] = {"status": status}
|
||||
else:
|
||||
pass
|
||||
if "index" in block and isinstance(block["index"], int):
|
||||
web_search_result["index"] = f"lc_wsr_{block['index'] + 1}"
|
||||
yield cast("types.ServerToolResult", web_search_result)
|
||||
@@ -778,14 +797,14 @@ def _convert_to_v1_from_responses(message: AIMessage) -> list[types.ContentBlock
|
||||
file_search_call["index"] = f"lc_fsc_{block['index']}"
|
||||
|
||||
for key in block:
|
||||
if key not in (
|
||||
if key not in {
|
||||
"type",
|
||||
"id",
|
||||
"queries",
|
||||
"results",
|
||||
"status",
|
||||
"index",
|
||||
):
|
||||
}:
|
||||
file_search_call[key] = block[key]
|
||||
|
||||
yield cast("types.ServerToolCall", file_search_call)
|
||||
@@ -804,8 +823,6 @@ def _convert_to_v1_from_responses(message: AIMessage) -> list[types.ContentBlock
|
||||
file_search_result["status"] = "success"
|
||||
elif status:
|
||||
file_search_result["extras"] = {"status": status}
|
||||
else:
|
||||
pass
|
||||
if "index" in block and isinstance(block["index"], int):
|
||||
file_search_result["index"] = f"lc_fsr_{block['index'] + 1}"
|
||||
yield cast("types.ServerToolResult", file_search_result)
|
||||
@@ -849,8 +866,6 @@ def _convert_to_v1_from_responses(message: AIMessage) -> list[types.ContentBlock
|
||||
code_interpreter_result["status"] = "success"
|
||||
elif status:
|
||||
code_interpreter_result["extras"] = {"status": status}
|
||||
else:
|
||||
pass
|
||||
if "index" in block and isinstance(block["index"], int):
|
||||
code_interpreter_result["index"] = f"lc_cir_{block['index'] + 1}"
|
||||
|
||||
@@ -981,7 +996,14 @@ def _convert_to_v1_from_responses(message: AIMessage) -> list[types.ContentBlock
|
||||
|
||||
|
||||
def translate_content(message: AIMessage) -> list[types.ContentBlock]:
|
||||
"""Derive standard content blocks from a message with OpenAI content."""
|
||||
"""Derive standard content blocks from a message with OpenAI content.
|
||||
|
||||
Args:
|
||||
message: The message to translate.
|
||||
|
||||
Returns:
|
||||
The derived content blocks.
|
||||
"""
|
||||
if isinstance(message.content, str):
|
||||
return _convert_to_v1_from_chat_completions(message)
|
||||
message = _convert_from_v03_ai_message(message)
|
||||
@@ -989,7 +1011,14 @@ def translate_content(message: AIMessage) -> list[types.ContentBlock]:
|
||||
|
||||
|
||||
def translate_content_chunk(message: AIMessageChunk) -> list[types.ContentBlock]:
|
||||
"""Derive standard content blocks from a message chunk with OpenAI content."""
|
||||
"""Derive standard content blocks from a message chunk with OpenAI content.
|
||||
|
||||
Args:
|
||||
message: The message chunk to translate.
|
||||
|
||||
Returns:
|
||||
The derived content blocks.
|
||||
"""
|
||||
if isinstance(message.content, str):
|
||||
return _convert_to_v1_from_chat_completions_chunk(message)
|
||||
message = _convert_from_v03_ai_message(message) # type: ignore[assignment]
|
||||
|
||||
@@ -654,7 +654,7 @@ class PlainTextContentBlock(TypedDict):
|
||||
|
||||
!!! note
|
||||
Title and context are optional fields that may be passed to the model. See
|
||||
Anthropic [example](https://docs.claude.com/en/docs/build-with-claude/citations#citable-vs-non-citable-content).
|
||||
Anthropic [example](https://platform.claude.com/docs/en/build-with-claude/citations#citable-vs-non-citable-content).
|
||||
|
||||
!!! note "Factory function"
|
||||
`create_plaintext_block` may also be used as a factory to create a
|
||||
|
||||
@@ -29,38 +29,39 @@ class ToolMessage(BaseMessage, ToolOutputMixin):
|
||||
`ToolMessage` objects contain the result of a tool invocation. Typically, the result
|
||||
is encoded inside the `content` field.
|
||||
|
||||
Example: A `ToolMessage` representing a result of `42` from a tool call with id
|
||||
`tool_call_id` is used to associate the tool call request with the tool call
|
||||
response. Useful in situations where a chat model is able to request multiple tool
|
||||
calls in parallel.
|
||||
|
||||
```python
|
||||
from langchain_core.messages import ToolMessage
|
||||
Example:
|
||||
A `ToolMessage` representing a result of `42` from a tool call with id
|
||||
|
||||
ToolMessage(content="42", tool_call_id="call_Jja7J89XsjrOLA5r!MEOW!SL")
|
||||
```
|
||||
```python
|
||||
from langchain_core.messages import ToolMessage
|
||||
|
||||
Example: A `ToolMessage` where only part of the tool output is sent to the model
|
||||
and the full output is passed in to artifact.
|
||||
ToolMessage(content="42", tool_call_id="call_Jja7J89XsjrOLA5r!MEOW!SL")
|
||||
```
|
||||
|
||||
```python
|
||||
from langchain_core.messages import ToolMessage
|
||||
Example:
|
||||
A `ToolMessage` where only part of the tool output is sent to the model
|
||||
and the full output is passed in to artifact.
|
||||
|
||||
tool_output = {
|
||||
"stdout": "From the graph we can see that the correlation between "
|
||||
"x and y is ...",
|
||||
"stderr": None,
|
||||
"artifacts": {"type": "image", "base64_data": "/9j/4gIcSU..."},
|
||||
}
|
||||
```python
|
||||
from langchain_core.messages import ToolMessage
|
||||
|
||||
ToolMessage(
|
||||
content=tool_output["stdout"],
|
||||
artifact=tool_output,
|
||||
tool_call_id="call_Jja7J89XsjrOLA5r!MEOW!SL",
|
||||
)
|
||||
```
|
||||
|
||||
The `tool_call_id` field is used to associate the tool call request with the
|
||||
tool call response. Useful in situations where a chat model is able
|
||||
to request multiple tool calls in parallel.
|
||||
tool_output = {
|
||||
"stdout": "From the graph we can see that the correlation between "
|
||||
"x and y is ...",
|
||||
"stderr": None,
|
||||
"artifacts": {"type": "image", "base64_data": "/9j/4gIcSU..."},
|
||||
}
|
||||
|
||||
ToolMessage(
|
||||
content=tool_output["stdout"],
|
||||
artifact=tool_output,
|
||||
tool_call_id="call_Jja7J89XsjrOLA5r!MEOW!SL",
|
||||
)
|
||||
```
|
||||
"""
|
||||
|
||||
tool_call_id: str
|
||||
|
||||
@@ -15,12 +15,16 @@ import json
|
||||
import logging
|
||||
import math
|
||||
from collections.abc import Callable, Iterable, Sequence
|
||||
from functools import partial
|
||||
from functools import partial, wraps
|
||||
from typing import (
|
||||
TYPE_CHECKING,
|
||||
Annotated,
|
||||
Any,
|
||||
Concatenate,
|
||||
Literal,
|
||||
ParamSpec,
|
||||
Protocol,
|
||||
TypeVar,
|
||||
cast,
|
||||
overload,
|
||||
)
|
||||
@@ -105,6 +109,11 @@ def get_buffer_string(
|
||||
Raises:
|
||||
ValueError: If an unsupported message type is encountered.
|
||||
|
||||
Note:
|
||||
If a message is an `AIMessage` and contains both tool calls under `tool_calls`
|
||||
and a function call under `additional_kwargs["function_call"]`, only the tool
|
||||
calls will be appended to the string representation.
|
||||
|
||||
Example:
|
||||
```python
|
||||
from langchain_core import AIMessage, HumanMessage
|
||||
@@ -135,8 +144,12 @@ def get_buffer_string(
|
||||
msg = f"Got unsupported message type: {m}"
|
||||
raise ValueError(msg) # noqa: TRY004
|
||||
message = f"{role}: {m.text}"
|
||||
if isinstance(m, AIMessage) and "function_call" in m.additional_kwargs:
|
||||
message += f"{m.additional_kwargs['function_call']}"
|
||||
if isinstance(m, AIMessage):
|
||||
if m.tool_calls:
|
||||
message += f"{m.tool_calls}"
|
||||
elif "function_call" in m.additional_kwargs:
|
||||
# Legacy behavior assumes only one function call per message
|
||||
message += f"{m.additional_kwargs['function_call']}"
|
||||
string_messages.append(message)
|
||||
|
||||
return "\n".join(string_messages)
|
||||
@@ -384,33 +397,54 @@ def convert_to_messages(
|
||||
return [_convert_to_message(m) for m in messages]
|
||||
|
||||
|
||||
def _runnable_support(func: Callable) -> Callable:
|
||||
_P = ParamSpec("_P")
|
||||
_R_co = TypeVar("_R_co", covariant=True)
|
||||
|
||||
|
||||
class _RunnableSupportCallable(Protocol[_P, _R_co]):
|
||||
@overload
|
||||
def wrapped(
|
||||
messages: None = None, **kwargs: Any
|
||||
) -> Runnable[Sequence[MessageLikeRepresentation], list[BaseMessage]]: ...
|
||||
def __call__(
|
||||
self,
|
||||
messages: None = None,
|
||||
*args: _P.args,
|
||||
**kwargs: _P.kwargs,
|
||||
) -> Runnable[Sequence[MessageLikeRepresentation], _R_co]: ...
|
||||
|
||||
@overload
|
||||
def wrapped(
|
||||
messages: Sequence[MessageLikeRepresentation], **kwargs: Any
|
||||
) -> list[BaseMessage]: ...
|
||||
def __call__(
|
||||
self,
|
||||
messages: Sequence[MessageLikeRepresentation] | PromptValue,
|
||||
*args: _P.args,
|
||||
**kwargs: _P.kwargs,
|
||||
) -> _R_co: ...
|
||||
|
||||
def __call__(
|
||||
self,
|
||||
messages: Sequence[MessageLikeRepresentation] | PromptValue | None = None,
|
||||
*args: _P.args,
|
||||
**kwargs: _P.kwargs,
|
||||
) -> _R_co | Runnable[Sequence[MessageLikeRepresentation], _R_co]: ...
|
||||
|
||||
|
||||
def _runnable_support(
|
||||
func: Callable[
|
||||
Concatenate[Sequence[MessageLikeRepresentation] | PromptValue, _P], _R_co
|
||||
],
|
||||
) -> _RunnableSupportCallable[_P, _R_co]:
|
||||
@wraps(func)
|
||||
def wrapped(
|
||||
messages: Sequence[MessageLikeRepresentation] | None = None,
|
||||
**kwargs: Any,
|
||||
) -> (
|
||||
list[BaseMessage]
|
||||
| Runnable[Sequence[MessageLikeRepresentation], list[BaseMessage]]
|
||||
):
|
||||
messages: Sequence[MessageLikeRepresentation] | PromptValue | None = None,
|
||||
*args: _P.args,
|
||||
**kwargs: _P.kwargs,
|
||||
) -> _R_co | Runnable[Sequence[MessageLikeRepresentation], _R_co]:
|
||||
# Import locally to prevent circular import.
|
||||
from langchain_core.runnables.base import RunnableLambda # noqa: PLC0415
|
||||
|
||||
if messages is not None:
|
||||
return func(messages, **kwargs)
|
||||
return func(messages, *args, **kwargs)
|
||||
return RunnableLambda(partial(func, **kwargs), name=func.__name__)
|
||||
|
||||
wrapped.__doc__ = func.__doc__
|
||||
return wrapped
|
||||
return cast("_RunnableSupportCallable[_P, _R_co]", wrapped)
|
||||
|
||||
|
||||
@_runnable_support
|
||||
@@ -695,7 +729,8 @@ def trim_messages(
|
||||
max_tokens: int,
|
||||
token_counter: Callable[[list[BaseMessage]], int]
|
||||
| Callable[[BaseMessage], int]
|
||||
| BaseLanguageModel,
|
||||
| BaseLanguageModel
|
||||
| Literal["approximate"],
|
||||
strategy: Literal["first", "last"] = "last",
|
||||
allow_partial: bool = False,
|
||||
end_on: str | type[BaseMessage] | Sequence[str | type[BaseMessage]] | None = None,
|
||||
@@ -733,51 +768,65 @@ def trim_messages(
|
||||
messages: Sequence of Message-like objects to trim.
|
||||
max_tokens: Max token count of trimmed messages.
|
||||
token_counter: Function or llm for counting tokens in a `BaseMessage` or a
|
||||
list of `BaseMessage`. If a `BaseLanguageModel` is passed in then
|
||||
`BaseLanguageModel.get_num_tokens_from_messages()` will be used.
|
||||
Set to `len` to count the number of **messages** in the chat history.
|
||||
list of `BaseMessage`.
|
||||
|
||||
If a `BaseLanguageModel` is passed in then
|
||||
`BaseLanguageModel.get_num_tokens_from_messages()` will be used. Set to
|
||||
`len` to count the number of **messages** in the chat history.
|
||||
|
||||
You can also use string shortcuts for convenience:
|
||||
|
||||
- `'approximate'`: Uses `count_tokens_approximately` for fast, approximate
|
||||
token counts.
|
||||
|
||||
!!! note
|
||||
Use `count_tokens_approximately` to get fast, approximate token
|
||||
counts.
|
||||
This is recommended for using `trim_messages` on the hot path, where
|
||||
exact token counting is not necessary.
|
||||
|
||||
`count_tokens_approximately` (or the shortcut `'approximate'`) is
|
||||
recommended for using `trim_messages` on the hot path, where exact token
|
||||
counting is not necessary.
|
||||
|
||||
strategy: Strategy for trimming.
|
||||
|
||||
- `'first'`: Keep the first `<= n_count` tokens of the messages.
|
||||
- `'last'`: Keep the last `<= n_count` tokens of the messages.
|
||||
allow_partial: Whether to split a message if only part of the message can be
|
||||
included. If `strategy='last'` then the last partial contents of a message
|
||||
are included. If `strategy='first'` then the first partial contents of a
|
||||
message are included.
|
||||
end_on: The message type to end on. If specified then every message after the
|
||||
last occurrence of this type is ignored. If `strategy='last'` then this
|
||||
is done before we attempt to get the last `max_tokens`. If
|
||||
`strategy='first'` then this is done after we get the first
|
||||
`max_tokens`. Can be specified as string names (e.g. `'system'`,
|
||||
`'human'`, `'ai'`, ...) or as `BaseMessage` classes (e.g.
|
||||
`SystemMessage`, `HumanMessage`, `AIMessage`, ...). Can be a single
|
||||
type or a list of types.
|
||||
included.
|
||||
|
||||
start_on: The message type to start on. Should only be specified if
|
||||
`strategy='last'`. If specified then every message before
|
||||
the first occurrence of this type is ignored. This is done after we trim
|
||||
the initial messages to the last `max_tokens`. Does not
|
||||
apply to a `SystemMessage` at index 0 if `include_system=True`. Can be
|
||||
specified as string names (e.g. `'system'`, `'human'`, `'ai'`, ...) or
|
||||
as `BaseMessage` classes (e.g. `SystemMessage`, `HumanMessage`,
|
||||
`AIMessage`, ...). Can be a single type or a list of types.
|
||||
If `strategy='last'` then the last partial contents of a message are
|
||||
included. If `strategy='first'` then the first partial contents of a
|
||||
message are included.
|
||||
end_on: The message type to end on.
|
||||
|
||||
If specified then every message after the last occurrence of this type is
|
||||
ignored. If `strategy='last'` then this is done before we attempt to get the
|
||||
last `max_tokens`. If `strategy='first'` then this is done after we get the
|
||||
first `max_tokens`. Can be specified as string names (e.g. `'system'`,
|
||||
`'human'`, `'ai'`, ...) or as `BaseMessage` classes (e.g. `SystemMessage`,
|
||||
`HumanMessage`, `AIMessage`, ...). Can be a single type or a list of types.
|
||||
|
||||
start_on: The message type to start on.
|
||||
|
||||
Should only be specified if `strategy='last'`. If specified then every
|
||||
message before the first occurrence of this type is ignored. This is done
|
||||
after we trim the initial messages to the last `max_tokens`. Does not apply
|
||||
to a `SystemMessage` at index 0 if `include_system=True`. Can be specified
|
||||
as string names (e.g. `'system'`, `'human'`, `'ai'`, ...) or as
|
||||
`BaseMessage` classes (e.g. `SystemMessage`, `HumanMessage`, `AIMessage`,
|
||||
...). Can be a single type or a list of types.
|
||||
|
||||
include_system: Whether to keep the `SystemMessage` if there is one at index
|
||||
`0`. Should only be specified if `strategy="last"`.
|
||||
`0`.
|
||||
|
||||
Should only be specified if `strategy="last"`.
|
||||
text_splitter: Function or `langchain_text_splitters.TextSplitter` for
|
||||
splitting the string contents of a message. Only used if
|
||||
`allow_partial=True`. If `strategy='last'` then the last split tokens
|
||||
from a partial message will be included. If `strategy='first'` then the
|
||||
first split tokens from a partial message will be included. Token splitter
|
||||
assumes that separators are kept, so that split contents can be directly
|
||||
concatenated to recreate the original text. Defaults to splitting on
|
||||
newlines.
|
||||
splitting the string contents of a message.
|
||||
|
||||
Only used if `allow_partial=True`. If `strategy='last'` then the last split
|
||||
tokens from a partial message will be included. If `strategy='first'` then
|
||||
the first split tokens from a partial message will be included. Token
|
||||
splitter assumes that separators are kept, so that split contents can be
|
||||
directly concatenated to recreate the original text. Defaults to splitting
|
||||
on newlines.
|
||||
|
||||
Returns:
|
||||
List of trimmed `BaseMessage`.
|
||||
@@ -788,8 +837,8 @@ def trim_messages(
|
||||
|
||||
Example:
|
||||
Trim chat history based on token count, keeping the `SystemMessage` if
|
||||
present, and ensuring that the chat history starts with a `HumanMessage` (
|
||||
or a `SystemMessage` followed by a `HumanMessage`).
|
||||
present, and ensuring that the chat history starts with a `HumanMessage` (or a
|
||||
`SystemMessage` followed by a `HumanMessage`).
|
||||
|
||||
```python
|
||||
from langchain_core.messages import (
|
||||
@@ -842,8 +891,34 @@ def trim_messages(
|
||||
]
|
||||
```
|
||||
|
||||
Trim chat history using approximate token counting with `'approximate'`:
|
||||
|
||||
```python
|
||||
trim_messages(
|
||||
messages,
|
||||
max_tokens=45,
|
||||
strategy="last",
|
||||
# Using the "approximate" shortcut for fast token counting
|
||||
token_counter="approximate",
|
||||
start_on="human",
|
||||
include_system=True,
|
||||
)
|
||||
|
||||
# This is equivalent to using `count_tokens_approximately` directly
|
||||
from langchain_core.messages.utils import count_tokens_approximately
|
||||
|
||||
trim_messages(
|
||||
messages,
|
||||
max_tokens=45,
|
||||
strategy="last",
|
||||
token_counter=count_tokens_approximately,
|
||||
start_on="human",
|
||||
include_system=True,
|
||||
)
|
||||
```
|
||||
|
||||
Trim chat history based on the message count, keeping the `SystemMessage` if
|
||||
present, and ensuring that the chat history starts with a `HumanMessage` (
|
||||
present, and ensuring that the chat history starts with a HumanMessage (
|
||||
or a `SystemMessage` followed by a `HumanMessage`).
|
||||
|
||||
trim_messages(
|
||||
@@ -965,24 +1040,44 @@ def trim_messages(
|
||||
raise ValueError(msg)
|
||||
|
||||
messages = convert_to_messages(messages)
|
||||
if hasattr(token_counter, "get_num_tokens_from_messages"):
|
||||
list_token_counter = token_counter.get_num_tokens_from_messages
|
||||
elif callable(token_counter):
|
||||
|
||||
# Handle string shortcuts for token counter
|
||||
if isinstance(token_counter, str):
|
||||
if token_counter in _TOKEN_COUNTER_SHORTCUTS:
|
||||
actual_token_counter = _TOKEN_COUNTER_SHORTCUTS[token_counter]
|
||||
else:
|
||||
available_shortcuts = ", ".join(
|
||||
f"'{key}'" for key in _TOKEN_COUNTER_SHORTCUTS
|
||||
)
|
||||
msg = (
|
||||
f"Invalid token_counter shortcut '{token_counter}'. "
|
||||
f"Available shortcuts: {available_shortcuts}."
|
||||
)
|
||||
raise ValueError(msg)
|
||||
else:
|
||||
# Type narrowing: at this point token_counter is not a str
|
||||
actual_token_counter = token_counter # type: ignore[assignment]
|
||||
|
||||
if hasattr(actual_token_counter, "get_num_tokens_from_messages"):
|
||||
list_token_counter = actual_token_counter.get_num_tokens_from_messages
|
||||
elif callable(actual_token_counter):
|
||||
if (
|
||||
next(iter(inspect.signature(token_counter).parameters.values())).annotation
|
||||
next(
|
||||
iter(inspect.signature(actual_token_counter).parameters.values())
|
||||
).annotation
|
||||
is BaseMessage
|
||||
):
|
||||
|
||||
def list_token_counter(messages: Sequence[BaseMessage]) -> int:
|
||||
return sum(token_counter(msg) for msg in messages) # type: ignore[arg-type, misc]
|
||||
return sum(actual_token_counter(msg) for msg in messages) # type: ignore[arg-type, misc]
|
||||
|
||||
else:
|
||||
list_token_counter = token_counter
|
||||
list_token_counter = actual_token_counter
|
||||
else:
|
||||
msg = (
|
||||
f"'token_counter' expected to be a model that implements "
|
||||
f"'get_num_tokens_from_messages()' or a function. Received object of type "
|
||||
f"{type(token_counter)}."
|
||||
f"{type(actual_token_counter)}."
|
||||
)
|
||||
raise ValueError(msg)
|
||||
|
||||
@@ -1022,6 +1117,7 @@ def convert_to_openai_messages(
|
||||
*,
|
||||
text_format: Literal["string", "block"] = "string",
|
||||
include_id: bool = False,
|
||||
pass_through_unknown_blocks: bool = True,
|
||||
) -> dict | list[dict]:
|
||||
"""Convert LangChain messages into OpenAI message dicts.
|
||||
|
||||
@@ -1041,6 +1137,9 @@ def convert_to_openai_messages(
|
||||
content blocks these are left as is.
|
||||
include_id: Whether to include message IDs in the openai messages, if they
|
||||
are present in the source messages.
|
||||
pass_through_unknown_blocks: Whether to include content blocks with unknown
|
||||
formats in the output. If `False`, an error is raised if an unknown
|
||||
content block is encountered.
|
||||
|
||||
Raises:
|
||||
ValueError: if an unrecognized `text_format` is specified, or if a message
|
||||
@@ -1290,6 +1389,36 @@ def convert_to_openai_messages(
|
||||
},
|
||||
}
|
||||
)
|
||||
elif block.get("type") == "function_call": # OpenAI Responses
|
||||
if not any(
|
||||
tool_call["id"] == block.get("call_id")
|
||||
for tool_call in cast("AIMessage", message).tool_calls
|
||||
):
|
||||
if missing := [
|
||||
k
|
||||
for k in ("call_id", "name", "arguments")
|
||||
if k not in block
|
||||
]:
|
||||
err = (
|
||||
f"Unrecognized content block at "
|
||||
f"messages[{i}].content[{j}] has 'type': "
|
||||
f"'tool_use', but is missing expected key(s) "
|
||||
f"{missing}. Full content block:\n\n{block}"
|
||||
)
|
||||
raise ValueError(err)
|
||||
oai_msg["tool_calls"] = oai_msg.get("tool_calls", [])
|
||||
oai_msg["tool_calls"].append(
|
||||
{
|
||||
"type": "function",
|
||||
"id": block.get("call_id"),
|
||||
"function": {
|
||||
"name": block.get("name"),
|
||||
"arguments": block.get("arguments"),
|
||||
},
|
||||
}
|
||||
)
|
||||
if pass_through_unknown_blocks:
|
||||
content.append(block)
|
||||
elif block.get("type") == "tool_result":
|
||||
if missing := [
|
||||
k for k in ("content", "tool_use_id") if k not in block
|
||||
@@ -1370,7 +1499,10 @@ def convert_to_openai_messages(
|
||||
},
|
||||
}
|
||||
)
|
||||
elif block.get("type") in ["thinking", "reasoning"]:
|
||||
elif (
|
||||
block.get("type") in {"thinking", "reasoning"}
|
||||
or pass_through_unknown_blocks
|
||||
):
|
||||
content.append(block)
|
||||
else:
|
||||
err = (
|
||||
@@ -1743,3 +1875,14 @@ def count_tokens_approximately(
|
||||
|
||||
# round up one more time in case extra_tokens_per_message is a float
|
||||
return math.ceil(token_count)
|
||||
|
||||
|
||||
# Mapping from string shortcuts to token counter functions
|
||||
def _approximate_token_counter(messages: Sequence[BaseMessage]) -> int:
|
||||
"""Wrapper for `count_tokens_approximately` that matches expected signature."""
|
||||
return count_tokens_approximately(messages)
|
||||
|
||||
|
||||
_TOKEN_COUNTER_SHORTCUTS = {
|
||||
"approximate": _approximate_token_counter,
|
||||
}
|
||||
|
||||
@@ -37,7 +37,7 @@ class OutputFunctionsParser(BaseGenerationOutputParser[Any]):
|
||||
The parsed JSON object.
|
||||
|
||||
Raises:
|
||||
`OutputParserException`: If the output is not valid JSON.
|
||||
OutputParserException: If the output is not valid JSON.
|
||||
"""
|
||||
generation = result[0]
|
||||
if not isinstance(generation, ChatGeneration):
|
||||
@@ -88,7 +88,7 @@ class JsonOutputFunctionsParser(BaseCumulativeTransformOutputParser[Any]):
|
||||
The parsed JSON object.
|
||||
|
||||
Raises:
|
||||
OutputParserExcept`ion: If the output is not valid JSON.
|
||||
OutputParserException: If the output is not valid JSON.
|
||||
"""
|
||||
if len(result) != 1:
|
||||
msg = f"Expected exactly one result, but got {len(result)}"
|
||||
|
||||
@@ -47,22 +47,24 @@ def parse_tool_call(
|
||||
"""
|
||||
if "function" not in raw_tool_call:
|
||||
return None
|
||||
|
||||
arguments = raw_tool_call["function"]["arguments"]
|
||||
|
||||
if partial:
|
||||
try:
|
||||
function_args = parse_partial_json(
|
||||
raw_tool_call["function"]["arguments"], strict=strict
|
||||
)
|
||||
function_args = parse_partial_json(arguments, strict=strict)
|
||||
except (JSONDecodeError, TypeError): # None args raise TypeError
|
||||
return None
|
||||
# Handle None or empty string arguments for parameter-less tools
|
||||
elif not arguments:
|
||||
function_args = {}
|
||||
else:
|
||||
try:
|
||||
function_args = json.loads(
|
||||
raw_tool_call["function"]["arguments"], strict=strict
|
||||
)
|
||||
function_args = json.loads(arguments, strict=strict)
|
||||
except JSONDecodeError as e:
|
||||
msg = (
|
||||
f"Function {raw_tool_call['function']['name']} arguments:\n\n"
|
||||
f"{raw_tool_call['function']['arguments']}\n\nare not valid JSON. "
|
||||
f"{arguments}\n\nare not valid JSON. "
|
||||
f"Received JSONDecodeError {e}"
|
||||
)
|
||||
raise OutputParserException(msg) from e
|
||||
|
||||
@@ -37,7 +37,7 @@ class PydanticOutputParser(JsonOutputParser, Generic[TBaseModel]):
|
||||
def _parser_exception(
|
||||
self, e: Exception, json_object: dict
|
||||
) -> OutputParserException:
|
||||
json_string = json.dumps(json_object)
|
||||
json_string = json.dumps(json_object, ensure_ascii=False)
|
||||
name = self.pydantic_object.__name__
|
||||
msg = f"Failed to parse {name} from completion {json_string}. Got: {e}"
|
||||
return OutputParserException(msg, llm_output=json_string)
|
||||
@@ -54,7 +54,7 @@ class PydanticOutputParser(JsonOutputParser, Generic[TBaseModel]):
|
||||
all the keys that have been returned so far.
|
||||
|
||||
Raises:
|
||||
`OutputParserException`: If the result is not valid JSON
|
||||
OutputParserException: If the result is not valid JSON
|
||||
or does not conform to the Pydantic model.
|
||||
|
||||
Returns:
|
||||
|
||||
@@ -6,7 +6,33 @@ from langchain_core.output_parsers.transform import BaseTransformOutputParser
|
||||
|
||||
|
||||
class StrOutputParser(BaseTransformOutputParser[str]):
|
||||
"""OutputParser that parses `LLMResult` into the top likely string."""
|
||||
"""Extract text content from model outputs as a string.
|
||||
|
||||
Converts model outputs (such as `AIMessage` or `AIMessageChunk` objects) into plain
|
||||
text strings. It's the simplest output parser and is useful when you need string
|
||||
responses for downstream processing, display, or storage.
|
||||
|
||||
Supports streaming, yielding text chunks as they're generated by the model.
|
||||
|
||||
Example:
|
||||
```python
|
||||
from langchain_core.output_parsers import StrOutputParser
|
||||
from langchain_openai import ChatOpenAI
|
||||
|
||||
model = ChatOpenAI(model="gpt-4o")
|
||||
parser = StrOutputParser()
|
||||
|
||||
# Get string output from a model
|
||||
message = model.invoke("Tell me a joke")
|
||||
result = parser.invoke(message)
|
||||
print(result) # plain string
|
||||
|
||||
# With streaming - use transform() to process a stream
|
||||
stream = model.stream("Tell me a story")
|
||||
for chunk in parser.transform(stream):
|
||||
print(chunk, end="", flush=True)
|
||||
```
|
||||
"""
|
||||
|
||||
@classmethod
|
||||
def is_lc_serializable(cls) -> bool:
|
||||
|
||||
@@ -903,23 +903,28 @@ class ChatPromptTemplate(BaseChatPromptTemplate):
|
||||
5. A string which is shorthand for `("human", template)`; e.g.,
|
||||
`"{user_input}"`
|
||||
template_format: Format of the template.
|
||||
input_variables: A list of the names of the variables whose values are
|
||||
required as inputs to the prompt.
|
||||
optional_variables: A list of the names of the variables for placeholder
|
||||
or MessagePlaceholder that are optional.
|
||||
**kwargs: Additional keyword arguments passed to `BasePromptTemplate`,
|
||||
including (but not limited to):
|
||||
|
||||
These variables are auto-inferred from the prompt, and the user need not
|
||||
provide them.
|
||||
partial_variables: A dictionary of the partial variables the prompt
|
||||
template carries.
|
||||
- `input_variables`: A list of the names of the variables whose values
|
||||
are required as inputs to the prompt.
|
||||
- `optional_variables`: A list of the names of the variables for
|
||||
placeholder or `MessagePlaceholder` that are optional.
|
||||
|
||||
Partial variables populate the template so that you don't need to pass
|
||||
them in every time you call the prompt.
|
||||
validate_template: Whether to validate the template.
|
||||
input_types: A dictionary of the types of the variables the prompt template
|
||||
expects.
|
||||
These variables are auto-inferred from the prompt, and the user need not
|
||||
provide them.
|
||||
|
||||
If not provided, all variables are assumed to be strings.
|
||||
- `partial_variables`: A dictionary of the partial variables the prompt
|
||||
template carries.
|
||||
|
||||
Partial variables populate the template so that you don't need to
|
||||
pass them in every time you call the prompt.
|
||||
|
||||
- `validate_template`: Whether to validate the template.
|
||||
- `input_types`: A dictionary of the types of the variables the prompt
|
||||
template expects.
|
||||
|
||||
If not provided, all variables are assumed to be strings.
|
||||
|
||||
Examples:
|
||||
Instantiation from a list of message templates:
|
||||
|
||||
@@ -19,7 +19,7 @@ if TYPE_CHECKING:
|
||||
from collections.abc import Callable, Sequence
|
||||
|
||||
try:
|
||||
from jinja2 import Environment, meta
|
||||
from jinja2 import meta
|
||||
from jinja2.sandbox import SandboxedEnvironment
|
||||
|
||||
_HAS_JINJA2 = True
|
||||
@@ -61,13 +61,9 @@ def jinja2_formatter(template: str, /, **kwargs: Any) -> str:
|
||||
)
|
||||
raise ImportError(msg)
|
||||
|
||||
# This uses a sandboxed environment to prevent arbitrary code execution.
|
||||
# Jinja2 uses an opt-out rather than opt-in approach for sand-boxing.
|
||||
# Please treat this sand-boxing as a best-effort approach rather than
|
||||
# a guarantee of security.
|
||||
# We recommend to never use jinja2 templates with untrusted inputs.
|
||||
# https://jinja.palletsprojects.com/en/3.1.x/sandbox/
|
||||
# approach not a guarantee of security.
|
||||
# Use a restricted sandbox that blocks ALL attribute/method access
|
||||
# Only simple variable lookups like {{variable}} are allowed
|
||||
# Attribute access like {{variable.attr}} or {{variable.method()}} is blocked
|
||||
return SandboxedEnvironment().from_string(template).render(**kwargs)
|
||||
|
||||
|
||||
@@ -103,7 +99,7 @@ def _get_jinja2_variables_from_template(template: str) -> set[str]:
|
||||
"Please install it with `pip install jinja2`."
|
||||
)
|
||||
raise ImportError(msg)
|
||||
env = Environment() # noqa: S701
|
||||
env = SandboxedEnvironment()
|
||||
ast = env.parse(template)
|
||||
return meta.find_undeclared_variables(ast)
|
||||
|
||||
@@ -273,6 +269,30 @@ def get_template_variables(template: str, template_format: str) -> list[str]:
|
||||
msg = f"Unsupported template format: {template_format}"
|
||||
raise ValueError(msg)
|
||||
|
||||
# For f-strings, block attribute access and indexing syntax
|
||||
# This prevents template injection attacks via accessing dangerous attributes
|
||||
if template_format == "f-string":
|
||||
for var in input_variables:
|
||||
# Formatter().parse() returns field names with dots/brackets if present
|
||||
# e.g., "obj.attr" or "obj[0]" - we need to block these
|
||||
if "." in var or "[" in var or "]" in var:
|
||||
msg = (
|
||||
f"Invalid variable name {var!r} in f-string template. "
|
||||
f"Variable names cannot contain attribute "
|
||||
f"access (.) or indexing ([])."
|
||||
)
|
||||
raise ValueError(msg)
|
||||
|
||||
# Block variable names that are all digits (e.g., "0", "100")
|
||||
# These are interpreted as positional arguments, not keyword arguments
|
||||
if var.isdigit():
|
||||
msg = (
|
||||
f"Invalid variable name {var!r} in f-string template. "
|
||||
f"Variable names cannot be all digits as they are interpreted "
|
||||
f"as positional arguments."
|
||||
)
|
||||
raise ValueError(msg)
|
||||
|
||||
return sorted(input_variables)
|
||||
|
||||
|
||||
|
||||
@@ -48,8 +48,17 @@ class StructuredPrompt(ChatPromptTemplate):
|
||||
schema_: schema for the structured prompt.
|
||||
structured_output_kwargs: additional kwargs for structured output.
|
||||
template_format: template format for the prompt.
|
||||
|
||||
Raises:
|
||||
ValueError: if schema is not provided.
|
||||
"""
|
||||
schema_ = schema_ or kwargs.pop("schema")
|
||||
schema_ = schema_ or kwargs.pop("schema", None)
|
||||
if not schema_:
|
||||
err_msg = (
|
||||
"Must pass in a non-empty structured output schema. Received: "
|
||||
f"{schema_}"
|
||||
)
|
||||
raise ValueError(err_msg)
|
||||
structured_output_kwargs = structured_output_kwargs or {}
|
||||
for k in set(kwargs).difference(get_pydantic_field_names(self.__class__)):
|
||||
structured_output_kwargs[k] = kwargs.pop(k)
|
||||
|
||||
@@ -94,7 +94,7 @@ from langchain_core.tracers.root_listeners import (
|
||||
AsyncRootListenersTracer,
|
||||
RootListenersTracer,
|
||||
)
|
||||
from langchain_core.utils.aiter import aclosing, atee, py_anext
|
||||
from langchain_core.utils.aiter import aclosing, atee
|
||||
from langchain_core.utils.iter import safetee
|
||||
from langchain_core.utils.pydantic import create_model_v2
|
||||
|
||||
@@ -127,10 +127,10 @@ class Runnable(ABC, Generic[Input, Output]):
|
||||
Key Methods
|
||||
===========
|
||||
|
||||
- **`invoke`/`ainvoke`**: Transforms a single input into an output.
|
||||
- **`batch`/`abatch`**: Efficiently transforms multiple inputs into outputs.
|
||||
- **`stream`/`astream`**: Streams output from a single input as it's produced.
|
||||
- **`astream_log`**: Streams output and selected intermediate results from an
|
||||
- `invoke`/`ainvoke`: Transforms a single input into an output.
|
||||
- `batch`/`abatch`: Efficiently transforms multiple inputs into outputs.
|
||||
- `stream`/`astream`: Streams output from a single input as it's produced.
|
||||
- `astream_log`: Streams output and selected intermediate results from an
|
||||
input.
|
||||
|
||||
Built-in optimizations:
|
||||
@@ -707,51 +707,53 @@ class Runnable(ABC, Generic[Input, Output]):
|
||||
def pick(self, keys: str | list[str]) -> RunnableSerializable[Any, Any]:
|
||||
"""Pick keys from the output `dict` of this `Runnable`.
|
||||
|
||||
Pick a single key:
|
||||
!!! example "Pick a single key"
|
||||
|
||||
```python
|
||||
import json
|
||||
```python
|
||||
import json
|
||||
|
||||
from langchain_core.runnables import RunnableLambda, RunnableMap
|
||||
from langchain_core.runnables import RunnableLambda, RunnableMap
|
||||
|
||||
as_str = RunnableLambda(str)
|
||||
as_json = RunnableLambda(json.loads)
|
||||
chain = RunnableMap(str=as_str, json=as_json)
|
||||
as_str = RunnableLambda(str)
|
||||
as_json = RunnableLambda(json.loads)
|
||||
chain = RunnableMap(str=as_str, json=as_json)
|
||||
|
||||
chain.invoke("[1, 2, 3]")
|
||||
# -> {"str": "[1, 2, 3]", "json": [1, 2, 3]}
|
||||
chain.invoke("[1, 2, 3]")
|
||||
# -> {"str": "[1, 2, 3]", "json": [1, 2, 3]}
|
||||
|
||||
json_only_chain = chain.pick("json")
|
||||
json_only_chain.invoke("[1, 2, 3]")
|
||||
# -> [1, 2, 3]
|
||||
```
|
||||
json_only_chain = chain.pick("json")
|
||||
json_only_chain.invoke("[1, 2, 3]")
|
||||
# -> [1, 2, 3]
|
||||
```
|
||||
|
||||
Pick a list of keys:
|
||||
!!! example "Pick a list of keys"
|
||||
|
||||
```python
|
||||
from typing import Any
|
||||
```python
|
||||
from typing import Any
|
||||
|
||||
import json
|
||||
import json
|
||||
|
||||
from langchain_core.runnables import RunnableLambda, RunnableMap
|
||||
from langchain_core.runnables import RunnableLambda, RunnableMap
|
||||
|
||||
as_str = RunnableLambda(str)
|
||||
as_json = RunnableLambda(json.loads)
|
||||
as_str = RunnableLambda(str)
|
||||
as_json = RunnableLambda(json.loads)
|
||||
|
||||
|
||||
def as_bytes(x: Any) -> bytes:
|
||||
return bytes(x, "utf-8")
|
||||
def as_bytes(x: Any) -> bytes:
|
||||
return bytes(x, "utf-8")
|
||||
|
||||
|
||||
chain = RunnableMap(str=as_str, json=as_json, bytes=RunnableLambda(as_bytes))
|
||||
chain = RunnableMap(
|
||||
str=as_str, json=as_json, bytes=RunnableLambda(as_bytes)
|
||||
)
|
||||
|
||||
chain.invoke("[1, 2, 3]")
|
||||
# -> {"str": "[1, 2, 3]", "json": [1, 2, 3], "bytes": b"[1, 2, 3]"}
|
||||
chain.invoke("[1, 2, 3]")
|
||||
# -> {"str": "[1, 2, 3]", "json": [1, 2, 3], "bytes": b"[1, 2, 3]"}
|
||||
|
||||
json_and_bytes_chain = chain.pick(["json", "bytes"])
|
||||
json_and_bytes_chain.invoke("[1, 2, 3]")
|
||||
# -> {"json": [1, 2, 3], "bytes": b"[1, 2, 3]"}
|
||||
```
|
||||
json_and_bytes_chain = chain.pick(["json", "bytes"])
|
||||
json_and_bytes_chain.invoke("[1, 2, 3]")
|
||||
# -> {"json": [1, 2, 3], "bytes": b"[1, 2, 3]"}
|
||||
```
|
||||
|
||||
Args:
|
||||
keys: A key or list of keys to pick from the output dict.
|
||||
@@ -1372,48 +1374,50 @@ class Runnable(ABC, Generic[Input, Output]):
|
||||
).with_config({"run_name": "my_template", "tags": ["my_template"]})
|
||||
```
|
||||
|
||||
For instance:
|
||||
!!! example
|
||||
|
||||
```python
|
||||
from langchain_core.runnables import RunnableLambda
|
||||
```python
|
||||
from langchain_core.runnables import RunnableLambda
|
||||
|
||||
|
||||
async def reverse(s: str) -> str:
|
||||
return s[::-1]
|
||||
async def reverse(s: str) -> str:
|
||||
return s[::-1]
|
||||
|
||||
|
||||
chain = RunnableLambda(func=reverse)
|
||||
chain = RunnableLambda(func=reverse)
|
||||
|
||||
events = [event async for event in chain.astream_events("hello", version="v2")]
|
||||
events = [
|
||||
event async for event in chain.astream_events("hello", version="v2")
|
||||
]
|
||||
|
||||
# Will produce the following events
|
||||
# (run_id and parent_ids have been omitted for brevity):
|
||||
[
|
||||
{
|
||||
"data": {"input": "hello"},
|
||||
"event": "on_chain_start",
|
||||
"metadata": {},
|
||||
"name": "reverse",
|
||||
"tags": [],
|
||||
},
|
||||
{
|
||||
"data": {"chunk": "olleh"},
|
||||
"event": "on_chain_stream",
|
||||
"metadata": {},
|
||||
"name": "reverse",
|
||||
"tags": [],
|
||||
},
|
||||
{
|
||||
"data": {"output": "olleh"},
|
||||
"event": "on_chain_end",
|
||||
"metadata": {},
|
||||
"name": "reverse",
|
||||
"tags": [],
|
||||
},
|
||||
]
|
||||
```
|
||||
# Will produce the following events
|
||||
# (run_id and parent_ids have been omitted for brevity):
|
||||
[
|
||||
{
|
||||
"data": {"input": "hello"},
|
||||
"event": "on_chain_start",
|
||||
"metadata": {},
|
||||
"name": "reverse",
|
||||
"tags": [],
|
||||
},
|
||||
{
|
||||
"data": {"chunk": "olleh"},
|
||||
"event": "on_chain_stream",
|
||||
"metadata": {},
|
||||
"name": "reverse",
|
||||
"tags": [],
|
||||
},
|
||||
{
|
||||
"data": {"output": "olleh"},
|
||||
"event": "on_chain_end",
|
||||
"metadata": {},
|
||||
"name": "reverse",
|
||||
"tags": [],
|
||||
},
|
||||
]
|
||||
```
|
||||
|
||||
```python title="Example: Dispatch Custom Event"
|
||||
```python title="Dispatch custom event"
|
||||
from langchain_core.callbacks.manager import (
|
||||
adispatch_custom_event,
|
||||
)
|
||||
@@ -1447,10 +1451,13 @@ class Runnable(ABC, Generic[Input, Output]):
|
||||
Args:
|
||||
input: The input to the `Runnable`.
|
||||
config: The config to use for the `Runnable`.
|
||||
version: The version of the schema to use either `'v2'` or `'v1'`.
|
||||
version: The version of the schema to use, either `'v2'` or `'v1'`.
|
||||
|
||||
Users should use `'v2'`.
|
||||
|
||||
`'v1'` is for backwards compatibility and will be deprecated
|
||||
in `0.4.0`.
|
||||
|
||||
No default will be assigned until the API is stabilized.
|
||||
Custom events will only be surfaced in `'v2'`.
|
||||
include_names: Only include events from `Runnable` objects with matching names.
|
||||
@@ -1460,6 +1467,7 @@ class Runnable(ABC, Generic[Input, Output]):
|
||||
exclude_types: Exclude events from `Runnable` objects with matching types.
|
||||
exclude_tags: Exclude events from `Runnable` objects with matching tags.
|
||||
**kwargs: Additional keyword arguments to pass to the `Runnable`.
|
||||
|
||||
These will be passed to `astream_log` as this implementation
|
||||
of `astream_events` is built on top of `astream_log`.
|
||||
|
||||
@@ -2269,6 +2277,9 @@ class Runnable(ABC, Generic[Input, Output]):
|
||||
Use this to implement `stream` or `transform` in `Runnable` subclasses.
|
||||
|
||||
"""
|
||||
# Extract defers_inputs from kwargs if present
|
||||
defers_inputs = kwargs.pop("defers_inputs", False)
|
||||
|
||||
# tee the input so we can iterate over it twice
|
||||
input_for_tracing, input_for_transform = tee(inputs, 2)
|
||||
# Start the input iterator to ensure the input Runnable starts before this one
|
||||
@@ -2285,6 +2296,7 @@ class Runnable(ABC, Generic[Input, Output]):
|
||||
run_type=run_type,
|
||||
name=config.get("run_name") or self.get_name(),
|
||||
run_id=config.pop("run_id", None),
|
||||
defers_inputs=defers_inputs,
|
||||
)
|
||||
try:
|
||||
child_config = patch_config(config, callbacks=run_manager.get_child())
|
||||
@@ -2366,10 +2378,13 @@ class Runnable(ABC, Generic[Input, Output]):
|
||||
Use this to implement `astream` or `atransform` in `Runnable` subclasses.
|
||||
|
||||
"""
|
||||
# Extract defers_inputs from kwargs if present
|
||||
defers_inputs = kwargs.pop("defers_inputs", False)
|
||||
|
||||
# tee the input so we can iterate over it twice
|
||||
input_for_tracing, input_for_transform = atee(inputs, 2)
|
||||
# Start the input iterator to ensure the input Runnable starts before this one
|
||||
final_input: Input | None = await py_anext(input_for_tracing, None)
|
||||
final_input: Input | None = await anext(input_for_tracing, None)
|
||||
final_input_supported = True
|
||||
final_output: Output | None = None
|
||||
final_output_supported = True
|
||||
@@ -2382,6 +2397,7 @@ class Runnable(ABC, Generic[Input, Output]):
|
||||
run_type=run_type,
|
||||
name=config.get("run_name") or self.get_name(),
|
||||
run_id=config.pop("run_id", None),
|
||||
defers_inputs=defers_inputs,
|
||||
)
|
||||
try:
|
||||
child_config = patch_config(config, callbacks=run_manager.get_child())
|
||||
@@ -2409,7 +2425,7 @@ class Runnable(ABC, Generic[Input, Output]):
|
||||
iterator = iterator_
|
||||
try:
|
||||
while True:
|
||||
chunk = await coro_with_context(py_anext(iterator), context)
|
||||
chunk = await coro_with_context(anext(iterator), context)
|
||||
yield chunk
|
||||
if final_output_supported:
|
||||
if final_output is None:
|
||||
@@ -2476,82 +2492,82 @@ class Runnable(ABC, Generic[Input, Output]):
|
||||
Returns:
|
||||
A `BaseTool` instance.
|
||||
|
||||
Typed dict input:
|
||||
!!! example "`TypedDict` input"
|
||||
|
||||
```python
|
||||
from typing_extensions import TypedDict
|
||||
from langchain_core.runnables import RunnableLambda
|
||||
```python
|
||||
from typing_extensions import TypedDict
|
||||
from langchain_core.runnables import RunnableLambda
|
||||
|
||||
|
||||
class Args(TypedDict):
|
||||
a: int
|
||||
b: list[int]
|
||||
class Args(TypedDict):
|
||||
a: int
|
||||
b: list[int]
|
||||
|
||||
|
||||
def f(x: Args) -> str:
|
||||
return str(x["a"] * max(x["b"]))
|
||||
def f(x: Args) -> str:
|
||||
return str(x["a"] * max(x["b"]))
|
||||
|
||||
|
||||
runnable = RunnableLambda(f)
|
||||
as_tool = runnable.as_tool()
|
||||
as_tool.invoke({"a": 3, "b": [1, 2]})
|
||||
```
|
||||
runnable = RunnableLambda(f)
|
||||
as_tool = runnable.as_tool()
|
||||
as_tool.invoke({"a": 3, "b": [1, 2]})
|
||||
```
|
||||
|
||||
`dict` input, specifying schema via `args_schema`:
|
||||
!!! example "`dict` input, specifying schema via `args_schema`"
|
||||
|
||||
```python
|
||||
from typing import Any
|
||||
from pydantic import BaseModel, Field
|
||||
from langchain_core.runnables import RunnableLambda
|
||||
```python
|
||||
from typing import Any
|
||||
from pydantic import BaseModel, Field
|
||||
from langchain_core.runnables import RunnableLambda
|
||||
|
||||
def f(x: dict[str, Any]) -> str:
|
||||
return str(x["a"] * max(x["b"]))
|
||||
def f(x: dict[str, Any]) -> str:
|
||||
return str(x["a"] * max(x["b"]))
|
||||
|
||||
class FSchema(BaseModel):
|
||||
\"\"\"Apply a function to an integer and list of integers.\"\"\"
|
||||
class FSchema(BaseModel):
|
||||
\"\"\"Apply a function to an integer and list of integers.\"\"\"
|
||||
|
||||
a: int = Field(..., description="Integer")
|
||||
b: list[int] = Field(..., description="List of ints")
|
||||
a: int = Field(..., description="Integer")
|
||||
b: list[int] = Field(..., description="List of ints")
|
||||
|
||||
runnable = RunnableLambda(f)
|
||||
as_tool = runnable.as_tool(FSchema)
|
||||
as_tool.invoke({"a": 3, "b": [1, 2]})
|
||||
```
|
||||
runnable = RunnableLambda(f)
|
||||
as_tool = runnable.as_tool(FSchema)
|
||||
as_tool.invoke({"a": 3, "b": [1, 2]})
|
||||
```
|
||||
|
||||
`dict` input, specifying schema via `arg_types`:
|
||||
!!! example "`dict` input, specifying schema via `arg_types`"
|
||||
|
||||
```python
|
||||
from typing import Any
|
||||
from langchain_core.runnables import RunnableLambda
|
||||
```python
|
||||
from typing import Any
|
||||
from langchain_core.runnables import RunnableLambda
|
||||
|
||||
|
||||
def f(x: dict[str, Any]) -> str:
|
||||
return str(x["a"] * max(x["b"]))
|
||||
def f(x: dict[str, Any]) -> str:
|
||||
return str(x["a"] * max(x["b"]))
|
||||
|
||||
|
||||
runnable = RunnableLambda(f)
|
||||
as_tool = runnable.as_tool(arg_types={"a": int, "b": list[int]})
|
||||
as_tool.invoke({"a": 3, "b": [1, 2]})
|
||||
```
|
||||
runnable = RunnableLambda(f)
|
||||
as_tool = runnable.as_tool(arg_types={"a": int, "b": list[int]})
|
||||
as_tool.invoke({"a": 3, "b": [1, 2]})
|
||||
```
|
||||
|
||||
`str` input:
|
||||
!!! example "`str` input"
|
||||
|
||||
```python
|
||||
from langchain_core.runnables import RunnableLambda
|
||||
```python
|
||||
from langchain_core.runnables import RunnableLambda
|
||||
|
||||
|
||||
def f(x: str) -> str:
|
||||
return x + "a"
|
||||
def f(x: str) -> str:
|
||||
return x + "a"
|
||||
|
||||
|
||||
def g(x: str) -> str:
|
||||
return x + "z"
|
||||
def g(x: str) -> str:
|
||||
return x + "z"
|
||||
|
||||
|
||||
runnable = RunnableLambda(f) | g
|
||||
as_tool = runnable.as_tool()
|
||||
as_tool.invoke("b")
|
||||
```
|
||||
runnable = RunnableLambda(f) | g
|
||||
as_tool = runnable.as_tool()
|
||||
as_tool.invoke("b")
|
||||
```
|
||||
"""
|
||||
# Avoid circular import
|
||||
from langchain_core.tools import convert_runnable_to_tool # noqa: PLC0415
|
||||
@@ -2603,29 +2619,33 @@ class RunnableSerializable(Serializable, Runnable[Input, Output]):
|
||||
Returns:
|
||||
A new `Runnable` with the fields configured.
|
||||
|
||||
```python
|
||||
from langchain_core.runnables import ConfigurableField
|
||||
from langchain_openai import ChatOpenAI
|
||||
!!! example
|
||||
|
||||
model = ChatOpenAI(max_tokens=20).configurable_fields(
|
||||
max_tokens=ConfigurableField(
|
||||
id="output_token_number",
|
||||
name="Max tokens in the output",
|
||||
description="The maximum number of tokens in the output",
|
||||
```python
|
||||
from langchain_core.runnables import ConfigurableField
|
||||
from langchain_openai import ChatOpenAI
|
||||
|
||||
model = ChatOpenAI(max_tokens=20).configurable_fields(
|
||||
max_tokens=ConfigurableField(
|
||||
id="output_token_number",
|
||||
name="Max tokens in the output",
|
||||
description="The maximum number of tokens in the output",
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
# max_tokens = 20
|
||||
print("max_tokens_20: ", model.invoke("tell me something about chess").content)
|
||||
# max_tokens = 20
|
||||
print(
|
||||
"max_tokens_20: ", model.invoke("tell me something about chess").content
|
||||
)
|
||||
|
||||
# max_tokens = 200
|
||||
print(
|
||||
"max_tokens_200: ",
|
||||
model.with_config(configurable={"output_token_number": 200})
|
||||
.invoke("tell me something about chess")
|
||||
.content,
|
||||
)
|
||||
```
|
||||
# max_tokens = 200
|
||||
print(
|
||||
"max_tokens_200: ",
|
||||
model.with_config(configurable={"output_token_number": 200})
|
||||
.invoke("tell me something about chess")
|
||||
.content,
|
||||
)
|
||||
```
|
||||
"""
|
||||
# Import locally to prevent circular import
|
||||
from langchain_core.runnables.configurable import ( # noqa: PLC0415
|
||||
@@ -2664,29 +2684,31 @@ class RunnableSerializable(Serializable, Runnable[Input, Output]):
|
||||
Returns:
|
||||
A new `Runnable` with the alternatives configured.
|
||||
|
||||
```python
|
||||
from langchain_anthropic import ChatAnthropic
|
||||
from langchain_core.runnables.utils import ConfigurableField
|
||||
from langchain_openai import ChatOpenAI
|
||||
!!! example
|
||||
|
||||
model = ChatAnthropic(
|
||||
model_name="claude-sonnet-4-5-20250929"
|
||||
).configurable_alternatives(
|
||||
ConfigurableField(id="llm"),
|
||||
default_key="anthropic",
|
||||
openai=ChatOpenAI(),
|
||||
)
|
||||
```python
|
||||
from langchain_anthropic import ChatAnthropic
|
||||
from langchain_core.runnables.utils import ConfigurableField
|
||||
from langchain_openai import ChatOpenAI
|
||||
|
||||
# uses the default model ChatAnthropic
|
||||
print(model.invoke("which organization created you?").content)
|
||||
model = ChatAnthropic(
|
||||
model_name="claude-sonnet-4-5-20250929"
|
||||
).configurable_alternatives(
|
||||
ConfigurableField(id="llm"),
|
||||
default_key="anthropic",
|
||||
openai=ChatOpenAI(),
|
||||
)
|
||||
|
||||
# uses ChatOpenAI
|
||||
print(
|
||||
model.with_config(configurable={"llm": "openai"})
|
||||
.invoke("which organization created you?")
|
||||
.content
|
||||
)
|
||||
```
|
||||
# uses the default model ChatAnthropic
|
||||
print(model.invoke("which organization created you?").content)
|
||||
|
||||
# uses ChatOpenAI
|
||||
print(
|
||||
model.with_config(configurable={"llm": "openai"})
|
||||
.invoke("which organization created you?")
|
||||
.content
|
||||
)
|
||||
```
|
||||
"""
|
||||
# Import locally to prevent circular import
|
||||
from langchain_core.runnables.configurable import ( # noqa: PLC0415
|
||||
@@ -4011,7 +4033,7 @@ class RunnableParallel(RunnableSerializable[Input, dict[str, Any]]):
|
||||
|
||||
# Wrap in a coroutine to satisfy linter
|
||||
async def get_next_chunk(generator: AsyncIterator) -> Output | None:
|
||||
return await py_anext(generator)
|
||||
return await anext(generator)
|
||||
|
||||
# Start the first iteration of each generator
|
||||
tasks = {
|
||||
@@ -4309,6 +4331,7 @@ class RunnableGenerator(Runnable[Input, Output]):
|
||||
input,
|
||||
self._transform, # type: ignore[arg-type]
|
||||
config,
|
||||
defers_inputs=True,
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
@@ -4342,7 +4365,7 @@ class RunnableGenerator(Runnable[Input, Output]):
|
||||
raise NotImplementedError(msg)
|
||||
|
||||
return self._atransform_stream_with_config(
|
||||
input, self._atransform, config, **kwargs
|
||||
input, self._atransform, config, defers_inputs=True, **kwargs
|
||||
)
|
||||
|
||||
@override
|
||||
|
||||
@@ -303,7 +303,7 @@ class RunnableBranch(RunnableSerializable[Input, Output]):
|
||||
|
||||
Args:
|
||||
input: The input to the `Runnable`.
|
||||
config: The configuration for the Runna`ble.
|
||||
config: The configuration for the `Runnable`.
|
||||
**kwargs: Additional keyword arguments to pass to the `Runnable`.
|
||||
|
||||
Yields:
|
||||
|
||||
@@ -47,54 +47,59 @@ class EmptyDict(TypedDict, total=False):
|
||||
|
||||
|
||||
class RunnableConfig(TypedDict, total=False):
|
||||
"""Configuration for a Runnable."""
|
||||
"""Configuration for a `Runnable`.
|
||||
|
||||
See the [reference docs](https://reference.langchain.com/python/langchain_core/runnables/#langchain_core.runnables.RunnableConfig)
|
||||
for more details.
|
||||
"""
|
||||
|
||||
tags: list[str]
|
||||
"""
|
||||
Tags for this call and any sub-calls (eg. a Chain calling an LLM).
|
||||
"""Tags for this call and any sub-calls (e.g. a Chain calling an LLM).
|
||||
|
||||
You can use these to filter calls.
|
||||
"""
|
||||
|
||||
metadata: dict[str, Any]
|
||||
"""
|
||||
Metadata for this call and any sub-calls (eg. a Chain calling an LLM).
|
||||
"""Metadata for this call and any sub-calls (e.g. a Chain calling an LLM).
|
||||
|
||||
Keys should be strings, values should be JSON-serializable.
|
||||
"""
|
||||
|
||||
callbacks: Callbacks
|
||||
"""
|
||||
Callbacks for this call and any sub-calls (eg. a Chain calling an LLM).
|
||||
"""Callbacks for this call and any sub-calls (e.g. a Chain calling an LLM).
|
||||
|
||||
Tags are passed to all callbacks, metadata is passed to handle*Start callbacks.
|
||||
"""
|
||||
|
||||
run_name: str
|
||||
"""
|
||||
Name for the tracer run for this call. Defaults to the name of the class.
|
||||
"""
|
||||
"""Name for the tracer run for this call.
|
||||
|
||||
Defaults to the name of the class."""
|
||||
|
||||
max_concurrency: int | None
|
||||
"""
|
||||
Maximum number of parallel calls to make. If not provided, defaults to
|
||||
`ThreadPoolExecutor`'s default.
|
||||
"""Maximum number of parallel calls to make.
|
||||
|
||||
If not provided, defaults to `ThreadPoolExecutor`'s default.
|
||||
"""
|
||||
|
||||
recursion_limit: int
|
||||
"""
|
||||
Maximum number of times a call can recurse. If not provided, defaults to `25`.
|
||||
"""Maximum number of times a call can recurse.
|
||||
|
||||
If not provided, defaults to `25`.
|
||||
"""
|
||||
|
||||
configurable: dict[str, Any]
|
||||
"""
|
||||
Runtime values for attributes previously made configurable on this `Runnable`,
|
||||
"""Runtime values for attributes previously made configurable on this `Runnable`,
|
||||
or sub-Runnables, through `configurable_fields` or `configurable_alternatives`.
|
||||
|
||||
Check `output_schema` for a description of the attributes that have been made
|
||||
configurable.
|
||||
"""
|
||||
|
||||
run_id: uuid.UUID | None
|
||||
"""
|
||||
Unique identifier for the tracer run for this call. If not provided, a new UUID
|
||||
will be generated.
|
||||
"""Unique identifier for the tracer run for this call.
|
||||
|
||||
If not provided, a new UUID will be generated.
|
||||
"""
|
||||
|
||||
|
||||
|
||||
@@ -28,7 +28,6 @@ from langchain_core.runnables.utils import (
|
||||
coro_with_context,
|
||||
get_unique_config_specs,
|
||||
)
|
||||
from langchain_core.utils.aiter import py_anext
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from langchain_core.callbacks.manager import AsyncCallbackManagerForChainRun
|
||||
@@ -563,7 +562,7 @@ class RunnableWithFallbacks(RunnableSerializable[Input, Output]):
|
||||
child_config,
|
||||
**kwargs,
|
||||
)
|
||||
chunk = await coro_with_context(py_anext(stream), context)
|
||||
chunk = await coro_with_context(anext(stream), context)
|
||||
except self.exceptions_to_handle as e:
|
||||
first_error = e if first_error is None else first_error
|
||||
last_error = e
|
||||
|
||||
@@ -165,6 +165,9 @@ class AsciiCanvas:
|
||||
y0: y coordinate of the box corner.
|
||||
width: box width.
|
||||
height: box height.
|
||||
|
||||
Raises:
|
||||
ValueError: if box dimensions are invalid.
|
||||
"""
|
||||
if width <= 1 or height <= 1:
|
||||
msg = "Box dimensions should be > 1"
|
||||
|
||||
@@ -81,6 +81,7 @@ def draw_mermaid(
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Returns:
|
||||
Mermaid graph syntax.
|
||||
|
||||
|
||||
@@ -201,7 +201,8 @@ class PngDrawer:
|
||||
viz, start, end, str(data) if data is not None else None, cond
|
||||
)
|
||||
|
||||
def update_styles(self, viz: Any, graph: Graph) -> None:
|
||||
@staticmethod
|
||||
def update_styles(viz: Any, graph: Graph) -> None:
|
||||
"""Update the styles of the entrypoint and END nodes.
|
||||
|
||||
Args:
|
||||
|
||||
@@ -539,7 +539,7 @@ class RunnableWithMessageHistory(RunnableBindingBase): # type: ignore[no-redef]
|
||||
hist: BaseChatMessageHistory = config["configurable"]["message_history"]
|
||||
|
||||
# Get the input messages
|
||||
inputs = load(run.inputs)
|
||||
inputs = load(run.inputs, allowed_objects="all")
|
||||
input_messages = self._get_input_messages(inputs)
|
||||
# If historic messages were prepended to the input messages, remove them to
|
||||
# avoid adding duplicate messages to history.
|
||||
@@ -548,7 +548,7 @@ class RunnableWithMessageHistory(RunnableBindingBase): # type: ignore[no-redef]
|
||||
input_messages = input_messages[len(historic_messages) :]
|
||||
|
||||
# Get the output messages
|
||||
output_val = load(run.outputs)
|
||||
output_val = load(run.outputs, allowed_objects="all")
|
||||
output_messages = self._get_output_messages(output_val)
|
||||
hist.add_messages(input_messages + output_messages)
|
||||
|
||||
@@ -556,7 +556,7 @@ class RunnableWithMessageHistory(RunnableBindingBase): # type: ignore[no-redef]
|
||||
hist: BaseChatMessageHistory = config["configurable"]["message_history"]
|
||||
|
||||
# Get the input messages
|
||||
inputs = load(run.inputs)
|
||||
inputs = load(run.inputs, allowed_objects="all")
|
||||
input_messages = self._get_input_messages(inputs)
|
||||
# If historic messages were prepended to the input messages, remove them to
|
||||
# avoid adding duplicate messages to history.
|
||||
@@ -565,7 +565,7 @@ class RunnableWithMessageHistory(RunnableBindingBase): # type: ignore[no-redef]
|
||||
input_messages = input_messages[len(historic_messages) :]
|
||||
|
||||
# Get the output messages
|
||||
output_val = load(run.outputs)
|
||||
output_val = load(run.outputs, allowed_objects="all")
|
||||
output_messages = self._get_output_messages(output_val)
|
||||
await hist.aadd_messages(input_messages + output_messages)
|
||||
|
||||
|
||||
@@ -33,7 +33,7 @@ from langchain_core.runnables.utils import (
|
||||
AddableDict,
|
||||
ConfigurableFieldSpec,
|
||||
)
|
||||
from langchain_core.utils.aiter import atee, py_anext
|
||||
from langchain_core.utils.aiter import atee
|
||||
from langchain_core.utils.iter import safetee
|
||||
from langchain_core.utils.pydantic import create_model_v2
|
||||
|
||||
@@ -614,7 +614,7 @@ class RunnableAssign(RunnableSerializable[dict[str, Any], dict[str, Any]]):
|
||||
)
|
||||
# start map output stream
|
||||
first_map_chunk_task: asyncio.Task = asyncio.create_task(
|
||||
py_anext(map_output, None), # type: ignore[arg-type]
|
||||
anext(map_output, None),
|
||||
)
|
||||
# consume passthrough stream
|
||||
async for chunk in for_passthrough:
|
||||
@@ -753,25 +753,19 @@ class RunnablePick(RunnableSerializable[dict[str, Any], Any]):
|
||||
return AddableDict(picked)
|
||||
return None
|
||||
|
||||
def _invoke(
|
||||
self,
|
||||
value: dict[str, Any],
|
||||
) -> dict[str, Any]:
|
||||
return self._pick(value)
|
||||
|
||||
@override
|
||||
def invoke(
|
||||
self,
|
||||
input: dict[str, Any],
|
||||
config: RunnableConfig | None = None,
|
||||
**kwargs: Any,
|
||||
) -> dict[str, Any]:
|
||||
return self._call_with_config(self._invoke, input, config, **kwargs)
|
||||
) -> Any:
|
||||
return self._call_with_config(self._pick, input, config, **kwargs)
|
||||
|
||||
async def _ainvoke(
|
||||
self,
|
||||
value: dict[str, Any],
|
||||
) -> dict[str, Any]:
|
||||
) -> Any:
|
||||
return self._pick(value)
|
||||
|
||||
@override
|
||||
@@ -780,13 +774,13 @@ class RunnablePick(RunnableSerializable[dict[str, Any], Any]):
|
||||
input: dict[str, Any],
|
||||
config: RunnableConfig | None = None,
|
||||
**kwargs: Any,
|
||||
) -> dict[str, Any]:
|
||||
) -> Any:
|
||||
return await self._acall_with_config(self._ainvoke, input, config, **kwargs)
|
||||
|
||||
def _transform(
|
||||
self,
|
||||
chunks: Iterator[dict[str, Any]],
|
||||
) -> Iterator[dict[str, Any]]:
|
||||
) -> Iterator[Any]:
|
||||
for chunk in chunks:
|
||||
picked = self._pick(chunk)
|
||||
if picked is not None:
|
||||
@@ -798,7 +792,7 @@ class RunnablePick(RunnableSerializable[dict[str, Any], Any]):
|
||||
input: Iterator[dict[str, Any]],
|
||||
config: RunnableConfig | None = None,
|
||||
**kwargs: Any,
|
||||
) -> Iterator[dict[str, Any]]:
|
||||
) -> Iterator[Any]:
|
||||
yield from self._transform_stream_with_config(
|
||||
input, self._transform, config, **kwargs
|
||||
)
|
||||
@@ -806,7 +800,7 @@ class RunnablePick(RunnableSerializable[dict[str, Any], Any]):
|
||||
async def _atransform(
|
||||
self,
|
||||
chunks: AsyncIterator[dict[str, Any]],
|
||||
) -> AsyncIterator[dict[str, Any]]:
|
||||
) -> AsyncIterator[Any]:
|
||||
async for chunk in chunks:
|
||||
picked = self._pick(chunk)
|
||||
if picked is not None:
|
||||
@@ -818,7 +812,7 @@ class RunnablePick(RunnableSerializable[dict[str, Any], Any]):
|
||||
input: AsyncIterator[dict[str, Any]],
|
||||
config: RunnableConfig | None = None,
|
||||
**kwargs: Any,
|
||||
) -> AsyncIterator[dict[str, Any]]:
|
||||
) -> AsyncIterator[Any]:
|
||||
async for chunk in self._atransform_stream_with_config(
|
||||
input, self._atransform, config, **kwargs
|
||||
):
|
||||
@@ -830,7 +824,7 @@ class RunnablePick(RunnableSerializable[dict[str, Any], Any]):
|
||||
input: dict[str, Any],
|
||||
config: RunnableConfig | None = None,
|
||||
**kwargs: Any,
|
||||
) -> Iterator[dict[str, Any]]:
|
||||
) -> Iterator[Any]:
|
||||
return self.transform(iter([input]), config, **kwargs)
|
||||
|
||||
@override
|
||||
@@ -839,7 +833,7 @@ class RunnablePick(RunnableSerializable[dict[str, Any], Any]):
|
||||
input: dict[str, Any],
|
||||
config: RunnableConfig | None = None,
|
||||
**kwargs: Any,
|
||||
) -> AsyncIterator[dict[str, Any]]:
|
||||
) -> AsyncIterator[Any]:
|
||||
async def input_aiter() -> AsyncIterator[dict[str, Any]]:
|
||||
yield input
|
||||
|
||||
|
||||
@@ -22,6 +22,7 @@ from typing import (
|
||||
get_type_hints,
|
||||
)
|
||||
|
||||
import typing_extensions
|
||||
from pydantic import (
|
||||
BaseModel,
|
||||
ConfigDict,
|
||||
@@ -31,6 +32,7 @@ from pydantic import (
|
||||
ValidationError,
|
||||
validate_arguments,
|
||||
)
|
||||
from pydantic.fields import FieldInfo
|
||||
from pydantic.v1 import BaseModel as BaseModelV1
|
||||
from pydantic.v1 import ValidationError as ValidationErrorV1
|
||||
from pydantic.v1 import validate_arguments as validate_arguments_v1
|
||||
@@ -94,12 +96,14 @@ def _is_annotated_type(typ: type[Any]) -> bool:
|
||||
Returns:
|
||||
`True` if the type is an Annotated type, `False` otherwise.
|
||||
"""
|
||||
return get_origin(typ) is typing.Annotated
|
||||
return get_origin(typ) in {typing.Annotated, typing_extensions.Annotated}
|
||||
|
||||
|
||||
def _get_annotation_description(arg_type: type) -> str | None:
|
||||
"""Extract description from an Annotated type.
|
||||
|
||||
Checks for string annotations and `FieldInfo` objects with descriptions.
|
||||
|
||||
Args:
|
||||
arg_type: The type to extract description from.
|
||||
|
||||
@@ -111,6 +115,8 @@ def _get_annotation_description(arg_type: type) -> str | None:
|
||||
for annotation in annotated_args[1:]:
|
||||
if isinstance(annotation, str):
|
||||
return annotation
|
||||
if isinstance(annotation, FieldInfo) and annotation.description:
|
||||
return annotation.description
|
||||
return None
|
||||
|
||||
|
||||
@@ -496,6 +502,24 @@ class ChildTool(BaseTool):
|
||||
two-tuple corresponding to the `(content, artifact)` of a `ToolMessage`.
|
||||
"""
|
||||
|
||||
extras: dict[str, Any] | None = None
|
||||
"""Optional provider-specific extra fields for the tool.
|
||||
|
||||
This is used to pass provider-specific configuration that doesn't fit into
|
||||
standard tool fields.
|
||||
|
||||
Example:
|
||||
Anthropic-specific fields like [`cache_control`](https://docs.langchain.com/oss/python/integrations/chat/anthropic#prompt-caching),
|
||||
[`defer_loading`](https://docs.langchain.com/oss/python/integrations/chat/anthropic#tool-search),
|
||||
or `input_examples`.
|
||||
|
||||
```python
|
||||
@tool(extras={"defer_loading": True, "cache_control": {"type": "ephemeral"}})
|
||||
def my_tool(x: str) -> str:
|
||||
return x
|
||||
```
|
||||
"""
|
||||
|
||||
def __init__(self, **kwargs: Any) -> None:
|
||||
"""Initialize the tool.
|
||||
|
||||
@@ -635,6 +659,7 @@ class ChildTool(BaseTool):
|
||||
TypeError: If `args_schema` is not a Pydantic `BaseModel` or dict.
|
||||
"""
|
||||
input_args = self.args_schema
|
||||
|
||||
if isinstance(tool_input, str):
|
||||
if input_args is not None:
|
||||
if isinstance(input_args, dict):
|
||||
@@ -652,6 +677,7 @@ class ChildTool(BaseTool):
|
||||
msg = f"args_schema must be a Pydantic BaseModel, got {input_args}"
|
||||
raise TypeError(msg)
|
||||
return tool_input
|
||||
|
||||
if input_args is not None:
|
||||
if isinstance(input_args, dict):
|
||||
return tool_input
|
||||
@@ -692,11 +718,34 @@ class ChildTool(BaseTool):
|
||||
f"args_schema must be a Pydantic BaseModel, got {self.args_schema}"
|
||||
)
|
||||
raise NotImplementedError(msg)
|
||||
validated_input = {
|
||||
k: getattr(result, k) for k in result_dict if k in tool_input
|
||||
}
|
||||
|
||||
# Include fields from tool_input, plus fields with explicit defaults.
|
||||
# This applies Pydantic defaults (like Field(default=1)) while excluding
|
||||
# synthetic "args"/"kwargs" fields that Pydantic creates for *args/**kwargs.
|
||||
field_info = get_fields(input_args)
|
||||
validated_input = {}
|
||||
for k in result_dict:
|
||||
if k in tool_input:
|
||||
# Field was provided in input - include it (validated)
|
||||
validated_input[k] = getattr(result, k)
|
||||
elif k in field_info and k not in ("args", "kwargs"):
|
||||
# Check if field has an explicit default defined in the schema.
|
||||
# Exclude "args"/"kwargs" as these are synthetic fields for variadic
|
||||
# parameters that should not be passed as keyword arguments.
|
||||
fi = field_info[k]
|
||||
# Pydantic v2 uses is_required() method, v1 uses required attribute
|
||||
has_default = (
|
||||
not fi.is_required()
|
||||
if hasattr(fi, "is_required")
|
||||
else not getattr(fi, "required", True)
|
||||
)
|
||||
if has_default:
|
||||
validated_input[k] = getattr(result, k)
|
||||
|
||||
for k in self._injected_args_keys:
|
||||
if k == "tool_call_id":
|
||||
if k in tool_input:
|
||||
validated_input[k] = tool_input[k]
|
||||
elif k == "tool_call_id":
|
||||
if tool_call_id is None:
|
||||
msg = (
|
||||
"When tool includes an InjectedToolCallId "
|
||||
@@ -707,10 +756,9 @@ class ChildTool(BaseTool):
|
||||
)
|
||||
raise ValueError(msg)
|
||||
validated_input[k] = tool_call_id
|
||||
if k in tool_input:
|
||||
injected_val = tool_input[k]
|
||||
validated_input[k] = injected_val
|
||||
|
||||
return validated_input
|
||||
|
||||
return tool_input
|
||||
|
||||
@abstractmethod
|
||||
@@ -878,6 +926,7 @@ class ChildTool(BaseTool):
|
||||
name=run_name,
|
||||
run_id=run_id,
|
||||
inputs=filtered_tool_input,
|
||||
tool_call_id=tool_call_id,
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
@@ -1005,6 +1054,7 @@ class ChildTool(BaseTool):
|
||||
name=run_name,
|
||||
run_id=run_id,
|
||||
inputs=filtered_tool_input,
|
||||
tool_call_id=tool_call_id,
|
||||
**kwargs,
|
||||
)
|
||||
content = None
|
||||
|
||||
@@ -23,6 +23,7 @@ def tool(
|
||||
response_format: Literal["content", "content_and_artifact"] = "content",
|
||||
parse_docstring: bool = False,
|
||||
error_on_invalid_docstring: bool = True,
|
||||
extras: dict[str, Any] | None = None,
|
||||
) -> Callable[[Callable | Runnable], BaseTool]: ...
|
||||
|
||||
|
||||
@@ -38,6 +39,7 @@ def tool(
|
||||
response_format: Literal["content", "content_and_artifact"] = "content",
|
||||
parse_docstring: bool = False,
|
||||
error_on_invalid_docstring: bool = True,
|
||||
extras: dict[str, Any] | None = None,
|
||||
) -> BaseTool: ...
|
||||
|
||||
|
||||
@@ -52,6 +54,7 @@ def tool(
|
||||
response_format: Literal["content", "content_and_artifact"] = "content",
|
||||
parse_docstring: bool = False,
|
||||
error_on_invalid_docstring: bool = True,
|
||||
extras: dict[str, Any] | None = None,
|
||||
) -> BaseTool: ...
|
||||
|
||||
|
||||
@@ -66,6 +69,7 @@ def tool(
|
||||
response_format: Literal["content", "content_and_artifact"] = "content",
|
||||
parse_docstring: bool = False,
|
||||
error_on_invalid_docstring: bool = True,
|
||||
extras: dict[str, Any] | None = None,
|
||||
) -> Callable[[Callable | Runnable], BaseTool]: ...
|
||||
|
||||
|
||||
@@ -80,6 +84,7 @@ def tool(
|
||||
response_format: Literal["content", "content_and_artifact"] = "content",
|
||||
parse_docstring: bool = False,
|
||||
error_on_invalid_docstring: bool = True,
|
||||
extras: dict[str, Any] | None = None,
|
||||
) -> BaseTool | Callable[[Callable | Runnable], BaseTool]:
|
||||
"""Convert Python functions and `Runnables` to LangChain tools.
|
||||
|
||||
@@ -130,6 +135,15 @@ def tool(
|
||||
parse parameter descriptions from Google Style function docstrings.
|
||||
error_on_invalid_docstring: If `parse_docstring` is provided, configure
|
||||
whether to raise `ValueError` on invalid Google Style docstrings.
|
||||
extras: Optional provider-specific extra fields for the tool.
|
||||
|
||||
Used to pass configuration that doesn't fit into standard tool fields.
|
||||
Chat models should process known extras when constructing model payloads.
|
||||
|
||||
!!! example
|
||||
|
||||
For example, Anthropic-specific fields like `cache_control`,
|
||||
`defer_loading`, or `input_examples`.
|
||||
|
||||
Raises:
|
||||
ValueError: If too many positional arguments are provided (e.g. violating the
|
||||
@@ -292,6 +306,7 @@ def tool(
|
||||
response_format=response_format,
|
||||
parse_docstring=parse_docstring,
|
||||
error_on_invalid_docstring=error_on_invalid_docstring,
|
||||
extras=extras,
|
||||
)
|
||||
# If someone doesn't want a schema applied, we must treat it as
|
||||
# a simple string->string function
|
||||
@@ -308,6 +323,7 @@ def tool(
|
||||
return_direct=return_direct,
|
||||
coroutine=coroutine,
|
||||
response_format=response_format,
|
||||
extras=extras,
|
||||
)
|
||||
|
||||
return _tool_factory
|
||||
|
||||
@@ -2,22 +2,21 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from functools import partial
|
||||
from typing import TYPE_CHECKING, Literal
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from langchain_core.callbacks import Callbacks
|
||||
from langchain_core.documents import Document
|
||||
from langchain_core.prompts import (
|
||||
BasePromptTemplate,
|
||||
PromptTemplate,
|
||||
aformat_document,
|
||||
format_document,
|
||||
)
|
||||
from langchain_core.tools.simple import Tool
|
||||
from langchain_core.tools.structured import StructuredTool
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from langchain_core.callbacks import Callbacks
|
||||
from langchain_core.documents import Document
|
||||
from langchain_core.retrievers import BaseRetriever
|
||||
|
||||
|
||||
@@ -27,43 +26,6 @@ class RetrieverInput(BaseModel):
|
||||
query: str = Field(description="query to look up in retriever")
|
||||
|
||||
|
||||
def _get_relevant_documents(
|
||||
query: str,
|
||||
retriever: BaseRetriever,
|
||||
document_prompt: BasePromptTemplate,
|
||||
document_separator: str,
|
||||
callbacks: Callbacks = None,
|
||||
response_format: Literal["content", "content_and_artifact"] = "content",
|
||||
) -> str | tuple[str, list[Document]]:
|
||||
docs = retriever.invoke(query, config={"callbacks": callbacks})
|
||||
content = document_separator.join(
|
||||
format_document(doc, document_prompt) for doc in docs
|
||||
)
|
||||
if response_format == "content_and_artifact":
|
||||
return (content, docs)
|
||||
|
||||
return content
|
||||
|
||||
|
||||
async def _aget_relevant_documents(
|
||||
query: str,
|
||||
retriever: BaseRetriever,
|
||||
document_prompt: BasePromptTemplate,
|
||||
document_separator: str,
|
||||
callbacks: Callbacks = None,
|
||||
response_format: Literal["content", "content_and_artifact"] = "content",
|
||||
) -> str | tuple[str, list[Document]]:
|
||||
docs = await retriever.ainvoke(query, config={"callbacks": callbacks})
|
||||
content = document_separator.join(
|
||||
[await aformat_document(doc, document_prompt) for doc in docs]
|
||||
)
|
||||
|
||||
if response_format == "content_and_artifact":
|
||||
return (content, docs)
|
||||
|
||||
return content
|
||||
|
||||
|
||||
def create_retriever_tool(
|
||||
retriever: BaseRetriever,
|
||||
name: str,
|
||||
@@ -72,7 +34,7 @@ def create_retriever_tool(
|
||||
document_prompt: BasePromptTemplate | None = None,
|
||||
document_separator: str = "\n\n",
|
||||
response_format: Literal["content", "content_and_artifact"] = "content",
|
||||
) -> Tool:
|
||||
) -> StructuredTool:
|
||||
r"""Create a tool to do retrieval of documents.
|
||||
|
||||
Args:
|
||||
@@ -93,22 +55,31 @@ def create_retriever_tool(
|
||||
Returns:
|
||||
Tool class to pass to an agent.
|
||||
"""
|
||||
document_prompt = document_prompt or PromptTemplate.from_template("{page_content}")
|
||||
func = partial(
|
||||
_get_relevant_documents,
|
||||
retriever=retriever,
|
||||
document_prompt=document_prompt,
|
||||
document_separator=document_separator,
|
||||
response_format=response_format,
|
||||
)
|
||||
afunc = partial(
|
||||
_aget_relevant_documents,
|
||||
retriever=retriever,
|
||||
document_prompt=document_prompt,
|
||||
document_separator=document_separator,
|
||||
response_format=response_format,
|
||||
)
|
||||
return Tool(
|
||||
document_prompt_ = document_prompt or PromptTemplate.from_template("{page_content}")
|
||||
|
||||
def func(
|
||||
query: str, callbacks: Callbacks = None
|
||||
) -> str | tuple[str, list[Document]]:
|
||||
docs = retriever.invoke(query, config={"callbacks": callbacks})
|
||||
content = document_separator.join(
|
||||
format_document(doc, document_prompt_) for doc in docs
|
||||
)
|
||||
if response_format == "content_and_artifact":
|
||||
return (content, docs)
|
||||
return content
|
||||
|
||||
async def afunc(
|
||||
query: str, callbacks: Callbacks = None
|
||||
) -> str | tuple[str, list[Document]]:
|
||||
docs = await retriever.ainvoke(query, config={"callbacks": callbacks})
|
||||
content = document_separator.join(
|
||||
[await aformat_document(doc, document_prompt_) for doc in docs]
|
||||
)
|
||||
if response_format == "content_and_artifact":
|
||||
return (content, docs)
|
||||
return content
|
||||
|
||||
return StructuredTool(
|
||||
name=name,
|
||||
description=description,
|
||||
func=func,
|
||||
|
||||
@@ -284,6 +284,16 @@ class _TracerCore(ABC):
|
||||
llm_run.end_time = datetime.now(timezone.utc)
|
||||
llm_run.events.append({"name": "end", "time": llm_run.end_time})
|
||||
|
||||
tool_call_count = 0
|
||||
for generations in response.generations:
|
||||
for generation in generations:
|
||||
if hasattr(generation, "message"):
|
||||
msg = generation.message
|
||||
if hasattr(msg, "tool_calls") and msg.tool_calls:
|
||||
tool_call_count += len(msg.tool_calls)
|
||||
if tool_call_count > 0:
|
||||
llm_run.extra["tool_call_count"] = tool_call_count
|
||||
|
||||
return llm_run
|
||||
|
||||
def _errored_llm_run(
|
||||
|
||||
@@ -154,8 +154,8 @@ class EvaluatorCallbackHandler(BaseTracer):
|
||||
res
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _select_eval_results(
|
||||
self,
|
||||
results: EvaluationResult | EvaluationResults,
|
||||
) -> list[EvaluationResult]:
|
||||
if isinstance(results, EvaluationResult):
|
||||
|
||||
@@ -12,7 +12,7 @@ from typing import (
|
||||
TypeVar,
|
||||
cast,
|
||||
)
|
||||
from uuid import UUID, uuid4
|
||||
from uuid import UUID
|
||||
|
||||
from typing_extensions import NotRequired, override
|
||||
|
||||
@@ -42,7 +42,8 @@ from langchain_core.tracers.log_stream import (
|
||||
_astream_log_implementation,
|
||||
)
|
||||
from langchain_core.tracers.memory_stream import _MemoryStream
|
||||
from langchain_core.utils.aiter import aclosing, py_anext
|
||||
from langchain_core.utils.aiter import aclosing
|
||||
from langchain_core.utils.uuid import uuid7
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from collections.abc import AsyncIterator, Iterator, Sequence
|
||||
@@ -188,7 +189,7 @@ class _AstreamEventsCallbackHandler(AsyncCallbackHandler, _StreamingCallbackHand
|
||||
# atomic check and set
|
||||
tap = self.is_tapped.setdefault(run_id, sentinel)
|
||||
# wait for first chunk
|
||||
first = await py_anext(output, default=sentinel)
|
||||
first = await anext(output, sentinel)
|
||||
if first is sentinel:
|
||||
return
|
||||
# get run info
|
||||
@@ -425,6 +426,10 @@ class _AstreamEventsCallbackHandler(AsyncCallbackHandler, _StreamingCallbackHand
|
||||
"""Run on new output token. Only available when streaming is enabled.
|
||||
|
||||
For both chat models and non-chat models (legacy LLMs).
|
||||
|
||||
Raises:
|
||||
ValueError: If the run type is not `llm` or `chat_model`.
|
||||
AssertionError: If the run ID is not found in the run map.
|
||||
"""
|
||||
run_info = self.run_map.get(run_id)
|
||||
chunk_: GenerationChunk | BaseMessageChunk
|
||||
@@ -706,11 +711,7 @@ class _AstreamEventsCallbackHandler(AsyncCallbackHandler, _StreamingCallbackHand
|
||||
|
||||
@override
|
||||
async def on_tool_end(self, output: Any, *, run_id: UUID, **kwargs: Any) -> None:
|
||||
"""End a trace for a tool run.
|
||||
|
||||
Raises:
|
||||
AssertionError: If the run ID is a tool call and does not have inputs
|
||||
"""
|
||||
"""End a trace for a tool run."""
|
||||
run_info, inputs = self._get_tool_run_info_with_inputs(run_id)
|
||||
|
||||
self._send(
|
||||
@@ -1006,7 +1007,11 @@ async def _astream_events_implementation_v2(
|
||||
|
||||
# Assign the stream handler to the config
|
||||
config = ensure_config(config)
|
||||
run_id = cast("UUID", config.setdefault("run_id", uuid4()))
|
||||
if "run_id" in config:
|
||||
run_id = cast("UUID", config["run_id"])
|
||||
else:
|
||||
run_id = uuid7()
|
||||
config["run_id"] = run_id
|
||||
callbacks = config.get("callbacks")
|
||||
if callbacks is None:
|
||||
config["callbacks"] = [event_streamer]
|
||||
|
||||
@@ -21,6 +21,7 @@ from typing_extensions import override
|
||||
|
||||
from langchain_core.env import get_runtime_environment
|
||||
from langchain_core.load import dumpd
|
||||
from langchain_core.messages.ai import UsageMetadata, add_usage
|
||||
from langchain_core.tracers.base import BaseTracer
|
||||
from langchain_core.tracers.schemas import Run
|
||||
|
||||
@@ -69,6 +70,32 @@ def _get_executor() -> ThreadPoolExecutor:
|
||||
return _EXECUTOR
|
||||
|
||||
|
||||
def _get_usage_metadata_from_generations(
|
||||
generations: list[list[dict[str, Any]]],
|
||||
) -> UsageMetadata | None:
|
||||
"""Extract and aggregate `usage_metadata` from generations.
|
||||
|
||||
Iterates through generations to find and aggregate all `usage_metadata` found in
|
||||
messages. This is typically present in chat model outputs.
|
||||
|
||||
Args:
|
||||
generations: List of generation batches, where each batch is a list
|
||||
of generation dicts that may contain a `'message'` key with
|
||||
`'usage_metadata'`.
|
||||
|
||||
Returns:
|
||||
The aggregated `usage_metadata` dict if found, otherwise `None`.
|
||||
"""
|
||||
output: UsageMetadata | None = None
|
||||
for generation_batch in generations:
|
||||
for generation in generation_batch:
|
||||
if isinstance(generation, dict) and "message" in generation:
|
||||
message = generation["message"]
|
||||
if isinstance(message, dict) and "usage_metadata" in message:
|
||||
output = add_usage(output, message["usage_metadata"])
|
||||
return output
|
||||
|
||||
|
||||
class LangChainTracer(BaseTracer):
|
||||
"""Implementation of the SharedTracer that POSTS to the LangChain endpoint."""
|
||||
|
||||
@@ -220,7 +247,8 @@ class LangChainTracer(BaseTracer):
|
||||
log_error_once("post", e)
|
||||
raise
|
||||
|
||||
def _update_run_single(self, run: Run) -> None:
|
||||
@staticmethod
|
||||
def _update_run_single(run: Run) -> None:
|
||||
"""Update a run."""
|
||||
if run.extra.get("__disabled"):
|
||||
return
|
||||
@@ -266,6 +294,15 @@ class LangChainTracer(BaseTracer):
|
||||
|
||||
def _on_llm_end(self, run: Run) -> None:
|
||||
"""Process the LLM Run."""
|
||||
# Extract usage_metadata from outputs and store in extra.metadata
|
||||
if run.outputs and "generations" in run.outputs:
|
||||
usage_metadata = _get_usage_metadata_from_generations(
|
||||
run.outputs["generations"]
|
||||
)
|
||||
if usage_metadata is not None:
|
||||
if "metadata" not in run.extra:
|
||||
run.extra["metadata"] = {}
|
||||
run.extra["metadata"]["usage_metadata"] = usage_metadata
|
||||
self._update_run_single(run)
|
||||
|
||||
def _on_llm_error(self, run: Run) -> None:
|
||||
@@ -276,15 +313,28 @@ class LangChainTracer(BaseTracer):
|
||||
"""Process the Chain Run upon start."""
|
||||
if run.parent_run_id is None:
|
||||
run.reference_example_id = self.example_id
|
||||
self._persist_run_single(run)
|
||||
# Skip persisting if inputs are deferred (e.g., iterator/generator inputs).
|
||||
# The run will be posted when _on_chain_end is called with realized inputs.
|
||||
if not run.extra.get("defers_inputs"):
|
||||
self._persist_run_single(run)
|
||||
|
||||
def _on_chain_end(self, run: Run) -> None:
|
||||
"""Process the Chain Run."""
|
||||
self._update_run_single(run)
|
||||
# If inputs were deferred, persist (POST) the run now that inputs are realized.
|
||||
# Otherwise, update (PATCH) the existing run.
|
||||
if run.extra.get("defers_inputs"):
|
||||
self._persist_run_single(run)
|
||||
else:
|
||||
self._update_run_single(run)
|
||||
|
||||
def _on_chain_error(self, run: Run) -> None:
|
||||
"""Process the Chain Run upon error."""
|
||||
self._update_run_single(run)
|
||||
# If inputs were deferred, persist (POST) the run now that inputs are realized.
|
||||
# Otherwise, update (PATCH) the existing run.
|
||||
if run.extra.get("defers_inputs"):
|
||||
self._persist_run_single(run)
|
||||
else:
|
||||
self._update_run_single(run)
|
||||
|
||||
def _on_tool_start(self, run: Run) -> None:
|
||||
"""Process the Tool Run upon start."""
|
||||
|
||||
@@ -563,7 +563,7 @@ def _get_standardized_inputs(
|
||||
)
|
||||
raise NotImplementedError(msg)
|
||||
|
||||
inputs = load(run.inputs)
|
||||
inputs = load(run.inputs, allowed_objects="all")
|
||||
|
||||
if run.run_type in {"retriever", "llm", "chat_model"}:
|
||||
return inputs
|
||||
@@ -595,7 +595,7 @@ def _get_standardized_outputs(
|
||||
Returns:
|
||||
An output if returned, otherwise a None
|
||||
"""
|
||||
outputs = load(run.outputs)
|
||||
outputs = load(run.outputs, allowed_objects="all")
|
||||
if schema_format == "original":
|
||||
if run.run_type == "prompt" and "output" in outputs:
|
||||
# These were previously dumped before the tracer.
|
||||
|
||||
@@ -58,7 +58,7 @@ def merge_dicts(left: dict[str, Any], *others: dict[str, Any]) -> dict[str, Any]
|
||||
# "all dicts."
|
||||
# )
|
||||
if (right_k == "index" and merged[right_k].startswith("lc_")) or (
|
||||
right_k in ("id", "output_version", "model_provider")
|
||||
right_k in {"id", "output_version", "model_provider"}
|
||||
and merged[right_k] == right_v
|
||||
):
|
||||
continue
|
||||
|
||||
@@ -26,13 +26,15 @@ from typing import (
|
||||
|
||||
from typing_extensions import override
|
||||
|
||||
from langchain_core._api.deprecation import deprecated
|
||||
|
||||
T = TypeVar("T")
|
||||
|
||||
_no_default = object()
|
||||
|
||||
|
||||
# https://github.com/python/cpython/blob/main/Lib/test/test_asyncgen.py#L54
|
||||
# before 3.10, the builtin anext() was not available
|
||||
@deprecated(since="1.1.2", removal="2.0.0")
|
||||
def py_anext(
|
||||
iterator: AsyncIterator[T], default: T | Any = _no_default
|
||||
) -> Awaitable[T | Any | None]:
|
||||
@@ -128,7 +130,7 @@ async def tee_peer(
|
||||
if buffer:
|
||||
continue
|
||||
try:
|
||||
item = await iterator.__anext__()
|
||||
item = await anext(iterator)
|
||||
except StopAsyncIteration:
|
||||
break
|
||||
else:
|
||||
|
||||
@@ -8,6 +8,7 @@ import logging
|
||||
import types
|
||||
import typing
|
||||
import uuid
|
||||
from collections.abc import Mapping
|
||||
from typing import (
|
||||
TYPE_CHECKING,
|
||||
Annotated,
|
||||
@@ -17,8 +18,10 @@ from typing import (
|
||||
cast,
|
||||
get_args,
|
||||
get_origin,
|
||||
get_type_hints,
|
||||
)
|
||||
|
||||
import typing_extensions
|
||||
from pydantic import BaseModel
|
||||
from pydantic.v1 import BaseModel as BaseModelV1
|
||||
from pydantic.v1 import Field as Field_v1
|
||||
@@ -231,13 +234,20 @@ def _convert_any_typed_dicts_to_pydantic(
|
||||
if is_typeddict(type_):
|
||||
typed_dict = type_
|
||||
docstring = inspect.getdoc(typed_dict)
|
||||
annotations_ = typed_dict.__annotations__
|
||||
# Use get_type_hints to properly resolve forward references and
|
||||
# string annotations in Python 3.14+ (PEP 649 deferred annotations).
|
||||
# include_extras=True preserves Annotated metadata.
|
||||
try:
|
||||
annotations_ = get_type_hints(typed_dict, include_extras=True)
|
||||
except Exception:
|
||||
# Fallback for edge cases where get_type_hints might fail
|
||||
annotations_ = typed_dict.__annotations__
|
||||
description, arg_descriptions = _parse_google_docstring(
|
||||
docstring, list(annotations_)
|
||||
)
|
||||
fields: dict = {}
|
||||
for arg, arg_type in annotations_.items():
|
||||
if get_origin(arg_type) is Annotated: # type: ignore[comparison-overlap]
|
||||
if get_origin(arg_type) in {Annotated, typing_extensions.Annotated}:
|
||||
annotated_args = get_args(arg_type)
|
||||
new_arg_type = _convert_any_typed_dicts_to_pydantic(
|
||||
annotated_args[0], depth=depth + 1, visited=visited
|
||||
@@ -327,7 +337,7 @@ def _format_tool_to_openai_function(tool: BaseTool) -> FunctionDescription:
|
||||
|
||||
|
||||
def convert_to_openai_function(
|
||||
function: dict[str, Any] | type | Callable | BaseTool,
|
||||
function: Mapping[str, Any] | type | Callable | BaseTool,
|
||||
*,
|
||||
strict: bool | None = None,
|
||||
) -> dict[str, Any]:
|
||||
@@ -353,6 +363,7 @@ def convert_to_openai_function(
|
||||
ValueError: If function is not in a supported format.
|
||||
|
||||
!!! warning "Behavior changed in `langchain-core` 0.3.16"
|
||||
|
||||
`description` and `parameters` keys are now optional. Only `name` is
|
||||
required and guaranteed to be part of the output.
|
||||
"""
|
||||
@@ -453,7 +464,7 @@ _WellKnownOpenAITools = (
|
||||
|
||||
|
||||
def convert_to_openai_tool(
|
||||
tool: dict[str, Any] | type[BaseModel] | Callable | BaseTool,
|
||||
tool: Mapping[str, Any] | type[BaseModel] | Callable | BaseTool,
|
||||
*,
|
||||
strict: bool | None = None,
|
||||
) -> dict[str, Any]:
|
||||
@@ -477,15 +488,18 @@ def convert_to_openai_tool(
|
||||
OpenAI tool-calling API.
|
||||
|
||||
!!! warning "Behavior changed in `langchain-core` 0.3.16"
|
||||
|
||||
`description` and `parameters` keys are now optional. Only `name` is
|
||||
required and guaranteed to be part of the output.
|
||||
|
||||
!!! warning "Behavior changed in `langchain-core` 0.3.44"
|
||||
|
||||
Return OpenAI Responses API-style tools unchanged. This includes
|
||||
any dict with `"type"` in `"file_search"`, `"function"`,
|
||||
`"computer_use_preview"`, `"web_search_preview"`.
|
||||
|
||||
!!! warning "Behavior changed in `langchain-core` 0.3.63"
|
||||
|
||||
Added support for OpenAI's image generation built-in tool.
|
||||
"""
|
||||
# Import locally to prevent circular import
|
||||
|
||||
@@ -22,6 +22,9 @@ def get_color_mapping(
|
||||
|
||||
Returns:
|
||||
The mapping of items to colors.
|
||||
|
||||
Raises:
|
||||
ValueError: If no colors are available after applying exclusions.
|
||||
"""
|
||||
colors = list(_TEXT_COLOR_MAPPING.keys())
|
||||
if excluded_colors is not None:
|
||||
|
||||
@@ -170,28 +170,33 @@ def dereference_refs(
|
||||
full_schema: dict | None = None,
|
||||
skip_keys: Sequence[str] | None = None,
|
||||
) -> dict:
|
||||
"""Resolve and inline JSON Schema $ref references in a schema object.
|
||||
"""Resolve and inline JSON Schema `$ref` references in a schema object.
|
||||
|
||||
This function processes a JSON Schema and resolves all $ref references by replacing
|
||||
them with the actual referenced content. It handles both simple references and
|
||||
complex cases like circular references and mixed $ref objects that contain
|
||||
additional properties alongside the $ref.
|
||||
This function processes a JSON Schema and resolves all `$ref` references by
|
||||
replacing them with the actual referenced content.
|
||||
|
||||
Handles both simple references and complex cases like circular references and mixed
|
||||
`$ref` objects that contain additional properties alongside the `$ref`.
|
||||
|
||||
Args:
|
||||
schema_obj: The JSON Schema object or fragment to process. This can be a
|
||||
complete schema or just a portion of one.
|
||||
full_schema: The complete schema containing all definitions that $refs might
|
||||
point to. If not provided, defaults to schema_obj (useful when the
|
||||
schema is self-contained).
|
||||
skip_keys: Controls recursion behavior and reference resolution depth:
|
||||
- If `None` (Default): Only recurse under '$defs' and use shallow reference
|
||||
resolution (break cycles but don't deep-inline nested refs)
|
||||
- If provided (even as []): Recurse under all keys and use deep reference
|
||||
resolution (fully inline all nested references)
|
||||
schema_obj: The JSON Schema object or fragment to process.
|
||||
|
||||
This can be a complete schema or just a portion of one.
|
||||
full_schema: The complete schema containing all definitions that `$refs` might
|
||||
point to.
|
||||
|
||||
If not provided, defaults to `schema_obj` (useful when the schema is
|
||||
self-contained).
|
||||
skip_keys: Controls recursion behavior and reference resolution depth.
|
||||
|
||||
- If `None` (Default): Only recurse under `'$defs'` and use shallow
|
||||
reference resolution (break cycles but don't deep-inline nested refs)
|
||||
- If provided (even as `[]`): Recurse under all keys and use deep reference
|
||||
resolution (fully inline all nested references)
|
||||
|
||||
Returns:
|
||||
A new dictionary with all $ref references resolved and inlined. The original
|
||||
schema_obj is not modified.
|
||||
A new dictionary with all $ref references resolved and inlined.
|
||||
The original `schema_obj` is not modified.
|
||||
|
||||
Examples:
|
||||
Basic reference resolution:
|
||||
@@ -203,7 +208,8 @@ def dereference_refs(
|
||||
>>> result = dereference_refs(schema)
|
||||
>>> result["properties"]["name"] # {"type": "string"}
|
||||
|
||||
Mixed $ref with additional properties:
|
||||
Mixed `$ref` with additional properties:
|
||||
|
||||
>>> schema = {
|
||||
... "properties": {
|
||||
... "name": {"$ref": "#/$defs/base", "description": "User name"}
|
||||
@@ -215,6 +221,7 @@ def dereference_refs(
|
||||
# {"type": "string", "minLength": 1, "description": "User name"}
|
||||
|
||||
Handling circular references:
|
||||
|
||||
>>> schema = {
|
||||
... "properties": {"user": {"$ref": "#/$defs/User"}},
|
||||
... "$defs": {
|
||||
@@ -227,10 +234,11 @@ def dereference_refs(
|
||||
>>> result = dereference_refs(schema) # Won't cause infinite recursion
|
||||
|
||||
!!! note
|
||||
|
||||
- Circular references are handled gracefully by breaking cycles
|
||||
- Mixed $ref objects (with both $ref and other properties) are supported
|
||||
- Additional properties in mixed $refs override resolved properties
|
||||
- The $defs section is preserved in the output by default
|
||||
- Mixed `$ref` objects (with both `$ref` and other properties) are supported
|
||||
- Additional properties in mixed `$refs` override resolved properties
|
||||
- The `$defs` section is preserved in the output by default
|
||||
"""
|
||||
full = full_schema or schema_obj
|
||||
keys_to_skip = list(skip_keys) if skip_keys is not None else ["$defs"]
|
||||
|
||||
@@ -85,7 +85,7 @@ def l_sa_check(
|
||||
# If all the characters since the last newline are spaces
|
||||
# Then the next tag could be a standalone
|
||||
# Otherwise it can't be
|
||||
return padding.isspace() or padding == ""
|
||||
return padding.isspace() or not padding
|
||||
return False
|
||||
|
||||
|
||||
@@ -305,7 +305,7 @@ def tokenize(
|
||||
|
||||
# Start yielding
|
||||
# Ignore literals that are empty
|
||||
if literal != "":
|
||||
if literal:
|
||||
yield ("literal", literal)
|
||||
|
||||
# Ignore comments and set delimiters
|
||||
@@ -374,15 +374,29 @@ def _get_key(
|
||||
if resolved_scope in (0, False):
|
||||
return resolved_scope
|
||||
# Move into the scope
|
||||
try:
|
||||
# Try subscripting (Normal dictionaries)
|
||||
resolved_scope = cast("dict[str, Any]", resolved_scope)[child]
|
||||
except (TypeError, AttributeError):
|
||||
if isinstance(resolved_scope, dict):
|
||||
try:
|
||||
resolved_scope = getattr(resolved_scope, child)
|
||||
except (TypeError, AttributeError):
|
||||
# Try as a list
|
||||
resolved_scope = resolved_scope[int(child)] # type: ignore[index]
|
||||
resolved_scope = resolved_scope[child]
|
||||
except (KeyError, TypeError):
|
||||
# Key not found - will be caught by outer try-except
|
||||
msg = f"Key {child!r} not found in dict"
|
||||
raise KeyError(msg) from None
|
||||
elif isinstance(resolved_scope, (list, tuple)):
|
||||
try:
|
||||
resolved_scope = resolved_scope[int(child)]
|
||||
except (ValueError, IndexError, TypeError):
|
||||
# Invalid index - will be caught by outer try-except
|
||||
msg = f"Invalid index {child!r} for list/tuple"
|
||||
raise IndexError(msg) from None
|
||||
else:
|
||||
# Reject everything else for security
|
||||
# This prevents traversing into arbitrary Python objects
|
||||
msg = (
|
||||
f"Cannot traverse into {type(resolved_scope).__name__}. "
|
||||
"Mustache templates only support dict, list, and tuple. "
|
||||
f"Got: {type(resolved_scope)}"
|
||||
)
|
||||
raise TypeError(msg) # noqa: TRY301
|
||||
|
||||
try:
|
||||
# This allows for custom falsy data types
|
||||
@@ -393,8 +407,9 @@ def _get_key(
|
||||
if resolved_scope in (0, False):
|
||||
return resolved_scope
|
||||
return resolved_scope or ""
|
||||
except (AttributeError, KeyError, IndexError, ValueError):
|
||||
except (AttributeError, KeyError, IndexError, ValueError, TypeError):
|
||||
# We couldn't find the key in the current scope
|
||||
# TypeError: Attempted to traverse into non-dict/list type
|
||||
# We'll try again on the next pass
|
||||
pass
|
||||
|
||||
|
||||
@@ -88,18 +88,18 @@ def is_pydantic_v2_subclass(cls: type) -> bool:
|
||||
"""Check if the given class is Pydantic v2-like.
|
||||
|
||||
Returns:
|
||||
`True` if the given class is a subclass of Pydantic BaseModel 2.x.
|
||||
`True` if the given class is a subclass of Pydantic `BaseModel` 2.x.
|
||||
"""
|
||||
return issubclass(cls, BaseModel)
|
||||
|
||||
|
||||
def is_basemodel_subclass(cls: type) -> bool:
|
||||
"""Check if the given class is a subclass of Pydantic BaseModel.
|
||||
"""Check if the given class is a subclass of Pydantic `BaseModel`.
|
||||
|
||||
Check if the given class is a subclass of any of the following:
|
||||
|
||||
* pydantic.BaseModel in Pydantic 2.x
|
||||
* pydantic.v1.BaseModel in Pydantic 2.x
|
||||
* `pydantic.BaseModel` in Pydantic 2.x
|
||||
* `pydantic.v1.BaseModel` in Pydantic 2.x
|
||||
|
||||
Returns:
|
||||
`True` if the given class is a subclass of Pydantic `BaseModel`.
|
||||
@@ -112,12 +112,12 @@ def is_basemodel_subclass(cls: type) -> bool:
|
||||
|
||||
|
||||
def is_basemodel_instance(obj: Any) -> bool:
|
||||
"""Check if the given class is an instance of Pydantic BaseModel.
|
||||
"""Check if the given class is an instance of Pydantic `BaseModel`.
|
||||
|
||||
Check if the given class is an instance of any of the following:
|
||||
|
||||
* pydantic.BaseModel in Pydantic 2.x
|
||||
* pydantic.v1.BaseModel in Pydantic 2.x
|
||||
* `pydantic.BaseModel` in Pydantic 2.x
|
||||
* `pydantic.v1.BaseModel` in Pydantic 2.x
|
||||
|
||||
Returns:
|
||||
`True` if the given class is an instance of Pydantic `BaseModel`.
|
||||
|
||||
57
libs/core/langchain_core/utils/uuid.py
Normal file
57
libs/core/langchain_core/utils/uuid.py
Normal file
@@ -0,0 +1,57 @@
|
||||
"""UUID utility functions.
|
||||
|
||||
This module exports a uuid7 function to generate monotonic, time-ordered UUIDs
|
||||
for tracing and similar operations.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import typing
|
||||
from uuid import UUID
|
||||
|
||||
from uuid_utils.compat import uuid7 as _uuid_utils_uuid7
|
||||
|
||||
if typing.TYPE_CHECKING:
|
||||
from uuid import UUID
|
||||
|
||||
_NANOS_PER_SECOND: typing.Final = 1_000_000_000
|
||||
|
||||
|
||||
def _to_timestamp_and_nanos(nanoseconds: int) -> tuple[int, int]:
|
||||
"""Split a nanosecond timestamp into seconds and remaining nanoseconds."""
|
||||
seconds, nanos = divmod(nanoseconds, _NANOS_PER_SECOND)
|
||||
return seconds, nanos
|
||||
|
||||
|
||||
def uuid7(nanoseconds: int | None = None) -> UUID:
|
||||
"""Generate a UUID from a Unix timestamp in nanoseconds and random bits.
|
||||
|
||||
UUIDv7 objects feature monotonicity within a millisecond.
|
||||
|
||||
Args:
|
||||
nanoseconds: Optional ns timestamp. If not provided, uses current time.
|
||||
|
||||
Returns:
|
||||
A UUIDv7 object.
|
||||
"""
|
||||
# --- 48 --- -- 4 -- --- 12 --- -- 2 -- --- 30 --- - 32 -
|
||||
# unix_ts_ms | version | counter_hi | variant | counter_lo | random
|
||||
#
|
||||
# 'counter = counter_hi | counter_lo' is a 42-bit counter constructed
|
||||
# with Method 1 of RFC 9562, §6.2, and its MSB is set to 0.
|
||||
#
|
||||
# 'random' is a 32-bit random value regenerated for every new UUID.
|
||||
#
|
||||
# If multiple UUIDs are generated within the same millisecond, the LSB
|
||||
# of 'counter' is incremented by 1. When overflowing, the timestamp is
|
||||
# advanced and the counter is reset to a random 42-bit integer with MSB
|
||||
# set to 0.
|
||||
|
||||
# For now, just delegate to the uuid_utils implementation
|
||||
if nanoseconds is None:
|
||||
return _uuid_utils_uuid7()
|
||||
seconds, nanos = _to_timestamp_and_nanos(nanoseconds)
|
||||
return _uuid_utils_uuid7(timestamp=seconds, nanos=nanos)
|
||||
|
||||
|
||||
__all__ = ["uuid7"]
|
||||
@@ -294,8 +294,9 @@ class VectorStore(ABC):
|
||||
|
||||
Args:
|
||||
query: Input text.
|
||||
search_type: Type of search to perform. Can be `'similarity'`, `'mmr'`, or
|
||||
`'similarity_score_threshold'`.
|
||||
search_type: Type of search to perform.
|
||||
|
||||
Can be `'similarity'`, `'mmr'`, or `'similarity_score_threshold'`.
|
||||
**kwargs: Arguments to pass to the search method.
|
||||
|
||||
Returns:
|
||||
@@ -328,8 +329,9 @@ class VectorStore(ABC):
|
||||
|
||||
Args:
|
||||
query: Input text.
|
||||
search_type: Type of search to perform. Can be `'similarity'`, `'mmr'`, or
|
||||
`'similarity_score_threshold'`.
|
||||
search_type: Type of search to perform.
|
||||
|
||||
Can be `'similarity'`, `'mmr'`, or `'similarity_score_threshold'`.
|
||||
**kwargs: Arguments to pass to the search method.
|
||||
|
||||
Returns:
|
||||
@@ -460,9 +462,10 @@ class VectorStore(ABC):
|
||||
Args:
|
||||
query: Input text.
|
||||
k: Number of `Document` objects to return.
|
||||
**kwargs: kwargs to be passed to similarity search. Should include
|
||||
`score_threshold`, An optional floating point value between `0` to `1`
|
||||
to filter the resulting set of retrieved docs
|
||||
**kwargs: Kwargs to be passed to similarity search.
|
||||
|
||||
Should include `score_threshold`, an optional floating point value
|
||||
between `0` to `1` to filter the resulting set of retrieved docs.
|
||||
|
||||
Returns:
|
||||
List of tuples of `(doc, similarity_score)`
|
||||
@@ -487,9 +490,10 @@ class VectorStore(ABC):
|
||||
Args:
|
||||
query: Input text.
|
||||
k: Number of `Document` objects to return.
|
||||
**kwargs: kwargs to be passed to similarity search. Should include
|
||||
`score_threshold`, An optional floating point value between `0` to `1`
|
||||
to filter the resulting set of retrieved docs
|
||||
**kwargs: Kwargs to be passed to similarity search.
|
||||
|
||||
Should include `score_threshold`, an optional floating point value
|
||||
between `0` to `1` to filter the resulting set of retrieved docs.
|
||||
|
||||
Returns:
|
||||
List of tuples of `(doc, similarity_score)`
|
||||
@@ -511,9 +515,10 @@ class VectorStore(ABC):
|
||||
Args:
|
||||
query: Input text.
|
||||
k: Number of `Document` objects to return.
|
||||
**kwargs: kwargs to be passed to similarity search. Should include
|
||||
`score_threshold`, An optional floating point value between `0` to `1`
|
||||
to filter the resulting set of retrieved docs
|
||||
**kwargs: Kwargs to be passed to similarity search.
|
||||
|
||||
Should include `score_threshold`, an optional floating point value
|
||||
between `0` to `1` to filter the resulting set of retrieved docs.
|
||||
|
||||
Returns:
|
||||
List of tuples of `(doc, similarity_score)`.
|
||||
@@ -560,9 +565,10 @@ class VectorStore(ABC):
|
||||
Args:
|
||||
query: Input text.
|
||||
k: Number of `Document` objects to return.
|
||||
**kwargs: kwargs to be passed to similarity search. Should include
|
||||
`score_threshold`, An optional floating point value between `0` to `1`
|
||||
to filter the resulting set of retrieved docs
|
||||
**kwargs: Kwargs to be passed to similarity search.
|
||||
|
||||
Should include `score_threshold`, an optional floating point value
|
||||
between `0` to `1` to filter the resulting set of retrieved docs.
|
||||
|
||||
Returns:
|
||||
List of tuples of `(doc, similarity_score)`
|
||||
@@ -900,13 +906,15 @@ class VectorStore(ABC):
|
||||
|
||||
Args:
|
||||
**kwargs: Keyword arguments to pass to the search function.
|
||||
|
||||
Can include:
|
||||
|
||||
* `search_type`: Defines the type of search that the Retriever should
|
||||
perform. Can be `'similarity'` (default), `'mmr'`, or
|
||||
`'similarity_score_threshold'`.
|
||||
* `search_kwargs`: Keyword arguments to pass to the search function. Can
|
||||
include things like:
|
||||
* `search_kwargs`: Keyword arguments to pass to the search function.
|
||||
|
||||
Can include things like:
|
||||
|
||||
* `k`: Amount of documents to return (Default: `4`)
|
||||
* `score_threshold`: Minimum relevance threshold
|
||||
|
||||
@@ -528,7 +528,7 @@ class InMemoryVectorStore(VectorStore):
|
||||
"""
|
||||
path_: Path = Path(path)
|
||||
with path_.open("r", encoding="utf-8") as f:
|
||||
store = load(json.load(f))
|
||||
store = load(json.load(f), allowed_objects=[Document])
|
||||
vectorstore = cls(embedding=embedding, **kwargs)
|
||||
vectorstore.store = store
|
||||
return vectorstore
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
"""langchain-core version information and utilities."""
|
||||
|
||||
VERSION = "1.0.6"
|
||||
VERSION = "1.2.5"
|
||||
|
||||
@@ -9,7 +9,7 @@ license = {text = "MIT"}
|
||||
readme = "README.md"
|
||||
authors = []
|
||||
|
||||
version = "1.0.6"
|
||||
version = "1.2.5"
|
||||
requires-python = ">=3.10.0,<4.0.0"
|
||||
dependencies = [
|
||||
"langsmith>=0.3.45,<1.0.0",
|
||||
@@ -19,6 +19,7 @@ dependencies = [
|
||||
"typing-extensions>=4.7.0,<5.0.0",
|
||||
"packaging>=23.2.0,<26.0.0",
|
||||
"pydantic>=2.7.4,<3.0.0",
|
||||
"uuid-utils>=0.12.0,<1.0",
|
||||
]
|
||||
|
||||
[project.urls]
|
||||
@@ -36,7 +37,6 @@ typing = [
|
||||
"mypy>=1.18.1,<1.19.0",
|
||||
"types-pyyaml>=6.0.12.2,<7.0.0.0",
|
||||
"types-requests>=2.28.11.5,<3.0.0.0",
|
||||
"langchain-model-profiles",
|
||||
"langchain-text-splitters",
|
||||
]
|
||||
dev = [
|
||||
@@ -58,7 +58,6 @@ test = [
|
||||
"blockbuster>=1.5.18,<1.6.0",
|
||||
"numpy>=1.26.4; python_version<'3.13'",
|
||||
"numpy>=2.1.0; python_version>='3.13'",
|
||||
"langchain-model-profiles",
|
||||
"langchain-tests",
|
||||
"pytest-benchmark",
|
||||
"pytest-codspeed",
|
||||
@@ -66,7 +65,6 @@ test = [
|
||||
test_integration = []
|
||||
|
||||
[tool.uv.sources]
|
||||
langchain-model-profiles = { path = "../model-profiles" }
|
||||
langchain-tests = { path = "../standard-tests" }
|
||||
langchain-text-splitters = { path = "../text-splitters" }
|
||||
|
||||
|
||||
@@ -4,7 +4,7 @@ from typing import Any
|
||||
from uuid import UUID
|
||||
|
||||
import pytest
|
||||
from pytest_benchmark.fixture import BenchmarkFixture # type: ignore[import-untyped]
|
||||
from pytest_benchmark.fixture import BenchmarkFixture
|
||||
from typing_extensions import override
|
||||
|
||||
from langchain_core.callbacks.base import AsyncCallbackHandler
|
||||
|
||||
@@ -2,7 +2,7 @@ import subprocess
|
||||
import sys
|
||||
|
||||
import pytest
|
||||
from pytest_benchmark.fixture import BenchmarkFixture # type: ignore[import-untyped]
|
||||
from pytest_benchmark.fixture import BenchmarkFixture
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
|
||||
@@ -493,3 +493,88 @@ def test_rename_parameter_method() -> None:
|
||||
|
||||
with pytest.raises(TypeError):
|
||||
assert foo.a("hello", old_name="hello") # type: ignore[call-arg]
|
||||
|
||||
|
||||
# Tests for PEP 702 __deprecated__ attribute
|
||||
|
||||
|
||||
def test_deprecated_function_has_pep702_attribute() -> None:
|
||||
"""Test that deprecated functions have `__deprecated__` attribute."""
|
||||
|
||||
@deprecated(since="2.0.0", removal="3.0.0", alternative="new_function")
|
||||
def old_function() -> str:
|
||||
"""Original doc."""
|
||||
return "old"
|
||||
|
||||
assert hasattr(old_function, "__deprecated__")
|
||||
assert old_function.__deprecated__ == "Use new_function instead."
|
||||
|
||||
|
||||
def test_deprecated_function_with_alternative_import_has_pep702_attribute() -> None:
|
||||
"""Test `__deprecated__` with `alternative_import`."""
|
||||
|
||||
@deprecated(
|
||||
since="2.0.0", removal="3.0.0", alternative_import="new_module.new_function"
|
||||
)
|
||||
def old_function() -> str:
|
||||
"""Original doc."""
|
||||
return "old"
|
||||
|
||||
assert hasattr(old_function, "__deprecated__")
|
||||
assert old_function.__deprecated__ == "Use new_module.new_function instead."
|
||||
|
||||
|
||||
def test_deprecated_function_without_alternative_has_pep702_attribute() -> None:
|
||||
"""Test `__deprecated__` without alternative shows `'Deprecated.'`."""
|
||||
|
||||
@deprecated(since="2.0.0", removal="3.0.0")
|
||||
def old_function() -> str:
|
||||
"""Original doc."""
|
||||
return "old"
|
||||
|
||||
assert hasattr(old_function, "__deprecated__")
|
||||
assert old_function.__deprecated__ == "Deprecated."
|
||||
|
||||
|
||||
def test_deprecated_class_has_pep702_attribute() -> None:
|
||||
"""Test that deprecated classes have `__deprecated__` attribute (PEP 702)."""
|
||||
|
||||
@deprecated(since="2.0.0", removal="3.0.0", alternative="NewClass")
|
||||
class OldClass:
|
||||
def __init__(self) -> None:
|
||||
"""Original doc."""
|
||||
|
||||
assert hasattr(OldClass, "__deprecated__")
|
||||
assert OldClass.__deprecated__ == "Use NewClass instead."
|
||||
|
||||
|
||||
def test_deprecated_class_without_alternative_has_pep702_attribute() -> None:
|
||||
"""Test `__deprecated__` on class without alternative."""
|
||||
|
||||
@deprecated(since="2.0.0", removal="3.0.0")
|
||||
class OldClass:
|
||||
def __init__(self) -> None:
|
||||
"""Original doc."""
|
||||
|
||||
assert hasattr(OldClass, "__deprecated__")
|
||||
assert OldClass.__deprecated__ == "Deprecated."
|
||||
|
||||
|
||||
def test_deprecated_property_has_pep702_attribute() -> None:
|
||||
"""Test that deprecated properties have `__deprecated__` attribute (PEP 702).
|
||||
|
||||
Note: When using @property over @deprecated (which is what works in practice),
|
||||
the `__deprecated__` attribute is set on the property's underlying `fget` function.
|
||||
"""
|
||||
|
||||
class MyClass:
|
||||
@property
|
||||
@deprecated(since="2.0.0", removal="3.0.0", alternative="new_property")
|
||||
def old_property(self) -> str:
|
||||
"""Original doc."""
|
||||
return "old"
|
||||
|
||||
prop = MyClass.__dict__["old_property"]
|
||||
# The __deprecated__ attribute is on the underlying fget function
|
||||
assert hasattr(prop.fget, "__deprecated__")
|
||||
assert prop.fget.__deprecated__ == "Use new_property instead."
|
||||
|
||||
@@ -165,9 +165,9 @@ async def test_stream_error_callback() -> None:
|
||||
cb_async = FakeAsyncCallbackHandler()
|
||||
llm_astream = llm.astream("Dummy message", config={"callbacks": [cb_async]})
|
||||
for _ in range(i):
|
||||
await llm_astream.__anext__()
|
||||
await anext(llm_astream)
|
||||
with pytest.raises(FakeListChatModelError):
|
||||
await llm_astream.__anext__()
|
||||
await anext(llm_astream)
|
||||
eval_response(cb_async, i)
|
||||
|
||||
cb_sync = FakeCallbackHandler()
|
||||
@@ -1222,19 +1222,12 @@ def test_get_ls_params() -> None:
|
||||
|
||||
def test_model_profiles() -> None:
|
||||
model = GenericFakeChatModel(messages=iter([]))
|
||||
profile = model.profile
|
||||
assert profile == {}
|
||||
assert model.profile is None
|
||||
|
||||
class MyModel(GenericFakeChatModel):
|
||||
model: str = "gpt-5"
|
||||
|
||||
@property
|
||||
def _llm_type(self) -> str:
|
||||
return "openai-chat"
|
||||
|
||||
model = MyModel(messages=iter([]))
|
||||
profile = model.profile
|
||||
assert profile
|
||||
model_with_profile = GenericFakeChatModel(
|
||||
messages=iter([]), profile={"max_input_tokens": 100}
|
||||
)
|
||||
assert model_with_profile.profile == {"max_input_tokens": 100}
|
||||
|
||||
|
||||
class MockResponse:
|
||||
|
||||
@@ -18,6 +18,8 @@ EXPECTED_ALL = [
|
||||
"FakeStreamingListLLM",
|
||||
"FakeListLLM",
|
||||
"ParrotFakeChatModel",
|
||||
"ModelProfile",
|
||||
"ModelProfileRegistry",
|
||||
"is_openai_data_block",
|
||||
]
|
||||
|
||||
|
||||
@@ -1,6 +1,13 @@
|
||||
from langchain_core.load import __all__
|
||||
|
||||
EXPECTED_ALL = ["dumpd", "dumps", "load", "loads", "Serializable"]
|
||||
EXPECTED_ALL = [
|
||||
"InitValidator",
|
||||
"Serializable",
|
||||
"dumpd",
|
||||
"dumps",
|
||||
"load",
|
||||
"loads",
|
||||
]
|
||||
|
||||
|
||||
def test_all_imports() -> None:
|
||||
|
||||
431
libs/core/tests/unit_tests/load/test_secret_injection.py
Normal file
431
libs/core/tests/unit_tests/load/test_secret_injection.py
Normal file
@@ -0,0 +1,431 @@
|
||||
"""Tests for secret injection prevention in serialization.
|
||||
|
||||
Verify that user-provided data containing secret-like structures cannot be used to
|
||||
extract environment variables during deserialization.
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
from typing import Any
|
||||
from unittest import mock
|
||||
|
||||
import pytest
|
||||
from pydantic import BaseModel
|
||||
|
||||
from langchain_core.documents import Document
|
||||
from langchain_core.load import dumpd, dumps, load
|
||||
from langchain_core.messages import AIMessage, HumanMessage
|
||||
from langchain_core.outputs import ChatGeneration
|
||||
|
||||
SENTINEL_ENV_VAR = "TEST_SECRET_INJECTION_VAR"
|
||||
"""Sentinel value that should NEVER appear in serialized output."""
|
||||
|
||||
SENTINEL_VALUE = "LEAKED_SECRET_MEOW_12345"
|
||||
"""Sentinel value that should NEVER appear in serialized output."""
|
||||
|
||||
MALICIOUS_SECRET_DICT: dict[str, Any] = {
|
||||
"lc": 1,
|
||||
"type": "secret",
|
||||
"id": [SENTINEL_ENV_VAR],
|
||||
}
|
||||
"""The malicious secret-like dict that tries to read the env var"""
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def _set_sentinel_env_var() -> Any:
|
||||
"""Set the sentinel env var for all tests in this module."""
|
||||
with mock.patch.dict(os.environ, {SENTINEL_ENV_VAR: SENTINEL_VALUE}):
|
||||
yield
|
||||
|
||||
|
||||
def _assert_no_secret_leak(payload: Any) -> None:
|
||||
"""Assert that serializing/deserializing payload doesn't leak the secret."""
|
||||
# First serialize
|
||||
serialized = dumps(payload)
|
||||
|
||||
# Deserialize with secrets_from_env=True (the dangerous setting)
|
||||
deserialized = load(serialized, secrets_from_env=True)
|
||||
|
||||
# Re-serialize to string
|
||||
reserialized = dumps(deserialized)
|
||||
|
||||
assert SENTINEL_VALUE not in reserialized, (
|
||||
f"Secret was leaked! Found '{SENTINEL_VALUE}' in output.\n"
|
||||
f"Original payload type: {type(payload)}\n"
|
||||
f"Reserialized output: {reserialized[:500]}..."
|
||||
)
|
||||
|
||||
assert SENTINEL_VALUE not in repr(deserialized), (
|
||||
f"Secret was leaked in deserialized object! Found '{SENTINEL_VALUE}'.\n"
|
||||
f"Deserialized: {deserialized!r}"
|
||||
)
|
||||
|
||||
|
||||
class TestSerializableTopLevel:
|
||||
"""Tests with `Serializable` objects at the top level."""
|
||||
|
||||
def test_human_message_with_secret_in_content(self) -> None:
|
||||
"""`HumanMessage` with secret-like dict in `content`."""
|
||||
msg = HumanMessage(
|
||||
content=[
|
||||
{"type": "text", "text": "Hello"},
|
||||
{"type": "text", "text": MALICIOUS_SECRET_DICT},
|
||||
]
|
||||
)
|
||||
_assert_no_secret_leak(msg)
|
||||
|
||||
def test_human_message_with_secret_in_additional_kwargs(self) -> None:
|
||||
"""`HumanMessage` with secret-like dict in `additional_kwargs`."""
|
||||
msg = HumanMessage(
|
||||
content="Hello",
|
||||
additional_kwargs={"data": MALICIOUS_SECRET_DICT},
|
||||
)
|
||||
_assert_no_secret_leak(msg)
|
||||
|
||||
def test_human_message_with_secret_in_nested_additional_kwargs(self) -> None:
|
||||
"""`HumanMessage` with secret-like dict nested in `additional_kwargs`."""
|
||||
msg = HumanMessage(
|
||||
content="Hello",
|
||||
additional_kwargs={"nested": {"deep": MALICIOUS_SECRET_DICT}},
|
||||
)
|
||||
_assert_no_secret_leak(msg)
|
||||
|
||||
def test_human_message_with_secret_in_list_in_additional_kwargs(self) -> None:
|
||||
"""`HumanMessage` with secret-like dict in a list in `additional_kwargs`."""
|
||||
msg = HumanMessage(
|
||||
content="Hello",
|
||||
additional_kwargs={"items": [MALICIOUS_SECRET_DICT]},
|
||||
)
|
||||
_assert_no_secret_leak(msg)
|
||||
|
||||
def test_ai_message_with_secret_in_response_metadata(self) -> None:
|
||||
"""`AIMessage` with secret-like dict in `response_metadata`."""
|
||||
msg = AIMessage(
|
||||
content="Hello",
|
||||
response_metadata={"data": MALICIOUS_SECRET_DICT},
|
||||
)
|
||||
_assert_no_secret_leak(msg)
|
||||
|
||||
def test_document_with_secret_in_metadata(self) -> None:
|
||||
"""`Document` with secret-like dict in `metadata`."""
|
||||
doc = Document(
|
||||
page_content="Hello",
|
||||
metadata={"data": MALICIOUS_SECRET_DICT},
|
||||
)
|
||||
_assert_no_secret_leak(doc)
|
||||
|
||||
def test_nested_serializable_with_secret(self) -> None:
|
||||
"""`AIMessage` containing `dumpd(HumanMessage)` with secret in kwargs."""
|
||||
inner = HumanMessage(
|
||||
content="Hello",
|
||||
additional_kwargs={"secret": MALICIOUS_SECRET_DICT},
|
||||
)
|
||||
outer = AIMessage(
|
||||
content="Outer",
|
||||
additional_kwargs={"nested": [dumpd(inner)]},
|
||||
)
|
||||
_assert_no_secret_leak(outer)
|
||||
|
||||
|
||||
class TestDictTopLevel:
|
||||
"""Tests with plain dicts at the top level."""
|
||||
|
||||
def test_dict_with_serializable_containing_secret(self) -> None:
|
||||
"""Dict containing a `Serializable` with secret-like dict."""
|
||||
msg = HumanMessage(
|
||||
content="Hello",
|
||||
additional_kwargs={"data": MALICIOUS_SECRET_DICT},
|
||||
)
|
||||
payload = {"message": msg}
|
||||
_assert_no_secret_leak(payload)
|
||||
|
||||
def test_dict_with_secret_no_serializable(self) -> None:
|
||||
"""Dict with secret-like dict, no `Serializable` objects."""
|
||||
payload = {"data": MALICIOUS_SECRET_DICT}
|
||||
_assert_no_secret_leak(payload)
|
||||
|
||||
def test_dict_with_nested_secret_no_serializable(self) -> None:
|
||||
"""Dict with nested secret-like dict, no `Serializable` objects."""
|
||||
payload = {"outer": {"inner": MALICIOUS_SECRET_DICT}}
|
||||
_assert_no_secret_leak(payload)
|
||||
|
||||
def test_dict_with_secret_in_list(self) -> None:
|
||||
"""Dict with secret-like dict in a list."""
|
||||
payload = {"items": [MALICIOUS_SECRET_DICT]}
|
||||
_assert_no_secret_leak(payload)
|
||||
|
||||
def test_dict_mimicking_lc_constructor_with_secret(self) -> None:
|
||||
"""Dict that looks like an LC constructor containing a secret."""
|
||||
payload = {
|
||||
"lc": 1,
|
||||
"type": "constructor",
|
||||
"id": ["langchain_core", "messages", "ai", "AIMessage"],
|
||||
"kwargs": {
|
||||
"content": "Hello",
|
||||
"additional_kwargs": {"secret": MALICIOUS_SECRET_DICT},
|
||||
},
|
||||
}
|
||||
_assert_no_secret_leak(payload)
|
||||
|
||||
|
||||
class TestPydanticModelTopLevel:
|
||||
"""Tests with Pydantic models (non-`Serializable`) at the top level."""
|
||||
|
||||
def test_pydantic_model_with_serializable_containing_secret(self) -> None:
|
||||
"""Pydantic model containing a `Serializable` with secret-like dict."""
|
||||
|
||||
class MyModel(BaseModel):
|
||||
message: Any
|
||||
|
||||
msg = HumanMessage(
|
||||
content="Hello",
|
||||
additional_kwargs={"data": MALICIOUS_SECRET_DICT},
|
||||
)
|
||||
payload = MyModel(message=msg)
|
||||
_assert_no_secret_leak(payload)
|
||||
|
||||
def test_pydantic_model_with_secret_dict(self) -> None:
|
||||
"""Pydantic model containing a secret-like dict directly."""
|
||||
|
||||
class MyModel(BaseModel):
|
||||
data: dict[str, Any]
|
||||
|
||||
payload = MyModel(data=MALICIOUS_SECRET_DICT)
|
||||
_assert_no_secret_leak(payload)
|
||||
|
||||
# Test treatment of "parsed" in additional_kwargs
|
||||
msg = AIMessage(content=[], additional_kwargs={"parsed": payload})
|
||||
gen = ChatGeneration(message=msg)
|
||||
_assert_no_secret_leak(gen)
|
||||
round_trip = load(dumpd(gen))
|
||||
assert MyModel(**(round_trip.message.additional_kwargs["parsed"])) == payload
|
||||
|
||||
def test_pydantic_model_with_nested_secret(self) -> None:
|
||||
"""Pydantic model with nested secret-like dict."""
|
||||
|
||||
class MyModel(BaseModel):
|
||||
nested: dict[str, Any]
|
||||
|
||||
payload = MyModel(nested={"inner": MALICIOUS_SECRET_DICT})
|
||||
_assert_no_secret_leak(payload)
|
||||
|
||||
|
||||
class TestNonSerializableClassTopLevel:
|
||||
"""Tests with custom (non-`Serializable`) classes at the top level."""
|
||||
|
||||
def test_custom_class_with_serializable_containing_secret(self) -> None:
|
||||
"""Custom class containing a `Serializable` with secret-like dict."""
|
||||
|
||||
class MyClass:
|
||||
def __init__(self, message: Any) -> None:
|
||||
self.message = message
|
||||
|
||||
msg = HumanMessage(
|
||||
content="Hello",
|
||||
additional_kwargs={"data": MALICIOUS_SECRET_DICT},
|
||||
)
|
||||
payload = MyClass(message=msg)
|
||||
# This will serialize as not_implemented, but let's verify no leak
|
||||
_assert_no_secret_leak(payload)
|
||||
|
||||
def test_custom_class_with_secret_dict(self) -> None:
|
||||
"""Custom class containing a secret-like dict directly."""
|
||||
|
||||
class MyClass:
|
||||
def __init__(self, data: dict[str, Any]) -> None:
|
||||
self.data = data
|
||||
|
||||
payload = MyClass(data=MALICIOUS_SECRET_DICT)
|
||||
_assert_no_secret_leak(payload)
|
||||
|
||||
|
||||
class TestDumpdInKwargs:
|
||||
"""Tests for the specific pattern of `dumpd()` result stored in kwargs."""
|
||||
|
||||
def test_dumpd_human_message_in_ai_message_kwargs(self) -> None:
|
||||
"""`AIMessage` with `dumpd(HumanMessage)` in `additional_kwargs`."""
|
||||
h = HumanMessage("Hello")
|
||||
a = AIMessage("foo", additional_kwargs={"bar": [dumpd(h)]})
|
||||
_assert_no_secret_leak(a)
|
||||
|
||||
def test_dumpd_human_message_with_secret_in_ai_message_kwargs(self) -> None:
|
||||
"""`AIMessage` with `dumpd(HumanMessage w/ secret)` in `additional_kwargs`."""
|
||||
h = HumanMessage(
|
||||
"Hello",
|
||||
additional_kwargs={"secret": MALICIOUS_SECRET_DICT},
|
||||
)
|
||||
a = AIMessage("foo", additional_kwargs={"bar": [dumpd(h)]})
|
||||
_assert_no_secret_leak(a)
|
||||
|
||||
def test_double_dumpd_nesting(self) -> None:
|
||||
"""Double nesting: `dumpd(AIMessage(dumpd(HumanMessage)))`."""
|
||||
h = HumanMessage(
|
||||
"Hello",
|
||||
additional_kwargs={"secret": MALICIOUS_SECRET_DICT},
|
||||
)
|
||||
a = AIMessage("foo", additional_kwargs={"bar": [dumpd(h)]})
|
||||
outer = AIMessage("outer", additional_kwargs={"nested": [dumpd(a)]})
|
||||
_assert_no_secret_leak(outer)
|
||||
|
||||
|
||||
class TestRoundTrip:
|
||||
"""Tests that verify round-trip serialization preserves data structure."""
|
||||
|
||||
def test_human_message_with_secret_round_trip(self) -> None:
|
||||
"""Verify secret-like dict is preserved as dict after round-trip."""
|
||||
msg = HumanMessage(
|
||||
content="Hello",
|
||||
additional_kwargs={"data": MALICIOUS_SECRET_DICT},
|
||||
)
|
||||
|
||||
serialized = dumpd(msg)
|
||||
deserialized = load(serialized, secrets_from_env=True)
|
||||
|
||||
# The secret-like dict should be preserved as a plain dict
|
||||
assert deserialized.additional_kwargs["data"] == MALICIOUS_SECRET_DICT
|
||||
assert isinstance(deserialized.additional_kwargs["data"], dict)
|
||||
|
||||
def test_document_with_secret_round_trip(self) -> None:
|
||||
"""Verify secret-like dict in `Document` metadata is preserved."""
|
||||
doc = Document(
|
||||
page_content="Hello",
|
||||
metadata={"data": MALICIOUS_SECRET_DICT},
|
||||
)
|
||||
|
||||
serialized = dumpd(doc)
|
||||
deserialized = load(
|
||||
serialized, secrets_from_env=True, allowed_objects=[Document]
|
||||
)
|
||||
|
||||
# The secret-like dict should be preserved as a plain dict
|
||||
assert deserialized.metadata["data"] == MALICIOUS_SECRET_DICT
|
||||
assert isinstance(deserialized.metadata["data"], dict)
|
||||
|
||||
def test_plain_dict_with_secret_round_trip(self) -> None:
|
||||
"""Verify secret-like dict in plain dict is preserved."""
|
||||
payload = {"data": MALICIOUS_SECRET_DICT}
|
||||
|
||||
serialized = dumpd(payload)
|
||||
deserialized = load(serialized, secrets_from_env=True)
|
||||
|
||||
# The secret-like dict should be preserved as a plain dict
|
||||
assert deserialized["data"] == MALICIOUS_SECRET_DICT
|
||||
assert isinstance(deserialized["data"], dict)
|
||||
|
||||
|
||||
class TestEscapingEfficiency:
|
||||
"""Tests that escaping doesn't cause excessive nesting."""
|
||||
|
||||
def test_no_triple_escaping(self) -> None:
|
||||
"""Verify dumpd doesn't cause triple/multiple escaping."""
|
||||
h = HumanMessage(
|
||||
"Hello",
|
||||
additional_kwargs={"bar": [MALICIOUS_SECRET_DICT]},
|
||||
)
|
||||
a = AIMessage("foo", additional_kwargs={"bar": [dumpd(h)]})
|
||||
d = dumpd(a)
|
||||
|
||||
serialized = json.dumps(d)
|
||||
# Count nested escape markers -
|
||||
# should be max 2 (one for HumanMessage, one for secret)
|
||||
# Not 3+ which would indicate re-escaping of already-escaped content
|
||||
escape_count = len(re.findall(r"__lc_escaped__", serialized))
|
||||
|
||||
# The HumanMessage dict gets escaped (1), the secret inside gets escaped (1)
|
||||
# Total should be 2; 3 or more would indicate re-escaping of already-escaped content
|
||||
assert escape_count <= 2, (
|
||||
f"Found {escape_count} escape markers, expected <= 2. "
|
||||
f"This indicates unnecessary re-escaping.\n{serialized}"
|
||||
)
|
||||
|
||||
def test_double_nesting_no_quadruple_escape(self) -> None:
|
||||
"""Verify double dumpd nesting doesn't explode escape markers."""
|
||||
h = HumanMessage(
|
||||
"Hello",
|
||||
additional_kwargs={"secret": MALICIOUS_SECRET_DICT},
|
||||
)
|
||||
a = AIMessage("middle", additional_kwargs={"nested": [dumpd(h)]})
|
||||
outer = AIMessage("outer", additional_kwargs={"deep": [dumpd(a)]})
|
||||
d = dumpd(outer)
|
||||
|
||||
serialized = json.dumps(d)
|
||||
escape_count = len(re.findall(r"__lc_escaped__", serialized))
|
||||
|
||||
# Should be:
|
||||
# outer escapes middle (1),
|
||||
# middle escapes h (1),
|
||||
# h escapes secret (1) = 3
|
||||
# Not 6+ which would indicate re-escaping
|
||||
assert escape_count <= 3, (
|
||||
f"Found {escape_count} escape markers, expected <= 3. "
|
||||
f"This indicates unnecessary re-escaping."
|
||||
)
|
||||
|
||||
|
||||
class TestConstructorInjection:
|
||||
"""Tests for constructor-type injection (not just secrets)."""
|
||||
|
||||
def test_constructor_in_metadata_not_instantiated(self) -> None:
|
||||
"""Verify constructor-like dict in metadata is not instantiated."""
|
||||
malicious_constructor = {
|
||||
"lc": 1,
|
||||
"type": "constructor",
|
||||
"id": ["langchain_core", "messages", "ai", "AIMessage"],
|
||||
"kwargs": {"content": "injected"},
|
||||
}
|
||||
|
||||
doc = Document(
|
||||
page_content="Hello",
|
||||
metadata={"data": malicious_constructor},
|
||||
)
|
||||
|
||||
serialized = dumpd(doc)
|
||||
deserialized = load(
|
||||
serialized,
|
||||
secrets_from_env=True,
|
||||
allowed_objects=[Document, AIMessage],
|
||||
)
|
||||
|
||||
# The constructor-like dict should be a plain dict, NOT an AIMessage
|
||||
assert isinstance(deserialized.metadata["data"], dict)
|
||||
assert deserialized.metadata["data"] == malicious_constructor
|
||||
|
||||
def test_constructor_in_content_not_instantiated(self) -> None:
|
||||
"""Verify constructor-like dict in message `additional_kwargs` is not instantiated."""
|
||||
malicious_constructor = {
|
||||
"lc": 1,
|
||||
"type": "constructor",
|
||||
"id": ["langchain_core", "messages", "human", "HumanMessage"],
|
||||
"kwargs": {"content": "injected"},
|
||||
}
|
||||
|
||||
msg = AIMessage(
|
||||
content="Hello",
|
||||
additional_kwargs={"nested": malicious_constructor},
|
||||
)
|
||||
|
||||
serialized = dumpd(msg)
|
||||
deserialized = load(
|
||||
serialized,
|
||||
secrets_from_env=True,
|
||||
allowed_objects=[AIMessage, HumanMessage],
|
||||
)
|
||||
|
||||
# The constructor-like dict should be a plain dict, NOT a HumanMessage
|
||||
assert isinstance(deserialized.additional_kwargs["nested"], dict)
|
||||
assert deserialized.additional_kwargs["nested"] == malicious_constructor
|
||||
|
||||
|
||||
def test_allowed_objects() -> None:
|
||||
# Core object
|
||||
msg = AIMessage(content="foo")
|
||||
serialized = dumpd(msg)
|
||||
assert load(serialized) == msg
|
||||
assert load(serialized, allowed_objects=[AIMessage]) == msg
|
||||
assert load(serialized, allowed_objects="core") == msg
|
||||
|
||||
with pytest.raises(ValueError, match="not allowed"):
|
||||
load(serialized, allowed_objects=[])
|
||||
with pytest.raises(ValueError, match="not allowed"):
|
||||
load(serialized, allowed_objects=[Document])
|
||||
@@ -1,12 +1,19 @@
|
||||
import json
|
||||
from typing import Any
|
||||
|
||||
import pytest
|
||||
from pydantic import BaseModel, ConfigDict, Field, SecretStr
|
||||
|
||||
from langchain_core.load import Serializable, dumpd, dumps, load
|
||||
from langchain_core.documents import Document
|
||||
from langchain_core.load import InitValidator, Serializable, dumpd, dumps, load, loads
|
||||
from langchain_core.load.serializable import _is_field_useful
|
||||
from langchain_core.messages import AIMessage
|
||||
from langchain_core.outputs import ChatGeneration, Generation
|
||||
from langchain_core.prompts import (
|
||||
ChatPromptTemplate,
|
||||
HumanMessagePromptTemplate,
|
||||
PromptTemplate,
|
||||
)
|
||||
|
||||
|
||||
class NonBoolObj:
|
||||
@@ -145,10 +152,17 @@ def test_simple_deserialization() -> None:
|
||||
"lc": 1,
|
||||
"type": "constructor",
|
||||
}
|
||||
new_foo = load(serialized_foo, valid_namespaces=["tests"])
|
||||
new_foo = load(serialized_foo, allowed_objects=[Foo], valid_namespaces=["tests"])
|
||||
assert new_foo == foo
|
||||
|
||||
|
||||
def test_disallowed_deserialization() -> None:
|
||||
foo = Foo(bar=1, baz="hello")
|
||||
serialized_foo = dumpd(foo)
|
||||
with pytest.raises(ValueError, match="not allowed"):
|
||||
load(serialized_foo, allowed_objects=[], valid_namespaces=["tests"])
|
||||
|
||||
|
||||
class Foo2(Serializable):
|
||||
bar: int
|
||||
baz: str
|
||||
@@ -170,6 +184,7 @@ def test_simple_deserialization_with_additional_imports() -> None:
|
||||
}
|
||||
new_foo = load(
|
||||
serialized_foo,
|
||||
allowed_objects=[Foo2],
|
||||
valid_namespaces=["tests"],
|
||||
additional_import_mappings={
|
||||
("tests", "unit_tests", "load", "test_serializable", "Foo"): (
|
||||
@@ -223,7 +238,7 @@ def test_serialization_with_pydantic() -> None:
|
||||
)
|
||||
)
|
||||
ser = dumpd(llm_response)
|
||||
deser = load(ser)
|
||||
deser = load(ser, allowed_objects=[ChatGeneration, AIMessage])
|
||||
assert isinstance(deser, ChatGeneration)
|
||||
assert deser.message.content
|
||||
assert deser.message.additional_kwargs["parsed"] == my_model.model_dump()
|
||||
@@ -260,8 +275,8 @@ def test_serialization_with_ignore_unserializable_fields() -> None:
|
||||
]
|
||||
]
|
||||
}
|
||||
ser = dumpd(data)
|
||||
deser = load(ser, ignore_unserializable_fields=True)
|
||||
# Load directly (no dumpd - this is already serialized data)
|
||||
deser = load(data, allowed_objects=[AIMessage], ignore_unserializable_fields=True)
|
||||
assert deser == {
|
||||
"messages": [
|
||||
[
|
||||
@@ -365,3 +380,514 @@ def test_dumps_mixed_data_structure() -> None:
|
||||
# Primitives should remain unchanged
|
||||
assert parsed["list"] == [1, 2, {"nested": "value"}]
|
||||
assert parsed["primitive"] == "string"
|
||||
|
||||
|
||||
def test_document_normal_metadata_allowed() -> None:
|
||||
"""Test that `Document` metadata without `'lc'` key works fine."""
|
||||
doc = Document(
|
||||
page_content="Hello world",
|
||||
metadata={"source": "test.txt", "page": 1, "nested": {"key": "value"}},
|
||||
)
|
||||
serialized = dumpd(doc)
|
||||
|
||||
loaded = load(serialized, allowed_objects=[Document])
|
||||
assert loaded.page_content == "Hello world"
|
||||
|
||||
expected = {"source": "test.txt", "page": 1, "nested": {"key": "value"}}
|
||||
assert loaded.metadata == expected
|
||||
|
||||
|
||||
class TestEscaping:
|
||||
"""Tests that escape-based serialization prevents injection attacks.
|
||||
|
||||
When user data contains an `'lc'` key, it's escaped during serialization
|
||||
(wrapped in `{"__lc_escaped__": ...}`). During deserialization, escaped
|
||||
dicts are unwrapped and returned as plain dicts - NOT instantiated as
|
||||
LC objects.
|
||||
"""
|
||||
|
||||
def test_document_metadata_with_lc_key_escaped(self) -> None:
|
||||
"""Test that `Document` metadata with `'lc'` key round-trips as plain dict."""
|
||||
# User data that looks like an LC constructor - should be escaped, not executed
|
||||
suspicious_metadata = {"lc": 1, "type": "constructor", "id": ["some", "module"]}
|
||||
doc = Document(page_content="test", metadata=suspicious_metadata)
|
||||
|
||||
# Serialize - should escape the metadata
|
||||
serialized = dumpd(doc)
|
||||
assert serialized["kwargs"]["metadata"] == {
|
||||
"__lc_escaped__": suspicious_metadata
|
||||
}
|
||||
|
||||
# Deserialize - should restore original metadata as plain dict
|
||||
loaded = load(serialized, allowed_objects=[Document])
|
||||
assert loaded.metadata == suspicious_metadata # Plain dict, not instantiated
|
||||
|
||||
def test_document_metadata_with_nested_lc_key_escaped(self) -> None:
|
||||
"""Test that nested `'lc'` key in `Document` metadata is escaped."""
|
||||
suspicious_nested = {"lc": 1, "type": "constructor", "id": ["some", "module"]}
|
||||
doc = Document(page_content="test", metadata={"nested": suspicious_nested})
|
||||
|
||||
serialized = dumpd(doc)
|
||||
# The nested dict with 'lc' key should be escaped
|
||||
assert serialized["kwargs"]["metadata"]["nested"] == {
|
||||
"__lc_escaped__": suspicious_nested
|
||||
}
|
||||
|
||||
loaded = load(serialized, allowed_objects=[Document])
|
||||
assert loaded.metadata == {"nested": suspicious_nested}
|
||||
|
||||
def test_document_metadata_with_lc_key_in_list_escaped(self) -> None:
|
||||
"""Test that `'lc'` key in list items within `Document` metadata is escaped."""
|
||||
suspicious_item = {"lc": 1, "type": "constructor", "id": ["some", "module"]}
|
||||
doc = Document(page_content="test", metadata={"items": [suspicious_item]})
|
||||
|
||||
serialized = dumpd(doc)
|
||||
assert serialized["kwargs"]["metadata"]["items"][0] == {
|
||||
"__lc_escaped__": suspicious_item
|
||||
}
|
||||
|
||||
loaded = load(serialized, allowed_objects=[Document])
|
||||
assert loaded.metadata == {"items": [suspicious_item]}
|
||||
|
||||
def test_malicious_payload_not_instantiated(self) -> None:
|
||||
"""Test that malicious LC-like structures in user data are NOT instantiated."""
|
||||
# An attacker might craft a payload with a valid AIMessage structure in metadata
|
||||
malicious_data = {
|
||||
"lc": 1,
|
||||
"type": "constructor",
|
||||
"id": ["langchain", "schema", "document", "Document"],
|
||||
"kwargs": {
|
||||
"page_content": "test",
|
||||
"metadata": {
|
||||
# This looks like a valid LC object but is in escaped form
|
||||
"__lc_escaped__": {
|
||||
"lc": 1,
|
||||
"type": "constructor",
|
||||
"id": ["langchain_core", "messages", "ai", "AIMessage"],
|
||||
"kwargs": {"content": "injected message"},
|
||||
}
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
# Even though AIMessage is allowed, the metadata should remain as dict
|
||||
loaded = load(malicious_data, allowed_objects=[Document, AIMessage])
|
||||
assert loaded.page_content == "test"
|
||||
# The metadata is the original dict (unescaped), NOT an AIMessage instance
|
||||
assert loaded.metadata == {
|
||||
"lc": 1,
|
||||
"type": "constructor",
|
||||
"id": ["langchain_core", "messages", "ai", "AIMessage"],
|
||||
"kwargs": {"content": "injected message"},
|
||||
}
|
||||
assert not isinstance(loaded.metadata, AIMessage)
|
||||
|
||||
def test_message_additional_kwargs_with_lc_key_escaped(self) -> None:
|
||||
"""Test that `AIMessage` `additional_kwargs` with `'lc'` is escaped."""
|
||||
suspicious_data = {"lc": 1, "type": "constructor", "id": ["x", "y"]}
|
||||
msg = AIMessage(
|
||||
content="Hello",
|
||||
additional_kwargs={"data": suspicious_data},
|
||||
)
|
||||
|
||||
serialized = dumpd(msg)
|
||||
assert serialized["kwargs"]["additional_kwargs"]["data"] == {
|
||||
"__lc_escaped__": suspicious_data
|
||||
}
|
||||
|
||||
loaded = load(serialized, allowed_objects=[AIMessage])
|
||||
assert loaded.additional_kwargs == {"data": suspicious_data}
|
||||
|
||||
def test_message_response_metadata_with_lc_key_escaped(self) -> None:
|
||||
"""Test that `AIMessage` `response_metadata` with `'lc'` is escaped."""
|
||||
suspicious_data = {"lc": 1, "type": "constructor", "id": ["x", "y"]}
|
||||
msg = AIMessage(content="Hello", response_metadata=suspicious_data)
|
||||
|
||||
serialized = dumpd(msg)
|
||||
assert serialized["kwargs"]["response_metadata"] == {
|
||||
"__lc_escaped__": suspicious_data
|
||||
}
|
||||
|
||||
loaded = load(serialized, allowed_objects=[AIMessage])
|
||||
assert loaded.response_metadata == suspicious_data
|
||||
|
||||
def test_double_escape_handling(self) -> None:
|
||||
"""Test that data containing escape key itself is properly handled."""
|
||||
# User data that contains our escape key
|
||||
data_with_escape_key = {"__lc_escaped__": "some_value"}
|
||||
doc = Document(page_content="test", metadata=data_with_escape_key)
|
||||
|
||||
serialized = dumpd(doc)
|
||||
# Should be double-escaped since it looks like an escaped dict
|
||||
assert serialized["kwargs"]["metadata"] == {
|
||||
"__lc_escaped__": {"__lc_escaped__": "some_value"}
|
||||
}
|
||||
|
||||
loaded = load(serialized, allowed_objects=[Document])
|
||||
assert loaded.metadata == {"__lc_escaped__": "some_value"}
|
||||
|
||||
|
||||
class TestDumpdEscapesLcKeyInPlainDicts:
|
||||
"""Tests that `dumpd()` escapes `'lc'` keys in plain dict kwargs."""
|
||||
|
||||
def test_normal_message_not_escaped(self) -> None:
|
||||
"""Test that normal `AIMessage` without `'lc'` key is not escaped."""
|
||||
msg = AIMessage(
|
||||
content="Hello",
|
||||
additional_kwargs={"tool_calls": []},
|
||||
response_metadata={"model": "gpt-4"},
|
||||
)
|
||||
serialized = dumpd(msg)
|
||||
assert serialized["kwargs"]["content"] == "Hello"
|
||||
# No escape wrappers for normal data
|
||||
assert "__lc_escaped__" not in str(serialized)
|
||||
|
||||
def test_document_metadata_with_lc_key_escaped(self) -> None:
|
||||
"""Test that `Document` with `'lc'` key in metadata is escaped."""
|
||||
doc = Document(
|
||||
page_content="test",
|
||||
metadata={"lc": 1, "type": "constructor"},
|
||||
)
|
||||
|
||||
serialized = dumpd(doc)
|
||||
# Should be escaped, not blocked
|
||||
assert serialized["kwargs"]["metadata"] == {
|
||||
"__lc_escaped__": {"lc": 1, "type": "constructor"}
|
||||
}
|
||||
|
||||
def test_document_metadata_with_nested_lc_key_escaped(self) -> None:
|
||||
"""Test that `Document` with nested `'lc'` in metadata is escaped."""
|
||||
doc = Document(
|
||||
page_content="test",
|
||||
metadata={"nested": {"lc": 1}},
|
||||
)
|
||||
|
||||
serialized = dumpd(doc)
|
||||
assert serialized["kwargs"]["metadata"]["nested"] == {
|
||||
"__lc_escaped__": {"lc": 1}
|
||||
}
|
||||
|
||||
def test_message_additional_kwargs_with_lc_key_escaped(self) -> None:
|
||||
"""Test `AIMessage` with `'lc'` in `additional_kwargs` is escaped."""
|
||||
msg = AIMessage(
|
||||
content="Hello",
|
||||
additional_kwargs={"malicious": {"lc": 1}},
|
||||
)
|
||||
|
||||
serialized = dumpd(msg)
|
||||
assert serialized["kwargs"]["additional_kwargs"]["malicious"] == {
|
||||
"__lc_escaped__": {"lc": 1}
|
||||
}
|
||||
|
||||
def test_message_response_metadata_with_lc_key_escaped(self) -> None:
|
||||
"""Test `AIMessage` with `'lc'` in `response_metadata` is escaped."""
|
||||
msg = AIMessage(
|
||||
content="Hello",
|
||||
response_metadata={"lc": 1},
|
||||
)
|
||||
|
||||
serialized = dumpd(msg)
|
||||
assert serialized["kwargs"]["response_metadata"] == {
|
||||
"__lc_escaped__": {"lc": 1}
|
||||
}
|
||||
|
||||
|
||||
class TestInitValidator:
|
||||
"""Tests for `init_validator` on `load()` and `loads()`."""
|
||||
|
||||
def test_init_validator_allows_valid_kwargs(self) -> None:
|
||||
"""Test that `init_validator` returning None allows deserialization."""
|
||||
msg = AIMessage(content="Hello")
|
||||
serialized = dumpd(msg)
|
||||
|
||||
def allow_all(_class_path: tuple[str, ...], _kwargs: dict[str, Any]) -> None:
|
||||
pass # Allow all by doing nothing
|
||||
|
||||
loaded = load(serialized, allowed_objects=[AIMessage], init_validator=allow_all)
|
||||
assert loaded == msg
|
||||
|
||||
def test_init_validator_blocks_deserialization(self) -> None:
|
||||
"""Test that `init_validator` can block deserialization by raising."""
|
||||
doc = Document(page_content="test", metadata={"source": "test.txt"})
|
||||
serialized = dumpd(doc)
|
||||
|
||||
def block_metadata(
|
||||
_class_path: tuple[str, ...], kwargs: dict[str, Any]
|
||||
) -> None:
|
||||
if "metadata" in kwargs:
|
||||
msg = "Metadata not allowed"
|
||||
raise ValueError(msg)
|
||||
|
||||
with pytest.raises(ValueError, match="Metadata not allowed"):
|
||||
load(serialized, allowed_objects=[Document], init_validator=block_metadata)
|
||||
|
||||
def test_init_validator_receives_correct_class_path(self) -> None:
|
||||
"""Test that `init_validator` receives the correct class path."""
|
||||
msg = AIMessage(content="Hello")
|
||||
serialized = dumpd(msg)
|
||||
|
||||
received_class_paths: list[tuple[str, ...]] = []
|
||||
|
||||
def capture_class_path(
|
||||
class_path: tuple[str, ...], _kwargs: dict[str, Any]
|
||||
) -> None:
|
||||
received_class_paths.append(class_path)
|
||||
|
||||
load(serialized, allowed_objects=[AIMessage], init_validator=capture_class_path)
|
||||
|
||||
assert len(received_class_paths) == 1
|
||||
assert received_class_paths[0] == (
|
||||
"langchain",
|
||||
"schema",
|
||||
"messages",
|
||||
"AIMessage",
|
||||
)
|
||||
|
||||
def test_init_validator_receives_correct_kwargs(self) -> None:
|
||||
"""Test that `init_validator` receives the kwargs dict."""
|
||||
msg = AIMessage(content="Hello world", name="test_name")
|
||||
serialized = dumpd(msg)
|
||||
|
||||
received_kwargs: list[dict[str, Any]] = []
|
||||
|
||||
def capture_kwargs(
|
||||
_class_path: tuple[str, ...], kwargs: dict[str, Any]
|
||||
) -> None:
|
||||
received_kwargs.append(kwargs)
|
||||
|
||||
load(serialized, allowed_objects=[AIMessage], init_validator=capture_kwargs)
|
||||
|
||||
assert len(received_kwargs) == 1
|
||||
assert "content" in received_kwargs[0]
|
||||
assert received_kwargs[0]["content"] == "Hello world"
|
||||
assert "name" in received_kwargs[0]
|
||||
assert received_kwargs[0]["name"] == "test_name"
|
||||
|
||||
def test_init_validator_with_loads(self) -> None:
|
||||
"""Test that `init_validator` works with `loads()` function."""
|
||||
doc = Document(page_content="test", metadata={"key": "value"})
|
||||
json_str = dumps(doc)
|
||||
|
||||
def block_metadata(
|
||||
_class_path: tuple[str, ...], kwargs: dict[str, Any]
|
||||
) -> None:
|
||||
if "metadata" in kwargs:
|
||||
msg = "Metadata not allowed"
|
||||
raise ValueError(msg)
|
||||
|
||||
with pytest.raises(ValueError, match="Metadata not allowed"):
|
||||
loads(json_str, allowed_objects=[Document], init_validator=block_metadata)
|
||||
|
||||
def test_init_validator_none_allows_all(self) -> None:
|
||||
"""Test that `init_validator=None` (default) allows all kwargs."""
|
||||
msg = AIMessage(content="Hello")
|
||||
serialized = dumpd(msg)
|
||||
|
||||
# Should work without init_validator
|
||||
loaded = load(serialized, allowed_objects=[AIMessage])
|
||||
assert loaded == msg
|
||||
|
||||
def test_init_validator_type_alias_exists(self) -> None:
|
||||
"""Test that `InitValidator` type alias is exported and usable."""
|
||||
|
||||
def my_validator(_class_path: tuple[str, ...], _kwargs: dict[str, Any]) -> None:
|
||||
pass
|
||||
|
||||
validator_typed: InitValidator = my_validator
|
||||
assert callable(validator_typed)
|
||||
|
||||
def test_init_validator_blocks_specific_class(self) -> None:
|
||||
"""Test blocking deserialization for a specific class."""
|
||||
doc = Document(page_content="test", metadata={"source": "test.txt"})
|
||||
serialized = dumpd(doc)
|
||||
|
||||
def block_documents(
|
||||
class_path: tuple[str, ...], _kwargs: dict[str, Any]
|
||||
) -> None:
|
||||
if class_path == ("langchain", "schema", "document", "Document"):
|
||||
msg = "Documents not allowed"
|
||||
raise ValueError(msg)
|
||||
|
||||
with pytest.raises(ValueError, match="Documents not allowed"):
|
||||
load(serialized, allowed_objects=[Document], init_validator=block_documents)
|
||||
|
||||
|
||||
class TestJinja2SecurityBlocking:
|
||||
"""Tests blocking Jinja2 templates by default."""
|
||||
|
||||
def test_fstring_template_allowed(self) -> None:
|
||||
"""Test that f-string templates deserialize successfully."""
|
||||
# Serialized ChatPromptTemplate with f-string format
|
||||
serialized = {
|
||||
"lc": 1,
|
||||
"type": "constructor",
|
||||
"id": ["langchain", "prompts", "chat", "ChatPromptTemplate"],
|
||||
"kwargs": {
|
||||
"input_variables": ["name"],
|
||||
"messages": [
|
||||
{
|
||||
"lc": 1,
|
||||
"type": "constructor",
|
||||
"id": [
|
||||
"langchain",
|
||||
"prompts",
|
||||
"chat",
|
||||
"HumanMessagePromptTemplate",
|
||||
],
|
||||
"kwargs": {
|
||||
"prompt": {
|
||||
"lc": 1,
|
||||
"type": "constructor",
|
||||
"id": [
|
||||
"langchain",
|
||||
"prompts",
|
||||
"prompt",
|
||||
"PromptTemplate",
|
||||
],
|
||||
"kwargs": {
|
||||
"input_variables": ["name"],
|
||||
"template": "Hello {name}",
|
||||
"template_format": "f-string",
|
||||
},
|
||||
}
|
||||
},
|
||||
}
|
||||
],
|
||||
},
|
||||
}
|
||||
|
||||
# f-string should deserialize successfully
|
||||
loaded = load(
|
||||
serialized,
|
||||
allowed_objects=[
|
||||
ChatPromptTemplate,
|
||||
HumanMessagePromptTemplate,
|
||||
PromptTemplate,
|
||||
],
|
||||
)
|
||||
assert isinstance(loaded, ChatPromptTemplate)
|
||||
assert loaded.input_variables == ["name"]
|
||||
|
||||
def test_jinja2_template_blocked(self) -> None:
|
||||
"""Test that Jinja2 templates are blocked by default."""
|
||||
# Malicious serialized payload attempting to use jinja2
|
||||
malicious_serialized = {
|
||||
"lc": 1,
|
||||
"type": "constructor",
|
||||
"id": ["langchain", "prompts", "chat", "ChatPromptTemplate"],
|
||||
"kwargs": {
|
||||
"input_variables": ["name"],
|
||||
"messages": [
|
||||
{
|
||||
"lc": 1,
|
||||
"type": "constructor",
|
||||
"id": [
|
||||
"langchain",
|
||||
"prompts",
|
||||
"chat",
|
||||
"HumanMessagePromptTemplate",
|
||||
],
|
||||
"kwargs": {
|
||||
"prompt": {
|
||||
"lc": 1,
|
||||
"type": "constructor",
|
||||
"id": [
|
||||
"langchain",
|
||||
"prompts",
|
||||
"prompt",
|
||||
"PromptTemplate",
|
||||
],
|
||||
"kwargs": {
|
||||
"input_variables": ["name"],
|
||||
"template": "{{ name }}",
|
||||
"template_format": "jinja2",
|
||||
},
|
||||
}
|
||||
},
|
||||
}
|
||||
],
|
||||
},
|
||||
}
|
||||
|
||||
# jinja2 should be blocked by default
|
||||
with pytest.raises(ValueError, match="Jinja2 templates are not allowed"):
|
||||
load(
|
||||
malicious_serialized,
|
||||
allowed_objects=[
|
||||
ChatPromptTemplate,
|
||||
HumanMessagePromptTemplate,
|
||||
PromptTemplate,
|
||||
],
|
||||
)
|
||||
|
||||
def test_jinja2_blocked_standalone_prompt_template(self) -> None:
|
||||
"""Test blocking Jinja2 on standalone `PromptTemplate`."""
|
||||
serialized_jinja2 = {
|
||||
"lc": 1,
|
||||
"type": "constructor",
|
||||
"id": ["langchain", "prompts", "prompt", "PromptTemplate"],
|
||||
"kwargs": {
|
||||
"input_variables": ["name"],
|
||||
"template": "{{ name }}",
|
||||
"template_format": "jinja2",
|
||||
},
|
||||
}
|
||||
|
||||
serialized_fstring = {
|
||||
"lc": 1,
|
||||
"type": "constructor",
|
||||
"id": ["langchain", "prompts", "prompt", "PromptTemplate"],
|
||||
"kwargs": {
|
||||
"input_variables": ["name"],
|
||||
"template": "{name}",
|
||||
"template_format": "f-string",
|
||||
},
|
||||
}
|
||||
|
||||
# f-string should work
|
||||
loaded = load(
|
||||
serialized_fstring,
|
||||
allowed_objects=[PromptTemplate],
|
||||
)
|
||||
assert isinstance(loaded, PromptTemplate)
|
||||
assert loaded.template == "{name}"
|
||||
|
||||
# jinja2 should be blocked by default
|
||||
with pytest.raises(ValueError, match="Jinja2 templates are not allowed"):
|
||||
load(
|
||||
serialized_jinja2,
|
||||
allowed_objects=[PromptTemplate],
|
||||
)
|
||||
|
||||
def test_jinja2_blocked_by_default(self) -> None:
|
||||
"""Test that Jinja2 templates are blocked by default."""
|
||||
serialized_jinja2 = {
|
||||
"lc": 1,
|
||||
"type": "constructor",
|
||||
"id": ["langchain", "prompts", "prompt", "PromptTemplate"],
|
||||
"kwargs": {
|
||||
"input_variables": ["name"],
|
||||
"template": "{{ name }}",
|
||||
"template_format": "jinja2",
|
||||
},
|
||||
}
|
||||
|
||||
serialized_fstring = {
|
||||
"lc": 1,
|
||||
"type": "constructor",
|
||||
"id": ["langchain", "prompts", "prompt", "PromptTemplate"],
|
||||
"kwargs": {
|
||||
"input_variables": ["name"],
|
||||
"template": "{name}",
|
||||
"template_format": "f-string",
|
||||
},
|
||||
}
|
||||
|
||||
# f-string should work
|
||||
loaded = load(serialized_fstring, allowed_objects=[PromptTemplate])
|
||||
assert isinstance(loaded, PromptTemplate)
|
||||
assert loaded.template == "{name}"
|
||||
|
||||
# jinja2 should be blocked by default
|
||||
with pytest.raises(ValueError, match="Jinja2 templates are not allowed"):
|
||||
load(serialized_jinja2, allowed_objects=[PromptTemplate])
|
||||
|
||||
@@ -13,6 +13,16 @@ def test_convert_to_v1_from_anthropic() -> None:
|
||||
"name": "get_weather",
|
||||
"input": {"location": "San Francisco"},
|
||||
},
|
||||
{
|
||||
"type": "tool_use",
|
||||
"id": "abc_234",
|
||||
"name": "get_weather_programmatic",
|
||||
"input": {"location": "Boston"},
|
||||
"caller": {
|
||||
"type": "code_execution_20250825",
|
||||
"tool_id": "srvtoolu_abc234",
|
||||
},
|
||||
},
|
||||
{
|
||||
"type": "text",
|
||||
"text": "It's sunny.",
|
||||
@@ -88,6 +98,18 @@ def test_convert_to_v1_from_anthropic() -> None:
|
||||
"name": "get_weather",
|
||||
"args": {"location": "San Francisco"},
|
||||
},
|
||||
{
|
||||
"type": "tool_call",
|
||||
"id": "abc_234",
|
||||
"name": "get_weather_programmatic",
|
||||
"args": {"location": "Boston"},
|
||||
"extras": {
|
||||
"caller": {
|
||||
"type": "code_execution_20250825",
|
||||
"tool_id": "srvtoolu_abc234",
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
"type": "text",
|
||||
"text": "It's sunny.",
|
||||
|
||||
@@ -0,0 +1,218 @@
|
||||
"""Tests for Google GenAI block translator."""
|
||||
|
||||
from langchain_core.messages.block_translators.google_genai import (
|
||||
translate_grounding_metadata_to_citations,
|
||||
)
|
||||
|
||||
|
||||
def test_translate_grounding_metadata_web() -> None:
|
||||
"""Test translation of web grounding metadata to citations."""
|
||||
grounding_metadata = {
|
||||
"grounding_chunks": [
|
||||
{
|
||||
"web": {
|
||||
"uri": "https://example.com",
|
||||
"title": "Example Site",
|
||||
},
|
||||
"maps": None,
|
||||
}
|
||||
],
|
||||
"grounding_supports": [
|
||||
{
|
||||
"segment": {
|
||||
"start_index": 0,
|
||||
"end_index": 13,
|
||||
"text": "Test response",
|
||||
},
|
||||
"grounding_chunk_indices": [0],
|
||||
"confidence_scores": [],
|
||||
}
|
||||
],
|
||||
"web_search_queries": ["test query"],
|
||||
}
|
||||
|
||||
citations = translate_grounding_metadata_to_citations(grounding_metadata)
|
||||
|
||||
assert len(citations) == 1
|
||||
citation = citations[0]
|
||||
assert citation["type"] == "citation"
|
||||
assert citation.get("url") == "https://example.com"
|
||||
assert citation.get("title") == "Example Site"
|
||||
assert citation.get("start_index") == 0
|
||||
assert citation.get("end_index") == 13
|
||||
assert citation.get("cited_text") == "Test response"
|
||||
|
||||
extras = citation.get("extras", {})["google_ai_metadata"]
|
||||
assert extras["web_search_queries"] == ["test query"]
|
||||
assert extras["grounding_chunk_index"] == 0
|
||||
assert "place_id" not in extras
|
||||
|
||||
|
||||
def test_translate_grounding_metadata_maps() -> None:
|
||||
"""Test translation of maps grounding metadata to citations."""
|
||||
grounding_metadata = {
|
||||
"grounding_chunks": [
|
||||
{
|
||||
"web": None,
|
||||
"maps": {
|
||||
"uri": "https://maps.google.com/?cid=13100894621228039586",
|
||||
"title": "Heaven on 7th Marketplace",
|
||||
"placeId": "places/ChIJ0-zA1vBZwokRon0fGj-6z7U",
|
||||
},
|
||||
}
|
||||
],
|
||||
"grounding_supports": [
|
||||
{
|
||||
"segment": {
|
||||
"start_index": 0,
|
||||
"end_index": 25,
|
||||
"text": "Great Italian restaurant",
|
||||
},
|
||||
"grounding_chunk_indices": [0],
|
||||
"confidence_scores": [0.95],
|
||||
}
|
||||
],
|
||||
"web_search_queries": [],
|
||||
}
|
||||
|
||||
citations = translate_grounding_metadata_to_citations(grounding_metadata)
|
||||
|
||||
assert len(citations) == 1
|
||||
citation = citations[0]
|
||||
assert citation["type"] == "citation"
|
||||
assert citation.get("url") == "https://maps.google.com/?cid=13100894621228039586"
|
||||
assert citation.get("title") == "Heaven on 7th Marketplace"
|
||||
assert citation.get("start_index") == 0
|
||||
assert citation.get("end_index") == 25
|
||||
assert citation.get("cited_text") == "Great Italian restaurant"
|
||||
|
||||
extras = citation.get("extras", {})["google_ai_metadata"]
|
||||
assert extras["web_search_queries"] == []
|
||||
assert extras["grounding_chunk_index"] == 0
|
||||
assert extras["confidence_scores"] == [0.95]
|
||||
assert extras["place_id"] == "places/ChIJ0-zA1vBZwokRon0fGj-6z7U"
|
||||
|
||||
|
||||
def test_translate_grounding_metadata_none() -> None:
|
||||
"""Test translation when both web and maps are None."""
|
||||
grounding_metadata = {
|
||||
"grounding_chunks": [
|
||||
{
|
||||
"web": None,
|
||||
"maps": None,
|
||||
}
|
||||
],
|
||||
"grounding_supports": [
|
||||
{
|
||||
"segment": {
|
||||
"start_index": 0,
|
||||
"end_index": 10,
|
||||
"text": "test text",
|
||||
},
|
||||
"grounding_chunk_indices": [0],
|
||||
"confidence_scores": [],
|
||||
}
|
||||
],
|
||||
"web_search_queries": [],
|
||||
}
|
||||
|
||||
citations = translate_grounding_metadata_to_citations(grounding_metadata)
|
||||
|
||||
# Should still create citation but without url/title fields when None
|
||||
assert len(citations) == 1
|
||||
citation = citations[0]
|
||||
assert citation["type"] == "citation"
|
||||
# url and title are omitted when None
|
||||
assert "url" not in citation
|
||||
assert "title" not in citation
|
||||
assert citation.get("start_index") == 0
|
||||
assert citation.get("end_index") == 10
|
||||
assert citation.get("cited_text") == "test text"
|
||||
|
||||
|
||||
def test_translate_grounding_metadata_confidence_scores_none() -> None:
|
||||
"""Test translation when confidence_scores is None (API returns this)."""
|
||||
grounding_metadata = {
|
||||
"grounding_chunks": [
|
||||
{
|
||||
"web": None,
|
||||
"maps": {
|
||||
"uri": "https://maps.google.com/?cid=123",
|
||||
"title": "Test Restaurant",
|
||||
"placeId": "places/ChIJ123",
|
||||
},
|
||||
}
|
||||
],
|
||||
"grounding_supports": [
|
||||
{
|
||||
"segment": {
|
||||
"start_index": 0,
|
||||
"end_index": 10,
|
||||
"text": "test text",
|
||||
},
|
||||
"grounding_chunk_indices": [0],
|
||||
"confidence_scores": None, # API returns None, not []
|
||||
}
|
||||
],
|
||||
"web_search_queries": ["test query"],
|
||||
}
|
||||
|
||||
citations = translate_grounding_metadata_to_citations(grounding_metadata)
|
||||
|
||||
assert len(citations) == 1
|
||||
extras = citations[0].get("extras", {})["google_ai_metadata"]
|
||||
# Should convert None to empty list
|
||||
assert extras["confidence_scores"] == []
|
||||
assert isinstance(extras["confidence_scores"], list)
|
||||
|
||||
|
||||
def test_translate_grounding_metadata_multiple_chunks() -> None:
|
||||
"""Test translation with multiple grounding chunks."""
|
||||
grounding_metadata = {
|
||||
"grounding_chunks": [
|
||||
{
|
||||
"web": {
|
||||
"uri": "https://example1.com",
|
||||
"title": "Example 1",
|
||||
},
|
||||
"maps": None,
|
||||
},
|
||||
{
|
||||
"web": None,
|
||||
"maps": {
|
||||
"uri": "https://maps.google.com/?cid=123",
|
||||
"title": "Place 1",
|
||||
"placeId": "places/123",
|
||||
},
|
||||
},
|
||||
],
|
||||
"grounding_supports": [
|
||||
{
|
||||
"segment": {
|
||||
"start_index": 0,
|
||||
"end_index": 10,
|
||||
"text": "First part",
|
||||
},
|
||||
"grounding_chunk_indices": [0, 1],
|
||||
"confidence_scores": [],
|
||||
}
|
||||
],
|
||||
"web_search_queries": [],
|
||||
}
|
||||
|
||||
citations = translate_grounding_metadata_to_citations(grounding_metadata)
|
||||
|
||||
# Should create two citations, one for each chunk
|
||||
assert len(citations) == 2
|
||||
|
||||
# First citation from web chunk
|
||||
assert citations[0].get("url") == "https://example1.com"
|
||||
assert citations[0].get("title") == "Example 1"
|
||||
assert "place_id" not in citations[0].get("extras", {})["google_ai_metadata"]
|
||||
|
||||
# Second citation from maps chunk
|
||||
assert citations[1].get("url") == "https://maps.google.com/?cid=123"
|
||||
assert citations[1].get("title") == "Place 1"
|
||||
assert (
|
||||
citations[1].get("extras", {})["google_ai_metadata"]["place_id"] == "places/123"
|
||||
)
|
||||
@@ -385,8 +385,9 @@ def test_compat_responses_v03() -> None:
|
||||
]
|
||||
assert message_v03.content_blocks == expected_content
|
||||
|
||||
# Test chunks
|
||||
## Tool calls
|
||||
# --- Test chunks --- #
|
||||
|
||||
# Tool calls
|
||||
chunk_1 = AIMessageChunk(
|
||||
content=[],
|
||||
additional_kwargs={"__openai_function_call_ids__": {"call_abc": "fc_abc"}},
|
||||
@@ -443,7 +444,7 @@ def test_compat_responses_v03() -> None:
|
||||
]
|
||||
assert chunk.content_blocks == expected_content
|
||||
|
||||
## Reasoning
|
||||
# Reasoning
|
||||
chunk_1 = AIMessageChunk(
|
||||
content=[],
|
||||
additional_kwargs={
|
||||
@@ -477,7 +478,7 @@ def test_compat_responses_v03() -> None:
|
||||
|
||||
def test_convert_to_openai_data_block() -> None:
|
||||
# Chat completions
|
||||
## Image / url
|
||||
# Image / url
|
||||
block = {
|
||||
"type": "image",
|
||||
"url": "https://example.com/test.png",
|
||||
@@ -489,7 +490,7 @@ def test_convert_to_openai_data_block() -> None:
|
||||
result = convert_to_openai_data_block(block)
|
||||
assert result == expected
|
||||
|
||||
## Image / base64
|
||||
# Image / base64
|
||||
block = {
|
||||
"type": "image",
|
||||
"base64": "<base64 string>",
|
||||
@@ -502,7 +503,7 @@ def test_convert_to_openai_data_block() -> None:
|
||||
result = convert_to_openai_data_block(block)
|
||||
assert result == expected
|
||||
|
||||
## File / url
|
||||
# File / url
|
||||
block = {
|
||||
"type": "file",
|
||||
"url": "https://example.com/test.pdf",
|
||||
@@ -510,7 +511,7 @@ def test_convert_to_openai_data_block() -> None:
|
||||
with pytest.raises(ValueError, match="does not support"):
|
||||
result = convert_to_openai_data_block(block)
|
||||
|
||||
## File / base64
|
||||
# File / base64
|
||||
block = {
|
||||
"type": "file",
|
||||
"base64": "<base64 string>",
|
||||
@@ -527,7 +528,7 @@ def test_convert_to_openai_data_block() -> None:
|
||||
result = convert_to_openai_data_block(block)
|
||||
assert result == expected
|
||||
|
||||
## File / file ID
|
||||
# File / file ID
|
||||
block = {
|
||||
"type": "file",
|
||||
"file_id": "file-abc123",
|
||||
@@ -536,7 +537,7 @@ def test_convert_to_openai_data_block() -> None:
|
||||
result = convert_to_openai_data_block(block)
|
||||
assert result == expected
|
||||
|
||||
## Audio / base64
|
||||
# Audio / base64
|
||||
block = {
|
||||
"type": "audio",
|
||||
"base64": "<base64 string>",
|
||||
@@ -550,7 +551,7 @@ def test_convert_to_openai_data_block() -> None:
|
||||
assert result == expected
|
||||
|
||||
# Responses
|
||||
## Image / url
|
||||
# Image / url
|
||||
block = {
|
||||
"type": "image",
|
||||
"url": "https://example.com/test.png",
|
||||
@@ -559,7 +560,7 @@ def test_convert_to_openai_data_block() -> None:
|
||||
result = convert_to_openai_data_block(block, api="responses")
|
||||
assert result == expected
|
||||
|
||||
## Image / base64
|
||||
# Image / base64
|
||||
block = {
|
||||
"type": "image",
|
||||
"base64": "<base64 string>",
|
||||
@@ -572,14 +573,14 @@ def test_convert_to_openai_data_block() -> None:
|
||||
result = convert_to_openai_data_block(block, api="responses")
|
||||
assert result == expected
|
||||
|
||||
## File / url
|
||||
# File / url
|
||||
block = {
|
||||
"type": "file",
|
||||
"url": "https://example.com/test.pdf",
|
||||
}
|
||||
expected = {"type": "input_file", "file_url": "https://example.com/test.pdf"}
|
||||
|
||||
## File / base64
|
||||
# File / base64
|
||||
block = {
|
||||
"type": "file",
|
||||
"base64": "<base64 string>",
|
||||
@@ -594,7 +595,7 @@ def test_convert_to_openai_data_block() -> None:
|
||||
result = convert_to_openai_data_block(block, api="responses")
|
||||
assert result == expected
|
||||
|
||||
## File / file ID
|
||||
# File / file ID
|
||||
block = {
|
||||
"type": "file",
|
||||
"file_id": "file-abc123",
|
||||
|
||||
@@ -47,7 +47,7 @@ def test_serdes_message() -> None:
|
||||
}
|
||||
actual = dumpd(msg)
|
||||
assert actual == expected
|
||||
assert load(actual) == msg
|
||||
assert load(actual, allowed_objects=[AIMessage]) == msg
|
||||
|
||||
|
||||
def test_serdes_message_chunk() -> None:
|
||||
@@ -102,7 +102,7 @@ def test_serdes_message_chunk() -> None:
|
||||
}
|
||||
actual = dumpd(chunk)
|
||||
assert actual == expected
|
||||
assert load(actual) == chunk
|
||||
assert load(actual, allowed_objects=[AIMessageChunk]) == chunk
|
||||
|
||||
|
||||
def test_add_usage_both_none() -> None:
|
||||
|
||||
@@ -2,10 +2,10 @@ import base64
|
||||
import json
|
||||
import re
|
||||
from collections.abc import Callable, Sequence
|
||||
from typing import Any
|
||||
from typing import Any, TypedDict
|
||||
|
||||
import pytest
|
||||
from typing_extensions import override
|
||||
from typing_extensions import NotRequired, override
|
||||
|
||||
from langchain_core.language_models.fake_chat_models import FakeChatModel
|
||||
from langchain_core.messages import (
|
||||
@@ -135,6 +135,16 @@ def test_merge_messages_tool_messages() -> None:
|
||||
assert messages == messages_model_copy
|
||||
|
||||
|
||||
class FilterFields(TypedDict):
|
||||
include_names: NotRequired[Sequence[str]]
|
||||
exclude_names: NotRequired[Sequence[str]]
|
||||
include_types: NotRequired[Sequence[str | type[BaseMessage]]]
|
||||
exclude_types: NotRequired[Sequence[str | type[BaseMessage]]]
|
||||
include_ids: NotRequired[Sequence[str]]
|
||||
exclude_ids: NotRequired[Sequence[str]]
|
||||
exclude_tool_calls: NotRequired[Sequence[str] | bool]
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"filters",
|
||||
[
|
||||
@@ -153,7 +163,7 @@ def test_merge_messages_tool_messages() -> None:
|
||||
{"include_names": ["blah", "blur"], "exclude_types": [SystemMessage]},
|
||||
],
|
||||
)
|
||||
def test_filter_message(filters: dict) -> None:
|
||||
def test_filter_message(filters: FilterFields) -> None:
|
||||
messages = [
|
||||
SystemMessage("foo", name="blah", id="1"),
|
||||
HumanMessage("bar", name="blur", id="2"),
|
||||
@@ -192,7 +202,7 @@ def test_filter_message_exclude_tool_calls() -> None:
|
||||
assert expected == actual
|
||||
|
||||
# test explicitly excluding all tool calls
|
||||
actual = filter_messages(messages, exclude_tool_calls={"1", "2"})
|
||||
actual = filter_messages(messages, exclude_tool_calls=["1", "2"])
|
||||
assert expected == actual
|
||||
|
||||
# test excluding a specific tool call
|
||||
@@ -234,7 +244,7 @@ def test_filter_message_exclude_tool_calls_content_blocks() -> None:
|
||||
assert expected == actual
|
||||
|
||||
# test explicitly excluding all tool calls
|
||||
actual = filter_messages(messages, exclude_tool_calls={"1", "2"})
|
||||
actual = filter_messages(messages, exclude_tool_calls=["1", "2"])
|
||||
assert expected == actual
|
||||
|
||||
# test excluding a specific tool call
|
||||
@@ -508,13 +518,14 @@ def test_trim_messages_invoke() -> None:
|
||||
|
||||
def test_trim_messages_bound_model_token_counter() -> None:
|
||||
trimmer = trim_messages(
|
||||
max_tokens=10, token_counter=FakeTokenCountingModel().bind(foo="bar")
|
||||
max_tokens=10,
|
||||
token_counter=FakeTokenCountingModel().bind(foo="bar"), # type: ignore[call-overload]
|
||||
)
|
||||
trimmer.invoke([HumanMessage("foobar")])
|
||||
|
||||
|
||||
def test_trim_messages_bad_token_counter() -> None:
|
||||
trimmer = trim_messages(max_tokens=10, token_counter={})
|
||||
trimmer = trim_messages(max_tokens=10, token_counter={}) # type: ignore[call-overload]
|
||||
with pytest.raises(
|
||||
ValueError,
|
||||
match=re.escape(
|
||||
@@ -608,7 +619,9 @@ def test_trim_messages_mixed_content_with_partial() -> None:
|
||||
|
||||
assert len(result) == 1
|
||||
assert len(result[0].content) == 1
|
||||
assert result[0].content[0]["text"] == "First part of text."
|
||||
content = result[0].content[0]
|
||||
assert isinstance(content, dict)
|
||||
assert content["text"] == "First part of text."
|
||||
assert messages == messages_copy
|
||||
|
||||
|
||||
@@ -660,6 +673,82 @@ def test_trim_messages_start_on_with_allow_partial() -> None:
|
||||
assert messages == messages_copy
|
||||
|
||||
|
||||
def test_trim_messages_token_counter_shortcut_approximate() -> None:
|
||||
"""Test that `'approximate'` shortcut works for `token_counter`."""
|
||||
messages = [
|
||||
SystemMessage("This is a test message"),
|
||||
HumanMessage("Another test message", id="first"),
|
||||
AIMessage("AI response here", id="second"),
|
||||
]
|
||||
messages_copy = [m.model_copy(deep=True) for m in messages]
|
||||
|
||||
# Test using the "approximate" shortcut
|
||||
result_shortcut = trim_messages(
|
||||
messages,
|
||||
max_tokens=50,
|
||||
token_counter="approximate",
|
||||
strategy="last",
|
||||
)
|
||||
|
||||
# Test using count_tokens_approximately directly
|
||||
result_direct = trim_messages(
|
||||
messages,
|
||||
max_tokens=50,
|
||||
token_counter=count_tokens_approximately,
|
||||
strategy="last",
|
||||
)
|
||||
|
||||
# Both should produce the same result
|
||||
assert result_shortcut == result_direct
|
||||
assert messages == messages_copy
|
||||
|
||||
|
||||
def test_trim_messages_token_counter_shortcut_invalid() -> None:
|
||||
"""Test that invalid `token_counter` shortcut raises `ValueError`."""
|
||||
messages = [
|
||||
SystemMessage("This is a test message"),
|
||||
HumanMessage("Another test message"),
|
||||
]
|
||||
|
||||
# Test with invalid shortcut - intentionally passing invalid string to verify
|
||||
# runtime error handling for dynamically-constructed inputs
|
||||
with pytest.raises(ValueError, match="Invalid token_counter shortcut 'invalid'"):
|
||||
trim_messages( # type: ignore[call-overload]
|
||||
messages,
|
||||
max_tokens=50,
|
||||
token_counter="invalid",
|
||||
strategy="last",
|
||||
)
|
||||
|
||||
|
||||
def test_trim_messages_token_counter_shortcut_with_options() -> None:
|
||||
"""Test that `'approximate'` shortcut works with different trim options."""
|
||||
messages = [
|
||||
SystemMessage("System instructions"),
|
||||
HumanMessage("First human message", id="first"),
|
||||
AIMessage("First AI response", id="ai1"),
|
||||
HumanMessage("Second human message", id="second"),
|
||||
AIMessage("Second AI response", id="ai2"),
|
||||
]
|
||||
messages_copy = [m.model_copy(deep=True) for m in messages]
|
||||
|
||||
# Test with various options
|
||||
result = trim_messages(
|
||||
messages,
|
||||
max_tokens=100,
|
||||
token_counter="approximate",
|
||||
strategy="last",
|
||||
include_system=True,
|
||||
start_on="human",
|
||||
)
|
||||
|
||||
# Should include system message and start on human
|
||||
assert len(result) >= 2
|
||||
assert isinstance(result[0], SystemMessage)
|
||||
assert any(isinstance(msg, HumanMessage) for msg in result[1:])
|
||||
assert messages == messages_copy
|
||||
|
||||
|
||||
class FakeTokenCountingModel(FakeChatModel):
|
||||
@override
|
||||
def get_num_tokens_from_messages(
|
||||
@@ -1184,7 +1273,47 @@ def test_convert_to_openai_messages_guard_content() -> None:
|
||||
def test_convert_to_openai_messages_invalid_block() -> None:
|
||||
messages = [HumanMessage(content=[{"type": "invalid", "foo": "bar"}])]
|
||||
with pytest.raises(ValueError, match="Unrecognized content block"):
|
||||
convert_to_openai_messages(messages, text_format="block")
|
||||
convert_to_openai_messages(
|
||||
messages,
|
||||
text_format="block",
|
||||
pass_through_unknown_blocks=False,
|
||||
)
|
||||
# Accept by default
|
||||
result = convert_to_openai_messages(messages, text_format="block")
|
||||
assert result == [{"role": "user", "content": [{"type": "invalid", "foo": "bar"}]}]
|
||||
|
||||
|
||||
def test_handle_openai_responses_blocks() -> None:
|
||||
blocks: str | list[str | dict] = [
|
||||
{"type": "reasoning", "id": "1"},
|
||||
{
|
||||
"type": "function_call",
|
||||
"name": "multiply",
|
||||
"arguments": '{"x":5,"y":4}',
|
||||
"call_id": "call_abc123",
|
||||
"id": "fc_abc123",
|
||||
"status": "completed",
|
||||
},
|
||||
]
|
||||
message = AIMessage(content=blocks)
|
||||
|
||||
expected_tool_call = {
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "multiply",
|
||||
"arguments": '{"x":5,"y":4}',
|
||||
},
|
||||
"id": "call_abc123",
|
||||
}
|
||||
result = convert_to_openai_messages(message)
|
||||
assert isinstance(result, dict)
|
||||
assert result["content"] == blocks
|
||||
assert result["tool_calls"] == [expected_tool_call]
|
||||
|
||||
result = convert_to_openai_messages(message, pass_through_unknown_blocks=False)
|
||||
assert isinstance(result, dict)
|
||||
assert result["content"] == [{"type": "reasoning", "id": "1"}]
|
||||
assert result["tool_calls"] == [expected_tool_call]
|
||||
|
||||
|
||||
def test_convert_to_openai_messages_empty_message() -> None:
|
||||
@@ -1523,6 +1652,72 @@ def test_get_buffer_string_with_empty_content() -> None:
|
||||
assert actual == expected
|
||||
|
||||
|
||||
def test_get_buffer_string_with_tool_calls() -> None:
|
||||
"""Test `get_buffer_string` with `tool_calls` field."""
|
||||
messages = [
|
||||
HumanMessage(content="What's the weather?"),
|
||||
AIMessage(
|
||||
content="Let me check the weather",
|
||||
tool_calls=[
|
||||
{
|
||||
"name": "get_weather",
|
||||
"args": {"city": "NYC"},
|
||||
"id": "call_1",
|
||||
"type": "tool_call",
|
||||
}
|
||||
],
|
||||
),
|
||||
]
|
||||
result = get_buffer_string(messages)
|
||||
assert "Human: What's the weather?" in result
|
||||
assert "AI: Let me check the weather" in result
|
||||
assert "get_weather" in result
|
||||
assert "NYC" in result
|
||||
|
||||
|
||||
def test_get_buffer_string_with_tool_calls_empty_content() -> None:
|
||||
"""Test `get_buffer_string` with `tool_calls` and empty `content`."""
|
||||
messages = [
|
||||
AIMessage(
|
||||
content="",
|
||||
tool_calls=[
|
||||
{
|
||||
"name": "search",
|
||||
"args": {"query": "test"},
|
||||
"id": "call_2",
|
||||
"type": "tool_call",
|
||||
}
|
||||
],
|
||||
),
|
||||
]
|
||||
result = get_buffer_string(messages)
|
||||
assert "AI: " in result
|
||||
assert "search" in result
|
||||
|
||||
|
||||
def test_get_buffer_string_tool_calls_preferred_over_function_call() -> None:
|
||||
"""Test that `tool_calls` takes precedence over legacy `function_call`."""
|
||||
messages = [
|
||||
AIMessage(
|
||||
content="Calling tools",
|
||||
tool_calls=[
|
||||
{
|
||||
"name": "modern_tool",
|
||||
"args": {"key": "value"},
|
||||
"id": "call_3",
|
||||
"type": "tool_call",
|
||||
}
|
||||
],
|
||||
additional_kwargs={
|
||||
"function_call": {"name": "legacy_function", "arguments": "{}"}
|
||||
},
|
||||
),
|
||||
]
|
||||
result = get_buffer_string(messages)
|
||||
assert "modern_tool" in result
|
||||
assert "legacy_function" not in result
|
||||
|
||||
|
||||
def test_convert_to_openai_messages_reasoning_content() -> None:
|
||||
"""Test convert_to_openai_messages with reasoning content blocks."""
|
||||
# Test reasoning block with empty summary
|
||||
|
||||
@@ -16,6 +16,7 @@ from langchain_core.output_parsers.openai_tools import (
|
||||
JsonOutputKeyToolsParser,
|
||||
JsonOutputToolsParser,
|
||||
PydanticToolsParser,
|
||||
parse_tool_call,
|
||||
)
|
||||
from langchain_core.outputs import ChatGeneration
|
||||
|
||||
@@ -1345,3 +1346,76 @@ def test_pydantic_tools_parser_with_optional_fields() -> None:
|
||||
assert result_mixed[1].username == "alice"
|
||||
assert result_mixed[1].bio is None
|
||||
assert result_mixed[1].age == 35
|
||||
|
||||
|
||||
def test_parse_tool_call_with_none_arguments() -> None:
|
||||
"""Test parse_tool_call handles None arguments for parameter-less tools.
|
||||
|
||||
When an LLM calls a tool that has no parameters, some providers return
|
||||
None for the arguments field instead of an empty string or "{}".
|
||||
This should not raise an error.
|
||||
|
||||
See: https://github.com/langchain-ai/langchain/issues/34123
|
||||
"""
|
||||
# Test case from issue #34123: arguments is None
|
||||
raw_tool_call = {
|
||||
"function": {"arguments": None, "name": "orderStatus"},
|
||||
"id": "chatcmpl-tool-8b1f759d874b412e931e64cf6f57bdcc",
|
||||
"type": "function",
|
||||
}
|
||||
|
||||
# This should not raise an error - should return parsed tool call with empty args
|
||||
result = parse_tool_call(raw_tool_call, return_id=True)
|
||||
|
||||
assert result is not None
|
||||
assert result["name"] == "orderStatus"
|
||||
assert result["args"] == {}
|
||||
assert result["id"] == "chatcmpl-tool-8b1f759d874b412e931e64cf6f57bdcc"
|
||||
|
||||
|
||||
def test_parse_tool_call_with_empty_string_arguments() -> None:
|
||||
"""Test parse_tool_call handles empty string arguments."""
|
||||
raw_tool_call = {
|
||||
"function": {"arguments": "", "name": "getStatus"},
|
||||
"id": "call_123",
|
||||
"type": "function",
|
||||
}
|
||||
|
||||
# Empty string should be treated as empty args
|
||||
result = parse_tool_call(raw_tool_call, return_id=True)
|
||||
|
||||
assert result is not None
|
||||
assert result["name"] == "getStatus"
|
||||
assert result["args"] == {}
|
||||
assert result["id"] == "call_123"
|
||||
|
||||
|
||||
def test_parse_tool_call_with_valid_arguments() -> None:
|
||||
"""Test parse_tool_call works normally with valid JSON arguments."""
|
||||
raw_tool_call = {
|
||||
"function": {"arguments": '{"param": "value"}', "name": "myTool"},
|
||||
"id": "call_456",
|
||||
"type": "function",
|
||||
}
|
||||
|
||||
result = parse_tool_call(raw_tool_call, return_id=True)
|
||||
|
||||
assert result is not None
|
||||
assert result["name"] == "myTool"
|
||||
assert result["args"] == {"param": "value"}
|
||||
assert result["id"] == "call_456"
|
||||
|
||||
|
||||
def test_parse_tool_call_partial_mode_with_none_arguments() -> None:
|
||||
"""Test parse_tool_call in partial mode handles None arguments."""
|
||||
raw_tool_call = {
|
||||
"function": {"arguments": None, "name": "streamingTool"},
|
||||
"id": "call_789",
|
||||
"type": "function",
|
||||
}
|
||||
|
||||
# Partial mode should return None for None arguments (existing behavior)
|
||||
result = parse_tool_call(raw_tool_call, partial=True, return_id=True)
|
||||
|
||||
# In partial mode, None arguments returns None (incomplete tool call)
|
||||
assert result is None
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user