mirror of
https://github.com/hwchase17/langchain.git
synced 2026-02-05 00:30:18 +00:00
Compare commits
326 Commits
sr/do-not-
...
langchain=
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
9f61ed8b81 | ||
|
|
6cff82d02e | ||
|
|
0cd72b50fb | ||
|
|
1a3cd46d88 | ||
|
|
470160cf81 | ||
|
|
20b8342fdf | ||
|
|
2f8af61218 | ||
|
|
81758e22f3 | ||
|
|
54241f4d06 | ||
|
|
7c9223d2b2 | ||
|
|
3342e4d62d | ||
|
|
5842110dbc | ||
|
|
62db04c43a | ||
|
|
fb892ee50a | ||
|
|
8ad0e9f267 | ||
|
|
d0b13e926d | ||
|
|
6fa4a45311 | ||
|
|
97dd5f2cb8 | ||
|
|
2a82fbc0ff | ||
|
|
0e5e33ba03 | ||
|
|
fc35544e0d | ||
|
|
15cc090e52 | ||
|
|
0f940d74b2 | ||
|
|
7829b722b1 | ||
|
|
914730cf8d | ||
|
|
c3738ea376 | ||
|
|
cd124a0949 | ||
|
|
57ff48e62e | ||
|
|
bc232e6d03 | ||
|
|
be32382d92 | ||
|
|
16c984ef0a | ||
|
|
13dd115d1d | ||
|
|
75d365418b | ||
|
|
2cff369cdc | ||
|
|
f5b6eecf72 | ||
|
|
a528ea1796 | ||
|
|
bf6a5eb122 | ||
|
|
5720dea41b | ||
|
|
087107557f | ||
|
|
05ba853548 | ||
|
|
3fb90666be | ||
|
|
6a2a149f89 | ||
|
|
bbc1d46efe | ||
|
|
d6b5f05f33 | ||
|
|
10377a7373 | ||
|
|
373ad8ac2c | ||
|
|
5eec11e2db | ||
|
|
badc0cf1b6 | ||
|
|
3b7abdff96 | ||
|
|
4aebfbad59 | ||
|
|
ae1f03fbe0 | ||
|
|
46dbb3967e | ||
|
|
dd0b990ba5 | ||
|
|
5aa46501cf | ||
|
|
92df109dd5 | ||
|
|
d27fb0c432 | ||
|
|
69dd39c461 | ||
|
|
41cebfe4fb | ||
|
|
5350967ddc | ||
|
|
7542278997 | ||
|
|
ff6e3558d7 | ||
|
|
585e12e53b | ||
|
|
73ba156a7d | ||
|
|
395c8d0bd4 | ||
|
|
34d31b8394 | ||
|
|
2aa0555941 | ||
|
|
dff229d018 | ||
|
|
b009ca4d23 | ||
|
|
0254c12cb0 | ||
|
|
2faed37ff1 | ||
|
|
d886dcfba5 | ||
|
|
91d5ca275d | ||
|
|
dcb670f395 | ||
|
|
85012ae601 | ||
|
|
aa0f4fb927 | ||
|
|
d18cdc6f32 | ||
|
|
8a5f46322b | ||
|
|
a0e86b18bf | ||
|
|
6affec92ce | ||
|
|
a64aee310c | ||
|
|
ba6c2590ae | ||
|
|
bb71f53585 | ||
|
|
9875ffbabc | ||
|
|
b5efafe80c | ||
|
|
ff3353f02f | ||
|
|
3ace4e3680 | ||
|
|
80c397019f | ||
|
|
4a42158e6c | ||
|
|
7ba3e80057 | ||
|
|
50e27a447b | ||
|
|
78c10f8790 | ||
|
|
ccfc9f795a | ||
|
|
b21926fe6c | ||
|
|
f1ad0da8f5 | ||
|
|
f67af34ea0 | ||
|
|
3030ffc248 | ||
|
|
1ad9de4b45 | ||
|
|
b95cb770e8 | ||
|
|
1867521d1a | ||
|
|
8e3ca21bd3 | ||
|
|
e92c817518 | ||
|
|
28727618b3 | ||
|
|
3108b14164 | ||
|
|
1922adc092 | ||
|
|
4a242a8a4f | ||
|
|
064b37f90e | ||
|
|
062678fa18 | ||
|
|
5d3e3d3f31 | ||
|
|
5a7cf87626 | ||
|
|
c63f23d233 | ||
|
|
b7091d391d | ||
|
|
7a2952210e | ||
|
|
7549845d82 | ||
|
|
878f033ed7 | ||
|
|
4065106c2e | ||
|
|
12df938ace | ||
|
|
65ee43cc10 | ||
|
|
fe7c000fc1 | ||
|
|
dad50e5624 | ||
|
|
0a6d01e61d | ||
|
|
c6f8b0875a | ||
|
|
4c3800d743 | ||
|
|
7fe1c4b78f | ||
|
|
c375732396 | ||
|
|
9c21f83e82 | ||
|
|
880652b713 | ||
|
|
4ab94579ad | ||
|
|
eb0545a173 | ||
|
|
a2e389de9f | ||
|
|
01573c1375 | ||
|
|
2ba3ce81a6 | ||
|
|
4e4e5d7337 | ||
|
|
2a863727f9 | ||
|
|
30e2260e26 | ||
|
|
cbaea351b2 | ||
|
|
f070217c3b | ||
|
|
0915682c12 | ||
|
|
68ab9a1e56 | ||
|
|
47b79c30c0 | ||
|
|
5899f980aa | ||
|
|
b0bf4afe81 | ||
|
|
33e5d01f7c | ||
|
|
ee3373afc2 | ||
|
|
b296f103a9 | ||
|
|
525d5c0169 | ||
|
|
c4b6ba254e | ||
|
|
b7d1831f9d | ||
|
|
328ba36601 | ||
|
|
6f677ef5c1 | ||
|
|
d47d41cbd3 | ||
|
|
32bbe99efc | ||
|
|
990e346c46 | ||
|
|
9b7792631d | ||
|
|
558a8fe25b | ||
|
|
52b1516d44 | ||
|
|
8a3bb73c05 | ||
|
|
099c042395 | ||
|
|
2d4f00a451 | ||
|
|
9bd401a6d4 | ||
|
|
6aa3794b74 | ||
|
|
189dcf7295 | ||
|
|
1bc88028e6 | ||
|
|
d2942351ce | ||
|
|
83c078f363 | ||
|
|
26d39ffc4a | ||
|
|
421e2ceeee | ||
|
|
275dcbf69f | ||
|
|
9f87b27a5b | ||
|
|
b2e1196e29 | ||
|
|
2dc1396380 | ||
|
|
77941ab3ce | ||
|
|
ee19a30dde | ||
|
|
5d799b3174 | ||
|
|
8f33a985a2 | ||
|
|
78eeccef0e | ||
|
|
3d415441e8 | ||
|
|
74385e0ebd | ||
|
|
2bfbc29ccc | ||
|
|
ef79c26f18 | ||
|
|
fbe32c8e89 | ||
|
|
2511c28f92 | ||
|
|
637bb1cbbc | ||
|
|
3dfea96ec1 | ||
|
|
68643153e5 | ||
|
|
462762f75b | ||
|
|
4f3729c004 | ||
|
|
ba428cdf54 | ||
|
|
69c7d1b01b | ||
|
|
733299ec13 | ||
|
|
e1adf781c6 | ||
|
|
31b5e4810c | ||
|
|
c6801fe159 | ||
|
|
1b563067f8 | ||
|
|
1996d81d72 | ||
|
|
ab0677c6f1 | ||
|
|
bdb53c93cc | ||
|
|
94d5271cb5 | ||
|
|
e499db4266 | ||
|
|
cc3af82b47 | ||
|
|
9383b78be1 | ||
|
|
3c492571ab | ||
|
|
f2410f7ea7 | ||
|
|
91560b6a7a | ||
|
|
b1dd448233 | ||
|
|
904daf6f40 | ||
|
|
8e31a5d7bd | ||
|
|
ee630b4539 | ||
|
|
46971447df | ||
|
|
d8b94007c1 | ||
|
|
cf595dcc38 | ||
|
|
d27211cfa7 | ||
|
|
ca1a3fbe88 | ||
|
|
c955b53aed | ||
|
|
2a626d9608 | ||
|
|
0861cba04b | ||
|
|
88246f45b3 | ||
|
|
1d04514354 | ||
|
|
c2324b8f3e | ||
|
|
957ea65d12 | ||
|
|
00fa38a295 | ||
|
|
9d98c1b669 | ||
|
|
00cc9d421f | ||
|
|
65716cf590 | ||
|
|
1b77a191f4 | ||
|
|
ebfde9173c | ||
|
|
2fe0369049 | ||
|
|
e023201d42 | ||
|
|
d40e340479 | ||
|
|
9a09ed0659 | ||
|
|
5f27b546dd | ||
|
|
022fdd52c3 | ||
|
|
7946a8f64e | ||
|
|
7af79039fc | ||
|
|
1755750ca1 | ||
|
|
ddb53672e2 | ||
|
|
eeae34972f | ||
|
|
47d89b1e47 | ||
|
|
ee0bdaeb79 | ||
|
|
915c446c48 | ||
|
|
d1e2099408 | ||
|
|
6ea15b9efa | ||
|
|
69f33aaff5 | ||
|
|
3f66f102d2 | ||
|
|
c6547f58b7 | ||
|
|
dfb05a7fa0 | ||
|
|
2f67f9ddcb | ||
|
|
0e36185933 | ||
|
|
6617865440 | ||
|
|
6dba4912be | ||
|
|
7a3827471b | ||
|
|
f006bc4c7e | ||
|
|
0a442644e3 | ||
|
|
4960663546 | ||
|
|
1381137c37 | ||
|
|
b4a042dfc4 | ||
|
|
81c4f21b52 | ||
|
|
f2dab562a8 | ||
|
|
61196a8280 | ||
|
|
7a97c31ac0 | ||
|
|
424214041e | ||
|
|
b06bd6a913 | ||
|
|
1c762187e8 | ||
|
|
90aefc607f | ||
|
|
2ca73c479b | ||
|
|
17c7c273b8 | ||
|
|
493be259c3 | ||
|
|
106c6ac273 | ||
|
|
7aaaa371e7 | ||
|
|
468dad1780 | ||
|
|
32d294b89a | ||
|
|
dc5b7dace8 | ||
|
|
e00b7233cf | ||
|
|
91f7e73c27 | ||
|
|
75fff151e8 | ||
|
|
d05a0cb80d | ||
|
|
d24aa69ceb | ||
|
|
fabcacc3e5 | ||
|
|
ac58d75113 | ||
|
|
28564ef94e | ||
|
|
b62a9b57f3 | ||
|
|
76dd656f2a | ||
|
|
d218936763 | ||
|
|
123e29dc26 | ||
|
|
6a1dca113e | ||
|
|
8aea6dd23a | ||
|
|
78a2f86f70 | ||
|
|
b5e23e5823 | ||
|
|
7872643910 | ||
|
|
f15391f4fc | ||
|
|
ca9b81cc2e | ||
|
|
a2a9a02ecb | ||
|
|
e5e1d6c705 | ||
|
|
6ee19473ba | ||
|
|
a59551f3b4 | ||
|
|
3286a98b27 | ||
|
|
62769a0dac | ||
|
|
f94108b4bc | ||
|
|
60a0ff8217 | ||
|
|
b3dffc70e2 | ||
|
|
86ac39e11f | ||
|
|
6e036d38b2 | ||
|
|
2d30ebb53b | ||
|
|
b3934b9580 | ||
|
|
09102a634a | ||
|
|
95ff5901a1 | ||
|
|
f3d7152074 | ||
|
|
dff37f6048 | ||
|
|
832036ef0f | ||
|
|
f1742954ab | ||
|
|
6ab0476676 | ||
|
|
d36413c821 | ||
|
|
99097f799c | ||
|
|
0666571519 | ||
|
|
ef85161525 | ||
|
|
079eb808f8 | ||
|
|
39fb2d1a3b | ||
|
|
db7f2db1ae | ||
|
|
df46c82ae2 | ||
|
|
f8adbbc461 | ||
|
|
17f0716d6c | ||
|
|
5acd34ae92 | ||
|
|
84dbebac4f | ||
|
|
eddfcd2c88 | ||
|
|
9f470d297f | ||
|
|
2222470f69 | ||
|
|
78175fcb96 |
132
.github/CODE_OF_CONDUCT.md
vendored
132
.github/CODE_OF_CONDUCT.md
vendored
@@ -1,132 +0,0 @@
|
||||
# Contributor Covenant Code of Conduct
|
||||
|
||||
## Our Pledge
|
||||
|
||||
We as members, contributors, and leaders pledge to make participation in our
|
||||
community a harassment-free experience for everyone, regardless of age, body
|
||||
size, visible or invisible disability, ethnicity, sex characteristics, gender
|
||||
identity and expression, level of experience, education, socio-economic status,
|
||||
nationality, personal appearance, race, caste, color, religion, or sexual
|
||||
identity and orientation.
|
||||
|
||||
We pledge to act and interact in ways that contribute to an open, welcoming,
|
||||
diverse, inclusive, and healthy community.
|
||||
|
||||
## Our Standards
|
||||
|
||||
Examples of behavior that contributes to a positive environment for our
|
||||
community include:
|
||||
|
||||
* Demonstrating empathy and kindness toward other people
|
||||
* Being respectful of differing opinions, viewpoints, and experiences
|
||||
* Giving and gracefully accepting constructive feedback
|
||||
* Accepting responsibility and apologizing to those affected by our mistakes,
|
||||
and learning from the experience
|
||||
* Focusing on what is best not just for us as individuals, but for the overall
|
||||
community
|
||||
|
||||
Examples of unacceptable behavior include:
|
||||
|
||||
* The use of sexualized language or imagery, and sexual attention or advances of
|
||||
any kind
|
||||
* Trolling, insulting or derogatory comments, and personal or political attacks
|
||||
* Public or private harassment
|
||||
* Publishing others' private information, such as a physical or email address,
|
||||
without their explicit permission
|
||||
* Other conduct which could reasonably be considered inappropriate in a
|
||||
professional setting
|
||||
|
||||
## Enforcement Responsibilities
|
||||
|
||||
Community leaders are responsible for clarifying and enforcing our standards of
|
||||
acceptable behavior and will take appropriate and fair corrective action in
|
||||
response to any behavior that they deem inappropriate, threatening, offensive,
|
||||
or harmful.
|
||||
|
||||
Community leaders have the right and responsibility to remove, edit, or reject
|
||||
comments, commits, code, wiki edits, issues, and other contributions that are
|
||||
not aligned to this Code of Conduct, and will communicate reasons for moderation
|
||||
decisions when appropriate.
|
||||
|
||||
## Scope
|
||||
|
||||
This Code of Conduct applies within all community spaces, and also applies when
|
||||
an individual is officially representing the community in public spaces.
|
||||
Examples of representing our community include using an official e-mail address,
|
||||
posting via an official social media account, or acting as an appointed
|
||||
representative at an online or offline event.
|
||||
|
||||
## Enforcement
|
||||
|
||||
Instances of abusive, harassing, or otherwise unacceptable behavior may be
|
||||
reported to the community leaders responsible for enforcement at
|
||||
conduct@langchain.dev.
|
||||
All complaints will be reviewed and investigated promptly and fairly.
|
||||
|
||||
All community leaders are obligated to respect the privacy and security of the
|
||||
reporter of any incident.
|
||||
|
||||
## Enforcement Guidelines
|
||||
|
||||
Community leaders will follow these Community Impact Guidelines in determining
|
||||
the consequences for any action they deem in violation of this Code of Conduct:
|
||||
|
||||
### 1. Correction
|
||||
|
||||
**Community Impact**: Use of inappropriate language or other behavior deemed
|
||||
unprofessional or unwelcome in the community.
|
||||
|
||||
**Consequence**: A private, written warning from community leaders, providing
|
||||
clarity around the nature of the violation and an explanation of why the
|
||||
behavior was inappropriate. A public apology may be requested.
|
||||
|
||||
### 2. Warning
|
||||
|
||||
**Community Impact**: A violation through a single incident or series of
|
||||
actions.
|
||||
|
||||
**Consequence**: A warning with consequences for continued behavior. No
|
||||
interaction with the people involved, including unsolicited interaction with
|
||||
those enforcing the Code of Conduct, for a specified period of time. This
|
||||
includes avoiding interactions in community spaces as well as external channels
|
||||
like social media. Violating these terms may lead to a temporary or permanent
|
||||
ban.
|
||||
|
||||
### 3. Temporary Ban
|
||||
|
||||
**Community Impact**: A serious violation of community standards, including
|
||||
sustained inappropriate behavior.
|
||||
|
||||
**Consequence**: A temporary ban from any sort of interaction or public
|
||||
communication with the community for a specified period of time. No public or
|
||||
private interaction with the people involved, including unsolicited interaction
|
||||
with those enforcing the Code of Conduct, is allowed during this period.
|
||||
Violating these terms may lead to a permanent ban.
|
||||
|
||||
### 4. Permanent Ban
|
||||
|
||||
**Community Impact**: Demonstrating a pattern of violation of community
|
||||
standards, including sustained inappropriate behavior, harassment of an
|
||||
individual, or aggression toward or disparagement of classes of individuals.
|
||||
|
||||
**Consequence**: A permanent ban from any sort of public interaction within the
|
||||
community.
|
||||
|
||||
## Attribution
|
||||
|
||||
This Code of Conduct is adapted from the [Contributor Covenant][homepage],
|
||||
version 2.1, available at
|
||||
[https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1].
|
||||
|
||||
Community Impact Guidelines were inspired by
|
||||
[Mozilla's code of conduct enforcement ladder][Mozilla CoC].
|
||||
|
||||
For answers to common questions about this code of conduct, see the FAQ at
|
||||
[https://www.contributor-covenant.org/faq][FAQ]. Translations are available at
|
||||
[https://www.contributor-covenant.org/translations][translations].
|
||||
|
||||
[homepage]: https://www.contributor-covenant.org
|
||||
[v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html
|
||||
[Mozilla CoC]: https://github.com/mozilla/diversity
|
||||
[FAQ]: https://www.contributor-covenant.org/faq
|
||||
[translations]: https://www.contributor-covenant.org/translations
|
||||
6
.github/CONTRIBUTING.md
vendored
6
.github/CONTRIBUTING.md
vendored
@@ -1,6 +0,0 @@
|
||||
# Contributing to LangChain
|
||||
|
||||
Hi there! Thank you for even being interested in contributing to LangChain.
|
||||
As an open-source project in a rapidly developing field, we are extremely open to contributions, whether they involve new features, improved infrastructure, better documentation, or bug fixes.
|
||||
|
||||
To learn how to contribute to LangChain, please follow the [contribution guide here](https://docs.langchain.com/oss/python/contributing).
|
||||
77
.github/ISSUE_TEMPLATE/bug-report.yml
vendored
77
.github/ISSUE_TEMPLATE/bug-report.yml
vendored
@@ -8,16 +8,15 @@ body:
|
||||
value: |
|
||||
Thank you for taking the time to file a bug report.
|
||||
|
||||
Use this to report BUGS in LangChain. For usage questions, feature requests and general design questions, please use the [LangChain Forum](https://forum.langchain.com/).
|
||||
For usage questions, feature requests and general design questions, please use the [LangChain Forum](https://forum.langchain.com/).
|
||||
|
||||
Relevant links to check before filing a bug report to see if your issue has already been reported, fixed or
|
||||
if there's another way to solve your problem:
|
||||
Check these before submitting to see if your issue has already been reported, fixed or if there's another way to solve your problem:
|
||||
|
||||
* [LangChain Forum](https://forum.langchain.com/),
|
||||
* [LangChain documentation with the integrated search](https://docs.langchain.com/oss/python/langchain/overview),
|
||||
* [API Reference](https://reference.langchain.com/python/),
|
||||
* [Documentation](https://docs.langchain.com/oss/python/langchain/overview),
|
||||
* [API Reference Documentation](https://reference.langchain.com/python/),
|
||||
* [LangChain ChatBot](https://chat.langchain.com/)
|
||||
* [GitHub search](https://github.com/langchain-ai/langchain),
|
||||
* [LangChain Forum](https://forum.langchain.com/),
|
||||
- type: checkboxes
|
||||
id: checks
|
||||
attributes:
|
||||
@@ -36,16 +35,48 @@ body:
|
||||
required: true
|
||||
- label: This is not related to the langchain-community package.
|
||||
required: true
|
||||
- label: I read what a minimal reproducible example is (https://stackoverflow.com/help/minimal-reproducible-example).
|
||||
required: true
|
||||
- label: I posted a self-contained, minimal, reproducible example. A maintainer can copy it and run it AS IS.
|
||||
required: true
|
||||
- type: checkboxes
|
||||
id: package
|
||||
attributes:
|
||||
label: Package (Required)
|
||||
description: |
|
||||
Which `langchain` package(s) is this bug related to? Select at least one.
|
||||
|
||||
Note that if the package you are reporting for is not listed here, it is not in this repository (e.g. `langchain-google-genai` is in [`langchain-ai/langchain-google`](https://github.com/langchain-ai/langchain-google/)).
|
||||
|
||||
Please report issues for other packages to their respective repositories.
|
||||
options:
|
||||
- label: langchain
|
||||
- label: langchain-openai
|
||||
- label: langchain-anthropic
|
||||
- label: langchain-classic
|
||||
- label: langchain-core
|
||||
- label: langchain-cli
|
||||
- label: langchain-model-profiles
|
||||
- label: langchain-tests
|
||||
- label: langchain-text-splitters
|
||||
- label: langchain-chroma
|
||||
- label: langchain-deepseek
|
||||
- label: langchain-exa
|
||||
- label: langchain-fireworks
|
||||
- label: langchain-groq
|
||||
- label: langchain-huggingface
|
||||
- label: langchain-mistralai
|
||||
- label: langchain-nomic
|
||||
- label: langchain-ollama
|
||||
- label: langchain-perplexity
|
||||
- label: langchain-prompty
|
||||
- label: langchain-qdrant
|
||||
- label: langchain-xai
|
||||
- label: Other / not sure / general
|
||||
- type: textarea
|
||||
id: reproduction
|
||||
validations:
|
||||
required: true
|
||||
attributes:
|
||||
label: Example Code
|
||||
label: Example Code (Python)
|
||||
description: |
|
||||
Please add a self-contained, [minimal, reproducible, example](https://stackoverflow.com/help/minimal-reproducible-example) with your use case.
|
||||
|
||||
@@ -53,15 +84,12 @@ body:
|
||||
|
||||
**Important!**
|
||||
|
||||
* Avoid screenshots when possible, as they are hard to read and (more importantly) don't allow others to copy-and-paste your code.
|
||||
* Reduce your code to the minimum required to reproduce the issue if possible. This makes it much easier for others to help you.
|
||||
* Use code tags (e.g., ```python ... ```) to correctly [format your code](https://help.github.com/en/github/writing-on-github/creating-and-highlighting-code-blocks#syntax-highlighting).
|
||||
* INCLUDE the language label (e.g. `python`) after the first three backticks to enable syntax highlighting. (e.g., ```python rather than ```).
|
||||
* Avoid screenshots, as they are hard to read and (more importantly) don't allow others to copy-and-paste your code.
|
||||
* Reduce your code to the minimum required to reproduce the issue if possible.
|
||||
|
||||
(This will be automatically formatted into code, so no need for backticks.)
|
||||
render: python
|
||||
placeholder: |
|
||||
The following code:
|
||||
|
||||
```python
|
||||
from langchain_core.runnables import RunnableLambda
|
||||
|
||||
def bad_code(inputs) -> int:
|
||||
@@ -69,17 +97,14 @@ body:
|
||||
|
||||
chain = RunnableLambda(bad_code)
|
||||
chain.invoke('Hello!')
|
||||
```
|
||||
- type: textarea
|
||||
id: error
|
||||
validations:
|
||||
required: false
|
||||
attributes:
|
||||
label: Error Message and Stack Trace (if applicable)
|
||||
description: |
|
||||
If you are reporting an error, please include the full error message and stack trace.
|
||||
placeholder: |
|
||||
Exception + full stack trace
|
||||
If you are reporting an error, please copy and paste the full error message and
|
||||
stack trace.
|
||||
(This will be automatically formatted into code, so no need for backticks.)
|
||||
render: shell
|
||||
- type: textarea
|
||||
id: description
|
||||
attributes:
|
||||
@@ -99,9 +124,7 @@ body:
|
||||
attributes:
|
||||
label: System Info
|
||||
description: |
|
||||
Please share your system info with us. Do NOT skip this step and please don't trim
|
||||
the output. Most users don't include enough information here and it makes it harder
|
||||
for us to help you.
|
||||
Please share your system info with us.
|
||||
|
||||
Run the following command in your terminal and paste the output here:
|
||||
|
||||
@@ -113,8 +136,6 @@ body:
|
||||
from langchain_core import sys_info
|
||||
sys_info.print_sys_info()
|
||||
```
|
||||
|
||||
alternatively, put the entire output of `pip freeze` here.
|
||||
placeholder: |
|
||||
python -m langchain_core.sys_info
|
||||
validations:
|
||||
|
||||
10
.github/ISSUE_TEMPLATE/config.yml
vendored
10
.github/ISSUE_TEMPLATE/config.yml
vendored
@@ -1,9 +1,15 @@
|
||||
blank_issues_enabled: false
|
||||
version: 2.1
|
||||
contact_links:
|
||||
- name: 📚 Documentation
|
||||
url: https://github.com/langchain-ai/docs/issues/new?template=langchain.yml
|
||||
- name: 📚 Documentation issue
|
||||
url: https://github.com/langchain-ai/docs/issues/new?template=01-langchain.yml
|
||||
about: Report an issue related to the LangChain documentation
|
||||
- name: 💬 LangChain Forum
|
||||
url: https://forum.langchain.com/
|
||||
about: General community discussions and support
|
||||
- name: 📚 LangChain Documentation
|
||||
url: https://docs.langchain.com/oss/python/langchain/overview
|
||||
about: View the official LangChain documentation
|
||||
- name: 📚 API Reference Documentation
|
||||
url: https://reference.langchain.com/python/
|
||||
about: View the official LangChain API reference documentation
|
||||
|
||||
40
.github/ISSUE_TEMPLATE/feature-request.yml
vendored
40
.github/ISSUE_TEMPLATE/feature-request.yml
vendored
@@ -13,11 +13,11 @@ body:
|
||||
Relevant links to check before filing a feature request to see if your request has already been made or
|
||||
if there's another way to achieve what you want:
|
||||
|
||||
* [LangChain Forum](https://forum.langchain.com/),
|
||||
* [LangChain documentation with the integrated search](https://docs.langchain.com/oss/python/langchain/overview),
|
||||
* [API Reference](https://reference.langchain.com/python/),
|
||||
* [Documentation](https://docs.langchain.com/oss/python/langchain/overview),
|
||||
* [API Reference Documentation](https://reference.langchain.com/python/),
|
||||
* [LangChain ChatBot](https://chat.langchain.com/)
|
||||
* [GitHub search](https://github.com/langchain-ai/langchain),
|
||||
* [LangChain Forum](https://forum.langchain.com/),
|
||||
- type: checkboxes
|
||||
id: checks
|
||||
attributes:
|
||||
@@ -34,6 +34,40 @@ body:
|
||||
required: true
|
||||
- label: This is not related to the langchain-community package.
|
||||
required: true
|
||||
- type: checkboxes
|
||||
id: package
|
||||
attributes:
|
||||
label: Package (Required)
|
||||
description: |
|
||||
Which `langchain` package(s) is this request related to? Select at least one.
|
||||
|
||||
Note that if the package you are requesting for is not listed here, it is not in this repository (e.g. `langchain-google-genai` is in `langchain-ai/langchain`).
|
||||
|
||||
Please submit feature requests for other packages to their respective repositories.
|
||||
options:
|
||||
- label: langchain
|
||||
- label: langchain-openai
|
||||
- label: langchain-anthropic
|
||||
- label: langchain-classic
|
||||
- label: langchain-core
|
||||
- label: langchain-cli
|
||||
- label: langchain-model-profiles
|
||||
- label: langchain-tests
|
||||
- label: langchain-text-splitters
|
||||
- label: langchain-chroma
|
||||
- label: langchain-deepseek
|
||||
- label: langchain-exa
|
||||
- label: langchain-fireworks
|
||||
- label: langchain-groq
|
||||
- label: langchain-huggingface
|
||||
- label: langchain-mistralai
|
||||
- label: langchain-nomic
|
||||
- label: langchain-ollama
|
||||
- label: langchain-perplexity
|
||||
- label: langchain-prompty
|
||||
- label: langchain-qdrant
|
||||
- label: langchain-xai
|
||||
- label: Other / not sure / general
|
||||
- type: textarea
|
||||
id: feature-description
|
||||
validations:
|
||||
|
||||
30
.github/ISSUE_TEMPLATE/privileged.yml
vendored
30
.github/ISSUE_TEMPLATE/privileged.yml
vendored
@@ -18,3 +18,33 @@ body:
|
||||
attributes:
|
||||
label: Issue Content
|
||||
description: Add the content of the issue here.
|
||||
- type: checkboxes
|
||||
id: package
|
||||
attributes:
|
||||
label: Package (Required)
|
||||
description: |
|
||||
Please select package(s) that this issue is related to.
|
||||
options:
|
||||
- label: langchain
|
||||
- label: langchain-openai
|
||||
- label: langchain-anthropic
|
||||
- label: langchain-classic
|
||||
- label: langchain-core
|
||||
- label: langchain-cli
|
||||
- label: langchain-model-profiles
|
||||
- label: langchain-tests
|
||||
- label: langchain-text-splitters
|
||||
- label: langchain-chroma
|
||||
- label: langchain-deepseek
|
||||
- label: langchain-exa
|
||||
- label: langchain-fireworks
|
||||
- label: langchain-groq
|
||||
- label: langchain-huggingface
|
||||
- label: langchain-mistralai
|
||||
- label: langchain-nomic
|
||||
- label: langchain-ollama
|
||||
- label: langchain-perplexity
|
||||
- label: langchain-prompty
|
||||
- label: langchain-qdrant
|
||||
- label: langchain-xai
|
||||
- label: Other / not sure / general
|
||||
|
||||
48
.github/ISSUE_TEMPLATE/task.yml
vendored
48
.github/ISSUE_TEMPLATE/task.yml
vendored
@@ -25,13 +25,13 @@ body:
|
||||
label: Task Description
|
||||
description: |
|
||||
Provide a clear and detailed description of the task.
|
||||
|
||||
|
||||
What needs to be done? Be specific about the scope and requirements.
|
||||
placeholder: |
|
||||
This task involves...
|
||||
|
||||
|
||||
The goal is to...
|
||||
|
||||
|
||||
Specific requirements:
|
||||
- ...
|
||||
- ...
|
||||
@@ -43,7 +43,7 @@ body:
|
||||
label: Acceptance Criteria
|
||||
description: |
|
||||
Define the criteria that must be met for this task to be considered complete.
|
||||
|
||||
|
||||
What are the specific deliverables or outcomes expected?
|
||||
placeholder: |
|
||||
This task will be complete when:
|
||||
@@ -58,15 +58,15 @@ body:
|
||||
label: Context and Background
|
||||
description: |
|
||||
Provide any relevant context, background information, or links to related issues/PRs.
|
||||
|
||||
|
||||
Why is this task needed? What problem does it solve?
|
||||
placeholder: |
|
||||
Background:
|
||||
- ...
|
||||
|
||||
|
||||
Related issues/PRs:
|
||||
- #...
|
||||
|
||||
|
||||
Additional context:
|
||||
- ...
|
||||
validations:
|
||||
@@ -77,15 +77,45 @@ body:
|
||||
label: Dependencies
|
||||
description: |
|
||||
List any dependencies or blockers for this task.
|
||||
|
||||
|
||||
Are there other tasks, issues, or external factors that need to be completed first?
|
||||
placeholder: |
|
||||
This task depends on:
|
||||
- [ ] Issue #...
|
||||
- [ ] PR #...
|
||||
- [ ] External dependency: ...
|
||||
|
||||
|
||||
Blocked by:
|
||||
- ...
|
||||
validations:
|
||||
required: false
|
||||
- type: checkboxes
|
||||
id: package
|
||||
attributes:
|
||||
label: Package (Required)
|
||||
description: |
|
||||
Please select package(s) that this task is related to.
|
||||
options:
|
||||
- label: langchain
|
||||
- label: langchain-openai
|
||||
- label: langchain-anthropic
|
||||
- label: langchain-classic
|
||||
- label: langchain-core
|
||||
- label: langchain-cli
|
||||
- label: langchain-model-profiles
|
||||
- label: langchain-tests
|
||||
- label: langchain-text-splitters
|
||||
- label: langchain-chroma
|
||||
- label: langchain-deepseek
|
||||
- label: langchain-exa
|
||||
- label: langchain-fireworks
|
||||
- label: langchain-groq
|
||||
- label: langchain-huggingface
|
||||
- label: langchain-mistralai
|
||||
- label: langchain-nomic
|
||||
- label: langchain-ollama
|
||||
- label: langchain-perplexity
|
||||
- label: langchain-prompty
|
||||
- label: langchain-qdrant
|
||||
- label: langchain-xai
|
||||
- label: Other / not sure / general
|
||||
|
||||
38
.github/PULL_REQUEST_TEMPLATE.md
vendored
38
.github/PULL_REQUEST_TEMPLATE.md
vendored
@@ -1,28 +1,30 @@
|
||||
(Replace this entire block of text)
|
||||
|
||||
Thank you for contributing to LangChain! Follow these steps to mark your pull request as ready for review. **If any of these steps are not completed, your PR will not be considered for review.**
|
||||
Read the full contributing guidelines: https://docs.langchain.com/oss/python/contributing/overview
|
||||
|
||||
Thank you for contributing to LangChain! Follow these steps to have your pull request considered as ready for review.
|
||||
|
||||
1. PR title: Should follow the format: TYPE(SCOPE): DESCRIPTION
|
||||
|
||||
- [ ] **PR title**: Follows the format: {TYPE}({SCOPE}): {DESCRIPTION}
|
||||
- Examples:
|
||||
- fix(anthropic): resolve flag parsing error
|
||||
- feat(core): add multi-tenant support
|
||||
- fix(cli): resolve flag parsing error
|
||||
- docs(openai): update API usage examples
|
||||
- Allowed `{TYPE}` values:
|
||||
- feat, fix, docs, style, refactor, perf, test, build, ci, chore, revert, release
|
||||
- Allowed `{SCOPE}` values (optional):
|
||||
- core, cli, langchain, standard-tests, text-splitters, docs, anthropic, chroma, deepseek, exa, fireworks, groq, huggingface, mistralai, nomic, ollama, openai, perplexity, prompty, qdrant, xai, infra
|
||||
- Once you've written the title, please delete this checklist item; do not include it in the PR.
|
||||
- test(openai): update API usage tests
|
||||
- Allowed TYPE and SCOPE values: https://github.com/langchain-ai/langchain/blob/master/.github/workflows/pr_lint.yml#L15-L33
|
||||
|
||||
- [ ] **PR message**: ***Delete this entire checklist*** and replace with
|
||||
- **Description:** a description of the change. Include a [closing keyword](https://docs.github.com/en/issues/tracking-your-work-with-issues/using-issues/linking-a-pull-request-to-an-issue#linking-a-pull-request-to-an-issue-using-a-keyword) if applicable to a relevant issue.
|
||||
- **Issue:** the issue # it fixes, if applicable (e.g. Fixes #123)
|
||||
- **Dependencies:** any dependencies required for this change
|
||||
2. PR description:
|
||||
|
||||
- [ ] **Lint and test**: Run `make format`, `make lint` and `make test` from the root of the package(s) you've modified. **We will not consider a PR unless these three are passing in CI.** See [contribution guidelines](https://docs.langchain.com/oss/python/contributing) for more.
|
||||
- Write 1-2 sentences summarizing the change.
|
||||
- If this PR addresses a specific issue, please include "Fixes #ISSUE_NUMBER" in the description to automatically close the issue when the PR is merged.
|
||||
- If there are any breaking changes, please clearly describe them.
|
||||
- If this PR depends on another PR being merged first, please include "Depends on #PR_NUMBER" inthe description.
|
||||
|
||||
3. Run `make format`, `make lint` and `make test` from the root of the package(s) you've modified.
|
||||
|
||||
- We will not consider a PR unless these three are passing in CI.
|
||||
|
||||
Additional guidelines:
|
||||
|
||||
- Most PRs should not touch more than one package.
|
||||
- Please do not add dependencies to `pyproject.toml` files (even optional ones) unless they are **required** for unit tests. Likewise, please do not update the `uv.lock` files unless you are adding a required dependency.
|
||||
- Changes should be backwards compatible.
|
||||
- Make sure optional dependencies are imported within a function.
|
||||
- We ask that if you use generative AI for your contribution, you include a disclaimer.
|
||||
- PRs should not touch more than one package unless absolutely necessary.
|
||||
- Do not update the `uv.lock` files unless or add dependencies to `pyproject.toml` files (even optional ones) unless you have explicit permission to do so by a maintainer.
|
||||
|
||||
93
.github/actions/poetry_setup/action.yml
vendored
93
.github/actions/poetry_setup/action.yml
vendored
@@ -1,93 +0,0 @@
|
||||
# An action for setting up poetry install with caching.
|
||||
# Using a custom action since the default action does not
|
||||
# take poetry install groups into account.
|
||||
# Action code from:
|
||||
# https://github.com/actions/setup-python/issues/505#issuecomment-1273013236
|
||||
name: poetry-install-with-caching
|
||||
description: Poetry install with support for caching of dependency groups.
|
||||
|
||||
inputs:
|
||||
python-version:
|
||||
description: Python version, supporting MAJOR.MINOR only
|
||||
required: true
|
||||
|
||||
poetry-version:
|
||||
description: Poetry version
|
||||
required: true
|
||||
|
||||
cache-key:
|
||||
description: Cache key to use for manual handling of caching
|
||||
required: true
|
||||
|
||||
working-directory:
|
||||
description: Directory whose poetry.lock file should be cached
|
||||
required: true
|
||||
|
||||
runs:
|
||||
using: composite
|
||||
steps:
|
||||
- uses: actions/setup-python@v5
|
||||
name: Setup python ${{ inputs.python-version }}
|
||||
id: setup-python
|
||||
with:
|
||||
python-version: ${{ inputs.python-version }}
|
||||
|
||||
- uses: actions/cache@v4
|
||||
id: cache-bin-poetry
|
||||
name: Cache Poetry binary - Python ${{ inputs.python-version }}
|
||||
env:
|
||||
SEGMENT_DOWNLOAD_TIMEOUT_MIN: "1"
|
||||
with:
|
||||
path: |
|
||||
/opt/pipx/venvs/poetry
|
||||
# This step caches the poetry installation, so make sure it's keyed on the poetry version as well.
|
||||
key: bin-poetry-${{ runner.os }}-${{ runner.arch }}-py-${{ inputs.python-version }}-${{ inputs.poetry-version }}
|
||||
|
||||
- name: Refresh shell hashtable and fixup softlinks
|
||||
if: steps.cache-bin-poetry.outputs.cache-hit == 'true'
|
||||
shell: bash
|
||||
env:
|
||||
POETRY_VERSION: ${{ inputs.poetry-version }}
|
||||
PYTHON_VERSION: ${{ inputs.python-version }}
|
||||
run: |
|
||||
set -eux
|
||||
|
||||
# Refresh the shell hashtable, to ensure correct `which` output.
|
||||
hash -r
|
||||
|
||||
# `actions/cache@v3` doesn't always seem able to correctly unpack softlinks.
|
||||
# Delete and recreate the softlinks pipx expects to have.
|
||||
rm /opt/pipx/venvs/poetry/bin/python
|
||||
cd /opt/pipx/venvs/poetry/bin
|
||||
ln -s "$(which "python$PYTHON_VERSION")" python
|
||||
chmod +x python
|
||||
cd /opt/pipx_bin/
|
||||
ln -s /opt/pipx/venvs/poetry/bin/poetry poetry
|
||||
chmod +x poetry
|
||||
|
||||
# Ensure everything got set up correctly.
|
||||
/opt/pipx/venvs/poetry/bin/python --version
|
||||
/opt/pipx_bin/poetry --version
|
||||
|
||||
- name: Install poetry
|
||||
if: steps.cache-bin-poetry.outputs.cache-hit != 'true'
|
||||
shell: bash
|
||||
env:
|
||||
POETRY_VERSION: ${{ inputs.poetry-version }}
|
||||
PYTHON_VERSION: ${{ inputs.python-version }}
|
||||
# Install poetry using the python version installed by setup-python step.
|
||||
run: pipx install "poetry==$POETRY_VERSION" --python '${{ steps.setup-python.outputs.python-path }}' --verbose
|
||||
|
||||
- name: Restore pip and poetry cached dependencies
|
||||
uses: actions/cache@v4
|
||||
env:
|
||||
SEGMENT_DOWNLOAD_TIMEOUT_MIN: "4"
|
||||
WORKDIR: ${{ inputs.working-directory == '' && '.' || inputs.working-directory }}
|
||||
with:
|
||||
path: |
|
||||
~/.cache/pip
|
||||
~/.cache/pypoetry/virtualenvs
|
||||
~/.cache/pypoetry/cache
|
||||
~/.cache/pypoetry/artifacts
|
||||
${{ env.WORKDIR }}/.venv
|
||||
key: py-deps-${{ runner.os }}-${{ runner.arch }}-py-${{ inputs.python-version }}-poetry-${{ inputs.poetry-version }}-${{ inputs.cache-key }}-${{ hashFiles(format('{0}/**/poetry.lock', env.WORKDIR)) }}
|
||||
330
.github/copilot-instructions.md
vendored
330
.github/copilot-instructions.md
vendored
@@ -1,330 +0,0 @@
|
||||
# Global Development Guidelines for LangChain Projects
|
||||
|
||||
## Core Development Principles
|
||||
|
||||
### 1. Maintain Stable Public Interfaces ⚠️ CRITICAL
|
||||
|
||||
**Always attempt to preserve function signatures, argument positions, and names for exported/public methods.**
|
||||
|
||||
❌ **Bad - Breaking Change:**
|
||||
|
||||
```python
|
||||
def get_user(id, verbose=False): # Changed from `user_id`
|
||||
pass
|
||||
```
|
||||
|
||||
✅ **Good - Stable Interface:**
|
||||
|
||||
```python
|
||||
def get_user(user_id: str, verbose: bool = False) -> User:
|
||||
"""Retrieve user by ID with optional verbose output."""
|
||||
pass
|
||||
```
|
||||
|
||||
**Before making ANY changes to public APIs:**
|
||||
|
||||
- Check if the function/class is exported in `__init__.py`
|
||||
- Look for existing usage patterns in tests and examples
|
||||
- Use keyword-only arguments for new parameters: `*, new_param: str = "default"`
|
||||
- Mark experimental features clearly with docstring admonitions (using MkDocs Material, like `!!! warning`)
|
||||
|
||||
🧠 *Ask yourself:* "Would this change break someone's code if they used it last week?"
|
||||
|
||||
### 2. Code Quality Standards
|
||||
|
||||
**All Python code MUST include type hints and return types.**
|
||||
|
||||
❌ **Bad:**
|
||||
|
||||
```python
|
||||
def p(u, d):
|
||||
return [x for x in u if x not in d]
|
||||
```
|
||||
|
||||
✅ **Good:**
|
||||
|
||||
```python
|
||||
def filter_unknown_users(users: list[str], known_users: set[str]) -> list[str]:
|
||||
"""Filter out users that are not in the known users set.
|
||||
|
||||
Args:
|
||||
users: List of user identifiers to filter.
|
||||
known_users: Set of known/valid user identifiers.
|
||||
|
||||
Returns:
|
||||
List of users that are not in the known_users set.
|
||||
"""
|
||||
return [user for user in users if user not in known_users]
|
||||
```
|
||||
|
||||
**Style Requirements:**
|
||||
|
||||
- Use descriptive, **self-explanatory variable names**. Avoid overly short or cryptic identifiers.
|
||||
- Attempt to break up complex functions (>20 lines) into smaller, focused functions where it makes sense
|
||||
- Avoid unnecessary abstraction or premature optimization
|
||||
- Follow existing patterns in the codebase you're modifying
|
||||
|
||||
### 3. Testing Requirements
|
||||
|
||||
**Every new feature or bugfix MUST be covered by unit tests.**
|
||||
|
||||
**Test Organization:**
|
||||
|
||||
- Unit tests: `tests/unit_tests/` (no network calls allowed)
|
||||
- Integration tests: `tests/integration_tests/` (network calls permitted)
|
||||
- Use `pytest` as the testing framework
|
||||
|
||||
**Test Quality Checklist:**
|
||||
|
||||
- [ ] Tests fail when your new logic is broken
|
||||
- [ ] Happy path is covered
|
||||
- [ ] Edge cases and error conditions are tested
|
||||
- [ ] Use fixtures/mocks for external dependencies
|
||||
- [ ] Tests are deterministic (no flaky tests)
|
||||
|
||||
Checklist questions:
|
||||
|
||||
- [ ] Does the test suite fail if your new logic is broken?
|
||||
- [ ] Are all expected behaviors exercised (happy path, invalid input, etc)?
|
||||
- [ ] Do tests use fixtures or mocks where needed?
|
||||
|
||||
```python
|
||||
def test_filter_unknown_users():
|
||||
"""Test filtering unknown users from a list."""
|
||||
users = ["alice", "bob", "charlie"]
|
||||
known_users = {"alice", "bob"}
|
||||
|
||||
result = filter_unknown_users(users, known_users)
|
||||
|
||||
assert result == ["charlie"]
|
||||
assert len(result) == 1
|
||||
```
|
||||
|
||||
### 4. Security and Risk Assessment
|
||||
|
||||
**Security Checklist:**
|
||||
|
||||
- No `eval()`, `exec()`, or `pickle` on user-controlled input
|
||||
- Proper exception handling (no bare `except:`) and use a `msg` variable for error messages
|
||||
- Remove unreachable/commented code before committing
|
||||
- Race conditions or resource leaks (file handles, sockets, threads).
|
||||
- Ensure proper resource cleanup (file handles, connections)
|
||||
|
||||
❌ **Bad:**
|
||||
|
||||
```python
|
||||
def load_config(path):
|
||||
with open(path) as f:
|
||||
return eval(f.read()) # ⚠️ Never eval config
|
||||
```
|
||||
|
||||
✅ **Good:**
|
||||
|
||||
```python
|
||||
import json
|
||||
|
||||
def load_config(path: str) -> dict:
|
||||
with open(path) as f:
|
||||
return json.load(f)
|
||||
```
|
||||
|
||||
### 5. Documentation Standards
|
||||
|
||||
**Use Google-style docstrings with Args and Returns sections for all public functions.**
|
||||
|
||||
❌ **Insufficient Documentation:**
|
||||
|
||||
```python
|
||||
def send_email(to, msg):
|
||||
"""Send an email to a recipient."""
|
||||
```
|
||||
|
||||
✅ **Complete Documentation:**
|
||||
|
||||
```python
|
||||
def send_email(to: str, msg: str, *, priority: str = "normal") -> bool:
|
||||
"""
|
||||
Send an email to a recipient with specified priority.
|
||||
|
||||
Args:
|
||||
to: The email address of the recipient.
|
||||
msg: The message body to send.
|
||||
priority: Email priority level.
|
||||
|
||||
Returns:
|
||||
True if email was sent successfully, False otherwise.
|
||||
|
||||
Raises:
|
||||
InvalidEmailError: If the email address format is invalid.
|
||||
SMTPConnectionError: If unable to connect to email server.
|
||||
"""
|
||||
```
|
||||
|
||||
**Documentation Guidelines:**
|
||||
|
||||
- Types go in function signatures, NOT in docstrings
|
||||
- Focus on "why" rather than "what" in descriptions
|
||||
- Document all parameters, return values, and exceptions
|
||||
- Keep descriptions concise but clear
|
||||
|
||||
📌 *Tip:* Keep descriptions concise but clear. Only document return values if non-obvious.
|
||||
|
||||
### 6. Architectural Improvements
|
||||
|
||||
**When you encounter code that could be improved, suggest better designs:**
|
||||
|
||||
❌ **Poor Design:**
|
||||
|
||||
```python
|
||||
def process_data(data, db_conn, email_client, logger):
|
||||
# Function doing too many things
|
||||
validated = validate_data(data)
|
||||
result = db_conn.save(validated)
|
||||
email_client.send_notification(result)
|
||||
logger.log(f"Processed {len(data)} items")
|
||||
return result
|
||||
```
|
||||
|
||||
✅ **Better Design:**
|
||||
|
||||
```python
|
||||
@dataclass
|
||||
class ProcessingResult:
|
||||
"""Result of data processing operation."""
|
||||
items_processed: int
|
||||
success: bool
|
||||
errors: List[str] = field(default_factory=list)
|
||||
|
||||
class DataProcessor:
|
||||
"""Handles data validation, storage, and notification."""
|
||||
|
||||
def __init__(self, db_conn: Database, email_client: EmailClient):
|
||||
self.db = db_conn
|
||||
self.email = email_client
|
||||
|
||||
def process(self, data: List[dict]) -> ProcessingResult:
|
||||
"""Process and store data with notifications.
|
||||
|
||||
Args:
|
||||
data: List of data items to process.
|
||||
|
||||
Returns:
|
||||
ProcessingResult with details of the operation.
|
||||
"""
|
||||
validated = self._validate_data(data)
|
||||
result = self.db.save(validated)
|
||||
self._notify_completion(result)
|
||||
return result
|
||||
```
|
||||
|
||||
**Design Improvement Areas:**
|
||||
|
||||
If there's a **cleaner**, **more scalable**, or **simpler** design, highlight it and suggest improvements that would:
|
||||
|
||||
- Reduce code duplication through shared utilities
|
||||
- Make unit testing easier
|
||||
- Improve separation of concerns (single responsibility)
|
||||
- Make unit testing easier through dependency injection
|
||||
- Add clarity without adding complexity
|
||||
- Prefer dataclasses for structured data
|
||||
|
||||
## Development Tools & Commands
|
||||
|
||||
### Package Management
|
||||
|
||||
```bash
|
||||
# Add package
|
||||
uv add package-name
|
||||
|
||||
# Sync project dependencies
|
||||
uv sync
|
||||
uv lock
|
||||
```
|
||||
|
||||
### Testing
|
||||
|
||||
```bash
|
||||
# Run unit tests (no network)
|
||||
make test
|
||||
|
||||
# Don't run integration tests, as API keys must be set
|
||||
|
||||
# Run specific test file
|
||||
uv run --group test pytest tests/unit_tests/test_specific.py
|
||||
```
|
||||
|
||||
### Code Quality
|
||||
|
||||
```bash
|
||||
# Lint code
|
||||
make lint
|
||||
|
||||
# Format code
|
||||
make format
|
||||
|
||||
# Type checking
|
||||
uv run --group lint mypy .
|
||||
```
|
||||
|
||||
### Dependency Management Patterns
|
||||
|
||||
**Local Development Dependencies:**
|
||||
|
||||
```toml
|
||||
[tool.uv.sources]
|
||||
langchain-core = { path = "../core", editable = true }
|
||||
langchain-tests = { path = "../standard-tests", editable = true }
|
||||
```
|
||||
|
||||
**For tools, use the `@tool` decorator from `langchain_core.tools`:**
|
||||
|
||||
```python
|
||||
from langchain_core.tools import tool
|
||||
|
||||
@tool
|
||||
def search_database(query: str) -> str:
|
||||
"""Search the database for relevant information.
|
||||
|
||||
Args:
|
||||
query: The search query string.
|
||||
"""
|
||||
# Implementation here
|
||||
return results
|
||||
```
|
||||
|
||||
## Commit Standards
|
||||
|
||||
**Use Conventional Commits format for PR titles:**
|
||||
|
||||
- `feat(core): add multi-tenant support`
|
||||
- `!fix(cli): resolve flag parsing error` (breaking change uses exclamation mark)
|
||||
- `docs: update API usage examples`
|
||||
- `docs(openai): update API usage examples`
|
||||
|
||||
## Framework-Specific Guidelines
|
||||
|
||||
- Follow the existing patterns in `langchain_core` for base abstractions
|
||||
- Implement proper streaming support where applicable
|
||||
- Avoid deprecated components
|
||||
|
||||
### Partner Integrations
|
||||
|
||||
- Follow the established patterns in existing partner libraries
|
||||
- Implement standard interfaces (`BaseChatModel`, `BaseEmbeddings`, etc.)
|
||||
- Include comprehensive integration tests
|
||||
- Document API key requirements and authentication
|
||||
|
||||
---
|
||||
|
||||
## Quick Reference Checklist
|
||||
|
||||
Before submitting code changes:
|
||||
|
||||
- [ ] **Breaking Changes**: Verified no public API changes
|
||||
- [ ] **Type Hints**: All functions have complete type annotations
|
||||
- [ ] **Tests**: New functionality is fully tested
|
||||
- [ ] **Security**: No dangerous patterns (eval, silent failures, etc.)
|
||||
- [ ] **Documentation**: Google-style docstrings for public functions
|
||||
- [ ] **Code Quality**: `make lint` and `make format` pass
|
||||
- [ ] **Architecture**: Suggested improvements where applicable
|
||||
- [ ] **Commit Message**: Follows Conventional Commits format
|
||||
96
.github/pr-file-labeler.yml
vendored
96
.github/pr-file-labeler.yml
vendored
@@ -7,13 +7,12 @@ core:
|
||||
- any-glob-to-any-file:
|
||||
- "libs/core/**/*"
|
||||
|
||||
langchain:
|
||||
langchain-classic:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
- "libs/langchain/**/*"
|
||||
- "libs/langchain_v1/**/*"
|
||||
|
||||
v1:
|
||||
langchain:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
- "libs/langchain_v1/**/*"
|
||||
@@ -28,6 +27,11 @@ standard-tests:
|
||||
- any-glob-to-any-file:
|
||||
- "libs/standard-tests/**/*"
|
||||
|
||||
model-profiles:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
- "libs/model-profiles/**/*"
|
||||
|
||||
text-splitters:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
@@ -39,6 +43,81 @@ integration:
|
||||
- any-glob-to-any-file:
|
||||
- "libs/partners/**/*"
|
||||
|
||||
anthropic:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
- "libs/partners/anthropic/**/*"
|
||||
|
||||
chroma:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
- "libs/partners/chroma/**/*"
|
||||
|
||||
deepseek:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
- "libs/partners/deepseek/**/*"
|
||||
|
||||
exa:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
- "libs/partners/exa/**/*"
|
||||
|
||||
fireworks:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
- "libs/partners/fireworks/**/*"
|
||||
|
||||
groq:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
- "libs/partners/groq/**/*"
|
||||
|
||||
huggingface:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
- "libs/partners/huggingface/**/*"
|
||||
|
||||
mistralai:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
- "libs/partners/mistralai/**/*"
|
||||
|
||||
nomic:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
- "libs/partners/nomic/**/*"
|
||||
|
||||
ollama:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
- "libs/partners/ollama/**/*"
|
||||
|
||||
openai:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
- "libs/partners/openai/**/*"
|
||||
|
||||
perplexity:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
- "libs/partners/perplexity/**/*"
|
||||
|
||||
prompty:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
- "libs/partners/prompty/**/*"
|
||||
|
||||
qdrant:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
- "libs/partners/qdrant/**/*"
|
||||
|
||||
xai:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
- "libs/partners/xai/**/*"
|
||||
|
||||
# Infrastructure and DevOps
|
||||
infra:
|
||||
- changed-files:
|
||||
@@ -69,16 +148,5 @@ documentation:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
- "**/*.md"
|
||||
- "**/*.rst"
|
||||
- "**/README*"
|
||||
|
||||
# Security related changes
|
||||
security:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
- "**/*security*"
|
||||
- "**/*auth*"
|
||||
- "**/*credential*"
|
||||
- "**/*secret*"
|
||||
- "**/*token*"
|
||||
- ".github/workflows/security*"
|
||||
|
||||
41
.github/pr-title-labeler.yml
vendored
41
.github/pr-title-labeler.yml
vendored
@@ -1,41 +0,0 @@
|
||||
# PR title labeler config
|
||||
#
|
||||
# Labels PRs based on conventional commit patterns in titles
|
||||
#
|
||||
# Format: type(scope): description or type!: description (breaking)
|
||||
|
||||
add-missing-labels: true
|
||||
clear-prexisting: false
|
||||
include-commits: false
|
||||
include-title: true
|
||||
label-for-breaking-changes: breaking
|
||||
|
||||
label-mapping:
|
||||
documentation: ["docs"]
|
||||
feature: ["feat"]
|
||||
fix: ["fix"]
|
||||
infra: ["build", "ci", "chore"]
|
||||
integration:
|
||||
[
|
||||
"anthropic",
|
||||
"chroma",
|
||||
"deepseek",
|
||||
"exa",
|
||||
"fireworks",
|
||||
"groq",
|
||||
"huggingface",
|
||||
"mistralai",
|
||||
"nomic",
|
||||
"ollama",
|
||||
"openai",
|
||||
"perplexity",
|
||||
"prompty",
|
||||
"qdrant",
|
||||
"xai",
|
||||
]
|
||||
linting: ["style"]
|
||||
performance: ["perf"]
|
||||
refactor: ["refactor"]
|
||||
release: ["release"]
|
||||
revert: ["revert"]
|
||||
tests: ["test"]
|
||||
3
.github/scripts/check_diff.py
vendored
3
.github/scripts/check_diff.py
vendored
@@ -30,6 +30,7 @@ LANGCHAIN_DIRS = [
|
||||
"libs/text-splitters",
|
||||
"libs/langchain",
|
||||
"libs/langchain_v1",
|
||||
"libs/model-profiles",
|
||||
]
|
||||
|
||||
# When set to True, we are ignoring core dependents
|
||||
@@ -134,7 +135,7 @@ def _get_configs_for_single_dir(job: str, dir_: str) -> List[Dict[str, str]]:
|
||||
elif dir_ == "libs/core":
|
||||
py_versions = ["3.10", "3.11", "3.12", "3.13", "3.14"]
|
||||
# custom logic for specific directories
|
||||
elif dir_ in {"libs/partners/chroma", "libs/partners/nomic"}:
|
||||
elif dir_ in {"libs/partners/chroma"}:
|
||||
py_versions = ["3.10", "3.13"]
|
||||
else:
|
||||
py_versions = ["3.10", "3.14"]
|
||||
|
||||
2
.github/scripts/get_min_versions.py
vendored
2
.github/scripts/get_min_versions.py
vendored
@@ -98,7 +98,7 @@ def _check_python_version_from_requirement(
|
||||
return True
|
||||
else:
|
||||
marker_str = str(requirement.marker)
|
||||
if "python_version" or "python_full_version" in marker_str:
|
||||
if "python_version" in marker_str or "python_full_version" in marker_str:
|
||||
python_version_str = "".join(
|
||||
char
|
||||
for char in marker_str
|
||||
|
||||
@@ -35,7 +35,7 @@ jobs:
|
||||
timeout-minutes: 20
|
||||
name: "Python ${{ inputs.python-version }}"
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: "🐍 Set up Python ${{ inputs.python-version }} + UV"
|
||||
uses: "./.github/actions/uv_setup"
|
||||
|
||||
2
.github/workflows/_lint.yml
vendored
2
.github/workflows/_lint.yml
vendored
@@ -38,7 +38,7 @@ jobs:
|
||||
timeout-minutes: 20
|
||||
steps:
|
||||
- name: "📋 Checkout Code"
|
||||
uses: actions/checkout@v5
|
||||
uses: actions/checkout@v6
|
||||
|
||||
- name: "🐍 Set up Python ${{ inputs.python-version }} + UV"
|
||||
uses: "./.github/actions/uv_setup"
|
||||
|
||||
42
.github/workflows/_release.yml
vendored
42
.github/workflows/_release.yml
vendored
@@ -19,7 +19,7 @@ on:
|
||||
required: true
|
||||
type: string
|
||||
description: "From which folder this pipeline executes"
|
||||
default: "libs/langchain"
|
||||
default: "libs/langchain_v1"
|
||||
release-version:
|
||||
required: true
|
||||
type: string
|
||||
@@ -54,7 +54,7 @@ jobs:
|
||||
version: ${{ steps.check-version.outputs.version }}
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Set up Python + uv
|
||||
uses: "./.github/actions/uv_setup"
|
||||
@@ -77,7 +77,7 @@ jobs:
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
|
||||
- name: Upload build
|
||||
uses: actions/upload-artifact@v4
|
||||
uses: actions/upload-artifact@v5
|
||||
with:
|
||||
name: dist
|
||||
path: ${{ inputs.working-directory }}/dist/
|
||||
@@ -105,7 +105,7 @@ jobs:
|
||||
outputs:
|
||||
release-body: ${{ steps.generate-release-body.outputs.release-body }}
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- uses: actions/checkout@v6
|
||||
with:
|
||||
repository: langchain-ai/langchain
|
||||
path: langchain
|
||||
@@ -149,8 +149,8 @@ jobs:
|
||||
fi
|
||||
fi
|
||||
|
||||
# if PREV_TAG is empty, let it be empty
|
||||
if [ -z "$PREV_TAG" ]; then
|
||||
# if PREV_TAG is empty or came out to 0.0.0, let it be empty
|
||||
if [ -z "$PREV_TAG" ] || [ "$PREV_TAG" = "$PKG_NAME==0.0.0" ]; then
|
||||
echo "No previous tag found - first release"
|
||||
else
|
||||
# confirm prev-tag actually exists in git repo with git tag
|
||||
@@ -179,8 +179,8 @@ jobs:
|
||||
PREV_TAG: ${{ steps.check-tags.outputs.prev-tag }}
|
||||
run: |
|
||||
PREAMBLE="Changes since $PREV_TAG"
|
||||
# if PREV_TAG is empty, then we are releasing the first version
|
||||
if [ -z "$PREV_TAG" ]; then
|
||||
# if PREV_TAG is empty or 0.0.0, then we are releasing the first version
|
||||
if [ -z "$PREV_TAG" ] || [ "$PREV_TAG" = "$PKG_NAME==0.0.0" ]; then
|
||||
PREAMBLE="Initial release"
|
||||
PREV_TAG=$(git rev-list --max-parents=0 HEAD)
|
||||
fi
|
||||
@@ -206,9 +206,9 @@ jobs:
|
||||
id-token: write
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- uses: actions/download-artifact@v5
|
||||
- uses: actions/download-artifact@v6
|
||||
with:
|
||||
name: dist
|
||||
path: ${{ inputs.working-directory }}/dist/
|
||||
@@ -237,7 +237,7 @@ jobs:
|
||||
contents: read
|
||||
timeout-minutes: 20
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
# We explicitly *don't* set up caching here. This ensures our tests are
|
||||
# maximally sensitive to catching breakage.
|
||||
@@ -258,7 +258,7 @@ jobs:
|
||||
with:
|
||||
python-version: ${{ env.PYTHON_VERSION }}
|
||||
|
||||
- uses: actions/download-artifact@v5
|
||||
- uses: actions/download-artifact@v6
|
||||
with:
|
||||
name: dist
|
||||
path: ${{ inputs.working-directory }}/dist/
|
||||
@@ -377,6 +377,7 @@ jobs:
|
||||
XAI_API_KEY: ${{ secrets.XAI_API_KEY }}
|
||||
DEEPSEEK_API_KEY: ${{ secrets.DEEPSEEK_API_KEY }}
|
||||
PPLX_API_KEY: ${{ secrets.PPLX_API_KEY }}
|
||||
LANGCHAIN_TESTS_USER_AGENT: ${{ secrets.LANGCHAIN_TESTS_USER_AGENT }}
|
||||
run: make integration_tests
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
|
||||
@@ -395,7 +396,7 @@ jobs:
|
||||
contents: read
|
||||
strategy:
|
||||
matrix:
|
||||
partner: [openai, anthropic]
|
||||
partner: [anthropic]
|
||||
fail-fast: false # Continue testing other partners if one fails
|
||||
env:
|
||||
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
|
||||
@@ -409,8 +410,9 @@ jobs:
|
||||
AZURE_OPENAI_LEGACY_CHAT_DEPLOYMENT_NAME: ${{ secrets.AZURE_OPENAI_LEGACY_CHAT_DEPLOYMENT_NAME }}
|
||||
AZURE_OPENAI_LLM_DEPLOYMENT_NAME: ${{ secrets.AZURE_OPENAI_LLM_DEPLOYMENT_NAME }}
|
||||
AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT_NAME: ${{ secrets.AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT_NAME }}
|
||||
LANGCHAIN_TESTS_USER_AGENT: ${{ secrets.LANGCHAIN_TESTS_USER_AGENT }}
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
# We implement this conditional as Github Actions does not have good support
|
||||
# for conditionally needing steps. https://github.com/actions/runner/issues/491
|
||||
@@ -428,7 +430,7 @@ jobs:
|
||||
with:
|
||||
python-version: ${{ env.PYTHON_VERSION }}
|
||||
|
||||
- uses: actions/download-artifact@v5
|
||||
- uses: actions/download-artifact@v6
|
||||
if: startsWith(inputs.working-directory, 'libs/core')
|
||||
with:
|
||||
name: dist
|
||||
@@ -442,7 +444,7 @@ jobs:
|
||||
git ls-remote --tags origin "langchain-${{ matrix.partner }}*" \
|
||||
| awk '{print $2}' \
|
||||
| sed 's|refs/tags/||' \
|
||||
| grep -E '[0-9]+\.[0-9]+\.[0-9]+([a-zA-Z]+[0-9]+)?$' \
|
||||
| grep -E '[0-9]+\.[0-9]+\.[0-9]+$' \
|
||||
| sort -Vr \
|
||||
| head -n 1
|
||||
)"
|
||||
@@ -490,14 +492,14 @@ jobs:
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Set up Python + uv
|
||||
uses: "./.github/actions/uv_setup"
|
||||
with:
|
||||
python-version: ${{ env.PYTHON_VERSION }}
|
||||
|
||||
- uses: actions/download-artifact@v5
|
||||
- uses: actions/download-artifact@v6
|
||||
with:
|
||||
name: dist
|
||||
path: ${{ inputs.working-directory }}/dist/
|
||||
@@ -530,14 +532,14 @@ jobs:
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Set up Python + uv
|
||||
uses: "./.github/actions/uv_setup"
|
||||
with:
|
||||
python-version: ${{ env.PYTHON_VERSION }}
|
||||
|
||||
- uses: actions/download-artifact@v5
|
||||
- uses: actions/download-artifact@v6
|
||||
with:
|
||||
name: dist
|
||||
path: ${{ inputs.working-directory }}/dist/
|
||||
|
||||
2
.github/workflows/_test.yml
vendored
2
.github/workflows/_test.yml
vendored
@@ -33,7 +33,7 @@ jobs:
|
||||
name: "Python ${{ inputs.python-version }}"
|
||||
steps:
|
||||
- name: "📋 Checkout Code"
|
||||
uses: actions/checkout@v5
|
||||
uses: actions/checkout@v6
|
||||
|
||||
- name: "🐍 Set up Python ${{ inputs.python-version }} + UV"
|
||||
uses: "./.github/actions/uv_setup"
|
||||
|
||||
2
.github/workflows/_test_pydantic.yml
vendored
2
.github/workflows/_test_pydantic.yml
vendored
@@ -36,7 +36,7 @@ jobs:
|
||||
name: "Pydantic ~=${{ inputs.pydantic-version }}"
|
||||
steps:
|
||||
- name: "📋 Checkout Code"
|
||||
uses: actions/checkout@v5
|
||||
uses: actions/checkout@v6
|
||||
|
||||
- name: "🐍 Set up Python ${{ inputs.python-version }} + UV"
|
||||
uses: "./.github/actions/uv_setup"
|
||||
|
||||
107
.github/workflows/auto-label-by-package.yml
vendored
Normal file
107
.github/workflows/auto-label-by-package.yml
vendored
Normal file
@@ -0,0 +1,107 @@
|
||||
name: Auto Label Issues by Package
|
||||
|
||||
on:
|
||||
issues:
|
||||
types: [opened, edited]
|
||||
|
||||
jobs:
|
||||
label-by-package:
|
||||
permissions:
|
||||
issues: write
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- name: Sync package labels
|
||||
uses: actions/github-script@v8
|
||||
with:
|
||||
script: |
|
||||
const body = context.payload.issue.body || "";
|
||||
|
||||
// Extract text under "### Package"
|
||||
const match = body.match(/### Package\s+([\s\S]*?)\n###/i);
|
||||
if (!match) return;
|
||||
|
||||
const packageSection = match[1].trim();
|
||||
|
||||
// Mapping table for package names to labels
|
||||
const mapping = {
|
||||
"langchain": "langchain",
|
||||
"langchain-openai": "openai",
|
||||
"langchain-anthropic": "anthropic",
|
||||
"langchain-classic": "langchain-classic",
|
||||
"langchain-core": "core",
|
||||
"langchain-cli": "cli",
|
||||
"langchain-model-profiles": "model-profiles",
|
||||
"langchain-tests": "standard-tests",
|
||||
"langchain-text-splitters": "text-splitters",
|
||||
"langchain-chroma": "chroma",
|
||||
"langchain-deepseek": "deepseek",
|
||||
"langchain-exa": "exa",
|
||||
"langchain-fireworks": "fireworks",
|
||||
"langchain-groq": "groq",
|
||||
"langchain-huggingface": "huggingface",
|
||||
"langchain-mistralai": "mistralai",
|
||||
"langchain-nomic": "nomic",
|
||||
"langchain-ollama": "ollama",
|
||||
"langchain-perplexity": "perplexity",
|
||||
"langchain-prompty": "prompty",
|
||||
"langchain-qdrant": "qdrant",
|
||||
"langchain-xai": "xai",
|
||||
};
|
||||
|
||||
// All possible package labels we manage
|
||||
const allPackageLabels = Object.values(mapping);
|
||||
const selectedLabels = [];
|
||||
|
||||
// Check if this is checkbox format (multiple selection)
|
||||
const checkboxMatches = packageSection.match(/- \[x\]\s+([^\n\r]+)/gi);
|
||||
if (checkboxMatches) {
|
||||
// Handle checkbox format
|
||||
for (const match of checkboxMatches) {
|
||||
const packageName = match.replace(/- \[x\]\s+/i, '').trim();
|
||||
const label = mapping[packageName];
|
||||
if (label && !selectedLabels.includes(label)) {
|
||||
selectedLabels.push(label);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Handle dropdown format (single selection)
|
||||
const label = mapping[packageSection];
|
||||
if (label) {
|
||||
selectedLabels.push(label);
|
||||
}
|
||||
}
|
||||
|
||||
// Get current issue labels
|
||||
const issue = await github.rest.issues.get({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
issue_number: context.issue.number
|
||||
});
|
||||
|
||||
const currentLabels = issue.data.labels.map(label => label.name);
|
||||
const currentPackageLabels = currentLabels.filter(label => allPackageLabels.includes(label));
|
||||
|
||||
// Determine labels to add and remove
|
||||
const labelsToAdd = selectedLabels.filter(label => !currentPackageLabels.includes(label));
|
||||
const labelsToRemove = currentPackageLabels.filter(label => !selectedLabels.includes(label));
|
||||
|
||||
// Add new labels
|
||||
if (labelsToAdd.length > 0) {
|
||||
await github.rest.issues.addLabels({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
issue_number: context.issue.number,
|
||||
labels: labelsToAdd
|
||||
});
|
||||
}
|
||||
|
||||
// Remove old labels
|
||||
for (const label of labelsToRemove) {
|
||||
await github.rest.issues.removeLabel({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
issue_number: context.issue.number,
|
||||
name: label
|
||||
});
|
||||
}
|
||||
2
.github/workflows/check_core_versions.yml
vendored
2
.github/workflows/check_core_versions.yml
vendored
@@ -18,7 +18,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: "✅ Verify pyproject.toml & version.py Match"
|
||||
run: |
|
||||
|
||||
6
.github/workflows/check_diffs.yml
vendored
6
.github/workflows/check_diffs.yml
vendored
@@ -47,7 +47,7 @@ jobs:
|
||||
if: ${{ !contains(github.event.pull_request.labels.*.name, 'ci-ignore') }}
|
||||
steps:
|
||||
- name: "📋 Checkout Code"
|
||||
uses: actions/checkout@v5
|
||||
uses: actions/checkout@v6
|
||||
- name: "🐍 Setup Python 3.11"
|
||||
uses: actions/setup-python@v6
|
||||
with:
|
||||
@@ -141,7 +141,7 @@ jobs:
|
||||
run:
|
||||
working-directory: ${{ matrix.job-configs.working-directory }}
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: "🐍 Set up Python ${{ matrix.job-configs.python-version }} + UV"
|
||||
uses: "./.github/actions/uv_setup"
|
||||
@@ -182,7 +182,7 @@ jobs:
|
||||
job-configs: ${{ fromJson(needs.build.outputs.codspeed) }}
|
||||
fail-fast: false
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: "📦 Install UV Package Manager"
|
||||
uses: astral-sh/setup-uv@v7
|
||||
|
||||
7
.github/workflows/integration_tests.yml
vendored
7
.github/workflows/integration_tests.yml
vendored
@@ -71,14 +71,14 @@ jobs:
|
||||
working-directory: ${{ fromJSON(needs.compute-matrix.outputs.matrix).working-directory }}
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- uses: actions/checkout@v6
|
||||
with:
|
||||
path: langchain
|
||||
- uses: actions/checkout@v5
|
||||
- uses: actions/checkout@v6
|
||||
with:
|
||||
repository: langchain-ai/langchain-google
|
||||
path: langchain-google
|
||||
- uses: actions/checkout@v5
|
||||
- uses: actions/checkout@v6
|
||||
with:
|
||||
repository: langchain-ai/langchain-aws
|
||||
path: langchain-aws
|
||||
@@ -155,6 +155,7 @@ jobs:
|
||||
WATSONX_APIKEY: ${{ secrets.WATSONX_APIKEY }}
|
||||
WATSONX_PROJECT_ID: ${{ secrets.WATSONX_PROJECT_ID }}
|
||||
XAI_API_KEY: ${{ secrets.XAI_API_KEY }}
|
||||
LANGCHAIN_TESTS_USER_AGENT: ${{ secrets.LANGCHAIN_TESTS_USER_AGENT }}
|
||||
run: |
|
||||
cd langchain/${{ matrix.working-directory }}
|
||||
make integration_tests
|
||||
|
||||
15
.github/workflows/pr_lint.yml
vendored
15
.github/workflows/pr_lint.yml
vendored
@@ -26,11 +26,13 @@
|
||||
# * revert — reverts a previous commit
|
||||
# * release — prepare a new release
|
||||
#
|
||||
# Allowed Scopes (optional):
|
||||
# core, cli, langchain, langchain_v1, langchain-classic, standard-tests,
|
||||
# text-splitters, docs, anthropic, chroma, deepseek, exa, fireworks, groq,
|
||||
# huggingface, mistralai, nomic, ollama, openai, perplexity, prompty, qdrant,
|
||||
# xai, infra
|
||||
# Allowed Scope(s) (optional):
|
||||
# core, cli, langchain, langchain_v1, langchain-classic, model-profiles,
|
||||
# standard-tests, text-splitters, docs, anthropic, chroma, deepseek, exa,
|
||||
# fireworks, groq, huggingface, mistralai, nomic, ollama, openai,
|
||||
# perplexity, prompty, qdrant, xai, infra, deps
|
||||
#
|
||||
# Multiple scopes can be used by separating them with a comma.
|
||||
#
|
||||
# Rules:
|
||||
# 1. The 'Type' must start with a lowercase letter.
|
||||
@@ -79,8 +81,8 @@ jobs:
|
||||
core
|
||||
cli
|
||||
langchain
|
||||
langchain_v1
|
||||
langchain-classic
|
||||
model-profiles
|
||||
standard-tests
|
||||
text-splitters
|
||||
docs
|
||||
@@ -100,6 +102,7 @@ jobs:
|
||||
qdrant
|
||||
xai
|
||||
infra
|
||||
deps
|
||||
requireScope: false
|
||||
disallowScopes: |
|
||||
release
|
||||
|
||||
4
.github/workflows/v03_api_doc_build.yml
vendored
4
.github/workflows/v03_api_doc_build.yml
vendored
@@ -23,12 +23,12 @@ jobs:
|
||||
permissions:
|
||||
contents: read
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- uses: actions/checkout@v6
|
||||
with:
|
||||
ref: v0.3
|
||||
path: langchain
|
||||
|
||||
- uses: actions/checkout@v5
|
||||
- uses: actions/checkout@v6
|
||||
with:
|
||||
repository: langchain-ai/langchain-api-docs-html
|
||||
path: langchain-api-docs-html
|
||||
|
||||
8
.github/workflows/v1_changes.md
vendored
8
.github/workflows/v1_changes.md
vendored
@@ -1,8 +0,0 @@
|
||||
With the deprecation of v0 docs, the following files will need to be migrated/supported
|
||||
in the new docs repo:
|
||||
|
||||
- run_notebooks.yml: New repo should run Integration tests on code snippets?
|
||||
- people.yml: Need to fix and somehow display on the new docs site
|
||||
- Subsequently, `.github/actions/people/`
|
||||
- _test_doc_imports.yml
|
||||
- check-broken-links.yml
|
||||
5
.gitignore
vendored
5
.gitignore
vendored
@@ -1,6 +1,8 @@
|
||||
.vs/
|
||||
.claude/
|
||||
.idea/
|
||||
#Emacs backup
|
||||
*~
|
||||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
@@ -161,3 +163,6 @@ node_modules
|
||||
|
||||
prof
|
||||
virtualenv/
|
||||
scratch/
|
||||
|
||||
.langgraph_api/
|
||||
|
||||
8
.mcp.json
Normal file
8
.mcp.json
Normal file
@@ -0,0 +1,8 @@
|
||||
{
|
||||
"mcpServers": {
|
||||
"docs-langchain": {
|
||||
"type": "http",
|
||||
"url": "https://docs.langchain.com/mcp"
|
||||
}
|
||||
}
|
||||
}
|
||||
405
AGENTS.md
405
AGENTS.md
@@ -1,255 +1,58 @@
|
||||
# Global Development Guidelines for LangChain Projects
|
||||
# Global development guidelines for the LangChain monorepo
|
||||
|
||||
## Core Development Principles
|
||||
This document provides context to understand the LangChain Python project and assist with development.
|
||||
|
||||
### 1. Maintain Stable Public Interfaces ⚠️ CRITICAL
|
||||
## Project architecture and context
|
||||
|
||||
**Always attempt to preserve function signatures, argument positions, and names for exported/public methods.**
|
||||
### Monorepo structure
|
||||
|
||||
❌ **Bad - Breaking Change:**
|
||||
This is a Python monorepo with multiple independently versioned packages that use `uv`.
|
||||
|
||||
```python
|
||||
def get_user(id, verbose=False): # Changed from `user_id`
|
||||
pass
|
||||
```txt
|
||||
langchain/
|
||||
├── libs/
|
||||
│ ├── core/ # `langchain-core` primitives and base abstractions
|
||||
│ ├── langchain/ # `langchain-classic` (legacy, no new features)
|
||||
│ ├── langchain_v1/ # Actively maintained `langchain` package
|
||||
│ ├── partners/ # Third-party integrations
|
||||
│ │ ├── openai/ # OpenAI models and embeddings
|
||||
│ │ ├── anthropic/ # Anthropic (Claude) integration
|
||||
│ │ ├── ollama/ # Local model support
|
||||
│ │ └── ... (other integrations maintained by the LangChain team)
|
||||
│ ├── text-splitters/ # Document chunking utilities
|
||||
│ ├── standard-tests/ # Shared test suite for integrations
|
||||
│ ├── model-profiles/ # Model configuration profiles
|
||||
│ └── cli/ # Command-line interface tools
|
||||
├── .github/ # CI/CD workflows and templates
|
||||
├── .vscode/ # VSCode IDE standard settings and recommended extensions
|
||||
└── README.md # Information about LangChain
|
||||
```
|
||||
|
||||
✅ **Good - Stable Interface:**
|
||||
- **Core layer** (`langchain-core`): Base abstractions, interfaces, and protocols. Users should not need to know about this layer directly.
|
||||
- **Implementation layer** (`langchain`): Concrete implementations and high-level public utilities
|
||||
- **Integration layer** (`partners/`): Third-party service integrations. Note that this monorepo is not exhaustive of all LangChain integrations; some are maintained in separate repos, such as `langchain-ai/langchain-google` and `langchain-ai/langchain-aws`. Usually these repos are cloned at the same level as this monorepo, so if needed, you can refer to their code directly by navigating to `../langchain-google/` from this monorepo.
|
||||
- **Testing layer** (`standard-tests/`): Standardized integration tests for partner integrations
|
||||
|
||||
```python
|
||||
def get_user(user_id: str, verbose: bool = False) -> User:
|
||||
"""Retrieve user by ID with optional verbose output."""
|
||||
pass
|
||||
```
|
||||
### Development tools & commands**
|
||||
|
||||
**Before making ANY changes to public APIs:**
|
||||
- `uv` – Fast Python package installer and resolver (replaces pip/poetry)
|
||||
- `make` – Task runner for common development commands. Feel free to look at the `Makefile` for available commands and usage patterns.
|
||||
- `ruff` – Fast Python linter and formatter
|
||||
- `mypy` – Static type checking
|
||||
- `pytest` – Testing framework
|
||||
|
||||
- Check if the function/class is exported in `__init__.py`
|
||||
- Look for existing usage patterns in tests and examples
|
||||
- Use keyword-only arguments for new parameters: `*, new_param: str = "default"`
|
||||
- Mark experimental features clearly with docstring warnings (using MkDocs Material admonitions, like `!!! warning`)
|
||||
This monorepo uses `uv` for dependency management. Local development uses editable installs: `[tool.uv.sources]`
|
||||
|
||||
🧠 *Ask yourself:* "Would this change break someone's code if they used it last week?"
|
||||
|
||||
### 2. Code Quality Standards
|
||||
|
||||
**All Python code MUST include type hints and return types.**
|
||||
|
||||
❌ **Bad:**
|
||||
|
||||
```python
|
||||
def p(u, d):
|
||||
return [x for x in u if x not in d]
|
||||
```
|
||||
|
||||
✅ **Good:**
|
||||
|
||||
```python
|
||||
def filter_unknown_users(users: list[str], known_users: set[str]) -> list[str]:
|
||||
"""Filter out users that are not in the known users set.
|
||||
|
||||
Args:
|
||||
users: List of user identifiers to filter.
|
||||
known_users: Set of known/valid user identifiers.
|
||||
|
||||
Returns:
|
||||
List of users that are not in the known_users set.
|
||||
"""
|
||||
return [user for user in users if user not in known_users]
|
||||
```
|
||||
|
||||
**Style Requirements:**
|
||||
|
||||
- Use descriptive, **self-explanatory variable names**. Avoid overly short or cryptic identifiers.
|
||||
- Attempt to break up complex functions (>20 lines) into smaller, focused functions where it makes sense
|
||||
- Avoid unnecessary abstraction or premature optimization
|
||||
- Follow existing patterns in the codebase you're modifying
|
||||
|
||||
### 3. Testing Requirements
|
||||
|
||||
**Every new feature or bugfix MUST be covered by unit tests.**
|
||||
|
||||
**Test Organization:**
|
||||
|
||||
- Unit tests: `tests/unit_tests/` (no network calls allowed)
|
||||
- Integration tests: `tests/integration_tests/` (network calls permitted)
|
||||
- Use `pytest` as the testing framework
|
||||
|
||||
**Test Quality Checklist:**
|
||||
|
||||
- [ ] Tests fail when your new logic is broken
|
||||
- [ ] Happy path is covered
|
||||
- [ ] Edge cases and error conditions are tested
|
||||
- [ ] Use fixtures/mocks for external dependencies
|
||||
- [ ] Tests are deterministic (no flaky tests)
|
||||
|
||||
Checklist questions:
|
||||
|
||||
- [ ] Does the test suite fail if your new logic is broken?
|
||||
- [ ] Are all expected behaviors exercised (happy path, invalid input, etc)?
|
||||
- [ ] Do tests use fixtures or mocks where needed?
|
||||
|
||||
```python
|
||||
def test_filter_unknown_users():
|
||||
"""Test filtering unknown users from a list."""
|
||||
users = ["alice", "bob", "charlie"]
|
||||
known_users = {"alice", "bob"}
|
||||
|
||||
result = filter_unknown_users(users, known_users)
|
||||
|
||||
assert result == ["charlie"]
|
||||
assert len(result) == 1
|
||||
```
|
||||
|
||||
### 4. Security and Risk Assessment
|
||||
|
||||
**Security Checklist:**
|
||||
|
||||
- No `eval()`, `exec()`, or `pickle` on user-controlled input
|
||||
- Proper exception handling (no bare `except:`) and use a `msg` variable for error messages
|
||||
- Remove unreachable/commented code before committing
|
||||
- Race conditions or resource leaks (file handles, sockets, threads).
|
||||
- Ensure proper resource cleanup (file handles, connections)
|
||||
|
||||
❌ **Bad:**
|
||||
|
||||
```python
|
||||
def load_config(path):
|
||||
with open(path) as f:
|
||||
return eval(f.read()) # ⚠️ Never eval config
|
||||
```
|
||||
|
||||
✅ **Good:**
|
||||
|
||||
```python
|
||||
import json
|
||||
|
||||
def load_config(path: str) -> dict:
|
||||
with open(path) as f:
|
||||
return json.load(f)
|
||||
```
|
||||
|
||||
### 5. Documentation Standards
|
||||
|
||||
**Use Google-style docstrings with Args section for all public functions.**
|
||||
|
||||
❌ **Insufficient Documentation:**
|
||||
|
||||
```python
|
||||
def send_email(to, msg):
|
||||
"""Send an email to a recipient."""
|
||||
```
|
||||
|
||||
✅ **Complete Documentation:**
|
||||
|
||||
```python
|
||||
def send_email(to: str, msg: str, *, priority: str = "normal") -> bool:
|
||||
"""
|
||||
Send an email to a recipient with specified priority.
|
||||
|
||||
Args:
|
||||
to: The email address of the recipient.
|
||||
msg: The message body to send.
|
||||
priority: Email priority level (`'low'`, `'normal'`, `'high'`).
|
||||
|
||||
Returns:
|
||||
`True` if email was sent successfully, `False` otherwise.
|
||||
|
||||
Raises:
|
||||
`InvalidEmailError`: If the email address format is invalid.
|
||||
`SMTPConnectionError`: If unable to connect to email server.
|
||||
"""
|
||||
```
|
||||
|
||||
**Documentation Guidelines:**
|
||||
|
||||
- Types go in function signatures, NOT in docstrings
|
||||
- If a default is present, DO NOT repeat it in the docstring unless there is post-processing or it is set conditionally.
|
||||
- Focus on "why" rather than "what" in descriptions
|
||||
- Document all parameters, return values, and exceptions
|
||||
- Keep descriptions concise but clear
|
||||
- Ensure American English spelling (e.g., "behavior", not "behaviour")
|
||||
|
||||
📌 *Tip:* Keep descriptions concise but clear. Only document return values if non-obvious.
|
||||
|
||||
### 6. Architectural Improvements
|
||||
|
||||
**When you encounter code that could be improved, suggest better designs:**
|
||||
|
||||
❌ **Poor Design:**
|
||||
|
||||
```python
|
||||
def process_data(data, db_conn, email_client, logger):
|
||||
# Function doing too many things
|
||||
validated = validate_data(data)
|
||||
result = db_conn.save(validated)
|
||||
email_client.send_notification(result)
|
||||
logger.log(f"Processed {len(data)} items")
|
||||
return result
|
||||
```
|
||||
|
||||
✅ **Better Design:**
|
||||
|
||||
```python
|
||||
@dataclass
|
||||
class ProcessingResult:
|
||||
"""Result of data processing operation."""
|
||||
items_processed: int
|
||||
success: bool
|
||||
errors: List[str] = field(default_factory=list)
|
||||
|
||||
class DataProcessor:
|
||||
"""Handles data validation, storage, and notification."""
|
||||
|
||||
def __init__(self, db_conn: Database, email_client: EmailClient):
|
||||
self.db = db_conn
|
||||
self.email = email_client
|
||||
|
||||
def process(self, data: List[dict]) -> ProcessingResult:
|
||||
"""Process and store data with notifications."""
|
||||
validated = self._validate_data(data)
|
||||
result = self.db.save(validated)
|
||||
self._notify_completion(result)
|
||||
return result
|
||||
```
|
||||
|
||||
**Design Improvement Areas:**
|
||||
|
||||
If there's a **cleaner**, **more scalable**, or **simpler** design, highlight it and suggest improvements that would:
|
||||
|
||||
- Reduce code duplication through shared utilities
|
||||
- Make unit testing easier
|
||||
- Improve separation of concerns (single responsibility)
|
||||
- Make unit testing easier through dependency injection
|
||||
- Add clarity without adding complexity
|
||||
- Prefer dataclasses for structured data
|
||||
|
||||
## Development Tools & Commands
|
||||
|
||||
### Package Management
|
||||
|
||||
```bash
|
||||
# Add package
|
||||
uv add package-name
|
||||
|
||||
# Sync project dependencies
|
||||
uv sync
|
||||
uv lock
|
||||
```
|
||||
|
||||
### Testing
|
||||
Each package in `libs/` has its own `pyproject.toml` and `uv.lock`.
|
||||
|
||||
```bash
|
||||
# Run unit tests (no network)
|
||||
make test
|
||||
|
||||
# Don't run integration tests, as API keys must be set
|
||||
|
||||
# Run specific test file
|
||||
uv run --group test pytest tests/unit_tests/test_specific.py
|
||||
```
|
||||
|
||||
### Code Quality
|
||||
|
||||
```bash
|
||||
# Lint code
|
||||
make lint
|
||||
@@ -261,66 +64,118 @@ make format
|
||||
uv run --group lint mypy .
|
||||
```
|
||||
|
||||
### Dependency Management Patterns
|
||||
#### Key config files
|
||||
|
||||
**Local Development Dependencies:**
|
||||
- pyproject.toml: Main workspace configuration with dependency groups
|
||||
- uv.lock: Locked dependencies for reproducible builds
|
||||
- Makefile: Development tasks
|
||||
|
||||
```toml
|
||||
[tool.uv.sources]
|
||||
langchain-core = { path = "../core", editable = true }
|
||||
langchain-tests = { path = "../standard-tests", editable = true }
|
||||
```
|
||||
#### Commit standards
|
||||
|
||||
**For tools, use the `@tool` decorator from `langchain_core.tools`:**
|
||||
Suggest PR titles that follow Conventional Commits format. Refer to .github/workflows/pr_lint for allowed types and scopes.
|
||||
|
||||
```python
|
||||
from langchain_core.tools import tool
|
||||
#### Pull request guidelines
|
||||
|
||||
@tool
|
||||
def search_database(query: str) -> str:
|
||||
"""Search the database for relevant information.
|
||||
- Always add a disclaimer to the PR description mentioning how AI agents are involved with the contribution.
|
||||
- Describe the "why" of the changes, why the proposed solution is the right one. Limit prose.
|
||||
- Highlight areas of the proposed changes that require careful review.
|
||||
|
||||
## Core development principles
|
||||
|
||||
### Maintain stable public interfaces
|
||||
|
||||
CRITICAL: Always attempt to preserve function signatures, argument positions, and names for exported/public methods. Do not make breaking changes.
|
||||
|
||||
**Before making ANY changes to public APIs:**
|
||||
|
||||
- Check if the function/class is exported in `__init__.py`
|
||||
- Look for existing usage patterns in tests and examples
|
||||
- Use keyword-only arguments for new parameters: `*, new_param: str = "default"`
|
||||
- Mark experimental features clearly with docstring warnings (using MkDocs Material admonitions, like `!!! warning`)
|
||||
|
||||
Ask: "Would this change break someone's code if they used it last week?"
|
||||
|
||||
### Code quality standards
|
||||
|
||||
All Python code MUST include type hints and return types.
|
||||
|
||||
```python title="Example"
|
||||
def filter_unknown_users(users: list[str], known_users: set[str]) -> list[str]:
|
||||
"""Single line description of the function.
|
||||
|
||||
Any additional context about the function can go here.
|
||||
|
||||
Args:
|
||||
query: The search query string.
|
||||
users: List of user identifiers to filter.
|
||||
known_users: Set of known/valid user identifiers.
|
||||
|
||||
Returns:
|
||||
List of users that are not in the known_users set.
|
||||
"""
|
||||
# Implementation here
|
||||
return results
|
||||
```
|
||||
|
||||
## Commit Standards
|
||||
- Use descriptive, self-explanatory variable names.
|
||||
- Follow existing patterns in the codebase you're modifying
|
||||
- Attempt to break up complex functions (>20 lines) into smaller, focused functions where it makes sense
|
||||
|
||||
**Use Conventional Commits format for PR titles:**
|
||||
### Testing requirements
|
||||
|
||||
- `feat(core): add multi-tenant support`
|
||||
- `fix(cli): resolve flag parsing error`
|
||||
- `docs: update API usage examples`
|
||||
- `docs(openai): update API usage examples`
|
||||
Every new feature or bugfix MUST be covered by unit tests.
|
||||
|
||||
## Framework-Specific Guidelines
|
||||
- Unit tests: `tests/unit_tests/` (no network calls allowed)
|
||||
- Integration tests: `tests/integration_tests/` (network calls permitted)
|
||||
- We use `pytest` as the testing framework; if in doubt, check other existing tests for examples.
|
||||
- The testing file structure should mirror the source code structure.
|
||||
|
||||
- Follow the existing patterns in `langchain-core` for base abstractions
|
||||
- Use `langchain_core.callbacks` for execution tracking
|
||||
- Implement proper streaming support where applicable
|
||||
- Avoid deprecated components like legacy `LLMChain`
|
||||
**Checklist:**
|
||||
|
||||
### Partner Integrations
|
||||
- [ ] Tests fail when your new logic is broken
|
||||
- [ ] Happy path is covered
|
||||
- [ ] Edge cases and error conditions are tested
|
||||
- [ ] Use fixtures/mocks for external dependencies
|
||||
- [ ] Tests are deterministic (no flaky tests)
|
||||
- [ ] Does the test suite fail if your new logic is broken?
|
||||
|
||||
- Follow the established patterns in existing partner libraries
|
||||
- Implement standard interfaces (`BaseChatModel`, `BaseEmbeddings`, etc.)
|
||||
- Include comprehensive integration tests
|
||||
- Document API key requirements and authentication
|
||||
### Security and risk assessment
|
||||
|
||||
---
|
||||
- No `eval()`, `exec()`, or `pickle` on user-controlled input
|
||||
- Proper exception handling (no bare `except:`) and use a `msg` variable for error messages
|
||||
- Remove unreachable/commented code before committing
|
||||
- Race conditions or resource leaks (file handles, sockets, threads).
|
||||
- Ensure proper resource cleanup (file handles, connections)
|
||||
|
||||
## Quick Reference Checklist
|
||||
### Documentation standards
|
||||
|
||||
Before submitting code changes:
|
||||
Use Google-style docstrings with Args section for all public functions.
|
||||
|
||||
- [ ] **Breaking Changes**: Verified no public API changes
|
||||
- [ ] **Type Hints**: All functions have complete type annotations
|
||||
- [ ] **Tests**: New functionality is fully tested
|
||||
- [ ] **Security**: No dangerous patterns (eval, silent failures, etc.)
|
||||
- [ ] **Documentation**: Google-style docstrings for public functions
|
||||
- [ ] **Code Quality**: `make lint` and `make format` pass
|
||||
- [ ] **Architecture**: Suggested improvements where applicable
|
||||
- [ ] **Commit Message**: Follows Conventional Commits format
|
||||
```python title="Example"
|
||||
def send_email(to: str, msg: str, *, priority: str = "normal") -> bool:
|
||||
"""Send an email to a recipient with specified priority.
|
||||
|
||||
Any additional context about the function can go here.
|
||||
|
||||
Args:
|
||||
to: The email address of the recipient.
|
||||
msg: The message body to send.
|
||||
priority: Email priority level.
|
||||
|
||||
Returns:
|
||||
`True` if email was sent successfully, `False` otherwise.
|
||||
|
||||
Raises:
|
||||
InvalidEmailError: If the email address format is invalid.
|
||||
SMTPConnectionError: If unable to connect to email server.
|
||||
"""
|
||||
```
|
||||
|
||||
- Types go in function signatures, NOT in docstrings
|
||||
- If a default is present, DO NOT repeat it in the docstring unless there is post-processing or it is set conditionally.
|
||||
- Focus on "why" rather than "what" in descriptions
|
||||
- Document all parameters, return values, and exceptions
|
||||
- Keep descriptions concise but clear
|
||||
- Ensure American English spelling (e.g., "behavior", not "behaviour")
|
||||
|
||||
## Additional resources
|
||||
|
||||
- **Documentation:** https://docs.langchain.com/oss/python/langchain/overview and source at https://github.com/langchain-ai/docs or `../docs/`. Prefer the local install and use file search tools for best results. If needed, use the docs MCP server as defined in `.mcp.json` for programmatic access.
|
||||
- **Contributing Guide:** [`.github/CONTRIBUTING.md`](https://docs.langchain.com/oss/python/contributing/overview)
|
||||
|
||||
405
CLAUDE.md
405
CLAUDE.md
@@ -1,255 +1,58 @@
|
||||
# Global Development Guidelines for LangChain Projects
|
||||
# Global development guidelines for the LangChain monorepo
|
||||
|
||||
## Core Development Principles
|
||||
This document provides context to understand the LangChain Python project and assist with development.
|
||||
|
||||
### 1. Maintain Stable Public Interfaces ⚠️ CRITICAL
|
||||
## Project architecture and context
|
||||
|
||||
**Always attempt to preserve function signatures, argument positions, and names for exported/public methods.**
|
||||
### Monorepo structure
|
||||
|
||||
❌ **Bad - Breaking Change:**
|
||||
This is a Python monorepo with multiple independently versioned packages that use `uv`.
|
||||
|
||||
```python
|
||||
def get_user(id, verbose=False): # Changed from `user_id`
|
||||
pass
|
||||
```txt
|
||||
langchain/
|
||||
├── libs/
|
||||
│ ├── core/ # `langchain-core` primitives and base abstractions
|
||||
│ ├── langchain/ # `langchain-classic` (legacy, no new features)
|
||||
│ ├── langchain_v1/ # Actively maintained `langchain` package
|
||||
│ ├── partners/ # Third-party integrations
|
||||
│ │ ├── openai/ # OpenAI models and embeddings
|
||||
│ │ ├── anthropic/ # Anthropic (Claude) integration
|
||||
│ │ ├── ollama/ # Local model support
|
||||
│ │ └── ... (other integrations maintained by the LangChain team)
|
||||
│ ├── text-splitters/ # Document chunking utilities
|
||||
│ ├── standard-tests/ # Shared test suite for integrations
|
||||
│ ├── model-profiles/ # Model configuration profiles
|
||||
│ └── cli/ # Command-line interface tools
|
||||
├── .github/ # CI/CD workflows and templates
|
||||
├── .vscode/ # VSCode IDE standard settings and recommended extensions
|
||||
└── README.md # Information about LangChain
|
||||
```
|
||||
|
||||
✅ **Good - Stable Interface:**
|
||||
- **Core layer** (`langchain-core`): Base abstractions, interfaces, and protocols. Users should not need to know about this layer directly.
|
||||
- **Implementation layer** (`langchain`): Concrete implementations and high-level public utilities
|
||||
- **Integration layer** (`partners/`): Third-party service integrations. Note that this monorepo is not exhaustive of all LangChain integrations; some are maintained in separate repos, such as `langchain-ai/langchain-google` and `langchain-ai/langchain-aws`. Usually these repos are cloned at the same level as this monorepo, so if needed, you can refer to their code directly by navigating to `../langchain-google/` from this monorepo.
|
||||
- **Testing layer** (`standard-tests/`): Standardized integration tests for partner integrations
|
||||
|
||||
```python
|
||||
def get_user(user_id: str, verbose: bool = False) -> User:
|
||||
"""Retrieve user by ID with optional verbose output."""
|
||||
pass
|
||||
```
|
||||
### Development tools & commands**
|
||||
|
||||
**Before making ANY changes to public APIs:**
|
||||
- `uv` – Fast Python package installer and resolver (replaces pip/poetry)
|
||||
- `make` – Task runner for common development commands. Feel free to look at the `Makefile` for available commands and usage patterns.
|
||||
- `ruff` – Fast Python linter and formatter
|
||||
- `mypy` – Static type checking
|
||||
- `pytest` – Testing framework
|
||||
|
||||
- Check if the function/class is exported in `__init__.py`
|
||||
- Look for existing usage patterns in tests and examples
|
||||
- Use keyword-only arguments for new parameters: `*, new_param: str = "default"`
|
||||
- Mark experimental features clearly with docstring warnings (using MkDocs Material admonitions, like `!!! warning`)
|
||||
This monorepo uses `uv` for dependency management. Local development uses editable installs: `[tool.uv.sources]`
|
||||
|
||||
🧠 *Ask yourself:* "Would this change break someone's code if they used it last week?"
|
||||
|
||||
### 2. Code Quality Standards
|
||||
|
||||
**All Python code MUST include type hints and return types.**
|
||||
|
||||
❌ **Bad:**
|
||||
|
||||
```python
|
||||
def p(u, d):
|
||||
return [x for x in u if x not in d]
|
||||
```
|
||||
|
||||
✅ **Good:**
|
||||
|
||||
```python
|
||||
def filter_unknown_users(users: list[str], known_users: set[str]) -> list[str]:
|
||||
"""Filter out users that are not in the known users set.
|
||||
|
||||
Args:
|
||||
users: List of user identifiers to filter.
|
||||
known_users: Set of known/valid user identifiers.
|
||||
|
||||
Returns:
|
||||
List of users that are not in the known_users set.
|
||||
"""
|
||||
return [user for user in users if user not in known_users]
|
||||
```
|
||||
|
||||
**Style Requirements:**
|
||||
|
||||
- Use descriptive, **self-explanatory variable names**. Avoid overly short or cryptic identifiers.
|
||||
- Attempt to break up complex functions (>20 lines) into smaller, focused functions where it makes sense
|
||||
- Avoid unnecessary abstraction or premature optimization
|
||||
- Follow existing patterns in the codebase you're modifying
|
||||
|
||||
### 3. Testing Requirements
|
||||
|
||||
**Every new feature or bugfix MUST be covered by unit tests.**
|
||||
|
||||
**Test Organization:**
|
||||
|
||||
- Unit tests: `tests/unit_tests/` (no network calls allowed)
|
||||
- Integration tests: `tests/integration_tests/` (network calls permitted)
|
||||
- Use `pytest` as the testing framework
|
||||
|
||||
**Test Quality Checklist:**
|
||||
|
||||
- [ ] Tests fail when your new logic is broken
|
||||
- [ ] Happy path is covered
|
||||
- [ ] Edge cases and error conditions are tested
|
||||
- [ ] Use fixtures/mocks for external dependencies
|
||||
- [ ] Tests are deterministic (no flaky tests)
|
||||
|
||||
Checklist questions:
|
||||
|
||||
- [ ] Does the test suite fail if your new logic is broken?
|
||||
- [ ] Are all expected behaviors exercised (happy path, invalid input, etc)?
|
||||
- [ ] Do tests use fixtures or mocks where needed?
|
||||
|
||||
```python
|
||||
def test_filter_unknown_users():
|
||||
"""Test filtering unknown users from a list."""
|
||||
users = ["alice", "bob", "charlie"]
|
||||
known_users = {"alice", "bob"}
|
||||
|
||||
result = filter_unknown_users(users, known_users)
|
||||
|
||||
assert result == ["charlie"]
|
||||
assert len(result) == 1
|
||||
```
|
||||
|
||||
### 4. Security and Risk Assessment
|
||||
|
||||
**Security Checklist:**
|
||||
|
||||
- No `eval()`, `exec()`, or `pickle` on user-controlled input
|
||||
- Proper exception handling (no bare `except:`) and use a `msg` variable for error messages
|
||||
- Remove unreachable/commented code before committing
|
||||
- Race conditions or resource leaks (file handles, sockets, threads).
|
||||
- Ensure proper resource cleanup (file handles, connections)
|
||||
|
||||
❌ **Bad:**
|
||||
|
||||
```python
|
||||
def load_config(path):
|
||||
with open(path) as f:
|
||||
return eval(f.read()) # ⚠️ Never eval config
|
||||
```
|
||||
|
||||
✅ **Good:**
|
||||
|
||||
```python
|
||||
import json
|
||||
|
||||
def load_config(path: str) -> dict:
|
||||
with open(path) as f:
|
||||
return json.load(f)
|
||||
```
|
||||
|
||||
### 5. Documentation Standards
|
||||
|
||||
**Use Google-style docstrings with Args section for all public functions.**
|
||||
|
||||
❌ **Insufficient Documentation:**
|
||||
|
||||
```python
|
||||
def send_email(to, msg):
|
||||
"""Send an email to a recipient."""
|
||||
```
|
||||
|
||||
✅ **Complete Documentation:**
|
||||
|
||||
```python
|
||||
def send_email(to: str, msg: str, *, priority: str = "normal") -> bool:
|
||||
"""
|
||||
Send an email to a recipient with specified priority.
|
||||
|
||||
Args:
|
||||
to: The email address of the recipient.
|
||||
msg: The message body to send.
|
||||
priority: Email priority level (`'low'`, `'normal'`, `'high'`).
|
||||
|
||||
Returns:
|
||||
`True` if email was sent successfully, `False` otherwise.
|
||||
|
||||
Raises:
|
||||
`InvalidEmailError`: If the email address format is invalid.
|
||||
`SMTPConnectionError`: If unable to connect to email server.
|
||||
"""
|
||||
```
|
||||
|
||||
**Documentation Guidelines:**
|
||||
|
||||
- Types go in function signatures, NOT in docstrings
|
||||
- If a default is present, DO NOT repeat it in the docstring unless there is post-processing or it is set conditionally.
|
||||
- Focus on "why" rather than "what" in descriptions
|
||||
- Document all parameters, return values, and exceptions
|
||||
- Keep descriptions concise but clear
|
||||
- Ensure American English spelling (e.g., "behavior", not "behaviour")
|
||||
|
||||
📌 *Tip:* Keep descriptions concise but clear. Only document return values if non-obvious.
|
||||
|
||||
### 6. Architectural Improvements
|
||||
|
||||
**When you encounter code that could be improved, suggest better designs:**
|
||||
|
||||
❌ **Poor Design:**
|
||||
|
||||
```python
|
||||
def process_data(data, db_conn, email_client, logger):
|
||||
# Function doing too many things
|
||||
validated = validate_data(data)
|
||||
result = db_conn.save(validated)
|
||||
email_client.send_notification(result)
|
||||
logger.log(f"Processed {len(data)} items")
|
||||
return result
|
||||
```
|
||||
|
||||
✅ **Better Design:**
|
||||
|
||||
```python
|
||||
@dataclass
|
||||
class ProcessingResult:
|
||||
"""Result of data processing operation."""
|
||||
items_processed: int
|
||||
success: bool
|
||||
errors: List[str] = field(default_factory=list)
|
||||
|
||||
class DataProcessor:
|
||||
"""Handles data validation, storage, and notification."""
|
||||
|
||||
def __init__(self, db_conn: Database, email_client: EmailClient):
|
||||
self.db = db_conn
|
||||
self.email = email_client
|
||||
|
||||
def process(self, data: List[dict]) -> ProcessingResult:
|
||||
"""Process and store data with notifications."""
|
||||
validated = self._validate_data(data)
|
||||
result = self.db.save(validated)
|
||||
self._notify_completion(result)
|
||||
return result
|
||||
```
|
||||
|
||||
**Design Improvement Areas:**
|
||||
|
||||
If there's a **cleaner**, **more scalable**, or **simpler** design, highlight it and suggest improvements that would:
|
||||
|
||||
- Reduce code duplication through shared utilities
|
||||
- Make unit testing easier
|
||||
- Improve separation of concerns (single responsibility)
|
||||
- Make unit testing easier through dependency injection
|
||||
- Add clarity without adding complexity
|
||||
- Prefer dataclasses for structured data
|
||||
|
||||
## Development Tools & Commands
|
||||
|
||||
### Package Management
|
||||
|
||||
```bash
|
||||
# Add package
|
||||
uv add package-name
|
||||
|
||||
# Sync project dependencies
|
||||
uv sync
|
||||
uv lock
|
||||
```
|
||||
|
||||
### Testing
|
||||
Each package in `libs/` has its own `pyproject.toml` and `uv.lock`.
|
||||
|
||||
```bash
|
||||
# Run unit tests (no network)
|
||||
make test
|
||||
|
||||
# Don't run integration tests, as API keys must be set
|
||||
|
||||
# Run specific test file
|
||||
uv run --group test pytest tests/unit_tests/test_specific.py
|
||||
```
|
||||
|
||||
### Code Quality
|
||||
|
||||
```bash
|
||||
# Lint code
|
||||
make lint
|
||||
@@ -261,66 +64,118 @@ make format
|
||||
uv run --group lint mypy .
|
||||
```
|
||||
|
||||
### Dependency Management Patterns
|
||||
#### Key config files
|
||||
|
||||
**Local Development Dependencies:**
|
||||
- pyproject.toml: Main workspace configuration with dependency groups
|
||||
- uv.lock: Locked dependencies for reproducible builds
|
||||
- Makefile: Development tasks
|
||||
|
||||
```toml
|
||||
[tool.uv.sources]
|
||||
langchain-core = { path = "../core", editable = true }
|
||||
langchain-tests = { path = "../standard-tests", editable = true }
|
||||
```
|
||||
#### Commit standards
|
||||
|
||||
**For tools, use the `@tool` decorator from `langchain_core.tools`:**
|
||||
Suggest PR titles that follow Conventional Commits format. Refer to .github/workflows/pr_lint for allowed types and scopes.
|
||||
|
||||
```python
|
||||
from langchain_core.tools import tool
|
||||
#### Pull request guidelines
|
||||
|
||||
@tool
|
||||
def search_database(query: str) -> str:
|
||||
"""Search the database for relevant information.
|
||||
- Always add a disclaimer to the PR description mentioning how AI agents are involved with the contribution.
|
||||
- Describe the "why" of the changes, why the proposed solution is the right one. Limit prose.
|
||||
- Highlight areas of the proposed changes that require careful review.
|
||||
|
||||
## Core development principles
|
||||
|
||||
### Maintain stable public interfaces
|
||||
|
||||
CRITICAL: Always attempt to preserve function signatures, argument positions, and names for exported/public methods. Do not make breaking changes.
|
||||
|
||||
**Before making ANY changes to public APIs:**
|
||||
|
||||
- Check if the function/class is exported in `__init__.py`
|
||||
- Look for existing usage patterns in tests and examples
|
||||
- Use keyword-only arguments for new parameters: `*, new_param: str = "default"`
|
||||
- Mark experimental features clearly with docstring warnings (using MkDocs Material admonitions, like `!!! warning`)
|
||||
|
||||
Ask: "Would this change break someone's code if they used it last week?"
|
||||
|
||||
### Code quality standards
|
||||
|
||||
All Python code MUST include type hints and return types.
|
||||
|
||||
```python title="Example"
|
||||
def filter_unknown_users(users: list[str], known_users: set[str]) -> list[str]:
|
||||
"""Single line description of the function.
|
||||
|
||||
Any additional context about the function can go here.
|
||||
|
||||
Args:
|
||||
query: The search query string.
|
||||
users: List of user identifiers to filter.
|
||||
known_users: Set of known/valid user identifiers.
|
||||
|
||||
Returns:
|
||||
List of users that are not in the known_users set.
|
||||
"""
|
||||
# Implementation here
|
||||
return results
|
||||
```
|
||||
|
||||
## Commit Standards
|
||||
- Use descriptive, self-explanatory variable names.
|
||||
- Follow existing patterns in the codebase you're modifying
|
||||
- Attempt to break up complex functions (>20 lines) into smaller, focused functions where it makes sense
|
||||
|
||||
**Use Conventional Commits format for PR titles:**
|
||||
### Testing requirements
|
||||
|
||||
- `feat(core): add multi-tenant support`
|
||||
- `fix(cli): resolve flag parsing error`
|
||||
- `docs: update API usage examples`
|
||||
- `docs(openai): update API usage examples`
|
||||
Every new feature or bugfix MUST be covered by unit tests.
|
||||
|
||||
## Framework-Specific Guidelines
|
||||
- Unit tests: `tests/unit_tests/` (no network calls allowed)
|
||||
- Integration tests: `tests/integration_tests/` (network calls permitted)
|
||||
- We use `pytest` as the testing framework; if in doubt, check other existing tests for examples.
|
||||
- The testing file structure should mirror the source code structure.
|
||||
|
||||
- Follow the existing patterns in `langchain-core` for base abstractions
|
||||
- Use `langchain_core.callbacks` for execution tracking
|
||||
- Implement proper streaming support where applicable
|
||||
- Avoid deprecated components like legacy `LLMChain`
|
||||
**Checklist:**
|
||||
|
||||
### Partner Integrations
|
||||
- [ ] Tests fail when your new logic is broken
|
||||
- [ ] Happy path is covered
|
||||
- [ ] Edge cases and error conditions are tested
|
||||
- [ ] Use fixtures/mocks for external dependencies
|
||||
- [ ] Tests are deterministic (no flaky tests)
|
||||
- [ ] Does the test suite fail if your new logic is broken?
|
||||
|
||||
- Follow the established patterns in existing partner libraries
|
||||
- Implement standard interfaces (`BaseChatModel`, `BaseEmbeddings`, etc.)
|
||||
- Include comprehensive integration tests
|
||||
- Document API key requirements and authentication
|
||||
### Security and risk assessment
|
||||
|
||||
---
|
||||
- No `eval()`, `exec()`, or `pickle` on user-controlled input
|
||||
- Proper exception handling (no bare `except:`) and use a `msg` variable for error messages
|
||||
- Remove unreachable/commented code before committing
|
||||
- Race conditions or resource leaks (file handles, sockets, threads).
|
||||
- Ensure proper resource cleanup (file handles, connections)
|
||||
|
||||
## Quick Reference Checklist
|
||||
### Documentation standards
|
||||
|
||||
Before submitting code changes:
|
||||
Use Google-style docstrings with Args section for all public functions.
|
||||
|
||||
- [ ] **Breaking Changes**: Verified no public API changes
|
||||
- [ ] **Type Hints**: All functions have complete type annotations
|
||||
- [ ] **Tests**: New functionality is fully tested
|
||||
- [ ] **Security**: No dangerous patterns (eval, silent failures, etc.)
|
||||
- [ ] **Documentation**: Google-style docstrings for public functions
|
||||
- [ ] **Code Quality**: `make lint` and `make format` pass
|
||||
- [ ] **Architecture**: Suggested improvements where applicable
|
||||
- [ ] **Commit Message**: Follows Conventional Commits format
|
||||
```python title="Example"
|
||||
def send_email(to: str, msg: str, *, priority: str = "normal") -> bool:
|
||||
"""Send an email to a recipient with specified priority.
|
||||
|
||||
Any additional context about the function can go here.
|
||||
|
||||
Args:
|
||||
to: The email address of the recipient.
|
||||
msg: The message body to send.
|
||||
priority: Email priority level.
|
||||
|
||||
Returns:
|
||||
`True` if email was sent successfully, `False` otherwise.
|
||||
|
||||
Raises:
|
||||
InvalidEmailError: If the email address format is invalid.
|
||||
SMTPConnectionError: If unable to connect to email server.
|
||||
"""
|
||||
```
|
||||
|
||||
- Types go in function signatures, NOT in docstrings
|
||||
- If a default is present, DO NOT repeat it in the docstring unless there is post-processing or it is set conditionally.
|
||||
- Focus on "why" rather than "what" in descriptions
|
||||
- Document all parameters, return values, and exceptions
|
||||
- Keep descriptions concise but clear
|
||||
- Ensure American English spelling (e.g., "behavior", not "behaviour")
|
||||
|
||||
## Additional resources
|
||||
|
||||
- **Documentation:** https://docs.langchain.com/oss/python/langchain/overview and source at https://github.com/langchain-ai/docs or `../docs/`. Prefer the local install and use file search tools for best results. If needed, use the docs MCP server as defined in `.mcp.json` for programmatic access.
|
||||
- **Contributing Guide:** [`.github/CONTRIBUTING.md`](https://docs.langchain.com/oss/python/contributing/overview)
|
||||
|
||||
@@ -1,9 +0,0 @@
|
||||
# Migrating
|
||||
|
||||
Please see the following guides for migrating LangChain code:
|
||||
|
||||
* Migrate to [LangChain v1.0](https://docs.langchain.com/oss/python/migrate/langchain-v1)
|
||||
* Migrate to [LangChain v0.3](https://python.langchain.com/docs/versions/v0_3/)
|
||||
* Migrate to [LangChain v0.2](https://python.langchain.com/docs/versions/v0_2/)
|
||||
* Migrating from [LangChain 0.0.x Chains](https://python.langchain.com/docs/versions/migrating_chains/)
|
||||
* Upgrade to [LangGraph Memory](https://python.langchain.com/docs/versions/migrating_memory/)
|
||||
92
README.md
92
README.md
@@ -1,50 +1,43 @@
|
||||
<p align="center">
|
||||
<picture>
|
||||
<source media="(prefers-color-scheme: light)" srcset=".github/images/logo-dark.svg">
|
||||
<source media="(prefers-color-scheme: dark)" srcset=".github/images/logo-light.svg">
|
||||
<img alt="LangChain Logo" src=".github/images/logo-dark.svg" width="80%">
|
||||
</picture>
|
||||
</p>
|
||||
<div align="center">
|
||||
<a href="https://www.langchain.com/">
|
||||
<picture>
|
||||
<source media="(prefers-color-scheme: light)" srcset=".github/images/logo-dark.svg">
|
||||
<source media="(prefers-color-scheme: dark)" srcset=".github/images/logo-light.svg">
|
||||
<img alt="LangChain Logo" src=".github/images/logo-dark.svg" width="80%">
|
||||
</picture>
|
||||
</a>
|
||||
</div>
|
||||
|
||||
<p align="center">
|
||||
The platform for reliable agents.
|
||||
</p>
|
||||
<div align="center">
|
||||
<h3>The platform for reliable agents.</h3>
|
||||
</div>
|
||||
|
||||
<p align="center">
|
||||
<a href="https://opensource.org/licenses/MIT" target="_blank">
|
||||
<img src="https://img.shields.io/pypi/l/langchain" alt="PyPI - License">
|
||||
</a>
|
||||
<a href="https://pypistats.org/packages/langchain" target="_blank">
|
||||
<img src="https://img.shields.io/pepy/dt/langchain" alt="PyPI - Downloads">
|
||||
</a>
|
||||
<a href="https://pypi.org/project/langchain/#history" target="_blank">
|
||||
<img src="https://img.shields.io/pypi/v/langchain?label=%20" alt="Version">
|
||||
</a>
|
||||
<a href="https://vscode.dev/redirect?url=vscode://ms-vscode-remote.remote-containers/cloneInVolume?url=https://github.com/langchain-ai/langchain" target="_blank">
|
||||
<img src="https://img.shields.io/static/v1?label=Dev%20Containers&message=Open&color=blue&logo=visualstudiocode" alt="Open in Dev Containers">
|
||||
</a>
|
||||
<a href="https://codespaces.new/langchain-ai/langchain" target="_blank">
|
||||
<img src="https://github.com/codespaces/badge.svg" alt="Open in Github Codespace" title="Open in Github Codespace" width="150" height="20">
|
||||
</a>
|
||||
<a href="https://codspeed.io/langchain-ai/langchain" target="_blank">
|
||||
<img src="https://img.shields.io/endpoint?url=https://codspeed.io/badge.json" alt="CodSpeed Badge">
|
||||
</a>
|
||||
<a href="https://twitter.com/langchainai" target="_blank">
|
||||
<img src="https://img.shields.io/twitter/url/https/twitter.com/langchainai.svg?style=social&label=Follow%20%40LangChainAI" alt="Twitter / X">
|
||||
</a>
|
||||
</p>
|
||||
<div align="center">
|
||||
<a href="https://opensource.org/licenses/MIT" target="_blank"><img src="https://img.shields.io/pypi/l/langchain" alt="PyPI - License"></a>
|
||||
<a href="https://pypistats.org/packages/langchain" target="_blank"><img src="https://img.shields.io/pepy/dt/langchain" alt="PyPI - Downloads"></a>
|
||||
<a href="https://pypi.org/project/langchain/#history" target="_blank"><img src="https://img.shields.io/pypi/v/langchain?label=%20" alt="Version"></a>
|
||||
<a href="https://vscode.dev/redirect?url=vscode://ms-vscode-remote.remote-containers/cloneInVolume?url=https://github.com/langchain-ai/langchain" target="_blank"><img src="https://img.shields.io/static/v1?label=Dev%20Containers&message=Open&color=blue&logo=visualstudiocode" alt="Open in Dev Containers"></a>
|
||||
<a href="https://codespaces.new/langchain-ai/langchain" target="_blank"><img src="https://github.com/codespaces/badge.svg" alt="Open in Github Codespace" title="Open in Github Codespace" width="150" height="20"></a>
|
||||
<a href="https://codspeed.io/langchain-ai/langchain" target="_blank"><img src="https://img.shields.io/endpoint?url=https://codspeed.io/badge.json" alt="CodSpeed Badge"></a>
|
||||
<a href="https://twitter.com/langchainai" target="_blank"><img src="https://img.shields.io/twitter/url/https/twitter.com/langchainai.svg?style=social&label=Follow%20%40LangChainAI" alt="Twitter / X"></a>
|
||||
</div>
|
||||
|
||||
LangChain is a framework for building LLM-powered applications. It helps you chain together interoperable components and third-party integrations to simplify AI application development — all while future-proofing decisions as the underlying technology evolves.
|
||||
LangChain is a framework for building agents and LLM-powered applications. It helps you chain together interoperable components and third-party integrations to simplify AI application development – all while future-proofing decisions as the underlying technology evolves.
|
||||
|
||||
```bash
|
||||
pip install langchain
|
||||
```
|
||||
|
||||
If you're looking for more advanced customization or agent orchestration, check out [LangGraph](https://docs.langchain.com/oss/python/langgraph/overview), our framework for building controllable agent workflows.
|
||||
|
||||
---
|
||||
|
||||
**Documentation**: To learn more about LangChain, check out [the docs](https://docs.langchain.com/oss/python/langchain/overview).
|
||||
**Documentation**:
|
||||
|
||||
If you're looking for more advanced customization or agent orchestration, check out [LangGraph](https://docs.langchain.com/oss/python/langgraph/overview), our framework for building controllable agent workflows.
|
||||
- [docs.langchain.com](https://docs.langchain.com/oss/python/langchain/overview) – Comprehensive documentation, including conceptual overviews and guides
|
||||
- [reference.langchain.com/python](https://reference.langchain.com/python) – API reference docs for LangChain packages
|
||||
|
||||
**Discussions**: Visit the [LangChain Forum](https://forum.langchain.com) to connect with the community and share all of your technical questions, ideas, and feedback.
|
||||
|
||||
> [!NOTE]
|
||||
> Looking for the JS/TS library? Check out [LangChain.js](https://github.com/langchain-ai/langchainjs).
|
||||
@@ -55,24 +48,27 @@ LangChain helps developers build applications powered by LLMs through a standard
|
||||
|
||||
Use LangChain for:
|
||||
|
||||
- **Real-time data augmentation**. Easily connect LLMs to diverse data sources and external/internal systems, drawing from LangChain’s vast library of integrations with model providers, tools, vector stores, retrievers, and more.
|
||||
- **Model interoperability**. Swap models in and out as your engineering team experiments to find the best choice for your application’s needs. As the industry frontier evolves, adapt quickly — LangChain’s abstractions keep you moving without losing momentum.
|
||||
- **Real-time data augmentation**. Easily connect LLMs to diverse data sources and external/internal systems, drawing from LangChain's vast library of integrations with model providers, tools, vector stores, retrievers, and more.
|
||||
- **Model interoperability**. Swap models in and out as your engineering team experiments to find the best choice for your application's needs. As the industry frontier evolves, adapt quickly – LangChain's abstractions keep you moving without losing momentum.
|
||||
- **Rapid prototyping**. Quickly build and iterate on LLM applications with LangChain's modular, component-based architecture. Test different approaches and workflows without rebuilding from scratch, accelerating your development cycle.
|
||||
- **Production-ready features**. Deploy reliable applications with built-in support for monitoring, evaluation, and debugging through integrations like LangSmith. Scale with confidence using battle-tested patterns and best practices.
|
||||
- **Vibrant community and ecosystem**. Leverage a rich ecosystem of integrations, templates, and community-contributed components. Benefit from continuous improvements and stay up-to-date with the latest AI developments through an active open-source community.
|
||||
- **Flexible abstraction layers**. Work at the level of abstraction that suits your needs - from high-level chains for quick starts to low-level components for fine-grained control. LangChain grows with your application's complexity.
|
||||
|
||||
## LangChain’s ecosystem
|
||||
## LangChain ecosystem
|
||||
|
||||
While the LangChain framework can be used standalone, it also integrates seamlessly with any LangChain product, giving developers a full suite of tools when building LLM applications.
|
||||
|
||||
To improve your LLM application development, pair LangChain with:
|
||||
|
||||
- [LangSmith](https://www.langchain.com/langsmith) - Helpful for agent evals and observability. Debug poor-performing LLM app runs, evaluate agent trajectories, gain visibility in production, and improve performance over time.
|
||||
- [LangGraph](https://docs.langchain.com/oss/python/langgraph/overview) - Build agents that can reliably handle complex tasks with LangGraph, our low-level agent orchestration framework. LangGraph offers customizable architecture, long-term memory, and human-in-the-loop workflows — and is trusted in production by companies like LinkedIn, Uber, Klarna, and GitLab.
|
||||
- [LangGraph Platform](https://docs.langchain.com/langgraph-platform) - Deploy and scale agents effortlessly with a purpose-built deployment platform for long-running, stateful workflows. Discover, reuse, configure, and share agents across teams — and iterate quickly with visual prototyping in [LangGraph Studio](https://langchain-ai.github.io/langgraph/concepts/langgraph_studio).
|
||||
- [LangGraph](https://docs.langchain.com/oss/python/langgraph/overview) – Build agents that can reliably handle complex tasks with LangGraph, our low-level agent orchestration framework. LangGraph offers customizable architecture, long-term memory, and human-in-the-loop workflows – and is trusted in production by companies like LinkedIn, Uber, Klarna, and GitLab.
|
||||
- [Integrations](https://docs.langchain.com/oss/python/integrations/providers/overview) – List of LangChain integrations, including chat & embedding models, tools & toolkits, and more
|
||||
- [LangSmith](https://www.langchain.com/langsmith) – Helpful for agent evals and observability. Debug poor-performing LLM app runs, evaluate agent trajectories, gain visibility in production, and improve performance over time.
|
||||
- [LangSmith Deployment](https://docs.langchain.com/langsmith/deployments) – Deploy and scale agents effortlessly with a purpose-built deployment platform for long-running, stateful workflows. Discover, reuse, configure, and share agents across teams – and iterate quickly with visual prototyping in [LangSmith Studio](https://docs.langchain.com/langsmith/studio).
|
||||
- [Deep Agents](https://github.com/langchain-ai/deepagents) *(new!)* – Build agents that can plan, use subagents, and leverage file systems for complex tasks
|
||||
|
||||
## Additional resources
|
||||
|
||||
- [Learn](https://docs.langchain.com/oss/python/learn): Use cases, conceptual overviews, and more.
|
||||
- [API Reference](https://reference.langchain.com/python): Detailed reference on
|
||||
navigating base packages and integrations for LangChain.
|
||||
- [Contributing Guide](https://docs.langchain.com/oss/python/contributing/overview): Learn how to contribute to LangChain and find good first issues.
|
||||
- [LangChain Forum](https://forum.langchain.com): Connect with the community and share all of your technical questions, ideas, and feedback.
|
||||
- [Chat LangChain](https://chat.langchain.com): Ask questions & chat with our documentation.
|
||||
- [API Reference](https://reference.langchain.com/python) – Detailed reference on navigating base packages and integrations for LangChain.
|
||||
- [Contributing Guide](https://docs.langchain.com/oss/python/contributing/overview) – Learn how to contribute to LangChain projects and find good first issues.
|
||||
- [Code of Conduct](https://github.com/langchain-ai/langchain/blob/master/.github/CODE_OF_CONDUCT.md) – Our community guidelines and standards for participation.
|
||||
|
||||
80
SECURITY.md
80
SECURITY.md
@@ -1,80 +0,0 @@
|
||||
# Security Policy
|
||||
|
||||
LangChain has a large ecosystem of integrations with various external resources like local and remote file systems, APIs and databases. These integrations allow developers to create versatile applications that combine the power of LLMs with the ability to access, interact with and manipulate external resources.
|
||||
|
||||
## Best practices
|
||||
|
||||
When building such applications, developers should remember to follow good security practices:
|
||||
|
||||
* [**Limit Permissions**](https://en.wikipedia.org/wiki/Principle_of_least_privilege): Scope permissions specifically to the application's need. Granting broad or excessive permissions can introduce significant security vulnerabilities. To avoid such vulnerabilities, consider using read-only credentials, disallowing access to sensitive resources, using sandboxing techniques (such as running inside a container), specifying proxy configurations to control external requests, etc., as appropriate for your application.
|
||||
* **Anticipate Potential Misuse**: Just as humans can err, so can Large Language Models (LLMs). Always assume that any system access or credentials may be used in any way allowed by the permissions they are assigned. For example, if a pair of database credentials allows deleting data, it's safest to assume that any LLM able to use those credentials may in fact delete data.
|
||||
* [**Defense in Depth**](https://en.wikipedia.org/wiki/Defense_in_depth_(computing)): No security technique is perfect. Fine-tuning and good chain design can reduce, but not eliminate, the odds that a Large Language Model (LLM) may make a mistake. It's best to combine multiple layered security approaches rather than relying on any single layer of defense to ensure security. For example: use both read-only permissions and sandboxing to ensure that LLMs are only able to access data that is explicitly meant for them to use.
|
||||
|
||||
Risks of not doing so include, but are not limited to:
|
||||
|
||||
* Data corruption or loss.
|
||||
* Unauthorized access to confidential information.
|
||||
* Compromised performance or availability of critical resources.
|
||||
|
||||
Example scenarios with mitigation strategies:
|
||||
|
||||
* A user may ask an agent with access to the file system to delete files that should not be deleted or read the content of files that contain sensitive information. To mitigate, limit the agent to only use a specific directory and only allow it to read or write files that are safe to read or write. Consider further sandboxing the agent by running it in a container.
|
||||
* A user may ask an agent with write access to an external API to write malicious data to the API, or delete data from that API. To mitigate, give the agent read-only API keys, or limit it to only use endpoints that are already resistant to such misuse.
|
||||
* A user may ask an agent with access to a database to drop a table or mutate the schema. To mitigate, scope the credentials to only the tables that the agent needs to access and consider issuing READ-ONLY credentials.
|
||||
|
||||
If you're building applications that access external resources like file systems, APIs or databases, consider speaking with your company's security team to determine how to best design and secure your applications.
|
||||
|
||||
## Reporting OSS Vulnerabilities
|
||||
|
||||
LangChain is partnered with [huntr by Protect AI](https://huntr.com/) to provide
|
||||
a bounty program for our open source projects.
|
||||
|
||||
Please report security vulnerabilities associated with the LangChain
|
||||
open source projects at [huntr](https://huntr.com/bounties/disclose/?target=https%3A%2F%2Fgithub.com%2Flangchain-ai%2Flangchain&validSearch=true).
|
||||
|
||||
Before reporting a vulnerability, please review:
|
||||
|
||||
1) In-Scope Targets and Out-of-Scope Targets below.
|
||||
2) The [langchain-ai/langchain](https://docs.langchain.com/oss/python/contributing/code#repository-structure) monorepo structure.
|
||||
3) The [Best Practices](#best-practices) above to understand what we consider to be a security vulnerability vs. developer responsibility.
|
||||
|
||||
### In-Scope Targets
|
||||
|
||||
The following packages and repositories are eligible for bug bounties:
|
||||
|
||||
* langchain-core
|
||||
* langchain (see exceptions)
|
||||
* langchain-community (see exceptions)
|
||||
* langgraph
|
||||
* langserve
|
||||
|
||||
### Out of Scope Targets
|
||||
|
||||
All out of scope targets defined by huntr as well as:
|
||||
|
||||
* **langchain-experimental**: This repository is for experimental code and is not
|
||||
eligible for bug bounties (see [package warning](https://pypi.org/project/langchain-experimental/)), bug reports to it will be marked as interesting or waste of
|
||||
time and published with no bounty attached.
|
||||
* **tools**: Tools in either langchain or langchain-community are not eligible for bug
|
||||
bounties. This includes the following directories
|
||||
* libs/langchain/langchain/tools
|
||||
* libs/community/langchain_community/tools
|
||||
* Please review the [Best Practices](#best-practices)
|
||||
for more details, but generally tools interact with the real world. Developers are
|
||||
expected to understand the security implications of their code and are responsible
|
||||
for the security of their tools.
|
||||
* Code documented with security notices. This will be decided on a case-by-case basis, but likely will not be eligible for a bounty as the code is already
|
||||
documented with guidelines for developers that should be followed for making their
|
||||
application secure.
|
||||
* Any LangSmith related repositories or APIs (see [Reporting LangSmith Vulnerabilities](#reporting-langsmith-vulnerabilities)).
|
||||
|
||||
## Reporting LangSmith Vulnerabilities
|
||||
|
||||
Please report security vulnerabilities associated with LangSmith by email to `security@langchain.dev`.
|
||||
|
||||
* LangSmith site: [https://smith.langchain.com](https://smith.langchain.com)
|
||||
* SDK client: [https://github.com/langchain-ai/langsmith-sdk](https://github.com/langchain-ai/langsmith-sdk)
|
||||
|
||||
### Other Security Concerns
|
||||
|
||||
For any other security concerns, please contact us at `security@langchain.dev`.
|
||||
@@ -295,7 +295,7 @@
|
||||
"source": [
|
||||
"## TODO: Any functionality specific to this vector store\n",
|
||||
"\n",
|
||||
"E.g. creating a persisten database to save to your disk, etc."
|
||||
"E.g. creating a persistent database to save to your disk, etc."
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -6,9 +6,8 @@ import hashlib
|
||||
import logging
|
||||
import re
|
||||
import shutil
|
||||
from collections.abc import Sequence
|
||||
from pathlib import Path
|
||||
from typing import Any, TypedDict
|
||||
from typing import TYPE_CHECKING, Any, TypedDict
|
||||
|
||||
from git import Repo
|
||||
|
||||
@@ -18,6 +17,9 @@ from langchain_cli.constants import (
|
||||
DEFAULT_GIT_SUBDIRECTORY,
|
||||
)
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from collections.abc import Sequence
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
|
||||
@@ -1,9 +1,11 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from .file import File
|
||||
from .folder import Folder
|
||||
if TYPE_CHECKING:
|
||||
from .file import File
|
||||
from .folder import Folder
|
||||
|
||||
|
||||
@dataclass
|
||||
|
||||
@@ -1,9 +1,12 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from .file import File
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
class Folder:
|
||||
def __init__(self, name: str, *files: Folder | File) -> None:
|
||||
|
||||
@@ -34,7 +34,7 @@ The LangChain ecosystem is built on top of `langchain-core`. Some of the benefit
|
||||
|
||||
## 📖 Documentation
|
||||
|
||||
For full documentation, see the [API reference](https://reference.langchain.com/python/langchain_core/).
|
||||
For full documentation, see the [API reference](https://reference.langchain.com/python/langchain_core/). For conceptual guides, tutorials, and examples on using LangChain, see the [LangChain Docs](https://docs.langchain.com/oss/python/langchain/overview).
|
||||
|
||||
## 📕 Releases & Versioning
|
||||
|
||||
|
||||
@@ -5,12 +5,10 @@
|
||||
|
||||
!!! warning
|
||||
New agents should be built using the
|
||||
[langgraph library](https://github.com/langchain-ai/langgraph), which provides a
|
||||
[`langchain` library](https://pypi.org/project/langchain/), which provides a
|
||||
simpler and more flexible way to define agents.
|
||||
|
||||
Please see the
|
||||
[migration guide](https://python.langchain.com/docs/how_to/migrate_agent/) for
|
||||
information on how to migrate existing agents to modern langgraph agents.
|
||||
See docs on [building agents](https://docs.langchain.com/oss/python/langchain/agents).
|
||||
|
||||
Agents use language models to choose a sequence of actions to take.
|
||||
|
||||
@@ -54,31 +52,33 @@ class AgentAction(Serializable):
|
||||
"""The input to pass in to the Tool."""
|
||||
log: str
|
||||
"""Additional information to log about the action.
|
||||
This log can be used in a few ways. First, it can be used to audit
|
||||
what exactly the LLM predicted to lead to this (tool, tool_input).
|
||||
Second, it can be used in future iterations to show the LLMs prior
|
||||
thoughts. This is useful when (tool, tool_input) does not contain
|
||||
full information about the LLM prediction (for example, any `thought`
|
||||
before the tool/tool_input)."""
|
||||
|
||||
This log can be used in a few ways. First, it can be used to audit what exactly the
|
||||
LLM predicted to lead to this `(tool, tool_input)`.
|
||||
|
||||
Second, it can be used in future iterations to show the LLMs prior thoughts. This is
|
||||
useful when `(tool, tool_input)` does not contain full information about the LLM
|
||||
prediction (for example, any `thought` before the tool/tool_input).
|
||||
"""
|
||||
type: Literal["AgentAction"] = "AgentAction"
|
||||
|
||||
# Override init to support instantiation by position for backward compat.
|
||||
def __init__(self, tool: str, tool_input: str | dict, log: str, **kwargs: Any):
|
||||
"""Create an AgentAction.
|
||||
"""Create an `AgentAction`.
|
||||
|
||||
Args:
|
||||
tool: The name of the tool to execute.
|
||||
tool_input: The input to pass in to the Tool.
|
||||
tool_input: The input to pass in to the `Tool`.
|
||||
log: Additional information to log about the action.
|
||||
"""
|
||||
super().__init__(tool=tool, tool_input=tool_input, log=log, **kwargs)
|
||||
|
||||
@classmethod
|
||||
def is_lc_serializable(cls) -> bool:
|
||||
"""AgentAction is serializable.
|
||||
"""`AgentAction` is serializable.
|
||||
|
||||
Returns:
|
||||
True
|
||||
`True`
|
||||
"""
|
||||
return True
|
||||
|
||||
@@ -100,19 +100,23 @@ class AgentAction(Serializable):
|
||||
class AgentActionMessageLog(AgentAction):
|
||||
"""Representation of an action to be executed by an agent.
|
||||
|
||||
This is similar to AgentAction, but includes a message log consisting of
|
||||
chat messages. This is useful when working with ChatModels, and is used
|
||||
to reconstruct conversation history from the agent's perspective.
|
||||
This is similar to `AgentAction`, but includes a message log consisting of
|
||||
chat messages.
|
||||
|
||||
This is useful when working with `ChatModels`, and is used to reconstruct
|
||||
conversation history from the agent's perspective.
|
||||
"""
|
||||
|
||||
message_log: Sequence[BaseMessage]
|
||||
"""Similar to log, this can be used to pass along extra
|
||||
information about what exact messages were predicted by the LLM
|
||||
before parsing out the (tool, tool_input). This is again useful
|
||||
if (tool, tool_input) cannot be used to fully recreate the LLM
|
||||
prediction, and you need that LLM prediction (for future agent iteration).
|
||||
"""Similar to log, this can be used to pass along extra information about what exact
|
||||
messages were predicted by the LLM before parsing out the `(tool, tool_input)`.
|
||||
|
||||
This is again useful if `(tool, tool_input)` cannot be used to fully recreate the
|
||||
LLM prediction, and you need that LLM prediction (for future agent iteration).
|
||||
|
||||
Compared to `log`, this is useful when the underlying LLM is a
|
||||
chat model (and therefore returns messages rather than a string)."""
|
||||
chat model (and therefore returns messages rather than a string).
|
||||
"""
|
||||
# Ignoring type because we're overriding the type from AgentAction.
|
||||
# And this is the correct thing to do in this case.
|
||||
# The type literal is used for serialization purposes.
|
||||
@@ -120,12 +124,12 @@ class AgentActionMessageLog(AgentAction):
|
||||
|
||||
|
||||
class AgentStep(Serializable):
|
||||
"""Result of running an AgentAction."""
|
||||
"""Result of running an `AgentAction`."""
|
||||
|
||||
action: AgentAction
|
||||
"""The AgentAction that was executed."""
|
||||
"""The `AgentAction` that was executed."""
|
||||
observation: Any
|
||||
"""The result of the AgentAction."""
|
||||
"""The result of the `AgentAction`."""
|
||||
|
||||
@property
|
||||
def messages(self) -> Sequence[BaseMessage]:
|
||||
@@ -134,19 +138,22 @@ class AgentStep(Serializable):
|
||||
|
||||
|
||||
class AgentFinish(Serializable):
|
||||
"""Final return value of an ActionAgent.
|
||||
"""Final return value of an `ActionAgent`.
|
||||
|
||||
Agents return an AgentFinish when they have reached a stopping condition.
|
||||
Agents return an `AgentFinish` when they have reached a stopping condition.
|
||||
"""
|
||||
|
||||
return_values: dict
|
||||
"""Dictionary of return values."""
|
||||
log: str
|
||||
"""Additional information to log about the return value.
|
||||
|
||||
This is used to pass along the full LLM prediction, not just the parsed out
|
||||
return value. For example, if the full LLM prediction was
|
||||
`Final Answer: 2` you may want to just return `2` as a return value, but pass
|
||||
along the full string as a `log` (for debugging or observability purposes).
|
||||
return value.
|
||||
|
||||
For example, if the full LLM prediction was `Final Answer: 2` you may want to just
|
||||
return `2` as a return value, but pass along the full string as a `log` (for
|
||||
debugging or observability purposes).
|
||||
"""
|
||||
type: Literal["AgentFinish"] = "AgentFinish"
|
||||
|
||||
@@ -156,7 +163,7 @@ class AgentFinish(Serializable):
|
||||
|
||||
@classmethod
|
||||
def is_lc_serializable(cls) -> bool:
|
||||
"""Return True as this class is serializable."""
|
||||
"""Return `True` as this class is serializable."""
|
||||
return True
|
||||
|
||||
@classmethod
|
||||
@@ -204,7 +211,7 @@ def _convert_agent_observation_to_messages(
|
||||
observation: Observation to convert to a message.
|
||||
|
||||
Returns:
|
||||
AIMessage that corresponds to the original tool invocation.
|
||||
`AIMessage` that corresponds to the original tool invocation.
|
||||
"""
|
||||
if isinstance(agent_action, AgentActionMessageLog):
|
||||
return [_create_function_message(agent_action, observation)]
|
||||
@@ -227,7 +234,7 @@ def _create_function_message(
|
||||
observation: the result of the tool invocation.
|
||||
|
||||
Returns:
|
||||
FunctionMessage that corresponds to the original tool invocation.
|
||||
`FunctionMessage` that corresponds to the original tool invocation.
|
||||
"""
|
||||
if not isinstance(observation, str):
|
||||
try:
|
||||
|
||||
@@ -2,8 +2,8 @@
|
||||
|
||||
Distinct from provider-based [prompt caching](https://docs.langchain.com/oss/python/langchain/models#prompt-caching).
|
||||
|
||||
!!! warning
|
||||
This is a beta feature! Please be wary of deploying experimental code to production
|
||||
!!! warning "Beta feature"
|
||||
This is a beta feature. Please be wary of deploying experimental code to production
|
||||
unless you've taken appropriate precautions.
|
||||
|
||||
A cache is useful for two reasons:
|
||||
@@ -49,17 +49,18 @@ class BaseCache(ABC):
|
||||
"""Look up based on `prompt` and `llm_string`.
|
||||
|
||||
A cache implementation is expected to generate a key from the 2-tuple
|
||||
of prompt and llm_string (e.g., by concatenating them with a delimiter).
|
||||
of `prompt` and `llm_string` (e.g., by concatenating them with a delimiter).
|
||||
|
||||
Args:
|
||||
prompt: A string representation of the prompt.
|
||||
In the case of a chat model, the prompt is a non-trivial
|
||||
serialization of the prompt into the language model.
|
||||
llm_string: A string representation of the LLM configuration.
|
||||
|
||||
This is used to capture the invocation parameters of the LLM
|
||||
(e.g., model name, temperature, stop tokens, max tokens, etc.).
|
||||
These invocation parameters are serialized into a string
|
||||
representation.
|
||||
|
||||
These invocation parameters are serialized into a string representation.
|
||||
|
||||
Returns:
|
||||
On a cache miss, return `None`. On a cache hit, return the cached value.
|
||||
@@ -78,8 +79,10 @@ class BaseCache(ABC):
|
||||
In the case of a chat model, the prompt is a non-trivial
|
||||
serialization of the prompt into the language model.
|
||||
llm_string: A string representation of the LLM configuration.
|
||||
|
||||
This is used to capture the invocation parameters of the LLM
|
||||
(e.g., model name, temperature, stop tokens, max tokens, etc.).
|
||||
|
||||
These invocation parameters are serialized into a string
|
||||
representation.
|
||||
return_val: The value to be cached. The value is a list of `Generation`
|
||||
@@ -94,15 +97,17 @@ class BaseCache(ABC):
|
||||
"""Async look up based on `prompt` and `llm_string`.
|
||||
|
||||
A cache implementation is expected to generate a key from the 2-tuple
|
||||
of prompt and llm_string (e.g., by concatenating them with a delimiter).
|
||||
of `prompt` and `llm_string` (e.g., by concatenating them with a delimiter).
|
||||
|
||||
Args:
|
||||
prompt: A string representation of the prompt.
|
||||
In the case of a chat model, the prompt is a non-trivial
|
||||
serialization of the prompt into the language model.
|
||||
llm_string: A string representation of the LLM configuration.
|
||||
|
||||
This is used to capture the invocation parameters of the LLM
|
||||
(e.g., model name, temperature, stop tokens, max tokens, etc.).
|
||||
|
||||
These invocation parameters are serialized into a string
|
||||
representation.
|
||||
|
||||
@@ -125,8 +130,10 @@ class BaseCache(ABC):
|
||||
In the case of a chat model, the prompt is a non-trivial
|
||||
serialization of the prompt into the language model.
|
||||
llm_string: A string representation of the LLM configuration.
|
||||
|
||||
This is used to capture the invocation parameters of the LLM
|
||||
(e.g., model name, temperature, stop tokens, max tokens, etc.).
|
||||
|
||||
These invocation parameters are serialized into a string
|
||||
representation.
|
||||
return_val: The value to be cached. The value is a list of `Generation`
|
||||
|
||||
@@ -5,13 +5,12 @@ from __future__ import annotations
|
||||
import logging
|
||||
from typing import TYPE_CHECKING, Any
|
||||
|
||||
from typing_extensions import Self
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from collections.abc import Sequence
|
||||
from uuid import UUID
|
||||
|
||||
from tenacity import RetryCallState
|
||||
from typing_extensions import Self
|
||||
|
||||
from langchain_core.agents import AgentAction, AgentFinish
|
||||
from langchain_core.documents import Document
|
||||
|
||||
@@ -6,7 +6,6 @@ import asyncio
|
||||
import atexit
|
||||
import functools
|
||||
import logging
|
||||
import uuid
|
||||
from abc import ABC, abstractmethod
|
||||
from collections.abc import Callable
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
@@ -39,9 +38,9 @@ from langchain_core.tracers.context import (
|
||||
tracing_v2_callback_var,
|
||||
)
|
||||
from langchain_core.tracers.langchain import LangChainTracer
|
||||
from langchain_core.tracers.schemas import Run
|
||||
from langchain_core.tracers.stdout import ConsoleCallbackHandler
|
||||
from langchain_core.utils.env import env_var_is_set
|
||||
from langchain_core.utils.uuid import uuid7
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from collections.abc import AsyncGenerator, Coroutine, Generator, Sequence
|
||||
@@ -52,6 +51,7 @@ if TYPE_CHECKING:
|
||||
from langchain_core.documents import Document
|
||||
from langchain_core.outputs import ChatGenerationChunk, GenerationChunk, LLMResult
|
||||
from langchain_core.runnables.config import RunnableConfig
|
||||
from langchain_core.tracers.schemas import Run
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -229,7 +229,24 @@ def shielded(func: Func) -> Func:
|
||||
|
||||
@functools.wraps(func)
|
||||
async def wrapped(*args: Any, **kwargs: Any) -> Any:
|
||||
return await asyncio.shield(func(*args, **kwargs))
|
||||
# Capture the current context to preserve context variables
|
||||
ctx = copy_context()
|
||||
|
||||
# Create the coroutine
|
||||
coro = func(*args, **kwargs)
|
||||
|
||||
# For Python 3.11+, create task with explicit context
|
||||
# For older versions, fallback to original behavior
|
||||
try:
|
||||
# Create a task with the captured context to preserve context variables
|
||||
task = asyncio.create_task(coro, context=ctx) # type: ignore[call-arg, unused-ignore]
|
||||
# `call-arg` used to not fail 3.9 or 3.10 tests
|
||||
return await asyncio.shield(task)
|
||||
except TypeError:
|
||||
# Python < 3.11 fallback - create task normally then shield
|
||||
# This won't preserve context perfectly but is better than nothing
|
||||
task = asyncio.create_task(coro)
|
||||
return await asyncio.shield(task)
|
||||
|
||||
return cast("Func", wrapped)
|
||||
|
||||
@@ -487,7 +504,7 @@ class BaseRunManager(RunManagerMixin):
|
||||
|
||||
"""
|
||||
return cls(
|
||||
run_id=uuid.uuid4(),
|
||||
run_id=uuid7(),
|
||||
handlers=[],
|
||||
inheritable_handlers=[],
|
||||
tags=[],
|
||||
@@ -1313,7 +1330,7 @@ class CallbackManager(BaseCallbackManager):
|
||||
managers = []
|
||||
for i, prompt in enumerate(prompts):
|
||||
# Can't have duplicate runs with the same run ID (if provided)
|
||||
run_id_ = run_id if i == 0 and run_id is not None else uuid.uuid4()
|
||||
run_id_ = run_id if i == 0 and run_id is not None else uuid7()
|
||||
handle_event(
|
||||
self.handlers,
|
||||
"on_llm_start",
|
||||
@@ -1367,7 +1384,7 @@ class CallbackManager(BaseCallbackManager):
|
||||
run_id_ = run_id
|
||||
run_id = None
|
||||
else:
|
||||
run_id_ = uuid.uuid4()
|
||||
run_id_ = uuid7()
|
||||
handle_event(
|
||||
self.handlers,
|
||||
"on_chat_model_start",
|
||||
@@ -1416,7 +1433,7 @@ class CallbackManager(BaseCallbackManager):
|
||||
|
||||
"""
|
||||
if run_id is None:
|
||||
run_id = uuid.uuid4()
|
||||
run_id = uuid7()
|
||||
handle_event(
|
||||
self.handlers,
|
||||
"on_chain_start",
|
||||
@@ -1471,7 +1488,7 @@ class CallbackManager(BaseCallbackManager):
|
||||
|
||||
"""
|
||||
if run_id is None:
|
||||
run_id = uuid.uuid4()
|
||||
run_id = uuid7()
|
||||
|
||||
handle_event(
|
||||
self.handlers,
|
||||
@@ -1520,7 +1537,7 @@ class CallbackManager(BaseCallbackManager):
|
||||
The callback manager for the retriever run.
|
||||
"""
|
||||
if run_id is None:
|
||||
run_id = uuid.uuid4()
|
||||
run_id = uuid7()
|
||||
|
||||
handle_event(
|
||||
self.handlers,
|
||||
@@ -1577,7 +1594,7 @@ class CallbackManager(BaseCallbackManager):
|
||||
)
|
||||
raise ValueError(msg)
|
||||
if run_id is None:
|
||||
run_id = uuid.uuid4()
|
||||
run_id = uuid7()
|
||||
|
||||
handle_event(
|
||||
self.handlers,
|
||||
@@ -1799,7 +1816,7 @@ class AsyncCallbackManager(BaseCallbackManager):
|
||||
run_id_ = run_id
|
||||
run_id = None
|
||||
else:
|
||||
run_id_ = uuid.uuid4()
|
||||
run_id_ = uuid7()
|
||||
|
||||
if inline_handlers:
|
||||
inline_tasks.append(
|
||||
@@ -1883,7 +1900,7 @@ class AsyncCallbackManager(BaseCallbackManager):
|
||||
run_id_ = run_id
|
||||
run_id = None
|
||||
else:
|
||||
run_id_ = uuid.uuid4()
|
||||
run_id_ = uuid7()
|
||||
|
||||
for handler in self.handlers:
|
||||
task = ahandle_event(
|
||||
@@ -1945,7 +1962,7 @@ class AsyncCallbackManager(BaseCallbackManager):
|
||||
The async callback manager for the chain run.
|
||||
"""
|
||||
if run_id is None:
|
||||
run_id = uuid.uuid4()
|
||||
run_id = uuid7()
|
||||
|
||||
await ahandle_event(
|
||||
self.handlers,
|
||||
@@ -1993,7 +2010,7 @@ class AsyncCallbackManager(BaseCallbackManager):
|
||||
The async callback manager for the tool run.
|
||||
"""
|
||||
if run_id is None:
|
||||
run_id = uuid.uuid4()
|
||||
run_id = uuid7()
|
||||
|
||||
await ahandle_event(
|
||||
self.handlers,
|
||||
@@ -2043,7 +2060,7 @@ class AsyncCallbackManager(BaseCallbackManager):
|
||||
if not self.handlers:
|
||||
return
|
||||
if run_id is None:
|
||||
run_id = uuid.uuid4()
|
||||
run_id = uuid7()
|
||||
|
||||
if kwargs:
|
||||
msg = (
|
||||
@@ -2085,7 +2102,7 @@ class AsyncCallbackManager(BaseCallbackManager):
|
||||
The async callback manager for the retriever run.
|
||||
"""
|
||||
if run_id is None:
|
||||
run_id = uuid.uuid4()
|
||||
run_id = uuid7()
|
||||
|
||||
await ahandle_event(
|
||||
self.handlers,
|
||||
|
||||
@@ -24,7 +24,7 @@ class UsageMetadataCallbackHandler(BaseCallbackHandler):
|
||||
from langchain_core.callbacks import UsageMetadataCallbackHandler
|
||||
|
||||
llm_1 = init_chat_model(model="openai:gpt-4o-mini")
|
||||
llm_2 = init_chat_model(model="anthropic:claude-3-5-haiku-latest")
|
||||
llm_2 = init_chat_model(model="anthropic:claude-3-5-haiku-20241022")
|
||||
|
||||
callback = UsageMetadataCallbackHandler()
|
||||
result_1 = llm_1.invoke("Hello", config={"callbacks": [callback]})
|
||||
@@ -43,7 +43,7 @@ class UsageMetadataCallbackHandler(BaseCallbackHandler):
|
||||
'input_token_details': {'cache_read': 0, 'cache_creation': 0}}}
|
||||
```
|
||||
|
||||
!!! version-added "Added in version 0.3.49"
|
||||
!!! version-added "Added in `langchain-core` 0.3.49"
|
||||
|
||||
"""
|
||||
|
||||
@@ -95,7 +95,7 @@ def get_usage_metadata_callback(
|
||||
"""Get usage metadata callback.
|
||||
|
||||
Get context manager for tracking usage metadata across chat model calls using
|
||||
`AIMessage.usage_metadata`.
|
||||
[`AIMessage.usage_metadata`][langchain.messages.AIMessage.usage_metadata].
|
||||
|
||||
Args:
|
||||
name: The name of the context variable.
|
||||
@@ -109,7 +109,7 @@ def get_usage_metadata_callback(
|
||||
from langchain_core.callbacks import get_usage_metadata_callback
|
||||
|
||||
llm_1 = init_chat_model(model="openai:gpt-4o-mini")
|
||||
llm_2 = init_chat_model(model="anthropic:claude-3-5-haiku-latest")
|
||||
llm_2 = init_chat_model(model="anthropic:claude-3-5-haiku-20241022")
|
||||
|
||||
with get_usage_metadata_callback() as cb:
|
||||
llm_1.invoke("Hello")
|
||||
@@ -134,7 +134,7 @@ def get_usage_metadata_callback(
|
||||
}
|
||||
```
|
||||
|
||||
!!! version-added "Added in version 0.3.49"
|
||||
!!! version-added "Added in `langchain-core` 0.3.49"
|
||||
|
||||
"""
|
||||
usage_metadata_callback_var: ContextVar[UsageMetadataCallbackHandler | None] = (
|
||||
|
||||
@@ -121,7 +121,7 @@ class BaseChatMessageHistory(ABC):
|
||||
This method may be deprecated in a future release.
|
||||
|
||||
Args:
|
||||
message: The human message to add to the store.
|
||||
message: The `HumanMessage` to add to the store.
|
||||
"""
|
||||
if isinstance(message, HumanMessage):
|
||||
self.add_message(message)
|
||||
@@ -129,7 +129,7 @@ class BaseChatMessageHistory(ABC):
|
||||
self.add_message(HumanMessage(content=message))
|
||||
|
||||
def add_ai_message(self, message: AIMessage | str) -> None:
|
||||
"""Convenience method for adding an AI message string to the store.
|
||||
"""Convenience method for adding an `AIMessage` string to the store.
|
||||
|
||||
!!! note
|
||||
This is a convenience method. Code should favor the bulk `add_messages`
|
||||
@@ -138,7 +138,7 @@ class BaseChatMessageHistory(ABC):
|
||||
This method may be deprecated in a future release.
|
||||
|
||||
Args:
|
||||
message: The AI message to add.
|
||||
message: The `AIMessage` to add.
|
||||
"""
|
||||
if isinstance(message, AIMessage):
|
||||
self.add_message(message)
|
||||
@@ -173,7 +173,7 @@ class BaseChatMessageHistory(ABC):
|
||||
in an efficient manner to avoid unnecessary round-trips to the underlying store.
|
||||
|
||||
Args:
|
||||
messages: A sequence of BaseMessage objects to store.
|
||||
messages: A sequence of `BaseMessage` objects to store.
|
||||
"""
|
||||
for message in messages:
|
||||
self.add_message(message)
|
||||
@@ -182,7 +182,7 @@ class BaseChatMessageHistory(ABC):
|
||||
"""Async add a list of messages.
|
||||
|
||||
Args:
|
||||
messages: A sequence of BaseMessage objects to store.
|
||||
messages: A sequence of `BaseMessage` objects to store.
|
||||
"""
|
||||
await run_in_executor(None, self.add_messages, messages)
|
||||
|
||||
|
||||
@@ -27,7 +27,7 @@ class BaseLoader(ABC): # noqa: B024
|
||||
"""Interface for Document Loader.
|
||||
|
||||
Implementations should implement the lazy-loading method using generators
|
||||
to avoid loading all Documents into memory at once.
|
||||
to avoid loading all documents into memory at once.
|
||||
|
||||
`load` is provided just for user convenience and should not be overridden.
|
||||
"""
|
||||
@@ -53,9 +53,11 @@ class BaseLoader(ABC): # noqa: B024
|
||||
def load_and_split(
|
||||
self, text_splitter: TextSplitter | None = None
|
||||
) -> list[Document]:
|
||||
"""Load Documents and split into chunks. Chunks are returned as `Document`.
|
||||
"""Load `Document` and split into chunks. Chunks are returned as `Document`.
|
||||
|
||||
Do not override this method. It should be considered to be deprecated!
|
||||
!!! danger
|
||||
|
||||
Do not override this method. It should be considered to be deprecated!
|
||||
|
||||
Args:
|
||||
text_splitter: `TextSplitter` instance to use for splitting documents.
|
||||
@@ -135,7 +137,7 @@ class BaseBlobParser(ABC):
|
||||
"""
|
||||
|
||||
def parse(self, blob: Blob) -> list[Document]:
|
||||
"""Eagerly parse the blob into a `Document` or `Document` objects.
|
||||
"""Eagerly parse the blob into a `Document` or list of `Document` objects.
|
||||
|
||||
This is a convenience method for interactive development environment.
|
||||
|
||||
|
||||
@@ -28,7 +28,7 @@ class BlobLoader(ABC):
|
||||
def yield_blobs(
|
||||
self,
|
||||
) -> Iterable[Blob]:
|
||||
"""A lazy loader for raw data represented by LangChain's Blob object.
|
||||
"""A lazy loader for raw data represented by LangChain's `Blob` object.
|
||||
|
||||
Returns:
|
||||
A generator over blobs
|
||||
|
||||
@@ -14,13 +14,13 @@ from langchain_core.documents import Document
|
||||
|
||||
|
||||
class LangSmithLoader(BaseLoader):
|
||||
"""Load LangSmith Dataset examples as Documents.
|
||||
"""Load LangSmith Dataset examples as `Document` objects.
|
||||
|
||||
Loads the example inputs as the Document page content and places the entire example
|
||||
into the Document metadata. This allows you to easily create few-shot example
|
||||
retrievers from the loaded documents.
|
||||
Loads the example inputs as the `Document` page content and places the entire
|
||||
example into the `Document` metadata. This allows you to easily create few-shot
|
||||
example retrievers from the loaded documents.
|
||||
|
||||
??? note "Lazy load"
|
||||
??? note "Lazy loading example"
|
||||
|
||||
```python
|
||||
from langchain_core.document_loaders import LangSmithLoader
|
||||
@@ -66,12 +66,11 @@ class LangSmithLoader(BaseLoader):
|
||||
format_content: Function for converting the content extracted from the example
|
||||
inputs into a string. Defaults to JSON-encoding the contents.
|
||||
example_ids: The IDs of the examples to filter by.
|
||||
as_of: The dataset version tag OR
|
||||
timestamp to retrieve the examples as of.
|
||||
Response examples will only be those that were present at the time
|
||||
of the tagged (or timestamped) version.
|
||||
as_of: The dataset version tag or timestamp to retrieve the examples as of.
|
||||
Response examples will only be those that were present at the time of
|
||||
the tagged (or timestamped) version.
|
||||
splits: A list of dataset splits, which are
|
||||
divisions of your dataset such as 'train', 'test', or 'validation'.
|
||||
divisions of your dataset such as `train`, `test`, or `validation`.
|
||||
Returns examples only from the specified splits.
|
||||
inline_s3_urls: Whether to inline S3 URLs.
|
||||
offset: The offset to start from.
|
||||
|
||||
@@ -1,7 +1,28 @@
|
||||
"""Documents module.
|
||||
"""Documents module for data retrieval and processing workflows.
|
||||
|
||||
**Document** module is a collection of classes that handle documents
|
||||
and their transformations.
|
||||
This module provides core abstractions for handling data in retrieval-augmented
|
||||
generation (RAG) pipelines, vector stores, and document processing workflows.
|
||||
|
||||
!!! warning "Documents vs. message content"
|
||||
This module is distinct from `langchain_core.messages.content`, which provides
|
||||
multimodal content blocks for **LLM chat I/O** (text, images, audio, etc. within
|
||||
messages).
|
||||
|
||||
**Key distinction:**
|
||||
|
||||
- **Documents** (this module): For **data retrieval and processing workflows**
|
||||
- Vector stores, retrievers, RAG pipelines
|
||||
- Text chunking, embedding, and semantic search
|
||||
- Example: Chunks of a PDF stored in a vector database
|
||||
|
||||
- **Content Blocks** (`messages.content`): For **LLM conversational I/O**
|
||||
- Multimodal message content sent to/from models
|
||||
- Tool calls, reasoning, citations within chat
|
||||
- Example: An image sent to a vision model in a chat message (via
|
||||
[`ImageContentBlock`][langchain.messages.ImageContentBlock])
|
||||
|
||||
While both can represent similar data types (text, files), they serve different
|
||||
architectural purposes in LangChain applications.
|
||||
"""
|
||||
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
@@ -1,4 +1,16 @@
|
||||
"""Base classes for media and documents."""
|
||||
"""Base classes for media and documents.
|
||||
|
||||
This module contains core abstractions for **data retrieval and processing workflows**:
|
||||
|
||||
- `BaseMedia`: Base class providing `id` and `metadata` fields
|
||||
- `Blob`: Raw data loading (files, binary data) - used by document loaders
|
||||
- `Document`: Text content for retrieval (RAG, vector stores, semantic search)
|
||||
|
||||
!!! note "Not for LLM chat messages"
|
||||
These classes are for data processing pipelines, not LLM I/O. For multimodal
|
||||
content in chat messages (images, audio in conversations), see
|
||||
`langchain.messages` content blocks instead.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
@@ -19,20 +31,18 @@ PathLike = str | PurePath
|
||||
|
||||
|
||||
class BaseMedia(Serializable):
|
||||
"""Use to represent media content.
|
||||
"""Base class for content used in retrieval and data processing workflows.
|
||||
|
||||
Media objects can be used to represent raw data, such as text or binary data.
|
||||
Provides common fields for content that needs to be stored, indexed, or searched.
|
||||
|
||||
LangChain Media objects allow associating metadata and an optional identifier
|
||||
with the content.
|
||||
|
||||
The presence of an ID and metadata make it easier to store, index, and search
|
||||
over the content in a structured way.
|
||||
!!! note
|
||||
For multimodal content in **chat messages** (images, audio sent to/from LLMs),
|
||||
use `langchain.messages` content blocks instead.
|
||||
"""
|
||||
|
||||
# The ID field is optional at the moment.
|
||||
# It will likely become required in a future major release after
|
||||
# it has been adopted by enough vectorstore implementations.
|
||||
# it has been adopted by enough VectorStore implementations.
|
||||
id: str | None = Field(default=None, coerce_numbers_to_str=True)
|
||||
"""An optional identifier for the document.
|
||||
|
||||
@@ -45,71 +55,70 @@ class BaseMedia(Serializable):
|
||||
|
||||
|
||||
class Blob(BaseMedia):
|
||||
"""Blob represents raw data by either reference or value.
|
||||
"""Raw data abstraction for document loading and file processing.
|
||||
|
||||
Provides an interface to materialize the blob in different representations, and
|
||||
help to decouple the development of data loaders from the downstream parsing of
|
||||
the raw data.
|
||||
Represents raw bytes or text, either in-memory or by file reference. Used
|
||||
primarily by document loaders to decouple data loading from parsing.
|
||||
|
||||
Inspired by: https://developer.mozilla.org/en-US/docs/Web/API/Blob
|
||||
Inspired by [Mozilla's `Blob`](https://developer.mozilla.org/en-US/docs/Web/API/Blob)
|
||||
|
||||
Example: Initialize a blob from in-memory data
|
||||
???+ example "Initialize a blob from in-memory data"
|
||||
|
||||
```python
|
||||
from langchain_core.documents import Blob
|
||||
```python
|
||||
from langchain_core.documents import Blob
|
||||
|
||||
blob = Blob.from_data("Hello, world!")
|
||||
blob = Blob.from_data("Hello, world!")
|
||||
|
||||
# Read the blob as a string
|
||||
print(blob.as_string())
|
||||
# Read the blob as a string
|
||||
print(blob.as_string())
|
||||
|
||||
# Read the blob as bytes
|
||||
print(blob.as_bytes())
|
||||
# Read the blob as bytes
|
||||
print(blob.as_bytes())
|
||||
|
||||
# Read the blob as a byte stream
|
||||
with blob.as_bytes_io() as f:
|
||||
print(f.read())
|
||||
```
|
||||
# Read the blob as a byte stream
|
||||
with blob.as_bytes_io() as f:
|
||||
print(f.read())
|
||||
```
|
||||
|
||||
Example: Load from memory and specify mime-type and metadata
|
||||
??? example "Load from memory and specify MIME type and metadata"
|
||||
|
||||
```python
|
||||
from langchain_core.documents import Blob
|
||||
```python
|
||||
from langchain_core.documents import Blob
|
||||
|
||||
blob = Blob.from_data(
|
||||
data="Hello, world!",
|
||||
mime_type="text/plain",
|
||||
metadata={"source": "https://example.com"},
|
||||
)
|
||||
```
|
||||
blob = Blob.from_data(
|
||||
data="Hello, world!",
|
||||
mime_type="text/plain",
|
||||
metadata={"source": "https://example.com"},
|
||||
)
|
||||
```
|
||||
|
||||
Example: Load the blob from a file
|
||||
??? example "Load the blob from a file"
|
||||
|
||||
```python
|
||||
from langchain_core.documents import Blob
|
||||
```python
|
||||
from langchain_core.documents import Blob
|
||||
|
||||
blob = Blob.from_path("path/to/file.txt")
|
||||
blob = Blob.from_path("path/to/file.txt")
|
||||
|
||||
# Read the blob as a string
|
||||
print(blob.as_string())
|
||||
# Read the blob as a string
|
||||
print(blob.as_string())
|
||||
|
||||
# Read the blob as bytes
|
||||
print(blob.as_bytes())
|
||||
# Read the blob as bytes
|
||||
print(blob.as_bytes())
|
||||
|
||||
# Read the blob as a byte stream
|
||||
with blob.as_bytes_io() as f:
|
||||
print(f.read())
|
||||
```
|
||||
# Read the blob as a byte stream
|
||||
with blob.as_bytes_io() as f:
|
||||
print(f.read())
|
||||
```
|
||||
"""
|
||||
|
||||
data: bytes | str | None = None
|
||||
"""Raw data associated with the blob."""
|
||||
"""Raw data associated with the `Blob`."""
|
||||
mimetype: str | None = None
|
||||
"""MimeType not to be confused with a file extension."""
|
||||
"""MIME type, not to be confused with a file extension."""
|
||||
encoding: str = "utf-8"
|
||||
"""Encoding to use if decoding the bytes into a string.
|
||||
|
||||
Use `utf-8` as default encoding, if decoding to string.
|
||||
Uses `utf-8` as default encoding if decoding to string.
|
||||
"""
|
||||
path: PathLike | None = None
|
||||
"""Location where the original content was found."""
|
||||
@@ -123,9 +132,9 @@ class Blob(BaseMedia):
|
||||
def source(self) -> str | None:
|
||||
"""The source location of the blob as string if known otherwise none.
|
||||
|
||||
If a path is associated with the blob, it will default to the path location.
|
||||
If a path is associated with the `Blob`, it will default to the path location.
|
||||
|
||||
Unless explicitly set via a metadata field called `"source"`, in which
|
||||
Unless explicitly set via a metadata field called `'source'`, in which
|
||||
case that value will be used instead.
|
||||
"""
|
||||
if self.metadata and "source" in self.metadata:
|
||||
@@ -211,13 +220,13 @@ class Blob(BaseMedia):
|
||||
Args:
|
||||
path: Path-like object to file to be read
|
||||
encoding: Encoding to use if decoding the bytes into a string
|
||||
mime_type: If provided, will be set as the mime-type of the data
|
||||
guess_type: If `True`, the mimetype will be guessed from the file extension,
|
||||
if a mime-type was not provided
|
||||
metadata: Metadata to associate with the blob
|
||||
mime_type: If provided, will be set as the MIME type of the data
|
||||
guess_type: If `True`, the MIME type will be guessed from the file
|
||||
extension, if a MIME type was not provided
|
||||
metadata: Metadata to associate with the `Blob`
|
||||
|
||||
Returns:
|
||||
Blob instance
|
||||
`Blob` instance
|
||||
"""
|
||||
if mime_type is None and guess_type:
|
||||
mimetype = mimetypes.guess_type(path)[0] if guess_type else None
|
||||
@@ -243,17 +252,17 @@ class Blob(BaseMedia):
|
||||
path: str | None = None,
|
||||
metadata: dict | None = None,
|
||||
) -> Blob:
|
||||
"""Initialize the blob from in-memory data.
|
||||
"""Initialize the `Blob` from in-memory data.
|
||||
|
||||
Args:
|
||||
data: The in-memory data associated with the blob
|
||||
data: The in-memory data associated with the `Blob`
|
||||
encoding: Encoding to use if decoding the bytes into a string
|
||||
mime_type: If provided, will be set as the mime-type of the data
|
||||
mime_type: If provided, will be set as the MIME type of the data
|
||||
path: If provided, will be set as the source from which the data came
|
||||
metadata: Metadata to associate with the blob
|
||||
metadata: Metadata to associate with the `Blob`
|
||||
|
||||
Returns:
|
||||
Blob instance
|
||||
`Blob` instance
|
||||
"""
|
||||
return cls(
|
||||
data=data,
|
||||
@@ -274,6 +283,10 @@ class Blob(BaseMedia):
|
||||
class Document(BaseMedia):
|
||||
"""Class for storing a piece of text and associated metadata.
|
||||
|
||||
!!! note
|
||||
`Document` is for **retrieval workflows**, not chat I/O. For sending text
|
||||
to an LLM in a conversation, use message types from `langchain.messages`.
|
||||
|
||||
Example:
|
||||
```python
|
||||
from langchain_core.documents import Document
|
||||
@@ -296,7 +309,7 @@ class Document(BaseMedia):
|
||||
|
||||
@classmethod
|
||||
def is_lc_serializable(cls) -> bool:
|
||||
"""Return True as this class is serializable."""
|
||||
"""Return `True` as this class is serializable."""
|
||||
return True
|
||||
|
||||
@classmethod
|
||||
@@ -309,10 +322,10 @@ class Document(BaseMedia):
|
||||
return ["langchain", "schema", "document"]
|
||||
|
||||
def __str__(self) -> str:
|
||||
"""Override __str__ to restrict it to page_content and metadata.
|
||||
"""Override `__str__` to restrict it to page_content and metadata.
|
||||
|
||||
Returns:
|
||||
A string representation of the Document.
|
||||
A string representation of the `Document`.
|
||||
"""
|
||||
# The format matches pydantic format for __str__.
|
||||
#
|
||||
|
||||
@@ -21,14 +21,14 @@ class BaseDocumentCompressor(BaseModel, ABC):
|
||||
|
||||
This abstraction is primarily used for post-processing of retrieved documents.
|
||||
|
||||
Documents matching a given query are first retrieved.
|
||||
`Document` objects matching a given query are first retrieved.
|
||||
|
||||
Then the list of documents can be further processed.
|
||||
|
||||
For example, one could re-rank the retrieved documents using an LLM.
|
||||
|
||||
!!! note
|
||||
Users should favor using a RunnableLambda instead of sub-classing from this
|
||||
Users should favor using a `RunnableLambda` instead of sub-classing from this
|
||||
interface.
|
||||
|
||||
"""
|
||||
@@ -43,9 +43,9 @@ class BaseDocumentCompressor(BaseModel, ABC):
|
||||
"""Compress retrieved documents given the query context.
|
||||
|
||||
Args:
|
||||
documents: The retrieved documents.
|
||||
documents: The retrieved `Document` objects.
|
||||
query: The query context.
|
||||
callbacks: Optional callbacks to run during compression.
|
||||
callbacks: Optional `Callbacks` to run during compression.
|
||||
|
||||
Returns:
|
||||
The compressed documents.
|
||||
@@ -61,9 +61,9 @@ class BaseDocumentCompressor(BaseModel, ABC):
|
||||
"""Async compress retrieved documents given the query context.
|
||||
|
||||
Args:
|
||||
documents: The retrieved documents.
|
||||
documents: The retrieved `Document` objects.
|
||||
query: The query context.
|
||||
callbacks: Optional callbacks to run during compression.
|
||||
callbacks: Optional `Callbacks` to run during compression.
|
||||
|
||||
Returns:
|
||||
The compressed documents.
|
||||
|
||||
@@ -16,8 +16,8 @@ if TYPE_CHECKING:
|
||||
class BaseDocumentTransformer(ABC):
|
||||
"""Abstract base class for document transformation.
|
||||
|
||||
A document transformation takes a sequence of Documents and returns a
|
||||
sequence of transformed Documents.
|
||||
A document transformation takes a sequence of `Document` objects and returns a
|
||||
sequence of transformed `Document` objects.
|
||||
|
||||
Example:
|
||||
```python
|
||||
|
||||
@@ -18,7 +18,7 @@ class FakeEmbeddings(Embeddings, BaseModel):
|
||||
|
||||
This embedding model creates embeddings by sampling from a normal distribution.
|
||||
|
||||
!!! warning
|
||||
!!! danger "Toy model"
|
||||
Do not use this outside of testing, as it is not a real embedding model.
|
||||
|
||||
Instantiate:
|
||||
@@ -73,7 +73,7 @@ class DeterministicFakeEmbedding(Embeddings, BaseModel):
|
||||
This embedding model creates embeddings by sampling from a normal distribution
|
||||
with a seed based on the hash of the text.
|
||||
|
||||
!!! warning
|
||||
!!! danger "Toy model"
|
||||
Do not use this outside of testing, as it is not a real embedding model.
|
||||
|
||||
Instantiate:
|
||||
|
||||
@@ -29,7 +29,7 @@ class LengthBasedExampleSelector(BaseExampleSelector, BaseModel):
|
||||
max_length: int = 2048
|
||||
"""Max length for the prompt, beyond which examples are cut."""
|
||||
|
||||
example_text_lengths: list[int] = Field(default_factory=list) # :meta private:
|
||||
example_text_lengths: list[int] = Field(default_factory=list)
|
||||
"""Length of each example."""
|
||||
|
||||
def add_example(self, example: dict[str, str]) -> None:
|
||||
|
||||
@@ -41,7 +41,7 @@ class _VectorStoreExampleSelector(BaseExampleSelector, BaseModel, ABC):
|
||||
"""Optional keys to filter input to. If provided, the search is based on
|
||||
the input variables instead of all variables."""
|
||||
vectorstore_kwargs: dict[str, Any] | None = None
|
||||
"""Extra arguments passed to similarity_search function of the vectorstore."""
|
||||
"""Extra arguments passed to similarity_search function of the `VectorStore`."""
|
||||
|
||||
model_config = ConfigDict(
|
||||
arbitrary_types_allowed=True,
|
||||
@@ -159,7 +159,7 @@ class SemanticSimilarityExampleSelector(_VectorStoreExampleSelector):
|
||||
instead of all variables.
|
||||
example_keys: If provided, keys to filter examples to.
|
||||
vectorstore_kwargs: Extra arguments passed to similarity_search function
|
||||
of the vectorstore.
|
||||
of the `VectorStore`.
|
||||
vectorstore_cls_kwargs: optional kwargs containing url for vector store
|
||||
|
||||
Returns:
|
||||
@@ -203,7 +203,7 @@ class SemanticSimilarityExampleSelector(_VectorStoreExampleSelector):
|
||||
instead of all variables.
|
||||
example_keys: If provided, keys to filter examples to.
|
||||
vectorstore_kwargs: Extra arguments passed to similarity_search function
|
||||
of the vectorstore.
|
||||
of the `VectorStore`.
|
||||
vectorstore_cls_kwargs: optional kwargs containing url for vector store
|
||||
|
||||
Returns:
|
||||
@@ -286,12 +286,12 @@ class MaxMarginalRelevanceExampleSelector(_VectorStoreExampleSelector):
|
||||
embeddings: An initialized embedding API interface, e.g. OpenAIEmbeddings().
|
||||
vectorstore_cls: A vector store DB interface class, e.g. FAISS.
|
||||
k: Number of examples to select.
|
||||
fetch_k: Number of Documents to fetch to pass to MMR algorithm.
|
||||
fetch_k: Number of `Document` objects to fetch to pass to MMR algorithm.
|
||||
input_keys: If provided, the search is based on the input variables
|
||||
instead of all variables.
|
||||
example_keys: If provided, keys to filter examples to.
|
||||
vectorstore_kwargs: Extra arguments passed to similarity_search function
|
||||
of the vectorstore.
|
||||
of the `VectorStore`.
|
||||
vectorstore_cls_kwargs: optional kwargs containing url for vector store
|
||||
|
||||
Returns:
|
||||
@@ -333,12 +333,12 @@ class MaxMarginalRelevanceExampleSelector(_VectorStoreExampleSelector):
|
||||
embeddings: An initialized embedding API interface, e.g. OpenAIEmbeddings().
|
||||
vectorstore_cls: A vector store DB interface class, e.g. FAISS.
|
||||
k: Number of examples to select.
|
||||
fetch_k: Number of Documents to fetch to pass to MMR algorithm.
|
||||
fetch_k: Number of `Document` objects to fetch to pass to MMR algorithm.
|
||||
input_keys: If provided, the search is based on the input variables
|
||||
instead of all variables.
|
||||
example_keys: If provided, keys to filter examples to.
|
||||
vectorstore_kwargs: Extra arguments passed to similarity_search function
|
||||
of the vectorstore.
|
||||
of the `VectorStore`.
|
||||
vectorstore_cls_kwargs: optional kwargs containing url for vector store
|
||||
|
||||
Returns:
|
||||
|
||||
@@ -16,9 +16,10 @@ class OutputParserException(ValueError, LangChainException): # noqa: N818
|
||||
"""Exception that output parsers should raise to signify a parsing error.
|
||||
|
||||
This exists to differentiate parsing errors from other code or execution errors
|
||||
that also may arise inside the output parser. `OutputParserException` will be
|
||||
available to catch and handle in ways to fix the parsing error, while other
|
||||
errors will be raised.
|
||||
that also may arise inside the output parser.
|
||||
|
||||
`OutputParserException` will be available to catch and handle in ways to fix the
|
||||
parsing error, while other errors will be raised.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
@@ -32,18 +33,19 @@ class OutputParserException(ValueError, LangChainException): # noqa: N818
|
||||
|
||||
Args:
|
||||
error: The error that's being re-raised or an error message.
|
||||
observation: String explanation of error which can be passed to a
|
||||
model to try and remediate the issue.
|
||||
observation: String explanation of error which can be passed to a model to
|
||||
try and remediate the issue.
|
||||
llm_output: String model output which is error-ing.
|
||||
|
||||
send_to_llm: Whether to send the observation and llm_output back to an Agent
|
||||
after an `OutputParserException` has been raised.
|
||||
|
||||
This gives the underlying model driving the agent the context that the
|
||||
previous output was improperly structured, in the hopes that it will
|
||||
update the output to the correct format.
|
||||
|
||||
Raises:
|
||||
ValueError: If `send_to_llm` is True but either observation or
|
||||
ValueError: If `send_to_llm` is `True` but either observation or
|
||||
`llm_output` are not provided.
|
||||
"""
|
||||
if isinstance(error, str):
|
||||
@@ -66,11 +68,11 @@ class ErrorCode(Enum):
|
||||
"""Error codes."""
|
||||
|
||||
INVALID_PROMPT_INPUT = "INVALID_PROMPT_INPUT"
|
||||
INVALID_TOOL_RESULTS = "INVALID_TOOL_RESULTS"
|
||||
INVALID_TOOL_RESULTS = "INVALID_TOOL_RESULTS" # Used in JS; not Py (yet)
|
||||
MESSAGE_COERCION_FAILURE = "MESSAGE_COERCION_FAILURE"
|
||||
MODEL_AUTHENTICATION = "MODEL_AUTHENTICATION"
|
||||
MODEL_NOT_FOUND = "MODEL_NOT_FOUND"
|
||||
MODEL_RATE_LIMIT = "MODEL_RATE_LIMIT"
|
||||
MODEL_AUTHENTICATION = "MODEL_AUTHENTICATION" # Used in JS; not Py (yet)
|
||||
MODEL_NOT_FOUND = "MODEL_NOT_FOUND" # Used in JS; not Py (yet)
|
||||
MODEL_RATE_LIMIT = "MODEL_RATE_LIMIT" # Used in JS; not Py (yet)
|
||||
OUTPUT_PARSING_FAILURE = "OUTPUT_PARSING_FAILURE"
|
||||
|
||||
|
||||
@@ -86,6 +88,6 @@ def create_message(*, message: str, error_code: ErrorCode) -> str:
|
||||
"""
|
||||
return (
|
||||
f"{message}\n"
|
||||
"For troubleshooting, visit: https://python.langchain.com/docs/"
|
||||
f"troubleshooting/errors/{error_code.value} "
|
||||
"For troubleshooting, visit: https://docs.langchain.com/oss/python/langchain"
|
||||
f"/errors/{error_code.value} "
|
||||
)
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
"""Code to help indexing data into a vectorstore.
|
||||
|
||||
This package contains helper logic to help deal with indexing data into
|
||||
a vectorstore while avoiding duplicated content and over-writing content
|
||||
a `VectorStore` while avoiding duplicated content and over-writing content
|
||||
if it's unchanged.
|
||||
"""
|
||||
|
||||
|
||||
@@ -6,16 +6,9 @@ import hashlib
|
||||
import json
|
||||
import uuid
|
||||
import warnings
|
||||
from collections.abc import (
|
||||
AsyncIterable,
|
||||
AsyncIterator,
|
||||
Callable,
|
||||
Iterable,
|
||||
Iterator,
|
||||
Sequence,
|
||||
)
|
||||
from itertools import islice
|
||||
from typing import (
|
||||
TYPE_CHECKING,
|
||||
Any,
|
||||
Literal,
|
||||
TypedDict,
|
||||
@@ -29,6 +22,16 @@ from langchain_core.exceptions import LangChainException
|
||||
from langchain_core.indexing.base import DocumentIndex, RecordManager
|
||||
from langchain_core.vectorstores import VectorStore
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from collections.abc import (
|
||||
AsyncIterable,
|
||||
AsyncIterator,
|
||||
Callable,
|
||||
Iterable,
|
||||
Iterator,
|
||||
Sequence,
|
||||
)
|
||||
|
||||
# Magic UUID to use as a namespace for hashing.
|
||||
# Used to try and generate a unique UUID for each document
|
||||
# from hashing the document content and metadata.
|
||||
@@ -298,48 +301,49 @@ def index(
|
||||
For the time being, documents are indexed using their hashes, and users
|
||||
are not able to specify the uid of the document.
|
||||
|
||||
!!! warning "Behavior changed in 0.3.25"
|
||||
!!! warning "Behavior changed in `langchain-core` 0.3.25"
|
||||
|
||||
Added `scoped_full` cleanup mode.
|
||||
|
||||
!!! warning
|
||||
|
||||
* In full mode, the loader should be returning
|
||||
the entire dataset, and not just a subset of the dataset.
|
||||
Otherwise, the auto_cleanup will remove documents that it is not
|
||||
supposed to.
|
||||
the entire dataset, and not just a subset of the dataset.
|
||||
Otherwise, the auto_cleanup will remove documents that it is not
|
||||
supposed to.
|
||||
* In incremental mode, if documents associated with a particular
|
||||
source id appear across different batches, the indexing API
|
||||
will do some redundant work. This will still result in the
|
||||
correct end state of the index, but will unfortunately not be
|
||||
100% efficient. For example, if a given document is split into 15
|
||||
chunks, and we index them using a batch size of 5, we'll have 3 batches
|
||||
all with the same source id. In general, to avoid doing too much
|
||||
redundant work select as big a batch size as possible.
|
||||
source id appear across different batches, the indexing API
|
||||
will do some redundant work. This will still result in the
|
||||
correct end state of the index, but will unfortunately not be
|
||||
100% efficient. For example, if a given document is split into 15
|
||||
chunks, and we index them using a batch size of 5, we'll have 3 batches
|
||||
all with the same source id. In general, to avoid doing too much
|
||||
redundant work select as big a batch size as possible.
|
||||
* The `scoped_full` mode is suitable if determining an appropriate batch size
|
||||
is challenging or if your data loader cannot return the entire dataset at
|
||||
once. This mode keeps track of source IDs in memory, which should be fine
|
||||
for most use cases. If your dataset is large (10M+ docs), you will likely
|
||||
need to parallelize the indexing process regardless.
|
||||
is challenging or if your data loader cannot return the entire dataset at
|
||||
once. This mode keeps track of source IDs in memory, which should be fine
|
||||
for most use cases. If your dataset is large (10M+ docs), you will likely
|
||||
need to parallelize the indexing process regardless.
|
||||
|
||||
Args:
|
||||
docs_source: Data loader or iterable of documents to index.
|
||||
record_manager: Timestamped set to keep track of which documents were
|
||||
updated.
|
||||
vector_store: VectorStore or DocumentIndex to index the documents into.
|
||||
vector_store: `VectorStore` or DocumentIndex to index the documents into.
|
||||
batch_size: Batch size to use when indexing.
|
||||
cleanup: How to handle clean up of documents.
|
||||
|
||||
- incremental: Cleans up all documents that haven't been updated AND
|
||||
that are associated with source ids that were seen during indexing.
|
||||
Clean up is done continuously during indexing helping to minimize the
|
||||
probability of users seeing duplicated content.
|
||||
that are associated with source IDs that were seen during indexing.
|
||||
Clean up is done continuously during indexing helping to minimize the
|
||||
probability of users seeing duplicated content.
|
||||
- full: Delete all documents that have not been returned by the loader
|
||||
during this run of indexing.
|
||||
Clean up runs after all documents have been indexed.
|
||||
This means that users may see duplicated content during indexing.
|
||||
during this run of indexing.
|
||||
Clean up runs after all documents have been indexed.
|
||||
This means that users may see duplicated content during indexing.
|
||||
- scoped_full: Similar to Full, but only deletes all documents
|
||||
that haven't been updated AND that are associated with
|
||||
source ids that were seen during indexing.
|
||||
that haven't been updated AND that are associated with
|
||||
source IDs that were seen during indexing.
|
||||
- None: Do not delete any documents.
|
||||
source_id_key: Optional key that helps identify the original source
|
||||
of the document.
|
||||
@@ -349,7 +353,7 @@ def index(
|
||||
key_encoder: Hashing algorithm to use for hashing the document content and
|
||||
metadata. Options include "blake2b", "sha256", and "sha512".
|
||||
|
||||
!!! version-added "Added in version 0.3.66"
|
||||
!!! version-added "Added in `langchain-core` 0.3.66"
|
||||
|
||||
key_encoder: Hashing algorithm to use for hashing the document.
|
||||
If not provided, a default encoder using SHA-1 will be used.
|
||||
@@ -363,10 +367,10 @@ def index(
|
||||
When changing the key encoder, you must change the
|
||||
index as well to avoid duplicated documents in the cache.
|
||||
upsert_kwargs: Additional keyword arguments to pass to the add_documents
|
||||
method of the VectorStore or the upsert method of the DocumentIndex.
|
||||
method of the `VectorStore` or the upsert method of the DocumentIndex.
|
||||
For example, you can use this to specify a custom vector_field:
|
||||
upsert_kwargs={"vector_field": "embedding"}
|
||||
!!! version-added "Added in version 0.3.10"
|
||||
!!! version-added "Added in `langchain-core` 0.3.10"
|
||||
|
||||
Returns:
|
||||
Indexing result which contains information about how many documents
|
||||
@@ -375,10 +379,10 @@ def index(
|
||||
Raises:
|
||||
ValueError: If cleanup mode is not one of 'incremental', 'full' or None
|
||||
ValueError: If cleanup mode is incremental and source_id_key is None.
|
||||
ValueError: If vectorstore does not have
|
||||
ValueError: If `VectorStore` does not have
|
||||
"delete" and "add_documents" required methods.
|
||||
ValueError: If source_id_key is not None, but is not a string or callable.
|
||||
TypeError: If `vectorstore` is not a VectorStore or a DocumentIndex.
|
||||
TypeError: If `vectorstore` is not a `VectorStore` or a DocumentIndex.
|
||||
AssertionError: If `source_id` is None when cleanup mode is incremental.
|
||||
(should be unreachable code).
|
||||
"""
|
||||
@@ -415,7 +419,7 @@ def index(
|
||||
raise ValueError(msg)
|
||||
|
||||
if type(destination).delete == VectorStore.delete:
|
||||
# Checking if the vectorstore has overridden the default delete method
|
||||
# Checking if the VectorStore has overridden the default delete method
|
||||
# implementation which just raises a NotImplementedError
|
||||
msg = "Vectorstore has not implemented the delete method"
|
||||
raise ValueError(msg)
|
||||
@@ -466,11 +470,11 @@ def index(
|
||||
]
|
||||
|
||||
if cleanup in {"incremental", "scoped_full"}:
|
||||
# source ids are required.
|
||||
# Source IDs are required.
|
||||
for source_id, hashed_doc in zip(source_ids, hashed_docs, strict=False):
|
||||
if source_id is None:
|
||||
msg = (
|
||||
f"Source ids are required when cleanup mode is "
|
||||
f"Source IDs are required when cleanup mode is "
|
||||
f"incremental or scoped_full. "
|
||||
f"Document that starts with "
|
||||
f"content: {hashed_doc.page_content[:100]} "
|
||||
@@ -479,7 +483,7 @@ def index(
|
||||
raise ValueError(msg)
|
||||
if cleanup == "scoped_full":
|
||||
scoped_full_cleanup_source_ids.add(source_id)
|
||||
# source ids cannot be None after for loop above.
|
||||
# Source IDs cannot be None after for loop above.
|
||||
source_ids = cast("Sequence[str]", source_ids)
|
||||
|
||||
exists_batch = record_manager.exists(
|
||||
@@ -538,7 +542,7 @@ def index(
|
||||
# If source IDs are provided, we can do the deletion incrementally!
|
||||
if cleanup == "incremental":
|
||||
# Get the uids of the documents that were not returned by the loader.
|
||||
# mypy isn't good enough to determine that source ids cannot be None
|
||||
# mypy isn't good enough to determine that source IDs cannot be None
|
||||
# here due to a check that's happening above, so we check again.
|
||||
for source_id in source_ids:
|
||||
if source_id is None:
|
||||
@@ -636,48 +640,49 @@ async def aindex(
|
||||
For the time being, documents are indexed using their hashes, and users
|
||||
are not able to specify the uid of the document.
|
||||
|
||||
!!! warning "Behavior changed in 0.3.25"
|
||||
!!! warning "Behavior changed in `langchain-core` 0.3.25"
|
||||
|
||||
Added `scoped_full` cleanup mode.
|
||||
|
||||
!!! warning
|
||||
|
||||
* In full mode, the loader should be returning
|
||||
the entire dataset, and not just a subset of the dataset.
|
||||
Otherwise, the auto_cleanup will remove documents that it is not
|
||||
supposed to.
|
||||
the entire dataset, and not just a subset of the dataset.
|
||||
Otherwise, the auto_cleanup will remove documents that it is not
|
||||
supposed to.
|
||||
* In incremental mode, if documents associated with a particular
|
||||
source id appear across different batches, the indexing API
|
||||
will do some redundant work. This will still result in the
|
||||
correct end state of the index, but will unfortunately not be
|
||||
100% efficient. For example, if a given document is split into 15
|
||||
chunks, and we index them using a batch size of 5, we'll have 3 batches
|
||||
all with the same source id. In general, to avoid doing too much
|
||||
redundant work select as big a batch size as possible.
|
||||
source id appear across different batches, the indexing API
|
||||
will do some redundant work. This will still result in the
|
||||
correct end state of the index, but will unfortunately not be
|
||||
100% efficient. For example, if a given document is split into 15
|
||||
chunks, and we index them using a batch size of 5, we'll have 3 batches
|
||||
all with the same source id. In general, to avoid doing too much
|
||||
redundant work select as big a batch size as possible.
|
||||
* The `scoped_full` mode is suitable if determining an appropriate batch size
|
||||
is challenging or if your data loader cannot return the entire dataset at
|
||||
once. This mode keeps track of source IDs in memory, which should be fine
|
||||
for most use cases. If your dataset is large (10M+ docs), you will likely
|
||||
need to parallelize the indexing process regardless.
|
||||
is challenging or if your data loader cannot return the entire dataset at
|
||||
once. This mode keeps track of source IDs in memory, which should be fine
|
||||
for most use cases. If your dataset is large (10M+ docs), you will likely
|
||||
need to parallelize the indexing process regardless.
|
||||
|
||||
Args:
|
||||
docs_source: Data loader or iterable of documents to index.
|
||||
record_manager: Timestamped set to keep track of which documents were
|
||||
updated.
|
||||
vector_store: VectorStore or DocumentIndex to index the documents into.
|
||||
vector_store: `VectorStore` or DocumentIndex to index the documents into.
|
||||
batch_size: Batch size to use when indexing.
|
||||
cleanup: How to handle clean up of documents.
|
||||
|
||||
- incremental: Cleans up all documents that haven't been updated AND
|
||||
that are associated with source ids that were seen during indexing.
|
||||
Clean up is done continuously during indexing helping to minimize the
|
||||
probability of users seeing duplicated content.
|
||||
that are associated with source IDs that were seen during indexing.
|
||||
Clean up is done continuously during indexing helping to minimize the
|
||||
probability of users seeing duplicated content.
|
||||
- full: Delete all documents that have not been returned by the loader
|
||||
during this run of indexing.
|
||||
Clean up runs after all documents have been indexed.
|
||||
This means that users may see duplicated content during indexing.
|
||||
during this run of indexing.
|
||||
Clean up runs after all documents have been indexed.
|
||||
This means that users may see duplicated content during indexing.
|
||||
- scoped_full: Similar to Full, but only deletes all documents
|
||||
that haven't been updated AND that are associated with
|
||||
source ids that were seen during indexing.
|
||||
that haven't been updated AND that are associated with
|
||||
source IDs that were seen during indexing.
|
||||
- None: Do not delete any documents.
|
||||
source_id_key: Optional key that helps identify the original source
|
||||
of the document.
|
||||
@@ -687,7 +692,7 @@ async def aindex(
|
||||
key_encoder: Hashing algorithm to use for hashing the document content and
|
||||
metadata. Options include "blake2b", "sha256", and "sha512".
|
||||
|
||||
!!! version-added "Added in version 0.3.66"
|
||||
!!! version-added "Added in `langchain-core` 0.3.66"
|
||||
|
||||
key_encoder: Hashing algorithm to use for hashing the document.
|
||||
If not provided, a default encoder using SHA-1 will be used.
|
||||
@@ -701,10 +706,10 @@ async def aindex(
|
||||
When changing the key encoder, you must change the
|
||||
index as well to avoid duplicated documents in the cache.
|
||||
upsert_kwargs: Additional keyword arguments to pass to the add_documents
|
||||
method of the VectorStore or the upsert method of the DocumentIndex.
|
||||
method of the `VectorStore` or the upsert method of the DocumentIndex.
|
||||
For example, you can use this to specify a custom vector_field:
|
||||
upsert_kwargs={"vector_field": "embedding"}
|
||||
!!! version-added "Added in version 0.3.10"
|
||||
!!! version-added "Added in `langchain-core` 0.3.10"
|
||||
|
||||
Returns:
|
||||
Indexing result which contains information about how many documents
|
||||
@@ -713,10 +718,10 @@ async def aindex(
|
||||
Raises:
|
||||
ValueError: If cleanup mode is not one of 'incremental', 'full' or None
|
||||
ValueError: If cleanup mode is incremental and source_id_key is None.
|
||||
ValueError: If vectorstore does not have
|
||||
ValueError: If `VectorStore` does not have
|
||||
"adelete" and "aadd_documents" required methods.
|
||||
ValueError: If source_id_key is not None, but is not a string or callable.
|
||||
TypeError: If `vector_store` is not a VectorStore or DocumentIndex.
|
||||
TypeError: If `vector_store` is not a `VectorStore` or DocumentIndex.
|
||||
AssertionError: If `source_id_key` is None when cleanup mode is
|
||||
incremental or `scoped_full` (should be unreachable).
|
||||
"""
|
||||
@@ -757,7 +762,7 @@ async def aindex(
|
||||
type(destination).adelete == VectorStore.adelete
|
||||
and type(destination).delete == VectorStore.delete
|
||||
):
|
||||
# Checking if the vectorstore has overridden the default adelete or delete
|
||||
# Checking if the VectorStore has overridden the default adelete or delete
|
||||
# methods implementation which just raises a NotImplementedError
|
||||
msg = "Vectorstore has not implemented the adelete or delete method"
|
||||
raise ValueError(msg)
|
||||
@@ -815,11 +820,11 @@ async def aindex(
|
||||
]
|
||||
|
||||
if cleanup in {"incremental", "scoped_full"}:
|
||||
# If the cleanup mode is incremental, source ids are required.
|
||||
# If the cleanup mode is incremental, source IDs are required.
|
||||
for source_id, hashed_doc in zip(source_ids, hashed_docs, strict=False):
|
||||
if source_id is None:
|
||||
msg = (
|
||||
f"Source ids are required when cleanup mode is "
|
||||
f"Source IDs are required when cleanup mode is "
|
||||
f"incremental or scoped_full. "
|
||||
f"Document that starts with "
|
||||
f"content: {hashed_doc.page_content[:100]} "
|
||||
@@ -828,7 +833,7 @@ async def aindex(
|
||||
raise ValueError(msg)
|
||||
if cleanup == "scoped_full":
|
||||
scoped_full_cleanup_source_ids.add(source_id)
|
||||
# source ids cannot be None after for loop above.
|
||||
# Source IDs cannot be None after for loop above.
|
||||
source_ids = cast("Sequence[str]", source_ids)
|
||||
|
||||
exists_batch = await record_manager.aexists(
|
||||
@@ -888,7 +893,7 @@ async def aindex(
|
||||
if cleanup == "incremental":
|
||||
# Get the uids of the documents that were not returned by the loader.
|
||||
|
||||
# mypy isn't good enough to determine that source ids cannot be None
|
||||
# mypy isn't good enough to determine that source IDs cannot be None
|
||||
# here due to a check that's happening above, so we check again.
|
||||
for source_id in source_ids:
|
||||
if source_id is None:
|
||||
|
||||
@@ -25,7 +25,7 @@ class RecordManager(ABC):
|
||||
The record manager abstraction is used by the langchain indexing API.
|
||||
|
||||
The record manager keeps track of which documents have been
|
||||
written into a vectorstore and when they were written.
|
||||
written into a `VectorStore` and when they were written.
|
||||
|
||||
The indexing API computes hashes for each document and stores the hash
|
||||
together with the write time and the source id in the record manager.
|
||||
@@ -37,7 +37,7 @@ class RecordManager(ABC):
|
||||
already been indexed, and to only index new documents.
|
||||
|
||||
The main benefit of this abstraction is that it works across many vectorstores.
|
||||
To be supported, a vectorstore needs to only support the ability to add and
|
||||
To be supported, a `VectorStore` needs to only support the ability to add and
|
||||
delete documents by ID. Using the record manager, the indexing API will
|
||||
be able to delete outdated documents and avoid redundant indexing of documents
|
||||
that have already been indexed.
|
||||
@@ -45,13 +45,13 @@ class RecordManager(ABC):
|
||||
The main constraints of this abstraction are:
|
||||
|
||||
1. It relies on the time-stamps to determine which documents have been
|
||||
indexed and which have not. This means that the time-stamps must be
|
||||
monotonically increasing. The timestamp should be the timestamp
|
||||
as measured by the server to minimize issues.
|
||||
indexed and which have not. This means that the time-stamps must be
|
||||
monotonically increasing. The timestamp should be the timestamp
|
||||
as measured by the server to minimize issues.
|
||||
2. The record manager is currently implemented separately from the
|
||||
vectorstore, which means that the overall system becomes distributed
|
||||
and may create issues with consistency. For example, writing to
|
||||
record manager succeeds, but corresponding writing to vectorstore fails.
|
||||
vectorstore, which means that the overall system becomes distributed
|
||||
and may create issues with consistency. For example, writing to
|
||||
record manager succeeds, but corresponding writing to `VectorStore` fails.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
@@ -460,7 +460,7 @@ class UpsertResponse(TypedDict):
|
||||
class DeleteResponse(TypedDict, total=False):
|
||||
"""A generic response for delete operation.
|
||||
|
||||
The fields in this response are optional and whether the vectorstore
|
||||
The fields in this response are optional and whether the `VectorStore`
|
||||
returns them or not is up to the implementation.
|
||||
"""
|
||||
|
||||
@@ -518,7 +518,7 @@ class DocumentIndex(BaseRetriever):
|
||||
if it is provided. If the ID is not provided, the upsert method is free
|
||||
to generate an ID for the content.
|
||||
|
||||
When an ID is specified and the content already exists in the vectorstore,
|
||||
When an ID is specified and the content already exists in the `VectorStore`,
|
||||
the upsert method should update the content with the new data. If the content
|
||||
does not exist, the upsert method should add the item to the `VectorStore`.
|
||||
|
||||
@@ -528,20 +528,20 @@ class DocumentIndex(BaseRetriever):
|
||||
|
||||
Returns:
|
||||
A response object that contains the list of IDs that were
|
||||
successfully added or updated in the vectorstore and the list of IDs that
|
||||
successfully added or updated in the `VectorStore` and the list of IDs that
|
||||
failed to be added or updated.
|
||||
"""
|
||||
|
||||
async def aupsert(
|
||||
self, items: Sequence[Document], /, **kwargs: Any
|
||||
) -> UpsertResponse:
|
||||
"""Add or update documents in the vectorstore. Async version of upsert.
|
||||
"""Add or update documents in the `VectorStore`. Async version of `upsert`.
|
||||
|
||||
The upsert functionality should utilize the ID field of the item
|
||||
if it is provided. If the ID is not provided, the upsert method is free
|
||||
to generate an ID for the item.
|
||||
|
||||
When an ID is specified and the item already exists in the vectorstore,
|
||||
When an ID is specified and the item already exists in the `VectorStore`,
|
||||
the upsert method should update the item with the new data. If the item
|
||||
does not exist, the upsert method should add the item to the `VectorStore`.
|
||||
|
||||
@@ -551,7 +551,7 @@ class DocumentIndex(BaseRetriever):
|
||||
|
||||
Returns:
|
||||
A response object that contains the list of IDs that were
|
||||
successfully added or updated in the vectorstore and the list of IDs that
|
||||
successfully added or updated in the `VectorStore` and the list of IDs that
|
||||
failed to be added or updated.
|
||||
"""
|
||||
return await run_in_executor(
|
||||
@@ -568,7 +568,7 @@ class DocumentIndex(BaseRetriever):
|
||||
Calling delete without any input parameters should raise a ValueError!
|
||||
|
||||
Args:
|
||||
ids: List of ids to delete.
|
||||
ids: List of IDs to delete.
|
||||
**kwargs: Additional keyword arguments. This is up to the implementation.
|
||||
For example, can include an option to delete the entire index,
|
||||
or else issue a non-blocking delete etc.
|
||||
@@ -586,7 +586,7 @@ class DocumentIndex(BaseRetriever):
|
||||
Calling adelete without any input parameters should raise a ValueError!
|
||||
|
||||
Args:
|
||||
ids: List of ids to delete.
|
||||
ids: List of IDs to delete.
|
||||
**kwargs: Additional keyword arguments. This is up to the implementation.
|
||||
For example, can include an option to delete the entire index.
|
||||
|
||||
|
||||
@@ -62,10 +62,10 @@ class InMemoryDocumentIndex(DocumentIndex):
|
||||
"""Delete by IDs.
|
||||
|
||||
Args:
|
||||
ids: List of ids to delete.
|
||||
ids: List of IDs to delete.
|
||||
|
||||
Raises:
|
||||
ValueError: If ids is None.
|
||||
ValueError: If IDs is None.
|
||||
|
||||
Returns:
|
||||
A response object that contains the list of IDs that were successfully
|
||||
|
||||
@@ -1,23 +1,26 @@
|
||||
"""Language models.
|
||||
"""Core language model abstractions.
|
||||
|
||||
LangChain has two main classes to work with language models: chat models and
|
||||
"old-fashioned" LLMs.
|
||||
"old-fashioned" LLMs (string-in, string-out).
|
||||
|
||||
**Chat models**
|
||||
|
||||
Language models that use a sequence of messages as inputs and return chat messages
|
||||
as outputs (as opposed to using plain text). Chat models support the assignment of
|
||||
distinct roles to conversation messages, helping to distinguish messages from the AI,
|
||||
users, and instructions such as system messages.
|
||||
as outputs (as opposed to using plain text).
|
||||
|
||||
The key abstraction for chat models is `BaseChatModel`. Implementations
|
||||
should inherit from this class.
|
||||
Chat models support the assignment of distinct roles to conversation messages, helping
|
||||
to distinguish messages from the AI, users, and instructions such as system messages.
|
||||
|
||||
The key abstraction for chat models is
|
||||
[`BaseChatModel`][langchain_core.language_models.BaseChatModel]. Implementations should
|
||||
inherit from this class.
|
||||
|
||||
See existing [chat model integrations](https://docs.langchain.com/oss/python/integrations/chat).
|
||||
|
||||
**LLMs**
|
||||
**LLMs (legacy)**
|
||||
|
||||
Language models that takes a string as input and returns a string.
|
||||
|
||||
These are traditionally older models (newer models generally are chat models).
|
||||
|
||||
Although the underlying models are string in, string out, the LangChain wrappers also
|
||||
@@ -52,6 +55,10 @@ if TYPE_CHECKING:
|
||||
ParrotFakeChatModel,
|
||||
)
|
||||
from langchain_core.language_models.llms import LLM, BaseLLM
|
||||
from langchain_core.language_models.model_profile import (
|
||||
ModelProfile,
|
||||
ModelProfileRegistry,
|
||||
)
|
||||
|
||||
__all__ = (
|
||||
"LLM",
|
||||
@@ -67,6 +74,8 @@ __all__ = (
|
||||
"LanguageModelInput",
|
||||
"LanguageModelLike",
|
||||
"LanguageModelOutput",
|
||||
"ModelProfile",
|
||||
"ModelProfileRegistry",
|
||||
"ParrotFakeChatModel",
|
||||
"SimpleChatModel",
|
||||
"get_tokenizer",
|
||||
@@ -89,6 +98,8 @@ _dynamic_imports = {
|
||||
"GenericFakeChatModel": "fake_chat_models",
|
||||
"ParrotFakeChatModel": "fake_chat_models",
|
||||
"LLM": "llms",
|
||||
"ModelProfile": "model_profile",
|
||||
"ModelProfileRegistry": "model_profile",
|
||||
"BaseLLM": "llms",
|
||||
"is_openai_data_block": "_utils",
|
||||
}
|
||||
|
||||
@@ -139,7 +139,8 @@ def _normalize_messages(
|
||||
directly; this may change in the future
|
||||
- LangChain v0 standard content blocks for backward compatibility
|
||||
|
||||
!!! warning "Behavior changed in 1.0.0"
|
||||
!!! warning "Behavior changed in `langchain-core` 1.0.0"
|
||||
|
||||
In previous versions, this function returned messages in LangChain v0 format.
|
||||
Now, it returns messages in LangChain v1 format, which upgraded chat models now
|
||||
expect to receive when passing back in message history. For backward
|
||||
|
||||
@@ -131,14 +131,19 @@ class BaseLanguageModel(
|
||||
|
||||
Caching is not currently supported for streaming methods of models.
|
||||
"""
|
||||
|
||||
verbose: bool = Field(default_factory=_get_verbosity, exclude=True, repr=False)
|
||||
"""Whether to print out response text."""
|
||||
|
||||
callbacks: Callbacks = Field(default=None, exclude=True)
|
||||
"""Callbacks to add to the run trace."""
|
||||
|
||||
tags: list[str] | None = Field(default=None, exclude=True)
|
||||
"""Tags to add to the run trace."""
|
||||
|
||||
metadata: dict[str, Any] | None = Field(default=None, exclude=True)
|
||||
"""Metadata to add to the run trace."""
|
||||
|
||||
custom_get_token_ids: Callable[[str], list[int]] | None = Field(
|
||||
default=None, exclude=True
|
||||
)
|
||||
@@ -195,15 +200,22 @@ class BaseLanguageModel(
|
||||
type (e.g., pure text completion models vs chat models).
|
||||
|
||||
Args:
|
||||
prompts: List of `PromptValue` objects. A `PromptValue` is an object that
|
||||
can be converted to match the format of any language model (string for
|
||||
pure text generation models and `BaseMessage` objects for chat models).
|
||||
stop: Stop words to use when generating. Model output is cut off at the
|
||||
first occurrence of any of these substrings.
|
||||
callbacks: `Callbacks` to pass through. Used for executing additional
|
||||
functionality, such as logging or streaming, throughout generation.
|
||||
**kwargs: Arbitrary additional keyword arguments. These are usually passed
|
||||
to the model provider API call.
|
||||
prompts: List of `PromptValue` objects.
|
||||
|
||||
A `PromptValue` is an object that can be converted to match the format
|
||||
of any language model (string for pure text generation models and
|
||||
`BaseMessage` objects for chat models).
|
||||
stop: Stop words to use when generating.
|
||||
|
||||
Model output is cut off at the first occurrence of any of these
|
||||
substrings.
|
||||
callbacks: `Callbacks` to pass through.
|
||||
|
||||
Used for executing additional functionality, such as logging or
|
||||
streaming, throughout generation.
|
||||
**kwargs: Arbitrary additional keyword arguments.
|
||||
|
||||
These are usually passed to the model provider API call.
|
||||
|
||||
Returns:
|
||||
An `LLMResult`, which contains a list of candidate `Generation` objects for
|
||||
@@ -232,15 +244,22 @@ class BaseLanguageModel(
|
||||
type (e.g., pure text completion models vs chat models).
|
||||
|
||||
Args:
|
||||
prompts: List of `PromptValue` objects. A `PromptValue` is an object that
|
||||
can be converted to match the format of any language model (string for
|
||||
pure text generation models and `BaseMessage` objects for chat models).
|
||||
stop: Stop words to use when generating. Model output is cut off at the
|
||||
first occurrence of any of these substrings.
|
||||
callbacks: `Callbacks` to pass through. Used for executing additional
|
||||
functionality, such as logging or streaming, throughout generation.
|
||||
**kwargs: Arbitrary additional keyword arguments. These are usually passed
|
||||
to the model provider API call.
|
||||
prompts: List of `PromptValue` objects.
|
||||
|
||||
A `PromptValue` is an object that can be converted to match the format
|
||||
of any language model (string for pure text generation models and
|
||||
`BaseMessage` objects for chat models).
|
||||
stop: Stop words to use when generating.
|
||||
|
||||
Model output is cut off at the first occurrence of any of these
|
||||
substrings.
|
||||
callbacks: `Callbacks` to pass through.
|
||||
|
||||
Used for executing additional functionality, such as logging or
|
||||
streaming, throughout generation.
|
||||
**kwargs: Arbitrary additional keyword arguments.
|
||||
|
||||
These are usually passed to the model provider API call.
|
||||
|
||||
Returns:
|
||||
An `LLMResult`, which contains a list of candidate `Generation` objects for
|
||||
@@ -262,13 +281,13 @@ class BaseLanguageModel(
|
||||
return self.lc_attributes
|
||||
|
||||
def get_token_ids(self, text: str) -> list[int]:
|
||||
"""Return the ordered ids of the tokens in a text.
|
||||
"""Return the ordered IDs of the tokens in a text.
|
||||
|
||||
Args:
|
||||
text: The string input to tokenize.
|
||||
|
||||
Returns:
|
||||
A list of ids corresponding to the tokens in the text, in order they occur
|
||||
A list of IDs corresponding to the tokens in the text, in order they occur
|
||||
in the text.
|
||||
"""
|
||||
if self.custom_get_token_ids is not None:
|
||||
@@ -280,6 +299,9 @@ class BaseLanguageModel(
|
||||
|
||||
Useful for checking if an input fits in a model's context window.
|
||||
|
||||
This should be overridden by model-specific implementations to provide accurate
|
||||
token counts via model-specific tokenizers.
|
||||
|
||||
Args:
|
||||
text: The string input to tokenize.
|
||||
|
||||
@@ -298,9 +320,17 @@ class BaseLanguageModel(
|
||||
|
||||
Useful for checking if an input fits in a model's context window.
|
||||
|
||||
This should be overridden by model-specific implementations to provide accurate
|
||||
token counts via model-specific tokenizers.
|
||||
|
||||
!!! note
|
||||
The base implementation of `get_num_tokens_from_messages` ignores tool
|
||||
schemas.
|
||||
|
||||
* The base implementation of `get_num_tokens_from_messages` ignores tool
|
||||
schemas.
|
||||
* The base implementation of `get_num_tokens_from_messages` adds additional
|
||||
prefixes to messages in represent user roles, which will add to the
|
||||
overall token count. Model-specific implementations may choose to
|
||||
handle this differently.
|
||||
|
||||
Args:
|
||||
messages: The message inputs to tokenize.
|
||||
|
||||
@@ -33,6 +33,7 @@ from langchain_core.language_models.base import (
|
||||
LangSmithParams,
|
||||
LanguageModelInput,
|
||||
)
|
||||
from langchain_core.language_models.model_profile import ModelProfile
|
||||
from langchain_core.load import dumpd, dumps
|
||||
from langchain_core.messages import (
|
||||
AIMessage,
|
||||
@@ -88,7 +89,10 @@ def _generate_response_from_error(error: BaseException) -> list[ChatGeneration]:
|
||||
try:
|
||||
metadata["body"] = response.json()
|
||||
except Exception:
|
||||
metadata["body"] = getattr(response, "text", None)
|
||||
try:
|
||||
metadata["body"] = getattr(response, "text", None)
|
||||
except Exception:
|
||||
metadata["body"] = None
|
||||
if hasattr(response, "headers"):
|
||||
try:
|
||||
metadata["headers"] = dict(response.headers)
|
||||
@@ -329,10 +333,25 @@ class BaseChatModel(BaseLanguageModel[AIMessage], ABC):
|
||||
[`langchain-openai`](https://pypi.org/project/langchain-openai)) can also use this
|
||||
field to roll out new content formats in a backward-compatible way.
|
||||
|
||||
!!! version-added "Added in version 1.0"
|
||||
!!! version-added "Added in `langchain-core` 1.0.0"
|
||||
|
||||
"""
|
||||
|
||||
profile: ModelProfile | None = Field(default=None, exclude=True)
|
||||
"""Profile detailing model capabilities.
|
||||
|
||||
!!! warning "Beta feature"
|
||||
This is a beta feature. The format of model profiles is subject to change.
|
||||
|
||||
If not specified, automatically loaded from the provider package on initialization
|
||||
if data is available.
|
||||
|
||||
Example profile data includes context window sizes, supported modalities, or support
|
||||
for tool calling, structured output, and other features.
|
||||
|
||||
!!! version-added "Added in `langchain-core` 1.1.0"
|
||||
"""
|
||||
|
||||
model_config = ConfigDict(
|
||||
arbitrary_types_allowed=True,
|
||||
)
|
||||
@@ -842,16 +861,21 @@ class BaseChatModel(BaseLanguageModel[AIMessage], ABC):
|
||||
|
||||
Args:
|
||||
messages: List of list of messages.
|
||||
stop: Stop words to use when generating. Model output is cut off at the
|
||||
first occurrence of any of these substrings.
|
||||
callbacks: `Callbacks` to pass through. Used for executing additional
|
||||
functionality, such as logging or streaming, throughout generation.
|
||||
stop: Stop words to use when generating.
|
||||
|
||||
Model output is cut off at the first occurrence of any of these
|
||||
substrings.
|
||||
callbacks: `Callbacks` to pass through.
|
||||
|
||||
Used for executing additional functionality, such as logging or
|
||||
streaming, throughout generation.
|
||||
tags: The tags to apply.
|
||||
metadata: The metadata to apply.
|
||||
run_name: The name of the run.
|
||||
run_id: The ID of the run.
|
||||
**kwargs: Arbitrary additional keyword arguments. These are usually passed
|
||||
to the model provider API call.
|
||||
**kwargs: Arbitrary additional keyword arguments.
|
||||
|
||||
These are usually passed to the model provider API call.
|
||||
|
||||
Returns:
|
||||
An `LLMResult`, which contains a list of candidate `Generations` for each
|
||||
@@ -960,16 +984,21 @@ class BaseChatModel(BaseLanguageModel[AIMessage], ABC):
|
||||
|
||||
Args:
|
||||
messages: List of list of messages.
|
||||
stop: Stop words to use when generating. Model output is cut off at the
|
||||
first occurrence of any of these substrings.
|
||||
callbacks: `Callbacks` to pass through. Used for executing additional
|
||||
functionality, such as logging or streaming, throughout generation.
|
||||
stop: Stop words to use when generating.
|
||||
|
||||
Model output is cut off at the first occurrence of any of these
|
||||
substrings.
|
||||
callbacks: `Callbacks` to pass through.
|
||||
|
||||
Used for executing additional functionality, such as logging or
|
||||
streaming, throughout generation.
|
||||
tags: The tags to apply.
|
||||
metadata: The metadata to apply.
|
||||
run_name: The name of the run.
|
||||
run_id: The ID of the run.
|
||||
**kwargs: Arbitrary additional keyword arguments. These are usually passed
|
||||
to the model provider API call.
|
||||
**kwargs: Arbitrary additional keyword arguments.
|
||||
|
||||
These are usually passed to the model provider API call.
|
||||
|
||||
Returns:
|
||||
An `LLMResult`, which contains a list of candidate `Generations` for each
|
||||
@@ -1502,10 +1531,10 @@ class BaseChatModel(BaseLanguageModel[AIMessage], ABC):
|
||||
Args:
|
||||
schema: The output schema. Can be passed in as:
|
||||
|
||||
- an OpenAI function/tool schema,
|
||||
- a JSON Schema,
|
||||
- a `TypedDict` class,
|
||||
- or a Pydantic class.
|
||||
- An OpenAI function/tool schema,
|
||||
- A JSON Schema,
|
||||
- A `TypedDict` class,
|
||||
- Or a Pydantic class.
|
||||
|
||||
If `schema` is a Pydantic class then the model output will be a
|
||||
Pydantic instance of that class, and the model-generated fields will be
|
||||
@@ -1517,11 +1546,15 @@ class BaseChatModel(BaseLanguageModel[AIMessage], ABC):
|
||||
when specifying a Pydantic or `TypedDict` class.
|
||||
|
||||
include_raw:
|
||||
If `False` then only the parsed structured output is returned. If
|
||||
an error occurs during model output parsing it will be raised. If `True`
|
||||
then both the raw model response (a `BaseMessage`) and the parsed model
|
||||
response will be returned. If an error occurs during output parsing it
|
||||
will be caught and returned as well.
|
||||
If `False` then only the parsed structured output is returned.
|
||||
|
||||
If an error occurs during model output parsing it will be raised.
|
||||
|
||||
If `True` then both the raw model response (a `BaseMessage`) and the
|
||||
parsed model response will be returned.
|
||||
|
||||
If an error occurs during output parsing it will be caught and returned
|
||||
as well.
|
||||
|
||||
The final output is always a `dict` with keys `'raw'`, `'parsed'`, and
|
||||
`'parsing_error'`.
|
||||
@@ -1545,89 +1578,90 @@ class BaseChatModel(BaseLanguageModel[AIMessage], ABC):
|
||||
depends on the `schema` as described above.
|
||||
- `'parsing_error'`: `BaseException | None`
|
||||
|
||||
Example: Pydantic schema (`include_raw=False`):
|
||||
???+ example "Pydantic schema (`include_raw=False`)"
|
||||
|
||||
```python
|
||||
from pydantic import BaseModel
|
||||
```python
|
||||
from pydantic import BaseModel
|
||||
|
||||
|
||||
class AnswerWithJustification(BaseModel):
|
||||
'''An answer to the user question along with justification for the answer.'''
|
||||
class AnswerWithJustification(BaseModel):
|
||||
'''An answer to the user question along with justification for the answer.'''
|
||||
|
||||
answer: str
|
||||
justification: str
|
||||
answer: str
|
||||
justification: str
|
||||
|
||||
|
||||
model = ChatModel(model="model-name", temperature=0)
|
||||
structured_model = model.with_structured_output(AnswerWithJustification)
|
||||
model = ChatModel(model="model-name", temperature=0)
|
||||
structured_model = model.with_structured_output(AnswerWithJustification)
|
||||
|
||||
structured_model.invoke(
|
||||
"What weighs more a pound of bricks or a pound of feathers"
|
||||
)
|
||||
structured_model.invoke(
|
||||
"What weighs more a pound of bricks or a pound of feathers"
|
||||
)
|
||||
|
||||
# -> AnswerWithJustification(
|
||||
# answer='They weigh the same',
|
||||
# justification='Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume or density of the objects may differ.'
|
||||
# )
|
||||
```
|
||||
# -> AnswerWithJustification(
|
||||
# answer='They weigh the same',
|
||||
# justification='Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume or density of the objects may differ.'
|
||||
# )
|
||||
```
|
||||
|
||||
Example: Pydantic schema (`include_raw=True`):
|
||||
??? example "Pydantic schema (`include_raw=True`)"
|
||||
|
||||
```python
|
||||
from pydantic import BaseModel
|
||||
```python
|
||||
from pydantic import BaseModel
|
||||
|
||||
|
||||
class AnswerWithJustification(BaseModel):
|
||||
'''An answer to the user question along with justification for the answer.'''
|
||||
class AnswerWithJustification(BaseModel):
|
||||
'''An answer to the user question along with justification for the answer.'''
|
||||
|
||||
answer: str
|
||||
justification: str
|
||||
answer: str
|
||||
justification: str
|
||||
|
||||
|
||||
model = ChatModel(model="model-name", temperature=0)
|
||||
structured_model = model.with_structured_output(
|
||||
AnswerWithJustification, include_raw=True
|
||||
)
|
||||
model = ChatModel(model="model-name", temperature=0)
|
||||
structured_model = model.with_structured_output(
|
||||
AnswerWithJustification, include_raw=True
|
||||
)
|
||||
|
||||
structured_model.invoke(
|
||||
"What weighs more a pound of bricks or a pound of feathers"
|
||||
)
|
||||
# -> {
|
||||
# 'raw': AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_Ao02pnFYXD6GN1yzc0uXPsvF', 'function': {'arguments': '{"answer":"They weigh the same.","justification":"Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume or density of the objects may differ."}', 'name': 'AnswerWithJustification'}, 'type': 'function'}]}),
|
||||
# 'parsed': AnswerWithJustification(answer='They weigh the same.', justification='Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume or density of the objects may differ.'),
|
||||
# 'parsing_error': None
|
||||
# }
|
||||
```
|
||||
structured_model.invoke(
|
||||
"What weighs more a pound of bricks or a pound of feathers"
|
||||
)
|
||||
# -> {
|
||||
# 'raw': AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_Ao02pnFYXD6GN1yzc0uXPsvF', 'function': {'arguments': '{"answer":"They weigh the same.","justification":"Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume or density of the objects may differ."}', 'name': 'AnswerWithJustification'}, 'type': 'function'}]}),
|
||||
# 'parsed': AnswerWithJustification(answer='They weigh the same.', justification='Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume or density of the objects may differ.'),
|
||||
# 'parsing_error': None
|
||||
# }
|
||||
```
|
||||
|
||||
Example: `dict` schema (`include_raw=False`):
|
||||
??? example "Dictionary schema (`include_raw=False`)"
|
||||
|
||||
```python
|
||||
from pydantic import BaseModel
|
||||
from langchain_core.utils.function_calling import convert_to_openai_tool
|
||||
```python
|
||||
from pydantic import BaseModel
|
||||
from langchain_core.utils.function_calling import convert_to_openai_tool
|
||||
|
||||
|
||||
class AnswerWithJustification(BaseModel):
|
||||
'''An answer to the user question along with justification for the answer.'''
|
||||
class AnswerWithJustification(BaseModel):
|
||||
'''An answer to the user question along with justification for the answer.'''
|
||||
|
||||
answer: str
|
||||
justification: str
|
||||
answer: str
|
||||
justification: str
|
||||
|
||||
|
||||
dict_schema = convert_to_openai_tool(AnswerWithJustification)
|
||||
model = ChatModel(model="model-name", temperature=0)
|
||||
structured_model = model.with_structured_output(dict_schema)
|
||||
dict_schema = convert_to_openai_tool(AnswerWithJustification)
|
||||
model = ChatModel(model="model-name", temperature=0)
|
||||
structured_model = model.with_structured_output(dict_schema)
|
||||
|
||||
structured_model.invoke(
|
||||
"What weighs more a pound of bricks or a pound of feathers"
|
||||
)
|
||||
# -> {
|
||||
# 'answer': 'They weigh the same',
|
||||
# 'justification': 'Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume and density of the two substances differ.'
|
||||
# }
|
||||
```
|
||||
structured_model.invoke(
|
||||
"What weighs more a pound of bricks or a pound of feathers"
|
||||
)
|
||||
# -> {
|
||||
# 'answer': 'They weigh the same',
|
||||
# 'justification': 'Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume and density of the two substances differ.'
|
||||
# }
|
||||
```
|
||||
|
||||
!!! warning "Behavior changed in 0.2.26"
|
||||
Added support for TypedDict class.
|
||||
!!! warning "Behavior changed in `langchain-core` 0.2.26"
|
||||
|
||||
Added support for `TypedDict` class.
|
||||
|
||||
""" # noqa: E501
|
||||
_ = kwargs.pop("method", None)
|
||||
@@ -1726,9 +1760,12 @@ def _gen_info_and_msg_metadata(
|
||||
}
|
||||
|
||||
|
||||
_MAX_CLEANUP_DEPTH = 100
|
||||
|
||||
|
||||
def _cleanup_llm_representation(serialized: Any, depth: int) -> None:
|
||||
"""Remove non-serializable objects from a serialized object."""
|
||||
if depth > 100: # Don't cooperate for pathological cases
|
||||
if depth > _MAX_CLEANUP_DEPTH: # Don't cooperate for pathological cases
|
||||
return
|
||||
|
||||
if not isinstance(serialized, dict):
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
"""Fake chat model for testing purposes."""
|
||||
"""Fake chat models for testing purposes."""
|
||||
|
||||
import asyncio
|
||||
import re
|
||||
|
||||
@@ -1,4 +1,7 @@
|
||||
"""Base interface for large language models to expose."""
|
||||
"""Base interface for traditional large language models (LLMs) to expose.
|
||||
|
||||
These are traditionally older models (newer models generally are chat models).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
@@ -58,6 +61,8 @@ if TYPE_CHECKING:
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_background_tasks: set[asyncio.Task] = set()
|
||||
|
||||
|
||||
@functools.lru_cache
|
||||
def _log_error_once(msg: str) -> None:
|
||||
@@ -97,9 +102,9 @@ def create_base_retry_decorator(
|
||||
asyncio.run(coro)
|
||||
else:
|
||||
if loop.is_running():
|
||||
# TODO: Fix RUF006 - this task should have a reference
|
||||
# and be awaited somewhere
|
||||
loop.create_task(coro) # noqa: RUF006
|
||||
task = loop.create_task(coro)
|
||||
_background_tasks.add(task)
|
||||
task.add_done_callback(_background_tasks.discard)
|
||||
else:
|
||||
asyncio.run(coro)
|
||||
except Exception as e:
|
||||
@@ -648,9 +653,12 @@ class BaseLLM(BaseLanguageModel[str], ABC):
|
||||
|
||||
Args:
|
||||
prompts: The prompts to generate from.
|
||||
stop: Stop words to use when generating. Model output is cut off at the
|
||||
first occurrence of any of the stop substrings.
|
||||
If stop tokens are not supported consider raising NotImplementedError.
|
||||
stop: Stop words to use when generating.
|
||||
|
||||
Model output is cut off at the first occurrence of any of these
|
||||
substrings.
|
||||
|
||||
If stop tokens are not supported consider raising `NotImplementedError`.
|
||||
run_manager: Callback manager for the run.
|
||||
|
||||
Returns:
|
||||
@@ -668,9 +676,12 @@ class BaseLLM(BaseLanguageModel[str], ABC):
|
||||
|
||||
Args:
|
||||
prompts: The prompts to generate from.
|
||||
stop: Stop words to use when generating. Model output is cut off at the
|
||||
first occurrence of any of the stop substrings.
|
||||
If stop tokens are not supported consider raising NotImplementedError.
|
||||
stop: Stop words to use when generating.
|
||||
|
||||
Model output is cut off at the first occurrence of any of these
|
||||
substrings.
|
||||
|
||||
If stop tokens are not supported consider raising `NotImplementedError`.
|
||||
run_manager: Callback manager for the run.
|
||||
|
||||
Returns:
|
||||
@@ -702,11 +713,14 @@ class BaseLLM(BaseLanguageModel[str], ABC):
|
||||
|
||||
Args:
|
||||
prompt: The prompt to generate from.
|
||||
stop: Stop words to use when generating. Model output is cut off at the
|
||||
first occurrence of any of these substrings.
|
||||
stop: Stop words to use when generating.
|
||||
|
||||
Model output is cut off at the first occurrence of any of these
|
||||
substrings.
|
||||
run_manager: Callback manager for the run.
|
||||
**kwargs: Arbitrary additional keyword arguments. These are usually passed
|
||||
to the model provider API call.
|
||||
**kwargs: Arbitrary additional keyword arguments.
|
||||
|
||||
These are usually passed to the model provider API call.
|
||||
|
||||
Yields:
|
||||
Generation chunks.
|
||||
@@ -728,11 +742,14 @@ class BaseLLM(BaseLanguageModel[str], ABC):
|
||||
|
||||
Args:
|
||||
prompt: The prompt to generate from.
|
||||
stop: Stop words to use when generating. Model output is cut off at the
|
||||
first occurrence of any of these substrings.
|
||||
stop: Stop words to use when generating.
|
||||
|
||||
Model output is cut off at the first occurrence of any of these
|
||||
substrings.
|
||||
run_manager: Callback manager for the run.
|
||||
**kwargs: Arbitrary additional keyword arguments. These are usually passed
|
||||
to the model provider API call.
|
||||
**kwargs: Arbitrary additional keyword arguments.
|
||||
|
||||
These are usually passed to the model provider API call.
|
||||
|
||||
Yields:
|
||||
Generation chunks.
|
||||
@@ -843,10 +860,14 @@ class BaseLLM(BaseLanguageModel[str], ABC):
|
||||
|
||||
Args:
|
||||
prompts: List of string prompts.
|
||||
stop: Stop words to use when generating. Model output is cut off at the
|
||||
first occurrence of any of these substrings.
|
||||
callbacks: `Callbacks` to pass through. Used for executing additional
|
||||
functionality, such as logging or streaming, throughout generation.
|
||||
stop: Stop words to use when generating.
|
||||
|
||||
Model output is cut off at the first occurrence of any of these
|
||||
substrings.
|
||||
callbacks: `Callbacks` to pass through.
|
||||
|
||||
Used for executing additional functionality, such as logging or
|
||||
streaming, throughout generation.
|
||||
tags: List of tags to associate with each prompt. If provided, the length
|
||||
of the list must match the length of the prompts list.
|
||||
metadata: List of metadata dictionaries to associate with each prompt. If
|
||||
@@ -856,8 +877,9 @@ class BaseLLM(BaseLanguageModel[str], ABC):
|
||||
length of the list must match the length of the prompts list.
|
||||
run_id: List of run IDs to associate with each prompt. If provided, the
|
||||
length of the list must match the length of the prompts list.
|
||||
**kwargs: Arbitrary additional keyword arguments. These are usually passed
|
||||
to the model provider API call.
|
||||
**kwargs: Arbitrary additional keyword arguments.
|
||||
|
||||
These are usually passed to the model provider API call.
|
||||
|
||||
Raises:
|
||||
ValueError: If prompts is not a list.
|
||||
@@ -1113,10 +1135,14 @@ class BaseLLM(BaseLanguageModel[str], ABC):
|
||||
|
||||
Args:
|
||||
prompts: List of string prompts.
|
||||
stop: Stop words to use when generating. Model output is cut off at the
|
||||
first occurrence of any of these substrings.
|
||||
callbacks: `Callbacks` to pass through. Used for executing additional
|
||||
functionality, such as logging or streaming, throughout generation.
|
||||
stop: Stop words to use when generating.
|
||||
|
||||
Model output is cut off at the first occurrence of any of these
|
||||
substrings.
|
||||
callbacks: `Callbacks` to pass through.
|
||||
|
||||
Used for executing additional functionality, such as logging or
|
||||
streaming, throughout generation.
|
||||
tags: List of tags to associate with each prompt. If provided, the length
|
||||
of the list must match the length of the prompts list.
|
||||
metadata: List of metadata dictionaries to associate with each prompt. If
|
||||
@@ -1126,8 +1152,9 @@ class BaseLLM(BaseLanguageModel[str], ABC):
|
||||
length of the list must match the length of the prompts list.
|
||||
run_id: List of run IDs to associate with each prompt. If provided, the
|
||||
length of the list must match the length of the prompts list.
|
||||
**kwargs: Arbitrary additional keyword arguments. These are usually passed
|
||||
to the model provider API call.
|
||||
**kwargs: Arbitrary additional keyword arguments.
|
||||
|
||||
These are usually passed to the model provider API call.
|
||||
|
||||
Raises:
|
||||
ValueError: If the length of `callbacks`, `tags`, `metadata`, or
|
||||
@@ -1391,11 +1418,6 @@ class LLM(BaseLLM):
|
||||
`astream` will use `_astream` if provided, otherwise it will implement
|
||||
a fallback behavior that will use `_stream` if `_stream` is implemented,
|
||||
and use `_acall` if `_stream` is not implemented.
|
||||
|
||||
Please see the following guide for more information on how to
|
||||
implement a custom LLM:
|
||||
|
||||
https://python.langchain.com/docs/how_to/custom_llm/
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
@@ -1412,12 +1434,16 @@ class LLM(BaseLLM):
|
||||
|
||||
Args:
|
||||
prompt: The prompt to generate from.
|
||||
stop: Stop words to use when generating. Model output is cut off at the
|
||||
first occurrence of any of the stop substrings.
|
||||
If stop tokens are not supported consider raising NotImplementedError.
|
||||
stop: Stop words to use when generating.
|
||||
|
||||
Model output is cut off at the first occurrence of any of these
|
||||
substrings.
|
||||
|
||||
If stop tokens are not supported consider raising `NotImplementedError`.
|
||||
run_manager: Callback manager for the run.
|
||||
**kwargs: Arbitrary additional keyword arguments. These are usually passed
|
||||
to the model provider API call.
|
||||
**kwargs: Arbitrary additional keyword arguments.
|
||||
|
||||
These are usually passed to the model provider API call.
|
||||
|
||||
Returns:
|
||||
The model output as a string. SHOULD NOT include the prompt.
|
||||
@@ -1438,12 +1464,16 @@ class LLM(BaseLLM):
|
||||
|
||||
Args:
|
||||
prompt: The prompt to generate from.
|
||||
stop: Stop words to use when generating. Model output is cut off at the
|
||||
first occurrence of any of the stop substrings.
|
||||
If stop tokens are not supported consider raising NotImplementedError.
|
||||
stop: Stop words to use when generating.
|
||||
|
||||
Model output is cut off at the first occurrence of any of these
|
||||
substrings.
|
||||
|
||||
If stop tokens are not supported consider raising `NotImplementedError`.
|
||||
run_manager: Callback manager for the run.
|
||||
**kwargs: Arbitrary additional keyword arguments. These are usually passed
|
||||
to the model provider API call.
|
||||
**kwargs: Arbitrary additional keyword arguments.
|
||||
|
||||
These are usually passed to the model provider API call.
|
||||
|
||||
Returns:
|
||||
The model output as a string. SHOULD NOT include the prompt.
|
||||
|
||||
84
libs/core/langchain_core/language_models/model_profile.py
Normal file
84
libs/core/langchain_core/language_models/model_profile.py
Normal file
@@ -0,0 +1,84 @@
|
||||
"""Model profile types and utilities."""
|
||||
|
||||
from typing_extensions import TypedDict
|
||||
|
||||
|
||||
class ModelProfile(TypedDict, total=False):
|
||||
"""Model profile.
|
||||
|
||||
!!! warning "Beta feature"
|
||||
This is a beta feature. The format of model profiles is subject to change.
|
||||
|
||||
Provides information about chat model capabilities, such as context window sizes
|
||||
and supported features.
|
||||
"""
|
||||
|
||||
# --- Input constraints ---
|
||||
|
||||
max_input_tokens: int
|
||||
"""Maximum context window (tokens)"""
|
||||
|
||||
image_inputs: bool
|
||||
"""Whether image inputs are supported."""
|
||||
# TODO: add more detail about formats?
|
||||
|
||||
image_url_inputs: bool
|
||||
"""Whether [image URL inputs](https://docs.langchain.com/oss/python/langchain/models#multimodal)
|
||||
are supported."""
|
||||
|
||||
pdf_inputs: bool
|
||||
"""Whether [PDF inputs](https://docs.langchain.com/oss/python/langchain/models#multimodal)
|
||||
are supported."""
|
||||
# TODO: add more detail about formats? e.g. bytes or base64
|
||||
|
||||
audio_inputs: bool
|
||||
"""Whether [audio inputs](https://docs.langchain.com/oss/python/langchain/models#multimodal)
|
||||
are supported."""
|
||||
# TODO: add more detail about formats? e.g. bytes or base64
|
||||
|
||||
video_inputs: bool
|
||||
"""Whether [video inputs](https://docs.langchain.com/oss/python/langchain/models#multimodal)
|
||||
are supported."""
|
||||
# TODO: add more detail about formats? e.g. bytes or base64
|
||||
|
||||
image_tool_message: bool
|
||||
"""Whether images can be included in tool messages."""
|
||||
|
||||
pdf_tool_message: bool
|
||||
"""Whether PDFs can be included in tool messages."""
|
||||
|
||||
# --- Output constraints ---
|
||||
|
||||
max_output_tokens: int
|
||||
"""Maximum output tokens"""
|
||||
|
||||
reasoning_output: bool
|
||||
"""Whether the model supports [reasoning / chain-of-thought](https://docs.langchain.com/oss/python/langchain/models#reasoning)"""
|
||||
|
||||
image_outputs: bool
|
||||
"""Whether [image outputs](https://docs.langchain.com/oss/python/langchain/models#multimodal)
|
||||
are supported."""
|
||||
|
||||
audio_outputs: bool
|
||||
"""Whether [audio outputs](https://docs.langchain.com/oss/python/langchain/models#multimodal)
|
||||
are supported."""
|
||||
|
||||
video_outputs: bool
|
||||
"""Whether [video outputs](https://docs.langchain.com/oss/python/langchain/models#multimodal)
|
||||
are supported."""
|
||||
|
||||
# --- Tool calling ---
|
||||
tool_calling: bool
|
||||
"""Whether the model supports [tool calling](https://docs.langchain.com/oss/python/langchain/models#tool-calling)"""
|
||||
|
||||
tool_choice: bool
|
||||
"""Whether the model supports [tool choice](https://docs.langchain.com/oss/python/langchain/models#forcing-tool-calls)"""
|
||||
|
||||
# --- Structured output ---
|
||||
structured_output: bool
|
||||
"""Whether the model supports a native [structured output](https://docs.langchain.com/oss/python/langchain/models#structured-outputs)
|
||||
feature"""
|
||||
|
||||
|
||||
ModelProfileRegistry = dict[str, ModelProfile]
|
||||
"""Registry mapping model identifiers or names to their ModelProfile."""
|
||||
@@ -17,7 +17,7 @@ def default(obj: Any) -> Any:
|
||||
obj: The object to serialize to json if it is a Serializable object.
|
||||
|
||||
Returns:
|
||||
A json serializable object or a SerializedNotImplemented object.
|
||||
A JSON serializable object or a SerializedNotImplemented object.
|
||||
"""
|
||||
if isinstance(obj, Serializable):
|
||||
return obj.to_json()
|
||||
|
||||
@@ -1,4 +1,9 @@
|
||||
"""Load LangChain objects from JSON strings or objects."""
|
||||
"""Load LangChain objects from JSON strings or objects.
|
||||
|
||||
!!! warning
|
||||
`load` and `loads` are vulnerable to remote code execution. Never use with untrusted
|
||||
input.
|
||||
"""
|
||||
|
||||
import importlib
|
||||
import json
|
||||
@@ -61,13 +66,15 @@ class Reviver:
|
||||
"""Initialize the reviver.
|
||||
|
||||
Args:
|
||||
secrets_map: A map of secrets to load. If a secret is not found in
|
||||
the map, it will be loaded from the environment if `secrets_from_env`
|
||||
is True.
|
||||
secrets_map: A map of secrets to load.
|
||||
|
||||
If a secret is not found in the map, it will be loaded from the
|
||||
environment if `secrets_from_env` is `True`.
|
||||
valid_namespaces: A list of additional namespaces (modules)
|
||||
to allow to be deserialized.
|
||||
secrets_from_env: Whether to load secrets from the environment.
|
||||
additional_import_mappings: A dictionary of additional namespace mappings
|
||||
|
||||
You can use this to override default mappings or add new mappings.
|
||||
ignore_unserializable_fields: Whether to ignore unserializable fields.
|
||||
"""
|
||||
@@ -191,17 +198,23 @@ def loads(
|
||||
) -> Any:
|
||||
"""Revive a LangChain class from a JSON string.
|
||||
|
||||
!!! warning
|
||||
This function is vulnerable to remote code execution. Never use with untrusted
|
||||
input.
|
||||
|
||||
Equivalent to `load(json.loads(text))`.
|
||||
|
||||
Args:
|
||||
text: The string to load.
|
||||
secrets_map: A map of secrets to load. If a secret is not found in
|
||||
the map, it will be loaded from the environment if `secrets_from_env`
|
||||
is True.
|
||||
secrets_map: A map of secrets to load.
|
||||
|
||||
If a secret is not found in the map, it will be loaded from the environment
|
||||
if `secrets_from_env` is `True`.
|
||||
valid_namespaces: A list of additional namespaces (modules)
|
||||
to allow to be deserialized.
|
||||
secrets_from_env: Whether to load secrets from the environment.
|
||||
additional_import_mappings: A dictionary of additional namespace mappings
|
||||
|
||||
You can use this to override default mappings or add new mappings.
|
||||
ignore_unserializable_fields: Whether to ignore unserializable fields.
|
||||
|
||||
@@ -232,18 +245,24 @@ def load(
|
||||
) -> Any:
|
||||
"""Revive a LangChain class from a JSON object.
|
||||
|
||||
!!! warning
|
||||
This function is vulnerable to remote code execution. Never use with untrusted
|
||||
input.
|
||||
|
||||
Use this if you already have a parsed JSON object,
|
||||
eg. from `json.load` or `orjson.loads`.
|
||||
|
||||
Args:
|
||||
obj: The object to load.
|
||||
secrets_map: A map of secrets to load. If a secret is not found in
|
||||
the map, it will be loaded from the environment if `secrets_from_env`
|
||||
is True.
|
||||
secrets_map: A map of secrets to load.
|
||||
|
||||
If a secret is not found in the map, it will be loaded from the environment
|
||||
if `secrets_from_env` is `True`.
|
||||
valid_namespaces: A list of additional namespaces (modules)
|
||||
to allow to be deserialized.
|
||||
secrets_from_env: Whether to load secrets from the environment.
|
||||
additional_import_mappings: A dictionary of additional namespace mappings
|
||||
|
||||
You can use this to override default mappings or add new mappings.
|
||||
ignore_unserializable_fields: Whether to ignore unserializable fields.
|
||||
|
||||
|
||||
@@ -97,11 +97,14 @@ class Serializable(BaseModel, ABC):
|
||||
by default. This is to prevent accidental serialization of objects that should
|
||||
not be serialized.
|
||||
- `get_lc_namespace`: Get the namespace of the LangChain object.
|
||||
|
||||
During deserialization, this namespace is used to identify
|
||||
the correct class to instantiate.
|
||||
|
||||
Please see the `Reviver` class in `langchain_core.load.load` for more details.
|
||||
During deserialization an additional mapping is handle classes that have moved
|
||||
or been renamed across package versions.
|
||||
|
||||
- `lc_secrets`: A map of constructor argument names to secret ids.
|
||||
- `lc_attributes`: List of additional attribute names that should be included
|
||||
as part of the serialized representation.
|
||||
@@ -194,7 +197,7 @@ class Serializable(BaseModel, ABC):
|
||||
ValueError: If the class has deprecated attributes.
|
||||
|
||||
Returns:
|
||||
A json serializable object or a `SerializedNotImplemented` object.
|
||||
A JSON serializable object or a `SerializedNotImplemented` object.
|
||||
"""
|
||||
if not self.is_lc_serializable():
|
||||
return self.to_json_not_implemented()
|
||||
|
||||
@@ -9,6 +9,9 @@ if TYPE_CHECKING:
|
||||
from langchain_core.messages.ai import (
|
||||
AIMessage,
|
||||
AIMessageChunk,
|
||||
InputTokenDetails,
|
||||
OutputTokenDetails,
|
||||
UsageMetadata,
|
||||
)
|
||||
from langchain_core.messages.base import (
|
||||
BaseMessage,
|
||||
@@ -87,10 +90,12 @@ __all__ = (
|
||||
"HumanMessage",
|
||||
"HumanMessageChunk",
|
||||
"ImageContentBlock",
|
||||
"InputTokenDetails",
|
||||
"InvalidToolCall",
|
||||
"MessageLikeRepresentation",
|
||||
"NonStandardAnnotation",
|
||||
"NonStandardContentBlock",
|
||||
"OutputTokenDetails",
|
||||
"PlainTextContentBlock",
|
||||
"ReasoningContentBlock",
|
||||
"RemoveMessage",
|
||||
@@ -104,6 +109,7 @@ __all__ = (
|
||||
"ToolCallChunk",
|
||||
"ToolMessage",
|
||||
"ToolMessageChunk",
|
||||
"UsageMetadata",
|
||||
"VideoContentBlock",
|
||||
"_message_from_dict",
|
||||
"convert_to_messages",
|
||||
@@ -145,6 +151,7 @@ _dynamic_imports = {
|
||||
"HumanMessageChunk": "human",
|
||||
"NonStandardAnnotation": "content",
|
||||
"NonStandardContentBlock": "content",
|
||||
"OutputTokenDetails": "ai",
|
||||
"PlainTextContentBlock": "content",
|
||||
"ReasoningContentBlock": "content",
|
||||
"RemoveMessage": "modifier",
|
||||
@@ -154,12 +161,14 @@ _dynamic_imports = {
|
||||
"SystemMessage": "system",
|
||||
"SystemMessageChunk": "system",
|
||||
"ImageContentBlock": "content",
|
||||
"InputTokenDetails": "ai",
|
||||
"InvalidToolCall": "tool",
|
||||
"TextContentBlock": "content",
|
||||
"ToolCall": "tool",
|
||||
"ToolCallChunk": "tool",
|
||||
"ToolMessage": "tool",
|
||||
"ToolMessageChunk": "tool",
|
||||
"UsageMetadata": "ai",
|
||||
"VideoContentBlock": "content",
|
||||
"AnyMessage": "utils",
|
||||
"MessageLikeRepresentation": "utils",
|
||||
|
||||
@@ -48,25 +48,25 @@ class InputTokenDetails(TypedDict, total=False):
|
||||
}
|
||||
```
|
||||
|
||||
!!! version-added "Added in version 0.3.9"
|
||||
|
||||
May also hold extra provider-specific keys.
|
||||
|
||||
!!! version-added "Added in `langchain-core` 0.3.9"
|
||||
"""
|
||||
|
||||
audio: int
|
||||
"""Audio input tokens."""
|
||||
|
||||
cache_creation: int
|
||||
"""Input tokens that were cached and there was a cache miss.
|
||||
|
||||
Since there was a cache miss, the cache was created from these tokens.
|
||||
"""
|
||||
|
||||
cache_read: int
|
||||
"""Input tokens that were cached and there was a cache hit.
|
||||
|
||||
Since there was a cache hit, the tokens were read from the cache. More precisely,
|
||||
the model state given these tokens was read from the cache.
|
||||
|
||||
"""
|
||||
|
||||
|
||||
@@ -83,18 +83,20 @@ class OutputTokenDetails(TypedDict, total=False):
|
||||
}
|
||||
```
|
||||
|
||||
!!! version-added "Added in version 0.3.9"
|
||||
May also hold extra provider-specific keys.
|
||||
|
||||
!!! version-added "Added in `langchain-core` 0.3.9"
|
||||
|
||||
"""
|
||||
|
||||
audio: int
|
||||
"""Audio output tokens."""
|
||||
|
||||
reasoning: int
|
||||
"""Reasoning output tokens.
|
||||
|
||||
Tokens generated by the model in a chain of thought process (i.e. by OpenAI's o1
|
||||
models) that are not returned as part of model output.
|
||||
|
||||
"""
|
||||
|
||||
|
||||
@@ -121,27 +123,36 @@ class UsageMetadata(TypedDict):
|
||||
}
|
||||
```
|
||||
|
||||
!!! warning "Behavior changed in 0.3.9"
|
||||
!!! warning "Behavior changed in `langchain-core` 0.3.9"
|
||||
|
||||
Added `input_token_details` and `output_token_details`.
|
||||
|
||||
!!! note "LangSmith SDK"
|
||||
|
||||
The LangSmith SDK also has a `UsageMetadata` class. While the two share fields,
|
||||
LangSmith's `UsageMetadata` has additional fields to capture cost information
|
||||
used by the LangSmith platform.
|
||||
"""
|
||||
|
||||
input_tokens: int
|
||||
"""Count of input (or prompt) tokens. Sum of all input token types."""
|
||||
|
||||
output_tokens: int
|
||||
"""Count of output (or completion) tokens. Sum of all output token types."""
|
||||
|
||||
total_tokens: int
|
||||
"""Total token count. Sum of input_tokens + output_tokens."""
|
||||
"""Total token count. Sum of `input_tokens` + `output_tokens`."""
|
||||
|
||||
input_token_details: NotRequired[InputTokenDetails]
|
||||
"""Breakdown of input token counts.
|
||||
|
||||
Does *not* need to sum to full input token count. Does *not* need to have all keys.
|
||||
"""
|
||||
|
||||
output_token_details: NotRequired[OutputTokenDetails]
|
||||
"""Breakdown of output token counts.
|
||||
|
||||
Does *not* need to sum to full output token count. Does *not* need to have all keys.
|
||||
|
||||
"""
|
||||
|
||||
|
||||
@@ -153,13 +164,14 @@ class AIMessage(BaseMessage):
|
||||
This message represents the output of the model and consists of both
|
||||
the raw output as returned by the model and standardized fields
|
||||
(e.g., tool calls, usage metadata) added by the LangChain framework.
|
||||
|
||||
"""
|
||||
|
||||
tool_calls: list[ToolCall] = []
|
||||
"""If present, tool calls associated with the message."""
|
||||
|
||||
invalid_tool_calls: list[InvalidToolCall] = []
|
||||
"""If present, tool calls with parsing errors associated with the message."""
|
||||
|
||||
usage_metadata: UsageMetadata | None = None
|
||||
"""If present, usage metadata for a message, such as token counts.
|
||||
|
||||
@@ -551,7 +563,7 @@ class AIMessageChunk(AIMessage, BaseMessageChunk):
|
||||
|
||||
@model_validator(mode="after")
|
||||
def init_server_tool_calls(self) -> Self:
|
||||
"""Parse `server_tool_call_chunks`."""
|
||||
"""Parse `server_tool_call_chunks` from [`ServerToolCallChunk`][langchain.messages.ServerToolCallChunk] objects.""" # noqa: E501
|
||||
if (
|
||||
self.chunk_position == "last"
|
||||
and self.response_metadata.get("output_version") == "v1"
|
||||
@@ -651,13 +663,13 @@ def add_ai_message_chunks(
|
||||
chunk_id = id_
|
||||
break
|
||||
else:
|
||||
# second pass: prefer lc_run-* ids over lc_* ids
|
||||
# second pass: prefer lc_run-* IDs over lc_* IDs
|
||||
for id_ in candidates:
|
||||
if id_ and id_.startswith(LC_ID_PREFIX):
|
||||
chunk_id = id_
|
||||
break
|
||||
else:
|
||||
# third pass: take any remaining id (auto-generated lc_* ids)
|
||||
# third pass: take any remaining ID (auto-generated lc_* IDs)
|
||||
for id_ in candidates:
|
||||
if id_:
|
||||
chunk_id = id_
|
||||
|
||||
@@ -5,11 +5,9 @@ from __future__ import annotations
|
||||
from typing import TYPE_CHECKING, Any, cast, overload
|
||||
|
||||
from pydantic import ConfigDict, Field
|
||||
from typing_extensions import Self
|
||||
|
||||
from langchain_core._api.deprecation import warn_deprecated
|
||||
from langchain_core.load.serializable import Serializable
|
||||
from langchain_core.messages import content as types
|
||||
from langchain_core.utils import get_bolded_text
|
||||
from langchain_core.utils._merge import merge_dicts, merge_lists
|
||||
from langchain_core.utils.interactive_env import is_interactive_env
|
||||
@@ -17,6 +15,9 @@ from langchain_core.utils.interactive_env import is_interactive_env
|
||||
if TYPE_CHECKING:
|
||||
from collections.abc import Sequence
|
||||
|
||||
from typing_extensions import Self
|
||||
|
||||
from langchain_core.messages import content as types
|
||||
from langchain_core.prompts.chat import ChatPromptTemplate
|
||||
|
||||
|
||||
@@ -93,6 +94,10 @@ class BaseMessage(Serializable):
|
||||
"""Base abstract message class.
|
||||
|
||||
Messages are the inputs and outputs of a chat model.
|
||||
|
||||
Examples include [`HumanMessage`][langchain.messages.HumanMessage],
|
||||
[`AIMessage`][langchain.messages.AIMessage], and
|
||||
[`SystemMessage`][langchain.messages.SystemMessage].
|
||||
"""
|
||||
|
||||
content: str | list[str | dict]
|
||||
@@ -195,7 +200,7 @@ class BaseMessage(Serializable):
|
||||
def content_blocks(self) -> list[types.ContentBlock]:
|
||||
r"""Load content blocks from the message content.
|
||||
|
||||
!!! version-added "Added in version 1.0.0"
|
||||
!!! version-added "Added in `langchain-core` 1.0.0"
|
||||
|
||||
"""
|
||||
# Needed here to avoid circular import, as these classes import BaseMessages
|
||||
@@ -386,12 +391,12 @@ class BaseMessageChunk(BaseMessage):
|
||||
Raises:
|
||||
TypeError: If the other object is not a message chunk.
|
||||
|
||||
For example,
|
||||
|
||||
`AIMessageChunk(content="Hello") + AIMessageChunk(content=" World")`
|
||||
|
||||
will give `AIMessageChunk(content="Hello World")`
|
||||
|
||||
Example:
|
||||
```txt
|
||||
AIMessageChunk(content="Hello", ...)
|
||||
+ AIMessageChunk(content=" World", ...)
|
||||
= AIMessageChunk(content="Hello World", ...)
|
||||
```
|
||||
"""
|
||||
if isinstance(other, BaseMessageChunk):
|
||||
# If both are (subclasses of) BaseMessageChunk,
|
||||
|
||||
@@ -12,10 +12,11 @@ the implementation in `BaseMessage`.
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Callable
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from collections.abc import Callable
|
||||
|
||||
from langchain_core.messages import AIMessage, AIMessageChunk
|
||||
from langchain_core.messages import content as types
|
||||
|
||||
|
||||
@@ -245,11 +245,20 @@ def _convert_to_v1_from_anthropic(message: AIMessage) -> list[types.ContentBlock
|
||||
and message.chunk_position != "last"
|
||||
):
|
||||
# Isolated chunk
|
||||
tool_call_chunk: types.ToolCallChunk = (
|
||||
message.tool_call_chunks[0].copy() # type: ignore[assignment]
|
||||
chunk = message.tool_call_chunks[0]
|
||||
|
||||
tool_call_chunk = types.ToolCallChunk(
|
||||
name=chunk.get("name"),
|
||||
id=chunk.get("id"),
|
||||
args=chunk.get("args"),
|
||||
type="tool_call_chunk",
|
||||
)
|
||||
if "type" not in tool_call_chunk:
|
||||
tool_call_chunk["type"] = "tool_call_chunk"
|
||||
if "caller" in block:
|
||||
tool_call_chunk["extras"] = {"caller": block["caller"]}
|
||||
|
||||
index = chunk.get("index")
|
||||
if index is not None:
|
||||
tool_call_chunk["index"] = index
|
||||
yield tool_call_chunk
|
||||
else:
|
||||
tool_call_block: types.ToolCall | None = None
|
||||
@@ -282,17 +291,27 @@ def _convert_to_v1_from_anthropic(message: AIMessage) -> list[types.ContentBlock
|
||||
}
|
||||
if "index" in block:
|
||||
tool_call_block["index"] = block["index"]
|
||||
if "caller" in block:
|
||||
if "extras" not in tool_call_block:
|
||||
tool_call_block["extras"] = {}
|
||||
tool_call_block["extras"]["caller"] = block["caller"]
|
||||
|
||||
yield tool_call_block
|
||||
|
||||
elif block_type == "input_json_delta" and isinstance(
|
||||
message, AIMessageChunk
|
||||
):
|
||||
if len(message.tool_call_chunks) == 1:
|
||||
tool_call_chunk = (
|
||||
message.tool_call_chunks[0].copy() # type: ignore[assignment]
|
||||
chunk = message.tool_call_chunks[0]
|
||||
tool_call_chunk = types.ToolCallChunk(
|
||||
name=chunk.get("name"),
|
||||
id=chunk.get("id"),
|
||||
args=chunk.get("args"),
|
||||
type="tool_call_chunk",
|
||||
)
|
||||
if "type" not in tool_call_chunk:
|
||||
tool_call_chunk["type"] = "tool_call_chunk"
|
||||
index = chunk.get("index")
|
||||
if index is not None:
|
||||
tool_call_chunk["index"] = index
|
||||
yield tool_call_chunk
|
||||
|
||||
else:
|
||||
|
||||
@@ -209,11 +209,16 @@ def _convert_to_v1_from_converse(message: AIMessage) -> list[types.ContentBlock]
|
||||
and message.chunk_position != "last"
|
||||
):
|
||||
# Isolated chunk
|
||||
tool_call_chunk: types.ToolCallChunk = (
|
||||
message.tool_call_chunks[0].copy() # type: ignore[assignment]
|
||||
chunk = message.tool_call_chunks[0]
|
||||
tool_call_chunk = types.ToolCallChunk(
|
||||
name=chunk.get("name"),
|
||||
id=chunk.get("id"),
|
||||
args=chunk.get("args"),
|
||||
type="tool_call_chunk",
|
||||
)
|
||||
if "type" not in tool_call_chunk:
|
||||
tool_call_chunk["type"] = "tool_call_chunk"
|
||||
index = chunk.get("index")
|
||||
if index is not None:
|
||||
tool_call_chunk["index"] = index
|
||||
yield tool_call_chunk
|
||||
else:
|
||||
tool_call_block: types.ToolCall | None = None
|
||||
@@ -253,11 +258,16 @@ def _convert_to_v1_from_converse(message: AIMessage) -> list[types.ContentBlock]
|
||||
and isinstance(message, AIMessageChunk)
|
||||
and len(message.tool_call_chunks) == 1
|
||||
):
|
||||
tool_call_chunk = (
|
||||
message.tool_call_chunks[0].copy() # type: ignore[assignment]
|
||||
chunk = message.tool_call_chunks[0]
|
||||
tool_call_chunk = types.ToolCallChunk(
|
||||
name=chunk.get("name"),
|
||||
id=chunk.get("id"),
|
||||
args=chunk.get("args"),
|
||||
type="tool_call_chunk",
|
||||
)
|
||||
if "type" not in tool_call_chunk:
|
||||
tool_call_chunk["type"] = "tool_call_chunk"
|
||||
index = chunk.get("index")
|
||||
if index is not None:
|
||||
tool_call_chunk["index"] = index
|
||||
yield tool_call_chunk
|
||||
|
||||
else:
|
||||
|
||||
@@ -76,21 +76,36 @@ def translate_grounding_metadata_to_citations(
|
||||
for chunk_index in chunk_indices:
|
||||
if chunk_index < len(grounding_chunks):
|
||||
chunk = grounding_chunks[chunk_index]
|
||||
web_info = chunk.get("web", {})
|
||||
|
||||
# Handle web and maps grounding
|
||||
web_info = chunk.get("web") or {}
|
||||
maps_info = chunk.get("maps") or {}
|
||||
|
||||
# Extract citation info depending on source
|
||||
url = maps_info.get("uri") or web_info.get("uri")
|
||||
title = maps_info.get("title") or web_info.get("title")
|
||||
|
||||
# Note: confidence_scores is a legacy field from Gemini 2.0 and earlier
|
||||
# that indicated confidence (0.0-1.0) for each grounding chunk.
|
||||
#
|
||||
# In Gemini 2.5+, this field is always None/empty and should be ignored.
|
||||
extras_metadata = {
|
||||
"web_search_queries": web_search_queries,
|
||||
"grounding_chunk_index": chunk_index,
|
||||
"confidence_scores": support.get("confidence_scores") or [],
|
||||
}
|
||||
|
||||
# Add maps-specific metadata if present
|
||||
if maps_info.get("placeId"):
|
||||
extras_metadata["place_id"] = maps_info["placeId"]
|
||||
|
||||
citation = create_citation(
|
||||
url=web_info.get("uri"),
|
||||
title=web_info.get("title"),
|
||||
url=url,
|
||||
title=title,
|
||||
start_index=start_index,
|
||||
end_index=end_index,
|
||||
cited_text=cited_text,
|
||||
extras={
|
||||
"google_ai_metadata": {
|
||||
"web_search_queries": web_search_queries,
|
||||
"grounding_chunk_index": chunk_index,
|
||||
"confidence_scores": support.get("confidence_scores", []),
|
||||
}
|
||||
},
|
||||
google_ai_metadata=extras_metadata,
|
||||
)
|
||||
citations.append(citation)
|
||||
|
||||
@@ -368,7 +383,7 @@ def _convert_to_v1_from_genai(message: AIMessage) -> list[types.ContentBlock]:
|
||||
else:
|
||||
# Assume it's raw base64 without data URI
|
||||
try:
|
||||
# Validate base64 and decode for mime type detection
|
||||
# Validate base64 and decode for MIME type detection
|
||||
decoded_bytes = base64.b64decode(url, validate=True)
|
||||
|
||||
image_url_b64_block = {
|
||||
@@ -379,7 +394,7 @@ def _convert_to_v1_from_genai(message: AIMessage) -> list[types.ContentBlock]:
|
||||
try:
|
||||
import filetype # type: ignore[import-not-found] # noqa: PLC0415
|
||||
|
||||
# Guess mime type based on file bytes
|
||||
# Guess MIME type based on file bytes
|
||||
mime_type = None
|
||||
kind = filetype.guess(decoded_bytes)
|
||||
if kind:
|
||||
@@ -458,6 +473,8 @@ def _convert_to_v1_from_genai(message: AIMessage) -> list[types.ContentBlock]:
|
||||
if outcome is not None:
|
||||
server_tool_result_block["extras"]["outcome"] = outcome
|
||||
converted_blocks.append(server_tool_result_block)
|
||||
elif item_type == "text":
|
||||
converted_blocks.append(cast("types.TextContentBlock", item))
|
||||
else:
|
||||
# Unknown type, preserve as non-standard
|
||||
converted_blocks.append({"type": "non_standard", "value": item})
|
||||
|
||||
@@ -4,7 +4,6 @@ from __future__ import annotations
|
||||
|
||||
import json
|
||||
import warnings
|
||||
from collections.abc import Iterable
|
||||
from typing import TYPE_CHECKING, Any, Literal, cast
|
||||
|
||||
from langchain_core.language_models._utils import (
|
||||
@@ -14,6 +13,8 @@ from langchain_core.language_models._utils import (
|
||||
from langchain_core.messages import content as types
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from collections.abc import Iterable
|
||||
|
||||
from langchain_core.messages import AIMessage, AIMessageChunk
|
||||
|
||||
|
||||
|
||||
@@ -644,7 +644,7 @@ class AudioContentBlock(TypedDict):
|
||||
|
||||
|
||||
class PlainTextContentBlock(TypedDict):
|
||||
"""Plaintext data (e.g., from a document).
|
||||
"""Plaintext data (e.g., from a `.txt` or `.md` document).
|
||||
|
||||
!!! note
|
||||
A `PlainTextContentBlock` existed in `langchain-core<1.0.0`. Although the
|
||||
@@ -654,7 +654,7 @@ class PlainTextContentBlock(TypedDict):
|
||||
|
||||
!!! note
|
||||
Title and context are optional fields that may be passed to the model. See
|
||||
Anthropic [example](https://docs.claude.com/en/docs/build-with-claude/citations#citable-vs-non-citable-content).
|
||||
Anthropic [example](https://platform.claude.com/docs/en/build-with-claude/citations#citable-vs-non-citable-content).
|
||||
|
||||
!!! note "Factory function"
|
||||
`create_plaintext_block` may also be used as a factory to create a
|
||||
@@ -767,7 +767,7 @@ class FileContentBlock(TypedDict):
|
||||
|
||||
|
||||
class NonStandardContentBlock(TypedDict):
|
||||
"""Provider-specific data.
|
||||
"""Provider-specific content data.
|
||||
|
||||
This block contains data for which there is not yet a standard type.
|
||||
|
||||
@@ -802,7 +802,7 @@ class NonStandardContentBlock(TypedDict):
|
||||
"""
|
||||
|
||||
value: dict[str, Any]
|
||||
"""Provider-specific data."""
|
||||
"""Provider-specific content data."""
|
||||
|
||||
index: NotRequired[int | str]
|
||||
"""Index of block in aggregate response. Used during streaming."""
|
||||
@@ -867,7 +867,7 @@ def _get_data_content_block_types() -> tuple[str, ...]:
|
||||
Example: ("image", "video", "audio", "text-plain", "file")
|
||||
|
||||
Note that old style multimodal blocks type literals with new style blocks.
|
||||
Speficially, "image", "audio", and "file".
|
||||
Specifically, "image", "audio", and "file".
|
||||
|
||||
See the docstring of `_normalize_messages` in `language_models._utils` for details.
|
||||
"""
|
||||
@@ -906,7 +906,7 @@ def is_data_content_block(block: dict) -> bool:
|
||||
|
||||
# 'text' is checked to support v0 PlainTextContentBlock types
|
||||
# We must guard against new style TextContentBlock which also has 'text' `type`
|
||||
# by ensuring the presense of `source_type`
|
||||
# by ensuring the presence of `source_type`
|
||||
if block["type"] == "text" and "source_type" not in block: # noqa: SIM103 # This is more readable
|
||||
return False
|
||||
|
||||
@@ -1399,7 +1399,7 @@ def create_non_standard_block(
|
||||
"""Create a `NonStandardContentBlock`.
|
||||
|
||||
Args:
|
||||
value: Provider-specific data.
|
||||
value: Provider-specific content data.
|
||||
id: Content block identifier. Generated automatically if not provided.
|
||||
index: Index of block in aggregate response. Used during streaming.
|
||||
|
||||
|
||||
@@ -29,38 +29,39 @@ class ToolMessage(BaseMessage, ToolOutputMixin):
|
||||
`ToolMessage` objects contain the result of a tool invocation. Typically, the result
|
||||
is encoded inside the `content` field.
|
||||
|
||||
Example: A `ToolMessage` representing a result of `42` from a tool call with id
|
||||
`tool_call_id` is used to associate the tool call request with the tool call
|
||||
response. Useful in situations where a chat model is able to request multiple tool
|
||||
calls in parallel.
|
||||
|
||||
```python
|
||||
from langchain_core.messages import ToolMessage
|
||||
Example:
|
||||
A `ToolMessage` representing a result of `42` from a tool call with id
|
||||
|
||||
ToolMessage(content="42", tool_call_id="call_Jja7J89XsjrOLA5r!MEOW!SL")
|
||||
```
|
||||
```python
|
||||
from langchain_core.messages import ToolMessage
|
||||
|
||||
Example: A `ToolMessage` where only part of the tool output is sent to the model
|
||||
and the full output is passed in to artifact.
|
||||
ToolMessage(content="42", tool_call_id="call_Jja7J89XsjrOLA5r!MEOW!SL")
|
||||
```
|
||||
|
||||
```python
|
||||
from langchain_core.messages import ToolMessage
|
||||
Example:
|
||||
A `ToolMessage` where only part of the tool output is sent to the model
|
||||
and the full output is passed in to artifact.
|
||||
|
||||
tool_output = {
|
||||
"stdout": "From the graph we can see that the correlation between "
|
||||
"x and y is ...",
|
||||
"stderr": None,
|
||||
"artifacts": {"type": "image", "base64_data": "/9j/4gIcSU..."},
|
||||
}
|
||||
```python
|
||||
from langchain_core.messages import ToolMessage
|
||||
|
||||
ToolMessage(
|
||||
content=tool_output["stdout"],
|
||||
artifact=tool_output,
|
||||
tool_call_id="call_Jja7J89XsjrOLA5r!MEOW!SL",
|
||||
)
|
||||
```
|
||||
|
||||
The `tool_call_id` field is used to associate the tool call request with the
|
||||
tool call response. Useful in situations where a chat model is able
|
||||
to request multiple tool calls in parallel.
|
||||
tool_output = {
|
||||
"stdout": "From the graph we can see that the correlation between "
|
||||
"x and y is ...",
|
||||
"stderr": None,
|
||||
"artifacts": {"type": "image", "base64_data": "/9j/4gIcSU..."},
|
||||
}
|
||||
|
||||
ToolMessage(
|
||||
content=tool_output["stdout"],
|
||||
artifact=tool_output,
|
||||
tool_call_id="call_Jja7J89XsjrOLA5r!MEOW!SL",
|
||||
)
|
||||
```
|
||||
"""
|
||||
|
||||
tool_call_id: str
|
||||
|
||||
@@ -15,12 +15,16 @@ import json
|
||||
import logging
|
||||
import math
|
||||
from collections.abc import Callable, Iterable, Sequence
|
||||
from functools import partial
|
||||
from functools import partial, wraps
|
||||
from typing import (
|
||||
TYPE_CHECKING,
|
||||
Annotated,
|
||||
Any,
|
||||
Concatenate,
|
||||
Literal,
|
||||
ParamSpec,
|
||||
Protocol,
|
||||
TypeVar,
|
||||
cast,
|
||||
overload,
|
||||
)
|
||||
@@ -86,7 +90,7 @@ AnyMessage = Annotated[
|
||||
| Annotated[ToolMessageChunk, Tag(tag="ToolMessageChunk")],
|
||||
Field(discriminator=Discriminator(_get_type)),
|
||||
]
|
||||
""""A type representing any defined `Message` or `MessageChunk` type."""
|
||||
"""A type representing any defined `Message` or `MessageChunk` type."""
|
||||
|
||||
|
||||
def get_buffer_string(
|
||||
@@ -328,12 +332,16 @@ def _convert_to_message(message: MessageLikeRepresentation) -> BaseMessage:
|
||||
"""
|
||||
if isinstance(message, BaseMessage):
|
||||
message_ = message
|
||||
elif isinstance(message, str):
|
||||
message_ = _create_message_from_message_type("human", message)
|
||||
elif isinstance(message, Sequence) and len(message) == 2:
|
||||
# mypy doesn't realise this can't be a string given the previous branch
|
||||
message_type_str, template = message # type: ignore[misc]
|
||||
message_ = _create_message_from_message_type(message_type_str, template)
|
||||
elif isinstance(message, Sequence):
|
||||
if isinstance(message, str):
|
||||
message_ = _create_message_from_message_type("human", message)
|
||||
else:
|
||||
try:
|
||||
message_type_str, template = message
|
||||
except ValueError as e:
|
||||
msg = "Message as a sequence must be (role string, template)"
|
||||
raise NotImplementedError(msg) from e
|
||||
message_ = _create_message_from_message_type(message_type_str, template)
|
||||
elif isinstance(message, dict):
|
||||
msg_kwargs = message.copy()
|
||||
try:
|
||||
@@ -380,33 +388,54 @@ def convert_to_messages(
|
||||
return [_convert_to_message(m) for m in messages]
|
||||
|
||||
|
||||
def _runnable_support(func: Callable) -> Callable:
|
||||
_P = ParamSpec("_P")
|
||||
_R_co = TypeVar("_R_co", covariant=True)
|
||||
|
||||
|
||||
class _RunnableSupportCallable(Protocol[_P, _R_co]):
|
||||
@overload
|
||||
def wrapped(
|
||||
messages: None = None, **kwargs: Any
|
||||
) -> Runnable[Sequence[MessageLikeRepresentation], list[BaseMessage]]: ...
|
||||
def __call__(
|
||||
self,
|
||||
messages: None = None,
|
||||
*args: _P.args,
|
||||
**kwargs: _P.kwargs,
|
||||
) -> Runnable[Sequence[MessageLikeRepresentation], _R_co]: ...
|
||||
|
||||
@overload
|
||||
def wrapped(
|
||||
messages: Sequence[MessageLikeRepresentation], **kwargs: Any
|
||||
) -> list[BaseMessage]: ...
|
||||
def __call__(
|
||||
self,
|
||||
messages: Sequence[MessageLikeRepresentation] | PromptValue,
|
||||
*args: _P.args,
|
||||
**kwargs: _P.kwargs,
|
||||
) -> _R_co: ...
|
||||
|
||||
def __call__(
|
||||
self,
|
||||
messages: Sequence[MessageLikeRepresentation] | PromptValue | None = None,
|
||||
*args: _P.args,
|
||||
**kwargs: _P.kwargs,
|
||||
) -> _R_co | Runnable[Sequence[MessageLikeRepresentation], _R_co]: ...
|
||||
|
||||
|
||||
def _runnable_support(
|
||||
func: Callable[
|
||||
Concatenate[Sequence[MessageLikeRepresentation] | PromptValue, _P], _R_co
|
||||
],
|
||||
) -> _RunnableSupportCallable[_P, _R_co]:
|
||||
@wraps(func)
|
||||
def wrapped(
|
||||
messages: Sequence[MessageLikeRepresentation] | None = None,
|
||||
**kwargs: Any,
|
||||
) -> (
|
||||
list[BaseMessage]
|
||||
| Runnable[Sequence[MessageLikeRepresentation], list[BaseMessage]]
|
||||
):
|
||||
messages: Sequence[MessageLikeRepresentation] | PromptValue | None = None,
|
||||
*args: _P.args,
|
||||
**kwargs: _P.kwargs,
|
||||
) -> _R_co | Runnable[Sequence[MessageLikeRepresentation], _R_co]:
|
||||
# Import locally to prevent circular import.
|
||||
from langchain_core.runnables.base import RunnableLambda # noqa: PLC0415
|
||||
|
||||
if messages is not None:
|
||||
return func(messages, **kwargs)
|
||||
return func(messages, *args, **kwargs)
|
||||
return RunnableLambda(partial(func, **kwargs), name=func.__name__)
|
||||
|
||||
wrapped.__doc__ = func.__doc__
|
||||
return wrapped
|
||||
return cast("_RunnableSupportCallable[_P, _R_co]", wrapped)
|
||||
|
||||
|
||||
@_runnable_support
|
||||
@@ -734,8 +763,10 @@ def trim_messages(
|
||||
Set to `len` to count the number of **messages** in the chat history.
|
||||
|
||||
!!! note
|
||||
|
||||
Use `count_tokens_approximately` to get fast, approximate token
|
||||
counts.
|
||||
|
||||
This is recommended for using `trim_messages` on the hot path, where
|
||||
exact token counting is not necessary.
|
||||
|
||||
@@ -1025,18 +1056,18 @@ def convert_to_openai_messages(
|
||||
messages: Message-like object or iterable of objects whose contents are
|
||||
in OpenAI, Anthropic, Bedrock Converse, or VertexAI formats.
|
||||
text_format: How to format string or text block contents:
|
||||
- `'string'`:
|
||||
If a message has a string content, this is left as a string. If
|
||||
a message has content blocks that are all of type `'text'`, these
|
||||
are joined with a newline to make a single string. If a message has
|
||||
content blocks and at least one isn't of type `'text'`, then
|
||||
all blocks are left as dicts.
|
||||
- `'block'`:
|
||||
If a message has a string content, this is turned into a list
|
||||
with a single content block of type `'text'`. If a message has
|
||||
content blocks these are left as is.
|
||||
include_id: Whether to include message ids in the openai messages, if they
|
||||
are present in the source messages.
|
||||
- `'string'`:
|
||||
If a message has a string content, this is left as a string. If
|
||||
a message has content blocks that are all of type `'text'`, these
|
||||
are joined with a newline to make a single string. If a message has
|
||||
content blocks and at least one isn't of type `'text'`, then
|
||||
all blocks are left as dicts.
|
||||
- `'block'`:
|
||||
If a message has a string content, this is turned into a list
|
||||
with a single content block of type `'text'`. If a message has
|
||||
content blocks these are left as is.
|
||||
include_id: Whether to include message IDs in the openai messages, if they
|
||||
are present in the source messages.
|
||||
|
||||
Raises:
|
||||
ValueError: if an unrecognized `text_format` is specified, or if a message
|
||||
@@ -1097,7 +1128,7 @@ def convert_to_openai_messages(
|
||||
# ]
|
||||
```
|
||||
|
||||
!!! version-added "Added in version 0.3.11"
|
||||
!!! version-added "Added in `langchain-core` 0.3.11"
|
||||
|
||||
""" # noqa: E501
|
||||
if text_format not in {"string", "block"}:
|
||||
@@ -1697,7 +1728,7 @@ def count_tokens_approximately(
|
||||
Warning:
|
||||
This function does not currently support counting image tokens.
|
||||
|
||||
!!! version-added "Added in version 0.3.46"
|
||||
!!! version-added "Added in `langchain-core` 0.3.46"
|
||||
|
||||
"""
|
||||
token_count = 0.0
|
||||
|
||||
@@ -1,4 +1,20 @@
|
||||
"""**OutputParser** classes parse the output of an LLM call."""
|
||||
"""`OutputParser` classes parse the output of an LLM call into structured data.
|
||||
|
||||
!!! tip "Structured output"
|
||||
|
||||
Output parsers emerged as an early solution to the challenge of obtaining structured
|
||||
output from LLMs.
|
||||
|
||||
Today, most LLMs support [structured output](https://docs.langchain.com/oss/python/langchain/models#structured-outputs)
|
||||
natively. In such cases, using output parsers may be unnecessary, and you should
|
||||
leverage the model's built-in capabilities for structured output. Refer to the
|
||||
[documentation of your chosen model](https://docs.langchain.com/oss/python/integrations/providers/overview)
|
||||
for guidance on how to achieve structured output directly.
|
||||
|
||||
Output parsers remain valuable when working with models that do not support
|
||||
structured output natively, or when you require additional processing or validation
|
||||
of the model's output beyond its inherent capabilities.
|
||||
"""
|
||||
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
|
||||
@@ -135,6 +135,9 @@ class BaseOutputParser(
|
||||
|
||||
Example:
|
||||
```python
|
||||
# Implement a simple boolean output parser
|
||||
|
||||
|
||||
class BooleanOutputParser(BaseOutputParser[bool]):
|
||||
true_val: str = "YES"
|
||||
false_val: str = "NO"
|
||||
|
||||
@@ -1,11 +1,16 @@
|
||||
"""Format instructions."""
|
||||
|
||||
JSON_FORMAT_INSTRUCTIONS = """The output should be formatted as a JSON instance that conforms to the JSON schema below.
|
||||
JSON_FORMAT_INSTRUCTIONS = """STRICT OUTPUT FORMAT:
|
||||
- Return only the JSON value that conforms to the schema. Do not include any additional text, explanations, headings, or separators.
|
||||
- Do not wrap the JSON in Markdown or code fences (no ``` or ```json).
|
||||
- Do not prepend or append any text (e.g., do not write "Here is the JSON:").
|
||||
- The response must be a single top-level JSON value exactly as required by the schema (object/array/etc.), with no trailing commas or comments.
|
||||
|
||||
As an example, for the schema {{"properties": {{"foo": {{"title": "Foo", "description": "a list of strings", "type": "array", "items": {{"type": "string"}}}}}}, "required": ["foo"]}}
|
||||
the object {{"foo": ["bar", "baz"]}} is a well-formatted instance of the schema. The object {{"properties": {{"foo": ["bar", "baz"]}}}} is not well-formatted.
|
||||
The output should be formatted as a JSON instance that conforms to the JSON schema below.
|
||||
|
||||
Here is the output schema:
|
||||
As an example, for the schema {{"properties": {{"foo": {{"title": "Foo", "description": "a list of strings", "type": "array", "items": {{"type": "string"}}}}}}, "required": ["foo"]}} the object {{"foo": ["bar", "baz"]}} is a well-formatted instance of the schema. The object {{"properties": {{"foo": ["bar", "baz"]}}}} is not well-formatted.
|
||||
|
||||
Here is the output schema (shown in a code block for readability only — do not include any backticks or Markdown in your output):
|
||||
```
|
||||
{schema}
|
||||
```""" # noqa: E501
|
||||
|
||||
@@ -31,11 +31,14 @@ TBaseModel = TypeVar("TBaseModel", bound=PydanticBaseModel)
|
||||
class JsonOutputParser(BaseCumulativeTransformOutputParser[Any]):
|
||||
"""Parse the output of an LLM call to a JSON object.
|
||||
|
||||
Probably the most reliable output parser for getting structured data that does *not*
|
||||
use function calling.
|
||||
|
||||
When used in streaming mode, it will yield partial JSON objects containing
|
||||
all the keys that have been returned so far.
|
||||
|
||||
In streaming, if `diff` is set to `True`, yields JSONPatch operations
|
||||
describing the difference between the previous and the current object.
|
||||
In streaming, if `diff` is set to `True`, yields JSONPatch operations describing the
|
||||
difference between the previous and the current object.
|
||||
"""
|
||||
|
||||
pydantic_object: Annotated[type[TBaseModel] | None, SkipValidation()] = None # type: ignore[valid-type]
|
||||
|
||||
@@ -41,7 +41,7 @@ def droplastn(
|
||||
|
||||
|
||||
class ListOutputParser(BaseTransformOutputParser[list[str]]):
|
||||
"""Parse the output of an LLM call to a list."""
|
||||
"""Parse the output of a model to a list."""
|
||||
|
||||
@property
|
||||
def _type(self) -> str:
|
||||
@@ -74,30 +74,30 @@ class ListOutputParser(BaseTransformOutputParser[list[str]]):
|
||||
buffer = ""
|
||||
for chunk in input:
|
||||
if isinstance(chunk, BaseMessage):
|
||||
# extract text
|
||||
# Extract text
|
||||
chunk_content = chunk.content
|
||||
if not isinstance(chunk_content, str):
|
||||
continue
|
||||
buffer += chunk_content
|
||||
else:
|
||||
# add current chunk to buffer
|
||||
# Add current chunk to buffer
|
||||
buffer += chunk
|
||||
# parse buffer into a list of parts
|
||||
# Parse buffer into a list of parts
|
||||
try:
|
||||
done_idx = 0
|
||||
# yield only complete parts
|
||||
# Yield only complete parts
|
||||
for m in droplastn(self.parse_iter(buffer), 1):
|
||||
done_idx = m.end()
|
||||
yield [m.group(1)]
|
||||
buffer = buffer[done_idx:]
|
||||
except NotImplementedError:
|
||||
parts = self.parse(buffer)
|
||||
# yield only complete parts
|
||||
# Yield only complete parts
|
||||
if len(parts) > 1:
|
||||
for part in parts[:-1]:
|
||||
yield [part]
|
||||
buffer = parts[-1]
|
||||
# yield the last part
|
||||
# Yield the last part
|
||||
for part in self.parse(buffer):
|
||||
yield [part]
|
||||
|
||||
@@ -108,40 +108,40 @@ class ListOutputParser(BaseTransformOutputParser[list[str]]):
|
||||
buffer = ""
|
||||
async for chunk in input:
|
||||
if isinstance(chunk, BaseMessage):
|
||||
# extract text
|
||||
# Extract text
|
||||
chunk_content = chunk.content
|
||||
if not isinstance(chunk_content, str):
|
||||
continue
|
||||
buffer += chunk_content
|
||||
else:
|
||||
# add current chunk to buffer
|
||||
# Add current chunk to buffer
|
||||
buffer += chunk
|
||||
# parse buffer into a list of parts
|
||||
# Parse buffer into a list of parts
|
||||
try:
|
||||
done_idx = 0
|
||||
# yield only complete parts
|
||||
# Yield only complete parts
|
||||
for m in droplastn(self.parse_iter(buffer), 1):
|
||||
done_idx = m.end()
|
||||
yield [m.group(1)]
|
||||
buffer = buffer[done_idx:]
|
||||
except NotImplementedError:
|
||||
parts = self.parse(buffer)
|
||||
# yield only complete parts
|
||||
# Yield only complete parts
|
||||
if len(parts) > 1:
|
||||
for part in parts[:-1]:
|
||||
yield [part]
|
||||
buffer = parts[-1]
|
||||
# yield the last part
|
||||
# Yield the last part
|
||||
for part in self.parse(buffer):
|
||||
yield [part]
|
||||
|
||||
|
||||
class CommaSeparatedListOutputParser(ListOutputParser):
|
||||
"""Parse the output of an LLM call to a comma-separated list."""
|
||||
"""Parse the output of a model to a comma-separated list."""
|
||||
|
||||
@classmethod
|
||||
def is_lc_serializable(cls) -> bool:
|
||||
"""Return True as this class is serializable."""
|
||||
"""Return `True` as this class is serializable."""
|
||||
return True
|
||||
|
||||
@classmethod
|
||||
@@ -177,7 +177,7 @@ class CommaSeparatedListOutputParser(ListOutputParser):
|
||||
)
|
||||
return [item for sublist in reader for item in sublist]
|
||||
except csv.Error:
|
||||
# keep old logic for backup
|
||||
# Keep old logic for backup
|
||||
return [part.strip() for part in text.split(",")]
|
||||
|
||||
@property
|
||||
|
||||
@@ -15,7 +15,11 @@ from langchain_core.messages.tool import tool_call as create_tool_call
|
||||
from langchain_core.output_parsers.transform import BaseCumulativeTransformOutputParser
|
||||
from langchain_core.outputs import ChatGeneration, Generation
|
||||
from langchain_core.utils.json import parse_partial_json
|
||||
from langchain_core.utils.pydantic import TypeBaseModel
|
||||
from langchain_core.utils.pydantic import (
|
||||
TypeBaseModel,
|
||||
is_pydantic_v1_subclass,
|
||||
is_pydantic_v2_subclass,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -43,22 +47,24 @@ def parse_tool_call(
|
||||
"""
|
||||
if "function" not in raw_tool_call:
|
||||
return None
|
||||
|
||||
arguments = raw_tool_call["function"]["arguments"]
|
||||
|
||||
if partial:
|
||||
try:
|
||||
function_args = parse_partial_json(
|
||||
raw_tool_call["function"]["arguments"], strict=strict
|
||||
)
|
||||
function_args = parse_partial_json(arguments, strict=strict)
|
||||
except (JSONDecodeError, TypeError): # None args raise TypeError
|
||||
return None
|
||||
# Handle None or empty string arguments for parameter-less tools
|
||||
elif not arguments:
|
||||
function_args = {}
|
||||
else:
|
||||
try:
|
||||
function_args = json.loads(
|
||||
raw_tool_call["function"]["arguments"], strict=strict
|
||||
)
|
||||
function_args = json.loads(arguments, strict=strict)
|
||||
except JSONDecodeError as e:
|
||||
msg = (
|
||||
f"Function {raw_tool_call['function']['name']} arguments:\n\n"
|
||||
f"{raw_tool_call['function']['arguments']}\n\nare not valid JSON. "
|
||||
f"{arguments}\n\nare not valid JSON. "
|
||||
f"Received JSONDecodeError {e}"
|
||||
)
|
||||
raise OutputParserException(msg) from e
|
||||
@@ -224,7 +230,7 @@ class JsonOutputKeyToolsParser(JsonOutputToolsParser):
|
||||
result: The result of the LLM call.
|
||||
partial: Whether to parse partial JSON.
|
||||
If `True`, the output will be a JSON object containing
|
||||
all the keys that have been returned so far.
|
||||
all the keys that have been returned so far.
|
||||
If `False`, the output will be the full JSON object.
|
||||
|
||||
Raises:
|
||||
@@ -307,7 +313,7 @@ class PydanticToolsParser(JsonOutputToolsParser):
|
||||
result: The result of the LLM call.
|
||||
partial: Whether to parse partial JSON.
|
||||
If `True`, the output will be a JSON object containing
|
||||
all the keys that have been returned so far.
|
||||
all the keys that have been returned so far.
|
||||
If `False`, the output will be the full JSON object.
|
||||
|
||||
Returns:
|
||||
@@ -323,7 +329,15 @@ class PydanticToolsParser(JsonOutputToolsParser):
|
||||
return None if self.first_tool_only else []
|
||||
|
||||
json_results = [json_results] if self.first_tool_only else json_results
|
||||
name_dict = {tool.__name__: tool for tool in self.tools}
|
||||
name_dict_v2: dict[str, TypeBaseModel] = {
|
||||
tool.model_config.get("title") or tool.__name__: tool
|
||||
for tool in self.tools
|
||||
if is_pydantic_v2_subclass(tool)
|
||||
}
|
||||
name_dict_v1: dict[str, TypeBaseModel] = {
|
||||
tool.__name__: tool for tool in self.tools if is_pydantic_v1_subclass(tool)
|
||||
}
|
||||
name_dict: dict[str, TypeBaseModel] = {**name_dict_v2, **name_dict_v1}
|
||||
pydantic_objects = []
|
||||
for res in json_results:
|
||||
if not isinstance(res["args"], dict):
|
||||
|
||||
@@ -37,7 +37,7 @@ class PydanticOutputParser(JsonOutputParser, Generic[TBaseModel]):
|
||||
def _parser_exception(
|
||||
self, e: Exception, json_object: dict
|
||||
) -> OutputParserException:
|
||||
json_string = json.dumps(json_object)
|
||||
json_string = json.dumps(json_object, ensure_ascii=False)
|
||||
name = self.pydantic_object.__name__
|
||||
msg = f"Failed to parse {name} from completion {json_string}. Got: {e}"
|
||||
return OutputParserException(msg, llm_output=json_string)
|
||||
@@ -86,7 +86,7 @@ class PydanticOutputParser(JsonOutputParser, Generic[TBaseModel]):
|
||||
The format instructions for the JSON output.
|
||||
"""
|
||||
# Copy schema to avoid altering original Pydantic schema.
|
||||
schema = dict(self.pydantic_object.model_json_schema().items())
|
||||
schema = dict(self._get_schema(self.pydantic_object).items())
|
||||
|
||||
# Remove extraneous fields.
|
||||
reduced_schema = schema
|
||||
|
||||
@@ -6,14 +6,40 @@ from langchain_core.output_parsers.transform import BaseTransformOutputParser
|
||||
|
||||
|
||||
class StrOutputParser(BaseTransformOutputParser[str]):
|
||||
"""OutputParser that parses LLMResult into the top likely string."""
|
||||
"""Extract text content from model outputs as a string.
|
||||
|
||||
Converts model outputs (such as `AIMessage` or `AIMessageChunk` objects) into plain
|
||||
text strings. It's the simplest output parser and is useful when you need string
|
||||
responses for downstream processing, display, or storage.
|
||||
|
||||
Supports streaming, yielding text chunks as they're generated by the model.
|
||||
|
||||
Example:
|
||||
```python
|
||||
from langchain_core.output_parsers import StrOutputParser
|
||||
from langchain_openai import ChatOpenAI
|
||||
|
||||
model = ChatOpenAI(model="gpt-4o")
|
||||
parser = StrOutputParser()
|
||||
|
||||
# Get string output from a model
|
||||
message = model.invoke("Tell me a joke")
|
||||
result = parser.invoke(message)
|
||||
print(result) # plain string
|
||||
|
||||
# With streaming - use transform() to process a stream
|
||||
stream = model.stream("Tell me a story")
|
||||
for chunk in parser.transform(stream):
|
||||
print(chunk, end="", flush=True)
|
||||
```
|
||||
"""
|
||||
|
||||
@classmethod
|
||||
def is_lc_serializable(cls) -> bool:
|
||||
"""StrOutputParser is serializable.
|
||||
"""`StrOutputParser` is serializable.
|
||||
|
||||
Returns:
|
||||
True
|
||||
`True`
|
||||
"""
|
||||
return True
|
||||
|
||||
|
||||
@@ -43,19 +43,19 @@ class _StreamingParser:
|
||||
"""Streaming parser for XML.
|
||||
|
||||
This implementation is pulled into a class to avoid implementation
|
||||
drift between transform and atransform of the XMLOutputParser.
|
||||
drift between transform and atransform of the `XMLOutputParser`.
|
||||
"""
|
||||
|
||||
def __init__(self, parser: Literal["defusedxml", "xml"]) -> None:
|
||||
"""Initialize the streaming parser.
|
||||
|
||||
Args:
|
||||
parser: Parser to use for XML parsing. Can be either 'defusedxml' or 'xml'.
|
||||
See documentation in XMLOutputParser for more information.
|
||||
parser: Parser to use for XML parsing. Can be either `'defusedxml'` or
|
||||
`'xml'`. See documentation in `XMLOutputParser` for more information.
|
||||
|
||||
Raises:
|
||||
ImportError: If defusedxml is not installed and the defusedxml
|
||||
parser is requested.
|
||||
ImportError: If `defusedxml` is not installed and the `defusedxml` parser is
|
||||
requested.
|
||||
"""
|
||||
if parser == "defusedxml":
|
||||
if not _HAS_DEFUSEDXML:
|
||||
@@ -79,10 +79,10 @@ class _StreamingParser:
|
||||
"""Parse a chunk of text.
|
||||
|
||||
Args:
|
||||
chunk: A chunk of text to parse. This can be a string or a BaseMessage.
|
||||
chunk: A chunk of text to parse. This can be a `str` or a `BaseMessage`.
|
||||
|
||||
Yields:
|
||||
A dictionary representing the parsed XML element.
|
||||
A `dict` representing the parsed XML element.
|
||||
|
||||
Raises:
|
||||
xml.etree.ElementTree.ParseError: If the XML is not well-formed.
|
||||
@@ -147,46 +147,49 @@ class _StreamingParser:
|
||||
|
||||
|
||||
class XMLOutputParser(BaseTransformOutputParser):
|
||||
"""Parse an output using xml format."""
|
||||
"""Parse an output using xml format.
|
||||
|
||||
Returns a dictionary of tags.
|
||||
"""
|
||||
|
||||
tags: list[str] | None = None
|
||||
"""Tags to tell the LLM to expect in the XML output.
|
||||
|
||||
Note this may not be perfect depending on the LLM implementation.
|
||||
|
||||
For example, with tags=["foo", "bar", "baz"]:
|
||||
For example, with `tags=["foo", "bar", "baz"]`:
|
||||
|
||||
1. A well-formatted XML instance:
|
||||
"<foo>\n <bar>\n <baz></baz>\n </bar>\n</foo>"
|
||||
`"<foo>\n <bar>\n <baz></baz>\n </bar>\n</foo>"`
|
||||
|
||||
2. A badly-formatted XML instance (missing closing tag for 'bar'):
|
||||
"<foo>\n <bar>\n </foo>"
|
||||
`"<foo>\n <bar>\n </foo>"`
|
||||
|
||||
3. A badly-formatted XML instance (unexpected 'tag' element):
|
||||
"<foo>\n <tag>\n </tag>\n</foo>"
|
||||
`"<foo>\n <tag>\n </tag>\n</foo>"`
|
||||
"""
|
||||
encoding_matcher: re.Pattern = re.compile(
|
||||
r"<([^>]*encoding[^>]*)>\n(.*)", re.MULTILINE | re.DOTALL
|
||||
)
|
||||
parser: Literal["defusedxml", "xml"] = "defusedxml"
|
||||
"""Parser to use for XML parsing. Can be either 'defusedxml' or 'xml'.
|
||||
"""Parser to use for XML parsing. Can be either `'defusedxml'` or `'xml'`.
|
||||
|
||||
* 'defusedxml' is the default parser and is used to prevent XML vulnerabilities
|
||||
present in some distributions of Python's standard library xml.
|
||||
`defusedxml` is a wrapper around the standard library parser that
|
||||
sets up the parser with secure defaults.
|
||||
* 'xml' is the standard library parser.
|
||||
* `'defusedxml'` is the default parser and is used to prevent XML vulnerabilities
|
||||
present in some distributions of Python's standard library xml.
|
||||
`defusedxml` is a wrapper around the standard library parser that
|
||||
sets up the parser with secure defaults.
|
||||
* `'xml'` is the standard library parser.
|
||||
|
||||
Use `xml` only if you are sure that your distribution of the standard library
|
||||
is not vulnerable to XML vulnerabilities.
|
||||
Use `xml` only if you are sure that your distribution of the standard library is not
|
||||
vulnerable to XML vulnerabilities.
|
||||
|
||||
Please review the following resources for more information:
|
||||
|
||||
* https://docs.python.org/3/library/xml.html#xml-vulnerabilities
|
||||
* https://github.com/tiran/defusedxml
|
||||
|
||||
The standard library relies on libexpat for parsing XML:
|
||||
https://github.com/libexpat/libexpat
|
||||
The standard library relies on [`libexpat`](https://github.com/libexpat/libexpat)
|
||||
for parsing XML.
|
||||
"""
|
||||
|
||||
def get_format_instructions(self) -> str:
|
||||
@@ -200,12 +203,12 @@ class XMLOutputParser(BaseTransformOutputParser):
|
||||
text: The output of an LLM call.
|
||||
|
||||
Returns:
|
||||
A dictionary representing the parsed XML.
|
||||
A `dict` representing the parsed XML.
|
||||
|
||||
Raises:
|
||||
OutputParserException: If the XML is not well-formed.
|
||||
ImportError: If defusedxml is not installed and the defusedxml
|
||||
parser is requested.
|
||||
ImportError: If defus`edxml is not installed and the `defusedxml` parser is
|
||||
requested.
|
||||
"""
|
||||
# Try to find XML string within triple backticks
|
||||
# Imports are temporarily placed here to avoid issue with caching on CI
|
||||
|
||||
@@ -2,15 +2,17 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Literal
|
||||
from typing import TYPE_CHECKING, Literal
|
||||
|
||||
from pydantic import model_validator
|
||||
from typing_extensions import Self
|
||||
|
||||
from langchain_core.messages import BaseMessage, BaseMessageChunk
|
||||
from langchain_core.outputs.generation import Generation
|
||||
from langchain_core.utils._merge import merge_dicts
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from typing_extensions import Self
|
||||
|
||||
|
||||
class ChatGeneration(Generation):
|
||||
"""A single chat generation output.
|
||||
|
||||
@@ -11,9 +11,8 @@ from langchain_core.utils._merge import merge_dicts
|
||||
class Generation(Serializable):
|
||||
"""A single text generation output.
|
||||
|
||||
Generation represents the response from an
|
||||
`"old-fashioned" LLM <https://python.langchain.com/docs/concepts/text_llms/>__` that
|
||||
generates regular text (not chat messages).
|
||||
Generation represents the response from an "old-fashioned" LLM (string-in,
|
||||
string-out) that generates regular text (not chat messages).
|
||||
|
||||
This model is used internally by chat model and will eventually
|
||||
be mapped to a more general `LLMResult` object, and then projected into
|
||||
@@ -21,8 +20,7 @@ class Generation(Serializable):
|
||||
|
||||
LangChain users working with chat models will usually access information via
|
||||
`AIMessage` (returned from runnable interfaces) or `LLMResult` (available
|
||||
via callbacks). Please refer the `AIMessage` and `LLMResult` schema documentation
|
||||
for more information.
|
||||
via callbacks). Please refer to `AIMessage` and `LLMResult` for more information.
|
||||
"""
|
||||
|
||||
text: str
|
||||
@@ -35,11 +33,13 @@ class Generation(Serializable):
|
||||
"""
|
||||
type: Literal["Generation"] = "Generation"
|
||||
"""Type is used exclusively for serialization purposes.
|
||||
Set to "Generation" for this class."""
|
||||
|
||||
Set to "Generation" for this class.
|
||||
"""
|
||||
|
||||
@classmethod
|
||||
def is_lc_serializable(cls) -> bool:
|
||||
"""Return True as this class is serializable."""
|
||||
"""Return `True` as this class is serializable."""
|
||||
return True
|
||||
|
||||
@classmethod
|
||||
@@ -53,7 +53,7 @@ class Generation(Serializable):
|
||||
|
||||
|
||||
class GenerationChunk(Generation):
|
||||
"""Generation chunk, which can be concatenated with other Generation chunks."""
|
||||
"""`GenerationChunk`, which can be concatenated with other Generation chunks."""
|
||||
|
||||
def __add__(self, other: GenerationChunk) -> GenerationChunk:
|
||||
"""Concatenate two `GenerationChunk`s.
|
||||
|
||||
@@ -30,15 +30,13 @@ class PromptValue(Serializable, ABC):
|
||||
|
||||
@classmethod
|
||||
def is_lc_serializable(cls) -> bool:
|
||||
"""Return True as this class is serializable."""
|
||||
"""Return `True` as this class is serializable."""
|
||||
return True
|
||||
|
||||
@classmethod
|
||||
def get_lc_namespace(cls) -> list[str]:
|
||||
"""Get the namespace of the LangChain object.
|
||||
|
||||
This is used to determine the namespace of the object when serializing.
|
||||
|
||||
Returns:
|
||||
`["langchain", "schema", "prompt"]`
|
||||
"""
|
||||
@@ -50,7 +48,7 @@ class PromptValue(Serializable, ABC):
|
||||
|
||||
@abstractmethod
|
||||
def to_messages(self) -> list[BaseMessage]:
|
||||
"""Return prompt as a list of Messages."""
|
||||
"""Return prompt as a list of messages."""
|
||||
|
||||
|
||||
class StringPromptValue(PromptValue):
|
||||
@@ -64,8 +62,6 @@ class StringPromptValue(PromptValue):
|
||||
def get_lc_namespace(cls) -> list[str]:
|
||||
"""Get the namespace of the LangChain object.
|
||||
|
||||
This is used to determine the namespace of the object when serializing.
|
||||
|
||||
Returns:
|
||||
`["langchain", "prompts", "base"]`
|
||||
"""
|
||||
@@ -101,8 +97,6 @@ class ChatPromptValue(PromptValue):
|
||||
def get_lc_namespace(cls) -> list[str]:
|
||||
"""Get the namespace of the LangChain object.
|
||||
|
||||
This is used to determine the namespace of the object when serializing.
|
||||
|
||||
Returns:
|
||||
`["langchain", "prompts", "chat"]`
|
||||
"""
|
||||
|
||||
@@ -6,7 +6,7 @@ import contextlib
|
||||
import json
|
||||
import typing
|
||||
from abc import ABC, abstractmethod
|
||||
from collections.abc import Callable, Mapping
|
||||
from collections.abc import Mapping
|
||||
from functools import cached_property
|
||||
from pathlib import Path
|
||||
from typing import (
|
||||
@@ -33,6 +33,8 @@ from langchain_core.runnables.config import ensure_config
|
||||
from langchain_core.utils.pydantic import create_model_v2
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from collections.abc import Callable
|
||||
|
||||
from langchain_core.documents import Document
|
||||
|
||||
|
||||
@@ -46,21 +48,27 @@ class BasePromptTemplate(
|
||||
|
||||
input_variables: list[str]
|
||||
"""A list of the names of the variables whose values are required as inputs to the
|
||||
prompt."""
|
||||
prompt.
|
||||
"""
|
||||
optional_variables: list[str] = Field(default=[])
|
||||
"""optional_variables: A list of the names of the variables for placeholder
|
||||
or MessagePlaceholder that are optional. These variables are auto inferred
|
||||
from the prompt and user need not provide them."""
|
||||
"""A list of the names of the variables for placeholder or `MessagePlaceholder` that
|
||||
are optional.
|
||||
|
||||
These variables are auto inferred from the prompt and user need not provide them.
|
||||
"""
|
||||
input_types: typing.Dict[str, Any] = Field(default_factory=dict, exclude=True) # noqa: UP006
|
||||
"""A dictionary of the types of the variables the prompt template expects.
|
||||
If not provided, all variables are assumed to be strings."""
|
||||
|
||||
If not provided, all variables are assumed to be strings.
|
||||
"""
|
||||
output_parser: BaseOutputParser | None = None
|
||||
"""How to parse the output of calling an LLM on this formatted prompt."""
|
||||
partial_variables: Mapping[str, Any] = Field(default_factory=dict)
|
||||
"""A dictionary of the partial variables the prompt template carries.
|
||||
|
||||
Partial variables populate the template so that you don't need to
|
||||
pass them in every time you call the prompt."""
|
||||
Partial variables populate the template so that you don't need to pass them in every
|
||||
time you call the prompt.
|
||||
"""
|
||||
metadata: typing.Dict[str, Any] | None = None # noqa: UP006
|
||||
"""Metadata to be used for tracing."""
|
||||
tags: list[str] | None = None
|
||||
@@ -105,7 +113,7 @@ class BasePromptTemplate(
|
||||
|
||||
@classmethod
|
||||
def is_lc_serializable(cls) -> bool:
|
||||
"""Return True as this class is serializable."""
|
||||
"""Return `True` as this class is serializable."""
|
||||
return True
|
||||
|
||||
model_config = ConfigDict(
|
||||
@@ -127,7 +135,7 @@ class BasePromptTemplate(
|
||||
"""Get the input schema for the prompt.
|
||||
|
||||
Args:
|
||||
config: configuration for the prompt.
|
||||
config: Configuration for the prompt.
|
||||
|
||||
Returns:
|
||||
The input schema for the prompt.
|
||||
@@ -195,8 +203,8 @@ class BasePromptTemplate(
|
||||
"""Invoke the prompt.
|
||||
|
||||
Args:
|
||||
input: Dict, input to the prompt.
|
||||
config: RunnableConfig, configuration for the prompt.
|
||||
input: Input to the prompt.
|
||||
config: Configuration for the prompt.
|
||||
|
||||
Returns:
|
||||
The output of the prompt.
|
||||
@@ -221,8 +229,8 @@ class BasePromptTemplate(
|
||||
"""Async invoke the prompt.
|
||||
|
||||
Args:
|
||||
input: Dict, input to the prompt.
|
||||
config: RunnableConfig, configuration for the prompt.
|
||||
input: Input to the prompt.
|
||||
config: Configuration for the prompt.
|
||||
|
||||
Returns:
|
||||
The output of the prompt.
|
||||
@@ -242,7 +250,7 @@ class BasePromptTemplate(
|
||||
|
||||
@abstractmethod
|
||||
def format_prompt(self, **kwargs: Any) -> PromptValue:
|
||||
"""Create Prompt Value.
|
||||
"""Create `PromptValue`.
|
||||
|
||||
Args:
|
||||
**kwargs: Any arguments to be passed to the prompt template.
|
||||
@@ -252,7 +260,7 @@ class BasePromptTemplate(
|
||||
"""
|
||||
|
||||
async def aformat_prompt(self, **kwargs: Any) -> PromptValue:
|
||||
"""Async create Prompt Value.
|
||||
"""Async create `PromptValue`.
|
||||
|
||||
Args:
|
||||
**kwargs: Any arguments to be passed to the prompt template.
|
||||
@@ -266,7 +274,7 @@ class BasePromptTemplate(
|
||||
"""Return a partial of the prompt template.
|
||||
|
||||
Args:
|
||||
**kwargs: partial variables to set.
|
||||
**kwargs: Partial variables to set.
|
||||
|
||||
Returns:
|
||||
A partial of the prompt template.
|
||||
@@ -296,9 +304,9 @@ class BasePromptTemplate(
|
||||
A formatted string.
|
||||
|
||||
Example:
|
||||
```python
|
||||
prompt.format(variable1="foo")
|
||||
```
|
||||
```python
|
||||
prompt.format(variable1="foo")
|
||||
```
|
||||
"""
|
||||
|
||||
async def aformat(self, **kwargs: Any) -> FormatOutputType:
|
||||
@@ -311,9 +319,9 @@ class BasePromptTemplate(
|
||||
A formatted string.
|
||||
|
||||
Example:
|
||||
```python
|
||||
await prompt.aformat(variable1="foo")
|
||||
```
|
||||
```python
|
||||
await prompt.aformat(variable1="foo")
|
||||
```
|
||||
"""
|
||||
return self.format(**kwargs)
|
||||
|
||||
@@ -348,9 +356,9 @@ class BasePromptTemplate(
|
||||
NotImplementedError: If the prompt type is not implemented.
|
||||
|
||||
Example:
|
||||
```python
|
||||
prompt.save(file_path="path/prompt.yaml")
|
||||
```
|
||||
```python
|
||||
prompt.save(file_path="path/prompt.yaml")
|
||||
```
|
||||
"""
|
||||
if self.partial_variables:
|
||||
msg = "Cannot save prompt with partial variables."
|
||||
@@ -402,23 +410,23 @@ def format_document(doc: Document, prompt: BasePromptTemplate[str]) -> str:
|
||||
|
||||
First, this pulls information from the document from two sources:
|
||||
|
||||
1. page_content:
|
||||
This takes the information from the `document.page_content`
|
||||
and assigns it to a variable named `page_content`.
|
||||
2. metadata:
|
||||
This takes information from `document.metadata` and assigns
|
||||
it to variables of the same name.
|
||||
1. `page_content`:
|
||||
This takes the information from the `document.page_content` and assigns it to a
|
||||
variable named `page_content`.
|
||||
2. `metadata`:
|
||||
This takes information from `document.metadata` and assigns it to variables of
|
||||
the same name.
|
||||
|
||||
Those variables are then passed into the `prompt` to produce a formatted string.
|
||||
|
||||
Args:
|
||||
doc: Document, the page_content and metadata will be used to create
|
||||
doc: `Document`, the `page_content` and `metadata` will be used to create
|
||||
the final string.
|
||||
prompt: BasePromptTemplate, will be used to format the page_content
|
||||
and metadata into the final string.
|
||||
prompt: `BasePromptTemplate`, will be used to format the `page_content`
|
||||
and `metadata` into the final string.
|
||||
|
||||
Returns:
|
||||
string of the document formatted.
|
||||
String of the document formatted.
|
||||
|
||||
Example:
|
||||
```python
|
||||
@@ -429,7 +437,6 @@ def format_document(doc: Document, prompt: BasePromptTemplate[str]) -> str:
|
||||
prompt = PromptTemplate.from_template("Page {page}: {page_content}")
|
||||
format_document(doc, prompt)
|
||||
>>> "Page 1: This is a joke"
|
||||
|
||||
```
|
||||
"""
|
||||
return prompt.format(**_get_document_info(doc, prompt))
|
||||
@@ -440,22 +447,22 @@ async def aformat_document(doc: Document, prompt: BasePromptTemplate[str]) -> st
|
||||
|
||||
First, this pulls information from the document from two sources:
|
||||
|
||||
1. page_content:
|
||||
This takes the information from the `document.page_content`
|
||||
and assigns it to a variable named `page_content`.
|
||||
2. metadata:
|
||||
This takes information from `document.metadata` and assigns
|
||||
it to variables of the same name.
|
||||
1. `page_content`:
|
||||
This takes the information from the `document.page_content` and assigns it to a
|
||||
variable named `page_content`.
|
||||
2. `metadata`:
|
||||
This takes information from `document.metadata` and assigns it to variables of
|
||||
the same name.
|
||||
|
||||
Those variables are then passed into the `prompt` to produce a formatted string.
|
||||
|
||||
Args:
|
||||
doc: Document, the page_content and metadata will be used to create
|
||||
doc: `Document`, the `page_content` and `metadata` will be used to create
|
||||
the final string.
|
||||
prompt: BasePromptTemplate, will be used to format the page_content
|
||||
and metadata into the final string.
|
||||
prompt: `BasePromptTemplate`, will be used to format the `page_content`
|
||||
and `metadata` into the final string.
|
||||
|
||||
Returns:
|
||||
string of the document formatted.
|
||||
String of the document formatted.
|
||||
"""
|
||||
return await prompt.aformat(**_get_document_info(doc, prompt))
|
||||
|
||||
@@ -587,14 +587,15 @@ class _StringImageMessagePromptTemplate(BaseMessagePromptTemplate):
|
||||
for prompt in self.prompt:
|
||||
inputs = {var: kwargs[var] for var in prompt.input_variables}
|
||||
if isinstance(prompt, StringPromptTemplate):
|
||||
formatted: str | ImageURL | dict[str, Any] = prompt.format(**inputs)
|
||||
content.append({"type": "text", "text": formatted})
|
||||
formatted_text: str = prompt.format(**inputs)
|
||||
if formatted_text != "":
|
||||
content.append({"type": "text", "text": formatted_text})
|
||||
elif isinstance(prompt, ImagePromptTemplate):
|
||||
formatted = prompt.format(**inputs)
|
||||
content.append({"type": "image_url", "image_url": formatted})
|
||||
formatted_image: ImageURL = prompt.format(**inputs)
|
||||
content.append({"type": "image_url", "image_url": formatted_image})
|
||||
elif isinstance(prompt, DictPromptTemplate):
|
||||
formatted = prompt.format(**inputs)
|
||||
content.append(formatted)
|
||||
formatted_dict: dict[str, Any] = prompt.format(**inputs)
|
||||
content.append(formatted_dict)
|
||||
return self._msg_class(
|
||||
content=content, additional_kwargs=self.additional_kwargs
|
||||
)
|
||||
@@ -617,16 +618,15 @@ class _StringImageMessagePromptTemplate(BaseMessagePromptTemplate):
|
||||
for prompt in self.prompt:
|
||||
inputs = {var: kwargs[var] for var in prompt.input_variables}
|
||||
if isinstance(prompt, StringPromptTemplate):
|
||||
formatted: str | ImageURL | dict[str, Any] = await prompt.aformat(
|
||||
**inputs
|
||||
)
|
||||
content.append({"type": "text", "text": formatted})
|
||||
formatted_text: str = await prompt.aformat(**inputs)
|
||||
if formatted_text != "":
|
||||
content.append({"type": "text", "text": formatted_text})
|
||||
elif isinstance(prompt, ImagePromptTemplate):
|
||||
formatted = await prompt.aformat(**inputs)
|
||||
content.append({"type": "image_url", "image_url": formatted})
|
||||
formatted_image: ImageURL = await prompt.aformat(**inputs)
|
||||
content.append({"type": "image_url", "image_url": formatted_image})
|
||||
elif isinstance(prompt, DictPromptTemplate):
|
||||
formatted = prompt.format(**inputs)
|
||||
content.append(formatted)
|
||||
formatted_dict: dict[str, Any] = prompt.format(**inputs)
|
||||
content.append(formatted_dict)
|
||||
return self._msg_class(
|
||||
content=content, additional_kwargs=self.additional_kwargs
|
||||
)
|
||||
@@ -776,42 +776,36 @@ class ChatPromptTemplate(BaseChatPromptTemplate):
|
||||
|
||||
Use to create flexible templated prompts for chat models.
|
||||
|
||||
Examples:
|
||||
!!! warning "Behavior changed in 0.2.24"
|
||||
You can pass any Message-like formats supported by
|
||||
`ChatPromptTemplate.from_messages()` directly to `ChatPromptTemplate()`
|
||||
init.
|
||||
```python
|
||||
from langchain_core.prompts import ChatPromptTemplate
|
||||
|
||||
```python
|
||||
from langchain_core.prompts import ChatPromptTemplate
|
||||
template = ChatPromptTemplate(
|
||||
[
|
||||
("system", "You are a helpful AI bot. Your name is {name}."),
|
||||
("human", "Hello, how are you doing?"),
|
||||
("ai", "I'm doing well, thanks!"),
|
||||
("human", "{user_input}"),
|
||||
]
|
||||
)
|
||||
|
||||
template = ChatPromptTemplate(
|
||||
[
|
||||
("system", "You are a helpful AI bot. Your name is {name}."),
|
||||
("human", "Hello, how are you doing?"),
|
||||
("ai", "I'm doing well, thanks!"),
|
||||
("human", "{user_input}"),
|
||||
]
|
||||
)
|
||||
prompt_value = template.invoke(
|
||||
{
|
||||
"name": "Bob",
|
||||
"user_input": "What is your name?",
|
||||
}
|
||||
)
|
||||
# Output:
|
||||
# ChatPromptValue(
|
||||
# messages=[
|
||||
# SystemMessage(content='You are a helpful AI bot. Your name is Bob.'),
|
||||
# HumanMessage(content='Hello, how are you doing?'),
|
||||
# AIMessage(content="I'm doing well, thanks!"),
|
||||
# HumanMessage(content='What is your name?')
|
||||
# ]
|
||||
# )
|
||||
```
|
||||
|
||||
prompt_value = template.invoke(
|
||||
{
|
||||
"name": "Bob",
|
||||
"user_input": "What is your name?",
|
||||
}
|
||||
)
|
||||
# Output:
|
||||
# ChatPromptValue(
|
||||
# messages=[
|
||||
# SystemMessage(content='You are a helpful AI bot. Your name is Bob.'),
|
||||
# HumanMessage(content='Hello, how are you doing?'),
|
||||
# AIMessage(content="I'm doing well, thanks!"),
|
||||
# HumanMessage(content='What is your name?')
|
||||
# ]
|
||||
# )
|
||||
```
|
||||
|
||||
Messages Placeholder:
|
||||
!!! note "Messages Placeholder"
|
||||
|
||||
```python
|
||||
# In addition to Human/AI/Tool/Function messages,
|
||||
@@ -852,13 +846,12 @@ class ChatPromptTemplate(BaseChatPromptTemplate):
|
||||
# )
|
||||
```
|
||||
|
||||
Single-variable template:
|
||||
!!! note "Single-variable template"
|
||||
|
||||
If your prompt has only a single input variable (i.e., 1 instance of "{variable_nams}"),
|
||||
and you invoke the template with a non-dict object, the prompt template will
|
||||
inject the provided argument into that variable location.
|
||||
|
||||
|
||||
```python
|
||||
from langchain_core.prompts import ChatPromptTemplate
|
||||
|
||||
@@ -898,25 +891,40 @@ class ChatPromptTemplate(BaseChatPromptTemplate):
|
||||
"""Create a chat prompt template from a variety of message formats.
|
||||
|
||||
Args:
|
||||
messages: sequence of message representations.
|
||||
messages: Sequence of message representations.
|
||||
|
||||
A message can be represented using the following formats:
|
||||
(1) BaseMessagePromptTemplate, (2) BaseMessage, (3) 2-tuple of
|
||||
(message type, template); e.g., ("human", "{user_input}"),
|
||||
(4) 2-tuple of (message class, template), (5) a string which is
|
||||
shorthand for ("human", template); e.g., "{user_input}".
|
||||
template_format: format of the template.
|
||||
input_variables: A list of the names of the variables whose values are
|
||||
required as inputs to the prompt.
|
||||
optional_variables: A list of the names of the variables for placeholder
|
||||
or MessagePlaceholder that are optional.
|
||||
These variables are auto inferred from the prompt and user need not
|
||||
provide them.
|
||||
partial_variables: A dictionary of the partial variables the prompt
|
||||
template carries. Partial variables populate the template so that you
|
||||
don't need to pass them in every time you call the prompt.
|
||||
validate_template: Whether to validate the template.
|
||||
input_types: A dictionary of the types of the variables the prompt template
|
||||
expects. If not provided, all variables are assumed to be strings.
|
||||
|
||||
1. `BaseMessagePromptTemplate`
|
||||
2. `BaseMessage`
|
||||
3. 2-tuple of `(message type, template)`; e.g.,
|
||||
`("human", "{user_input}")`
|
||||
4. 2-tuple of `(message class, template)`
|
||||
5. A string which is shorthand for `("human", template)`; e.g.,
|
||||
`"{user_input}"`
|
||||
template_format: Format of the template.
|
||||
**kwargs: Additional keyword arguments passed to `BasePromptTemplate`,
|
||||
including (but not limited to):
|
||||
|
||||
- `input_variables`: A list of the names of the variables whose values
|
||||
are required as inputs to the prompt.
|
||||
- `optional_variables`: A list of the names of the variables for
|
||||
placeholder or `MessagePlaceholder` that are optional.
|
||||
|
||||
These variables are auto inferred from the prompt and user need not
|
||||
provide them.
|
||||
|
||||
- `partial_variables`: A dictionary of the partial variables the prompt
|
||||
template carries.
|
||||
|
||||
Partial variables populate the template so that you don't need to
|
||||
pass them in every time you call the prompt.
|
||||
|
||||
- `validate_template`: Whether to validate the template.
|
||||
- `input_types`: A dictionary of the types of the variables the prompt
|
||||
template expects.
|
||||
|
||||
If not provided, all variables are assumed to be strings.
|
||||
|
||||
Examples:
|
||||
Instantiation from a list of message templates:
|
||||
@@ -1121,12 +1129,17 @@ class ChatPromptTemplate(BaseChatPromptTemplate):
|
||||
)
|
||||
```
|
||||
Args:
|
||||
messages: sequence of message representations.
|
||||
messages: Sequence of message representations.
|
||||
|
||||
A message can be represented using the following formats:
|
||||
(1) BaseMessagePromptTemplate, (2) BaseMessage, (3) 2-tuple of
|
||||
(message type, template); e.g., ("human", "{user_input}"),
|
||||
(4) 2-tuple of (message class, template), (5) a string which is
|
||||
shorthand for ("human", template); e.g., "{user_input}".
|
||||
|
||||
1. `BaseMessagePromptTemplate`
|
||||
2. `BaseMessage`
|
||||
3. 2-tuple of `(message type, template)`; e.g.,
|
||||
`("human", "{user_input}")`
|
||||
4. 2-tuple of `(message class, template)`
|
||||
5. A string which is shorthand for `("human", template)`; e.g.,
|
||||
`"{user_input}"`
|
||||
template_format: format of the template.
|
||||
|
||||
Returns:
|
||||
@@ -1238,7 +1251,7 @@ class ChatPromptTemplate(BaseChatPromptTemplate):
|
||||
"""Extend the chat template with a sequence of messages.
|
||||
|
||||
Args:
|
||||
messages: sequence of message representations to append.
|
||||
messages: Sequence of message representations to append.
|
||||
"""
|
||||
self.messages.extend(
|
||||
[_convert_to_message_template(message) for message in messages]
|
||||
@@ -1335,11 +1348,25 @@ def _create_template_from_message_type(
|
||||
raise ValueError(msg)
|
||||
var_name = template[1:-1]
|
||||
message = MessagesPlaceholder(variable_name=var_name, optional=True)
|
||||
elif len(template) == 2 and isinstance(template[1], bool):
|
||||
var_name_wrapped, is_optional = template
|
||||
else:
|
||||
try:
|
||||
var_name_wrapped, is_optional = template
|
||||
except ValueError as e:
|
||||
msg = (
|
||||
"Unexpected arguments for placeholder message type."
|
||||
" Expected either a single string variable name"
|
||||
" or a list of [variable_name: str, is_optional: bool]."
|
||||
f" Got: {template}"
|
||||
)
|
||||
raise ValueError(msg) from e
|
||||
|
||||
if not isinstance(is_optional, bool):
|
||||
msg = f"Expected is_optional to be a boolean. Got: {is_optional}"
|
||||
raise ValueError(msg) # noqa: TRY004
|
||||
|
||||
if not isinstance(var_name_wrapped, str):
|
||||
msg = f"Expected variable name to be a string. Got: {var_name_wrapped}"
|
||||
raise ValueError(msg) # noqa:TRY004
|
||||
raise ValueError(msg) # noqa: TRY004
|
||||
if var_name_wrapped[0] != "{" or var_name_wrapped[-1] != "}":
|
||||
msg = (
|
||||
f"Invalid placeholder template: {var_name_wrapped}."
|
||||
@@ -1349,14 +1376,6 @@ def _create_template_from_message_type(
|
||||
var_name = var_name_wrapped[1:-1]
|
||||
|
||||
message = MessagesPlaceholder(variable_name=var_name, optional=is_optional)
|
||||
else:
|
||||
msg = (
|
||||
"Unexpected arguments for placeholder message type."
|
||||
" Expected either a single string variable name"
|
||||
" or a list of [variable_name: str, is_optional: bool]."
|
||||
f" Got: {template}"
|
||||
)
|
||||
raise ValueError(msg)
|
||||
else:
|
||||
msg = (
|
||||
f"Unexpected message type: {message_type}. Use one of 'human',"
|
||||
@@ -1410,10 +1429,11 @@ def _convert_to_message_template(
|
||||
)
|
||||
raise ValueError(msg)
|
||||
message = (message["role"], message["content"])
|
||||
if len(message) != 2:
|
||||
try:
|
||||
message_type_str, template = message
|
||||
except ValueError as e:
|
||||
msg = f"Expected 2-tuple of (role, template), got {message}"
|
||||
raise ValueError(msg)
|
||||
message_type_str, template = message
|
||||
raise ValueError(msg) from e
|
||||
if isinstance(message_type_str, str):
|
||||
message_ = _create_template_from_message_type(
|
||||
message_type_str, template, template_format=template_format
|
||||
|
||||
@@ -69,7 +69,7 @@ class DictPromptTemplate(RunnableSerializable[dict, dict]):
|
||||
|
||||
@classmethod
|
||||
def is_lc_serializable(cls) -> bool:
|
||||
"""Return True as this class is serializable."""
|
||||
"""Return `True` as this class is serializable."""
|
||||
return True
|
||||
|
||||
@classmethod
|
||||
|
||||
@@ -6,10 +6,10 @@ from abc import ABC, abstractmethod
|
||||
from typing import TYPE_CHECKING, Any
|
||||
|
||||
from langchain_core.load import Serializable
|
||||
from langchain_core.messages import BaseMessage
|
||||
from langchain_core.utils.interactive_env import is_interactive_env
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from langchain_core.messages import BaseMessage
|
||||
from langchain_core.prompts.chat import ChatPromptTemplate
|
||||
|
||||
|
||||
@@ -18,7 +18,7 @@ class BaseMessagePromptTemplate(Serializable, ABC):
|
||||
|
||||
@classmethod
|
||||
def is_lc_serializable(cls) -> bool:
|
||||
"""Return True as this class is serializable."""
|
||||
"""Return `True` as this class is serializable."""
|
||||
return True
|
||||
|
||||
@classmethod
|
||||
@@ -32,13 +32,13 @@ class BaseMessagePromptTemplate(Serializable, ABC):
|
||||
|
||||
@abstractmethod
|
||||
def format_messages(self, **kwargs: Any) -> list[BaseMessage]:
|
||||
"""Format messages from kwargs. Should return a list of BaseMessages.
|
||||
"""Format messages from kwargs. Should return a list of `BaseMessage` objects.
|
||||
|
||||
Args:
|
||||
**kwargs: Keyword arguments to use for formatting.
|
||||
|
||||
Returns:
|
||||
List of BaseMessages.
|
||||
List of `BaseMessage` objects.
|
||||
"""
|
||||
|
||||
async def aformat_messages(self, **kwargs: Any) -> list[BaseMessage]:
|
||||
@@ -48,7 +48,7 @@ class BaseMessagePromptTemplate(Serializable, ABC):
|
||||
**kwargs: Keyword arguments to use for formatting.
|
||||
|
||||
Returns:
|
||||
List of BaseMessages.
|
||||
List of `BaseMessage` objects.
|
||||
"""
|
||||
return self.format_messages(**kwargs)
|
||||
|
||||
|
||||
@@ -4,9 +4,8 @@ from __future__ import annotations
|
||||
|
||||
import warnings
|
||||
from abc import ABC
|
||||
from collections.abc import Callable, Sequence
|
||||
from string import Formatter
|
||||
from typing import Any, Literal
|
||||
from typing import TYPE_CHECKING, Any, Literal
|
||||
|
||||
from pydantic import BaseModel, create_model
|
||||
|
||||
@@ -16,8 +15,11 @@ from langchain_core.utils import get_colored_text, mustache
|
||||
from langchain_core.utils.formatting import formatter
|
||||
from langchain_core.utils.interactive_env import is_interactive_env
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from collections.abc import Callable, Sequence
|
||||
|
||||
try:
|
||||
from jinja2 import Environment, meta
|
||||
from jinja2 import meta
|
||||
from jinja2.sandbox import SandboxedEnvironment
|
||||
|
||||
_HAS_JINJA2 = True
|
||||
@@ -59,13 +61,9 @@ def jinja2_formatter(template: str, /, **kwargs: Any) -> str:
|
||||
)
|
||||
raise ImportError(msg)
|
||||
|
||||
# This uses a sandboxed environment to prevent arbitrary code execution.
|
||||
# Jinja2 uses an opt-out rather than opt-in approach for sand-boxing.
|
||||
# Please treat this sand-boxing as a best-effort approach rather than
|
||||
# a guarantee of security.
|
||||
# We recommend to never use jinja2 templates with untrusted inputs.
|
||||
# https://jinja.palletsprojects.com/en/3.1.x/sandbox/
|
||||
# approach not a guarantee of security.
|
||||
# Use a restricted sandbox that blocks ALL attribute/method access
|
||||
# Only simple variable lookups like {{variable}} are allowed
|
||||
# Attribute access like {{variable.attr}} or {{variable.method()}} is blocked
|
||||
return SandboxedEnvironment().from_string(template).render(**kwargs)
|
||||
|
||||
|
||||
@@ -101,7 +99,7 @@ def _get_jinja2_variables_from_template(template: str) -> set[str]:
|
||||
"Please install it with `pip install jinja2`."
|
||||
)
|
||||
raise ImportError(msg)
|
||||
env = Environment() # noqa: S701
|
||||
env = SandboxedEnvironment()
|
||||
ast = env.parse(template)
|
||||
return meta.find_undeclared_variables(ast)
|
||||
|
||||
@@ -122,13 +120,16 @@ def mustache_formatter(template: str, /, **kwargs: Any) -> str:
|
||||
def mustache_template_vars(
|
||||
template: str,
|
||||
) -> set[str]:
|
||||
"""Get the variables from a mustache template.
|
||||
"""Get the top-level variables from a mustache template.
|
||||
|
||||
For nested variables like `{{person.name}}`, only the top-level
|
||||
key (`person`) is returned.
|
||||
|
||||
Args:
|
||||
template: The template string.
|
||||
|
||||
Returns:
|
||||
The variables from the template.
|
||||
The top-level variables from the template.
|
||||
"""
|
||||
variables: set[str] = set()
|
||||
section_depth = 0
|
||||
@@ -268,6 +269,30 @@ def get_template_variables(template: str, template_format: str) -> list[str]:
|
||||
msg = f"Unsupported template format: {template_format}"
|
||||
raise ValueError(msg)
|
||||
|
||||
# For f-strings, block attribute access and indexing syntax
|
||||
# This prevents template injection attacks via accessing dangerous attributes
|
||||
if template_format == "f-string":
|
||||
for var in input_variables:
|
||||
# Formatter().parse() returns field names with dots/brackets if present
|
||||
# e.g., "obj.attr" or "obj[0]" - we need to block these
|
||||
if "." in var or "[" in var or "]" in var:
|
||||
msg = (
|
||||
f"Invalid variable name {var!r} in f-string template. "
|
||||
f"Variable names cannot contain attribute "
|
||||
f"access (.) or indexing ([])."
|
||||
)
|
||||
raise ValueError(msg)
|
||||
|
||||
# Block variable names that are all digits (e.g., "0", "100")
|
||||
# These are interpreted as positional arguments, not keyword arguments
|
||||
if var.isdigit():
|
||||
msg = (
|
||||
f"Invalid variable name {var!r} in f-string template. "
|
||||
f"Variable names cannot be all digits as they are interpreted "
|
||||
f"as positional arguments."
|
||||
)
|
||||
raise ValueError(msg)
|
||||
|
||||
return sorted(input_variables)
|
||||
|
||||
|
||||
|
||||
@@ -49,7 +49,13 @@ class StructuredPrompt(ChatPromptTemplate):
|
||||
structured_output_kwargs: additional kwargs for structured output.
|
||||
template_format: template format for the prompt.
|
||||
"""
|
||||
schema_ = schema_ or kwargs.pop("schema")
|
||||
schema_ = schema_ or kwargs.pop("schema", None)
|
||||
if not schema_:
|
||||
err_msg = (
|
||||
"Must pass in a non-empty structured output schema. Received: "
|
||||
f"{schema_}"
|
||||
)
|
||||
raise ValueError(err_msg)
|
||||
structured_output_kwargs = structured_output_kwargs or {}
|
||||
for k in set(kwargs).difference(get_pydantic_field_names(self.__class__)):
|
||||
structured_output_kwargs[k] = kwargs.pop(k)
|
||||
@@ -104,19 +110,23 @@ class StructuredPrompt(ChatPromptTemplate):
|
||||
)
|
||||
```
|
||||
Args:
|
||||
messages: sequence of message representations.
|
||||
messages: Sequence of message representations.
|
||||
|
||||
A message can be represented using the following formats:
|
||||
(1) BaseMessagePromptTemplate, (2) BaseMessage, (3) 2-tuple of
|
||||
(message type, template); e.g., ("human", "{user_input}"),
|
||||
(4) 2-tuple of (message class, template), (5) a string which is
|
||||
shorthand for ("human", template); e.g., "{user_input}"
|
||||
schema: a dictionary representation of function call, or a Pydantic model.
|
||||
|
||||
1. `BaseMessagePromptTemplate`
|
||||
2. `BaseMessage`
|
||||
3. 2-tuple of `(message type, template)`; e.g.,
|
||||
`("human", "{user_input}")`
|
||||
4. 2-tuple of `(message class, template)`
|
||||
5. A string which is shorthand for `("human", template)`; e.g.,
|
||||
`"{user_input}"`
|
||||
schema: A dictionary representation of function call, or a Pydantic model.
|
||||
**kwargs: Any additional kwargs to pass through to
|
||||
`ChatModel.with_structured_output(schema, **kwargs)`.
|
||||
|
||||
Returns:
|
||||
a structured prompt template
|
||||
|
||||
A structured prompt template
|
||||
"""
|
||||
return cls(messages, schema, **kwargs)
|
||||
|
||||
|
||||
@@ -105,7 +105,9 @@ class InMemoryRateLimiter(BaseRateLimiter):
|
||||
|
||||
from langchain_anthropic import ChatAnthropic
|
||||
|
||||
model = ChatAnthropic(model_name="claude-sonnet-4-5", rate_limiter=rate_limiter)
|
||||
model = ChatAnthropic(
|
||||
model_name="claude-sonnet-4-5-20250929", rate_limiter=rate_limiter
|
||||
)
|
||||
|
||||
for _ in range(5):
|
||||
tic = time.time()
|
||||
|
||||
@@ -50,65 +50,65 @@ class LangSmithRetrieverParams(TypedDict, total=False):
|
||||
|
||||
|
||||
class BaseRetriever(RunnableSerializable[RetrieverInput, RetrieverOutput], ABC):
|
||||
"""Abstract base class for a Document retrieval system.
|
||||
"""Abstract base class for a document retrieval system.
|
||||
|
||||
A retrieval system is defined as something that can take string queries and return
|
||||
the most 'relevant' Documents from some source.
|
||||
the most 'relevant' documents from some source.
|
||||
|
||||
Usage:
|
||||
|
||||
A retriever follows the standard Runnable interface, and should be used
|
||||
via the standard Runnable methods of `invoke`, `ainvoke`, `batch`, `abatch`.
|
||||
A retriever follows the standard `Runnable` interface, and should be used via the
|
||||
standard `Runnable` methods of `invoke`, `ainvoke`, `batch`, `abatch`.
|
||||
|
||||
Implementation:
|
||||
|
||||
When implementing a custom retriever, the class should implement
|
||||
the `_get_relevant_documents` method to define the logic for retrieving documents.
|
||||
When implementing a custom retriever, the class should implement the
|
||||
`_get_relevant_documents` method to define the logic for retrieving documents.
|
||||
|
||||
Optionally, an async native implementations can be provided by overriding the
|
||||
`_aget_relevant_documents` method.
|
||||
|
||||
Example: A retriever that returns the first 5 documents from a list of documents
|
||||
!!! example "Retriever that returns the first 5 documents from a list of documents"
|
||||
|
||||
```python
|
||||
from langchain_core.documents import Document
|
||||
from langchain_core.retrievers import BaseRetriever
|
||||
```python
|
||||
from langchain_core.documents import Document
|
||||
from langchain_core.retrievers import BaseRetriever
|
||||
|
||||
class SimpleRetriever(BaseRetriever):
|
||||
docs: list[Document]
|
||||
k: int = 5
|
||||
class SimpleRetriever(BaseRetriever):
|
||||
docs: list[Document]
|
||||
k: int = 5
|
||||
|
||||
def _get_relevant_documents(self, query: str) -> list[Document]:
|
||||
\"\"\"Return the first k documents from the list of documents\"\"\"
|
||||
return self.docs[:self.k]
|
||||
def _get_relevant_documents(self, query: str) -> list[Document]:
|
||||
\"\"\"Return the first k documents from the list of documents\"\"\"
|
||||
return self.docs[:self.k]
|
||||
|
||||
async def _aget_relevant_documents(self, query: str) -> list[Document]:
|
||||
\"\"\"(Optional) async native implementation.\"\"\"
|
||||
return self.docs[:self.k]
|
||||
```
|
||||
async def _aget_relevant_documents(self, query: str) -> list[Document]:
|
||||
\"\"\"(Optional) async native implementation.\"\"\"
|
||||
return self.docs[:self.k]
|
||||
```
|
||||
|
||||
Example: A simple retriever based on a scikit-learn vectorizer
|
||||
!!! example "Simple retriever based on a scikit-learn vectorizer"
|
||||
|
||||
```python
|
||||
from sklearn.metrics.pairwise import cosine_similarity
|
||||
```python
|
||||
from sklearn.metrics.pairwise import cosine_similarity
|
||||
|
||||
|
||||
class TFIDFRetriever(BaseRetriever, BaseModel):
|
||||
vectorizer: Any
|
||||
docs: list[Document]
|
||||
tfidf_array: Any
|
||||
k: int = 4
|
||||
class TFIDFRetriever(BaseRetriever, BaseModel):
|
||||
vectorizer: Any
|
||||
docs: list[Document]
|
||||
tfidf_array: Any
|
||||
k: int = 4
|
||||
|
||||
class Config:
|
||||
arbitrary_types_allowed = True
|
||||
class Config:
|
||||
arbitrary_types_allowed = True
|
||||
|
||||
def _get_relevant_documents(self, query: str) -> list[Document]:
|
||||
# Ip -- (n_docs,x), Op -- (n_docs,n_Feats)
|
||||
query_vec = self.vectorizer.transform([query])
|
||||
# Op -- (n_docs,1) -- Cosine Sim with each doc
|
||||
results = cosine_similarity(self.tfidf_array, query_vec).reshape((-1,))
|
||||
return [self.docs[i] for i in results.argsort()[-self.k :][::-1]]
|
||||
```
|
||||
def _get_relevant_documents(self, query: str) -> list[Document]:
|
||||
# Ip -- (n_docs,x), Op -- (n_docs,n_Feats)
|
||||
query_vec = self.vectorizer.transform([query])
|
||||
# Op -- (n_docs,1) -- Cosine Sim with each doc
|
||||
results = cosine_similarity(self.tfidf_array, query_vec).reshape((-1,))
|
||||
return [self.docs[i] for i in results.argsort()[-self.k :][::-1]]
|
||||
```
|
||||
"""
|
||||
|
||||
model_config = ConfigDict(
|
||||
@@ -119,15 +119,19 @@ class BaseRetriever(RunnableSerializable[RetrieverInput, RetrieverOutput], ABC):
|
||||
_expects_other_args: bool = False
|
||||
tags: list[str] | None = None
|
||||
"""Optional list of tags associated with the retriever.
|
||||
|
||||
These tags will be associated with each call to this retriever,
|
||||
and passed as arguments to the handlers defined in `callbacks`.
|
||||
|
||||
You can use these to eg identify a specific instance of a retriever with its
|
||||
use case.
|
||||
"""
|
||||
metadata: dict[str, Any] | None = None
|
||||
"""Optional metadata associated with the retriever.
|
||||
|
||||
This metadata will be associated with each call to this retriever,
|
||||
and passed as arguments to the handlers defined in `callbacks`.
|
||||
|
||||
You can use these to eg identify a specific instance of a retriever with its
|
||||
use case.
|
||||
"""
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user