mirror of https://github.com/hwchase17/langchain.git
synced 2026-02-07 01:30:24 +00:00

Compare commits: sr/do-not- ... cc/summari (220 commits)
SHA1:

2c938b787f c63f23d233 b7091d391d 7a2952210e 7549845d82 fa18f8eda0 878f033ed7 4065106c2e 12df938ace 65ee43cc10
fe7c000fc1 dad50e5624 0a6d01e61d c6f8b0875a 4c3800d743 7fe1c4b78f c375732396 b2db842cd4 9c21f83e82 880652b713
4ab94579ad eb0545a173 a2e389de9f 01573c1375 2ba3ce81a6 4e4e5d7337 2a863727f9 30e2260e26 cbaea351b2 f070217c3b
0915682c12 68ab9a1e56 47b79c30c0 5899f980aa b0bf4afe81 33e5d01f7c ee3373afc2 b296f103a9 525d5c0169 c4b6ba254e
b7d1831f9d 328ba36601 6f677ef5c1 d47d41cbd3 32bbe99efc 990e346c46 9b7792631d 558a8fe25b 52b1516d44 8a3bb73c05
099c042395 2d4f00a451 9bd401a6d4 6aa3794b74 189dcf7295 1bc88028e6 d2942351ce 83c078f363 26d39ffc4a 421e2ceeee
275dcbf69f 9f87b27a5b b2e1196e29 2dc1396380 77941ab3ce ee19a30dde 5d799b3174 8f33a985a2 78eeccef0e 3d415441e8
74385e0ebd 2bfbc29ccc ef79c26f18 fbe32c8e89 2511c28f92 637bb1cbbc 3dfea96ec1 68643153e5 462762f75b 4f3729c004
ba428cdf54 69c7d1b01b 733299ec13 e1adf781c6 31b5e4810c c6801fe159 1b563067f8 1996d81d72 ab0677c6f1 bdb53c93cc
94d5271cb5 e499db4266 cc3af82b47 9383b78be1 3c492571ab f2410f7ea7 91560b6a7a b1dd448233 904daf6f40 8e31a5d7bd
ee630b4539 46971447df d8b94007c1 cf595dcc38 d27211cfa7 ca1a3fbe88 c955b53aed 2a626d9608 0861cba04b 88246f45b3
1d04514354 c2324b8f3e 957ea65d12 00fa38a295 9d98c1b669 00cc9d421f 65716cf590 1b77a191f4 ebfde9173c 2fe0369049
e023201d42 d40e340479 9a09ed0659 5f27b546dd 022fdd52c3 7946a8f64e 7af79039fc 1755750ca1 ddb53672e2 eeae34972f
47d89b1e47 ee0bdaeb79 915c446c48 d1e2099408 6ea15b9efa 69f33aaff5 3f66f102d2 c6547f58b7 dfb05a7fa0 2f67f9ddcb
0e36185933 6617865440 6dba4912be 7a3827471b f006bc4c7e 0a442644e3 4960663546 1381137c37 b4a042dfc4 81c4f21b52
f2dab562a8 61196a8280 7a97c31ac0 424214041e b06bd6a913 1c762187e8 90aefc607f 2ca73c479b 17c7c273b8 493be259c3
106c6ac273 7aaaa371e7 468dad1780 32d294b89a dc5b7dace8 e00b7233cf 91f7e73c27 75fff151e8 d05a0cb80d d24aa69ceb
fabcacc3e5 ac58d75113 28564ef94e b62a9b57f3 76dd656f2a d218936763 123e29dc26 6a1dca113e 8aea6dd23a 78a2f86f70
b5e23e5823 7872643910 f15391f4fc ca9b81cc2e a2a9a02ecb e5e1d6c705 6ee19473ba a59551f3b4 3286a98b27 62769a0dac
f94108b4bc 60a0ff8217 b3dffc70e2 86ac39e11f 6e036d38b2 2d30ebb53b b3934b9580 09102a634a 95ff5901a1 f3d7152074
dff37f6048 832036ef0f f1742954ab 6ab0476676 d36413c821 99097f799c 0666571519 ef85161525 079eb808f8 39fb2d1a3b
db7f2db1ae df46c82ae2 f8adbbc461 17f0716d6c 5acd34ae92 84dbebac4f eddfcd2c88 9f470d297f 2222470f69 78175fcb96
77 .github/ISSUE_TEMPLATE/bug-report.yml (vendored)

````yaml
@@ -8,16 +8,15 @@ body:
      value: |
        Thank you for taking the time to file a bug report.

        Use this to report BUGS in LangChain. For usage questions, feature requests and general design questions, please use the [LangChain Forum](https://forum.langchain.com/).
        For usage questions, feature requests and general design questions, please use the [LangChain Forum](https://forum.langchain.com/).

        Relevant links to check before filing a bug report to see if your issue has already been reported, fixed or
        if there's another way to solve your problem:
        Check these before submitting to see if your issue has already been reported, fixed or if there's another way to solve your problem:

        * [LangChain Forum](https://forum.langchain.com/),
        * [LangChain documentation with the integrated search](https://docs.langchain.com/oss/python/langchain/overview),
        * [API Reference](https://reference.langchain.com/python/),
        * [Documentation](https://docs.langchain.com/oss/python/langchain/overview),
        * [API Reference Documentation](https://reference.langchain.com/python/),
        * [LangChain ChatBot](https://chat.langchain.com/)
        * [GitHub search](https://github.com/langchain-ai/langchain),
        * [LangChain Forum](https://forum.langchain.com/),
  - type: checkboxes
    id: checks
    attributes:
@@ -36,16 +35,48 @@ body:
        required: true
      - label: This is not related to the langchain-community package.
        required: true
      - label: I read what a minimal reproducible example is (https://stackoverflow.com/help/minimal-reproducible-example).
        required: true
      - label: I posted a self-contained, minimal, reproducible example. A maintainer can copy it and run it AS IS.
        required: true
  - type: checkboxes
    id: package
    attributes:
      label: Package (Required)
      description: |
        Which `langchain` package(s) is this bug related to? Select at least one.

        Note that if the package you are reporting for is not listed here, it is not in this repository (e.g. `langchain-google-genai` is in [`langchain-ai/langchain-google`](https://github.com/langchain-ai/langchain-google/)).

        Please report issues for other packages to their respective repositories.
      options:
        - label: langchain
        - label: langchain-openai
        - label: langchain-anthropic
        - label: langchain-classic
        - label: langchain-core
        - label: langchain-cli
        - label: langchain-model-profiles
        - label: langchain-tests
        - label: langchain-text-splitters
        - label: langchain-chroma
        - label: langchain-deepseek
        - label: langchain-exa
        - label: langchain-fireworks
        - label: langchain-groq
        - label: langchain-huggingface
        - label: langchain-mistralai
        - label: langchain-nomic
        - label: langchain-ollama
        - label: langchain-perplexity
        - label: langchain-prompty
        - label: langchain-qdrant
        - label: langchain-xai
        - label: Other / not sure / general
  - type: textarea
    id: reproduction
    validations:
      required: true
    attributes:
      label: Example Code
      label: Example Code (Python)
      description: |
        Please add a self-contained, [minimal, reproducible, example](https://stackoverflow.com/help/minimal-reproducible-example) with your use case.

@@ -53,15 +84,12 @@ body:

        **Important!**

        * Avoid screenshots when possible, as they are hard to read and (more importantly) don't allow others to copy-and-paste your code.
        * Reduce your code to the minimum required to reproduce the issue if possible. This makes it much easier for others to help you.
        * Use code tags (e.g., ```python ... ```) to correctly [format your code](https://help.github.com/en/github/writing-on-github/creating-and-highlighting-code-blocks#syntax-highlighting).
        * INCLUDE the language label (e.g. `python`) after the first three backticks to enable syntax highlighting. (e.g., ```python rather than ```).
        * Avoid screenshots, as they are hard to read and (more importantly) don't allow others to copy-and-paste your code.
        * Reduce your code to the minimum required to reproduce the issue if possible.

        (This will be automatically formatted into code, so no need for backticks.)
      render: python
      placeholder: |
        The following code:

        ```python
        from langchain_core.runnables import RunnableLambda

        def bad_code(inputs) -> int:
@@ -69,17 +97,14 @@ body:

        chain = RunnableLambda(bad_code)
        chain.invoke('Hello!')
        ```
  - type: textarea
    id: error
    validations:
      required: false
    attributes:
      label: Error Message and Stack Trace (if applicable)
      description: |
        If you are reporting an error, please include the full error message and stack trace.
      placeholder: |
        Exception + full stack trace
        If you are reporting an error, please copy and paste the full error message and
        stack trace.
        (This will be automatically formatted into code, so no need for backticks.)
      render: shell
  - type: textarea
    id: description
    attributes:
@@ -99,9 +124,7 @@ body:
    attributes:
      label: System Info
      description: |
        Please share your system info with us. Do NOT skip this step and please don't trim
        the output. Most users don't include enough information here and it makes it harder
        for us to help you.
        Please share your system info with us.

        Run the following command in your terminal and paste the output here:

@@ -113,8 +136,6 @@ body:
        from langchain_core import sys_info
        sys_info.print_sys_info()
        ```

        alternatively, put the entire output of `pip freeze` here.
      placeholder: |
        python -m langchain_core.sys_info
    validations:
````
||||
13 .github/ISSUE_TEMPLATE/config.yml (vendored)

```yaml
@@ -1,9 +1,18 @@
blank_issues_enabled: false
version: 2.1
contact_links:
  - name: 📚 Documentation
    url: https://github.com/langchain-ai/docs/issues/new?template=langchain.yml
  - name: 📚 Documentation issue
    url: https://github.com/langchain-ai/docs/issues/new?template=01-langchain.yml
    about: Report an issue related to the LangChain documentation
  - name: 💬 LangChain Forum
    url: https://forum.langchain.com/
    about: General community discussions and support
  - name: 📚 LangChain Documentation
    url: https://docs.langchain.com/oss/python/langchain/overview
    about: View the official LangChain documentation
  - name: 📚 API Reference Documentation
    url: https://reference.langchain.com/python/
    about: View the official LangChain API reference documentation
  - name: 💬 LangChain Forum
    url: https://forum.langchain.com/
    about: Ask questions and get help from the community
```
||||
40 .github/ISSUE_TEMPLATE/feature-request.yml (vendored)

```yaml
@@ -13,11 +13,11 @@ body:
        Relevant links to check before filing a feature request to see if your request has already been made or
        if there's another way to achieve what you want:

        * [LangChain Forum](https://forum.langchain.com/),
        * [LangChain documentation with the integrated search](https://docs.langchain.com/oss/python/langchain/overview),
        * [API Reference](https://reference.langchain.com/python/),
        * [Documentation](https://docs.langchain.com/oss/python/langchain/overview),
        * [API Reference Documentation](https://reference.langchain.com/python/),
        * [LangChain ChatBot](https://chat.langchain.com/)
        * [GitHub search](https://github.com/langchain-ai/langchain),
        * [LangChain Forum](https://forum.langchain.com/),
  - type: checkboxes
    id: checks
    attributes:
@@ -34,6 +34,40 @@ body:
        required: true
      - label: This is not related to the langchain-community package.
        required: true
  - type: checkboxes
    id: package
    attributes:
      label: Package (Required)
      description: |
        Which `langchain` package(s) is this request related to? Select at least one.

        Note that if the package you are requesting for is not listed here, it is not in this repository (e.g. `langchain-google-genai` is in `langchain-ai/langchain-google`).

        Please submit feature requests for other packages to their respective repositories.
      options:
        - label: langchain
        - label: langchain-openai
        - label: langchain-anthropic
        - label: langchain-classic
        - label: langchain-core
        - label: langchain-cli
        - label: langchain-model-profiles
        - label: langchain-tests
        - label: langchain-text-splitters
        - label: langchain-chroma
        - label: langchain-deepseek
        - label: langchain-exa
        - label: langchain-fireworks
        - label: langchain-groq
        - label: langchain-huggingface
        - label: langchain-mistralai
        - label: langchain-nomic
        - label: langchain-ollama
        - label: langchain-perplexity
        - label: langchain-prompty
        - label: langchain-qdrant
        - label: langchain-xai
        - label: Other / not sure / general
  - type: textarea
    id: feature-description
    validations:
```
||||
30 .github/ISSUE_TEMPLATE/privileged.yml (vendored)

```yaml
@@ -18,3 +18,33 @@ body:
    attributes:
      label: Issue Content
      description: Add the content of the issue here.
  - type: checkboxes
    id: package
    attributes:
      label: Package (Required)
      description: |
        Please select package(s) that this issue is related to.
      options:
        - label: langchain
        - label: langchain-openai
        - label: langchain-anthropic
        - label: langchain-classic
        - label: langchain-core
        - label: langchain-cli
        - label: langchain-model-profiles
        - label: langchain-tests
        - label: langchain-text-splitters
        - label: langchain-chroma
        - label: langchain-deepseek
        - label: langchain-exa
        - label: langchain-fireworks
        - label: langchain-groq
        - label: langchain-huggingface
        - label: langchain-mistralai
        - label: langchain-nomic
        - label: langchain-ollama
        - label: langchain-perplexity
        - label: langchain-prompty
        - label: langchain-qdrant
        - label: langchain-xai
        - label: Other / not sure / general
```
||||
48 .github/ISSUE_TEMPLATE/task.yml (vendored)

```yaml
@@ -25,13 +25,13 @@ body:
      label: Task Description
      description: |
        Provide a clear and detailed description of the task.

        What needs to be done? Be specific about the scope and requirements.
      placeholder: |
        This task involves...

        The goal is to...

        Specific requirements:
        - ...
        - ...
@@ -43,7 +43,7 @@ body:
      label: Acceptance Criteria
      description: |
        Define the criteria that must be met for this task to be considered complete.

        What are the specific deliverables or outcomes expected?
      placeholder: |
        This task will be complete when:
@@ -58,15 +58,15 @@ body:
      label: Context and Background
      description: |
        Provide any relevant context, background information, or links to related issues/PRs.

        Why is this task needed? What problem does it solve?
      placeholder: |
        Background:
        - ...

        Related issues/PRs:
        - #...

        Additional context:
        - ...
    validations:
@@ -77,15 +77,45 @@ body:
      label: Dependencies
      description: |
        List any dependencies or blockers for this task.

        Are there other tasks, issues, or external factors that need to be completed first?
      placeholder: |
        This task depends on:
        - [ ] Issue #...
        - [ ] PR #...
        - [ ] External dependency: ...

        Blocked by:
        - ...
    validations:
      required: false
  - type: checkboxes
    id: package
    attributes:
      label: Package (Required)
      description: |
        Please select package(s) that this task is related to.
      options:
        - label: langchain
        - label: langchain-openai
        - label: langchain-anthropic
        - label: langchain-classic
        - label: langchain-core
        - label: langchain-cli
        - label: langchain-model-profiles
        - label: langchain-tests
        - label: langchain-text-splitters
        - label: langchain-chroma
        - label: langchain-deepseek
        - label: langchain-exa
        - label: langchain-fireworks
        - label: langchain-groq
        - label: langchain-huggingface
        - label: langchain-mistralai
        - label: langchain-nomic
        - label: langchain-ollama
        - label: langchain-perplexity
        - label: langchain-prompty
        - label: langchain-qdrant
        - label: langchain-xai
        - label: Other / not sure / general
```
||||
38 .github/PULL_REQUEST_TEMPLATE.md (vendored)

```md
@@ -1,28 +1,30 @@
(Replace this entire block of text)

Thank you for contributing to LangChain! Follow these steps to mark your pull request as ready for review. **If any of these steps are not completed, your PR will not be considered for review.**
Read the full contributing guidelines: https://docs.langchain.com/oss/python/contributing/overview

Thank you for contributing to LangChain! Follow these steps to have your pull request considered as ready for review.

1. PR title: Should follow the format: TYPE(SCOPE): DESCRIPTION

- [ ] **PR title**: Follows the format: {TYPE}({SCOPE}): {DESCRIPTION}
  - Examples:
    - fix(anthropic): resolve flag parsing error
    - feat(core): add multi-tenant support
    - fix(cli): resolve flag parsing error
    - docs(openai): update API usage examples
  - Allowed `{TYPE}` values:
    - feat, fix, docs, style, refactor, perf, test, build, ci, chore, revert, release
  - Allowed `{SCOPE}` values (optional):
    - core, cli, langchain, standard-tests, text-splitters, docs, anthropic, chroma, deepseek, exa, fireworks, groq, huggingface, mistralai, nomic, ollama, openai, perplexity, prompty, qdrant, xai, infra
  - Once you've written the title, please delete this checklist item; do not include it in the PR.
    - test(openai): update API usage tests
  - Allowed TYPE and SCOPE values: https://github.com/langchain-ai/langchain/blob/master/.github/workflows/pr_lint.yml#L15-L33

- [ ] **PR message**: ***Delete this entire checklist*** and replace with
  - **Description:** a description of the change. Include a [closing keyword](https://docs.github.com/en/issues/tracking-your-work-with-issues/using-issues/linking-a-pull-request-to-an-issue#linking-a-pull-request-to-an-issue-using-a-keyword) if applicable to a relevant issue.
  - **Issue:** the issue # it fixes, if applicable (e.g. Fixes #123)
  - **Dependencies:** any dependencies required for this change
2. PR description:

- [ ] **Lint and test**: Run `make format`, `make lint` and `make test` from the root of the package(s) you've modified. **We will not consider a PR unless these three are passing in CI.** See [contribution guidelines](https://docs.langchain.com/oss/python/contributing) for more.
  - Write 1-2 sentences summarizing the change.
  - If this PR addresses a specific issue, please include "Fixes #ISSUE_NUMBER" in the description to automatically close the issue when the PR is merged.
  - If there are any breaking changes, please clearly describe them.
  - If this PR depends on another PR being merged first, please include "Depends on #PR_NUMBER" in the description.

3. Run `make format`, `make lint` and `make test` from the root of the package(s) you've modified.

  - We will not consider a PR unless these three are passing in CI.

Additional guidelines:

- Most PRs should not touch more than one package.
- Please do not add dependencies to `pyproject.toml` files (even optional ones) unless they are **required** for unit tests. Likewise, please do not update the `uv.lock` files unless you are adding a required dependency.
- Changes should be backwards compatible.
- Make sure optional dependencies are imported within a function.
- We ask that if you use generative AI for your contribution, you include a disclaimer.
- PRs should not touch more than one package unless absolutely necessary.
- Do not update the `uv.lock` files or add dependencies to `pyproject.toml` files (even optional ones) unless you have explicit permission to do so by a maintainer.
```
||||
93 .github/actions/poetry_setup/action.yml (vendored, deleted)

```yaml
@@ -1,93 +0,0 @@
# An action for setting up poetry install with caching.
# Using a custom action since the default action does not
# take poetry install groups into account.
# Action code from:
# https://github.com/actions/setup-python/issues/505#issuecomment-1273013236
name: poetry-install-with-caching
description: Poetry install with support for caching of dependency groups.

inputs:
  python-version:
    description: Python version, supporting MAJOR.MINOR only
    required: true

  poetry-version:
    description: Poetry version
    required: true

  cache-key:
    description: Cache key to use for manual handling of caching
    required: true

  working-directory:
    description: Directory whose poetry.lock file should be cached
    required: true

runs:
  using: composite
  steps:
    - uses: actions/setup-python@v5
      name: Setup python ${{ inputs.python-version }}
      id: setup-python
      with:
        python-version: ${{ inputs.python-version }}

    - uses: actions/cache@v4
      id: cache-bin-poetry
      name: Cache Poetry binary - Python ${{ inputs.python-version }}
      env:
        SEGMENT_DOWNLOAD_TIMEOUT_MIN: "1"
      with:
        path: |
          /opt/pipx/venvs/poetry
        # This step caches the poetry installation, so make sure it's keyed on the poetry version as well.
        key: bin-poetry-${{ runner.os }}-${{ runner.arch }}-py-${{ inputs.python-version }}-${{ inputs.poetry-version }}

    - name: Refresh shell hashtable and fixup softlinks
      if: steps.cache-bin-poetry.outputs.cache-hit == 'true'
      shell: bash
      env:
        POETRY_VERSION: ${{ inputs.poetry-version }}
        PYTHON_VERSION: ${{ inputs.python-version }}
      run: |
        set -eux

        # Refresh the shell hashtable, to ensure correct `which` output.
        hash -r

        # `actions/cache@v3` doesn't always seem able to correctly unpack softlinks.
        # Delete and recreate the softlinks pipx expects to have.
        rm /opt/pipx/venvs/poetry/bin/python
        cd /opt/pipx/venvs/poetry/bin
        ln -s "$(which "python$PYTHON_VERSION")" python
        chmod +x python
        cd /opt/pipx_bin/
        ln -s /opt/pipx/venvs/poetry/bin/poetry poetry
        chmod +x poetry

        # Ensure everything got set up correctly.
        /opt/pipx/venvs/poetry/bin/python --version
        /opt/pipx_bin/poetry --version

    - name: Install poetry
      if: steps.cache-bin-poetry.outputs.cache-hit != 'true'
      shell: bash
      env:
        POETRY_VERSION: ${{ inputs.poetry-version }}
        PYTHON_VERSION: ${{ inputs.python-version }}
      # Install poetry using the python version installed by setup-python step.
      run: pipx install "poetry==$POETRY_VERSION" --python '${{ steps.setup-python.outputs.python-path }}' --verbose

    - name: Restore pip and poetry cached dependencies
      uses: actions/cache@v4
      env:
        SEGMENT_DOWNLOAD_TIMEOUT_MIN: "4"
        WORKDIR: ${{ inputs.working-directory == '' && '.' || inputs.working-directory }}
      with:
        path: |
          ~/.cache/pip
          ~/.cache/pypoetry/virtualenvs
          ~/.cache/pypoetry/cache
          ~/.cache/pypoetry/artifacts
          ${{ env.WORKDIR }}/.venv
        key: py-deps-${{ runner.os }}-${{ runner.arch }}-py-${{ inputs.python-version }}-poetry-${{ inputs.poetry-version }}-${{ inputs.cache-key }}-${{ hashFiles(format('{0}/**/poetry.lock', env.WORKDIR)) }}
```
85 .github/pr-file-labeler.yml (vendored)

```yaml
@@ -7,13 +7,12 @@ core:
    - any-glob-to-any-file:
        - "libs/core/**/*"

langchain:
langchain-classic:
  - changed-files:
    - any-glob-to-any-file:
        - "libs/langchain/**/*"
        - "libs/langchain_v1/**/*"

v1:
langchain:
  - changed-files:
    - any-glob-to-any-file:
        - "libs/langchain_v1/**/*"
@@ -28,6 +27,11 @@ standard-tests:
    - any-glob-to-any-file:
        - "libs/standard-tests/**/*"

model-profiles:
  - changed-files:
    - any-glob-to-any-file:
        - "libs/model-profiles/**/*"

text-splitters:
  - changed-files:
    - any-glob-to-any-file:
@@ -39,6 +43,81 @@ integration:
    - any-glob-to-any-file:
        - "libs/partners/**/*"

anthropic:
  - changed-files:
    - any-glob-to-any-file:
        - "libs/partners/anthropic/**/*"

chroma:
  - changed-files:
    - any-glob-to-any-file:
        - "libs/partners/chroma/**/*"

deepseek:
  - changed-files:
    - any-glob-to-any-file:
        - "libs/partners/deepseek/**/*"

exa:
  - changed-files:
    - any-glob-to-any-file:
        - "libs/partners/exa/**/*"

fireworks:
  - changed-files:
    - any-glob-to-any-file:
        - "libs/partners/fireworks/**/*"

groq:
  - changed-files:
    - any-glob-to-any-file:
        - "libs/partners/groq/**/*"

huggingface:
  - changed-files:
    - any-glob-to-any-file:
        - "libs/partners/huggingface/**/*"

mistralai:
  - changed-files:
    - any-glob-to-any-file:
        - "libs/partners/mistralai/**/*"

nomic:
  - changed-files:
    - any-glob-to-any-file:
        - "libs/partners/nomic/**/*"

ollama:
  - changed-files:
    - any-glob-to-any-file:
        - "libs/partners/ollama/**/*"

openai:
  - changed-files:
    - any-glob-to-any-file:
        - "libs/partners/openai/**/*"

perplexity:
  - changed-files:
    - any-glob-to-any-file:
        - "libs/partners/perplexity/**/*"

prompty:
  - changed-files:
    - any-glob-to-any-file:
        - "libs/partners/prompty/**/*"

qdrant:
  - changed-files:
    - any-glob-to-any-file:
        - "libs/partners/qdrant/**/*"

xai:
  - changed-files:
    - any-glob-to-any-file:
        - "libs/partners/xai/**/*"

# Infrastructure and DevOps
infra:
  - changed-files:
```
41 .github/pr-title-labeler.yml (vendored, deleted)

```yaml
@@ -1,41 +0,0 @@
# PR title labeler config
#
# Labels PRs based on conventional commit patterns in titles
#
# Format: type(scope): description or type!: description (breaking)

add-missing-labels: true
clear-prexisting: false
include-commits: false
include-title: true
label-for-breaking-changes: breaking

label-mapping:
  documentation: ["docs"]
  feature: ["feat"]
  fix: ["fix"]
  infra: ["build", "ci", "chore"]
  integration:
    [
      "anthropic",
      "chroma",
      "deepseek",
      "exa",
      "fireworks",
      "groq",
      "huggingface",
      "mistralai",
      "nomic",
      "ollama",
      "openai",
      "perplexity",
      "prompty",
      "qdrant",
      "xai",
    ]
  linting: ["style"]
  performance: ["perf"]
  refactor: ["refactor"]
  release: ["release"]
  revert: ["revert"]
  tests: ["test"]
```
3 .github/scripts/check_diff.py (vendored)

```python
@@ -30,6 +30,7 @@ LANGCHAIN_DIRS = [
    "libs/text-splitters",
    "libs/langchain",
    "libs/langchain_v1",
    "libs/model-profiles",
]

# When set to True, we are ignoring core dependents
@@ -134,7 +135,7 @@ def _get_configs_for_single_dir(job: str, dir_: str) -> List[Dict[str, str]]:
    elif dir_ == "libs/core":
        py_versions = ["3.10", "3.11", "3.12", "3.13", "3.14"]
    # custom logic for specific directories
    elif dir_ in {"libs/partners/chroma", "libs/partners/nomic"}:
    elif dir_ in {"libs/partners/chroma"}:
        py_versions = ["3.10", "3.13"]
    else:
        py_versions = ["3.10", "3.14"]
```
2 .github/scripts/get_min_versions.py (vendored)

```python
@@ -98,7 +98,7 @@ def _check_python_version_from_requirement(
        return True
    else:
        marker_str = str(requirement.marker)
        if "python_version" or "python_full_version" in marker_str:
        if "python_version" in marker_str or "python_full_version" in marker_str:
            python_version_str = "".join(
                char
                for char in marker_str
```
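The one-line change above fixes a classic truthiness bug: `"python_version" or "python_full_version" in marker_str` parses as `("python_version") or (...)`, and a non-empty string literal is always true, so the branch ran for every marker. A minimal standalone illustration (not repo code):

```python
# A non-empty string literal is always truthy, so `or` short-circuits
# before the `in` test is ever evaluated.
marker_str = 'os_name == "nt"'  # no python version marker at all

buggy = bool("python_version" or "python_full_version" in marker_str)
fixed = "python_version" in marker_str or "python_full_version" in marker_str

print(buggy)  # True  -> the branch was taken for every requirement marker
print(fixed)  # False -> the branch is correctly skipped
```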
```yaml
@@ -35,7 +35,7 @@ jobs:
    timeout-minutes: 20
    name: "Python ${{ inputs.python-version }}"
    steps:
      - uses: actions/checkout@v5
      - uses: actions/checkout@v6

      - name: "🐍 Set up Python ${{ inputs.python-version }} + UV"
        uses: "./.github/actions/uv_setup"
```
2 .github/workflows/_lint.yml (vendored)

```yaml
@@ -38,7 +38,7 @@ jobs:
    timeout-minutes: 20
    steps:
      - name: "📋 Checkout Code"
        uses: actions/checkout@v5
        uses: actions/checkout@v6

      - name: "🐍 Set up Python ${{ inputs.python-version }} + UV"
        uses: "./.github/actions/uv_setup"
```
40 .github/workflows/_release.yml (vendored)

```yaml
@@ -54,7 +54,7 @@ jobs:
      version: ${{ steps.check-version.outputs.version }}

    steps:
      - uses: actions/checkout@v5
      - uses: actions/checkout@v6

      - name: Set up Python + uv
        uses: "./.github/actions/uv_setup"
@@ -77,7 +77,7 @@ jobs:
        working-directory: ${{ inputs.working-directory }}

      - name: Upload build
        uses: actions/upload-artifact@v4
        uses: actions/upload-artifact@v5
        with:
          name: dist
          path: ${{ inputs.working-directory }}/dist/
@@ -105,7 +105,7 @@ jobs:
    outputs:
      release-body: ${{ steps.generate-release-body.outputs.release-body }}
    steps:
      - uses: actions/checkout@v5
      - uses: actions/checkout@v6
        with:
          repository: langchain-ai/langchain
          path: langchain
@@ -149,8 +149,8 @@ jobs:
            fi
          fi

          # if PREV_TAG is empty, let it be empty
          if [ -z "$PREV_TAG" ]; then
          # if PREV_TAG is empty or came out to 0.0.0, let it be empty
          if [ -z "$PREV_TAG" ] || [ "$PREV_TAG" = "$PKG_NAME==0.0.0" ]; then
            echo "No previous tag found - first release"
          else
            # confirm prev-tag actually exists in git repo with git tag
@@ -179,8 +179,8 @@ jobs:
          PREV_TAG: ${{ steps.check-tags.outputs.prev-tag }}
        run: |
          PREAMBLE="Changes since $PREV_TAG"
          # if PREV_TAG is empty, then we are releasing the first version
          if [ -z "$PREV_TAG" ]; then
          # if PREV_TAG is empty or 0.0.0, then we are releasing the first version
          if [ -z "$PREV_TAG" ] || [ "$PREV_TAG" = "$PKG_NAME==0.0.0" ]; then
            PREAMBLE="Initial release"
            PREV_TAG=$(git rev-list --max-parents=0 HEAD)
          fi
@@ -206,9 +206,9 @@ jobs:
      id-token: write

    steps:
      - uses: actions/checkout@v5
      - uses: actions/checkout@v6

      - uses: actions/download-artifact@v5
      - uses: actions/download-artifact@v6
        with:
          name: dist
          path: ${{ inputs.working-directory }}/dist/
@@ -237,7 +237,7 @@ jobs:
      contents: read
    timeout-minutes: 20
    steps:
      - uses: actions/checkout@v5
      - uses: actions/checkout@v6

      # We explicitly *don't* set up caching here. This ensures our tests are
      # maximally sensitive to catching breakage.
@@ -258,7 +258,7 @@ jobs:
        with:
          python-version: ${{ env.PYTHON_VERSION }}

      - uses: actions/download-artifact@v5
      - uses: actions/download-artifact@v6
        with:
          name: dist
          path: ${{ inputs.working-directory }}/dist/
@@ -377,6 +377,7 @@ jobs:
          XAI_API_KEY: ${{ secrets.XAI_API_KEY }}
          DEEPSEEK_API_KEY: ${{ secrets.DEEPSEEK_API_KEY }}
          PPLX_API_KEY: ${{ secrets.PPLX_API_KEY }}
          LANGCHAIN_TESTS_USER_AGENT: ${{ secrets.LANGCHAIN_TESTS_USER_AGENT }}
        run: make integration_tests
        working-directory: ${{ inputs.working-directory }}

@@ -395,7 +396,7 @@ jobs:
      contents: read
    strategy:
      matrix:
        partner: [openai, anthropic]
        partner: [anthropic]
      fail-fast: false # Continue testing other partners if one fails
    env:
      ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
@@ -409,8 +410,9 @@ jobs:
      AZURE_OPENAI_LEGACY_CHAT_DEPLOYMENT_NAME: ${{ secrets.AZURE_OPENAI_LEGACY_CHAT_DEPLOYMENT_NAME }}
      AZURE_OPENAI_LLM_DEPLOYMENT_NAME: ${{ secrets.AZURE_OPENAI_LLM_DEPLOYMENT_NAME }}
      AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT_NAME: ${{ secrets.AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT_NAME }}
      LANGCHAIN_TESTS_USER_AGENT: ${{ secrets.LANGCHAIN_TESTS_USER_AGENT }}
    steps:
      - uses: actions/checkout@v5
      - uses: actions/checkout@v6

      # We implement this conditional as Github Actions does not have good support
      # for conditionally needing steps. https://github.com/actions/runner/issues/491
@@ -428,7 +430,7 @@ jobs:
        with:
          python-version: ${{ env.PYTHON_VERSION }}

      - uses: actions/download-artifact@v5
      - uses: actions/download-artifact@v6
        if: startsWith(inputs.working-directory, 'libs/core')
        with:
          name: dist
@@ -442,7 +444,7 @@ jobs:
            git ls-remote --tags origin "langchain-${{ matrix.partner }}*" \
              | awk '{print $2}' \
              | sed 's|refs/tags/||' \
              | grep -E '[0-9]+\.[0-9]+\.[0-9]+([a-zA-Z]+[0-9]+)?$' \
              | grep -E '[0-9]+\.[0-9]+\.[0-9]+$' \
              | sort -Vr \
              | head -n 1
          )"
@@ -490,14 +492,14 @@ jobs:
      working-directory: ${{ inputs.working-directory }}

    steps:
      - uses: actions/checkout@v5
      - uses: actions/checkout@v6

      - name: Set up Python + uv
        uses: "./.github/actions/uv_setup"
        with:
          python-version: ${{ env.PYTHON_VERSION }}

      - uses: actions/download-artifact@v5
      - uses: actions/download-artifact@v6
        with:
          name: dist
          path: ${{ inputs.working-directory }}/dist/
@@ -530,14 +532,14 @@ jobs:
      working-directory: ${{ inputs.working-directory }}

    steps:
      - uses: actions/checkout@v5
      - uses: actions/checkout@v6

      - name: Set up Python + uv
        uses: "./.github/actions/uv_setup"
        with:
          python-version: ${{ env.PYTHON_VERSION }}

      - uses: actions/download-artifact@v5
      - uses: actions/download-artifact@v6
        with:
          name: dist
          path: ${{ inputs.working-directory }}/dist/
```
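The `grep -E` swap in the tag-listing step above narrows which git tags count as the latest release: the old pattern also accepted pre-release suffixes such as `rc1`, while the new one matches only bare `X.Y.Z` versions. A quick sketch of the difference (the tag names below are hypothetical):

```python
import re

# Old pattern: optional pre-release suffix allowed. New pattern: bare X.Y.Z only.
old = re.compile(r"[0-9]+\.[0-9]+\.[0-9]+([a-zA-Z]+[0-9]+)?$")
new = re.compile(r"[0-9]+\.[0-9]+\.[0-9]+$")

for tag in ["langchain-anthropic==1.2.3", "langchain-anthropic==1.2.3rc1"]:
    print(tag, bool(old.search(tag)), bool(new.search(tag)))
# langchain-anthropic==1.2.3 True True
# langchain-anthropic==1.2.3rc1 True False
```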
2 .github/workflows/_test.yml (vendored)

```yaml
@@ -33,7 +33,7 @@ jobs:
    name: "Python ${{ inputs.python-version }}"
    steps:
      - name: "📋 Checkout Code"
        uses: actions/checkout@v5
        uses: actions/checkout@v6

      - name: "🐍 Set up Python ${{ inputs.python-version }} + UV"
        uses: "./.github/actions/uv_setup"
```
2 .github/workflows/_test_pydantic.yml (vendored)

```yaml
@@ -36,7 +36,7 @@ jobs:
    name: "Pydantic ~=${{ inputs.pydantic-version }}"
    steps:
      - name: "📋 Checkout Code"
        uses: actions/checkout@v5
        uses: actions/checkout@v6

      - name: "🐍 Set up Python ${{ inputs.python-version }} + UV"
        uses: "./.github/actions/uv_setup"
```
107 .github/workflows/auto-label-by-package.yml (vendored, new file)

```yaml
@@ -0,0 +1,107 @@
name: Auto Label Issues by Package

on:
  issues:
    types: [opened, edited]

jobs:
  label-by-package:
    permissions:
      issues: write
    runs-on: ubuntu-latest

    steps:
      - name: Sync package labels
        uses: actions/github-script@v8
        with:
          script: |
            const body = context.payload.issue.body || "";

            // Extract text under "### Package"
            const match = body.match(/### Package\s+([\s\S]*?)\n###/i);
            if (!match) return;

            const packageSection = match[1].trim();

            // Mapping table for package names to labels
            const mapping = {
              "langchain": "langchain",
              "langchain-openai": "openai",
              "langchain-anthropic": "anthropic",
              "langchain-classic": "langchain-classic",
              "langchain-core": "core",
              "langchain-cli": "cli",
              "langchain-model-profiles": "model-profiles",
              "langchain-tests": "standard-tests",
              "langchain-text-splitters": "text-splitters",
              "langchain-chroma": "chroma",
              "langchain-deepseek": "deepseek",
              "langchain-exa": "exa",
              "langchain-fireworks": "fireworks",
              "langchain-groq": "groq",
              "langchain-huggingface": "huggingface",
              "langchain-mistralai": "mistralai",
              "langchain-nomic": "nomic",
              "langchain-ollama": "ollama",
              "langchain-perplexity": "perplexity",
              "langchain-prompty": "prompty",
              "langchain-qdrant": "qdrant",
              "langchain-xai": "xai",
            };

            // All possible package labels we manage
            const allPackageLabels = Object.values(mapping);
            const selectedLabels = [];

            // Check if this is checkbox format (multiple selection)
            const checkboxMatches = packageSection.match(/- \[x\]\s+([^\n\r]+)/gi);
            if (checkboxMatches) {
              // Handle checkbox format
              for (const match of checkboxMatches) {
                const packageName = match.replace(/- \[x\]\s+/i, '').trim();
                const label = mapping[packageName];
                if (label && !selectedLabels.includes(label)) {
                  selectedLabels.push(label);
                }
              }
            } else {
              // Handle dropdown format (single selection)
              const label = mapping[packageSection];
              if (label) {
                selectedLabels.push(label);
              }
            }

            // Get current issue labels
            const issue = await github.rest.issues.get({
              owner: context.repo.owner,
              repo: context.repo.repo,
              issue_number: context.issue.number
            });

            const currentLabels = issue.data.labels.map(label => label.name);
            const currentPackageLabels = currentLabels.filter(label => allPackageLabels.includes(label));

            // Determine labels to add and remove
            const labelsToAdd = selectedLabels.filter(label => !currentPackageLabels.includes(label));
            const labelsToRemove = currentPackageLabels.filter(label => !selectedLabels.includes(label));

            // Add new labels
            if (labelsToAdd.length > 0) {
              await github.rest.issues.addLabels({
                owner: context.repo.owner,
                repo: context.repo.repo,
                issue_number: context.issue.number,
                labels: labelsToAdd
              });
            }

            // Remove old labels
            for (const label of labelsToRemove) {
              await github.rest.issues.removeLabel({
                owner: context.repo.owner,
                repo: context.repo.repo,
                issue_number: context.issue.number,
                name: label
              });
            }
```
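The heart of this workflow is the `### Package` extraction: grab the text between that heading and the next `###` heading, then collect every checked box. A hedged Python re-creation of the same parsing, with an invented sample issue body:

```python
import re

# Sample issue body (invented for illustration) in the shape the
# issue-form templates above generate.
body = """### Package

- [x] langchain-core
- [ ] langchain-openai
- [x] langchain

### Example Code
"""

# Same extraction the workflow's JS performs: the section between
# "### Package" and the next "###" heading, then every checked checkbox.
section = re.search(r"### Package\s+([\s\S]*?)\n###", body, re.IGNORECASE)
if section:
    checked = re.findall(r"- \[x\]\s+(.+)", section.group(1), re.IGNORECASE)
    print(checked)  # ['langchain-core', 'langchain']
```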
2 .github/workflows/check_core_versions.yml (vendored)

```yaml
@@ -18,7 +18,7 @@ jobs:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v5
      - uses: actions/checkout@v6

      - name: "✅ Verify pyproject.toml & version.py Match"
        run: |
```
6 .github/workflows/check_diffs.yml (vendored)

```yaml
@@ -47,7 +47,7 @@ jobs:
    if: ${{ !contains(github.event.pull_request.labels.*.name, 'ci-ignore') }}
    steps:
      - name: "📋 Checkout Code"
        uses: actions/checkout@v5
        uses: actions/checkout@v6
      - name: "🐍 Setup Python 3.11"
        uses: actions/setup-python@v6
        with:
@@ -141,7 +141,7 @@ jobs:
      run:
        working-directory: ${{ matrix.job-configs.working-directory }}
    steps:
      - uses: actions/checkout@v5
      - uses: actions/checkout@v6

      - name: "🐍 Set up Python ${{ matrix.job-configs.python-version }} + UV"
        uses: "./.github/actions/uv_setup"
@@ -182,7 +182,7 @@ jobs:
        job-configs: ${{ fromJson(needs.build.outputs.codspeed) }}
      fail-fast: false
    steps:
      - uses: actions/checkout@v5
      - uses: actions/checkout@v6

      - name: "📦 Install UV Package Manager"
        uses: astral-sh/setup-uv@v7
```
7 .github/workflows/integration_tests.yml (vendored)

```yaml
@@ -71,14 +71,14 @@ jobs:
      working-directory: ${{ fromJSON(needs.compute-matrix.outputs.matrix).working-directory }}

    steps:
      - uses: actions/checkout@v5
      - uses: actions/checkout@v6
        with:
          path: langchain
      - uses: actions/checkout@v5
      - uses: actions/checkout@v6
        with:
          repository: langchain-ai/langchain-google
          path: langchain-google
      - uses: actions/checkout@v5
      - uses: actions/checkout@v6
        with:
          repository: langchain-ai/langchain-aws
          path: langchain-aws
@@ -155,6 +155,7 @@ jobs:
          WATSONX_APIKEY: ${{ secrets.WATSONX_APIKEY }}
          WATSONX_PROJECT_ID: ${{ secrets.WATSONX_PROJECT_ID }}
          XAI_API_KEY: ${{ secrets.XAI_API_KEY }}
          LANGCHAIN_TESTS_USER_AGENT: ${{ secrets.LANGCHAIN_TESTS_USER_AGENT }}
        run: |
          cd langchain/${{ matrix.working-directory }}
          make integration_tests
```
15 .github/workflows/pr_lint.yml (vendored)

```yaml
@@ -26,11 +26,13 @@
# * revert — reverts a previous commit
# * release — prepare a new release
#
# Allowed Scopes (optional):
#   core, cli, langchain, langchain_v1, langchain-classic, standard-tests,
#   text-splitters, docs, anthropic, chroma, deepseek, exa, fireworks, groq,
#   huggingface, mistralai, nomic, ollama, openai, perplexity, prompty, qdrant,
#   xai, infra
# Allowed Scope(s) (optional):
#   core, cli, langchain, langchain_v1, langchain-classic, model-profiles,
#   standard-tests, text-splitters, docs, anthropic, chroma, deepseek, exa,
#   fireworks, groq, huggingface, mistralai, nomic, ollama, openai,
#   perplexity, prompty, qdrant, xai, infra, deps
#
# Multiple scopes can be used by separating them with a comma.
#
# Rules:
#   1. The 'Type' must start with a lowercase letter.
@@ -79,8 +81,8 @@ jobs:
            core
            cli
            langchain
            langchain_v1
            langchain-classic
            model-profiles
            standard-tests
            text-splitters
            docs
@@ -100,6 +102,7 @@ jobs:
            qdrant
            xai
            infra
            deps
          requireScope: false
          disallowScopes: |
            release
```
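These type and scope lists feed a conventional-commit check on PR titles. As a rough sketch only (the actual lint action's parsing is more involved than this), the shape being enforced looks like:

```python
import re

TYPES = "feat|fix|docs|style|refactor|perf|test|build|ci|chore|revert|release"

# Simplified shape of a conventional-commit PR title: lowercase type,
# optional comma-separated scopes in parentheses, then ": description".
TITLE_RE = re.compile(rf"^({TYPES})(\([a-z0-9_,-]+\))?!?: .+")

print(bool(TITLE_RE.match("fix(cli): resolve flag parsing error")))   # True
print(bool(TITLE_RE.match("feat(core,openai): add model profiles")))  # True
print(bool(TITLE_RE.match("Fix: something")))  # False: type must be lowercase
```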
4 .github/workflows/v03_api_doc_build.yml (vendored)

```yaml
@@ -23,12 +23,12 @@ jobs:
    permissions:
      contents: read
    steps:
      - uses: actions/checkout@v5
      - uses: actions/checkout@v6
        with:
          ref: v0.3
          path: langchain

      - uses: actions/checkout@v5
      - uses: actions/checkout@v6
        with:
          repository: langchain-ai/langchain-api-docs-html
          path: langchain-api-docs-html
```
5 .gitignore (vendored)

```text
@@ -1,6 +1,8 @@
.vs/
.claude/
.idea/
#Emacs backup
*~
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
@@ -161,3 +163,6 @@ node_modules

prof
virtualenv/
scratch/

.langgraph_api/
```
8 .mcp.json (new file)

```json
@@ -0,0 +1,8 @@
{
  "mcpServers": {
    "docs-langchain": {
      "type": "http",
      "url": "https://docs.langchain.com/mcp"
    }
  }
}
```
405 AGENTS.md

````md
@@ -1,255 +1,58 @@
# Global Development Guidelines for LangChain Projects
# Global development guidelines for the LangChain monorepo

## Core Development Principles
This document provides context to understand the LangChain Python project and assist with development.

### 1. Maintain Stable Public Interfaces ⚠️ CRITICAL
## Project architecture and context

**Always attempt to preserve function signatures, argument positions, and names for exported/public methods.**
### Monorepo structure

❌ **Bad - Breaking Change:**
This is a Python monorepo with multiple independently versioned packages that use `uv`.

```python
def get_user(id, verbose=False):  # Changed from `user_id`
    pass
```
```txt
langchain/
├── libs/
│   ├── core/             # `langchain-core` primitives and base abstractions
│   ├── langchain/        # `langchain-classic` (legacy, no new features)
│   ├── langchain_v1/     # Actively maintained `langchain` package
│   ├── partners/         # Third-party integrations
│   │   ├── openai/       # OpenAI models and embeddings
│   │   ├── anthropic/    # Anthropic (Claude) integration
│   │   ├── ollama/       # Local model support
│   │   └── ... (other integrations maintained by the LangChain team)
│   ├── text-splitters/   # Document chunking utilities
│   ├── standard-tests/   # Shared test suite for integrations
│   ├── model-profiles/   # Model configuration profiles
│   └── cli/              # Command-line interface tools
├── .github/              # CI/CD workflows and templates
├── .vscode/              # VSCode IDE standard settings and recommended extensions
└── README.md             # Information about LangChain
```

✅ **Good - Stable Interface:**
- **Core layer** (`langchain-core`): Base abstractions, interfaces, and protocols. Users should not need to know about this layer directly.
- **Implementation layer** (`langchain`): Concrete implementations and high-level public utilities
- **Integration layer** (`partners/`): Third-party service integrations. Note that this monorepo is not exhaustive of all LangChain integrations; some are maintained in separate repos, such as `langchain-ai/langchain-google` and `langchain-ai/langchain-aws`. Usually these repos are cloned at the same level as this monorepo, so if needed, you can refer to their code directly by navigating to `../langchain-google/` from this monorepo.
- **Testing layer** (`standard-tests/`): Standardized integration tests for partner integrations

```python
def get_user(user_id: str, verbose: bool = False) -> User:
    """Retrieve user by ID with optional verbose output."""
    pass
```
### Development tools & commands

**Before making ANY changes to public APIs:**
- `uv` – Fast Python package installer and resolver (replaces pip/poetry)
- `make` – Task runner for common development commands. Feel free to look at the `Makefile` for available commands and usage patterns.
- `ruff` – Fast Python linter and formatter
- `mypy` – Static type checking
- `pytest` – Testing framework

- Check if the function/class is exported in `__init__.py`
- Look for existing usage patterns in tests and examples
- Use keyword-only arguments for new parameters: `*, new_param: str = "default"`
- Mark experimental features clearly with docstring warnings (using MkDocs Material admonitions, like `!!! warning`)
This monorepo uses `uv` for dependency management. Local development uses editable installs: `[tool.uv.sources]`

🧠 *Ask yourself:* "Would this change break someone's code if they used it last week?"

### 2. Code Quality Standards

**All Python code MUST include type hints and return types.**

❌ **Bad:**

```python
def p(u, d):
    return [x for x in u if x not in d]
```

✅ **Good:**

```python
def filter_unknown_users(users: list[str], known_users: set[str]) -> list[str]:
    """Filter out users that are not in the known users set.

    Args:
        users: List of user identifiers to filter.
        known_users: Set of known/valid user identifiers.

    Returns:
        List of users that are not in the known_users set.
    """
    return [user for user in users if user not in known_users]
```

**Style Requirements:**

- Use descriptive, **self-explanatory variable names**. Avoid overly short or cryptic identifiers.
- Attempt to break up complex functions (>20 lines) into smaller, focused functions where it makes sense
- Avoid unnecessary abstraction or premature optimization
- Follow existing patterns in the codebase you're modifying

### 3. Testing Requirements

**Every new feature or bugfix MUST be covered by unit tests.**

**Test Organization:**

- Unit tests: `tests/unit_tests/` (no network calls allowed)
- Integration tests: `tests/integration_tests/` (network calls permitted)
- Use `pytest` as the testing framework

**Test Quality Checklist:**

- [ ] Tests fail when your new logic is broken
- [ ] Happy path is covered
- [ ] Edge cases and error conditions are tested
- [ ] Use fixtures/mocks for external dependencies
- [ ] Tests are deterministic (no flaky tests)

Checklist questions:

- [ ] Does the test suite fail if your new logic is broken?
- [ ] Are all expected behaviors exercised (happy path, invalid input, etc)?
- [ ] Do tests use fixtures or mocks where needed?

```python
def test_filter_unknown_users():
    """Test filtering unknown users from a list."""
    users = ["alice", "bob", "charlie"]
    known_users = {"alice", "bob"}

    result = filter_unknown_users(users, known_users)

    assert result == ["charlie"]
    assert len(result) == 1
```

### 4. Security and Risk Assessment

**Security Checklist:**

- No `eval()`, `exec()`, or `pickle` on user-controlled input
- Proper exception handling (no bare `except:`) and use a `msg` variable for error messages
- Remove unreachable/commented code before committing
- Race conditions or resource leaks (file handles, sockets, threads).
- Ensure proper resource cleanup (file handles, connections)

❌ **Bad:**

```python
def load_config(path):
    with open(path) as f:
        return eval(f.read())  # ⚠️ Never eval config
```

✅ **Good:**

```python
import json

def load_config(path: str) -> dict:
    with open(path) as f:
        return json.load(f)
```

### 5. Documentation Standards

**Use Google-style docstrings with Args section for all public functions.**

❌ **Insufficient Documentation:**

```python
def send_email(to, msg):
    """Send an email to a recipient."""
```

✅ **Complete Documentation:**

```python
def send_email(to: str, msg: str, *, priority: str = "normal") -> bool:
    """
    Send an email to a recipient with specified priority.

    Args:
        to: The email address of the recipient.
        msg: The message body to send.
        priority: Email priority level (`'low'`, `'normal'`, `'high'`).

    Returns:
        `True` if email was sent successfully, `False` otherwise.

    Raises:
        `InvalidEmailError`: If the email address format is invalid.
        `SMTPConnectionError`: If unable to connect to email server.
    """
```

**Documentation Guidelines:**

- Types go in function signatures, NOT in docstrings
- If a default is present, DO NOT repeat it in the docstring unless there is post-processing or it is set conditionally.
- Focus on "why" rather than "what" in descriptions
- Document all parameters, return values, and exceptions
- Keep descriptions concise but clear
- Ensure American English spelling (e.g., "behavior", not "behaviour")

📌 *Tip:* Keep descriptions concise but clear. Only document return values if non-obvious.

### 6. Architectural Improvements

**When you encounter code that could be improved, suggest better designs:**

❌ **Poor Design:**

```python
def process_data(data, db_conn, email_client, logger):
    # Function doing too many things
    validated = validate_data(data)
    result = db_conn.save(validated)
    email_client.send_notification(result)
    logger.log(f"Processed {len(data)} items")
    return result
```

✅ **Better Design:**

```python
@dataclass
class ProcessingResult:
    """Result of data processing operation."""
    items_processed: int
    success: bool
    errors: List[str] = field(default_factory=list)

class DataProcessor:
    """Handles data validation, storage, and notification."""

    def __init__(self, db_conn: Database, email_client: EmailClient):
        self.db = db_conn
        self.email = email_client

    def process(self, data: List[dict]) -> ProcessingResult:
        """Process and store data with notifications."""
        validated = self._validate_data(data)
        result = self.db.save(validated)
        self._notify_completion(result)
        return result
```

**Design Improvement Areas:**

If there's a **cleaner**, **more scalable**, or **simpler** design, highlight it and suggest improvements that would:

- Reduce code duplication through shared utilities
- Make unit testing easier
- Improve separation of concerns (single responsibility)
- Make unit testing easier through dependency injection
- Add clarity without adding complexity
- Prefer dataclasses for structured data

## Development Tools & Commands

### Package Management

```bash
# Add package
uv add package-name

# Sync project dependencies
uv sync
uv lock
```

### Testing
Each package in `libs/` has its own `pyproject.toml` and `uv.lock`.

```bash
# Run unit tests (no network)
make test

# Don't run integration tests, as API keys must be set

# Run specific test file
uv run --group test pytest tests/unit_tests/test_specific.py
```

### Code Quality

```bash
# Lint code
make lint
@@ -261,66 +64,118 @@ make format
uv run --group lint mypy .
```

### Dependency Management Patterns
#### Key config files

**Local Development Dependencies:**
- pyproject.toml: Main workspace configuration with dependency groups
- uv.lock: Locked dependencies for reproducible builds
- Makefile: Development tasks

```toml
[tool.uv.sources]
langchain-core = { path = "../core", editable = true }
langchain-tests = { path = "../standard-tests", editable = true }
```
#### Commit standards

**For tools, use the `@tool` decorator from `langchain_core.tools`:**
Suggest PR titles that follow Conventional Commits format. Refer to .github/workflows/pr_lint for allowed types and scopes.

```python
from langchain_core.tools import tool
#### Pull request guidelines

@tool
def search_database(query: str) -> str:
    """Search the database for relevant information.
- Always add a disclaimer to the PR description mentioning how AI agents are involved with the contribution.
- Describe the "why" of the changes, why the proposed solution is the right one. Limit prose.
- Highlight areas of the proposed changes that require careful review.

## Core development principles

### Maintain stable public interfaces

CRITICAL: Always attempt to preserve function signatures, argument positions, and names for exported/public methods. Do not make breaking changes.

**Before making ANY changes to public APIs:**

- Check if the function/class is exported in `__init__.py`
- Look for existing usage patterns in tests and examples
- Use keyword-only arguments for new parameters: `*, new_param: str = "default"`
- Mark experimental features clearly with docstring warnings (using MkDocs Material admonitions, like `!!! warning`)

Ask: "Would this change break someone's code if they used it last week?"

### Code quality standards

All Python code MUST include type hints and return types.

```python title="Example"
def filter_unknown_users(users: list[str], known_users: set[str]) -> list[str]:
    """Single line description of the function.

    Any additional context about the function can go here.

    Args:
        query: The search query string.
        users: List of user identifiers to filter.
        known_users: Set of known/valid user identifiers.
````
|
||||
|
||||
Returns:
|
||||
List of users that are not in the known_users set.
|
||||
"""
|
||||
# Implementation here
|
||||
return results
|
||||
```
|
||||
|
||||
## Commit Standards
|
||||
- Use descriptive, self-explanatory variable names.
|
||||
- Follow existing patterns in the codebase you're modifying
|
||||
- Attempt to break up complex functions (>20 lines) into smaller, focused functions where it makes sense
|
||||
|
||||
**Use Conventional Commits format for PR titles:**
|
||||
### Testing requirements
|
||||
|
||||
- `feat(core): add multi-tenant support`
|
||||
- `fix(cli): resolve flag parsing error`
|
||||
- `docs: update API usage examples`
|
||||
- `docs(openai): update API usage examples`
|
||||
Every new feature or bugfix MUST be covered by unit tests.
|
||||
|
||||
## Framework-Specific Guidelines
|
||||
- Unit tests: `tests/unit_tests/` (no network calls allowed)
|
||||
- Integration tests: `tests/integration_tests/` (network calls permitted)
|
||||
- We use `pytest` as the testing framework; if in doubt, check other existing tests for examples.
|
||||
- The testing file structure should mirror the source code structure.
|
||||
|
||||
- Follow the existing patterns in `langchain-core` for base abstractions
|
||||
- Use `langchain_core.callbacks` for execution tracking
|
||||
- Implement proper streaming support where applicable
|
||||
- Avoid deprecated components like legacy `LLMChain`
|
||||
**Checklist:**
|
||||
|
||||
### Partner Integrations
|
||||
- [ ] Tests fail when your new logic is broken
|
||||
- [ ] Happy path is covered
|
||||
- [ ] Edge cases and error conditions are tested
|
||||
- [ ] Use fixtures/mocks for external dependencies
|
||||
- [ ] Tests are deterministic (no flaky tests)
|
||||
- [ ] Does the test suite fail if your new logic is broken?
|
||||
|
||||
- Follow the established patterns in existing partner libraries
|
||||
- Implement standard interfaces (`BaseChatModel`, `BaseEmbeddings`, etc.)
|
||||
- Include comprehensive integration tests
|
||||
- Document API key requirements and authentication
|
||||
### Security and risk assessment
|
||||
|
||||
---
|
||||
- No `eval()`, `exec()`, or `pickle` on user-controlled input
|
||||
- Proper exception handling (no bare `except:`) and use a `msg` variable for error messages
|
||||
- Remove unreachable/commented code before committing
|
||||
- Race conditions or resource leaks (file handles, sockets, threads).
|
||||
- Ensure proper resource cleanup (file handles, connections)
|
||||
|
||||
## Quick Reference Checklist
|
||||
### Documentation standards
|
||||
|
||||
Before submitting code changes:
|
||||
Use Google-style docstrings with Args section for all public functions.
|
||||
|
||||
- [ ] **Breaking Changes**: Verified no public API changes
|
||||
- [ ] **Type Hints**: All functions have complete type annotations
|
||||
- [ ] **Tests**: New functionality is fully tested
|
||||
- [ ] **Security**: No dangerous patterns (eval, silent failures, etc.)
|
||||
- [ ] **Documentation**: Google-style docstrings for public functions
|
||||
- [ ] **Code Quality**: `make lint` and `make format` pass
|
||||
- [ ] **Architecture**: Suggested improvements where applicable
|
||||
- [ ] **Commit Message**: Follows Conventional Commits format
|
||||
```python title="Example"
|
||||
def send_email(to: str, msg: str, *, priority: str = "normal") -> bool:
|
||||
"""Send an email to a recipient with specified priority.
|
||||
|
||||
Any additional context about the function can go here.
|
||||
|
||||
Args:
|
||||
to: The email address of the recipient.
|
||||
msg: The message body to send.
|
||||
priority: Email priority level.
|
||||
|
||||
Returns:
|
||||
`True` if email was sent successfully, `False` otherwise.
|
||||
|
||||
Raises:
|
||||
InvalidEmailError: If the email address format is invalid.
|
||||
SMTPConnectionError: If unable to connect to email server.
|
||||
"""
|
||||
```
|
||||
|
||||
- Types go in function signatures, NOT in docstrings
|
||||
- If a default is present, DO NOT repeat it in the docstring unless there is post-processing or it is set conditionally.
|
||||
- Focus on "why" rather than "what" in descriptions
|
||||
- Document all parameters, return values, and exceptions
|
||||
- Keep descriptions concise but clear
|
||||
- Ensure American English spelling (e.g., "behavior", not "behaviour")
|
||||
|
||||
## Additional resources
|
||||
|
||||
- **Documentation:** https://docs.langchain.com/oss/python/langchain/overview and source at https://github.com/langchain-ai/docs or `../docs/`. Prefer the local install and use file search tools for best results. If needed, use the docs MCP server as defined in `.mcp.json` for programmatic access.
|
||||
- **Contributing Guide:** [`.github/CONTRIBUTING.md`](https://docs.langchain.com/oss/python/contributing/overview)
|
||||
|
||||
CLAUDE.md

@@ -1,255 +1,58 @@

# Global development guidelines for the LangChain monorepo

This document provides context to understand the LangChain Python project and assist with development.

## Project architecture and context

### Monorepo structure

This is a Python monorepo with multiple independently versioned packages that use `uv`.

```txt
langchain/
├── libs/
│   ├── core/             # `langchain-core` primitives and base abstractions
│   ├── langchain/        # `langchain-classic` (legacy, no new features)
│   ├── langchain_v1/     # Actively maintained `langchain` package
│   ├── partners/         # Third-party integrations
│   │   ├── openai/       # OpenAI models and embeddings
│   │   ├── anthropic/    # Anthropic (Claude) integration
│   │   ├── ollama/       # Local model support
│   │   └── ...           # (other integrations maintained by the LangChain team)
│   ├── text-splitters/   # Document chunking utilities
│   ├── standard-tests/   # Shared test suite for integrations
│   ├── model-profiles/   # Model configuration profiles
│   └── cli/              # Command-line interface tools
├── .github/              # CI/CD workflows and templates
├── .vscode/              # VSCode IDE standard settings and recommended extensions
└── README.md             # Information about LangChain
```

- **Core layer** (`langchain-core`): Base abstractions, interfaces, and protocols. Users should not need to know about this layer directly.
- **Implementation layer** (`langchain`): Concrete implementations and high-level public utilities
- **Integration layer** (`partners/`): Third-party service integrations. Note that this monorepo is not exhaustive of all LangChain integrations; some are maintained in separate repos, such as `langchain-ai/langchain-google` and `langchain-ai/langchain-aws`. Usually these repos are cloned at the same level as this monorepo, so if needed, you can refer to their code directly by navigating to `../langchain-google/` from this monorepo.
- **Testing layer** (`standard-tests/`): Standardized integration tests for partner integrations

## Core Development Principles

### 1. Maintain Stable Public Interfaces ⚠️ CRITICAL

**Always attempt to preserve function signatures, argument positions, and names for exported/public methods.**

❌ **Bad - Breaking Change:**

```python
def get_user(id, verbose=False):  # Changed from `user_id`
    pass
```

✅ **Good - Stable Interface:**

```python
def get_user(user_id: str, verbose: bool = False) -> User:
    """Retrieve user by ID with optional verbose output."""
    pass
```
**Before making ANY changes to public APIs:**

- Check if the function/class is exported in `__init__.py`
- Look for existing usage patterns in tests and examples
- Use keyword-only arguments for new parameters: `*, new_param: str = "default"`
- Mark experimental features clearly with docstring warnings (using MkDocs Material admonitions, like `!!! warning`)

🧠 *Ask yourself:* "Would this change break someone's code if they used it last week?"

### Development tools & commands

- `uv` – Fast Python package installer and resolver (replaces pip/poetry)
- `make` – Task runner for common development commands. Feel free to look at the `Makefile` for available commands and usage patterns.
- `ruff` – Fast Python linter and formatter
- `mypy` – Static type checking
- `pytest` – Testing framework

This monorepo uses `uv` for dependency management. Local development uses editable installs: `[tool.uv.sources]`

### 2. Code Quality Standards

**All Python code MUST include type hints and return types.**

❌ **Bad:**

```python
def p(u, d):
    return [x for x in u if x not in d]
```

✅ **Good:**

```python
def filter_unknown_users(users: list[str], known_users: set[str]) -> list[str]:
    """Filter out users that are not in the known users set.

    Args:
        users: List of user identifiers to filter.
        known_users: Set of known/valid user identifiers.

    Returns:
        List of users that are not in the known_users set.
    """
    return [user for user in users if user not in known_users]
```

**Style Requirements:**

- Use descriptive, **self-explanatory variable names**. Avoid overly short or cryptic identifiers.
- Attempt to break up complex functions (>20 lines) into smaller, focused functions where it makes sense
- Avoid unnecessary abstraction or premature optimization
- Follow existing patterns in the codebase you're modifying

### 3. Testing Requirements

**Every new feature or bugfix MUST be covered by unit tests.**

**Test Organization:**

- Unit tests: `tests/unit_tests/` (no network calls allowed)
- Integration tests: `tests/integration_tests/` (network calls permitted)
- Use `pytest` as the testing framework

**Test Quality Checklist:**

- [ ] Tests fail when your new logic is broken
- [ ] Happy path is covered
- [ ] Edge cases and error conditions are tested (happy path, invalid input, etc.)
- [ ] Use fixtures/mocks for external dependencies (see the sketch after the example below)
- [ ] Tests are deterministic (no flaky tests)

```python
def test_filter_unknown_users():
    """Test filtering unknown users from a list."""
    users = ["alice", "bob", "charlie"]
    known_users = {"alice", "bob"}

    result = filter_unknown_users(users, known_users)

    assert result == ["charlie"]
    assert len(result) == 1
```

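For the fixtures/mocks item, here is a minimal sketch; `FakeWeatherClient` and `get_forecast` are hypothetical names invented for illustration, not part of this codebase. The point is that a fake dependency keeps the unit test deterministic and off the network.

```python
import pytest


class FakeWeatherClient:
    """Stands in for a real HTTP client so the test never touches the network."""

    def fetch(self, city: str) -> dict:
        return {"city": city, "temp_c": 21.0}


def get_forecast(client, city: str) -> str:
    """Toy function under test: formats data returned by the injected client."""
    data = client.fetch(city)
    return f"{data['city']}: {data['temp_c']}°C"


@pytest.fixture
def weather_client() -> FakeWeatherClient:
    return FakeWeatherClient()


def test_get_forecast(weather_client: FakeWeatherClient) -> None:
    assert get_forecast(weather_client, "Paris") == "Paris: 21.0°C"
```
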
### 4. Security and Risk Assessment

**Security Checklist:**

- No `eval()`, `exec()`, or `pickle` on user-controlled input
- Proper exception handling (no bare `except:`); assign error messages to a `msg` variable before raising (see the sketch after this section)
- Remove unreachable/commented code before committing
- Watch for race conditions and resource leaks (file handles, sockets, threads)
- Ensure proper resource cleanup (file handles, connections)

❌ **Bad:**

```python
def load_config(path):
    with open(path) as f:
        return eval(f.read())  # ⚠️ Never eval config
```

✅ **Good:**

```python
import json


def load_config(path: str) -> dict:
    with open(path) as f:
        return json.load(f)
```

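A minimal sketch of the exception-handling convention named in the checklist; the `msg` variable is the pattern in question, while `ConfigError` is a hypothetical exception type used only for illustration:

```python
import json
from pathlib import Path


class ConfigError(ValueError):
    """Hypothetical error type for configuration problems."""


def load_config_strict(path: str) -> dict:
    """Load JSON config, raising a descriptive error instead of failing silently."""
    try:
        text = Path(path).read_text()
    except OSError as exc:  # narrow except clause, never a bare `except:`
        msg = f"Could not read config file {path!r}: {exc}"
        raise ConfigError(msg) from exc
    try:
        return json.loads(text)
    except json.JSONDecodeError as exc:
        msg = f"Config file {path!r} is not valid JSON"
        raise ConfigError(msg) from exc
```
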
### 5. Documentation Standards

**Use Google-style docstrings with Args section for all public functions.**

❌ **Insufficient Documentation:**

```python
def send_email(to, msg):
    """Send an email to a recipient."""
```

✅ **Complete Documentation:**

```python
def send_email(to: str, msg: str, *, priority: str = "normal") -> bool:
    """Send an email to a recipient with specified priority.

    Args:
        to: The email address of the recipient.
        msg: The message body to send.
        priority: Email priority level (`'low'`, `'normal'`, `'high'`).

    Returns:
        `True` if email was sent successfully, `False` otherwise.

    Raises:
        `InvalidEmailError`: If the email address format is invalid.
        `SMTPConnectionError`: If unable to connect to email server.
    """
```

**Documentation Guidelines:**

- Types go in function signatures, NOT in docstrings
- If a default is present, DO NOT repeat it in the docstring unless there is post-processing or it is set conditionally.
- Focus on "why" rather than "what" in descriptions
- Document all parameters, return values, and exceptions
- Keep descriptions concise but clear
- Ensure American English spelling (e.g., "behavior", not "behaviour")

📌 *Tip:* Only document return values if non-obvious.

### 6. Architectural Improvements

**When you encounter code that could be improved, suggest better designs:**

❌ **Poor Design:**

```python
def process_data(data, db_conn, email_client, logger):
    # Function doing too many things
    validated = validate_data(data)
    result = db_conn.save(validated)
    email_client.send_notification(result)
    logger.log(f"Processed {len(data)} items")
    return result
```

✅ **Better Design:**

```python
from dataclasses import dataclass, field


@dataclass
class ProcessingResult:
    """Result of data processing operation."""

    items_processed: int
    success: bool
    errors: list[str] = field(default_factory=list)


class DataProcessor:
    """Handles data validation, storage, and notification."""

    def __init__(self, db_conn: Database, email_client: EmailClient):
        self.db = db_conn
        self.email = email_client

    def process(self, data: list[dict]) -> ProcessingResult:
        """Process and store data with notifications."""
        validated = self._validate_data(data)
        result = self.db.save(validated)
        self._notify_completion(result)
        return result
```

**Design Improvement Areas:**

If there's a **cleaner**, **more scalable**, or **simpler** design, highlight it and suggest improvements that would:

- Reduce code duplication through shared utilities
- Improve separation of concerns (single responsibility)
- Make unit testing easier through dependency injection (see the sketch after this list)
- Add clarity without adding complexity
- Prefer dataclasses for structured data

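One payoff of injecting the dependencies above, sketched with hypothetical fakes (`FakeDatabase` and `FakeEmailClient` are illustrations, and this assumes the elided `_validate_data`/`_notify_completion` helpers behave as their names suggest): the processor can be unit-tested without a live database or SMTP server.

```python
class FakeDatabase:
    def __init__(self) -> None:
        self.saved: list[dict] = []

    def save(self, items: list[dict]) -> ProcessingResult:
        self.saved.extend(items)
        return ProcessingResult(items_processed=len(items), success=True)


class FakeEmailClient:
    def send_notification(self, result: ProcessingResult) -> None:
        pass  # No-op: the test only cares that processing succeeds.


def test_process_saves_validated_data() -> None:
    processor = DataProcessor(FakeDatabase(), FakeEmailClient())
    result = processor.process([{"id": 1}])
    assert result.success
```
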
## Development Tools & Commands

### Package Management

```bash
# Add package
uv add package-name

# Sync project dependencies
uv sync
uv lock
```

### Testing

Each package in `libs/` has its own `pyproject.toml` and `uv.lock`.

```bash
# Run unit tests (no network)
make test

# Don't run integration tests, as API keys must be set

# Run specific test file
uv run --group test pytest tests/unit_tests/test_specific.py
```

### Code Quality

```bash
# Lint code
make lint
```

@@ -261,66 +64,118 @@ make format

```bash
uv run --group lint mypy .
```

### Dependency Management Patterns

#### Key config files

- `pyproject.toml`: Main workspace configuration with dependency groups
- `uv.lock`: Locked dependencies for reproducible builds
- `Makefile`: Development tasks

**Local Development Dependencies:**

```toml
[tool.uv.sources]
langchain-core = { path = "../core", editable = true }
langchain-tests = { path = "../standard-tests", editable = true }
```

#### Commit standards

Suggest PR titles that follow Conventional Commits format. Refer to `.github/workflows/pr_lint` for allowed types and scopes.

#### Pull request guidelines

- Always add a disclaimer to the PR description mentioning how AI agents are involved with the contribution.
- Describe the "why" of the changes, and why the proposed solution is the right one. Limit prose.
- Highlight areas of the proposed changes that require careful review.

**For tools, use the `@tool` decorator from `langchain_core.tools`:**

```python
from langchain_core.tools import tool


@tool
def search_database(query: str) -> str:
    """Search the database for relevant information."""
    # Placeholder body; a real tool would query an actual backend.
    return f"Results for: {query}"
```

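Once decorated, the function above is exposed as a `BaseTool`. A quick usage sketch (the outputs shown in comments are illustrative):

```python
print(search_database.name)         # "search_database"
print(search_database.description)  # "Search the database for relevant information."
print(search_database.invoke({"query": "vector stores"}))
```
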
## Core development principles

### Maintain stable public interfaces

CRITICAL: Always attempt to preserve function signatures, argument positions, and names for exported/public methods. Do not make breaking changes.

**Before making ANY changes to public APIs:**

- Check if the function/class is exported in `__init__.py`
- Look for existing usage patterns in tests and examples
- Use keyword-only arguments for new parameters: `*, new_param: str = "default"` (see the sketch after this section)
- Mark experimental features clearly with docstring warnings (using MkDocs Material admonitions, like `!!! warning`)

Ask: "Would this change break someone's code if they used it last week?"

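A minimal sketch of the keyword-only rule above; `fetch_items` and `timeout` are hypothetical names invented for illustration. Adding the parameter after `*` means every existing positional call site keeps working.

```python
def fetch_items(user_id: str, verbose: bool = False, *, timeout: float = 10.0) -> list[str]:
    """Fetch items for a user.

    Args:
        user_id: Identifier of the user to fetch items for.
        verbose: Whether to emit extra logging.
        timeout: Seconds to wait before giving up. Keyword-only, so existing
            positional callers such as `fetch_items("u1", True)` keep working.
    """
    ...
```
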
### Code quality standards

All Python code MUST include type hints and return types.

```python title="Example"
def filter_unknown_users(users: list[str], known_users: set[str]) -> list[str]:
    """Single line description of the function.

    Any additional context about the function can go here.

    Args:
        users: List of user identifiers to filter.
        known_users: Set of known/valid user identifiers.

    Returns:
        List of users that are not in the known_users set.
    """
    return [user for user in users if user not in known_users]
```

- Use descriptive, self-explanatory variable names.
- Follow existing patterns in the codebase you're modifying
- Attempt to break up complex functions (>20 lines) into smaller, focused functions where it makes sense

## Commit Standards

**Use Conventional Commits format for PR titles:**

- `feat(core): add multi-tenant support`
- `fix(cli): resolve flag parsing error`
- `docs: update API usage examples`
- `docs(openai): update API usage examples`

### Testing requirements

Every new feature or bugfix MUST be covered by unit tests.

- Unit tests: `tests/unit_tests/` (no network calls allowed)
- Integration tests: `tests/integration_tests/` (network calls permitted)
- We use `pytest` as the testing framework; if in doubt, check other existing tests for examples.
- The testing file structure should mirror the source code structure.

**Checklist:**

- [ ] Tests fail when your new logic is broken
- [ ] Happy path is covered
- [ ] Edge cases and error conditions are tested
- [ ] Use fixtures/mocks for external dependencies
- [ ] Tests are deterministic (no flaky tests)

## Framework-Specific Guidelines

- Follow the existing patterns in `langchain-core` for base abstractions
- Use `langchain_core.callbacks` for execution tracking
- Implement proper streaming support where applicable
- Avoid deprecated components like legacy `LLMChain`

### Partner Integrations

- Follow the established patterns in existing partner libraries
- Implement standard interfaces (`BaseChatModel`, `BaseEmbeddings`, etc.)
- Include comprehensive integration tests
- Document API key requirements and authentication

### Security and risk assessment

- No `eval()`, `exec()`, or `pickle` on user-controlled input
- Proper exception handling (no bare `except:`); assign error messages to a `msg` variable before raising
- Remove unreachable/commented code before committing
- Watch for race conditions and resource leaks (file handles, sockets, threads)
- Ensure proper resource cleanup (file handles, connections)

---

## Quick Reference Checklist

Before submitting code changes:

- [ ] **Breaking Changes**: Verified no public API changes
- [ ] **Type Hints**: All functions have complete type annotations
- [ ] **Tests**: New functionality is fully tested
- [ ] **Security**: No dangerous patterns (eval, silent failures, etc.)
- [ ] **Documentation**: Google-style docstrings for public functions
- [ ] **Code Quality**: `make lint` and `make format` pass
- [ ] **Architecture**: Suggested improvements where applicable
- [ ] **Commit Message**: Follows Conventional Commits format

### Documentation standards

Use Google-style docstrings with Args section for all public functions.

```python title="Example"
def send_email(to: str, msg: str, *, priority: str = "normal") -> bool:
    """Send an email to a recipient with specified priority.

    Any additional context about the function can go here.

    Args:
        to: The email address of the recipient.
        msg: The message body to send.
        priority: Email priority level.

    Returns:
        `True` if email was sent successfully, `False` otherwise.

    Raises:
        InvalidEmailError: If the email address format is invalid.
        SMTPConnectionError: If unable to connect to email server.
    """
```

- Types go in function signatures, NOT in docstrings
- If a default is present, DO NOT repeat it in the docstring unless there is post-processing or it is set conditionally.
- Focus on "why" rather than "what" in descriptions
- Document all parameters, return values, and exceptions
- Keep descriptions concise but clear
- Ensure American English spelling (e.g., "behavior", not "behaviour")

## Additional resources

- **Documentation:** https://docs.langchain.com/oss/python/langchain/overview and source at https://github.com/langchain-ai/docs or `../docs/`. Prefer the local install and use file search tools for best results. If needed, use the docs MCP server as defined in `.mcp.json` for programmatic access.
- **Contributing Guide:** [`.github/CONTRIBUTING.md`](https://docs.langchain.com/oss/python/contributing/overview)

@@ -1,9 +0,0 @@

# Migrating

Please see the following guides for migrating LangChain code:

* Migrate to [LangChain v1.0](https://docs.langchain.com/oss/python/migrate/langchain-v1)
* Migrate to [LangChain v0.3](https://python.langchain.com/docs/versions/v0_3/)
* Migrate to [LangChain v0.2](https://python.langchain.com/docs/versions/v0_2/)
* Migrating from [LangChain 0.0.x Chains](https://python.langchain.com/docs/versions/migrating_chains/)
* Upgrade to [LangGraph Memory](https://python.langchain.com/docs/versions/migrating_memory/)

README.md

@@ -1,50 +1,43 @@

<p align="center">
  <picture>
    <source media="(prefers-color-scheme: light)" srcset=".github/images/logo-dark.svg">
    <source media="(prefers-color-scheme: dark)" srcset=".github/images/logo-light.svg">
    <img alt="LangChain Logo" src=".github/images/logo-dark.svg" width="80%">
  </picture>
</p>

<div align="center">
  <a href="https://www.langchain.com/">
    <picture>
      <source media="(prefers-color-scheme: light)" srcset=".github/images/logo-dark.svg">
      <source media="(prefers-color-scheme: dark)" srcset=".github/images/logo-light.svg">
      <img alt="LangChain Logo" src=".github/images/logo-dark.svg" width="80%">
    </picture>
  </a>
</div>

<p align="center">
  The platform for reliable agents.
</p>

<div align="center">
  <h3>The platform for reliable agents.</h3>
</div>

<p align="center">
  <a href="https://opensource.org/licenses/MIT" target="_blank">
    <img src="https://img.shields.io/pypi/l/langchain" alt="PyPI - License">
  </a>
  <a href="https://pypistats.org/packages/langchain" target="_blank">
    <img src="https://img.shields.io/pepy/dt/langchain" alt="PyPI - Downloads">
  </a>
  <a href="https://pypi.org/project/langchain/#history" target="_blank">
    <img src="https://img.shields.io/pypi/v/langchain?label=%20" alt="Version">
  </a>
  <a href="https://vscode.dev/redirect?url=vscode://ms-vscode-remote.remote-containers/cloneInVolume?url=https://github.com/langchain-ai/langchain" target="_blank">
    <img src="https://img.shields.io/static/v1?label=Dev%20Containers&message=Open&color=blue&logo=visualstudiocode" alt="Open in Dev Containers">
  </a>
  <a href="https://codespaces.new/langchain-ai/langchain" target="_blank">
    <img src="https://github.com/codespaces/badge.svg" alt="Open in Github Codespace" title="Open in Github Codespace" width="150" height="20">
  </a>
  <a href="https://codspeed.io/langchain-ai/langchain" target="_blank">
    <img src="https://img.shields.io/endpoint?url=https://codspeed.io/badge.json" alt="CodSpeed Badge">
  </a>
  <a href="https://twitter.com/langchainai" target="_blank">
    <img src="https://img.shields.io/twitter/url/https/twitter.com/langchainai.svg?style=social&label=Follow%20%40LangChainAI" alt="Twitter / X">
  </a>
</p>

<div align="center">
  <a href="https://opensource.org/licenses/MIT" target="_blank"><img src="https://img.shields.io/pypi/l/langchain" alt="PyPI - License"></a>
  <a href="https://pypistats.org/packages/langchain" target="_blank"><img src="https://img.shields.io/pepy/dt/langchain" alt="PyPI - Downloads"></a>
  <a href="https://pypi.org/project/langchain/#history" target="_blank"><img src="https://img.shields.io/pypi/v/langchain?label=%20" alt="Version"></a>
  <a href="https://vscode.dev/redirect?url=vscode://ms-vscode-remote.remote-containers/cloneInVolume?url=https://github.com/langchain-ai/langchain" target="_blank"><img src="https://img.shields.io/static/v1?label=Dev%20Containers&message=Open&color=blue&logo=visualstudiocode" alt="Open in Dev Containers"></a>
  <a href="https://codespaces.new/langchain-ai/langchain" target="_blank"><img src="https://github.com/codespaces/badge.svg" alt="Open in Github Codespace" title="Open in Github Codespace" width="150" height="20"></a>
  <a href="https://codspeed.io/langchain-ai/langchain" target="_blank"><img src="https://img.shields.io/endpoint?url=https://codspeed.io/badge.json" alt="CodSpeed Badge"></a>
  <a href="https://twitter.com/langchainai" target="_blank"><img src="https://img.shields.io/twitter/url/https/twitter.com/langchainai.svg?style=social&label=Follow%20%40LangChainAI" alt="Twitter / X"></a>
</div>

LangChain is a framework for building LLM-powered applications. It helps you chain together interoperable components and third-party integrations to simplify AI application development — all while future-proofing decisions as the underlying technology evolves.
LangChain is a framework for building agents and LLM-powered applications. It helps you chain together interoperable components and third-party integrations to simplify AI application development – all while future-proofing decisions as the underlying technology evolves.

```bash
pip install langchain
```

If you're looking for more advanced customization or agent orchestration, check out [LangGraph](https://docs.langchain.com/oss/python/langgraph/overview), our framework for building controllable agent workflows.

---

**Documentation**: To learn more about LangChain, check out [the docs](https://docs.langchain.com/oss/python/langchain/overview).
**Documentation**:

- [docs.langchain.com](https://docs.langchain.com/oss/python/langchain/overview) – Comprehensive documentation, including conceptual overviews and guides
- [reference.langchain.com/python](https://reference.langchain.com/python) – API reference docs for LangChain packages

**Discussions**: Visit the [LangChain Forum](https://forum.langchain.com) to connect with the community and share all of your technical questions, ideas, and feedback.

> [!NOTE]
> Looking for the JS/TS library? Check out [LangChain.js](https://github.com/langchain-ai/langchainjs).

@@ -55,24 +48,27 @@ LangChain helps developers build applications powered by LLMs through a standard

Use LangChain for:

- **Real-time data augmentation**. Easily connect LLMs to diverse data sources and external/internal systems, drawing from LangChain's vast library of integrations with model providers, tools, vector stores, retrievers, and more.
- **Model interoperability**. Swap models in and out as your engineering team experiments to find the best choice for your application's needs. As the industry frontier evolves, adapt quickly – LangChain's abstractions keep you moving without losing momentum.
- **Rapid prototyping**. Quickly build and iterate on LLM applications with LangChain's modular, component-based architecture. Test different approaches and workflows without rebuilding from scratch, accelerating your development cycle.
- **Production-ready features**. Deploy reliable applications with built-in support for monitoring, evaluation, and debugging through integrations like LangSmith. Scale with confidence using battle-tested patterns and best practices.
- **Vibrant community and ecosystem**. Leverage a rich ecosystem of integrations, templates, and community-contributed components. Benefit from continuous improvements and stay up-to-date with the latest AI developments through an active open-source community.
- **Flexible abstraction layers**. Work at the level of abstraction that suits your needs – from high-level chains for quick starts to low-level components for fine-grained control. LangChain grows with your application's complexity.

## LangChain ecosystem

While the LangChain framework can be used standalone, it also integrates seamlessly with any LangChain product, giving developers a full suite of tools when building LLM applications.

To improve your LLM application development, pair LangChain with:

- [LangGraph](https://docs.langchain.com/oss/python/langgraph/overview) – Build agents that can reliably handle complex tasks with LangGraph, our low-level agent orchestration framework. LangGraph offers customizable architecture, long-term memory, and human-in-the-loop workflows – and is trusted in production by companies like LinkedIn, Uber, Klarna, and GitLab.
- [Integrations](https://docs.langchain.com/oss/python/integrations/providers/overview) – List of LangChain integrations, including chat & embedding models, tools & toolkits, and more
- [LangSmith](https://www.langchain.com/langsmith) – Helpful for agent evals and observability. Debug poor-performing LLM app runs, evaluate agent trajectories, gain visibility in production, and improve performance over time.
- [LangSmith Deployment](https://docs.langchain.com/langsmith/deployments) – Deploy and scale agents effortlessly with a purpose-built deployment platform for long-running, stateful workflows. Discover, reuse, configure, and share agents across teams – and iterate quickly with visual prototyping in [LangSmith Studio](https://docs.langchain.com/langsmith/studio).
- [Deep Agents](https://github.com/langchain-ai/deepagents) *(new!)* – Build agents that can plan, use subagents, and leverage file systems for complex tasks

## Additional resources

- [Learn](https://docs.langchain.com/oss/python/learn): Use cases, conceptual overviews, and more.
- [API Reference](https://reference.langchain.com/python): Detailed reference on navigating base packages and integrations for LangChain.
- [Contributing Guide](https://docs.langchain.com/oss/python/contributing/overview): Learn how to contribute to LangChain projects and find good first issues.
- [LangChain Forum](https://forum.langchain.com): Connect with the community and share all of your technical questions, ideas, and feedback.
- [Chat LangChain](https://chat.langchain.com): Ask questions & chat with our documentation.
- [Code of Conduct](https://github.com/langchain-ai/langchain/blob/master/.github/CODE_OF_CONDUCT.md): Our community guidelines and standards for participation.

@@ -55,10 +55,10 @@ All out of scope targets defined by huntr as well as:

* **langchain-experimental**: This repository is for experimental code and is not
  eligible for bug bounties (see [package warning](https://pypi.org/project/langchain-experimental/)), bug reports to it will be marked as interesting or waste of
  time and published with no bounty attached.
* **tools**: Tools in either langchain or langchain-community are not eligible for bug
* **tools**: Tools in either `langchain` or `langchain-community` are not eligible for bug
  bounties. This includes the following directories
  * libs/langchain/langchain/tools
  * libs/community/langchain_community/tools
  * `libs/langchain/langchain/tools`
  * `libs/community/langchain_community/tools`
* Please review the [Best Practices](#best-practices)
  for more details, but generally tools interact with the real world. Developers are
  expected to understand the security implications of their code and are responsible

@@ -295,7 +295,7 @@

"source": [
    "## TODO: Any functionality specific to this vector store\n",
    "\n",
    "E.g. creating a persisten database to save to your disk, etc."
    "E.g. creating a persistent database to save to your disk, etc."
]
},
{

@@ -6,9 +6,8 @@ import hashlib

import logging
import re
import shutil
from collections.abc import Sequence
from pathlib import Path
from typing import Any, TypedDict
from typing import TYPE_CHECKING, Any, TypedDict

from git import Repo

@@ -18,6 +17,9 @@ from langchain_cli.constants import (

    DEFAULT_GIT_SUBDIRECTORY,
)

if TYPE_CHECKING:
    from collections.abc import Sequence

logger = logging.getLogger(__name__)

@@ -1,9 +1,11 @@

from __future__ import annotations

from dataclasses import dataclass
from typing import TYPE_CHECKING

from .file import File
from .folder import Folder

if TYPE_CHECKING:
    from .file import File
    from .folder import Folder


@dataclass

@@ -1,9 +1,12 @@

from __future__ import annotations

from pathlib import Path
from typing import TYPE_CHECKING

from .file import File

if TYPE_CHECKING:
    from pathlib import Path


class Folder:
    def __init__(self, name: str, *files: Folder | File) -> None:

@@ -34,7 +34,7 @@ The LangChain ecosystem is built on top of `langchain-core`. Some of the benefit

## 📖 Documentation

For full documentation, see the [API reference](https://reference.langchain.com/python/langchain_core/).
For full documentation, see the [API reference](https://reference.langchain.com/python/langchain_core/). For conceptual guides, tutorials, and examples on using LangChain, see the [LangChain Docs](https://docs.langchain.com/oss/python/langchain/overview).

## 📕 Releases & Versioning

@@ -5,12 +5,10 @@

!!! warning
    New agents should be built using the
    [langgraph library](https://github.com/langchain-ai/langgraph), which provides a
    [`langchain` library](https://pypi.org/project/langchain/), which provides a
    simpler and more flexible way to define agents.

    Please see the
    [migration guide](https://python.langchain.com/docs/how_to/migrate_agent/) for
    information on how to migrate existing agents to modern langgraph agents.
    See docs on [building agents](https://docs.langchain.com/oss/python/langchain/agents).

Agents use language models to choose a sequence of actions to take.

@@ -54,31 +52,33 @@ class AgentAction(Serializable):

    """The input to pass in to the Tool."""
    log: str
    """Additional information to log about the action.
    This log can be used in a few ways. First, it can be used to audit
    what exactly the LLM predicted to lead to this (tool, tool_input).
    Second, it can be used in future iterations to show the LLMs prior
    thoughts. This is useful when (tool, tool_input) does not contain
    full information about the LLM prediction (for example, any `thought`
    before the tool/tool_input)."""

    This log can be used in a few ways. First, it can be used to audit what exactly the
    LLM predicted to lead to this `(tool, tool_input)`.

    Second, it can be used in future iterations to show the LLMs prior thoughts. This is
    useful when `(tool, tool_input)` does not contain full information about the LLM
    prediction (for example, any `thought` before the tool/tool_input).
    """
    type: Literal["AgentAction"] = "AgentAction"

    # Override init to support instantiation by position for backward compat.
    def __init__(self, tool: str, tool_input: str | dict, log: str, **kwargs: Any):
        """Create an AgentAction.
        """Create an `AgentAction`.

        Args:
            tool: The name of the tool to execute.
            tool_input: The input to pass in to the Tool.
            tool_input: The input to pass in to the `Tool`.
            log: Additional information to log about the action.
        """
        super().__init__(tool=tool, tool_input=tool_input, log=log, **kwargs)

    @classmethod
    def is_lc_serializable(cls) -> bool:
        """AgentAction is serializable.
        """`AgentAction` is serializable.

        Returns:
            True
            `True`
        """
        return True

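The positional `__init__` override above exists so older call sites keep working. A small usage sketch, assuming `langchain_core.agents` is importable; both forms construct equivalent objects:

```python
from langchain_core.agents import AgentAction

a1 = AgentAction("search", "query text", "llm scratchpad")
a2 = AgentAction(tool="search", tool_input="query text", log="llm scratchpad")
assert a1 == a2  # Field-wise equality from the underlying model
```
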
@@ -100,19 +100,23 @@ class AgentAction(Serializable):

class AgentActionMessageLog(AgentAction):
    """Representation of an action to be executed by an agent.

    This is similar to AgentAction, but includes a message log consisting of
    chat messages. This is useful when working with ChatModels, and is used
    to reconstruct conversation history from the agent's perspective.
    This is similar to `AgentAction`, but includes a message log consisting of
    chat messages.

    This is useful when working with `ChatModels`, and is used to reconstruct
    conversation history from the agent's perspective.
    """

    message_log: Sequence[BaseMessage]
    """Similar to log, this can be used to pass along extra
    information about what exact messages were predicted by the LLM
    before parsing out the (tool, tool_input). This is again useful
    if (tool, tool_input) cannot be used to fully recreate the LLM
    prediction, and you need that LLM prediction (for future agent iteration).
    """Similar to log, this can be used to pass along extra information about what exact
    messages were predicted by the LLM before parsing out the `(tool, tool_input)`.

    This is again useful if `(tool, tool_input)` cannot be used to fully recreate the
    LLM prediction, and you need that LLM prediction (for future agent iteration).

    Compared to `log`, this is useful when the underlying LLM is a
    chat model (and therefore returns messages rather than a string)."""
    chat model (and therefore returns messages rather than a string).
    """
    # Ignoring type because we're overriding the type from AgentAction.
    # And this is the correct thing to do in this case.
    # The type literal is used for serialization purposes.

@@ -120,12 +124,12 @@ class AgentActionMessageLog(AgentAction):


class AgentStep(Serializable):
    """Result of running an AgentAction."""
    """Result of running an `AgentAction`."""

    action: AgentAction
    """The AgentAction that was executed."""
    """The `AgentAction` that was executed."""
    observation: Any
    """The result of the AgentAction."""
    """The result of the `AgentAction`."""

    @property
    def messages(self) -> Sequence[BaseMessage]:

@@ -134,19 +138,22 @@ class AgentStep(Serializable):


class AgentFinish(Serializable):
    """Final return value of an ActionAgent.
    """Final return value of an `ActionAgent`.

    Agents return an AgentFinish when they have reached a stopping condition.
    Agents return an `AgentFinish` when they have reached a stopping condition.
    """

    return_values: dict
    """Dictionary of return values."""
    log: str
    """Additional information to log about the return value.

    This is used to pass along the full LLM prediction, not just the parsed out
    return value. For example, if the full LLM prediction was
    `Final Answer: 2` you may want to just return `2` as a return value, but pass
    along the full string as a `log` (for debugging or observability purposes).
    return value.

    For example, if the full LLM prediction was `Final Answer: 2` you may want to just
    return `2` as a return value, but pass along the full string as a `log` (for
    debugging or observability purposes).
    """
    type: Literal["AgentFinish"] = "AgentFinish"

@@ -156,7 +163,7 @@ class AgentFinish(Serializable):

    @classmethod
    def is_lc_serializable(cls) -> bool:
        """Return True as this class is serializable."""
        """Return `True` as this class is serializable."""
        return True

    @classmethod

@@ -204,7 +211,7 @@ def _convert_agent_observation_to_messages(

        observation: Observation to convert to a message.

    Returns:
        AIMessage that corresponds to the original tool invocation.
        `AIMessage` that corresponds to the original tool invocation.
    """
    if isinstance(agent_action, AgentActionMessageLog):
        return [_create_function_message(agent_action, observation)]

@@ -227,7 +234,7 @@ def _create_function_message(

        observation: the result of the tool invocation.

    Returns:
        FunctionMessage that corresponds to the original tool invocation.
        `FunctionMessage` that corresponds to the original tool invocation.
    """
    if not isinstance(observation, str):
        try:

@@ -2,8 +2,8 @@

Distinct from provider-based [prompt caching](https://docs.langchain.com/oss/python/langchain/models#prompt-caching).

!!! warning
    This is a beta feature! Please be wary of deploying experimental code to production
!!! warning "Beta feature"
    This is a beta feature. Please be wary of deploying experimental code to production
    unless you've taken appropriate precautions.

A cache is useful for two reasons:

@@ -49,17 +49,18 @@ class BaseCache(ABC):

        """Look up based on `prompt` and `llm_string`.

        A cache implementation is expected to generate a key from the 2-tuple
        of prompt and llm_string (e.g., by concatenating them with a delimiter).
        of `prompt` and `llm_string` (e.g., by concatenating them with a delimiter).

        Args:
            prompt: A string representation of the prompt.
                In the case of a chat model, the prompt is a non-trivial
                serialization of the prompt into the language model.
            llm_string: A string representation of the LLM configuration.

                This is used to capture the invocation parameters of the LLM
                (e.g., model name, temperature, stop tokens, max tokens, etc.).
                These invocation parameters are serialized into a string
                representation.

                These invocation parameters are serialized into a string representation.

        Returns:
            On a cache miss, return `None`. On a cache hit, return the cached value.

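A minimal in-memory sketch of the key scheme the docstring describes; this is an illustration, not the library's implementation. It derives a dictionary key from the `(prompt, llm_string)` pair and returns `None` on a miss:

```python
class SimpleDictCache:
    """Toy cache keyed by the (prompt, llm_string) 2-tuple."""

    def __init__(self) -> None:
        self._store: dict[tuple[str, str], object] = {}

    def lookup(self, prompt: str, llm_string: str) -> object | None:
        # A tuple key avoids the delimiter-collision risk of naive concatenation.
        return self._store.get((prompt, llm_string))

    def update(self, prompt: str, llm_string: str, return_val: object) -> None:
        self._store[(prompt, llm_string)] = return_val
```
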
@@ -78,8 +79,10 @@ class BaseCache(ABC):

                In the case of a chat model, the prompt is a non-trivial
                serialization of the prompt into the language model.
            llm_string: A string representation of the LLM configuration.

                This is used to capture the invocation parameters of the LLM
                (e.g., model name, temperature, stop tokens, max tokens, etc.).

                These invocation parameters are serialized into a string
                representation.
            return_val: The value to be cached. The value is a list of `Generation`

@@ -94,15 +97,17 @@ class BaseCache(ABC):

        """Async look up based on `prompt` and `llm_string`.

        A cache implementation is expected to generate a key from the 2-tuple
        of prompt and llm_string (e.g., by concatenating them with a delimiter).
        of `prompt` and `llm_string` (e.g., by concatenating them with a delimiter).

        Args:
            prompt: A string representation of the prompt.
                In the case of a chat model, the prompt is a non-trivial
                serialization of the prompt into the language model.
            llm_string: A string representation of the LLM configuration.

                This is used to capture the invocation parameters of the LLM
                (e.g., model name, temperature, stop tokens, max tokens, etc.).

                These invocation parameters are serialized into a string
                representation.

@@ -125,8 +130,10 @@ class BaseCache(ABC):

                In the case of a chat model, the prompt is a non-trivial
                serialization of the prompt into the language model.
            llm_string: A string representation of the LLM configuration.

                This is used to capture the invocation parameters of the LLM
                (e.g., model name, temperature, stop tokens, max tokens, etc.).

                These invocation parameters are serialized into a string
                representation.
            return_val: The value to be cached. The value is a list of `Generation`

@@ -5,13 +5,12 @@ from __future__ import annotations

import logging
from typing import TYPE_CHECKING, Any

from typing_extensions import Self

if TYPE_CHECKING:
    from collections.abc import Sequence
    from uuid import UUID

    from tenacity import RetryCallState
    from typing_extensions import Self

    from langchain_core.agents import AgentAction, AgentFinish
    from langchain_core.documents import Document

@@ -39,7 +39,6 @@ from langchain_core.tracers.context import (

    tracing_v2_callback_var,
)
from langchain_core.tracers.langchain import LangChainTracer
from langchain_core.tracers.schemas import Run
from langchain_core.tracers.stdout import ConsoleCallbackHandler
from langchain_core.utils.env import env_var_is_set

@@ -52,6 +51,7 @@ if TYPE_CHECKING:

    from langchain_core.documents import Document
    from langchain_core.outputs import ChatGenerationChunk, GenerationChunk, LLMResult
    from langchain_core.runnables.config import RunnableConfig
    from langchain_core.tracers.schemas import Run

logger = logging.getLogger(__name__)

@@ -229,7 +229,24 @@ def shielded(func: Func) -> Func:

    @functools.wraps(func)
    async def wrapped(*args: Any, **kwargs: Any) -> Any:
        return await asyncio.shield(func(*args, **kwargs))
        # Capture the current context to preserve context variables
        ctx = copy_context()

        # Create the coroutine
        coro = func(*args, **kwargs)

        # For Python 3.11+, create the task with an explicit context
        # For older versions, fall back to the original behavior
        try:
            # Create a task with the captured context to preserve context variables
            task = asyncio.create_task(coro, context=ctx)  # type: ignore[call-arg, unused-ignore]
            # `call-arg` used to not fail 3.9 or 3.10 tests
            return await asyncio.shield(task)
        except TypeError:
            # Python < 3.11 fallback - create task normally then shield
            # This won't preserve context perfectly but is better than nothing
            task = asyncio.create_task(coro)
            return await asyncio.shield(task)

    return cast("Func", wrapped)

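A self-contained sketch (pure stdlib, Python 3.11+) of the context behavior the rewrite relies on: a task created with an explicitly captured context sees the `ContextVar` values from that snapshot, regardless of later mutations in the caller. This illustrates the snapshot semantics, not the exact library bug being fixed.

```python
import asyncio
from contextvars import ContextVar, copy_context

request_id: ContextVar[str] = ContextVar("request_id", default="unset")


async def report() -> str:
    return request_id.get()


async def main() -> None:
    request_id.set("abc-123")
    ctx = copy_context()              # Snapshot of the caller's context variables
    request_id.set("changed-later")   # This mutation must not leak into the task
    task = asyncio.create_task(report(), context=ctx)
    print(await asyncio.shield(task))  # -> abc-123


asyncio.run(main())
```
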
@@ -24,7 +24,7 @@ class UsageMetadataCallbackHandler(BaseCallbackHandler):

    from langchain_core.callbacks import UsageMetadataCallbackHandler

    llm_1 = init_chat_model(model="openai:gpt-4o-mini")
    llm_2 = init_chat_model(model="anthropic:claude-3-5-haiku-latest")
    llm_2 = init_chat_model(model="anthropic:claude-3-5-haiku-20241022")

    callback = UsageMetadataCallbackHandler()
    result_1 = llm_1.invoke("Hello", config={"callbacks": [callback]})

@@ -43,7 +43,7 @@ class UsageMetadataCallbackHandler(BaseCallbackHandler):

    'input_token_details': {'cache_read': 0, 'cache_creation': 0}}}
    ```

    !!! version-added "Added in version 0.3.49"
    !!! version-added "Added in `langchain-core` 0.3.49"

    """

@@ -109,7 +109,7 @@ def get_usage_metadata_callback(
|
||||
from langchain_core.callbacks import get_usage_metadata_callback
|
||||
|
||||
llm_1 = init_chat_model(model="openai:gpt-4o-mini")
|
||||
llm_2 = init_chat_model(model="anthropic:claude-3-5-haiku-latest")
|
||||
llm_2 = init_chat_model(model="anthropic:claude-3-5-haiku-20241022")
|
||||
|
||||
with get_usage_metadata_callback() as cb:
|
||||
llm_1.invoke("Hello")
|
||||
@@ -134,7 +134,7 @@ def get_usage_metadata_callback(
|
||||
}
|
||||
```
|
||||
|
||||
!!! version-added "Added in version 0.3.49"
|
||||
!!! version-added "Added in `langchain-core` 0.3.49"
|
||||
|
||||
"""
|
||||
usage_metadata_callback_var: ContextVar[UsageMetadataCallbackHandler | None] = (
|
||||
|
||||
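The docstring snippets in these hunks assume real provider credentials. A self-contained way to exercise the same wiring is with a fake chat model from `langchain_core`; note the fake model reports no token usage, so the output shape shown in the docstrings only appears with real providers:

```python
from langchain_core.callbacks import UsageMetadataCallbackHandler
from langchain_core.language_models import GenericFakeChatModel
from langchain_core.messages import AIMessage

# A fake model stands in for a real provider here.
model = GenericFakeChatModel(messages=iter([AIMessage(content="hello")]))
callback = UsageMetadataCallbackHandler()
model.invoke("Hi", config={"callbacks": [callback]})

# {} for the fake model; keyed by model name for real providers,
# as in the docstring output above.
print(callback.usage_metadata)
```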
@@ -121,7 +121,7 @@ class BaseChatMessageHistory(ABC):
This method may be deprecated in a future release.

Args:
message: The human message to add to the store.
message: The `HumanMessage` to add to the store.
"""
if isinstance(message, HumanMessage):
self.add_message(message)
@@ -129,7 +129,7 @@ class BaseChatMessageHistory(ABC):
self.add_message(HumanMessage(content=message))

def add_ai_message(self, message: AIMessage | str) -> None:
"""Convenience method for adding an AI message string to the store.
"""Convenience method for adding an `AIMessage` string to the store.

!!! note
This is a convenience method. Code should favor the bulk `add_messages`
@@ -138,7 +138,7 @@ class BaseChatMessageHistory(ABC):
This method may be deprecated in a future release.

Args:
message: The AI message to add.
message: The `AIMessage` to add.
"""
if isinstance(message, AIMessage):
self.add_message(message)
@@ -173,7 +173,7 @@ class BaseChatMessageHistory(ABC):
in an efficient manner to avoid unnecessary round-trips to the underlying store.

Args:
messages: A sequence of BaseMessage objects to store.
messages: A sequence of `BaseMessage` objects to store.
"""
for message in messages:
self.add_message(message)
@@ -182,7 +182,7 @@ class BaseChatMessageHistory(ABC):
"""Async add a list of messages.

Args:
messages: A sequence of BaseMessage objects to store.
messages: A sequence of `BaseMessage` objects to store.
"""
await run_in_executor(None, self.add_messages, messages)

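A short sketch of the bulk-versus-convenience distinction these hunks document, using the in-memory implementation shipped in `langchain_core`:

```python
from langchain_core.chat_history import InMemoryChatMessageHistory
from langchain_core.messages import AIMessage, HumanMessage

history = InMemoryChatMessageHistory()

# Preferred bulk API: one call, one potential round-trip to the store.
history.add_messages([HumanMessage(content="Hi"), AIMessage(content="Hello!")])

# Convenience wrappers from the diff; each is a separate add_message call.
history.add_user_message("How are you?")
history.add_ai_message("Fine, thanks.")

print([type(m).__name__ for m in history.messages])
# ['HumanMessage', 'AIMessage', 'HumanMessage', 'AIMessage']
```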
@@ -27,7 +27,7 @@ class BaseLoader(ABC): # noqa: B024
"""Interface for Document Loader.

Implementations should implement the lazy-loading method using generators
to avoid loading all Documents into memory at once.
to avoid loading all documents into memory at once.

`load` is provided just for user convenience and should not be overridden.
"""
@@ -53,9 +53,11 @@ class BaseLoader(ABC): # noqa: B024
def load_and_split(
self, text_splitter: TextSplitter | None = None
) -> list[Document]:
"""Load Documents and split into chunks. Chunks are returned as `Document`.
"""Load `Document` and split into chunks. Chunks are returned as `Document`.

Do not override this method. It should be considered to be deprecated!
!!! danger

Do not override this method. It should be considered to be deprecated!

Args:
text_splitter: `TextSplitter` instance to use for splitting documents.
@@ -135,7 +137,7 @@ class BaseBlobParser(ABC):
"""

def parse(self, blob: Blob) -> list[Document]:
"""Eagerly parse the blob into a `Document` or `Document` objects.
"""Eagerly parse the blob into a `Document` or list of `Document` objects.

This is a convenience method for interactive development environment.


@@ -28,7 +28,7 @@ class BlobLoader(ABC):
def yield_blobs(
self,
) -> Iterable[Blob]:
"""A lazy loader for raw data represented by LangChain's Blob object.
"""A lazy loader for raw data represented by LangChain's `Blob` object.

Returns:
A generator over blobs

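The generator-based lazy loading that these hunks describe looks roughly like the following sketch; the file format and parsing are placeholders, not part of the diff:

```python
from collections.abc import Iterator

from langchain_core.document_loaders import BaseLoader
from langchain_core.documents import Document


class LineLoader(BaseLoader):
    """Toy loader: one Document per non-empty line of a text file."""

    def __init__(self, path: str) -> None:
        self.path = path

    def lazy_load(self) -> Iterator[Document]:
        # A generator keeps memory flat no matter how large the file is;
        # load() falls back to list(self.lazy_load()) and is not overridden.
        with open(self.path, encoding="utf-8") as f:
            for lineno, line in enumerate(f):
                if line.strip():
                    yield Document(
                        page_content=line.strip(),
                        metadata={"source": self.path, "line": lineno},
                    )
```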
@@ -14,13 +14,13 @@ from langchain_core.documents import Document


class LangSmithLoader(BaseLoader):
"""Load LangSmith Dataset examples as Documents.
"""Load LangSmith Dataset examples as `Document` objects.

Loads the example inputs as the Document page content and places the entire example
into the Document metadata. This allows you to easily create few-shot example
retrievers from the loaded documents.
Loads the example inputs as the `Document` page content and places the entire
example into the `Document` metadata. This allows you to easily create few-shot
example retrievers from the loaded documents.

??? note "Lazy load"
??? note "Lazy loading example"

```python
from langchain_core.document_loaders import LangSmithLoader
@@ -66,12 +66,11 @@ class LangSmithLoader(BaseLoader):
format_content: Function for converting the content extracted from the example
inputs into a string. Defaults to JSON-encoding the contents.
example_ids: The IDs of the examples to filter by.
as_of: The dataset version tag OR
timestamp to retrieve the examples as of.
Response examples will only be those that were present at the time
of the tagged (or timestamped) version.
as_of: The dataset version tag or timestamp to retrieve the examples as of.
Response examples will only be those that were present at the time of
the tagged (or timestamped) version.
splits: A list of dataset splits, which are
divisions of your dataset such as 'train', 'test', or 'validation'.
divisions of your dataset such as `train`, `test`, or `validation`.
Returns examples only from the specified splits.
inline_s3_urls: Whether to inline S3 URLs.
offset: The offset to start from.

@@ -1,7 +1,28 @@
"""Documents module.
"""Documents module for data retrieval and processing workflows.

**Document** module is a collection of classes that handle documents
and their transformations.
This module provides core abstractions for handling data in retrieval-augmented
generation (RAG) pipelines, vector stores, and document processing workflows.

!!! warning "Documents vs. message content"
This module is distinct from `langchain_core.messages.content`, which provides
multimodal content blocks for **LLM chat I/O** (text, images, audio, etc. within
messages).

**Key distinction:**

- **Documents** (this module): For **data retrieval and processing workflows**
- Vector stores, retrievers, RAG pipelines
- Text chunking, embedding, and semantic search
- Example: Chunks of a PDF stored in a vector database

- **Content Blocks** (`messages.content`): For **LLM conversational I/O**
- Multimodal message content sent to/from models
- Tool calls, reasoning, citations within chat
- Example: An image sent to a vision model in a chat message (via
[`ImageContentBlock`][langchain.messages.ImageContentBlock])

While both can represent similar data types (text, files), they serve different
architectural purposes in LangChain applications.
"""

from typing import TYPE_CHECKING

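On the retrieval side of the distinction drawn in this new module docstring, the core object is simply text plus metadata; a one-liner for orientation:

```python
from langchain_core.documents import Document

# Retrieval-side object: page_content plus arbitrary metadata.
chunk = Document(
    page_content="LangChain supports RAG pipelines.",
    metadata={"source": "handbook.pdf", "page": 3},
)
print(chunk.page_content, chunk.metadata["page"])
```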
@@ -1,4 +1,16 @@
"""Base classes for media and documents."""
"""Base classes for media and documents.

This module contains core abstractions for **data retrieval and processing workflows**:

- `BaseMedia`: Base class providing `id` and `metadata` fields
- `Blob`: Raw data loading (files, binary data) - used by document loaders
- `Document`: Text content for retrieval (RAG, vector stores, semantic search)

!!! note "Not for LLM chat messages"
These classes are for data processing pipelines, not LLM I/O. For multimodal
content in chat messages (images, audio in conversations), see
`langchain.messages` content blocks instead.
"""

from __future__ import annotations

@@ -19,20 +31,18 @@ PathLike = str | PurePath


class BaseMedia(Serializable):
"""Use to represent media content.
"""Base class for content used in retrieval and data processing workflows.

Media objects can be used to represent raw data, such as text or binary data.
Provides common fields for content that needs to be stored, indexed, or searched.

LangChain Media objects allow associating metadata and an optional identifier
with the content.

The presence of an ID and metadata make it easier to store, index, and search
over the content in a structured way.
!!! note
For multimodal content in **chat messages** (images, audio sent to/from LLMs),
use `langchain.messages` content blocks instead.
"""

# The ID field is optional at the moment.
# It will likely become required in a future major release after
# it has been adopted by enough vectorstore implementations.
# it has been adopted by enough VectorStore implementations.
id: str | None = Field(default=None, coerce_numbers_to_str=True)
"""An optional identifier for the document.

@@ -45,71 +55,70 @@ class BaseMedia(Serializable):


class Blob(BaseMedia):
"""Blob represents raw data by either reference or value.
"""Raw data abstraction for document loading and file processing.

Provides an interface to materialize the blob in different representations, and
help to decouple the development of data loaders from the downstream parsing of
the raw data.
Represents raw bytes or text, either in-memory or by file reference. Used
primarily by document loaders to decouple data loading from parsing.

Inspired by: https://developer.mozilla.org/en-US/docs/Web/API/Blob
Inspired by [Mozilla's `Blob`](https://developer.mozilla.org/en-US/docs/Web/API/Blob)

Example: Initialize a blob from in-memory data
???+ example "Initialize a blob from in-memory data"

```python
from langchain_core.documents import Blob
```python
from langchain_core.documents import Blob

blob = Blob.from_data("Hello, world!")
blob = Blob.from_data("Hello, world!")

# Read the blob as a string
print(blob.as_string())
# Read the blob as a string
print(blob.as_string())

# Read the blob as bytes
print(blob.as_bytes())
# Read the blob as bytes
print(blob.as_bytes())

# Read the blob as a byte stream
with blob.as_bytes_io() as f:
print(f.read())
```
# Read the blob as a byte stream
with blob.as_bytes_io() as f:
print(f.read())
```

Example: Load from memory and specify mime-type and metadata
??? example "Load from memory and specify MIME type and metadata"

```python
from langchain_core.documents import Blob
```python
from langchain_core.documents import Blob

blob = Blob.from_data(
data="Hello, world!",
mime_type="text/plain",
metadata={"source": "https://example.com"},
)
```
blob = Blob.from_data(
data="Hello, world!",
mime_type="text/plain",
metadata={"source": "https://example.com"},
)
```

Example: Load the blob from a file
??? example "Load the blob from a file"

```python
from langchain_core.documents import Blob
```python
from langchain_core.documents import Blob

blob = Blob.from_path("path/to/file.txt")
blob = Blob.from_path("path/to/file.txt")

# Read the blob as a string
print(blob.as_string())
# Read the blob as a string
print(blob.as_string())

# Read the blob as bytes
print(blob.as_bytes())
# Read the blob as bytes
print(blob.as_bytes())

# Read the blob as a byte stream
with blob.as_bytes_io() as f:
print(f.read())
```
# Read the blob as a byte stream
with blob.as_bytes_io() as f:
print(f.read())
```
"""

data: bytes | str | None = None
"""Raw data associated with the blob."""
"""Raw data associated with the `Blob`."""
mimetype: str | None = None
"""MimeType not to be confused with a file extension."""
"""MIME type, not to be confused with a file extension."""
encoding: str = "utf-8"
"""Encoding to use if decoding the bytes into a string.

Use `utf-8` as default encoding, if decoding to string.
Uses `utf-8` as default encoding if decoding to string.
"""
path: PathLike | None = None
"""Location where the original content was found."""
@@ -123,9 +132,9 @@ class Blob(BaseMedia):
def source(self) -> str | None:
"""The source location of the blob as string if known otherwise none.

If a path is associated with the blob, it will default to the path location.
If a path is associated with the `Blob`, it will default to the path location.

Unless explicitly set via a metadata field called `"source"`, in which
Unless explicitly set via a metadata field called `'source'`, in which
case that value will be used instead.
"""
if self.metadata and "source" in self.metadata:
@@ -211,13 +220,13 @@ class Blob(BaseMedia):
Args:
path: Path-like object to file to be read
encoding: Encoding to use if decoding the bytes into a string
mime_type: If provided, will be set as the mime-type of the data
guess_type: If `True`, the mimetype will be guessed from the file extension,
if a mime-type was not provided
metadata: Metadata to associate with the blob
mime_type: If provided, will be set as the MIME type of the data
guess_type: If `True`, the MIME type will be guessed from the file
extension, if a MIME type was not provided
metadata: Metadata to associate with the `Blob`

Returns:
Blob instance
`Blob` instance
"""
if mime_type is None and guess_type:
mimetype = mimetypes.guess_type(path)[0] if guess_type else None
@@ -243,17 +252,17 @@ class Blob(BaseMedia):
path: str | None = None,
metadata: dict | None = None,
) -> Blob:
"""Initialize the blob from in-memory data.
"""Initialize the `Blob` from in-memory data.

Args:
data: The in-memory data associated with the blob
data: The in-memory data associated with the `Blob`
encoding: Encoding to use if decoding the bytes into a string
mime_type: If provided, will be set as the mime-type of the data
mime_type: If provided, will be set as the MIME type of the data
path: If provided, will be set as the source from which the data came
metadata: Metadata to associate with the blob
metadata: Metadata to associate with the `Blob`

Returns:
Blob instance
`Blob` instance
"""
return cls(
data=data,
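The `source` resolution order described in the first hunk above (an explicit metadata `"source"` wins over the path) can be seen directly; both constructor keywords used here appear in the diffed `from_data` signature:

```python
from langchain_core.documents import Blob

# With only a path, source falls back to the path.
blob = Blob.from_data("hello", path="notes.txt")
print(blob.source)  # notes.txt

# An explicit metadata "source" takes precedence over the path.
blob = Blob.from_data(
    "hello",
    path="notes.txt",
    metadata={"source": "s3://bucket/notes.txt"},
)
print(blob.source)  # s3://bucket/notes.txt
```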
@@ -274,6 +283,10 @@ class Blob(BaseMedia):
class Document(BaseMedia):
"""Class for storing a piece of text and associated metadata.

!!! note
`Document` is for **retrieval workflows**, not chat I/O. For sending text
to an LLM in a conversation, use message types from `langchain.messages`.

Example:
```python
from langchain_core.documents import Document
@@ -296,7 +309,7 @@ class Document(BaseMedia):

@classmethod
def is_lc_serializable(cls) -> bool:
"""Return True as this class is serializable."""
"""Return `True` as this class is serializable."""
return True

@classmethod
@@ -309,10 +322,10 @@ class Document(BaseMedia):
return ["langchain", "schema", "document"]

def __str__(self) -> str:
"""Override __str__ to restrict it to page_content and metadata.
"""Override `__str__` to restrict it to page_content and metadata.

Returns:
A string representation of the Document.
A string representation of the `Document`.
"""
# The format matches pydantic format for __str__.
#

@@ -21,14 +21,14 @@ class BaseDocumentCompressor(BaseModel, ABC):

This abstraction is primarily used for post-processing of retrieved documents.

Documents matching a given query are first retrieved.
`Document` objects matching a given query are first retrieved.

Then the list of documents can be further processed.

For example, one could re-rank the retrieved documents using an LLM.

!!! note
Users should favor using a RunnableLambda instead of sub-classing from this
Users should favor using a `RunnableLambda` instead of sub-classing from this
interface.

"""
@@ -43,9 +43,9 @@ class BaseDocumentCompressor(BaseModel, ABC):
"""Compress retrieved documents given the query context.

Args:
documents: The retrieved documents.
documents: The retrieved `Document` objects.
query: The query context.
callbacks: Optional callbacks to run during compression.
callbacks: Optional `Callbacks` to run during compression.

Returns:
The compressed documents.
@@ -61,9 +61,9 @@ class BaseDocumentCompressor(BaseModel, ABC):
"""Async compress retrieved documents given the query context.

Args:
documents: The retrieved documents.
documents: The retrieved `Document` objects.
query: The query context.
callbacks: Optional callbacks to run during compression.
callbacks: Optional `Callbacks` to run during compression.

Returns:
The compressed documents.

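The note in this hunk recommends a `RunnableLambda` over subclassing. A sketch of that style, with a trivial length filter standing in for a real re-ranker:

```python
from langchain_core.documents import Document
from langchain_core.runnables import RunnableLambda


def keep_short(documents: list[Document]) -> list[Document]:
    # Stand-in "compression": drop anything longer than 100 characters.
    return [d for d in documents if len(d.page_content) <= 100]


compressor = RunnableLambda(keep_short)
docs = [Document(page_content="short"), Document(page_content="x" * 500)]
print(len(compressor.invoke(docs)))  # 1
```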
@@ -16,8 +16,8 @@ if TYPE_CHECKING:
class BaseDocumentTransformer(ABC):
"""Abstract base class for document transformation.

A document transformation takes a sequence of Documents and returns a
sequence of transformed Documents.
A document transformation takes a sequence of `Document` objects and returns a
sequence of transformed `Document` objects.

Example:
```python

@@ -18,7 +18,7 @@ class FakeEmbeddings(Embeddings, BaseModel):

This embedding model creates embeddings by sampling from a normal distribution.

!!! warning
!!! danger "Toy model"
Do not use this outside of testing, as it is not a real embedding model.

Instantiate:
@@ -73,7 +73,7 @@ class DeterministicFakeEmbedding(Embeddings, BaseModel):
This embedding model creates embeddings by sampling from a normal distribution
with a seed based on the hash of the text.

!!! warning
!!! danger "Toy model"
Do not use this outside of testing, as it is not a real embedding model.

Instantiate:

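For completeness, the deterministic variant these hunks relabel as a toy model is handy in tests precisely because identical inputs map to identical vectors within a process; a minimal sketch:

```python
from langchain_core.embeddings import DeterministicFakeEmbedding

# Same text, same vector (within one process), so assertions are stable.
emb = DeterministicFakeEmbedding(size=8)
v1 = emb.embed_query("hello")
v2 = emb.embed_query("hello")
assert v1 == v2
print(len(v1))  # 8
```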
@@ -29,7 +29,7 @@ class LengthBasedExampleSelector(BaseExampleSelector, BaseModel):
max_length: int = 2048
"""Max length for the prompt, beyond which examples are cut."""

example_text_lengths: list[int] = Field(default_factory=list) # :meta private:
example_text_lengths: list[int] = Field(default_factory=list)
"""Length of each example."""

def add_example(self, example: dict[str, str]) -> None:

@@ -41,7 +41,7 @@ class _VectorStoreExampleSelector(BaseExampleSelector, BaseModel, ABC):
"""Optional keys to filter input to. If provided, the search is based on
the input variables instead of all variables."""
vectorstore_kwargs: dict[str, Any] | None = None
"""Extra arguments passed to similarity_search function of the vectorstore."""
"""Extra arguments passed to similarity_search function of the `VectorStore`."""

model_config = ConfigDict(
arbitrary_types_allowed=True,
@@ -159,7 +159,7 @@ class SemanticSimilarityExampleSelector(_VectorStoreExampleSelector):
instead of all variables.
example_keys: If provided, keys to filter examples to.
vectorstore_kwargs: Extra arguments passed to similarity_search function
of the vectorstore.
of the `VectorStore`.
vectorstore_cls_kwargs: optional kwargs containing url for vector store

Returns:
@@ -203,7 +203,7 @@ class SemanticSimilarityExampleSelector(_VectorStoreExampleSelector):
instead of all variables.
example_keys: If provided, keys to filter examples to.
vectorstore_kwargs: Extra arguments passed to similarity_search function
of the vectorstore.
of the `VectorStore`.
vectorstore_cls_kwargs: optional kwargs containing url for vector store

Returns:
@@ -286,12 +286,12 @@ class MaxMarginalRelevanceExampleSelector(_VectorStoreExampleSelector):
embeddings: An initialized embedding API interface, e.g. OpenAIEmbeddings().
vectorstore_cls: A vector store DB interface class, e.g. FAISS.
k: Number of examples to select.
fetch_k: Number of Documents to fetch to pass to MMR algorithm.
fetch_k: Number of `Document` objects to fetch to pass to MMR algorithm.
input_keys: If provided, the search is based on the input variables
instead of all variables.
example_keys: If provided, keys to filter examples to.
vectorstore_kwargs: Extra arguments passed to similarity_search function
of the vectorstore.
of the `VectorStore`.
vectorstore_cls_kwargs: optional kwargs containing url for vector store

Returns:
@@ -333,12 +333,12 @@ class MaxMarginalRelevanceExampleSelector(_VectorStoreExampleSelector):
embeddings: An initialized embedding API interface, e.g. OpenAIEmbeddings().
vectorstore_cls: A vector store DB interface class, e.g. FAISS.
k: Number of examples to select.
fetch_k: Number of Documents to fetch to pass to MMR algorithm.
fetch_k: Number of `Document` objects to fetch to pass to MMR algorithm.
input_keys: If provided, the search is based on the input variables
instead of all variables.
example_keys: If provided, keys to filter examples to.
vectorstore_kwargs: Extra arguments passed to similarity_search function
of the vectorstore.
of the `VectorStore`.
vectorstore_cls_kwargs: optional kwargs containing url for vector store

Returns:

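A sketch of the selector pattern these docstrings describe, assembled entirely from `langchain_core` test doubles; with fake embeddings the similarity ranking is arbitrary, so treat the selected example as illustrative only:

```python
from langchain_core.embeddings import DeterministicFakeEmbedding
from langchain_core.example_selectors import SemanticSimilarityExampleSelector
from langchain_core.vectorstores import InMemoryVectorStore

examples = [
    {"input": "happy", "output": "sad"},
    {"input": "tall", "output": "short"},
]
selector = SemanticSimilarityExampleSelector.from_examples(
    examples,
    DeterministicFakeEmbedding(size=16),  # stand-in embedding model
    InMemoryVectorStore,  # vectorstore_cls; FAISS etc. in real use
    k=1,
)
print(selector.select_examples({"input": "cheerful"}))
```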
@@ -16,9 +16,10 @@ class OutputParserException(ValueError, LangChainException): # noqa: N818
"""Exception that output parsers should raise to signify a parsing error.

This exists to differentiate parsing errors from other code or execution errors
that also may arise inside the output parser. `OutputParserException` will be
available to catch and handle in ways to fix the parsing error, while other
errors will be raised.
that also may arise inside the output parser.

`OutputParserException` will be available to catch and handle in ways to fix the
parsing error, while other errors will be raised.
"""

def __init__(
@@ -32,18 +33,19 @@ class OutputParserException(ValueError, LangChainException): # noqa: N818

Args:
error: The error that's being re-raised or an error message.
observation: String explanation of error which can be passed to a
model to try and remediate the issue.
observation: String explanation of error which can be passed to a model to
try and remediate the issue.
llm_output: String model output which is error-ing.

send_to_llm: Whether to send the observation and llm_output back to an Agent
after an `OutputParserException` has been raised.

This gives the underlying model driving the agent the context that the
previous output was improperly structured, in the hopes that it will
update the output to the correct format.

Raises:
ValueError: If `send_to_llm` is True but either observation or
ValueError: If `send_to_llm` is `True` but either observation or
`llm_output` are not provided.
"""
if isinstance(error, str):
@@ -66,11 +68,11 @@ class ErrorCode(Enum):
"""Error codes."""

INVALID_PROMPT_INPUT = "INVALID_PROMPT_INPUT"
INVALID_TOOL_RESULTS = "INVALID_TOOL_RESULTS"
INVALID_TOOL_RESULTS = "INVALID_TOOL_RESULTS" # Used in JS; not Py (yet)
MESSAGE_COERCION_FAILURE = "MESSAGE_COERCION_FAILURE"
MODEL_AUTHENTICATION = "MODEL_AUTHENTICATION"
MODEL_NOT_FOUND = "MODEL_NOT_FOUND"
MODEL_RATE_LIMIT = "MODEL_RATE_LIMIT"
MODEL_AUTHENTICATION = "MODEL_AUTHENTICATION" # Used in JS; not Py (yet)
MODEL_NOT_FOUND = "MODEL_NOT_FOUND" # Used in JS; not Py (yet)
MODEL_RATE_LIMIT = "MODEL_RATE_LIMIT" # Used in JS; not Py (yet)
OUTPUT_PARSING_FAILURE = "OUTPUT_PARSING_FAILURE"


@@ -86,6 +88,6 @@ def create_message(*, message: str, error_code: ErrorCode) -> str:
"""
return (
f"{message}\n"
"For troubleshooting, visit: https://python.langchain.com/docs/"
f"troubleshooting/errors/{error_code.value} "
"For troubleshooting, visit: https://docs.langchain.com/oss/python/langchain"
f"/errors/{error_code.value} "
)

|
||||
"""Code to help indexing data into a vectorstore.
|
||||
|
||||
This package contains helper logic to help deal with indexing data into
|
||||
a vectorstore while avoiding duplicated content and over-writing content
|
||||
a `VectorStore` while avoiding duplicated content and over-writing content
|
||||
if it's unchanged.
|
||||
"""
|
||||
|
||||
|
||||
@@ -6,16 +6,9 @@ import hashlib
import json
import uuid
import warnings
from collections.abc import (
AsyncIterable,
AsyncIterator,
Callable,
Iterable,
Iterator,
Sequence,
)
from itertools import islice
from typing import (
TYPE_CHECKING,
Any,
Literal,
TypedDict,
@@ -29,6 +22,16 @@ from langchain_core.exceptions import LangChainException
from langchain_core.indexing.base import DocumentIndex, RecordManager
from langchain_core.vectorstores import VectorStore

if TYPE_CHECKING:
from collections.abc import (
AsyncIterable,
AsyncIterator,
Callable,
Iterable,
Iterator,
Sequence,
)

# Magic UUID to use as a namespace for hashing.
# Used to try and generate a unique UUID for each document
# from hashing the document content and metadata.
@@ -298,48 +301,49 @@ def index(
For the time being, documents are indexed using their hashes, and users
are not able to specify the uid of the document.

!!! warning "Behavior changed in 0.3.25"
!!! warning "Behavior changed in `langchain-core` 0.3.25"

Added `scoped_full` cleanup mode.

!!! warning

* In full mode, the loader should be returning
the entire dataset, and not just a subset of the dataset.
Otherwise, the auto_cleanup will remove documents that it is not
supposed to.
the entire dataset, and not just a subset of the dataset.
Otherwise, the auto_cleanup will remove documents that it is not
supposed to.
* In incremental mode, if documents associated with a particular
source id appear across different batches, the indexing API
will do some redundant work. This will still result in the
correct end state of the index, but will unfortunately not be
100% efficient. For example, if a given document is split into 15
chunks, and we index them using a batch size of 5, we'll have 3 batches
all with the same source id. In general, to avoid doing too much
redundant work select as big a batch size as possible.
source id appear across different batches, the indexing API
will do some redundant work. This will still result in the
correct end state of the index, but will unfortunately not be
100% efficient. For example, if a given document is split into 15
chunks, and we index them using a batch size of 5, we'll have 3 batches
all with the same source id. In general, to avoid doing too much
redundant work select as big a batch size as possible.
* The `scoped_full` mode is suitable if determining an appropriate batch size
is challenging or if your data loader cannot return the entire dataset at
once. This mode keeps track of source IDs in memory, which should be fine
for most use cases. If your dataset is large (10M+ docs), you will likely
need to parallelize the indexing process regardless.
is challenging or if your data loader cannot return the entire dataset at
once. This mode keeps track of source IDs in memory, which should be fine
for most use cases. If your dataset is large (10M+ docs), you will likely
need to parallelize the indexing process regardless.

Args:
docs_source: Data loader or iterable of documents to index.
record_manager: Timestamped set to keep track of which documents were
updated.
vector_store: VectorStore or DocumentIndex to index the documents into.
vector_store: `VectorStore` or DocumentIndex to index the documents into.
batch_size: Batch size to use when indexing.
cleanup: How to handle clean up of documents.

- incremental: Cleans up all documents that haven't been updated AND
that are associated with source ids that were seen during indexing.
Clean up is done continuously during indexing helping to minimize the
probability of users seeing duplicated content.
that are associated with source IDs that were seen during indexing.
Clean up is done continuously during indexing helping to minimize the
probability of users seeing duplicated content.
- full: Delete all documents that have not been returned by the loader
during this run of indexing.
Clean up runs after all documents have been indexed.
This means that users may see duplicated content during indexing.
during this run of indexing.
Clean up runs after all documents have been indexed.
This means that users may see duplicated content during indexing.
- scoped_full: Similar to Full, but only deletes all documents
that haven't been updated AND that are associated with
source ids that were seen during indexing.
that haven't been updated AND that are associated with
source IDs that were seen during indexing.
- None: Do not delete any documents.
source_id_key: Optional key that helps identify the original source
of the document.
@@ -349,7 +353,7 @@ def index(
key_encoder: Hashing algorithm to use for hashing the document content and
metadata. Options include "blake2b", "sha256", and "sha512".

!!! version-added "Added in version 0.3.66"
!!! version-added "Added in `langchain-core` 0.3.66"

key_encoder: Hashing algorithm to use for hashing the document.
If not provided, a default encoder using SHA-1 will be used.
@@ -363,10 +367,10 @@ def index(
When changing the key encoder, you must change the
index as well to avoid duplicated documents in the cache.
upsert_kwargs: Additional keyword arguments to pass to the add_documents
method of the VectorStore or the upsert method of the DocumentIndex.
method of the `VectorStore` or the upsert method of the DocumentIndex.
For example, you can use this to specify a custom vector_field:
upsert_kwargs={"vector_field": "embedding"}
!!! version-added "Added in version 0.3.10"
!!! version-added "Added in `langchain-core` 0.3.10"

Returns:
Indexing result which contains information about how many documents
@@ -375,10 +379,10 @@ def index(
Raises:
ValueError: If cleanup mode is not one of 'incremental', 'full' or None
ValueError: If cleanup mode is incremental and source_id_key is None.
ValueError: If vectorstore does not have
ValueError: If `VectorStore` does not have
"delete" and "add_documents" required methods.
ValueError: If source_id_key is not None, but is not a string or callable.
TypeError: If `vectorstore` is not a VectorStore or a DocumentIndex.
TypeError: If `vectorstore` is not a `VectorStore` or a DocumentIndex.
AssertionError: If `source_id` is None when cleanup mode is incremental.
(should be unreachable code).
"""
@@ -415,7 +419,7 @@ def index(
raise ValueError(msg)

if type(destination).delete == VectorStore.delete:
# Checking if the vectorstore has overridden the default delete method
# Checking if the VectorStore has overridden the default delete method
# implementation which just raises a NotImplementedError
msg = "Vectorstore has not implemented the delete method"
raise ValueError(msg)
@@ -466,11 +470,11 @@ def index(
]

if cleanup in {"incremental", "scoped_full"}:
# source ids are required.
# Source IDs are required.
for source_id, hashed_doc in zip(source_ids, hashed_docs, strict=False):
if source_id is None:
msg = (
f"Source ids are required when cleanup mode is "
f"Source IDs are required when cleanup mode is "
f"incremental or scoped_full. "
f"Document that starts with "
f"content: {hashed_doc.page_content[:100]} "
@@ -479,7 +483,7 @@ def index(
raise ValueError(msg)
if cleanup == "scoped_full":
scoped_full_cleanup_source_ids.add(source_id)
# source ids cannot be None after for loop above.
# Source IDs cannot be None after for loop above.
source_ids = cast("Sequence[str]", source_ids)

exists_batch = record_manager.exists(
@@ -538,7 +542,7 @@ def index(
# If source IDs are provided, we can do the deletion incrementally!
if cleanup == "incremental":
# Get the uids of the documents that were not returned by the loader.
# mypy isn't good enough to determine that source ids cannot be None
# mypy isn't good enough to determine that source IDs cannot be None
# here due to a check that's happening above, so we check again.
for source_id in source_ids:
if source_id is None:
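The de-duplication behavior these docstring hunks describe can be exercised end to end with in-memory components from `langchain_core`; the second call should report the documents as skipped rather than re-added, since the record manager already holds their hashes:

```python
from langchain_core.documents import Document
from langchain_core.embeddings import DeterministicFakeEmbedding
from langchain_core.indexing import InMemoryRecordManager, index
from langchain_core.vectorstores import InMemoryVectorStore

record_manager = InMemoryRecordManager(namespace="demo")
record_manager.create_schema()
vector_store = InMemoryVectorStore(DeterministicFakeEmbedding(size=16))

docs = [
    Document(page_content="kitty", metadata={"source": "kitty.txt"}),
    Document(page_content="doggy", metadata={"source": "doggy.txt"}),
]

# First run indexes both documents.
print(index(docs, record_manager, vector_store,
            cleanup="incremental", source_id_key="source"))
# Re-running with identical content is a no-op: num_skipped=2, num_added=0.
print(index(docs, record_manager, vector_store,
            cleanup="incremental", source_id_key="source"))
```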
@@ -636,48 +640,49 @@ async def aindex(
For the time being, documents are indexed using their hashes, and users
are not able to specify the uid of the document.

!!! warning "Behavior changed in 0.3.25"
!!! warning "Behavior changed in `langchain-core` 0.3.25"

Added `scoped_full` cleanup mode.

!!! warning

* In full mode, the loader should be returning
the entire dataset, and not just a subset of the dataset.
Otherwise, the auto_cleanup will remove documents that it is not
supposed to.
the entire dataset, and not just a subset of the dataset.
Otherwise, the auto_cleanup will remove documents that it is not
supposed to.
* In incremental mode, if documents associated with a particular
source id appear across different batches, the indexing API
will do some redundant work. This will still result in the
correct end state of the index, but will unfortunately not be
100% efficient. For example, if a given document is split into 15
chunks, and we index them using a batch size of 5, we'll have 3 batches
all with the same source id. In general, to avoid doing too much
redundant work select as big a batch size as possible.
source id appear across different batches, the indexing API
will do some redundant work. This will still result in the
correct end state of the index, but will unfortunately not be
100% efficient. For example, if a given document is split into 15
chunks, and we index them using a batch size of 5, we'll have 3 batches
all with the same source id. In general, to avoid doing too much
redundant work select as big a batch size as possible.
* The `scoped_full` mode is suitable if determining an appropriate batch size
is challenging or if your data loader cannot return the entire dataset at
once. This mode keeps track of source IDs in memory, which should be fine
for most use cases. If your dataset is large (10M+ docs), you will likely
need to parallelize the indexing process regardless.
is challenging or if your data loader cannot return the entire dataset at
once. This mode keeps track of source IDs in memory, which should be fine
for most use cases. If your dataset is large (10M+ docs), you will likely
need to parallelize the indexing process regardless.

Args:
docs_source: Data loader or iterable of documents to index.
record_manager: Timestamped set to keep track of which documents were
updated.
vector_store: VectorStore or DocumentIndex to index the documents into.
vector_store: `VectorStore` or DocumentIndex to index the documents into.
batch_size: Batch size to use when indexing.
cleanup: How to handle clean up of documents.

- incremental: Cleans up all documents that haven't been updated AND
that are associated with source ids that were seen during indexing.
Clean up is done continuously during indexing helping to minimize the
probability of users seeing duplicated content.
that are associated with source IDs that were seen during indexing.
Clean up is done continuously during indexing helping to minimize the
probability of users seeing duplicated content.
- full: Delete all documents that have not been returned by the loader
during this run of indexing.
Clean up runs after all documents have been indexed.
This means that users may see duplicated content during indexing.
during this run of indexing.
Clean up runs after all documents have been indexed.
This means that users may see duplicated content during indexing.
- scoped_full: Similar to Full, but only deletes all documents
that haven't been updated AND that are associated with
source ids that were seen during indexing.
that haven't been updated AND that are associated with
source IDs that were seen during indexing.
- None: Do not delete any documents.
source_id_key: Optional key that helps identify the original source
of the document.
@@ -687,7 +692,7 @@ async def aindex(
key_encoder: Hashing algorithm to use for hashing the document content and
metadata. Options include "blake2b", "sha256", and "sha512".

!!! version-added "Added in version 0.3.66"
!!! version-added "Added in `langchain-core` 0.3.66"

key_encoder: Hashing algorithm to use for hashing the document.
If not provided, a default encoder using SHA-1 will be used.
@@ -701,10 +706,10 @@ async def aindex(
When changing the key encoder, you must change the
index as well to avoid duplicated documents in the cache.
upsert_kwargs: Additional keyword arguments to pass to the add_documents
method of the VectorStore or the upsert method of the DocumentIndex.
method of the `VectorStore` or the upsert method of the DocumentIndex.
For example, you can use this to specify a custom vector_field:
upsert_kwargs={"vector_field": "embedding"}
!!! version-added "Added in version 0.3.10"
!!! version-added "Added in `langchain-core` 0.3.10"

Returns:
Indexing result which contains information about how many documents
@@ -713,10 +718,10 @@ async def aindex(
Raises:
ValueError: If cleanup mode is not one of 'incremental', 'full' or None
ValueError: If cleanup mode is incremental and source_id_key is None.
ValueError: If vectorstore does not have
ValueError: If `VectorStore` does not have
"adelete" and "aadd_documents" required methods.
ValueError: If source_id_key is not None, but is not a string or callable.
TypeError: If `vector_store` is not a VectorStore or DocumentIndex.
TypeError: If `vector_store` is not a `VectorStore` or DocumentIndex.
AssertionError: If `source_id_key` is None when cleanup mode is
incremental or `scoped_full` (should be unreachable).
"""
@@ -757,7 +762,7 @@ async def aindex(
type(destination).adelete == VectorStore.adelete
and type(destination).delete == VectorStore.delete
):
# Checking if the vectorstore has overridden the default adelete or delete
# Checking if the VectorStore has overridden the default adelete or delete
# methods implementation which just raises a NotImplementedError
msg = "Vectorstore has not implemented the adelete or delete method"
raise ValueError(msg)
@@ -815,11 +820,11 @@ async def aindex(
]

if cleanup in {"incremental", "scoped_full"}:
# If the cleanup mode is incremental, source ids are required.
# If the cleanup mode is incremental, source IDs are required.
for source_id, hashed_doc in zip(source_ids, hashed_docs, strict=False):
if source_id is None:
msg = (
f"Source ids are required when cleanup mode is "
f"Source IDs are required when cleanup mode is "
f"incremental or scoped_full. "
f"Document that starts with "
f"content: {hashed_doc.page_content[:100]} "
@@ -828,7 +833,7 @@ async def aindex(
raise ValueError(msg)
if cleanup == "scoped_full":
scoped_full_cleanup_source_ids.add(source_id)
# source ids cannot be None after for loop above.
# Source IDs cannot be None after for loop above.
source_ids = cast("Sequence[str]", source_ids)

exists_batch = await record_manager.aexists(
@@ -888,7 +893,7 @@ async def aindex(
if cleanup == "incremental":
# Get the uids of the documents that were not returned by the loader.

# mypy isn't good enough to determine that source ids cannot be None
# mypy isn't good enough to determine that source IDs cannot be None
# here due to a check that's happening above, so we check again.
for source_id in source_ids:
if source_id is None:

@@ -25,7 +25,7 @@ class RecordManager(ABC):
The record manager abstraction is used by the langchain indexing API.

The record manager keeps track of which documents have been
written into a vectorstore and when they were written.
written into a `VectorStore` and when they were written.

The indexing API computes hashes for each document and stores the hash
together with the write time and the source id in the record manager.
@@ -37,7 +37,7 @@ class RecordManager(ABC):
already been indexed, and to only index new documents.

The main benefit of this abstraction is that it works across many vectorstores.
To be supported, a vectorstore needs to only support the ability to add and
To be supported, a `VectorStore` needs to only support the ability to add and
delete documents by ID. Using the record manager, the indexing API will
be able to delete outdated documents and avoid redundant indexing of documents
that have already been indexed.
@@ -45,13 +45,13 @@ class RecordManager(ABC):
The main constraints of this abstraction are:

1. It relies on the time-stamps to determine which documents have been
indexed and which have not. This means that the time-stamps must be
monotonically increasing. The timestamp should be the timestamp
as measured by the server to minimize issues.
indexed and which have not. This means that the time-stamps must be
monotonically increasing. The timestamp should be the timestamp
as measured by the server to minimize issues.
2. The record manager is currently implemented separately from the
vectorstore, which means that the overall system becomes distributed
and may create issues with consistency. For example, writing to
record manager succeeds, but corresponding writing to vectorstore fails.
vectorstore, which means that the overall system becomes distributed
and may create issues with consistency. For example, writing to
record manager succeeds, but corresponding writing to `VectorStore` fails.
"""

def __init__(
@@ -460,7 +460,7 @@ class UpsertResponse(TypedDict):
class DeleteResponse(TypedDict, total=False):
"""A generic response for delete operation.

The fields in this response are optional and whether the vectorstore
The fields in this response are optional and whether the `VectorStore`
returns them or not is up to the implementation.
"""

@@ -518,7 +518,7 @@ class DocumentIndex(BaseRetriever):
if it is provided. If the ID is not provided, the upsert method is free
to generate an ID for the content.

When an ID is specified and the content already exists in the vectorstore,
When an ID is specified and the content already exists in the `VectorStore`,
the upsert method should update the content with the new data. If the content
does not exist, the upsert method should add the item to the `VectorStore`.

@@ -528,20 +528,20 @@ class DocumentIndex(BaseRetriever):

Returns:
A response object that contains the list of IDs that were
successfully added or updated in the vectorstore and the list of IDs that
successfully added or updated in the `VectorStore` and the list of IDs that
failed to be added or updated.
"""

async def aupsert(
self, items: Sequence[Document], /, **kwargs: Any
) -> UpsertResponse:
"""Add or update documents in the vectorstore. Async version of upsert.
"""Add or update documents in the `VectorStore`. Async version of `upsert`.

The upsert functionality should utilize the ID field of the item
if it is provided. If the ID is not provided, the upsert method is free
to generate an ID for the item.

When an ID is specified and the item already exists in the vectorstore,
When an ID is specified and the item already exists in the `VectorStore`,
the upsert method should update the item with the new data. If the item
does not exist, the upsert method should add the item to the `VectorStore`.

@@ -551,7 +551,7 @@ class DocumentIndex(BaseRetriever):

Returns:
A response object that contains the list of IDs that were
successfully added or updated in the vectorstore and the list of IDs that
successfully added or updated in the `VectorStore` and the list of IDs that
failed to be added or updated.
"""
return await run_in_executor(
@@ -568,7 +568,7 @@ class DocumentIndex(BaseRetriever):
Calling delete without any input parameters should raise a ValueError!

Args:
ids: List of ids to delete.
ids: List of IDs to delete.
**kwargs: Additional keyword arguments. This is up to the implementation.
For example, can include an option to delete the entire index,
or else issue a non-blocking delete etc.
@@ -586,7 +586,7 @@ class DocumentIndex(BaseRetriever):
Calling adelete without any input parameters should raise a ValueError!

Args:
ids: List of ids to delete.
ids: List of IDs to delete.
**kwargs: Additional keyword arguments. This is up to the implementation.
For example, can include an option to delete the entire index.


@@ -62,10 +62,10 @@ class InMemoryDocumentIndex(DocumentIndex):
"""Delete by IDs.

Args:
ids: List of ids to delete.
ids: List of IDs to delete.

Raises:
ValueError: If ids is None.
ValueError: If IDs is None.

Returns:
A response object that contains the list of IDs that were successfully

@@ -6,12 +6,13 @@ LangChain has two main classes to work with language models: chat models and
**Chat models**

Language models that use a sequence of messages as inputs and return chat messages
as outputs (as opposed to using plain text). Chat models support the assignment of
distinct roles to conversation messages, helping to distinguish messages from the AI,
users, and instructions such as system messages.
as outputs (as opposed to using plain text).

The key abstraction for chat models is `BaseChatModel`. Implementations
should inherit from this class.
Chat models support the assignment of distinct roles to conversation messages, helping
to distinguish messages from the AI, users, and instructions such as system messages.

The key abstraction for chat models is `BaseChatModel`. Implementations should inherit
from this class.

See existing [chat model integrations](https://docs.langchain.com/oss/python/integrations/chat).

@@ -52,6 +53,10 @@ if TYPE_CHECKING:
ParrotFakeChatModel,
)
from langchain_core.language_models.llms import LLM, BaseLLM
from langchain_core.language_models.model_profile import (
ModelProfile,
ModelProfileRegistry,
)

__all__ = (
"LLM",
@@ -67,6 +72,8 @@ __all__ = (
"LanguageModelInput",
"LanguageModelLike",
"LanguageModelOutput",
"ModelProfile",
"ModelProfileRegistry",
"ParrotFakeChatModel",
"SimpleChatModel",
"get_tokenizer",
@@ -89,6 +96,8 @@ _dynamic_imports = {
"GenericFakeChatModel": "fake_chat_models",
"ParrotFakeChatModel": "fake_chat_models",
"LLM": "llms",
"ModelProfile": "model_profile",
"ModelProfileRegistry": "model_profile",
"BaseLLM": "llms",
"is_openai_data_block": "_utils",
}

@@ -139,7 +139,8 @@ def _normalize_messages(
directly; this may change in the future
- LangChain v0 standard content blocks for backward compatibility

!!! warning "Behavior changed in 1.0.0"
!!! warning "Behavior changed in `langchain-core` 1.0.0"

In previous versions, this function returned messages in LangChain v0 format.
Now, it returns messages in LangChain v1 format, which upgraded chat models now
expect to receive when passing back in message history. For backward

@@ -131,14 +131,19 @@ class BaseLanguageModel(

Caching is not currently supported for streaming methods of models.
"""

verbose: bool = Field(default_factory=_get_verbosity, exclude=True, repr=False)
"""Whether to print out response text."""

callbacks: Callbacks = Field(default=None, exclude=True)
"""Callbacks to add to the run trace."""

tags: list[str] | None = Field(default=None, exclude=True)
"""Tags to add to the run trace."""

metadata: dict[str, Any] | None = Field(default=None, exclude=True)
"""Metadata to add to the run trace."""

custom_get_token_ids: Callable[[str], list[int]] | None = Field(
default=None, exclude=True
)
@@ -195,15 +200,22 @@ class BaseLanguageModel(
type (e.g., pure text completion models vs chat models).

Args:
prompts: List of `PromptValue` objects. A `PromptValue` is an object that
can be converted to match the format of any language model (string for
pure text generation models and `BaseMessage` objects for chat models).
stop: Stop words to use when generating. Model output is cut off at the
first occurrence of any of these substrings.
callbacks: `Callbacks` to pass through. Used for executing additional
functionality, such as logging or streaming, throughout generation.
**kwargs: Arbitrary additional keyword arguments. These are usually passed
to the model provider API call.
prompts: List of `PromptValue` objects.

A `PromptValue` is an object that can be converted to match the format
of any language model (string for pure text generation models and
`BaseMessage` objects for chat models).
stop: Stop words to use when generating.

Model output is cut off at the first occurrence of any of these
substrings.
callbacks: `Callbacks` to pass through.

Used for executing additional functionality, such as logging or
streaming, throughout generation.
**kwargs: Arbitrary additional keyword arguments.

These are usually passed to the model provider API call.

Returns:
An `LLMResult`, which contains a list of candidate `Generation` objects for
@@ -232,15 +244,22 @@ class BaseLanguageModel(
type (e.g., pure text completion models vs chat models).

Args:
prompts: List of `PromptValue` objects. A `PromptValue` is an object that
can be converted to match the format of any language model (string for
pure text generation models and `BaseMessage` objects for chat models).
stop: Stop words to use when generating. Model output is cut off at the
first occurrence of any of these substrings.
callbacks: `Callbacks` to pass through. Used for executing additional
functionality, such as logging or streaming, throughout generation.
**kwargs: Arbitrary additional keyword arguments. These are usually passed
to the model provider API call.
prompts: List of `PromptValue` objects.

A `PromptValue` is an object that can be converted to match the format
of any language model (string for pure text generation models and
`BaseMessage` objects for chat models).
stop: Stop words to use when generating.

Model output is cut off at the first occurrence of any of these
substrings.
callbacks: `Callbacks` to pass through.

Used for executing additional functionality, such as logging or
streaming, throughout generation.
**kwargs: Arbitrary additional keyword arguments.

These are usually passed to the model provider API call.

Returns:
An `LLMResult`, which contains a list of candidate `Generation` objects for
@@ -262,13 +281,13 @@ class BaseLanguageModel(
return self.lc_attributes

def get_token_ids(self, text: str) -> list[int]:
"""Return the ordered ids of the tokens in a text.
"""Return the ordered IDs of the tokens in a text.

Args:
text: The string input to tokenize.

Returns:
A list of ids corresponding to the tokens in the text, in order they occur
A list of IDs corresponding to the tokens in the text, in order they occur
in the text.
"""
if self.custom_get_token_ids is not None:
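The `custom_get_token_ids` hook mentioned at the end of this hunk lets any model substitute its own tokenizer; a sketch with a toy whitespace tokenizer and a fake model standing in for a real one:

```python
from langchain_core.language_models import GenericFakeChatModel
from langchain_core.messages import AIMessage


def whitespace_token_ids(text: str) -> list[int]:
    # Toy tokenizer: one "token id" per whitespace-separated word.
    return list(range(len(text.split())))


model = GenericFakeChatModel(
    messages=iter([AIMessage(content="ok")]),
    custom_get_token_ids=whitespace_token_ids,
)
# get_num_tokens defaults to len(get_token_ids(text)).
print(model.get_num_tokens("three word input"))  # 3
```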
@@ -280,6 +299,9 @@ class BaseLanguageModel(
|
||||
|
||||
Useful for checking if an input fits in a model's context window.
|
||||
|
||||
This should be overridden by model-specific implementations to provide accurate
|
||||
token counts via model-specific tokenizers.
|
||||
|
||||
Args:
|
||||
text: The string input to tokenize.
|
||||
|
||||
@@ -298,9 +320,17 @@ class BaseLanguageModel(
|
||||
|
||||
Useful for checking if an input fits in a model's context window.
|
||||
|
||||
This should be overridden by model-specific implementations to provide accurate
|
||||
token counts via model-specific tokenizers.
|
||||
|
||||
!!! note
|
||||
The base implementation of `get_num_tokens_from_messages` ignores tool
|
||||
schemas.
|
||||
|
||||
* The base implementation of `get_num_tokens_from_messages` ignores tool
|
||||
schemas.
|
||||
* The base implementation of `get_num_tokens_from_messages` adds additional
|
||||
prefixes to messages in represent user roles, which will add to the
|
||||
overall token count. Model-specific implementations may choose to
|
||||
handle this differently.
|
||||
|
||||
Args:
|
||||
messages: The message inputs to tokenize.
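When the base-implementation caveats above are acceptable, a fast, provider-agnostic estimate is available via `count_tokens_approximately`; a minimal sketch:

```python
from langchain_core.messages import AIMessage, HumanMessage, count_tokens_approximately

messages = [HumanMessage("What is 2 + 2?"), AIMessage("4")]
# Character-based approximation (roughly 4 characters per token, plus a
# small per-message overhead); no model-specific tokenizer is required.
print(count_tokens_approximately(messages))
```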
@@ -33,6 +33,7 @@ from langchain_core.language_models.base import (
LangSmithParams,
LanguageModelInput,
)
from langchain_core.language_models.model_profile import ModelProfile
from langchain_core.load import dumpd, dumps
from langchain_core.messages import (
AIMessage,
@@ -88,7 +89,10 @@ def _generate_response_from_error(error: BaseException) -> list[ChatGeneration]:
try:
metadata["body"] = response.json()
except Exception:
metadata["body"] = getattr(response, "text", None)
try:
metadata["body"] = getattr(response, "text", None)
except Exception:
metadata["body"] = None
if hasattr(response, "headers"):
try:
metadata["headers"] = dict(response.headers)
@@ -329,10 +333,25 @@ class BaseChatModel(BaseLanguageModel[AIMessage], ABC):
[`langchain-openai`](https://pypi.org/project/langchain-openai)) can also use this
field to roll out new content formats in a backward-compatible way.

!!! version-added "Added in version 1.0"
!!! version-added "Added in `langchain-core` 1.0.0"

"""

profile: ModelProfile | None = Field(default=None, exclude=True)
"""Profile detailing model capabilities.

!!! warning "Beta feature"
This is a beta feature. The format of model profiles is subject to change.

If not specified, automatically loaded from the provider package on initialization
if data is available.

Example profile data includes context window sizes, supported modalities, or support
for tool calling, structured output, and other features.

!!! version-added "Added in `langchain-core` 1.1.0"
"""
model_config = ConfigDict(
arbitrary_types_allowed=True,
)
@@ -842,16 +861,21 @@ class BaseChatModel(BaseLanguageModel[AIMessage], ABC):

Args:
messages: List of list of messages.
stop: Stop words to use when generating. Model output is cut off at the
first occurrence of any of these substrings.
callbacks: `Callbacks` to pass through. Used for executing additional
functionality, such as logging or streaming, throughout generation.
stop: Stop words to use when generating.

Model output is cut off at the first occurrence of any of these
substrings.
callbacks: `Callbacks` to pass through.

Used for executing additional functionality, such as logging or
streaming, throughout generation.
tags: The tags to apply.
metadata: The metadata to apply.
run_name: The name of the run.
run_id: The ID of the run.
**kwargs: Arbitrary additional keyword arguments. These are usually passed
to the model provider API call.
**kwargs: Arbitrary additional keyword arguments.

These are usually passed to the model provider API call.

Returns:
An `LLMResult`, which contains a list of candidate `Generations` for each
@@ -960,16 +984,21 @@ class BaseChatModel(BaseLanguageModel[AIMessage], ABC):

Args:
messages: List of list of messages.
stop: Stop words to use when generating. Model output is cut off at the
first occurrence of any of these substrings.
callbacks: `Callbacks` to pass through. Used for executing additional
functionality, such as logging or streaming, throughout generation.
stop: Stop words to use when generating.

Model output is cut off at the first occurrence of any of these
substrings.
callbacks: `Callbacks` to pass through.

Used for executing additional functionality, such as logging or
streaming, throughout generation.
tags: The tags to apply.
metadata: The metadata to apply.
run_name: The name of the run.
run_id: The ID of the run.
**kwargs: Arbitrary additional keyword arguments. These are usually passed
to the model provider API call.
**kwargs: Arbitrary additional keyword arguments.

These are usually passed to the model provider API call.

Returns:
An `LLMResult`, which contains a list of candidate `Generations` for each
@@ -1502,10 +1531,10 @@ class BaseChatModel(BaseLanguageModel[AIMessage], ABC):
Args:
schema: The output schema. Can be passed in as:

- an OpenAI function/tool schema,
- a JSON Schema,
- a `TypedDict` class,
- or a Pydantic class.
- An OpenAI function/tool schema,
- A JSON Schema,
- A `TypedDict` class,
- Or a Pydantic class.

If `schema` is a Pydantic class then the model output will be a
Pydantic instance of that class, and the model-generated fields will be
@@ -1517,11 +1546,15 @@ class BaseChatModel(BaseLanguageModel[AIMessage], ABC):
when specifying a Pydantic or `TypedDict` class.

include_raw:
If `False` then only the parsed structured output is returned. If
an error occurs during model output parsing it will be raised. If `True`
then both the raw model response (a `BaseMessage`) and the parsed model
response will be returned. If an error occurs during output parsing it
will be caught and returned as well.
If `False` then only the parsed structured output is returned.

If an error occurs during model output parsing it will be raised.

If `True` then both the raw model response (a `BaseMessage`) and the
parsed model response will be returned.

If an error occurs during output parsing it will be caught and returned
as well.

The final output is always a `dict` with keys `'raw'`, `'parsed'`, and
`'parsing_error'`.
@@ -1599,7 +1632,7 @@ class BaseChatModel(BaseLanguageModel[AIMessage], ABC):
# }
```

Example: `dict` schema (`include_raw=False`):
Example: Dictionary schema (`include_raw=False`):

```python
from pydantic import BaseModel
@@ -1626,8 +1659,9 @@ class BaseChatModel(BaseLanguageModel[AIMessage], ABC):
# }
```

!!! warning "Behavior changed in 0.2.26"
Added support for TypedDict class.
!!! warning "Behavior changed in `langchain-core` 0.2.26"

Added support for `TypedDict` class.

""" # noqa: E501
_ = kwargs.pop("method", None)
@@ -1726,9 +1760,12 @@ def _gen_info_and_msg_metadata(
}


_MAX_CLEANUP_DEPTH = 100


def _cleanup_llm_representation(serialized: Any, depth: int) -> None:
"""Remove non-serializable objects from a serialized object."""
if depth > 100: # Don't cooperate for pathological cases
if depth > _MAX_CLEANUP_DEPTH: # Don't cooperate for pathological cases
return

if not isinstance(serialized, dict):

@@ -1,4 +1,4 @@
"""Fake chat model for testing purposes."""
"""Fake chat models for testing purposes."""

import asyncio
import re

@@ -1,4 +1,7 @@
"""Base interface for large language models to expose."""
"""Base interface for traditional large language models (LLMs) to expose.

These are traditionally older models (newer models generally are chat models).
"""

from __future__ import annotations

@@ -648,9 +651,12 @@ class BaseLLM(BaseLanguageModel[str], ABC):

Args:
prompts: The prompts to generate from.
stop: Stop words to use when generating. Model output is cut off at the
first occurrence of any of the stop substrings.
If stop tokens are not supported consider raising NotImplementedError.
stop: Stop words to use when generating.

Model output is cut off at the first occurrence of any of these
substrings.

If stop tokens are not supported consider raising `NotImplementedError`.
run_manager: Callback manager for the run.

Returns:
@@ -668,9 +674,12 @@ class BaseLLM(BaseLanguageModel[str], ABC):

Args:
prompts: The prompts to generate from.
stop: Stop words to use when generating. Model output is cut off at the
first occurrence of any of the stop substrings.
If stop tokens are not supported consider raising NotImplementedError.
stop: Stop words to use when generating.

Model output is cut off at the first occurrence of any of these
substrings.

If stop tokens are not supported consider raising `NotImplementedError`.
run_manager: Callback manager for the run.

Returns:
@@ -702,11 +711,14 @@ class BaseLLM(BaseLanguageModel[str], ABC):

Args:
prompt: The prompt to generate from.
stop: Stop words to use when generating. Model output is cut off at the
first occurrence of any of these substrings.
stop: Stop words to use when generating.

Model output is cut off at the first occurrence of any of these
substrings.
run_manager: Callback manager for the run.
**kwargs: Arbitrary additional keyword arguments. These are usually passed
to the model provider API call.
**kwargs: Arbitrary additional keyword arguments.

These are usually passed to the model provider API call.

Yields:
Generation chunks.
@@ -728,11 +740,14 @@ class BaseLLM(BaseLanguageModel[str], ABC):

Args:
prompt: The prompt to generate from.
stop: Stop words to use when generating. Model output is cut off at the
first occurrence of any of these substrings.
stop: Stop words to use when generating.

Model output is cut off at the first occurrence of any of these
substrings.
run_manager: Callback manager for the run.
**kwargs: Arbitrary additional keyword arguments. These are usually passed
to the model provider API call.
**kwargs: Arbitrary additional keyword arguments.

These are usually passed to the model provider API call.

Yields:
Generation chunks.
@@ -843,10 +858,14 @@ class BaseLLM(BaseLanguageModel[str], ABC):

Args:
prompts: List of string prompts.
stop: Stop words to use when generating. Model output is cut off at the
first occurrence of any of these substrings.
callbacks: `Callbacks` to pass through. Used for executing additional
functionality, such as logging or streaming, throughout generation.
stop: Stop words to use when generating.

Model output is cut off at the first occurrence of any of these
substrings.
callbacks: `Callbacks` to pass through.

Used for executing additional functionality, such as logging or
streaming, throughout generation.
tags: List of tags to associate with each prompt. If provided, the length
of the list must match the length of the prompts list.
metadata: List of metadata dictionaries to associate with each prompt. If
@@ -856,8 +875,9 @@ class BaseLLM(BaseLanguageModel[str], ABC):
length of the list must match the length of the prompts list.
run_id: List of run IDs to associate with each prompt. If provided, the
length of the list must match the length of the prompts list.
**kwargs: Arbitrary additional keyword arguments. These are usually passed
to the model provider API call.
**kwargs: Arbitrary additional keyword arguments.

These are usually passed to the model provider API call.

Raises:
ValueError: If prompts is not a list.
@@ -1113,10 +1133,14 @@ class BaseLLM(BaseLanguageModel[str], ABC):

Args:
prompts: List of string prompts.
stop: Stop words to use when generating. Model output is cut off at the
first occurrence of any of these substrings.
callbacks: `Callbacks` to pass through. Used for executing additional
functionality, such as logging or streaming, throughout generation.
stop: Stop words to use when generating.

Model output is cut off at the first occurrence of any of these
substrings.
callbacks: `Callbacks` to pass through.

Used for executing additional functionality, such as logging or
streaming, throughout generation.
tags: List of tags to associate with each prompt. If provided, the length
of the list must match the length of the prompts list.
metadata: List of metadata dictionaries to associate with each prompt. If
@@ -1126,8 +1150,9 @@ class BaseLLM(BaseLanguageModel[str], ABC):
length of the list must match the length of the prompts list.
run_id: List of run IDs to associate with each prompt. If provided, the
length of the list must match the length of the prompts list.
**kwargs: Arbitrary additional keyword arguments. These are usually passed
to the model provider API call.
**kwargs: Arbitrary additional keyword arguments.

These are usually passed to the model provider API call.

Raises:
ValueError: If the length of `callbacks`, `tags`, `metadata`, or
@@ -1391,11 +1416,6 @@ class LLM(BaseLLM):
`astream` will use `_astream` if provided, otherwise it will implement
a fallback behavior that will use `_stream` if `_stream` is implemented,
and use `_acall` if `_stream` is not implemented.

Please see the following guide for more information on how to
implement a custom LLM:

https://python.langchain.com/docs/how_to/custom_llm/
"""

@abstractmethod
@@ -1412,12 +1432,16 @@ class LLM(BaseLLM):

Args:
prompt: The prompt to generate from.
stop: Stop words to use when generating. Model output is cut off at the
first occurrence of any of the stop substrings.
If stop tokens are not supported consider raising NotImplementedError.
stop: Stop words to use when generating.

Model output is cut off at the first occurrence of any of these
substrings.

If stop tokens are not supported consider raising `NotImplementedError`.
run_manager: Callback manager for the run.
**kwargs: Arbitrary additional keyword arguments. These are usually passed
to the model provider API call.
**kwargs: Arbitrary additional keyword arguments.

These are usually passed to the model provider API call.

Returns:
The model output as a string. SHOULD NOT include the prompt.
@@ -1438,12 +1462,16 @@ class LLM(BaseLLM):

Args:
prompt: The prompt to generate from.
stop: Stop words to use when generating. Model output is cut off at the
first occurrence of any of the stop substrings.
If stop tokens are not supported consider raising NotImplementedError.
stop: Stop words to use when generating.

Model output is cut off at the first occurrence of any of these
substrings.

If stop tokens are not supported consider raising `NotImplementedError`.
run_manager: Callback manager for the run.
**kwargs: Arbitrary additional keyword arguments. These are usually passed
to the model provider API call.
**kwargs: Arbitrary additional keyword arguments.

These are usually passed to the model provider API call.

Returns:
The model output as a string. SHOULD NOT include the prompt.

libs/core/langchain_core/language_models/model_profile.py (new file, 84 lines)
@@ -0,0 +1,84 @@
"""Model profile types and utilities."""

from typing_extensions import TypedDict


class ModelProfile(TypedDict, total=False):
"""Model profile.

!!! warning "Beta feature"
This is a beta feature. The format of model profiles is subject to change.

Provides information about chat model capabilities, such as context window sizes
and supported features.
"""

# --- Input constraints ---

max_input_tokens: int
"""Maximum context window (tokens)"""

image_inputs: bool
"""Whether image inputs are supported."""
# TODO: add more detail about formats?

image_url_inputs: bool
"""Whether [image URL inputs](https://docs.langchain.com/oss/python/langchain/models#multimodal)
are supported."""

pdf_inputs: bool
"""Whether [PDF inputs](https://docs.langchain.com/oss/python/langchain/models#multimodal)
are supported."""
# TODO: add more detail about formats? e.g. bytes or base64

audio_inputs: bool
"""Whether [audio inputs](https://docs.langchain.com/oss/python/langchain/models#multimodal)
are supported."""
# TODO: add more detail about formats? e.g. bytes or base64

video_inputs: bool
"""Whether [video inputs](https://docs.langchain.com/oss/python/langchain/models#multimodal)
are supported."""
# TODO: add more detail about formats? e.g. bytes or base64

image_tool_message: bool
"""Whether images can be included in tool messages."""

pdf_tool_message: bool
"""Whether PDFs can be included in tool messages."""

# --- Output constraints ---

max_output_tokens: int
"""Maximum output tokens"""

reasoning_output: bool
"""Whether the model supports [reasoning / chain-of-thought](https://docs.langchain.com/oss/python/langchain/models#reasoning)"""

image_outputs: bool
"""Whether [image outputs](https://docs.langchain.com/oss/python/langchain/models#multimodal)
are supported."""

audio_outputs: bool
"""Whether [audio outputs](https://docs.langchain.com/oss/python/langchain/models#multimodal)
are supported."""

video_outputs: bool
"""Whether [video outputs](https://docs.langchain.com/oss/python/langchain/models#multimodal)
are supported."""

# --- Tool calling ---
tool_calling: bool
"""Whether the model supports [tool calling](https://docs.langchain.com/oss/python/langchain/models#tool-calling)"""

tool_choice: bool
"""Whether the model supports [tool choice](https://docs.langchain.com/oss/python/langchain/models#forcing-tool-calls)"""

# --- Structured output ---
structured_output: bool
"""Whether the model supports a native [structured output](https://docs.langchain.com/oss/python/langchain/models#structured-outputs)
feature"""


ModelProfileRegistry = dict[str, ModelProfile]
"""Registry mapping model identifiers or names to their ModelProfile."""
@@ -17,7 +17,7 @@ def default(obj: Any) -> Any:
obj: The object to serialize to json if it is a Serializable object.

Returns:
A json serializable object or a SerializedNotImplemented object.
A JSON serializable object or a SerializedNotImplemented object.
"""
if isinstance(obj, Serializable):
return obj.to_json()

@@ -61,13 +61,15 @@ class Reviver:
"""Initialize the reviver.

Args:
secrets_map: A map of secrets to load. If a secret is not found in
the map, it will be loaded from the environment if `secrets_from_env`
is True.
secrets_map: A map of secrets to load.

If a secret is not found in the map, it will be loaded from the
environment if `secrets_from_env` is `True`.
valid_namespaces: A list of additional namespaces (modules)
to allow to be deserialized.
secrets_from_env: Whether to load secrets from the environment.
additional_import_mappings: A dictionary of additional namespace mappings

You can use this to override default mappings or add new mappings.
ignore_unserializable_fields: Whether to ignore unserializable fields.
"""
@@ -195,13 +197,15 @@ def loads(

Args:
text: The string to load.
secrets_map: A map of secrets to load. If a secret is not found in
the map, it will be loaded from the environment if `secrets_from_env`
is True.
secrets_map: A map of secrets to load.

If a secret is not found in the map, it will be loaded from the environment
if `secrets_from_env` is `True`.
valid_namespaces: A list of additional namespaces (modules)
to allow to be deserialized.
secrets_from_env: Whether to load secrets from the environment.
additional_import_mappings: A dictionary of additional namespace mappings

You can use this to override default mappings or add new mappings.
ignore_unserializable_fields: Whether to ignore unserializable fields.

@@ -237,13 +241,15 @@ def load(

Args:
obj: The object to load.
secrets_map: A map of secrets to load. If a secret is not found in
the map, it will be loaded from the environment if `secrets_from_env`
is True.
secrets_map: A map of secrets to load.

If a secret is not found in the map, it will be loaded from the environment
if `secrets_from_env` is `True`.
valid_namespaces: A list of additional namespaces (modules)
to allow to be deserialized.
secrets_from_env: Whether to load secrets from the environment.
additional_import_mappings: A dictionary of additional namespace mappings

You can use this to override default mappings or add new mappings.
ignore_unserializable_fields: Whether to ignore unserializable fields.
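A minimal round-trip sketch of `dumpd`/`load` as documented in these hunks; a prompt is used because it is serializable out of the box, and `secrets_map` would only matter for objects that hold secrets:

```python
from langchain_core.load import dumpd, load
from langchain_core.prompts import ChatPromptTemplate

prompt = ChatPromptTemplate.from_messages([("human", "Translate: {text}")])
serialized = dumpd(prompt)   # plain JSON-compatible dict
restored = load(serialized)  # revived ChatPromptTemplate
print(restored.invoke({"text": "hola"}))
```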
@@ -97,11 +97,14 @@ class Serializable(BaseModel, ABC):
by default. This is to prevent accidental serialization of objects that should
not be serialized.
- `get_lc_namespace`: Get the namespace of the LangChain object.

During deserialization, this namespace is used to identify
the correct class to instantiate.

Please see the `Reviver` class in `langchain_core.load.load` for more details.
During deserialization an additional mapping is used to handle classes that
have moved or been renamed across package versions.

- `lc_secrets`: A map of constructor argument names to secret ids.
- `lc_attributes`: List of additional attribute names that should be included
as part of the serialized representation.
@@ -194,7 +197,7 @@ class Serializable(BaseModel, ABC):
ValueError: If the class has deprecated attributes.

Returns:
A json serializable object or a `SerializedNotImplemented` object.
A JSON serializable object or a `SerializedNotImplemented` object.
"""
if not self.is_lc_serializable():
return self.to_json_not_implemented()

@@ -9,6 +9,9 @@ if TYPE_CHECKING:
from langchain_core.messages.ai import (
AIMessage,
AIMessageChunk,
InputTokenDetails,
OutputTokenDetails,
UsageMetadata,
)
from langchain_core.messages.base import (
BaseMessage,
@@ -87,10 +90,12 @@ __all__ = (
"HumanMessage",
"HumanMessageChunk",
"ImageContentBlock",
"InputTokenDetails",
"InvalidToolCall",
"MessageLikeRepresentation",
"NonStandardAnnotation",
"NonStandardContentBlock",
"OutputTokenDetails",
"PlainTextContentBlock",
"ReasoningContentBlock",
"RemoveMessage",
@@ -104,6 +109,7 @@ __all__ = (
"ToolCallChunk",
"ToolMessage",
"ToolMessageChunk",
"UsageMetadata",
"VideoContentBlock",
"_message_from_dict",
"convert_to_messages",
@@ -145,6 +151,7 @@ _dynamic_imports = {
"HumanMessageChunk": "human",
"NonStandardAnnotation": "content",
"NonStandardContentBlock": "content",
"OutputTokenDetails": "ai",
"PlainTextContentBlock": "content",
"ReasoningContentBlock": "content",
"RemoveMessage": "modifier",
@@ -154,12 +161,14 @@ _dynamic_imports = {
"SystemMessage": "system",
"SystemMessageChunk": "system",
"ImageContentBlock": "content",
"InputTokenDetails": "ai",
"InvalidToolCall": "tool",
"TextContentBlock": "content",
"ToolCall": "tool",
"ToolCallChunk": "tool",
"ToolMessage": "tool",
"ToolMessageChunk": "tool",
"UsageMetadata": "ai",
"VideoContentBlock": "content",
"AnyMessage": "utils",
"MessageLikeRepresentation": "utils",

@@ -48,10 +48,10 @@ class InputTokenDetails(TypedDict, total=False):
}
```

!!! version-added "Added in version 0.3.9"

May also hold extra provider-specific keys.

!!! version-added "Added in `langchain-core` 0.3.9"

"""

audio: int
@@ -83,7 +83,9 @@ class OutputTokenDetails(TypedDict, total=False):
}
```

!!! version-added "Added in version 0.3.9"
May also hold extra provider-specific keys.

!!! version-added "Added in `langchain-core` 0.3.9"

"""

@@ -121,9 +123,15 @@ class UsageMetadata(TypedDict):
}
```

!!! warning "Behavior changed in 0.3.9"
!!! warning "Behavior changed in `langchain-core` 0.3.9"

Added `input_token_details` and `output_token_details`.

!!! note "LangSmith SDK"

The LangSmith SDK also has a `UsageMetadata` class. While the two share fields,
LangSmith's `UsageMetadata` has additional fields to capture cost information
used by the LangSmith platform.
"""

input_tokens: int
@@ -131,7 +139,7 @@ class UsageMetadata(TypedDict):
output_tokens: int
"""Count of output (or completion) tokens. Sum of all output token types."""
total_tokens: int
"""Total token count. Sum of input_tokens + output_tokens."""
"""Total token count. Sum of `input_tokens` + `output_tokens`."""
input_token_details: NotRequired[InputTokenDetails]
"""Breakdown of input token counts.

@@ -141,7 +149,6 @@ class UsageMetadata(TypedDict):
"""Breakdown of output token counts.

Does *not* need to sum to full output token count. Does *not* need to have all keys.

"""
@@ -153,7 +160,6 @@ class AIMessage(BaseMessage):
This message represents the output of the model and consists of both
the raw output as returned by the model and standardized fields
(e.g., tool calls, usage metadata) added by the LangChain framework.

"""

tool_calls: list[ToolCall] = []
@@ -651,13 +657,13 @@ def add_ai_message_chunks(
chunk_id = id_
break
else:
# second pass: prefer lc_run-* ids over lc_* ids
# second pass: prefer lc_run-* IDs over lc_* IDs
for id_ in candidates:
if id_ and id_.startswith(LC_ID_PREFIX):
chunk_id = id_
break
else:
# third pass: take any remaining id (auto-generated lc_* ids)
# third pass: take any remaining ID (auto-generated lc_* IDs)
for id_ in candidates:
if id_:
chunk_id = id_

@@ -5,11 +5,9 @@ from __future__ import annotations
from typing import TYPE_CHECKING, Any, cast, overload

from pydantic import ConfigDict, Field
from typing_extensions import Self

from langchain_core._api.deprecation import warn_deprecated
from langchain_core.load.serializable import Serializable
from langchain_core.messages import content as types
from langchain_core.utils import get_bolded_text
from langchain_core.utils._merge import merge_dicts, merge_lists
from langchain_core.utils.interactive_env import is_interactive_env
@@ -17,6 +15,9 @@ from langchain_core.utils.interactive_env import is_interactive_env
if TYPE_CHECKING:
from collections.abc import Sequence

from typing_extensions import Self

from langchain_core.messages import content as types
from langchain_core.prompts.chat import ChatPromptTemplate


@@ -93,6 +94,10 @@ class BaseMessage(Serializable):
"""Base abstract message class.

Messages are the inputs and outputs of a chat model.

Examples include [`HumanMessage`][langchain.messages.HumanMessage],
[`AIMessage`][langchain.messages.AIMessage], and
[`SystemMessage`][langchain.messages.SystemMessage].
"""

content: str | list[str | dict]
@@ -195,7 +200,7 @@ class BaseMessage(Serializable):
def content_blocks(self) -> list[types.ContentBlock]:
r"""Load content blocks from the message content.

!!! version-added "Added in version 1.0.0"
!!! version-added "Added in `langchain-core` 1.0.0"

"""
# Needed here to avoid circular import, as these classes import BaseMessages

@@ -12,10 +12,11 @@ the implementation in `BaseMessage`.

from __future__ import annotations

from collections.abc import Callable
from typing import TYPE_CHECKING

if TYPE_CHECKING:
from collections.abc import Callable

from langchain_core.messages import AIMessage, AIMessageChunk
from langchain_core.messages import content as types


@@ -368,7 +368,7 @@ def _convert_to_v1_from_genai(message: AIMessage) -> list[types.ContentBlock]:
else:
# Assume it's raw base64 without data URI
try:
# Validate base64 and decode for mime type detection
# Validate base64 and decode for MIME type detection
decoded_bytes = base64.b64decode(url, validate=True)

image_url_b64_block = {
@@ -379,7 +379,7 @@ def _convert_to_v1_from_genai(message: AIMessage) -> list[types.ContentBlock]:
try:
import filetype # type: ignore[import-not-found] # noqa: PLC0415

# Guess mime type based on file bytes
# Guess MIME type based on file bytes
mime_type = None
kind = filetype.guess(decoded_bytes)
if kind:
@@ -458,6 +458,8 @@ def _convert_to_v1_from_genai(message: AIMessage) -> list[types.ContentBlock]:
if outcome is not None:
server_tool_result_block["extras"]["outcome"] = outcome
converted_blocks.append(server_tool_result_block)
elif item_type == "text":
converted_blocks.append(cast("types.TextContentBlock", item))
else:
# Unknown type, preserve as non-standard
converted_blocks.append({"type": "non_standard", "value": item})

@@ -4,7 +4,6 @@ from __future__ import annotations

import json
import warnings
from collections.abc import Iterable
from typing import TYPE_CHECKING, Any, Literal, cast

from langchain_core.language_models._utils import (
@@ -14,6 +13,8 @@ from langchain_core.language_models._utils import (
from langchain_core.messages import content as types

if TYPE_CHECKING:
from collections.abc import Iterable

from langchain_core.messages import AIMessage, AIMessageChunk


@@ -644,7 +644,7 @@ class AudioContentBlock(TypedDict):


class PlainTextContentBlock(TypedDict):
"""Plaintext data (e.g., from a document).
"""Plaintext data (e.g., from a `.txt` or `.md` document).

!!! note
A `PlainTextContentBlock` existed in `langchain-core<1.0.0`. Although the
@@ -654,7 +654,7 @@ class PlainTextContentBlock(TypedDict):

!!! note
Title and context are optional fields that may be passed to the model. See
Anthropic [example](https://docs.claude.com/en/docs/build-with-claude/citations#citable-vs-non-citable-content).
Anthropic [example](https://platform.claude.com/docs/en/build-with-claude/citations#citable-vs-non-citable-content).

!!! note "Factory function"
`create_plaintext_block` may also be used as a factory to create a
@@ -767,7 +767,7 @@ class FileContentBlock(TypedDict):


class NonStandardContentBlock(TypedDict):
"""Provider-specific data.
"""Provider-specific content data.

This block contains data for which there is not yet a standard type.

@@ -802,7 +802,7 @@ class NonStandardContentBlock(TypedDict):
"""

value: dict[str, Any]
"""Provider-specific data."""
"""Provider-specific content data."""

index: NotRequired[int | str]
"""Index of block in aggregate response. Used during streaming."""
@@ -867,7 +867,7 @@ def _get_data_content_block_types() -> tuple[str, ...]:
Example: ("image", "video", "audio", "text-plain", "file")

Note that old style multimodal blocks type literals with new style blocks.
Speficially, "image", "audio", and "file".
Specifically, "image", "audio", and "file".

See the docstring of `_normalize_messages` in `language_models._utils` for details.
"""
@@ -906,7 +906,7 @@ def is_data_content_block(block: dict) -> bool:

# 'text' is checked to support v0 PlainTextContentBlock types
# We must guard against new style TextContentBlock which also has 'text' `type`
# by ensuring the presense of `source_type`
# by ensuring the presence of `source_type`
if block["type"] == "text" and "source_type" not in block: # noqa: SIM103 # This is more readable
return False

@@ -1399,7 +1399,7 @@ def create_non_standard_block(
"""Create a `NonStandardContentBlock`.

Args:
value: Provider-specific data.
value: Provider-specific content data.
id: Content block identifier. Generated automatically if not provided.
index: Index of block in aggregate response. Used during streaming.


@@ -86,7 +86,7 @@ AnyMessage = Annotated[
| Annotated[ToolMessageChunk, Tag(tag="ToolMessageChunk")],
Field(discriminator=Discriminator(_get_type)),
]
""""A type representing any defined `Message` or `MessageChunk` type."""
"""A type representing any defined `Message` or `MessageChunk` type."""


def get_buffer_string(
@@ -328,12 +328,16 @@ def _convert_to_message(message: MessageLikeRepresentation) -> BaseMessage:
"""
if isinstance(message, BaseMessage):
message_ = message
elif isinstance(message, str):
message_ = _create_message_from_message_type("human", message)
elif isinstance(message, Sequence) and len(message) == 2:
# mypy doesn't realise this can't be a string given the previous branch
message_type_str, template = message # type: ignore[misc]
message_ = _create_message_from_message_type(message_type_str, template)
elif isinstance(message, Sequence):
if isinstance(message, str):
message_ = _create_message_from_message_type("human", message)
else:
try:
message_type_str, template = message
except ValueError as e:
msg = "Message as a sequence must be (role string, template)"
raise NotImplementedError(msg) from e
message_ = _create_message_from_message_type(message_type_str, template)
elif isinstance(message, dict):
msg_kwargs = message.copy()
try:
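The `(role string, template)` sequence handling reworked above is easiest to see through the public `convert_to_messages` helper; a minimal sketch:

```python
from langchain_core.messages import convert_to_messages

messages = convert_to_messages(
    [
        ("system", "You are terse."),
        ("human", "Name a prime number."),
        {"role": "assistant", "content": "7"},
    ]
)
print([m.type for m in messages])  # ['system', 'human', 'ai']
```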
@@ -734,8 +738,10 @@ def trim_messages(
Set to `len` to count the number of **messages** in the chat history.

!!! note

Use `count_tokens_approximately` to get fast, approximate token
counts.

This is recommended for using `trim_messages` on the hot path, where
exact token counting is not necessary.
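A sketch of the recommended hot-path combination (the history and token budget below are illustrative):

```python
from langchain_core.messages import (
    AIMessage,
    HumanMessage,
    count_tokens_approximately,
    trim_messages,
)

history = [
    HumanMessage("Hi"),
    AIMessage("Hello!"),
    HumanMessage("Summarize our chat so far."),
    AIMessage("We said hello."),
]
trimmed = trim_messages(
    history,
    max_tokens=30,
    token_counter=count_tokens_approximately,  # fast approximation
    strategy="last",
    start_on="human",
)
print(trimmed)
```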
@@ -1025,18 +1031,18 @@ def convert_to_openai_messages(
messages: Message-like object or iterable of objects whose contents are
in OpenAI, Anthropic, Bedrock Converse, or VertexAI formats.
text_format: How to format string or text block contents:
- `'string'`:
If a message has a string content, this is left as a string. If
a message has content blocks that are all of type `'text'`, these
are joined with a newline to make a single string. If a message has
content blocks and at least one isn't of type `'text'`, then
all blocks are left as dicts.
- `'block'`:
If a message has a string content, this is turned into a list
with a single content block of type `'text'`. If a message has
content blocks these are left as is.
include_id: Whether to include message ids in the openai messages, if they
are present in the source messages.
- `'string'`:
If a message has a string content, this is left as a string. If
a message has content blocks that are all of type `'text'`, these
are joined with a newline to make a single string. If a message has
content blocks and at least one isn't of type `'text'`, then
all blocks are left as dicts.
- `'block'`:
If a message has a string content, this is turned into a list
with a single content block of type `'text'`. If a message has
content blocks these are left as is.
include_id: Whether to include message IDs in the openai messages, if they
are present in the source messages.

Raises:
ValueError: if an unrecognized `text_format` is specified, or if a message
@@ -1097,7 +1103,7 @@ def convert_to_openai_messages(
# ]
```

!!! version-added "Added in version 0.3.11"
!!! version-added "Added in `langchain-core` 0.3.11"

""" # noqa: E501
if text_format not in {"string", "block"}:
@@ -1697,7 +1703,7 @@ def count_tokens_approximately(
Warning:
This function does not currently support counting image tokens.

!!! version-added "Added in version 0.3.46"
!!! version-added "Added in `langchain-core` 0.3.46"

"""
token_count = 0.0

@@ -1,4 +1,20 @@
"""**OutputParser** classes parse the output of an LLM call."""
"""`OutputParser` classes parse the output of an LLM call into structured data.

!!! tip "Structured output"

Output parsers emerged as an early solution to the challenge of obtaining structured
output from LLMs.

Today, most LLMs support [structured output](https://docs.langchain.com/oss/python/langchain/models#structured-outputs)
natively. In such cases, using output parsers may be unnecessary, and you should
leverage the model's built-in capabilities for structured output. Refer to the
[documentation of your chosen model](https://docs.langchain.com/oss/python/integrations/providers/overview)
for guidance on how to achieve structured output directly.

Output parsers remain valuable when working with models that do not support
structured output natively, or when you require additional processing or validation
of the model's output beyond its inherent capabilities.
"""

from typing import TYPE_CHECKING


@@ -135,6 +135,9 @@ class BaseOutputParser(

Example:
```python
# Implement a simple boolean output parser


class BooleanOutputParser(BaseOutputParser[bool]):
true_val: str = "YES"
false_val: str = "NO"

@@ -1,11 +1,16 @@
"""Format instructions."""

JSON_FORMAT_INSTRUCTIONS = """The output should be formatted as a JSON instance that conforms to the JSON schema below.
JSON_FORMAT_INSTRUCTIONS = """STRICT OUTPUT FORMAT:
- Return only the JSON value that conforms to the schema. Do not include any additional text, explanations, headings, or separators.
- Do not wrap the JSON in Markdown or code fences (no ``` or ```json).
- Do not prepend or append any text (e.g., do not write "Here is the JSON:").
- The response must be a single top-level JSON value exactly as required by the schema (object/array/etc.), with no trailing commas or comments.

As an example, for the schema {{"properties": {{"foo": {{"title": "Foo", "description": "a list of strings", "type": "array", "items": {{"type": "string"}}}}}}, "required": ["foo"]}}
the object {{"foo": ["bar", "baz"]}} is a well-formatted instance of the schema. The object {{"properties": {{"foo": ["bar", "baz"]}}}} is not well-formatted.
The output should be formatted as a JSON instance that conforms to the JSON schema below.

Here is the output schema:
As an example, for the schema {{"properties": {{"foo": {{"title": "Foo", "description": "a list of strings", "type": "array", "items": {{"type": "string"}}}}}}, "required": ["foo"]}} the object {{"foo": ["bar", "baz"]}} is a well-formatted instance of the schema. The object {{"properties": {{"foo": ["bar", "baz"]}}}} is not well-formatted.

Here is the output schema (shown in a code block for readability only — do not include any backticks or Markdown in your output):
```
{schema}
```""" # noqa: E501
@@ -31,11 +31,14 @@ TBaseModel = TypeVar("TBaseModel", bound=PydanticBaseModel)
class JsonOutputParser(BaseCumulativeTransformOutputParser[Any]):
"""Parse the output of an LLM call to a JSON object.

Probably the most reliable output parser for getting structured data that does *not*
use function calling.

When used in streaming mode, it will yield partial JSON objects containing
all the keys that have been returned so far.

In streaming, if `diff` is set to `True`, yields JSONPatch operations
describing the difference between the previous and the current object.
In streaming, if `diff` is set to `True`, yields JSONPatch operations describing the
difference between the previous and the current object.
"""
pydantic_object: Annotated[type[TBaseModel] | None, SkipValidation()] = None # type: ignore[valid-type]

@@ -41,7 +41,7 @@ def droplastn(


class ListOutputParser(BaseTransformOutputParser[list[str]]):
"""Parse the output of an LLM call to a list."""
"""Parse the output of a model to a list."""

@property
def _type(self) -> str:
@@ -74,30 +74,30 @@ class ListOutputParser(BaseTransformOutputParser[list[str]]):
buffer = ""
for chunk in input:
if isinstance(chunk, BaseMessage):
# extract text
# Extract text
chunk_content = chunk.content
if not isinstance(chunk_content, str):
continue
buffer += chunk_content
else:
# add current chunk to buffer
# Add current chunk to buffer
buffer += chunk
# parse buffer into a list of parts
# Parse buffer into a list of parts
try:
done_idx = 0
# yield only complete parts
# Yield only complete parts
for m in droplastn(self.parse_iter(buffer), 1):
done_idx = m.end()
yield [m.group(1)]
buffer = buffer[done_idx:]
except NotImplementedError:
parts = self.parse(buffer)
# yield only complete parts
# Yield only complete parts
if len(parts) > 1:
for part in parts[:-1]:
yield [part]
buffer = parts[-1]
# yield the last part
# Yield the last part
for part in self.parse(buffer):
yield [part]

@@ -108,40 +108,40 @@ class ListOutputParser(BaseTransformOutputParser[list[str]]):
buffer = ""
async for chunk in input:
if isinstance(chunk, BaseMessage):
# extract text
# Extract text
chunk_content = chunk.content
if not isinstance(chunk_content, str):
continue
buffer += chunk_content
else:
# add current chunk to buffer
# Add current chunk to buffer
buffer += chunk
# parse buffer into a list of parts
# Parse buffer into a list of parts
try:
done_idx = 0
# yield only complete parts
# Yield only complete parts
for m in droplastn(self.parse_iter(buffer), 1):
done_idx = m.end()
yield [m.group(1)]
buffer = buffer[done_idx:]
except NotImplementedError:
parts = self.parse(buffer)
# yield only complete parts
# Yield only complete parts
if len(parts) > 1:
for part in parts[:-1]:
yield [part]
buffer = parts[-1]
# yield the last part
# Yield the last part
for part in self.parse(buffer):
yield [part]


class CommaSeparatedListOutputParser(ListOutputParser):
"""Parse the output of an LLM call to a comma-separated list."""
"""Parse the output of a model to a comma-separated list."""

@classmethod
def is_lc_serializable(cls) -> bool:
"""Return True as this class is serializable."""
"""Return `True` as this class is serializable."""
return True

@classmethod
@@ -177,7 +177,7 @@ class CommaSeparatedListOutputParser(ListOutputParser):
)
return [item for sublist in reader for item in sublist]
except csv.Error:
# keep old logic for backup
# Keep old logic for backup
return [part.strip() for part in text.split(",")]

@property

@@ -15,7 +15,11 @@ from langchain_core.messages.tool import tool_call as create_tool_call
from langchain_core.output_parsers.transform import BaseCumulativeTransformOutputParser
from langchain_core.outputs import ChatGeneration, Generation
from langchain_core.utils.json import parse_partial_json
from langchain_core.utils.pydantic import TypeBaseModel
from langchain_core.utils.pydantic import (
TypeBaseModel,
is_pydantic_v1_subclass,
is_pydantic_v2_subclass,
)

logger = logging.getLogger(__name__)

@@ -224,7 +228,7 @@ class JsonOutputKeyToolsParser(JsonOutputToolsParser):
result: The result of the LLM call.
partial: Whether to parse partial JSON.
If `True`, the output will be a JSON object containing
all the keys that have been returned so far.
all the keys that have been returned so far.
If `False`, the output will be the full JSON object.

Raises:
@@ -307,7 +311,7 @@ class PydanticToolsParser(JsonOutputToolsParser):
result: The result of the LLM call.
partial: Whether to parse partial JSON.
If `True`, the output will be a JSON object containing
all the keys that have been returned so far.
all the keys that have been returned so far.
If `False`, the output will be the full JSON object.

Returns:
@@ -323,7 +327,15 @@ class PydanticToolsParser(JsonOutputToolsParser):
return None if self.first_tool_only else []

json_results = [json_results] if self.first_tool_only else json_results
name_dict = {tool.__name__: tool for tool in self.tools}
name_dict_v2: dict[str, TypeBaseModel] = {
tool.model_config.get("title") or tool.__name__: tool
for tool in self.tools
if is_pydantic_v2_subclass(tool)
}
name_dict_v1: dict[str, TypeBaseModel] = {
tool.__name__: tool for tool in self.tools if is_pydantic_v1_subclass(tool)
}
name_dict: dict[str, TypeBaseModel] = {**name_dict_v2, **name_dict_v1}
pydantic_objects = []
for res in json_results:
if not isinstance(res["args"], dict):
@@ -37,7 +37,7 @@ class PydanticOutputParser(JsonOutputParser, Generic[TBaseModel]):
def _parser_exception(
self, e: Exception, json_object: dict
) -> OutputParserException:
json_string = json.dumps(json_object)
json_string = json.dumps(json_object, ensure_ascii=False)
name = self.pydantic_object.__name__
msg = f"Failed to parse {name} from completion {json_string}. Got: {e}"
return OutputParserException(msg, llm_output=json_string)
@@ -86,7 +86,7 @@ class PydanticOutputParser(JsonOutputParser, Generic[TBaseModel]):
The format instructions for the JSON output.
"""
# Copy schema to avoid altering original Pydantic schema.
schema = dict(self.pydantic_object.model_json_schema().items())
schema = dict(self._get_schema(self.pydantic_object).items())

# Remove extraneous fields.
reduced_schema = schema

@@ -6,14 +6,14 @@ from langchain_core.output_parsers.transform import BaseTransformOutputParser


class StrOutputParser(BaseTransformOutputParser[str]):
"""OutputParser that parses LLMResult into the top likely string."""
"""OutputParser that parses `LLMResult` into the top likely string."""

@classmethod
def is_lc_serializable(cls) -> bool:
"""StrOutputParser is serializable.
"""`StrOutputParser` is serializable.

Returns:
True
`True`
"""
return True


@@ -43,19 +43,19 @@ class _StreamingParser:
"""Streaming parser for XML.

This implementation is pulled into a class to avoid implementation
drift between transform and atransform of the XMLOutputParser.
drift between transform and atransform of the `XMLOutputParser`.
"""

def __init__(self, parser: Literal["defusedxml", "xml"]) -> None:
"""Initialize the streaming parser.

Args:
parser: Parser to use for XML parsing. Can be either 'defusedxml' or 'xml'.
See documentation in XMLOutputParser for more information.
parser: Parser to use for XML parsing. Can be either `'defusedxml'` or
`'xml'`. See documentation in `XMLOutputParser` for more information.

Raises:
ImportError: If defusedxml is not installed and the defusedxml
parser is requested.
ImportError: If `defusedxml` is not installed and the `defusedxml` parser is
requested.
"""
if parser == "defusedxml":
if not _HAS_DEFUSEDXML:
@@ -79,10 +79,10 @@ class _StreamingParser:
"""Parse a chunk of text.

Args:
chunk: A chunk of text to parse. This can be a string or a BaseMessage.
chunk: A chunk of text to parse. This can be a `str` or a `BaseMessage`.

Yields:
A dictionary representing the parsed XML element.
A `dict` representing the parsed XML element.

Raises:
xml.etree.ElementTree.ParseError: If the XML is not well-formed.
@@ -147,46 +147,49 @@ class _StreamingParser:


class XMLOutputParser(BaseTransformOutputParser):
"""Parse an output using xml format."""
"""Parse an output using xml format.

Returns a dictionary of tags.
"""

tags: list[str] | None = None
"""Tags to tell the LLM to expect in the XML output.

Note this may not be perfect depending on the LLM implementation.

For example, with tags=["foo", "bar", "baz"]:
For example, with `tags=["foo", "bar", "baz"]`:

1. A well-formatted XML instance:
"<foo>\n <bar>\n <baz></baz>\n </bar>\n</foo>"
`"<foo>\n <bar>\n <baz></baz>\n </bar>\n</foo>"`

2. A badly-formatted XML instance (missing closing tag for 'bar'):
"<foo>\n <bar>\n </foo>"
`"<foo>\n <bar>\n </foo>"`

3. A badly-formatted XML instance (unexpected 'tag' element):
"<foo>\n <tag>\n </tag>\n</foo>"
`"<foo>\n <tag>\n </tag>\n</foo>"`
"""
|
||||
encoding_matcher: re.Pattern = re.compile(
|
||||
r"<([^>]*encoding[^>]*)>\n(.*)", re.MULTILINE | re.DOTALL
|
||||
)
|
||||
parser: Literal["defusedxml", "xml"] = "defusedxml"
|
||||
"""Parser to use for XML parsing. Can be either 'defusedxml' or 'xml'.
|
||||
"""Parser to use for XML parsing. Can be either `'defusedxml'` or `'xml'`.
|
||||
|
||||
* 'defusedxml' is the default parser and is used to prevent XML vulnerabilities
|
||||
present in some distributions of Python's standard library xml.
|
||||
`defusedxml` is a wrapper around the standard library parser that
|
||||
sets up the parser with secure defaults.
|
||||
* 'xml' is the standard library parser.
|
||||
* `'defusedxml'` is the default parser and is used to prevent XML vulnerabilities
|
||||
present in some distributions of Python's standard library xml.
|
||||
`defusedxml` is a wrapper around the standard library parser that
|
||||
sets up the parser with secure defaults.
|
||||
* `'xml'` is the standard library parser.
|
||||
|
||||
Use `xml` only if you are sure that your distribution of the standard library
|
||||
is not vulnerable to XML vulnerabilities.
|
||||
Use `xml` only if you are sure that your distribution of the standard library is not
|
||||
vulnerable to XML vulnerabilities.
|
||||
|
||||
Please review the following resources for more information:
|
||||
|
||||
* https://docs.python.org/3/library/xml.html#xml-vulnerabilities
|
||||
* https://github.com/tiran/defusedxml
|
||||
|
||||
The standard library relies on libexpat for parsing XML:
|
||||
https://github.com/libexpat/libexpat
|
||||
The standard library relies on [`libexpat`](https://github.com/libexpat/libexpat)
|
||||
for parsing XML.
|
||||
"""
|
||||
|
||||
def get_format_instructions(self) -> str:
|
||||
@@ -200,12 +203,12 @@ class XMLOutputParser(BaseTransformOutputParser):
|
||||
text: The output of an LLM call.
|
||||
|
||||
Returns:
|
||||
A dictionary representing the parsed XML.
|
||||
A `dict` representing the parsed XML.
|
||||
|
||||
Raises:
|
||||
OutputParserException: If the XML is not well-formed.
|
||||
ImportError: If defusedxml is not installed and the defusedxml
|
||||
parser is requested.
|
||||
ImportError: If defus`edxml is not installed and the `defusedxml` parser is
|
||||
requested.
|
||||
"""
|
||||
# Try to find XML string within triple backticks
|
||||
# Imports are temporarily placed here to avoid issue with caching on CI
|
||||
|
||||
@@ -2,15 +2,17 @@

from __future__ import annotations

from typing import Literal
from typing import TYPE_CHECKING, Literal

from pydantic import model_validator
from typing_extensions import Self

from langchain_core.messages import BaseMessage, BaseMessageChunk
from langchain_core.outputs.generation import Generation
from langchain_core.utils._merge import merge_dicts

if TYPE_CHECKING:
    from typing_extensions import Self


class ChatGeneration(Generation):
    """A single chat generation output.
@@ -11,9 +11,8 @@ from langchain_core.utils._merge import merge_dicts
class Generation(Serializable):
    """A single text generation output.

    Generation represents the response from an
    `"old-fashioned" LLM <https://python.langchain.com/docs/concepts/text_llms/>__` that
    generates regular text (not chat messages).
    Generation represents the response from an "old-fashioned" LLM (string-in,
    string-out) that generates regular text (not chat messages).

    This model is used internally by chat models and will eventually
    be mapped to a more general `LLMResult` object, and then projected into
@@ -21,8 +20,7 @@ class Generation(Serializable):

    LangChain users working with chat models will usually access information via
    `AIMessage` (returned from runnable interfaces) or `LLMResult` (available
    via callbacks). Please refer the `AIMessage` and `LLMResult` schema documentation
    for more information.
    via callbacks). Please refer to `AIMessage` and `LLMResult` for more information.
    """

    text: str
@@ -35,11 +33,13 @@ class Generation(Serializable):
    """
    type: Literal["Generation"] = "Generation"
    """Type is used exclusively for serialization purposes.
    Set to "Generation" for this class."""

    Set to "Generation" for this class.
    """

    @classmethod
    def is_lc_serializable(cls) -> bool:
        """Return True as this class is serializable."""
        """Return `True` as this class is serializable."""
        return True

    @classmethod
@@ -53,7 +53,7 @@ class Generation(Serializable):


class GenerationChunk(Generation):
    """Generation chunk, which can be concatenated with other Generation chunks."""
    """`GenerationChunk`, which can be concatenated with other Generation chunks."""

    def __add__(self, other: GenerationChunk) -> GenerationChunk:
        """Concatenate two `GenerationChunk`s.
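As a quick illustration of the concatenation behavior documented here (a minimal sketch; `generation_info` merging relies on `merge_dicts` as imported above):

```python
from langchain_core.outputs import GenerationChunk

first = GenerationChunk(text="Hello, ", generation_info={"model": "demo"})
second = GenerationChunk(text="world!")

merged = first + second  # __add__ concatenates text and merges generation_info
print(merged.text)  # -> "Hello, world!"
```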
@@ -30,15 +30,13 @@ class PromptValue(Serializable, ABC):

    @classmethod
    def is_lc_serializable(cls) -> bool:
        """Return True as this class is serializable."""
        """Return `True` as this class is serializable."""
        return True

    @classmethod
    def get_lc_namespace(cls) -> list[str]:
        """Get the namespace of the LangChain object.

        This is used to determine the namespace of the object when serializing.

        Returns:
            `["langchain", "schema", "prompt"]`
        """
@@ -50,7 +48,7 @@ class PromptValue(Serializable, ABC):

    @abstractmethod
    def to_messages(self) -> list[BaseMessage]:
        """Return prompt as a list of Messages."""
        """Return prompt as a list of messages."""


class StringPromptValue(PromptValue):
@@ -64,8 +62,6 @@ class StringPromptValue(PromptValue):
    def get_lc_namespace(cls) -> list[str]:
        """Get the namespace of the LangChain object.

        This is used to determine the namespace of the object when serializing.

        Returns:
            `["langchain", "prompts", "base"]`
        """
@@ -101,8 +97,6 @@ class ChatPromptValue(PromptValue):
    def get_lc_namespace(cls) -> list[str]:
        """Get the namespace of the LangChain object.

        This is used to determine the namespace of the object when serializing.

        Returns:
            `["langchain", "prompts", "chat"]`
        """
@@ -6,7 +6,7 @@ import contextlib
import json
import typing
from abc import ABC, abstractmethod
from collections.abc import Callable, Mapping
from collections.abc import Mapping
from functools import cached_property
from pathlib import Path
from typing import (
@@ -33,6 +33,8 @@ from langchain_core.runnables.config import ensure_config
from langchain_core.utils.pydantic import create_model_v2

if TYPE_CHECKING:
    from collections.abc import Callable

    from langchain_core.documents import Document


@@ -46,21 +48,27 @@ class BasePromptTemplate(

    input_variables: list[str]
    """A list of the names of the variables whose values are required as inputs to the
    prompt."""
    prompt.
    """
    optional_variables: list[str] = Field(default=[])
    """optional_variables: A list of the names of the variables for placeholder
    or MessagePlaceholder that are optional. These variables are auto inferred
    from the prompt and user need not provide them."""
    """A list of the names of the variables for placeholder or `MessagePlaceholder` that
    are optional.

    These variables are auto inferred from the prompt and user need not provide them.
    """
    input_types: typing.Dict[str, Any] = Field(default_factory=dict, exclude=True)  # noqa: UP006
    """A dictionary of the types of the variables the prompt template expects.
    If not provided, all variables are assumed to be strings."""

    If not provided, all variables are assumed to be strings.
    """
    output_parser: BaseOutputParser | None = None
    """How to parse the output of calling an LLM on this formatted prompt."""
    partial_variables: Mapping[str, Any] = Field(default_factory=dict)
    """A dictionary of the partial variables the prompt template carries.

    Partial variables populate the template so that you don't need to
    pass them in every time you call the prompt."""
    Partial variables populate the template so that you don't need to pass them in every
    time you call the prompt.
    """
    metadata: typing.Dict[str, Any] | None = None  # noqa: UP006
    """Metadata to be used for tracing."""
    tags: list[str] | None = None
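A minimal sketch of how `input_variables` and `partial_variables` interact in practice, using `PromptTemplate` (a concrete subclass; names here are illustrative):

```python
from langchain_core.prompts import PromptTemplate

prompt = PromptTemplate.from_template("Tell me a {adjective} joke about {topic}.")
print(prompt.input_variables)  # -> ['adjective', 'topic']

# Pre-fill one variable; only 'topic' remains required at call time:
partial_prompt = prompt.partial(adjective="funny")
print(partial_prompt.format(topic="bears"))
# -> "Tell me a funny joke about bears."
```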
@@ -105,7 +113,7 @@ class BasePromptTemplate(

    @classmethod
    def is_lc_serializable(cls) -> bool:
        """Return True as this class is serializable."""
        """Return `True` as this class is serializable."""
        return True

    model_config = ConfigDict(
@@ -127,7 +135,7 @@ class BasePromptTemplate(
        """Get the input schema for the prompt.

        Args:
            config: configuration for the prompt.
            config: Configuration for the prompt.

        Returns:
            The input schema for the prompt.
@@ -195,8 +203,8 @@ class BasePromptTemplate(
        """Invoke the prompt.

        Args:
            input: Dict, input to the prompt.
            config: RunnableConfig, configuration for the prompt.
            input: Input to the prompt.
            config: Configuration for the prompt.

        Returns:
            The output of the prompt.
@@ -221,8 +229,8 @@ class BasePromptTemplate(
        """Async invoke the prompt.

        Args:
            input: Dict, input to the prompt.
            config: RunnableConfig, configuration for the prompt.
            input: Input to the prompt.
            config: Configuration for the prompt.

        Returns:
            The output of the prompt.
@@ -242,7 +250,7 @@ class BasePromptTemplate(

    @abstractmethod
    def format_prompt(self, **kwargs: Any) -> PromptValue:
        """Create Prompt Value.
        """Create `PromptValue`.

        Args:
            **kwargs: Any arguments to be passed to the prompt template.
@@ -252,7 +260,7 @@ class BasePromptTemplate(
        """

    async def aformat_prompt(self, **kwargs: Any) -> PromptValue:
        """Async create Prompt Value.
        """Async create `PromptValue`.

        Args:
            **kwargs: Any arguments to be passed to the prompt template.
@@ -266,7 +274,7 @@ class BasePromptTemplate(
        """Return a partial of the prompt template.

        Args:
            **kwargs: partial variables to set.
            **kwargs: Partial variables to set.

        Returns:
            A partial of the prompt template.
@@ -296,9 +304,9 @@ class BasePromptTemplate(
            A formatted string.

        Example:
            ```python
            prompt.format(variable1="foo")
            ```
        """
    async def aformat(self, **kwargs: Any) -> FormatOutputType:
@@ -311,9 +319,9 @@ class BasePromptTemplate(
            A formatted string.

        Example:
            ```python
            await prompt.aformat(variable1="foo")
            ```
        """
        return self.format(**kwargs)

@@ -348,9 +356,9 @@ class BasePromptTemplate(
            NotImplementedError: If the prompt type is not implemented.

        Example:
            ```python
            prompt.save(file_path="path/prompt.yaml")
            ```
        """
        if self.partial_variables:
            msg = "Cannot save prompt with partial variables."
@@ -402,23 +410,23 @@ def format_document(doc: Document, prompt: BasePromptTemplate[str]) -> str:

    First, this pulls information from the document from two sources:

    1. page_content:
        This takes the information from the `document.page_content`
        and assigns it to a variable named `page_content`.
    2. metadata:
        This takes information from `document.metadata` and assigns
        it to variables of the same name.
    1. `page_content`:
        This takes the information from the `document.page_content` and assigns it to a
        variable named `page_content`.
    2. `metadata`:
        This takes information from `document.metadata` and assigns it to variables of
        the same name.

    Those variables are then passed into the `prompt` to produce a formatted string.

    Args:
        doc: Document, the page_content and metadata will be used to create
        doc: `Document`, the `page_content` and `metadata` will be used to create
            the final string.
        prompt: BasePromptTemplate, will be used to format the page_content
            and metadata into the final string.
        prompt: `BasePromptTemplate`, will be used to format the `page_content`
            and `metadata` into the final string.

    Returns:
        string of the document formatted.
        String of the document formatted.

    Example:
        ```python
@@ -429,7 +437,6 @@ def format_document(doc: Document, prompt: BasePromptTemplate[str]) -> str:
        prompt = PromptTemplate.from_template("Page {page}: {page_content}")
        format_document(doc, prompt)
        >>> "Page 1: This is a joke"
        ```
    """
    return prompt.format(**_get_document_info(doc, prompt))
@@ -440,22 +447,22 @@ async def aformat_document(doc: Document, prompt: BasePromptTemplate[str]) -> st

    First, this pulls information from the document from two sources:

    1. page_content:
        This takes the information from the `document.page_content`
        and assigns it to a variable named `page_content`.
    2. metadata:
        This takes information from `document.metadata` and assigns
        it to variables of the same name.
    1. `page_content`:
        This takes the information from the `document.page_content` and assigns it to a
        variable named `page_content`.
    2. `metadata`:
        This takes information from `document.metadata` and assigns it to variables of
        the same name.

    Those variables are then passed into the `prompt` to produce a formatted string.

    Args:
        doc: Document, the page_content and metadata will be used to create
        doc: `Document`, the `page_content` and `metadata` will be used to create
            the final string.
        prompt: BasePromptTemplate, will be used to format the page_content
            and metadata into the final string.
        prompt: `BasePromptTemplate`, will be used to format the `page_content`
            and `metadata` into the final string.

    Returns:
        string of the document formatted.
        String of the document formatted.
    """
    return await prompt.aformat(**_get_document_info(doc, prompt))
@@ -587,14 +587,15 @@ class _StringImageMessagePromptTemplate(BaseMessagePromptTemplate):
        for prompt in self.prompt:
            inputs = {var: kwargs[var] for var in prompt.input_variables}
            if isinstance(prompt, StringPromptTemplate):
                formatted: str | ImageURL | dict[str, Any] = prompt.format(**inputs)
                content.append({"type": "text", "text": formatted})
                formatted_text: str = prompt.format(**inputs)
                if formatted_text != "":
                    content.append({"type": "text", "text": formatted_text})
            elif isinstance(prompt, ImagePromptTemplate):
                formatted = prompt.format(**inputs)
                content.append({"type": "image_url", "image_url": formatted})
                formatted_image: ImageURL = prompt.format(**inputs)
                content.append({"type": "image_url", "image_url": formatted_image})
            elif isinstance(prompt, DictPromptTemplate):
                formatted = prompt.format(**inputs)
                content.append(formatted)
                formatted_dict: dict[str, Any] = prompt.format(**inputs)
                content.append(formatted_dict)
        return self._msg_class(
            content=content, additional_kwargs=self.additional_kwargs
        )
@@ -617,16 +618,15 @@ class _StringImageMessagePromptTemplate(BaseMessagePromptTemplate):
        for prompt in self.prompt:
            inputs = {var: kwargs[var] for var in prompt.input_variables}
            if isinstance(prompt, StringPromptTemplate):
                formatted: str | ImageURL | dict[str, Any] = await prompt.aformat(
                    **inputs
                )
                content.append({"type": "text", "text": formatted})
                formatted_text: str = await prompt.aformat(**inputs)
                if formatted_text != "":
                    content.append({"type": "text", "text": formatted_text})
            elif isinstance(prompt, ImagePromptTemplate):
                formatted = await prompt.aformat(**inputs)
                content.append({"type": "image_url", "image_url": formatted})
                formatted_image: ImageURL = await prompt.aformat(**inputs)
                content.append({"type": "image_url", "image_url": formatted_image})
            elif isinstance(prompt, DictPromptTemplate):
                formatted = prompt.format(**inputs)
                content.append(formatted)
                formatted_dict: dict[str, Any] = prompt.format(**inputs)
                content.append(formatted_dict)
        return self._msg_class(
            content=content, additional_kwargs=self.additional_kwargs
        )
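For reference, the `content` list assembled by the three branches above ends up as a sequence of typed parts; a hedged sketch of the assumed shapes (values are illustrative):

```python
# Illustrative only: one part per matched prompt type.
content = [
    {"type": "text", "text": "Describe this image."},  # StringPromptTemplate
    {"type": "image_url", "image_url": {"url": "https://example.com/cat.png"}},  # ImagePromptTemplate
    {"custom": "payload"},  # DictPromptTemplate (arbitrary dict)
]
```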
@@ -776,42 +776,36 @@ class ChatPromptTemplate(BaseChatPromptTemplate):

    Use to create flexible templated prompts for chat models.

    Examples:
    !!! warning "Behavior changed in 0.2.24"

        You can pass any Message-like formats supported by
        `ChatPromptTemplate.from_messages()` directly to `ChatPromptTemplate()`
        init.

        ```python
        from langchain_core.prompts import ChatPromptTemplate

        template = ChatPromptTemplate(
            [
                ("system", "You are a helpful AI bot. Your name is {name}."),
                ("human", "Hello, how are you doing?"),
                ("ai", "I'm doing well, thanks!"),
                ("human", "{user_input}"),
            ]
        )

        prompt_value = template.invoke(
            {
                "name": "Bob",
                "user_input": "What is your name?",
            }
        )
        # Output:
        # ChatPromptValue(
        #     messages=[
        #         SystemMessage(content='You are a helpful AI bot. Your name is Bob.'),
        #         HumanMessage(content='Hello, how are you doing?'),
        #         AIMessage(content="I'm doing well, thanks!"),
        #         HumanMessage(content='What is your name?')
        #     ]
        # )
        ```

    Messages Placeholder:
    !!! note "Messages Placeholder"

        ```python
        # In addition to Human/AI/Tool/Function messages,
@@ -852,13 +846,12 @@ class ChatPromptTemplate(BaseChatPromptTemplate):
        # )
        ```

    Single-variable template:
    !!! note "Single-variable template"

        If your prompt has only a single input variable (i.e., 1 instance of
        "{variable_names}"), and you invoke the template with a non-dict object, the
        prompt template will inject the provided argument into that variable location.

        ```python
        from langchain_core.prompts import ChatPromptTemplate
@@ -898,25 +891,40 @@ class ChatPromptTemplate(BaseChatPromptTemplate):
        """Create a chat prompt template from a variety of message formats.

        Args:
            messages: sequence of message representations.
            messages: Sequence of message representations.

                A message can be represented using the following formats:
                (1) BaseMessagePromptTemplate, (2) BaseMessage, (3) 2-tuple of
                (message type, template); e.g., ("human", "{user_input}"),
                (4) 2-tuple of (message class, template), (5) a string which is
                shorthand for ("human", template); e.g., "{user_input}".
            template_format: format of the template.
            input_variables: A list of the names of the variables whose values are
                required as inputs to the prompt.
            optional_variables: A list of the names of the variables for placeholder
                or MessagePlaceholder that are optional.
                These variables are auto inferred from the prompt and user need not
                provide them.
            partial_variables: A dictionary of the partial variables the prompt
                template carries. Partial variables populate the template so that you
                don't need to pass them in every time you call the prompt.
            validate_template: Whether to validate the template.
            input_types: A dictionary of the types of the variables the prompt template
                expects. If not provided, all variables are assumed to be strings.

                1. `BaseMessagePromptTemplate`
                2. `BaseMessage`
                3. 2-tuple of `(message type, template)`; e.g.,
                   `("human", "{user_input}")`
                4. 2-tuple of `(message class, template)`
                5. A string which is shorthand for `("human", template)`; e.g.,
                   `"{user_input}"`
            template_format: Format of the template.
            **kwargs: Additional keyword arguments passed to `BasePromptTemplate`,
                including (but not limited to):

                - `input_variables`: A list of the names of the variables whose values
                  are required as inputs to the prompt.
                - `optional_variables`: A list of the names of the variables for
                  placeholder or `MessagePlaceholder` that are optional.

                  These variables are auto inferred from the prompt and user need not
                  provide them.

                - `partial_variables`: A dictionary of the partial variables the prompt
                  template carries.

                  Partial variables populate the template so that you don't need to
                  pass them in every time you call the prompt.

                - `validate_template`: Whether to validate the template.
                - `input_types`: A dictionary of the types of the variables the prompt
                  template expects.

                  If not provided, all variables are assumed to be strings.

        Examples:
            Instantiation from a list of message templates:
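A minimal sketch of such an instantiation, mixing the message formats enumerated above (a 2-tuple and the bare-string shorthand):

```python
from langchain_core.prompts import ChatPromptTemplate

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a helpful AI bot. Your name is {name}."),
        "{user_input}",  # shorthand for ("human", "{user_input}")
    ]
)
prompt.invoke({"name": "Bob", "user_input": "Hello!"})
```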
@@ -1121,12 +1129,17 @@ class ChatPromptTemplate(BaseChatPromptTemplate):
            )
            ```

        Args:
            messages: sequence of message representations.
            messages: Sequence of message representations.

                A message can be represented using the following formats:
                (1) BaseMessagePromptTemplate, (2) BaseMessage, (3) 2-tuple of
                (message type, template); e.g., ("human", "{user_input}"),
                (4) 2-tuple of (message class, template), (5) a string which is
                shorthand for ("human", template); e.g., "{user_input}".

                1. `BaseMessagePromptTemplate`
                2. `BaseMessage`
                3. 2-tuple of `(message type, template)`; e.g.,
                   `("human", "{user_input}")`
                4. 2-tuple of `(message class, template)`
                5. A string which is shorthand for `("human", template)`; e.g.,
                   `"{user_input}"`
            template_format: format of the template.

        Returns:
@@ -1238,7 +1251,7 @@ class ChatPromptTemplate(BaseChatPromptTemplate):
        """Extend the chat template with a sequence of messages.

        Args:
            messages: sequence of message representations to append.
            messages: Sequence of message representations to append.
        """
        self.messages.extend(
            [_convert_to_message_template(message) for message in messages]
@@ -1335,11 +1348,25 @@ def _create_template_from_message_type(
            raise ValueError(msg)
        var_name = template[1:-1]
        message = MessagesPlaceholder(variable_name=var_name, optional=True)
    elif len(template) == 2 and isinstance(template[1], bool):
        var_name_wrapped, is_optional = template
    else:
        try:
            var_name_wrapped, is_optional = template
        except ValueError as e:
            msg = (
                "Unexpected arguments for placeholder message type."
                " Expected either a single string variable name"
                " or a list of [variable_name: str, is_optional: bool]."
                f" Got: {template}"
            )
            raise ValueError(msg) from e

        if not isinstance(is_optional, bool):
            msg = f"Expected is_optional to be a boolean. Got: {is_optional}"
            raise ValueError(msg)  # noqa: TRY004

        if not isinstance(var_name_wrapped, str):
            msg = f"Expected variable name to be a string. Got: {var_name_wrapped}"
            raise ValueError(msg) # noqa:TRY004
            raise ValueError(msg)  # noqa: TRY004
        if var_name_wrapped[0] != "{" or var_name_wrapped[-1] != "}":
            msg = (
                f"Invalid placeholder template: {var_name_wrapped}."
@@ -1349,14 +1376,6 @@ def _create_template_from_message_type(
        var_name = var_name_wrapped[1:-1]

        message = MessagesPlaceholder(variable_name=var_name, optional=is_optional)
    else:
        msg = (
            "Unexpected arguments for placeholder message type."
            " Expected either a single string variable name"
            " or a list of [variable_name: str, is_optional: bool]."
            f" Got: {template}"
        )
        raise ValueError(msg)
    else:
        msg = (
            f"Unexpected message type: {message_type}. Use one of 'human',"
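The accepted placeholder shapes handled by this branch can be summarized with a short, hedged sketch (variable names are illustrative):

```python
from langchain_core.prompts import ChatPromptTemplate

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "You are helpful."),
        ("placeholder", "{history}"),            # optional=True by default
        # ("placeholder", ["{history}", False]), # explicit is_optional flag
        ("human", "{question}"),
    ]
)
# 'history' may be omitted because the placeholder is optional:
prompt.invoke({"question": "Hi there"})
```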
@@ -1410,10 +1429,11 @@ def _convert_to_message_template(
        )
        raise ValueError(msg)
        message = (message["role"], message["content"])
    if len(message) != 2:
    try:
        message_type_str, template = message
    except ValueError as e:
        msg = f"Expected 2-tuple of (role, template), got {message}"
        raise ValueError(msg)
    message_type_str, template = message
        raise ValueError(msg) from e
    if isinstance(message_type_str, str):
        message_ = _create_template_from_message_type(
            message_type_str, template, template_format=template_format

@@ -69,7 +69,7 @@ class DictPromptTemplate(RunnableSerializable[dict, dict]):

    @classmethod
    def is_lc_serializable(cls) -> bool:
        """Return True as this class is serializable."""
        """Return `True` as this class is serializable."""
        return True

    @classmethod
@@ -6,10 +6,10 @@ from abc import ABC, abstractmethod
from typing import TYPE_CHECKING, Any

from langchain_core.load import Serializable
from langchain_core.messages import BaseMessage
from langchain_core.utils.interactive_env import is_interactive_env

if TYPE_CHECKING:
    from langchain_core.messages import BaseMessage
    from langchain_core.prompts.chat import ChatPromptTemplate


@@ -18,7 +18,7 @@ class BaseMessagePromptTemplate(Serializable, ABC):

    @classmethod
    def is_lc_serializable(cls) -> bool:
        """Return True as this class is serializable."""
        """Return `True` as this class is serializable."""
        return True

    @classmethod
@@ -32,13 +32,13 @@ class BaseMessagePromptTemplate(Serializable, ABC):

    @abstractmethod
    def format_messages(self, **kwargs: Any) -> list[BaseMessage]:
        """Format messages from kwargs. Should return a list of BaseMessages.
        """Format messages from kwargs. Should return a list of `BaseMessage` objects.

        Args:
            **kwargs: Keyword arguments to use for formatting.

        Returns:
            List of BaseMessages.
            List of `BaseMessage` objects.
        """

    async def aformat_messages(self, **kwargs: Any) -> list[BaseMessage]:
@@ -48,7 +48,7 @@ class BaseMessagePromptTemplate(Serializable, ABC):
            **kwargs: Keyword arguments to use for formatting.

        Returns:
            List of BaseMessages.
            List of `BaseMessage` objects.
        """
        return self.format_messages(**kwargs)
@@ -4,9 +4,8 @@ from __future__ import annotations

import warnings
from abc import ABC
from collections.abc import Callable, Sequence
from string import Formatter
from typing import Any, Literal
from typing import TYPE_CHECKING, Any, Literal

from pydantic import BaseModel, create_model

@@ -16,10 +15,70 @@ from langchain_core.utils import get_colored_text, mustache
from langchain_core.utils.formatting import formatter
from langchain_core.utils.interactive_env import is_interactive_env

if TYPE_CHECKING:
    from collections.abc import Callable, Sequence

try:
    from jinja2 import Environment, meta
    from jinja2 import meta
    from jinja2.exceptions import SecurityError
    from jinja2.sandbox import SandboxedEnvironment

    class _RestrictedSandboxedEnvironment(SandboxedEnvironment):
        """A more restrictive Jinja2 sandbox that blocks all attribute/method access.

        This sandbox only allows simple variable lookups, no attribute or method access.
        This prevents template injection attacks via methods like parse_raw().
        """

        def is_safe_attribute(self, _obj: Any, _attr: str, _value: Any) -> bool:
            """Block ALL attribute access for security.

            Only allow accessing variables directly from the context dict,
            no attribute access on those objects.

            Args:
                _obj: The object being accessed (unused, always blocked).
                _attr: The attribute name (unused, always blocked).
                _value: The attribute value (unused, always blocked).

            Returns:
                False - all attribute access is blocked.
            """
            # Block all attribute access
            return False

        def is_safe_callable(self, _obj: Any) -> bool:
            """Block all method calls for security.

            Args:
                _obj: The object being checked (unused, always blocked).

            Returns:
                False - all callables are blocked.
            """
            return False

        def getattr(self, obj: Any, attribute: str) -> Any:
            """Override getattr to block all attribute access.

            Args:
                obj: The object.
                attribute: The attribute name.

            Returns:
                Never returns.

            Raises:
                SecurityError: Always, to block attribute access.
            """
            msg = (
                f"Access to attributes is not allowed in templates. "
                f"Attempted to access '{attribute}' on {type(obj).__name__}. "
                f"Use only simple variable names like {{{{variable}}}} "
                f"without dots or methods."
            )
            raise SecurityError(msg)

    _HAS_JINJA2 = True
except ImportError:
    _HAS_JINJA2 = False
@@ -59,14 +118,10 @@ def jinja2_formatter(template: str, /, **kwargs: Any) -> str:
        )
        raise ImportError(msg)

    # This uses a sandboxed environment to prevent arbitrary code execution.
    # Jinja2 uses an opt-out rather than opt-in approach for sand-boxing.
    # Please treat this sand-boxing as a best-effort approach rather than
    # a guarantee of security.
    # We recommend to never use jinja2 templates with untrusted inputs.
    # https://jinja.palletsprojects.com/en/3.1.x/sandbox/
    # approach not a guarantee of security.
    return SandboxedEnvironment().from_string(template).render(**kwargs)
    # Use a restricted sandbox that blocks ALL attribute/method access
    # Only simple variable lookups like {{variable}} are allowed
    # Attribute access like {{variable.attr}} or {{variable.method()}} is blocked
    return _RestrictedSandboxedEnvironment().from_string(template).render(**kwargs)


def validate_jinja2(template: str, input_variables: list[str]) -> None:
@@ -101,7 +156,7 @@ def _get_jinja2_variables_from_template(template: str) -> set[str]:
        "Please install it with `pip install jinja2`."
    )
    raise ImportError(msg)
    env = Environment()  # noqa: S701
    env = _RestrictedSandboxedEnvironment()
    ast = env.parse(template)
    return meta.find_undeclared_variables(ast)
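To see the restricted sandbox's contract from the outside, here is a standalone, hedged sketch built directly on `jinja2` (a simplified stand-in for `_RestrictedSandboxedEnvironment`, not the class above):

```python
from jinja2.exceptions import SecurityError
from jinja2.sandbox import SandboxedEnvironment


class RestrictedEnv(SandboxedEnvironment):
    """Toy sandbox: plain variable lookups only, no attribute access."""

    def getattr(self, obj, attribute):
        msg = f"Access to attributes is not allowed: {attribute!r}"
        raise SecurityError(msg)


env = RestrictedEnv()
print(env.from_string("Hello {{ name }}!").render(name="world"))  # OK

try:
    env.from_string("{{ obj.__class__ }}").render(obj=object())
except SecurityError as e:
    print("blocked:", e)
```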
@@ -122,13 +177,16 @@ def mustache_formatter(template: str, /, **kwargs: Any) -> str:
def mustache_template_vars(
    template: str,
) -> set[str]:
    """Get the variables from a mustache template.
    """Get the top-level variables from a mustache template.

    For nested variables like `{{person.name}}`, only the top-level
    key (`person`) is returned.

    Args:
        template: The template string.

    Returns:
        The variables from the template.
        The top-level variables from the template.
    """
    variables: set[str] = set()
    section_depth = 0
@@ -268,6 +326,30 @@ def get_template_variables(template: str, template_format: str) -> list[str]:
        msg = f"Unsupported template format: {template_format}"
        raise ValueError(msg)

    # For f-strings, block attribute access and indexing syntax
    # This prevents template injection attacks via accessing dangerous attributes
    if template_format == "f-string":
        for var in input_variables:
            # Formatter().parse() returns field names with dots/brackets if present
            # e.g., "obj.attr" or "obj[0]" - we need to block these
            if "." in var or "[" in var or "]" in var:
                msg = (
                    f"Invalid variable name {var!r} in f-string template. "
                    f"Variable names cannot contain attribute "
                    f"access (.) or indexing ([])."
                )
                raise ValueError(msg)

            # Block variable names that are all digits (e.g., "0", "100")
            # These are interpreted as positional arguments, not keyword arguments
            if var.isdigit():
                msg = (
                    f"Invalid variable name {var!r} in f-string template. "
                    f"Variable names cannot be all digits as they are interpreted "
                    f"as positional arguments."
                )
                raise ValueError(msg)

    return sorted(input_variables)
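The dotted and bracketed field names this check guards against come straight from `string.Formatter`; a small sketch of what `parse()` yields:

```python
from string import Formatter

template = "Hello {user.name}, item {items[0]}, plain {greeting}"
fields = [field for _, field, _, _ in Formatter().parse(template) if field]
print(fields)
# -> ['user.name', 'items[0]', 'greeting']
# The first two would be rejected by the f-string validation above.
```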
@@ -49,7 +49,13 @@ class StructuredPrompt(ChatPromptTemplate):
            structured_output_kwargs: additional kwargs for structured output.
            template_format: template format for the prompt.
        """
        schema_ = schema_ or kwargs.pop("schema")
        schema_ = schema_ or kwargs.pop("schema", None)
        if not schema_:
            err_msg = (
                "Must pass in a non-empty structured output schema. Received: "
                f"{schema_}"
            )
            raise ValueError(err_msg)
        structured_output_kwargs = structured_output_kwargs or {}
        for k in set(kwargs).difference(get_pydantic_field_names(self.__class__)):
            structured_output_kwargs[k] = kwargs.pop(k)
@@ -104,19 +110,23 @@ class StructuredPrompt(ChatPromptTemplate):
            )
            ```

        Args:
            messages: sequence of message representations.
            messages: Sequence of message representations.

                A message can be represented using the following formats:
                (1) BaseMessagePromptTemplate, (2) BaseMessage, (3) 2-tuple of
                (message type, template); e.g., ("human", "{user_input}"),
                (4) 2-tuple of (message class, template), (5) a string which is
                shorthand for ("human", template); e.g., "{user_input}"
            schema: a dictionary representation of function call, or a Pydantic model.

                1. `BaseMessagePromptTemplate`
                2. `BaseMessage`
                3. 2-tuple of `(message type, template)`; e.g.,
                   `("human", "{user_input}")`
                4. 2-tuple of `(message class, template)`
                5. A string which is shorthand for `("human", template)`; e.g.,
                   `"{user_input}"`
            schema: A dictionary representation of function call, or a Pydantic model.
            **kwargs: Any additional kwargs to pass through to
                `ChatModel.with_structured_output(schema, **kwargs)`.

        Returns:
            a structured prompt template
            A structured prompt template
        """
        return cls(messages, schema, **kwargs)
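A hedged sketch of constructing a `StructuredPrompt` against the validation above; the pipe into a model's `with_structured_output` is indicated but commented out, since it requires a chat model:

```python
from pydantic import BaseModel

from langchain_core.prompts.structured import StructuredPrompt


class Answer(BaseModel):
    value: int


prompt = StructuredPrompt([("human", "What is 2 + 2?")], schema_=Answer)

# StructuredPrompt([("human", "hi")])  # would raise ValueError (empty schema)
# chain = prompt | chat_model          # binds chat_model.with_structured_output(Answer)
```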
@@ -105,7 +105,9 @@ class InMemoryRateLimiter(BaseRateLimiter):

        from langchain_anthropic import ChatAnthropic

        model = ChatAnthropic(model_name="claude-sonnet-4-5", rate_limiter=rate_limiter)
        model = ChatAnthropic(
            model_name="claude-sonnet-4-5-20250929", rate_limiter=rate_limiter
        )

        for _ in range(5):
            tic = time.time()
@@ -50,65 +50,65 @@ class LangSmithRetrieverParams(TypedDict, total=False):


class BaseRetriever(RunnableSerializable[RetrieverInput, RetrieverOutput], ABC):
    """Abstract base class for a Document retrieval system.
    """Abstract base class for a document retrieval system.

    A retrieval system is defined as something that can take string queries and return
    the most 'relevant' Documents from some source.
    the most 'relevant' documents from some source.

    Usage:

    A retriever follows the standard Runnable interface, and should be used
    via the standard Runnable methods of `invoke`, `ainvoke`, `batch`, `abatch`.
    A retriever follows the standard `Runnable` interface, and should be used via the
    standard `Runnable` methods of `invoke`, `ainvoke`, `batch`, `abatch`.

    Implementation:

    When implementing a custom retriever, the class should implement
    the `_get_relevant_documents` method to define the logic for retrieving documents.
    When implementing a custom retriever, the class should implement the
    `_get_relevant_documents` method to define the logic for retrieving documents.

    Optionally, an async native implementation can be provided by overriding the
    `_aget_relevant_documents` method.

    Example: A retriever that returns the first 5 documents from a list of documents
    !!! example "Retriever that returns the first 5 documents from a list of documents"

        ```python
        from langchain_core.documents import Document
        from langchain_core.retrievers import BaseRetriever


        class SimpleRetriever(BaseRetriever):
            docs: list[Document]
            k: int = 5

            def _get_relevant_documents(self, query: str) -> list[Document]:
                \"\"\"Return the first k documents from the list of documents\"\"\"
                return self.docs[:self.k]

            async def _aget_relevant_documents(self, query: str) -> list[Document]:
                \"\"\"(Optional) async native implementation.\"\"\"
                return self.docs[:self.k]
        ```

    Example: A simple retriever based on a scikit-learn vectorizer
    !!! example "Simple retriever based on a scikit-learn vectorizer"

        ```python
        from sklearn.metrics.pairwise import cosine_similarity


        class TFIDFRetriever(BaseRetriever, BaseModel):
            vectorizer: Any
            docs: list[Document]
            tfidf_array: Any
            k: int = 4

            class Config:
                arbitrary_types_allowed = True

            def _get_relevant_documents(self, query: str) -> list[Document]:
                # Ip -- (n_docs,x), Op -- (n_docs,n_Feats)
                query_vec = self.vectorizer.transform([query])
                # Op -- (n_docs,1) -- Cosine Sim with each doc
                results = cosine_similarity(self.tfidf_array, query_vec).reshape((-1,))
                return [self.docs[i] for i in results.argsort()[-self.k :][::-1]]
        ```
    """

    model_config = ConfigDict(
@@ -119,15 +119,19 @@ class BaseRetriever(RunnableSerializable[RetrieverInput, RetrieverOutput], ABC):
    _expects_other_args: bool = False
    tags: list[str] | None = None
    """Optional list of tags associated with the retriever.

    These tags will be associated with each call to this retriever,
    and passed as arguments to the handlers defined in `callbacks`.

    You can use these to e.g. identify a specific instance of a retriever with its
    use case.
    """
    metadata: dict[str, Any] | None = None
    """Optional metadata associated with the retriever.

    This metadata will be associated with each call to this retriever,
    and passed as arguments to the handlers defined in `callbacks`.

    You can use these to e.g. identify a specific instance of a retriever with its
    use case.
    """
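Reusing the `SimpleRetriever` sketch from the docstring above, invocation goes through the standard `Runnable` surface (a hedged example):

```python
from langchain_core.documents import Document

docs = [Document(page_content=f"doc {i}") for i in range(10)]
retriever = SimpleRetriever(docs=docs)  # class defined in the example above

print(retriever.invoke("any query"))    # -> the first 5 documents
# await retriever.ainvoke("any query")  # async path via _aget_relevant_documents
```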
@@ -118,6 +118,8 @@ if TYPE_CHECKING:

Other = TypeVar("Other")

_RUNNABLE_GENERIC_NUM_ARGS = 2  # Input and Output


class Runnable(ABC, Generic[Input, Output]):
    """A unit of work that can be invoked, batched, streamed, transformed and composed.
@@ -147,11 +149,11 @@ class Runnable(ABC, Generic[Input, Output]):
    the `input_schema` property, the `output_schema` property and `config_schema`
    method.

    LCEL and Composition
    ====================
    Composition
    ===========

    Runnable objects can be composed together to create chains in a declarative way.

    The LangChain Expression Language (LCEL) is a declarative way to compose
    `Runnable` objects into chains.
    Any chain constructed this way will automatically have sync, async, batch, and
    streaming support.

@@ -235,21 +237,21 @@ class Runnable(ABC, Generic[Input, Output]):

    You can set the global debug flag to True to enable debug output for all chains:

    ```python
    from langchain_core.globals import set_debug

    set_debug(True)
    ```

    Alternatively, you can pass existing or custom callbacks to any given chain:

    ```python
    from langchain_core.tracers import ConsoleCallbackHandler

    chain.invoke(..., config={"callbacks": [ConsoleCallbackHandler()]})
    ```

    For a UI (and much more) check out [LangSmith](https://docs.smith.langchain.com/).
    For a UI (and much more) check out [LangSmith](https://docs.langchain.com/langsmith/home).

    """

@@ -309,7 +311,10 @@ class Runnable(ABC, Generic[Input, Output]):
        for base in self.__class__.mro():
            if hasattr(base, "__pydantic_generic_metadata__"):
                metadata = base.__pydantic_generic_metadata__
                if "args" in metadata and len(metadata["args"]) == 2:
                if (
                    "args" in metadata
                    and len(metadata["args"]) == _RUNNABLE_GENERIC_NUM_ARGS
                ):
                    return metadata["args"][0]

        # If we didn't find a Pydantic model in the parent classes,
@@ -317,7 +322,7 @@ class Runnable(ABC, Generic[Input, Output]):
        # Runnables that are not pydantic models.
        for cls in self.__class__.__orig_bases__:  # type: ignore[attr-defined]
            type_args = get_args(cls)
            if type_args and len(type_args) == 2:
            if type_args and len(type_args) == _RUNNABLE_GENERIC_NUM_ARGS:
                return type_args[0]

        msg = (
@@ -340,12 +345,15 @@ class Runnable(ABC, Generic[Input, Output]):
        for base in self.__class__.mro():
            if hasattr(base, "__pydantic_generic_metadata__"):
                metadata = base.__pydantic_generic_metadata__
                if "args" in metadata and len(metadata["args"]) == 2:
                if (
                    "args" in metadata
                    and len(metadata["args"]) == _RUNNABLE_GENERIC_NUM_ARGS
                ):
                    return metadata["args"][1]

        for cls in self.__class__.__orig_bases__:  # type: ignore[attr-defined]
            type_args = get_args(cls)
            if type_args and len(type_args) == 2:
            if type_args and len(type_args) == _RUNNABLE_GENERIC_NUM_ARGS:
                return type_args[1]

        msg = (
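The `== _RUNNABLE_GENERIC_NUM_ARGS` checks above walk the generic machinery that `typing.get_args` exposes; a small sketch:

```python
from typing import get_args

from langchain_core.runnables import Runnable

# A subscripted generic carries exactly two type arguments: (Input, Output).
print(get_args(Runnable[str, int]))  # -> (str, int)
```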
@@ -424,7 +432,7 @@ class Runnable(ABC, Generic[Input, Output]):
        print(runnable.get_input_jsonschema())
        ```

        !!! version-added "Added in version 0.3.0"
        !!! version-added "Added in `langchain-core` 0.3.0"

        """
        return self.get_input_schema(config).model_json_schema()
@@ -502,7 +510,7 @@ class Runnable(ABC, Generic[Input, Output]):
        print(runnable.get_output_jsonschema())
        ```

        !!! version-added "Added in version 0.3.0"
        !!! version-added "Added in `langchain-core` 0.3.0"

        """
        return self.get_output_schema(config).model_json_schema()
@@ -566,7 +574,7 @@ class Runnable(ABC, Generic[Input, Output]):
        Returns:
            A JSON schema that represents the config of the `Runnable`.

        !!! version-added "Added in version 0.3.0"
        !!! version-added "Added in `langchain-core` 0.3.0"

        """
        return self.config_schema(include=include).model_json_schema()
@@ -699,51 +707,53 @@ class Runnable(ABC, Generic[Input, Output]):
    def pick(self, keys: str | list[str]) -> RunnableSerializable[Any, Any]:
        """Pick keys from the output `dict` of this `Runnable`.

        Pick a single key:
        !!! example "Pick a single key"

            ```python
            import json

            from langchain_core.runnables import RunnableLambda, RunnableMap

            as_str = RunnableLambda(str)
            as_json = RunnableLambda(json.loads)
            chain = RunnableMap(str=as_str, json=as_json)

            chain.invoke("[1, 2, 3]")
            # -> {"str": "[1, 2, 3]", "json": [1, 2, 3]}

            json_only_chain = chain.pick("json")
            json_only_chain.invoke("[1, 2, 3]")
            # -> [1, 2, 3]
            ```

        Pick a list of keys:
        !!! example "Pick a list of keys"

            ```python
            from typing import Any

            import json

            from langchain_core.runnables import RunnableLambda, RunnableMap

            as_str = RunnableLambda(str)
            as_json = RunnableLambda(json.loads)


            def as_bytes(x: Any) -> bytes:
                return bytes(x, "utf-8")


            chain = RunnableMap(
                str=as_str, json=as_json, bytes=RunnableLambda(as_bytes)
            )

            chain.invoke("[1, 2, 3]")
            # -> {"str": "[1, 2, 3]", "json": [1, 2, 3], "bytes": b"[1, 2, 3]"}

            json_and_bytes_chain = chain.pick(["json", "bytes"])
            json_and_bytes_chain.invoke("[1, 2, 3]")
            # -> {"json": [1, 2, 3], "bytes": b"[1, 2, 3]"}
            ```

        Args:
            keys: A key or list of keys to pick from the output dict.
@@ -766,7 +776,7 @@ class Runnable(ABC, Generic[Input, Output]):
        """Assigns new fields to the `dict` output of this `Runnable`.

        ```python
        from langchain_community.llms.fake import FakeStreamingListLLM
        from langchain_core.language_models.fake import FakeStreamingListLLM
        from langchain_core.output_parsers import StrOutputParser
        from langchain_core.prompts import SystemMessagePromptTemplate
        from langchain_core.runnables import Runnable
@@ -818,10 +828,12 @@ class Runnable(ABC, Generic[Input, Output]):
        Args:
            input: The input to the `Runnable`.
            config: A config to use when invoking the `Runnable`.

                The config supports standard keys like `'tags'`, `'metadata'` for
                tracing purposes, `'max_concurrency'` for controlling how much work to
                do in parallel, and other keys. Please refer to the `RunnableConfig`
                for more details.
                do in parallel, and other keys.

                Please refer to `RunnableConfig` for more details.

        Returns:
            The output of the `Runnable`.
@@ -838,10 +850,12 @@ class Runnable(ABC, Generic[Input, Output]):
        Args:
            input: The input to the `Runnable`.
            config: A config to use when invoking the `Runnable`.

                The config supports standard keys like `'tags'`, `'metadata'` for
                tracing purposes, `'max_concurrency'` for controlling how much work to
                do in parallel, and other keys. Please refer to the `RunnableConfig`
                for more details.
                do in parallel, and other keys.

                Please refer to `RunnableConfig` for more details.

        Returns:
            The output of the `Runnable`.
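A minimal sketch of passing those standard config keys on `invoke` and `batch` (keys per `RunnableConfig`; values are illustrative):

```python
from langchain_core.runnables import RunnableLambda

doubler = RunnableLambda(lambda x: x * 2)

doubler.invoke(3, config={"tags": ["demo"], "metadata": {"run": 1}})  # -> 6
doubler.batch([1, 2, 3], config={"max_concurrency": 2})               # -> [2, 4, 6]
```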
@@ -868,8 +882,9 @@ class Runnable(ABC, Generic[Input, Output]):
            config: A config to use when invoking the `Runnable`. The config supports
                standard keys like `'tags'`, `'metadata'` for
                tracing purposes, `'max_concurrency'` for controlling how much work
                to do in parallel, and other keys. Please refer to the
                `RunnableConfig` for more details.
                to do in parallel, and other keys.

                Please refer to `RunnableConfig` for more details.
            return_exceptions: Whether to return exceptions instead of raising them.
            **kwargs: Additional keyword arguments to pass to the `Runnable`.

@@ -932,10 +947,12 @@ class Runnable(ABC, Generic[Input, Output]):
        Args:
            inputs: A list of inputs to the `Runnable`.
            config: A config to use when invoking the `Runnable`.

                The config supports standard keys like `'tags'`, `'metadata'` for
                tracing purposes, `'max_concurrency'` for controlling how much work to
                do in parallel, and other keys. Please refer to the `RunnableConfig`
                for more details.
                do in parallel, and other keys.

                Please refer to `RunnableConfig` for more details.
            return_exceptions: Whether to return exceptions instead of raising them.
            **kwargs: Additional keyword arguments to pass to the `Runnable`.

@@ -998,10 +1015,12 @@ class Runnable(ABC, Generic[Input, Output]):
        Args:
            inputs: A list of inputs to the `Runnable`.
            config: A config to use when invoking the `Runnable`.

                The config supports standard keys like `'tags'`, `'metadata'` for
                tracing purposes, `'max_concurrency'` for controlling how much work to
                do in parallel, and other keys. Please refer to the `RunnableConfig`
                for more details.
                do in parallel, and other keys.

                Please refer to `RunnableConfig` for more details.
            return_exceptions: Whether to return exceptions instead of raising them.
            **kwargs: Additional keyword arguments to pass to the `Runnable`.

@@ -1061,10 +1080,12 @@ class Runnable(ABC, Generic[Input, Output]):
        Args:
            inputs: A list of inputs to the `Runnable`.
            config: A config to use when invoking the `Runnable`.

                The config supports standard keys like `'tags'`, `'metadata'` for
                tracing purposes, `'max_concurrency'` for controlling how much work to
                do in parallel, and other keys. Please refer to the `RunnableConfig`
                for more details.
                do in parallel, and other keys.

                Please refer to `RunnableConfig` for more details.
            return_exceptions: Whether to return exceptions instead of raising them.
            **kwargs: Additional keyword arguments to pass to the `Runnable`.
@@ -1353,48 +1374,50 @@ class Runnable(ABC, Generic[Input, Output]):
        ).with_config({"run_name": "my_template", "tags": ["my_template"]})
        ```

        For instance:
        !!! example

            ```python
            from langchain_core.runnables import RunnableLambda


            async def reverse(s: str) -> str:
                return s[::-1]


            chain = RunnableLambda(func=reverse)

            events = [
                event async for event in chain.astream_events("hello", version="v2")
            ]

            # Will produce the following events
            # (run_id, and parent_ids has been omitted for brevity):
            [
                {
                    "data": {"input": "hello"},
                    "event": "on_chain_start",
                    "metadata": {},
                    "name": "reverse",
                    "tags": [],
                },
                {
                    "data": {"chunk": "olleh"},
                    "event": "on_chain_stream",
                    "metadata": {},
                    "name": "reverse",
                    "tags": [],
                },
                {
                    "data": {"output": "olleh"},
                    "event": "on_chain_end",
                    "metadata": {},
                    "name": "reverse",
                    "tags": [],
                },
            ]
            ```

        ```python title="Example: Dispatch Custom Event"
        ```python title="Dispatch custom event"
        from langchain_core.callbacks.manager import (
            adispatch_custom_event,
        )
@@ -1428,10 +1451,13 @@ class Runnable(ABC, Generic[Input, Output]):
        Args:
            input: The input to the `Runnable`.
            config: The config to use for the `Runnable`.
            version: The version of the schema to use either `'v2'` or `'v1'`.
            version: The version of the schema to use, either `'v2'` or `'v1'`.

                Users should use `'v2'`.

                `'v1'` is for backwards compatibility and will be deprecated
                in `0.4.0`.

                No default will be assigned until the API is stabilized.
                Custom events will only be surfaced in `'v2'`.
            include_names: Only include events from `Runnable` objects with matching names.
@@ -1441,6 +1467,7 @@ class Runnable(ABC, Generic[Input, Output]):
            exclude_types: Exclude events from `Runnable` objects with matching types.
            exclude_tags: Exclude events from `Runnable` objects with matching tags.
            **kwargs: Additional keyword arguments to pass to the `Runnable`.

                These will be passed to `astream_log` as this implementation
                of `astream_events` is built on top of `astream_log`.
@@ -1742,46 +1769,52 @@ class Runnable(ABC, Generic[Input, Output]):
            import time
            import asyncio


            def format_t(timestamp: float) -> str:
                return datetime.fromtimestamp(timestamp, tz=timezone.utc).isoformat()


            async def test_runnable(time_to_sleep: int):
                print(f"Runnable[{time_to_sleep}s]: starts at {format_t(time.time())}")
                await asyncio.sleep(time_to_sleep)
                print(f"Runnable[{time_to_sleep}s]: ends at {format_t(time.time())}")


            async def fn_start(run_obj: Runnable):
                print(f"on start callback starts at {format_t(time.time())}")
                await asyncio.sleep(3)
                print(f"on start callback ends at {format_t(time.time())}")


            async def fn_end(run_obj: Runnable):
                print(f"on end callback starts at {format_t(time.time())}")
                await asyncio.sleep(2)
                print(f"on end callback ends at {format_t(time.time())}")


            runnable = RunnableLambda(test_runnable).with_alisteners(
                on_start=fn_start, on_end=fn_end
            )


            async def concurrent_runs():
                await asyncio.gather(runnable.ainvoke(2), runnable.ainvoke(3))


            asyncio.run(concurrent_runs())
            # Result:
            # on start callback starts at 2025-03-01T07:05:22.875378+00:00
            # on start callback starts at 2025-03-01T07:05:22.875495+00:00
            # on start callback ends at 2025-03-01T07:05:25.878862+00:00
            # on start callback ends at 2025-03-01T07:05:25.878947+00:00
            # Runnable[2s]: starts at 2025-03-01T07:05:25.879392+00:00
            # Runnable[3s]: starts at 2025-03-01T07:05:25.879804+00:00
            # Runnable[2s]: ends at 2025-03-01T07:05:27.881998+00:00
            # on end callback starts at 2025-03-01T07:05:27.882360+00:00
            # Runnable[3s]: ends at 2025-03-01T07:05:28.881737+00:00
            # on end callback starts at 2025-03-01T07:05:28.882428+00:00
            # on end callback ends at 2025-03-01T07:05:29.883893+00:00
            # on end callback ends at 2025-03-01T07:05:30.884831+00:00
            ```
        """
        return RunnableBinding(
@@ -1843,7 +1876,7 @@ class Runnable(ABC, Generic[Input, Output]):
|
||||
`exp_base`, and `jitter` (all `float` values).
|
||||
|
||||
Returns:
|
||||
A new Runnable that retries the original Runnable on exceptions.
|
||||
A new `Runnable` that retries the original `Runnable` on exceptions.
|
||||
|
||||
Example:
|
||||
```python
|
||||
@@ -1927,7 +1960,9 @@ class Runnable(ABC, Generic[Input, Output]):
|
||||
exceptions_to_handle: A tuple of exception types to handle.
|
||||
exception_key: If `string` is specified then handled exceptions will be
|
||||
passed to fallbacks as part of the input under the specified key.
|
||||
|
||||
If `None`, exceptions will not be passed to fallbacks.
|
||||
|
||||
If used, the base `Runnable` and its fallbacks must accept a
|
||||
dictionary as input.
|
||||
|
||||
@@ -1963,7 +1998,9 @@ class Runnable(ABC, Generic[Input, Output]):
|
||||
exceptions_to_handle: A tuple of exception types to handle.
|
||||
exception_key: If `string` is specified then handled exceptions will be
|
||||
passed to fallbacks as part of the input under the specified key.
|
||||
|
||||
If `None`, exceptions will not be passed to fallbacks.
|
||||
|
||||
If used, the base `Runnable` and its fallbacks must accept a
|
||||
dictionary as input.
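
As a concrete illustration of the `exception_key` contract described in these two hunks (a sketch, not part of the diff; the key name `"error"` is an arbitrary choice):

```python
from langchain_core.runnables import RunnableLambda


def flaky(inputs: dict) -> str:
    raise ValueError("primary failed")


def backup(inputs: dict) -> str:
    # With exception_key="error", the handled exception is inserted into
    # the input dict under that key before the fallback is invoked.
    return f"recovered from: {inputs['error']}"


chain = RunnableLambda(flaky).with_fallbacks(
    [RunnableLambda(backup)], exception_key="error"
)
print(chain.invoke({"question": "hi"}))  # recovered from: primary failed
```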

@@ -2429,10 +2466,14 @@ class Runnable(ABC, Generic[Input, Output]):

`as_tool` will instantiate a `BaseTool` with a name, description, and
`args_schema` from a `Runnable`. Where possible, schemas are inferred
from `runnable.get_input_schema`. Alternatively (e.g., if the
`Runnable` takes a dict as input and the specific dict keys are not typed),
the schema can be specified directly with `args_schema`. You can also
pass `arg_types` to just specify the required arguments and their types.
from `runnable.get_input_schema`.

Alternatively (e.g., if the `Runnable` takes a dict as input and the specific
`dict` keys are not typed), the schema can be specified directly with
`args_schema`.

You can also pass `arg_types` to just specify the required arguments and their
types.

Args:
    args_schema: The schema for the tool.

@@ -2443,82 +2484,82 @@ class Runnable(ABC, Generic[Input, Output]):
Returns:
    A `BaseTool` instance.

Typed dict input:
!!! example "`TypedDict` input"

    ```python
    from typing_extensions import TypedDict
    from langchain_core.runnables import RunnableLambda


    class Args(TypedDict):
        a: int
        b: list[int]


    def f(x: Args) -> str:
        return str(x["a"] * max(x["b"]))


    runnable = RunnableLambda(f)
    as_tool = runnable.as_tool()
    as_tool.invoke({"a": 3, "b": [1, 2]})
    ```

`dict` input, specifying schema via `args_schema`:
!!! example "`dict` input, specifying schema via `args_schema`"

    ```python
    from typing import Any
    from pydantic import BaseModel, Field
    from langchain_core.runnables import RunnableLambda


    def f(x: dict[str, Any]) -> str:
        return str(x["a"] * max(x["b"]))


    class FSchema(BaseModel):
        \"\"\"Apply a function to an integer and list of integers.\"\"\"

        a: int = Field(..., description="Integer")
        b: list[int] = Field(..., description="List of ints")


    runnable = RunnableLambda(f)
    as_tool = runnable.as_tool(FSchema)
    as_tool.invoke({"a": 3, "b": [1, 2]})
    ```

`dict` input, specifying schema via `arg_types`:
!!! example "`dict` input, specifying schema via `arg_types`"

    ```python
    from typing import Any
    from langchain_core.runnables import RunnableLambda


    def f(x: dict[str, Any]) -> str:
        return str(x["a"] * max(x["b"]))


    runnable = RunnableLambda(f)
    as_tool = runnable.as_tool(arg_types={"a": int, "b": list[int]})
    as_tool.invoke({"a": 3, "b": [1, 2]})
    ```

String input:
!!! example "`str` input"

    ```python
    from langchain_core.runnables import RunnableLambda


    def f(x: str) -> str:
        return x + "a"


    def g(x: str) -> str:
        return x + "z"


    runnable = RunnableLambda(f) | g
    as_tool = runnable.as_tool()
    as_tool.invoke("b")
    ```
"""
# Avoid circular import
from langchain_core.tools import convert_runnable_to_tool  # noqa: PLC0415

@@ -2570,29 +2611,33 @@ class RunnableSerializable(Serializable, Runnable[Input, Output]):
Returns:
    A new `Runnable` with the fields configured.

!!! example

    ```python
    from langchain_core.runnables import ConfigurableField
    from langchain_openai import ChatOpenAI

    model = ChatOpenAI(max_tokens=20).configurable_fields(
        max_tokens=ConfigurableField(
            id="output_token_number",
            name="Max tokens in the output",
            description="The maximum number of tokens in the output",
        )
    )

    # max_tokens = 20
    print("max_tokens_20: ", model.invoke("tell me something about chess").content)
    # max_tokens = 20
    print(
        "max_tokens_20: ", model.invoke("tell me something about chess").content
    )

    # max_tokens = 200
    print(
        "max_tokens_200: ",
        model.with_config(configurable={"output_token_number": 200})
        .invoke("tell me something about chess")
        .content,
    )
    ```
"""
# Import locally to prevent circular import
from langchain_core.runnables.configurable import (  # noqa: PLC0415

@@ -2631,29 +2676,31 @@ class RunnableSerializable(Serializable, Runnable[Input, Output]):
Returns:
    A new `Runnable` with the alternatives configured.

!!! example

    ```python
    from langchain_anthropic import ChatAnthropic
    from langchain_core.runnables.utils import ConfigurableField
    from langchain_openai import ChatOpenAI

    model = ChatAnthropic(
        model_name="claude-3-7-sonnet-20250219"
    ).configurable_alternatives(
    model = ChatAnthropic(
        model_name="claude-sonnet-4-5-20250929"
    ).configurable_alternatives(
        ConfigurableField(id="llm"),
        default_key="anthropic",
        openai=ChatOpenAI(),
    )

    # uses the default model ChatAnthropic
    print(model.invoke("which organization created you?").content)

    # uses ChatOpenAI
    print(
        model.with_config(configurable={"llm": "openai"})
        .invoke("which organization created you?")
        .content
    )
    ```
"""
# Import locally to prevent circular import
from langchain_core.runnables.configurable import (  # noqa: PLC0415

@@ -2750,6 +2797,9 @@ def _seq_output_schema(
    return last.get_output_schema(config)


_RUNNABLE_SEQUENCE_MIN_STEPS = 2


class RunnableSequence(RunnableSerializable[Input, Output]):
    """Sequence of `Runnable` objects, where the output of one is the input of the next.

@@ -2859,7 +2909,7 @@ class RunnableSequence(RunnableSerializable[Input, Output]):
    name: The name of the `Runnable`.
    first: The first `Runnable` in the sequence.
    middle: The middle `Runnable` objects in the sequence.
    last: The last Runnable in the sequence.
    last: The last `Runnable` in the sequence.

Raises:
    ValueError: If the sequence has less than 2 steps.

@@ -2872,8 +2922,11 @@ class RunnableSequence(RunnableSerializable[Input, Output]):
        steps_flat.extend(step.steps)
    else:
        steps_flat.append(coerce_to_runnable(step))
if len(steps_flat) < 2:
    msg = f"RunnableSequence must have at least 2 steps, got {len(steps_flat)}"
if len(steps_flat) < _RUNNABLE_SEQUENCE_MIN_STEPS:
    msg = (
        f"RunnableSequence must have at least {_RUNNABLE_SEQUENCE_MIN_STEPS} "
        f"steps, got {len(steps_flat)}"
    )
    raise ValueError(msg)
super().__init__(
    first=steps_flat[0],

@@ -2904,7 +2957,7 @@ class RunnableSequence(RunnableSerializable[Input, Output]):
@classmethod
@override
def is_lc_serializable(cls) -> bool:
    """Return True as this class is serializable."""
    """Return `True` as this class is serializable."""
    return True

model_config = ConfigDict(

@@ -3500,7 +3553,7 @@ class RunnableParallel(RunnableSerializable[Input, dict[str, Any]]):

Returns a mapping of their outputs.

`RunnableParallel` is one of the two main composition primitives for the LCEL,
`RunnableParallel` is one of the two main composition primitives,
alongside `RunnableSequence`. It invokes `Runnable`s concurrently, providing the
same input to each.
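
A minimal sketch of the primitive this docstring describes (illustrative, not part of the diff): each step receives the same input, and the outputs come back as a mapping keyed by step name.

```python
from langchain_core.runnables import RunnableLambda, RunnableParallel

# Both branches run concurrently on the same input.
parallel = RunnableParallel(
    doubled=RunnableLambda(lambda x: x * 2),
    squared=RunnableLambda(lambda x: x * x),
)
print(parallel.invoke(3))  # {'doubled': 6, 'squared': 9}
```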

@@ -3610,7 +3663,7 @@ class RunnableParallel(RunnableSerializable[Input, dict[str, Any]]):
@classmethod
@override
def is_lc_serializable(cls) -> bool:
    """Return True as this class is serializable."""
    """Return `True` as this class is serializable."""
    return True

@classmethod

@@ -3668,6 +3721,12 @@ class RunnableParallel(RunnableSerializable[Input, dict[str, Any]]):
    == "object"
    for s in self.steps__.values()
):
    for step in self.steps__.values():
        fields = step.get_input_schema(config).model_fields
        root_field = fields.get("root")
        if root_field is not None and root_field.annotation != Any:
            return super().get_input_schema(config)

# This is correct, but pydantic typings/mypy don't think so.
return create_model_v2(
    self.get_name("Input"),

@@ -4477,7 +4536,7 @@ class RunnableLambda(Runnable[Input, Output]):
# on itemgetter objects, so we have to parse the repr
items = str(func).replace("operator.itemgetter(", "")[:-1].split(", ")
if all(
    item[0] == "'" and item[-1] == "'" and len(item) > 2 for item in items
    item[0] == "'" and item[-1] == "'" and item != "''" for item in items
):
    fields = {item[1:-1]: (Any, ...) for item in items}
    # It's a dict, lol

@@ -5139,7 +5198,7 @@ class RunnableEachBase(RunnableSerializable[list[Input], list[Output]]):
@classmethod
@override
def is_lc_serializable(cls) -> bool:
    """Return True as this class is serializable."""
    """Return `True` as this class is serializable."""
    return True

@classmethod

@@ -5322,7 +5381,7 @@ class RunnableEach(RunnableEachBase[Input, Output]):


class RunnableBindingBase(RunnableSerializable[Input, Output]):  # type: ignore[no-redef]
    """`Runnable` that delegates calls to another `Runnable` with a set of kwargs.
    """`Runnable` that delegates calls to another `Runnable` with a set of `**kwargs`.

    Use only if creating a new `RunnableBinding` subclass with different `__init__`
    args.

@@ -5462,7 +5521,7 @@ class RunnableBindingBase(RunnableSerializable[Input, Output]):  # type: ignore[
@classmethod
@override
def is_lc_serializable(cls) -> bool:
    """Return True as this class is serializable."""
    """Return `True` as this class is serializable."""
    return True

@classmethod

@@ -5752,7 +5811,7 @@ class RunnableBinding(RunnableBindingBase[Input, Output]):  # type: ignore[no-re
```python
# Create a Runnable binding that invokes the chat model with the
# additional kwarg `stop=['-']` when running it.
from langchain_community.chat_models import ChatOpenAI
from langchain_openai import ChatOpenAI

model = ChatOpenAI()
model.invoke('Say "Parrot-MAGIC"', stop=["-"])  # Should return `Parrot`

@@ -36,11 +36,13 @@ from langchain_core.runnables.utils import (
    get_unique_config_specs,
)

_MIN_BRANCHES = 2


class RunnableBranch(RunnableSerializable[Input, Output]):
    """Runnable that selects which branch to run based on a condition.
    """`Runnable` that selects which branch to run based on a condition.

    The Runnable is initialized with a list of `(condition, Runnable)` pairs and
    The `Runnable` is initialized with a list of `(condition, Runnable)` pairs and
    a default branch.

    When operating on an input, the first condition that evaluates to True is

@@ -86,12 +88,12 @@ class RunnableBranch(RunnableSerializable[Input, Output]):
    Defaults a `Runnable` to run if no condition is met.

    Raises:
        ValueError: If the number of branches is less than 2.
        ValueError: If the number of branches is less than `2`.
        TypeError: If the default branch is not `Runnable`, `Callable` or `Mapping`.
        TypeError: If a branch is not a tuple or list.
        ValueError: If a branch is not of length 2.
        TypeError: If a branch is not a `tuple` or `list`.
        ValueError: If a branch is not of length `2`.
    """
    if len(branches) < 2:
    if len(branches) < _MIN_BRANCHES:
        msg = "RunnableBranch requires at least two branches"
        raise ValueError(msg)

@@ -118,7 +120,7 @@ class RunnableBranch(RunnableSerializable[Input, Output]):
    )
    raise TypeError(msg)

    if len(branch) != 2:
    if len(branch) != _MIN_BRANCHES:
        msg = (
            f"RunnableBranch branches must be "
            f"tuples or lists of length 2, not {len(branch)}"

@@ -140,7 +142,7 @@ class RunnableBranch(RunnableSerializable[Input, Output]):

@classmethod
def is_lc_serializable(cls) -> bool:
    """Return True as this class is serializable."""
    """Return `True` as this class is serializable."""
    return True

@classmethod

@@ -187,12 +189,12 @@ class RunnableBranch(RunnableSerializable[Input, Output]):
def invoke(
    self, input: Input, config: RunnableConfig | None = None, **kwargs: Any
) -> Output:
    """First evaluates the condition, then delegate to true or false branch.
    """First evaluates the condition, then delegate to `True` or `False` branch.

    Args:
        input: The input to the Runnable.
        config: The configuration for the Runnable.
        **kwargs: Additional keyword arguments to pass to the Runnable.
        input: The input to the `Runnable`.
        config: The configuration for the `Runnable`.
        **kwargs: Additional keyword arguments to pass to the `Runnable`.

    Returns:
        The output of the branch that was run.
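
For reference, a minimal sketch of the branching behavior documented here (illustrative, not part of the diff):

```python
from langchain_core.runnables import RunnableBranch, RunnableLambda

branch = RunnableBranch(
    # Each positional pair is (condition, runnable); the first condition
    # that evaluates to True selects the branch.
    (lambda x: isinstance(x, str), RunnableLambda(lambda x: x.upper())),
    # The final positional argument is the default branch.
    RunnableLambda(str),
)

print(branch.invoke("hello"))  # HELLO
print(branch.invoke(42))  # 42 (default branch)
```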

@@ -297,12 +299,12 @@ class RunnableBranch(RunnableSerializable[Input, Output]):
    config: RunnableConfig | None = None,
    **kwargs: Any | None,
) -> Iterator[Output]:
    """First evaluates the condition, then delegate to true or false branch.
    """First evaluates the condition, then delegate to `True` or `False` branch.

    Args:
        input: The input to the Runnable.
        config: The configuration for the Runnable.
        **kwargs: Additional keyword arguments to pass to the Runnable.
        input: The input to the `Runnable`.
        config: The configuration for the `Runnable`.
        **kwargs: Additional keyword arguments to pass to the `Runnable`.

    Yields:
        The output of the branch that was run.

@@ -381,12 +383,12 @@ class RunnableBranch(RunnableSerializable[Input, Output]):
    config: RunnableConfig | None = None,
    **kwargs: Any | None,
) -> AsyncIterator[Output]:
    """First evaluates the condition, then delegate to true or false branch.
    """First evaluates the condition, then delegate to `True` or `False` branch.

    Args:
        input: The input to the Runnable.
        config: The configuration for the Runnable.
        **kwargs: Additional keyword arguments to pass to the Runnable.
        input: The input to the `Runnable`.
        config: The configuration for the `Runnable`.
        **kwargs: Additional keyword arguments to pass to the `Runnable`.

    Yields:
        The output of the branch that was run.

@@ -47,54 +47,59 @@ class EmptyDict(TypedDict, total=False):


class RunnableConfig(TypedDict, total=False):
    """Configuration for a Runnable."""
    """Configuration for a `Runnable`.

    See the [reference docs](https://reference.langchain.com/python/langchain_core/runnables/#langchain_core.runnables.RunnableConfig)
    for more details.
    """

    tags: list[str]
    """
    Tags for this call and any sub-calls (eg. a Chain calling an LLM).
    """Tags for this call and any sub-calls (e.g. a Chain calling an LLM).

    You can use these to filter calls.
    """

    metadata: dict[str, Any]
    """
    Metadata for this call and any sub-calls (eg. a Chain calling an LLM).
    """Metadata for this call and any sub-calls (e.g. a Chain calling an LLM).

    Keys should be strings, values should be JSON-serializable.
    """

    callbacks: Callbacks
    """
    Callbacks for this call and any sub-calls (eg. a Chain calling an LLM).
    """Callbacks for this call and any sub-calls (e.g. a Chain calling an LLM).

    Tags are passed to all callbacks, metadata is passed to handle*Start callbacks.
    """

    run_name: str
    """
    Name for the tracer run for this call. Defaults to the name of the class.
    """
    """Name for the tracer run for this call.

    Defaults to the name of the class."""

    max_concurrency: int | None
    """
    Maximum number of parallel calls to make. If not provided, defaults to
    `ThreadPoolExecutor`'s default.
    """Maximum number of parallel calls to make.

    If not provided, defaults to `ThreadPoolExecutor`'s default.
    """

    recursion_limit: int
    """
    Maximum number of times a call can recurse. If not provided, defaults to `25`.
    """Maximum number of times a call can recurse.

    If not provided, defaults to `25`.
    """

    configurable: dict[str, Any]
    """
    Runtime values for attributes previously made configurable on this `Runnable`,
    """Runtime values for attributes previously made configurable on this `Runnable`,
    or sub-Runnables, through `configurable_fields` or `configurable_alternatives`.

    Check `output_schema` for a description of the attributes that have been made
    configurable.
    """

    run_id: uuid.UUID | None
    """
    Unique identifier for the tracer run for this call. If not provided, a new UUID
    will be generated.
    """Unique identifier for the tracer run for this call.

    If not provided, a new UUID will be generated.
    """

@@ -1,4 +1,4 @@
"""Runnables that can be dynamically configured."""
"""`Runnable` objects that can be dynamically configured."""

from __future__ import annotations

@@ -47,14 +47,14 @@ if TYPE_CHECKING:


class DynamicRunnable(RunnableSerializable[Input, Output]):
    """Serializable Runnable that can be dynamically configured.
    """Serializable `Runnable` that can be dynamically configured.

    A DynamicRunnable should be initiated using the `configurable_fields` or
    `configurable_alternatives` method of a Runnable.
    A `DynamicRunnable` should be initiated using the `configurable_fields` or
    `configurable_alternatives` method of a `Runnable`.
    """

    default: RunnableSerializable[Input, Output]
    """The default Runnable to use."""
    """The default `Runnable` to use."""

    config: RunnableConfig | None = None
    """The configuration to use."""

@@ -66,7 +66,7 @@ class DynamicRunnable(RunnableSerializable[Input, Output]):
@classmethod
@override
def is_lc_serializable(cls) -> bool:
    """Return True as this class is serializable."""
    """Return `True` as this class is serializable."""
    return True

@classmethod

@@ -120,13 +120,13 @@ class DynamicRunnable(RunnableSerializable[Input, Output]):
def prepare(
    self, config: RunnableConfig | None = None
) -> tuple[Runnable[Input, Output], RunnableConfig]:
    """Prepare the Runnable for invocation.
    """Prepare the `Runnable` for invocation.

    Args:
        config: The configuration to use.

    Returns:
        The prepared Runnable and configuration.
        The prepared `Runnable` and configuration.
    """
    runnable: Runnable[Input, Output] = self
    while isinstance(runnable, DynamicRunnable):

@@ -316,12 +316,12 @@ class DynamicRunnable(RunnableSerializable[Input, Output]):


class RunnableConfigurableFields(DynamicRunnable[Input, Output]):
    """Runnable that can be dynamically configured.
    """`Runnable` that can be dynamically configured.

    A RunnableConfigurableFields should be initiated using the
    `configurable_fields` method of a Runnable.
    A `RunnableConfigurableFields` should be initiated using the
    `configurable_fields` method of a `Runnable`.

    Here is an example of using a RunnableConfigurableFields with LLMs:
    Here is an example of using a `RunnableConfigurableFields` with LLMs:

    ```python
    from langchain_core.prompts import PromptTemplate

@@ -348,7 +348,7 @@ class RunnableConfigurableFields(DynamicRunnable[Input, Output]):
    chain.invoke({"x": 0}, config={"configurable": {"temperature": 0.9}})
    ```

    Here is an example of using a RunnableConfigurableFields with HubRunnables:
    Here is an example of using a `RunnableConfigurableFields` with `HubRunnables`:

    ```python
    from langchain_core.prompts import PromptTemplate

@@ -380,7 +380,7 @@ class RunnableConfigurableFields(DynamicRunnable[Input, Output]):

@property
def config_specs(self) -> list[ConfigurableFieldSpec]:
    """Get the configuration specs for the RunnableConfigurableFields.
    """Get the configuration specs for the `RunnableConfigurableFields`.

    Returns:
        The configuration specs.

@@ -473,13 +473,13 @@ _enums_for_spec_lock = threading.Lock()


class RunnableConfigurableAlternatives(DynamicRunnable[Input, Output]):
    """Runnable that can be dynamically configured.
    """`Runnable` that can be dynamically configured.

    A RunnableConfigurableAlternatives should be initiated using the
    `configurable_alternatives` method of a Runnable or can be
    A `RunnableConfigurableAlternatives` should be initiated using the
    `configurable_alternatives` method of a `Runnable` or can be
    initiated directly as well.

    Here is an example of using a RunnableConfigurableAlternatives that uses
    Here is an example of using a `RunnableConfigurableAlternatives` that uses
    alternative prompts to illustrate its functionality:

    ```python

@@ -506,7 +506,7 @@ class RunnableConfigurableAlternatives(DynamicRunnable[Input, Output]):
    chain.with_config(configurable={"prompt": "poem"}).invoke({"topic": "bears"})
    ```

    Equivalently, you can initialize RunnableConfigurableAlternatives directly
    Equivalently, you can initialize `RunnableConfigurableAlternatives` directly
    and use in LCEL in the same way:

    ```python

@@ -531,7 +531,7 @@ class RunnableConfigurableAlternatives(DynamicRunnable[Input, Output]):
    """

    which: ConfigurableField
    """The ConfigurableField to use to choose between alternatives."""
    """The `ConfigurableField` to use to choose between alternatives."""

    alternatives: dict[
        str,

@@ -544,8 +544,9 @@ class RunnableConfigurableAlternatives(DynamicRunnable[Input, Output]):

    prefix_keys: bool
    """Whether to prefix configurable fields of each alternative with a namespace
    of the form <which.id>==<alternative_key>, eg. a key named "temperature" used by
    the alternative named "gpt3" becomes "model==gpt3/temperature"."""
    of the form <which.id>==<alternative_key>, e.g. a key named "temperature" used by
    the alternative named "gpt3" becomes "model==gpt3/temperature".
    """

    @property
    @override

@@ -638,24 +639,24 @@ class RunnableConfigurableAlternatives(DynamicRunnable[Input, Output]):


def _strremoveprefix(s: str, prefix: str) -> str:
    """str.removeprefix() is only available in Python 3.9+."""
    """`str.removeprefix()` is only available in Python 3.9+."""
    return s.replace(prefix, "", 1) if s.startswith(prefix) else s


def prefix_config_spec(
    spec: ConfigurableFieldSpec, prefix: str
) -> ConfigurableFieldSpec:
    """Prefix the id of a ConfigurableFieldSpec.
    """Prefix the id of a `ConfigurableFieldSpec`.

    This is useful when a RunnableConfigurableAlternatives is used as a
    ConfigurableField of another RunnableConfigurableAlternatives.
    This is useful when a `RunnableConfigurableAlternatives` is used as a
    `ConfigurableField` of another `RunnableConfigurableAlternatives`.

    Args:
        spec: The ConfigurableFieldSpec to prefix.
        spec: The `ConfigurableFieldSpec` to prefix.
        prefix: The prefix to add.

    Returns:
        The prefixed ConfigurableFieldSpec.
        The prefixed `ConfigurableFieldSpec`.
    """
    return (
        ConfigurableFieldSpec(

@@ -677,15 +678,15 @@ def make_options_spec(
) -> ConfigurableFieldSpec:
    """Make options spec.

    Make a ConfigurableFieldSpec for a ConfigurableFieldSingleOption or
    ConfigurableFieldMultiOption.
    Make a `ConfigurableFieldSpec` for a `ConfigurableFieldSingleOption` or
    `ConfigurableFieldMultiOption`.

    Args:
        spec: The ConfigurableFieldSingleOption or ConfigurableFieldMultiOption.
        spec: The `ConfigurableFieldSingleOption` or `ConfigurableFieldMultiOption`.
        description: The description to use if the spec does not have one.

    Returns:
        The ConfigurableFieldSpec.
        The `ConfigurableFieldSpec`.
    """
    with _enums_for_spec_lock:
        if enum := _enums_for_spec.get(spec):

@@ -35,20 +35,20 @@ if TYPE_CHECKING:


class RunnableWithFallbacks(RunnableSerializable[Input, Output]):
    """Runnable that can fallback to other Runnables if it fails.
    """`Runnable` that can fallback to other `Runnable`s if it fails.

    External APIs (e.g., APIs for a language model) may at times experience
    degraded performance or even downtime.

    In these cases, it can be useful to have a fallback Runnable that can be
    used in place of the original Runnable (e.g., fallback to another LLM provider).
    In these cases, it can be useful to have a fallback `Runnable` that can be
    used in place of the original `Runnable` (e.g., fallback to another LLM provider).

    Fallbacks can be defined at the level of a single Runnable, or at the level
    of a chain of Runnables. Fallbacks are tried in order until one succeeds or
    Fallbacks can be defined at the level of a single `Runnable`, or at the level
    of a chain of `Runnable`s. Fallbacks are tried in order until one succeeds or
    all fail.

    While you can instantiate a `RunnableWithFallbacks` directly, it is usually
    more convenient to use the `with_fallbacks` method on a Runnable.
    more convenient to use the `with_fallbacks` method on a `Runnable`.

    Example:
        ```python

@@ -87,7 +87,7 @@ class RunnableWithFallbacks(RunnableSerializable[Input, Output]):
    """

    runnable: Runnable[Input, Output]
    """The Runnable to run first."""
    """The `Runnable` to run first."""
    fallbacks: Sequence[Runnable[Input, Output]]
    """A sequence of fallbacks to try."""
    exceptions_to_handle: tuple[type[BaseException], ...] = (Exception,)

@@ -97,9 +97,12 @@ class RunnableWithFallbacks(RunnableSerializable[Input, Output]):
    """
    exception_key: str | None = None
    """If `string` is specified then handled exceptions will be passed to fallbacks as
    part of the input under the specified key. If `None`, exceptions
    will not be passed to fallbacks. If used, the base Runnable and its fallbacks
    must accept a dictionary as input."""
    part of the input under the specified key.

    If `None`, exceptions will not be passed to fallbacks.

    If used, the base `Runnable` and its fallbacks must accept a dictionary as input.
    """

    model_config = ConfigDict(
        arbitrary_types_allowed=True,

@@ -137,7 +140,7 @@ class RunnableWithFallbacks(RunnableSerializable[Input, Output]):
@classmethod
@override
def is_lc_serializable(cls) -> bool:
    """Return True as this class is serializable."""
    """Return `True` as this class is serializable."""
    return True

@classmethod

@@ -152,10 +155,10 @@ class RunnableWithFallbacks(RunnableSerializable[Input, Output]):

@property
def runnables(self) -> Iterator[Runnable[Input, Output]]:
    """Iterator over the Runnable and its fallbacks.
    """Iterator over the `Runnable` and its fallbacks.

    Yields:
        The Runnable then its fallbacks.
        The `Runnable` then its fallbacks.
    """
    yield self.runnable
    yield from self.fallbacks

@@ -589,14 +592,14 @@ class RunnableWithFallbacks(RunnableSerializable[Input, Output]):
    await run_manager.on_chain_end(output)

def __getattr__(self, name: str) -> Any:
    """Get an attribute from the wrapped Runnable and its fallbacks.
    """Get an attribute from the wrapped `Runnable` and its fallbacks.

    Returns:
        If the attribute is anything other than a method that outputs a Runnable,
        returns getattr(self.runnable, name). If the attribute is a method that
        does return a new Runnable (e.g. model.bind_tools([...]) outputs a new
        RunnableBinding) then self.runnable and each of the runnables in
        self.fallbacks is replaced with getattr(x, name).
        If the attribute is anything other than a method that outputs a `Runnable`,
        returns `getattr(self.runnable, name)`. If the attribute is a method that
        does return a new `Runnable` (e.g. `model.bind_tools([...])` outputs a new
        `RunnableBinding`) then `self.runnable` and each of the runnables in
        `self.fallbacks` is replaced with `getattr(x, name)`.

    Example:
        ```python

@@ -604,7 +607,7 @@ class RunnableWithFallbacks(RunnableSerializable[Input, Output]):
        from langchain_anthropic import ChatAnthropic

        gpt_4o = ChatOpenAI(model="gpt-4o")
        claude_3_sonnet = ChatAnthropic(model="claude-3-7-sonnet-20250219")
        claude_3_sonnet = ChatAnthropic(model="claude-sonnet-4-5-20250929")
        model = gpt_4o.with_fallbacks([claude_3_sonnet])

        model.model_name

@@ -618,7 +621,6 @@ class RunnableWithFallbacks(RunnableSerializable[Input, Output]):
        runnable=RunnableBinding(bound=ChatOpenAI(...), kwargs={"tools": [...]}),
        fallbacks=[RunnableBinding(bound=ChatAnthropic(...), kwargs={"tools": [...]})],
        )

        ```
    """  # noqa: E501
    attr = getattr(self.runnable, name)

@@ -4,7 +4,6 @@ from __future__ import annotations

import inspect
from collections import defaultdict
from collections.abc import Callable
from dataclasses import dataclass, field
from enum import Enum
from typing import (

@@ -22,7 +21,7 @@ from langchain_core.runnables.base import Runnable, RunnableSerializable
from langchain_core.utils.pydantic import _IgnoreUnserializable, is_basemodel_subclass

if TYPE_CHECKING:
    from collections.abc import Sequence
    from collections.abc import Callable, Sequence

    from pydantic import BaseModel

@@ -132,7 +131,7 @@ class Branch(NamedTuple):
    condition: Callable[..., str]
    """A callable that returns a string representation of the condition."""
    ends: dict[str, str] | None
    """Optional dictionary of end node ids for the branches. """
    """Optional dictionary of end node IDs for the branches. """


class CurveStyle(Enum):

@@ -642,6 +641,7 @@ class Graph:
    retry_delay: float = 1.0,
    frontmatter_config: dict[str, Any] | None = None,
    base_url: str | None = None,
    proxies: dict[str, str] | None = None,
) -> bytes:
    """Draw the graph as a PNG image using Mermaid.

@@ -674,11 +674,10 @@ class Graph:
    }
    ```
    base_url: The base URL of the Mermaid server for rendering via API.
    proxies: HTTP/HTTPS proxies for requests (e.g. `{"http": "http://127.0.0.1:7890"}`).

    Returns:
        The PNG image as bytes.

    """
    # Import locally to prevent circular import
    from langchain_core.runnables.graph_mermaid import (  # noqa: PLC0415

@@ -699,6 +698,7 @@ class Graph:
    padding=padding,
    max_retries=max_retries,
    retry_delay=retry_delay,
    proxies=proxies,
    base_url=base_url,
)
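
To illustrate how the `base_url` and `proxies` parameters threaded through in this hunk would be used, a sketch (not part of the diff; the server URL and proxy address are placeholders):

```python
from langchain_core.runnables import RunnableLambda

graph = (RunnableLambda(lambda x: x) | RunnableLambda(str)).get_graph()

# Render via a self-hosted Mermaid service, routing HTTP through a proxy.
png_bytes = graph.draw_mermaid_png(
    base_url="https://mermaid.example.com",  # placeholder server
    proxies={"http": "http://127.0.0.1:7890"},  # placeholder proxy
)
with open("graph.png", "wb") as f:
    f.write(png_bytes)
```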

@@ -706,8 +706,10 @@
def _first_node(graph: Graph, exclude: Sequence[str] = ()) -> Node | None:
    """Find the single node that is not a target of any edge.

    Exclude nodes/sources with ids in the exclude list.
    Exclude nodes/sources with IDs in the exclude list.

    If there is no such node, or there are multiple, return `None`.

    When drawing the graph, this node would be the origin.
    """
    targets = {edge.target for edge in graph.edges if edge.source not in exclude}

@@ -722,8 +724,10 @@ def _first_node(graph: Graph, exclude: Sequence[str] = ()) -> Node | None:
def _last_node(graph: Graph, exclude: Sequence[str] = ()) -> Node | None:
    """Find the single node that is not a source of any edge.

    Exclude nodes/targets with ids in the exclude list.
    Exclude nodes/targets with IDs in the exclude list.

    If there is no such node, or there are multiple, return `None`.

    When drawing the graph, this node would be the destination.
    """
    sources = {edge.source for edge in graph.edges if edge.target not in exclude}

@@ -7,7 +7,6 @@ from __future__ import annotations

import math
import os
from collections.abc import Mapping, Sequence
from typing import TYPE_CHECKING, Any

try:

@@ -20,6 +19,8 @@ except ImportError:
    _HAS_GRANDALF = False

if TYPE_CHECKING:
    from collections.abc import Mapping, Sequence

    from langchain_core.runnables.graph import Edge as LangEdge
Some files were not shown because too many files have changed in this diff.