Compare commits

..

220 Commits

Author SHA1 Message Date
Chester Curme
2c938b787f Merge branch 'master' into cc/summarization_patch
# Conflicts:
#	libs/langchain_v1/langchain/agents/middleware/summarization.py
2025-12-02 10:07:51 -05:00
ccurme
c63f23d233 revert(model-profiles): update docs link (#34162) 2025-12-01 17:29:45 +00:00
Mason Daugherty
b7091d391d feat(anthropic): auto append relevant beta headers (#34113) 2025-12-01 12:20:41 -05:00
ccurme
7a2952210e fix(langchain): (SummarizationMiddleware) adjust token counts based on model (#34161) 2025-12-01 16:22:44 +00:00
ccurme
7549845d82 chore(anthropic): vcr integration test (#34160) 2025-12-01 15:28:28 +00:00
Chester Curme
fa18f8eda0 Merge branch 'master' into cc/summarization_patch 2025-12-01 09:39:54 -05:00
Mason Daugherty
878f033ed7 docs(langchain): docstrings for summarization middleware types (#34158)
improving devx :)
2025-12-01 09:39:33 -05:00
Steffen Hausmann
4065106c2e fix(langchain): add types to human_in_the_loop middleware (#34137)
The `HumanInTheLoopMiddleware` is missing a type annotation for the
context schema. Without the fix in this PR, the following code does not
type check:

```
graph = create_agent(
    "gpt-5",
    tools=[send_email_tool, read_email_tool],
    middleware=[
        HumanInTheLoopMiddleware(
            interrupt_on={
                # Require approval or rejection for sending emails
                "send_email_tool": {
                    "allowed_decisions": ["approve", "reject"],
                },
                # Auto-approve reading emails
                "read_email_tool": False,
            }
        ),
    ],
    context_schema=ContextSchema,
)
```

```
Argument of type "list[HumanInTheLoopMiddleware]" cannot be assigned to parameter "middleware" of type "Sequence[AgentMiddleware[StateT_co@create_agent, ContextT@create_agent]]" in function "create_agent"
  "HumanInTheLoopMiddleware" is not assignable to "AgentMiddleware[AgentState[Unknown], ContextSchema | None]"
    Type parameter "ContextT@AgentMiddleware" is invariant, but "None" is not the same as "ContextSchema | None"
```
2025-12-01 08:46:38 -05:00
Mason Daugherty
12df938ace docs(core): update docstrings in RunnableConfig, dereference_refs (#34131) 2025-11-28 03:55:37 -05:00
Mason Daugherty
65ee43cc10 chore(infra): update agent files, remove top-level pyproject (#34128) 2025-11-27 21:06:43 -05:00
Mason Daugherty
fe7c000fc1 fix(model-profiles): update docs link (#34127) 2025-11-28 00:19:36 +00:00
Mason Daugherty
dad50e5624 chore(infra): updated allowed scopes in PR lint configuration (#34115) 2025-11-27 00:34:15 -05:00
Mason Daugherty
0a6d01e61d docs(anthropic,core,langchain): updates (#34106) 2025-11-25 17:58:09 -05:00
Mason Daugherty
c6f8b0875a style(core,langchain,qdrant): fix some docstrings for refs (#34105) 2025-11-25 13:58:53 -05:00
Mason Daugherty
4c3800d743 chore(infra): update PR template, agent files (#34104) 2025-11-25 13:58:41 -05:00
dependabot[bot]
7fe1c4b78f chore(deps): bump actions/checkout from 5 to 6 (#34083)
Bumps [actions/checkout](https://github.com/actions/checkout) from 5 to
6.
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a
href="https://github.com/actions/checkout/releases">actions/checkout's
releases</a>.</em></p>
<blockquote>
<h2>v6.0.0</h2>
<h2>What's Changed</h2>
<ul>
<li>Update README to include Node.js 24 support details and requirements
by <a href="https://github.com/salmanmkc"><code>@​salmanmkc</code></a>
in <a
href="https://redirect.github.com/actions/checkout/pull/2248">actions/checkout#2248</a></li>
<li>Persist creds to a separate file by <a
href="https://github.com/ericsciple"><code>@​ericsciple</code></a> in <a
href="https://redirect.github.com/actions/checkout/pull/2286">actions/checkout#2286</a></li>
<li>v6-beta by <a
href="https://github.com/ericsciple"><code>@​ericsciple</code></a> in <a
href="https://redirect.github.com/actions/checkout/pull/2298">actions/checkout#2298</a></li>
<li>update readme/changelog for v6 by <a
href="https://github.com/ericsciple"><code>@​ericsciple</code></a> in <a
href="https://redirect.github.com/actions/checkout/pull/2311">actions/checkout#2311</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a
href="https://github.com/actions/checkout/compare/v5.0.0...v6.0.0">https://github.com/actions/checkout/compare/v5.0.0...v6.0.0</a></p>
<h2>v6-beta</h2>
<h2>What's Changed</h2>
<p>Updated persist-credentials to store the credentials under
<code>$RUNNER_TEMP</code> instead of directly in the local git
config.</p>
<p>This requires a minimum Actions Runner version of <a
href="https://github.com/actions/runner/releases/tag/v2.329.0">v2.329.0</a>
to access the persisted credentials for <a
href="https://docs.github.com/en/actions/tutorials/use-containerized-services/create-a-docker-container-action">Docker
container action</a> scenarios.</p>
<h2>v5.0.1</h2>
<h2>What's Changed</h2>
<ul>
<li>Port v6 cleanup to v5 by <a
href="https://github.com/ericsciple"><code>@​ericsciple</code></a> in <a
href="https://redirect.github.com/actions/checkout/pull/2301">actions/checkout#2301</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a
href="https://github.com/actions/checkout/compare/v5...v5.0.1">https://github.com/actions/checkout/compare/v5...v5.0.1</a></p>
</blockquote>
</details>
<details>
<summary>Changelog</summary>
<p><em>Sourced from <a
href="https://github.com/actions/checkout/blob/main/CHANGELOG.md">actions/checkout's
changelog</a>.</em></p>
<blockquote>
<h1>Changelog</h1>
<h2>V6.0.0</h2>
<ul>
<li>Persist creds to a separate file by <a
href="https://github.com/ericsciple"><code>@​ericsciple</code></a> in <a
href="https://redirect.github.com/actions/checkout/pull/2286">actions/checkout#2286</a></li>
<li>Update README to include Node.js 24 support details and requirements
by <a href="https://github.com/salmanmkc"><code>@​salmanmkc</code></a>
in <a
href="https://redirect.github.com/actions/checkout/pull/2248">actions/checkout#2248</a></li>
</ul>
<h2>V5.0.1</h2>
<ul>
<li>Port v6 cleanup to v5 by <a
href="https://github.com/ericsciple"><code>@​ericsciple</code></a> in <a
href="https://redirect.github.com/actions/checkout/pull/2301">actions/checkout#2301</a></li>
</ul>
<h2>V5.0.0</h2>
<ul>
<li>Update actions checkout to use node 24 by <a
href="https://github.com/salmanmkc"><code>@​salmanmkc</code></a> in <a
href="https://redirect.github.com/actions/checkout/pull/2226">actions/checkout#2226</a></li>
</ul>
<h2>V4.3.1</h2>
<ul>
<li>Port v6 cleanup to v4 by <a
href="https://github.com/ericsciple"><code>@​ericsciple</code></a> in <a
href="https://redirect.github.com/actions/checkout/pull/2305">actions/checkout#2305</a></li>
</ul>
<h2>V4.3.0</h2>
<ul>
<li>docs: update README.md by <a
href="https://github.com/motss"><code>@​motss</code></a> in <a
href="https://redirect.github.com/actions/checkout/pull/1971">actions/checkout#1971</a></li>
<li>Add internal repos for checking out multiple repositories by <a
href="https://github.com/mouismail"><code>@​mouismail</code></a> in <a
href="https://redirect.github.com/actions/checkout/pull/1977">actions/checkout#1977</a></li>
<li>Documentation update - add recommended permissions to Readme by <a
href="https://github.com/benwells"><code>@​benwells</code></a> in <a
href="https://redirect.github.com/actions/checkout/pull/2043">actions/checkout#2043</a></li>
<li>Adjust positioning of user email note and permissions heading by <a
href="https://github.com/joshmgross"><code>@​joshmgross</code></a> in <a
href="https://redirect.github.com/actions/checkout/pull/2044">actions/checkout#2044</a></li>
<li>Update README.md by <a
href="https://github.com/nebuk89"><code>@​nebuk89</code></a> in <a
href="https://redirect.github.com/actions/checkout/pull/2194">actions/checkout#2194</a></li>
<li>Update CODEOWNERS for actions by <a
href="https://github.com/TingluoHuang"><code>@​TingluoHuang</code></a>
in <a
href="https://redirect.github.com/actions/checkout/pull/2224">actions/checkout#2224</a></li>
<li>Update package dependencies by <a
href="https://github.com/salmanmkc"><code>@​salmanmkc</code></a> in <a
href="https://redirect.github.com/actions/checkout/pull/2236">actions/checkout#2236</a></li>
</ul>
<h2>v4.2.2</h2>
<ul>
<li><code>url-helper.ts</code> now leverages well-known environment
variables by <a href="https://github.com/jww3"><code>@​jww3</code></a>
in <a
href="https://redirect.github.com/actions/checkout/pull/1941">actions/checkout#1941</a></li>
<li>Expand unit test coverage for <code>isGhes</code> by <a
href="https://github.com/jww3"><code>@​jww3</code></a> in <a
href="https://redirect.github.com/actions/checkout/pull/1946">actions/checkout#1946</a></li>
</ul>
<h2>v4.2.1</h2>
<ul>
<li>Check out other refs/* by commit if provided, fall back to ref by <a
href="https://github.com/orhantoy"><code>@​orhantoy</code></a> in <a
href="https://redirect.github.com/actions/checkout/pull/1924">actions/checkout#1924</a></li>
</ul>
<h2>v4.2.0</h2>
<ul>
<li>Add Ref and Commit outputs by <a
href="https://github.com/lucacome"><code>@​lucacome</code></a> in <a
href="https://redirect.github.com/actions/checkout/pull/1180">actions/checkout#1180</a></li>
<li>Dependency updates by <a
href="https://github.com/dependabot"><code>@​dependabot</code></a>- <a
href="https://redirect.github.com/actions/checkout/pull/1777">actions/checkout#1777</a>,
<a
href="https://redirect.github.com/actions/checkout/pull/1872">actions/checkout#1872</a></li>
</ul>
<h2>v4.1.7</h2>
<ul>
<li>Bump the minor-npm-dependencies group across 1 directory with 4
updates by <a
href="https://github.com/dependabot"><code>@​dependabot</code></a> in <a
href="https://redirect.github.com/actions/checkout/pull/1739">actions/checkout#1739</a></li>
<li>Bump actions/checkout from 3 to 4 by <a
href="https://github.com/dependabot"><code>@​dependabot</code></a> in <a
href="https://redirect.github.com/actions/checkout/pull/1697">actions/checkout#1697</a></li>
<li>Check out other refs/* by commit by <a
href="https://github.com/orhantoy"><code>@​orhantoy</code></a> in <a
href="https://redirect.github.com/actions/checkout/pull/1774">actions/checkout#1774</a></li>
<li>Pin actions/checkout's own workflows to a known, good, stable
version. by <a href="https://github.com/jww3"><code>@​jww3</code></a> in
<a
href="https://redirect.github.com/actions/checkout/pull/1776">actions/checkout#1776</a></li>
</ul>
<h2>v4.1.6</h2>
<ul>
<li>Check platform to set archive extension appropriately by <a
href="https://github.com/cory-miller"><code>@​cory-miller</code></a> in
<a
href="https://redirect.github.com/actions/checkout/pull/1732">actions/checkout#1732</a></li>
</ul>
<h2>v4.1.5</h2>
<ul>
<li>Update NPM dependencies by <a
href="https://github.com/cory-miller"><code>@​cory-miller</code></a> in
<a
href="https://redirect.github.com/actions/checkout/pull/1703">actions/checkout#1703</a></li>
<li>Bump github/codeql-action from 2 to 3 by <a
href="https://github.com/dependabot"><code>@​dependabot</code></a> in <a
href="https://redirect.github.com/actions/checkout/pull/1694">actions/checkout#1694</a></li>
<li>Bump actions/setup-node from 1 to 4 by <a
href="https://github.com/dependabot"><code>@​dependabot</code></a> in <a
href="https://redirect.github.com/actions/checkout/pull/1696">actions/checkout#1696</a></li>
<li>Bump actions/upload-artifact from 2 to 4 by <a
href="https://github.com/dependabot"><code>@​dependabot</code></a> in <a
href="https://redirect.github.com/actions/checkout/pull/1695">actions/checkout#1695</a></li>
</ul>
<!-- raw HTML omitted -->
</blockquote>
<p>... (truncated)</p>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a
href="1af3b93b68"><code>1af3b93</code></a>
update readme/changelog for v6 (<a
href="https://redirect.github.com/actions/checkout/issues/2311">#2311</a>)</li>
<li><a
href="71cf2267d8"><code>71cf226</code></a>
v6-beta (<a
href="https://redirect.github.com/actions/checkout/issues/2298">#2298</a>)</li>
<li><a
href="069c695914"><code>069c695</code></a>
Persist creds to a separate file (<a
href="https://redirect.github.com/actions/checkout/issues/2286">#2286</a>)</li>
<li><a
href="ff7abcd0c3"><code>ff7abcd</code></a>
Update README to include Node.js 24 support details and requirements (<a
href="https://redirect.github.com/actions/checkout/issues/2248">#2248</a>)</li>
<li>See full diff in <a
href="https://github.com/actions/checkout/compare/v5...v6">compare
view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility
score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=actions/checkout&package-manager=github_actions&previous-version=5&new-version=6)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't
alter it yourself. You can also trigger a rebase manually by commenting
`@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits
that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after
your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge
and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating
it. You can achieve the same result by closing it manually
- `@dependabot show <dependency name> ignore conditions` will show all
of the ignore conditions of the specified dependency
- `@dependabot ignore this major version` will close this PR and stop
Dependabot creating any more for this major version (unless you reopen
the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop
Dependabot creating any more for this minor version (unless you reopen
the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop
Dependabot creating any more for this dependency (unless you reopen the
PR or upgrade to it yourself)


</details>

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Mason Daugherty <mason@langchain.dev>
2025-11-24 19:10:28 -05:00
Bagatur
c375732396 fix(core): handle missing StructuredPrompt schema (#34096)
- **Description:** if you don't pass in schema= or schema_= to
StructuredPrompt(...) today, you get a confusing KeyError. Raise a more
readable ValueError instead.
- **Issue:** na
- **Dependencies:** na
2025-11-24 18:39:29 -05:00
Chester Curme
b2db842cd4 treat keep threshold as a hard cap 2025-11-24 11:21:28 -05:00
ccurme
9c21f83e82 release(langchain): 1.1 (#34090) 2025-11-24 10:27:13 -05:00
ccurme
880652b713 release: (integration packages): 1.1 (#34088) 2025-11-24 10:00:06 -05:00
Sydney Runkle
4ab94579ad feat(langchain): support SystemMessage in create_agent's system_prompt (#34055)
* `create_agent`'s `system_prompt` allows `str | SystemMessage`
* added `system_message: SystemMessage` on `ModelRequest`
* `ModelRequest.system_prompt` is a function of `system_message.text`,
now deprecated
* disallow setting `system_prompt` and `system_message`
* `ModelRequest.system_prompt` can still be set (w/ custom setattr) for
custom backwards compat, but the updates just get propagated to the
`ModelRequest.system_message`

---------

Co-authored-by: Chester Curme <chester.curme@gmail.com>
2025-11-24 14:53:57 +00:00
ccurme
eb0545a173 release: (integration packages) 1.1 (#34087) 2025-11-24 09:13:01 -05:00
ccurme
a2e389de9f release(fireworks): 1.1 (#34086) 2025-11-24 09:05:43 -05:00
Alex Kondratev
01573c1375 fix(core): ensure_ascii=False in PydanticOutputParser exception formatting (#34006)
- **Description:** When formatting an error, `PydanticOutputParser`
dumps json with default `ensure_ascii=True`
  -  **Issue:** Fixes #34005
  - **Dependencies:** None

- [x] **Lint and test**: Run `make format`, `make lint` and `make test`
from the root of the package(s) you've modified. **We will not consider
a PR unless these three are passing in CI.** See [contribution
guidelines](https://docs.langchain.com/oss/python/contributing) for
more.

Co-authored-by: Mason Daugherty <mason@langchain.dev>
2025-11-23 20:22:50 -05:00
Abhinav
2ba3ce81a6 fix(openai): make GPT-5 temperature validation case-insensitive (#34012)
Fixed a bug where GPT-5 temperature validation was case-sensitive,
causing issues when users
specified Azure deployment names or model names in uppercase (e.g.,
`"GPT-5-2025-01-01"`, `"GPT-5-NANO"`). The validation now correctly
handles model names regardless of case.

  Changes made:
- Updated `validate_temperature()` method in `BaseChatOpenAI` to perform
case-insensitive
  model name comparisons
- Updated `_get_encoding_model()` method to use case-insensitive checks
for tiktoken encoder
  selection
- Added comprehensive unit tests to verify case-insensitive behavior
with various case
  combinations

  **Issue:** Fixes #34003

  **Dependencies:** None

  **Test Coverage:**
  - All existing tests pass
- New test `test_gpt_5_temperature_case_insensitive` covers uppercase,
lowercase, and
  mixed-case model names
- Tests verify both non-chat GPT-5 models (temperature removed) and chat
models (temperature
  preserved)
  - Lint and format checks pass (`make lint`, `make format`)

---------

Co-authored-by: Mason Daugherty <github@mdrxy.com>
2025-11-23 20:17:03 -05:00
dependabot[bot]
4e4e5d7337 chore(infra): bump actions/github-script from 6 to 8 (#33991)
Bumps [actions/github-script](https://github.com/actions/github-script)
from 6 to 8.
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a
href="https://github.com/actions/github-script/releases">actions/github-script's
releases</a>.</em></p>
<blockquote>
<h2>v8.0.0</h2>
<h2>What's Changed</h2>
<ul>
<li>Update Node.js version support to 24.x by <a
href="https://github.com/salmanmkc"><code>@​salmanmkc</code></a> in <a
href="https://redirect.github.com/actions/github-script/pull/637">actions/github-script#637</a></li>
<li>README for updating actions/github-script from v7 to v8 by <a
href="https://github.com/sneha-krip"><code>@​sneha-krip</code></a> in <a
href="https://redirect.github.com/actions/github-script/pull/653">actions/github-script#653</a></li>
</ul>
<h2>⚠️ Minimum Compatible Runner Version</h2>
<p><strong>v2.327.1</strong><br />
<a
href="https://github.com/actions/runner/releases/tag/v2.327.1">Release
Notes</a></p>
<p>Make sure your runner is updated to this version or newer to use this
release.</p>
<h2>New Contributors</h2>
<ul>
<li><a href="https://github.com/salmanmkc"><code>@​salmanmkc</code></a>
made their first contribution in <a
href="https://redirect.github.com/actions/github-script/pull/637">actions/github-script#637</a></li>
<li><a
href="https://github.com/sneha-krip"><code>@​sneha-krip</code></a> made
their first contribution in <a
href="https://redirect.github.com/actions/github-script/pull/653">actions/github-script#653</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a
href="https://github.com/actions/github-script/compare/v7.1.0...v8.0.0">https://github.com/actions/github-script/compare/v7.1.0...v8.0.0</a></p>
<h2>v7.1.0</h2>
<h2>What's Changed</h2>
<ul>
<li>Upgrade husky to v9 by <a
href="https://github.com/benelan"><code>@​benelan</code></a> in <a
href="https://redirect.github.com/actions/github-script/pull/482">actions/github-script#482</a></li>
<li>Add workflow file for publishing releases to immutable action
package by <a
href="https://github.com/Jcambass"><code>@​Jcambass</code></a> in <a
href="https://redirect.github.com/actions/github-script/pull/485">actions/github-script#485</a></li>
<li>Upgrade IA Publish by <a
href="https://github.com/Jcambass"><code>@​Jcambass</code></a> in <a
href="https://redirect.github.com/actions/github-script/pull/486">actions/github-script#486</a></li>
<li>Fix workflow status badges by <a
href="https://github.com/joshmgross"><code>@​joshmgross</code></a> in <a
href="https://redirect.github.com/actions/github-script/pull/497">actions/github-script#497</a></li>
<li>Update usage of <code>actions/upload-artifact</code> by <a
href="https://github.com/joshmgross"><code>@​joshmgross</code></a> in <a
href="https://redirect.github.com/actions/github-script/pull/512">actions/github-script#512</a></li>
<li>Clear up package name confusion by <a
href="https://github.com/joshmgross"><code>@​joshmgross</code></a> in <a
href="https://redirect.github.com/actions/github-script/pull/514">actions/github-script#514</a></li>
<li>Update dependencies with <code>npm audit fix</code> by <a
href="https://github.com/joshmgross"><code>@​joshmgross</code></a> in <a
href="https://redirect.github.com/actions/github-script/pull/515">actions/github-script#515</a></li>
<li>Specify that the used script is JavaScript by <a
href="https://github.com/timotk"><code>@​timotk</code></a> in <a
href="https://redirect.github.com/actions/github-script/pull/478">actions/github-script#478</a></li>
<li>chore: Add Dependabot for NPM and Actions by <a
href="https://github.com/nschonni"><code>@​nschonni</code></a> in <a
href="https://redirect.github.com/actions/github-script/pull/472">actions/github-script#472</a></li>
<li>Define <code>permissions</code> in workflows and update actions by
<a href="https://github.com/joshmgross"><code>@​joshmgross</code></a> in
<a
href="https://redirect.github.com/actions/github-script/pull/531">actions/github-script#531</a></li>
<li>chore: Add Dependabot for .github/actions/install-dependencies by <a
href="https://github.com/nschonni"><code>@​nschonni</code></a> in <a
href="https://redirect.github.com/actions/github-script/pull/532">actions/github-script#532</a></li>
<li>chore: Remove .vscode settings by <a
href="https://github.com/nschonni"><code>@​nschonni</code></a> in <a
href="https://redirect.github.com/actions/github-script/pull/533">actions/github-script#533</a></li>
<li>ci: Use github/setup-licensed by <a
href="https://github.com/nschonni"><code>@​nschonni</code></a> in <a
href="https://redirect.github.com/actions/github-script/pull/473">actions/github-script#473</a></li>
<li>make octokit instance available as octokit on top of github, to make
it easier to seamlessly copy examples from GitHub rest api or octokit
documentations by <a
href="https://github.com/iamstarkov"><code>@​iamstarkov</code></a> in <a
href="https://redirect.github.com/actions/github-script/pull/508">actions/github-script#508</a></li>
<li>Remove <code>octokit</code> README updates for v7 by <a
href="https://github.com/joshmgross"><code>@​joshmgross</code></a> in <a
href="https://redirect.github.com/actions/github-script/pull/557">actions/github-script#557</a></li>
<li>docs: add &quot;exec&quot; usage examples by <a
href="https://github.com/neilime"><code>@​neilime</code></a> in <a
href="https://redirect.github.com/actions/github-script/pull/546">actions/github-script#546</a></li>
<li>Bump ruby/setup-ruby from 1.213.0 to 1.222.0 by <a
href="https://github.com/dependabot"><code>@​dependabot</code></a>[bot]
in <a
href="https://redirect.github.com/actions/github-script/pull/563">actions/github-script#563</a></li>
<li>Bump ruby/setup-ruby from 1.222.0 to 1.229.0 by <a
href="https://github.com/dependabot"><code>@​dependabot</code></a>[bot]
in <a
href="https://redirect.github.com/actions/github-script/pull/575">actions/github-script#575</a></li>
<li>Clearly document passing inputs to the <code>script</code> by <a
href="https://github.com/joshmgross"><code>@​joshmgross</code></a> in <a
href="https://redirect.github.com/actions/github-script/pull/603">actions/github-script#603</a></li>
<li>Update README.md by <a
href="https://github.com/nebuk89"><code>@​nebuk89</code></a> in <a
href="https://redirect.github.com/actions/github-script/pull/610">actions/github-script#610</a></li>
</ul>
<h2>New Contributors</h2>
<ul>
<li><a href="https://github.com/benelan"><code>@​benelan</code></a> made
their first contribution in <a
href="https://redirect.github.com/actions/github-script/pull/482">actions/github-script#482</a></li>
<li><a href="https://github.com/Jcambass"><code>@​Jcambass</code></a>
made their first contribution in <a
href="https://redirect.github.com/actions/github-script/pull/485">actions/github-script#485</a></li>
<li><a href="https://github.com/timotk"><code>@​timotk</code></a> made
their first contribution in <a
href="https://redirect.github.com/actions/github-script/pull/478">actions/github-script#478</a></li>
<li><a
href="https://github.com/iamstarkov"><code>@​iamstarkov</code></a> made
their first contribution in <a
href="https://redirect.github.com/actions/github-script/pull/508">actions/github-script#508</a></li>
<li><a href="https://github.com/neilime"><code>@​neilime</code></a> made
their first contribution in <a
href="https://redirect.github.com/actions/github-script/pull/546">actions/github-script#546</a></li>
<li><a href="https://github.com/nebuk89"><code>@​nebuk89</code></a> made
their first contribution in <a
href="https://redirect.github.com/actions/github-script/pull/610">actions/github-script#610</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a
href="https://github.com/actions/github-script/compare/v7...v7.1.0">https://github.com/actions/github-script/compare/v7...v7.1.0</a></p>
<!-- raw HTML omitted -->
</blockquote>
<p>... (truncated)</p>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a
href="ed597411d8"><code>ed59741</code></a>
Merge pull request <a
href="https://redirect.github.com/actions/github-script/issues/653">#653</a>
from actions/sneha-krip/readme-for-v8</li>
<li><a
href="2dc352e4ba"><code>2dc352e</code></a>
Bold minimum Actions Runner version in README</li>
<li><a
href="01e118c8d0"><code>01e118c</code></a>
Update README for Node 24 runtime requirements</li>
<li><a
href="8b222ac82e"><code>8b222ac</code></a>
Apply suggestion from <a
href="https://github.com/salmanmkc"><code>@​salmanmkc</code></a></li>
<li><a
href="adc0eeac99"><code>adc0eea</code></a>
README for updating actions/github-script from v7 to v8</li>
<li><a
href="20fe497b3f"><code>20fe497</code></a>
Merge pull request <a
href="https://redirect.github.com/actions/github-script/issues/637">#637</a>
from actions/node24</li>
<li><a
href="e7b7f222b1"><code>e7b7f22</code></a>
update licenses</li>
<li><a
href="2c81ba05f3"><code>2c81ba0</code></a>
Update Node.js version support to 24.x</li>
<li><a
href="f28e40c7f3"><code>f28e40c</code></a>
Merge pull request <a
href="https://redirect.github.com/actions/github-script/issues/610">#610</a>
from actions/nebuk89-patch-1</li>
<li><a
href="1ae9958572"><code>1ae9958</code></a>
Update README.md</li>
<li>Additional commits viewable in <a
href="https://github.com/actions/github-script/compare/v6...v8">compare
view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility
score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=actions/github-script&package-manager=github_actions&previous-version=6&new-version=8)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't
alter it yourself. You can also trigger a rebase manually by commenting
`@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits
that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after
your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge
and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating
it. You can achieve the same result by closing it manually
- `@dependabot show <dependency name> ignore conditions` will show all
of the ignore conditions of the specified dependency
- `@dependabot ignore this major version` will close this PR and stop
Dependabot creating any more for this major version (unless you reopen
the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop
Dependabot creating any more for this minor version (unless you reopen
the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop
Dependabot creating any more for this dependency (unless you reopen the
PR or upgrade to it yourself)


</details>

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-11-23 20:00:22 -05:00
Mason Daugherty
2a863727f9 fix(infra,core): nits (#34079)
* Add missing `nits` to allowed PR linting scopes
* Ensure `MAJOR.MINOR.PATCH` consistency in admonitions
* Ensure valid spacing in admonitions
2025-11-23 20:00:07 -05:00
dumko2001
30e2260e26 fix(core): Decouple provider prefix from model name in init_chat_model logic (#34046)

Addresses Issue #34007.
Fixes a bug where aliases like 'mistral:' were inferred correctly as a
provider but the prefix was not stripped from the model name, causing
API 400 errors. Added logic to strip prefix when inference succeeds.

**Description**
This PR resolves a logic error in `init_chat_model` where inferred
provider aliases (specifically `mistral:`) were correctly identified but
not stripped from the model string.

**The Problem**
When passing a string like `mistral:ministral-8b-latest`, the factory
logic correctly inferred the provider as `mistralai` but failed to enter
the string-splitting block because the alias `mistral` was not in the
hardcoded `_SUPPORTED_PROVIDERS` list. This caused the raw string
`mistral:ministral-8b-latest` to be passed to the `ChatMistralAI`
constructor, resulting in a 400 API error.

**The Fix**
I updated `_parse_model` in
`libs/langchain/langchain/chat_models/base.py`. The logic now attempts
to infer the provider from the prefix *before* determining whether to
split the string. This ensures that valid aliases trigger the stripping
logic, passing only the clean `model_name` to the integration class.

**Issue**
Fixes #34007

**Dependencies**
None.

**Verification**
Validated locally with a reproduction script:
- Input: `mistral:ministral-8b-latest`
- Result: Successfully instantiates `ChatMistralAI` with
`model="ministral-8b-latest"`.
- Validated that standard inputs (e.g., `gpt-4o`) remain unaffected.

Co-authored-by: ioop <ioop@Sidharths-MacBook-Air.local>
2025-11-23 19:52:24 -05:00
Mason Daugherty
cbaea351b2 style(core,langchain-classic,openai): fix griffe warnings (#34074) 2025-11-23 01:06:46 -05:00
ccurme
f070217c3b release(standard-tests): 1.0.2 (#34071)
Resolves https://github.com/langchain-ai/langchain/issues/34069
2025-11-22 18:35:09 -05:00
ccurme
0915682c12 chore(fireworks): update tested models (#34070) 2025-11-22 16:50:49 -05:00
Sydney Runkle
68ab9a1e56 fix: don't reorder tool calls in HITL middleware (#34023) 2025-11-22 05:10:32 -05:00
Mason Daugherty
47b79c30c0 chore(docs): fix a few refs syntax errors (#34044)
missing whitespace for some admonitions
2025-11-22 00:58:21 -05:00
ccurme
5899f980aa release(model-profiles): 0.0.5 (#34064) 2025-11-21 16:12:00 -05:00
ccurme
b0bf4afe81 release(core): 1.1.0 (#34063) 2025-11-21 15:57:25 -05:00
ccurme
33e5d01f7c feat(model-profiles): distribute data across packages (#34024) 2025-11-21 15:47:05 -05:00
Sydney Runkle
ee3373afc2 chore: add more robust test for runtime injection w/ explicit args_schema (#34051) 2025-11-20 16:51:37 +00:00
Sydney Runkle
b296f103a9 feat: ModelRetryMiddleware (#34027)
Closes https://github.com/langchain-ai/langchain/issues/33983

* Adds `ModelRetryMiddleware` modeled after `ToolRetryMiddleware`
* Uses `on_failure` modes of `error` and `continue` to match the
`exit_behavior` modes of model + tool call limit middleware
* In a backwards compatible manner, aligns the API of
`ToolRetryMiddleware`'s `on_failure` with the above
* Centralize common "retry" utils across these middlewares
2025-11-20 11:42:33 -05:00
Eugene Yurtsev
525d5c0169 release(core): 1.0.7 (#34036)
Release core 1.0.7
2025-11-19 21:17:31 +00:00
Eugene Yurtsev
c4b6ba254e fix(core): fix validation for input variables in f-string templates, restrict functionality supported by jinja2, mustache templates (#34035)
* Fix validation for input variables in f-string templates
* Restrict functionality of features supported by jinja2 and mustache
templates
2025-11-19 16:09:46 -05:00
Sydney Runkle
b7d1831f9d fix: deprecate setattr on ModelCallRequest (#34022)
* one alternative considered was setting `frozen=True` on the dataclass,
but this is breaking, so a deprecation is a nicer approach
2025-11-19 11:08:55 -05:00
ccurme
328ba36601 chore(openai): skip Azure text completions tests (#34021) 2025-11-19 09:29:12 -05:00
Sydney Runkle
6f677ef5c1 chore: temporarily skip openai integration tests (#34020)
getting around deprecated azure model issues blocking core release
2025-11-19 14:05:22 +00:00
Sydney Runkle
d47d41cbd3 release: langchain-core 1.0.6 (#34018) 2025-11-19 08:16:34 -05:00
William FH
32bbe99efc chore: Support tool runtime injection when custom args schema is prov… (#33999)
Support injection of injected args (like `InjectedToolCallId`,
`ToolRuntime`) when an `args_schema` is specified that doesn't contain
said args.

This allows for pydantic validation of other args while retaining the
ability to inject langchain specific arguments.

fixes https://github.com/langchain-ai/langchain/issues/33646
fixes https://github.com/langchain-ai/langchain/issues/31688

Taking a deep dive here reminded me that we definitely need to revisit
our internal tooling logic, but I don't think we should do that in this
PR.

---------

Co-authored-by: Sydney Runkle <54324534+sydney-runkle@users.noreply.github.com>
Co-authored-by: Sydney Runkle <sydneymarierunkle@gmail.com>
2025-11-18 17:09:59 +00:00
ccurme
990e346c46 release(anthropic): 1.1 (#33997) 2025-11-17 16:24:29 -05:00
ccurme
9b7792631d feat(anthropic): support native structured output feature and strict tool calling (#33980) 2025-11-17 16:14:20 -05:00
CKLogic
558a8fe25b feat(core): add proxy support for mermaid png rendering (#32400)
### Description

This PR adds support for configuring HTTP/HTTPS proxies when rendering
Mermaid diagrams as PNG images using the remote Mermaid.INK API. This
enhancement allows users in restricted network environments to access
the API via a proxy, making the remote rendering feature more robust and
accessible.

The changes include:
- Added optional `proxies` parameter to `draw_mermaid_png` and
`_render_mermaid_using_api` functions
- Updated `Graph.draw_mermaid_png` method to support and pass through
proxy configuration
- Enhanced docstrings with usage examples for the new parameter
- Maintained full backward compatibility with existing code

### Usage Example

```python
proxies = {
        "http": "http://127.0.0.1:7890",
        "https": "http://127.0.0.1:7890"
}

display(Image(chain.get_graph().draw_mermaid_png(proxies=proxies)))

```

### Dependencies

No new dependencies required. Uses existing `requests` library for HTTP
requests.

---------

Co-authored-by: Mason Daugherty <mason@langchain.dev>
Co-authored-by: Mason Daugherty <github@mdrxy.com>
2025-11-17 12:45:17 -06:00
Mason Daugherty
52b1516d44 style(langchain): fix some middleware ref syntax (#33988) 2025-11-16 00:33:17 -05:00
Mason Daugherty
8a3bb73c05 release(openai): 1.0.3 (#33981)
- Respect 300k token limit for embeddings API requests #33668
- fix create_agent / response_format for Responses API #33939
- fix response.incomplete event is not handled when using
stream_mode=['messages'] #33871
2025-11-14 19:18:50 -05:00
Mason Daugherty
099c042395 refactor(openai): embedding utils and calculations (#33982)
Now returns (`_iter`, `tokens`, `indices`, `token_counts`). The
`token_counts` are calculated directly during tokenization, which is
more accurate and efficient than splitting strings later.
2025-11-14 19:18:37 -05:00
Kaparthy Reddy
2d4f00a451 fix(openai): Respect 300k token limit for embeddings API requests (#33668)
## Description

Fixes #31227 - Resolves the issue where `OpenAIEmbeddings` exceeds
OpenAI's 300,000 token per request limit, causing 400 BadRequest errors.

## Problem

When embedding large document sets, LangChain would send batches
containing more than 300,000 tokens in a single API request, causing
this error:
```
openai.BadRequestError: Error code: 400 - {'error': {'message': 'Requested 673477 tokens, max 300000 tokens per request'}}
```

The issue occurred because:
- The code chunks texts by `embedding_ctx_length` (8191 tokens per
chunk)
- Then batches chunks by `chunk_size` (default 1000 chunks per request)
- **But didn't check**: Total tokens per batch against OpenAI's 300k
limit
- Result: `1000 chunks × 8191 tokens = 8,191,000 tokens` → Exceeds
limit!

## Solution

This PR implements dynamic batching that respects the 300k token limit:

1. **Added constant**: `MAX_TOKENS_PER_REQUEST = 300000`
2. **Track token counts**: Calculate actual tokens for each chunk
3. **Dynamic batching**: Instead of fixed `chunk_size` batches,
accumulate chunks until approaching the 300k limit
4. **Applied to both sync and async**: Fixed both
`_get_len_safe_embeddings` and `_aget_len_safe_embeddings`

## Changes

- Modified `langchain_openai/embeddings/base.py`:
  - Added `MAX_TOKENS_PER_REQUEST` constant
  - Replaced fixed-size batching with token-aware dynamic batching
  - Applied to both sync (line ~478) and async (line ~527) methods
- Added test in `tests/unit_tests/embeddings/test_base.py`:
  - `test_embeddings_respects_token_limit()` - Verifies large document
    sets are properly batched

## Testing

All existing tests pass (280 passed, 4 xfailed, 1 xpassed).

New test verifies:
- Large document sets (500 texts × 1000 tokens = 500k tokens) are split
into multiple API calls
- Each API call respects the 300k token limit

## Usage

After this fix, users can embed large document sets without errors:
```python
from langchain_openai import OpenAIEmbeddings
from langchain_chroma import Chroma
from langchain_text_splitters import CharacterTextSplitter

# This will now work without exceeding token limits
embeddings = OpenAIEmbeddings()
documents = CharacterTextSplitter().split_documents(large_documents)
Chroma.from_documents(documents, embeddings)
```

Resolves #31227

---------

Co-authored-by: Kaparthy Reddy <kaparthyreddy@Kaparthys-MacBook-Air.local>
Co-authored-by: Chester Curme <chester.curme@gmail.com>
Co-authored-by: Mason Daugherty <mason@langchain.dev>
Co-authored-by: Mason Daugherty <github@mdrxy.com>
2025-11-14 18:12:07 -05:00
Sydney Runkle
9bd401a6d4 fix: resumable shell, works w/ interrupts (#33978)
fixes https://github.com/langchain-ai/langchain/issues/33684

Now able to run this minimal snippet successfully

```py
import os

from langchain.agents import create_agent
from langchain.agents.middleware import (
    HostExecutionPolicy,
    HumanInTheLoopMiddleware,
    ShellToolMiddleware,
)
from langgraph.checkpoint.memory import InMemorySaver
from langgraph.types import Command


shell_middleware = ShellToolMiddleware(
    workspace_root=os.getcwd(),
    env=os.environ,  # danger
    execution_policy=HostExecutionPolicy()
)

hil_middleware = HumanInTheLoopMiddleware(interrupt_on={"shell": True})

checkpointer = InMemorySaver()

agent = create_agent(
    "openai:gpt-4.1-mini",
    middleware=[shell_middleware, hil_middleware],
    checkpointer=checkpointer,
)

input_message = {"role": "user", "content": "run `which python`"}

config = {"configurable": {"thread_id": "1"}}

result = agent.invoke(
    {"messages": [input_message]},
    config=config,
    durability="exit",
)
```
2025-11-14 15:32:25 -05:00
ccurme
6aa3794b74 feat(langchain): reference model profiles for provider strategy (#33974) 2025-11-14 19:24:18 +00:00
Sydney Runkle
189dcf7295 chore: increase coverage for shell, filesystem, and summarization middleware (#33928)
cc generated, just a start here but wanted to bump things up from 70%
ish
2025-11-14 13:30:36 -05:00
Sydney Runkle
1bc88028e6 fix(anthropic): execute bash + file tools via tool node (#33960)
* use `override` instead of directly patching things on `ModelRequest`
* rely on `ToolNode` for execution of tools related to said middleware,
using `wrap_model_call` to inject the relevant claude tool specs +
allowing tool node to forward them along to corresponding langchain tool
implementations
* making the same change for the native shell tool middleware
* allowing shell tool middleware to specify a name for the shell tool
(negative diff then for claude bash middleware)


long term I think the solution might be to attach metadata to a tool to
map the provider spec to a langchain implementation, which we could also
take some lessons from on the MCP front.
2025-11-14 13:17:01 -05:00
Mason Daugherty
d2942351ce release(core): 1.0.5 (#33973) 2025-11-14 11:51:27 -05:00
Sydney Runkle
83c078f363 fix: adding missing async hooks (#33957)
* filling in missing async gaps
* using recommended tool runtime injection instead of injected state
  * updating tests to use helper function as well
2025-11-14 09:13:39 -05:00
ZhangShenao
26d39ffc4a docs: Fix doc links (#33964) 2025-11-14 09:07:32 -05:00
Mason Daugherty
421e2ceeee fix(core): don't mask exceptions (#33959) 2025-11-14 09:05:29 -05:00
Mason Daugherty
275dcbf69f docs(core): add clarity to base token counting methods (#33958)
Wasn't immediately obvious that `get_num_tokens_from_messages` adds
additional prefixes to represent user roles in conversation, which adds
to the overall token count.

```python
from langchain_google_genai import GoogleGenerativeAI

llm = GoogleGenerativeAI(model="gemini-2.5-flash")
num_tokens = llm.get_num_tokens("Hello, world!")
print(f"Number of tokens: {num_tokens}")
# Number of tokens: 4
```

```python
from langchain.messages import HumanMessage

messages = [HumanMessage(content="Hello, world!")]

num_tokens = llm.get_num_tokens_from_messages(messages)
print(f"Number of tokens: {num_tokens}")
# Number of tokens: 6
```
2025-11-13 17:15:47 -05:00
Sydney Runkle
9f87b27a5b fix: add filesystem middleware in init (#33955) 2025-11-13 15:07:33 -05:00
Mason Daugherty
b2e1196e29 chore(core,infra): nits (#33954) 2025-11-13 14:50:54 -05:00
Sydney Runkle
2dc1396380 chore(langchain): update deps (#33951) 2025-11-13 14:21:25 -05:00
Mason Daugherty
77941ab3ce feat(infra): add automatic issue labeling (#33952) 2025-11-13 14:13:52 -05:00
Mason Daugherty
ee19a30dde fix(groq): bump min ver for core dep (#33949)
Due to issue with unit tests and docs URL for exceptions
2025-11-13 11:46:54 -05:00
Mason Daugherty
5d799b3174 release(nomic): 1.0.1 (#33948)
support Python 3.14 #33655
2025-11-13 11:25:39 -05:00
Mason Daugherty
8f33a985a2 release(groq): 1.0.1 (#33947)
- fix: handle tool calls with no args #33896
- add prompt caching token usage details #33708
2025-11-13 11:25:00 -05:00
Mason Daugherty
78eeccef0e release(deepseek): 1.0.1 (#33946)
- support strict beta structured output #32727
2025-11-13 11:24:39 -05:00
ccurme
3d415441e8 fix(langchain, openai): backward compat for response_format (#33945) 2025-11-13 11:11:35 -05:00
ccurme
74385e0ebd fix(langchain, openai): fix create_agent / response_format for Responses API (#33939) 2025-11-13 10:18:15 -05:00
Christophe Bornet
2bfbc29ccc chore(core): fix some ruff TC rules (#33929)
fix some ruff TC rules but still don't enforce them as Pydantic model
fields use type annotations at runtime.
2025-11-12 14:07:19 -05:00
Christophe Bornet
ef79c26f18 chore(cli,standard-tests,text-splitters): fix some ruff TC rules (#33934)
Co-authored-by: Mason Daugherty <mason@langchain.dev>
2025-11-12 14:06:31 -05:00
ccurme
fbe32c8e89 release(anthropic): 1.0.3 (#33935) 2025-11-12 10:55:28 -05:00
Mohammad Mohtashim
2511c28f92 feat(anthropic): support code_execution_20250825 (#33925) 2025-11-12 10:44:51 -05:00
Sydney Runkle
637bb1cbbc feat: refactor tests coverage (#33927)
middleware tests have gotten quite unwieldy, major restructuring, sets
the stage for coverage increase

this is super hard to review -- as a proof that we've retained important
tests, I ran coverage on `master` and this branch and confirmed
identical coverage.

* moving all middleware related tests to `agents/middleware` folder
* consolidating related test files
* adding coverage utility to makefile
2025-11-11 10:40:12 -05:00
Mason Daugherty
3dfea96ec1 chore: update README.md files (#33919) 2025-11-10 22:51:35 -05:00
ccurme
68643153e5 feat(langchain): support async summarization in SummarizationMiddleware (#33918) 2025-11-10 15:48:51 -05:00
Abbas Syed
462762f75b test(core): add comprehensive tests for groq block translator (#33906) 2025-11-10 15:45:36 -05:00
ccurme
4f3729c004 release(model-profiles): 0.0.4 (#33917) 2025-11-10 12:06:32 -05:00
Mason Daugherty
ba428cdf54 chore(infra): add note to pr linting workflow (#33916) 2025-11-10 11:49:31 -05:00
Mason Daugherty
69c7d1b01b test(groq,openai): add retries for flaky tests (#33914) 2025-11-10 10:36:11 -05:00
Mason Daugherty
733299ec13 revert(core): "applied secrets_map in load to plain string values" (#33913)
Reverts langchain-ai/langchain#33678

Breaking API change
2025-11-10 10:29:30 -05:00
ccurme
e1adf781c6 feat(langchain): (SummarizationMiddleware) support use of model context windows when triggering summarization (#33825) 2025-11-10 10:08:52 -05:00
Shahroz Ahmad
31b5e4810c feat(deepseek): support strict beta structured output (#32727)
**Description:** This PR adds support for DeepSeek's beta strict mode
feature for structured
outputs and tool calling. It overrides `bind_tools()` and
`with_structured_output()` to automatically use
DeepSeek's beta endpoint (https://api.deepseek.com/beta) when
`strict=True`. Both methods need overriding because they're independent
entry points and user can call either directly. When DeepSeek's strict
mode graduates from beta, we can just remove both overridden methods. You
can read more about the beta feature here:
https://api-docs.deepseek.com/guides/function_calling#strict-mode-beta
  
**Issue:** Implements #32670 


**Dependencies:** None


**Sample Code**

```python
from langchain_deepseek import ChatDeepSeek
from pydantic import BaseModel, Field
from typing import Optional
import os


# Enter your DeepSeek API Key here
API_KEY = "YOUR_API_KEY"


# location, temperature, condition are required fields
# humidity is optional field with default value
class WeatherInfo(BaseModel):
    location: str = Field(description="City name")
    temperature: int = Field(description="Temperature in Celsius")
    condition: str = Field(description="Weather condition (sunny, cloudy, rainy)")
    humidity: Optional[int] = Field(default=None, description="Humidity percentage")


llm = ChatDeepSeek(
    model="deepseek-chat",
    api_key=API_KEY,
)

# just to confirm that a new instance will use the default base url (instead of beta)
print(f"Default API base: {llm.api_base}")



# Test 1: bind_tools with strict=True should list all the tool calls
print("\nTest 1: bind_tools with strict=True")
llm_with_tools = llm.bind_tools([WeatherInfo], strict=True)
response = llm_with_tools.invoke("Tell me the weather in New York. It's 22 degrees, sunny.")
print(response.tool_calls)



# Test 2: with_structured_output with strict=True
print("\nTest 2: with_structured_output with strict=True")
structured_llm = llm.with_structured_output(WeatherInfo, strict=True)
result = structured_llm.invoke("Tell me the weather in New York.")
print(f"  Result: {result}")
assert isinstance(result, WeatherInfo), "Result should be a WeatherInfo instance"
```

---------

Co-authored-by: Mason Daugherty <mason@langchain.dev>
Co-authored-by: Mason Daugherty <github@mdrxy.com>
2025-11-09 22:24:33 -05:00
Mason Daugherty
c6801fe159 chore: fix URL underlining in README.md (#33905) 2025-11-09 22:22:56 -05:00
AmazingcatAndrew
1b563067f8 fix(chroma): resolve OpenCLIP + Chroma image embedding test regression (#33899)
**Description:**  
Fixes the OpenCLIP × Chroma regression that caused nested embedding
errors when adding or searching image data.
The test case `test_openclip_chroma_embed_no_nesting_error` has been
restored and verified to work correctly with the current LangChain core
dependencies.
Functional validation confirms that `similarity_search_by_image` now
returns correct, metadata‑preserving results.

**Issue:**  
Fixes #33851

**Dependencies:**  
No new dependencies introduced.  

**Testing:**  
All tests under  
```bash
uv run --group test pytest tests/unit_tests
```  
result:
```
30 passed in 91.26s (0:01:31)
```
have passed successfully using Python 3.13.9 and uv‑managed environment.
This confirms that the regression has been fixed.  

Running  
```bash
make test
```  
still produces cleanup‑time `AttributeError: 'ProactorEventLoop' object
has no attribute '_ssock'` on Windows (Python 3.13+).
This is a benign asyncio teardown message rather than a functional
failure.
`uv run pytest` closes event loops immediately after tests, while `make
test` invokes pytest through a secondary process layer that leaves a
background loop alive at interpreter shutdown.
This difference in teardown behavior explains the extra messages seen
only when using `make test`.

**Summary:**  
- Verified the OpenCLIP + Chroma image pipeline works correctly.  
- `uv run --group test pytest` fully passes; the fix is complete.  
- The residual `_ssock` warnings occur only during
Windows asyncio cleanup and are not related to this code change.

This is my first time contributing code; please contact me with any
questions.

---

---------

Co-authored-by: Mason Daugherty <mason@langchain.dev>
Co-authored-by: Mason Daugherty <github@mdrxy.com>
2025-11-09 21:24:33 -05:00
Mason Daugherty
1996d81d72 chore(langchain): pass on reference docstrings (middleware) (#33904) 2025-11-09 21:18:28 -05:00
Mason Daugherty
ab0677c6f1 fix(groq): handle tool calls with no args (#33896)
When Groq returns tool calls with no arguments, it sends arguments:
`'null'` (JSON null), but LangChain's core parsing expects either a dict
or converts null to Python None, which fails the `isinstance(args_,
dict)` check and incorrectly marks the tool call as invalid.

Related to #32017
2025-11-08 22:30:44 -05:00
artreimus
bdb53c93cc docs(langchain): correct IBM provider link in chat_models docstring (#33897)
**PR title**

```
docs(langchain): correct IBM provider link in chat_models docstring
```

**PR message**

**Description**
Fix broken link in the `chat_models` docstring. The **ibm** bullet
incorrectly linked to the DeepSeek provider page; update it to the
canonical IBM provider docs.

This only affects generated API reference content on
`reference.langchain.com`. No runtime behavior changes.

**Issue**
N/A (documentation-only).

**Dependencies**
None.

**Testing & quality**

* Ran `make format`, `make lint`, and `make test` in the package (no
code changes expected to affect tests).
2025-11-08 07:02:33 -06:00
Alazar Genene
94d5271cb5 fix(standard-tests): fix semantic typo in if statement (#33890) 2025-11-07 18:01:59 -05:00
ccurme
e499db4266 release(langchain): 1.0.5 (#33893) 2025-11-07 17:54:43 -05:00
npage902
cc3af82b47 fix(core): applied secrets_map in load to plain string values (#33678)
Replaces #33618 

**Description:** Fixes the bug in the `load()` function where secret
placeholders in plain dicts were not replaced, even if they match a key
in `secrets_map`, and adds a test case.

Example:
```py
obj = {"api_key": "__SECRET_API_KEY__"}
secret_key = "secret_key_1234"
secrets_map = {"__SECRET_API_KEY__": secret_key}
result = load(obj, secrets_map=secrets_map)
```
Before this change, printing `api_key` in `result` would output
`"__SECRET_API_KEY__"`. Now, it will properly output
`"secret_key_1234"`.

**Issue:** Fixes #31804 

**Dependencies:** None

`make format`, `make lint`, and `make test` have all passed on my
machine.

---------

Co-authored-by: Mason Daugherty <mason@langchain.dev>
Co-authored-by: Mason Daugherty <github@mdrxy.com>
2025-11-07 17:14:13 -05:00
Mshari
9383b78be1 feat(groq): add prompt caching token usage details (#33708)
**Description:** 
Adds support for prompt caching usage metadata in ChatGroq. The
integration now captures cached token information from the Groq API
response and includes it in the `input_token_details` field of the
`usage_metadata`.

Changes:
- Created new `_create_usage_metadata()` helper function to centralize
usage metadata creation logic
- Extracts `cached_tokens` from `prompt_tokens_details` in API responses
and maps to `input_token_details.cache_read`
- Integrated the helper function in both streaming
(`_convert_chunk_to_message_chunk`) and non-streaming
(`_create_chat_result`) code paths
- Added comprehensive unit tests to verify caching metadata handling and
backward compatibility

This enables users to monitor prompt caching effectiveness when using
Groq models with prompt caching enabled.

**Issue:** N/A

**Dependencies:** None

---------

Co-authored-by: Mason Daugherty <github@mdrxy.com>
Co-authored-by: Mason Daugherty <mason@langchain.dev>
2025-11-07 17:05:22 -05:00
ccurme
3c492571ab release(anthropic): 1.0.2 (#33888) 2025-11-07 16:47:25 -05:00
ccurme
f2410f7ea7 revert: Support for SystemMessage in create_agent (#33889)
Reverts langchain-ai/langchain#33640

Introduces lint errors into langchain-anthropic

Should incorporate into 1.1 instead of patch release.
2025-11-07 16:44:11 -05:00
Mason Daugherty
91560b6a7a chore(infra): expand PR labeling (#33887) 2025-11-07 16:37:35 -05:00
ccurme
b1dd448233 release(core): 1.0.4 (#33886) 2025-11-07 16:26:44 -05:00
dy93
904daf6f40 feat(core): support draw subgraph using pygraphviz (#32966)
The `draw_png()` method currently does not support drawing subgraphs.
This PR adds the ability to render subgraph outlines, improving
visualization clarity when working with nested structures.
2025-11-07 15:58:35 -05:00
Mohammad Mohtashim
8e31a5d7bd fix(core): Fix tool name check in name_dict for PydanticToolsParser (#33479)
- **Description:** The root cause of this issue is that when a user
defines `model_config` in a `BaseModel`, the `{"type": <tool_name>}`
value is derived from the title specified in `model_config` when the
results are parsed
[here](https://vscode.dev/github/keenborder786/langchain/blob/fix/tool_name_dict/libs/core/langchain_core/output_parsers/openai_tools.py#L199).
However,
[tool.__name__](https://vscode.dev/github/keenborder786/langchain/blob/fix/tool_name_dict/libs/core/langchain_core/output_parsers/openai_tools.py#L331)
uses the class name (in uppercase) of the `BaseModel`, resulting in a
`KeyError` when a custom title is provided in `model_config`.
 

The best solution is to use the title from the `model_config`
attribute when one is provided, since that is what `type` will be parsed
to, and otherwise fall back to `tool.__name__`. We need to make sure that
this works only for Pydantic V2.

  - **Issue:** #27260

---------

Co-authored-by: Mason Daugherty <mason@langchain.dev>
2025-11-07 15:39:47 -05:00
Sydney Runkle
ee630b4539 fix: bump up default recursion limit (#33881)
Fixes https://github.com/langchain-ai/langchain/issues/33740

We don't want to depend on recursion limit here, model call limit
middleware is more appropriate
2025-11-07 13:49:12 -06:00
Jacob Lee
46971447df fix(core): Filter empty content blocks from formatted prompts (#32519)
Co-authored-by: Mason Daugherty <mason@langchain.dev>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
2025-11-07 14:39:25 -05:00
Azibek
d8b94007c1 fix(huggingface): pass llm params to ChatHuggingFace (#32368)
This PR fixes #32234 and improves HuggingFace chat model integration by:

Ensuring ChatHuggingFace inherits key parameters (temperature,
max_tokens, top_p, streaming, etc.) from the underlying LLM when not
explicitly set.
Adding and updating unit tests to verify property inheritance.
No breaking changes; these updates enhance reliability and
maintainability.

---------

Co-authored-by: Mason Daugherty <mason@langchain.dev>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
Co-authored-by: Mason Daugherty <github@mdrxy.com>
2025-11-07 14:29:15 -05:00
Mohammad Mohtashim
cf595dcc38 chore(langchain): Support for SystemMessage in create_agent (#33640)
- **Description:** Updated Function Signature of `create_agent`, the
system prompt can be both a list and string. I see no harm in doing
this, since SystemMessage accepts both.
- **Issue:** #33630

---------

Co-authored-by: Sydney Runkle <54324534+sydney-runkle@users.noreply.github.com>
2025-11-07 13:00:38 -06:00
Copilot
d27211cfa7 fix(core): context preservation in shielded async callbacks (#32163)
The `@shielded` decorator in async callback managers was not preserving
context variables, breaking OpenTelemetry instrumentation and other
context-dependent functionality.

## Problem

When using async callbacks with the `@shielded` decorator (applied to
methods like `on_llm_end`, `on_chain_end`, etc.), context variables were
not being preserved across the shield boundary. This caused issues with:

- OpenTelemetry span context propagation
- Other instrumentation that relies on context variables
- Inconsistent context behavior between sync and async execution

The issue was reproducible with:

```python
from contextvars import copy_context
import asyncio
from langgraph.graph import StateGraph

# Sync case: context remains consistent
print("SYNC")
print(copy_context())  # Same object
graph.invoke({"result": "init"})
print(copy_context())  # Same object

# Async case: context was inconsistent (before fix)
print("ASYNC") 
asyncio.run(graph.ainvoke({"result": "init"}))
print(copy_context())  # Different object than expected
```

## Root Cause

The original `shielded` decorator implementation:

```python
async def wrapped(*args: Any, **kwargs: Any) -> Any:
    return await asyncio.shield(func(*args, **kwargs))
```

Used `asyncio.shield()` directly without preserving the current
execution context, causing context variables to be lost.

## Solution

Modified the `shielded` decorator to:

1. Capture the current context using `copy_context()`
2. Create a task with explicit context using `asyncio.create_task(coro,
context=ctx)` for Python 3.11+
3. Shield the context-aware task
4. Fallback to regular task creation for Python < 3.11

```python
async def wrapped(*args: Any, **kwargs: Any) -> Any:
    # Capture the current context to preserve context variables
    ctx = copy_context()
    coro = func(*args, **kwargs)
    
    try:
        # Create a task with the captured context to preserve context variables
        task = asyncio.create_task(coro, context=ctx)
        return await asyncio.shield(task)
    except TypeError:
        # Python < 3.11 fallback
        task = asyncio.create_task(coro)
        return await asyncio.shield(task)
```

## Testing

- Added comprehensive test
`test_shielded_callback_context_preservation()` that validates context
variables are preserved across shielded callback boundaries
- Verified the fix resolves the original LangGraph context consistency
issue
- Confirmed all existing callback manager tests still pass
- Validated OpenTelemetry-like instrumentation scenarios work correctly

The fix is minimal, maintains backward compatibility, and ensures proper
context preservation for both modern Python versions and older ones.

Fixes #31398.

<!-- START COPILOT CODING AGENT TIPS -->
---

💬 Share your feedback on Copilot coding agent for the chance to win a
$200 gift card! Click
[here](https://survey.alchemer.com/s3/8343779/Copilot-Coding-agent) to
start the survey.

---------

Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: mdrxy <61371264+mdrxy@users.noreply.github.com>
Co-authored-by: Mason Daugherty <github@mdrxy.com>
Co-authored-by: Mason Daugherty <mason@langchain.dev>
2025-11-07 13:09:47 -05:00
Swastik-Swarup-Dash
ca1a3fbe88 fix(core): RunnablePick may not return a dict if keys is a string (#31321)
Change made From:
```python
class RunnablePick(RunnableSerializable[dict[str, Any], dict[str, Any]]):
```
To:
```python
class RunnablePick(RunnableSerializable[dict[str, Any], Any]):
```
As suggested by @cbornet 

Fixes #31309

---------

Co-authored-by: Mason Daugherty <mason@langchain.dev>
Co-authored-by: Mason Daugherty <github@mdrxy.com>
2025-11-07 13:04:20 -05:00
williamzhu54
c955b53aed fix(core): fix Runnable parallel schema being empty when children runnable input schemas use TypedDict (#28196)
# Description
This submission is a part of a school project from our team of 4
@EminGul @williamzhu54 @annay54 @donttouch22.

Our pull request fixes the issue with the RunnableParallel schema being
empty by returning the correct schema output when children runnable
input schemas use TypedDicts.

# Issue
Fixes #24326


# Dependencies
No extra dependencies required for this fix.

# Feedback
Any feedback and advice is gladly welcomed. Please feel free to let us
know what we can change or improve upon regarding this issue.

---------

Co-authored-by: Mason Daugherty <mason@langchain.dev>
Co-authored-by: Mason Daugherty <github@mdrxy.com>
2025-11-07 12:01:21 -05:00
Christophe Bornet
2a626d9608 refactor(langchain): use create_importer for HypotheticalDocumentEmbedder (#32078) 2025-11-07 11:16:00 -05:00
Abhinav
0861cba04b fix(chroma): pydantic validation error when using retriever.invoke() (#31377) 2025-11-07 10:59:16 -05:00
Lê Nam Khánh
88246f45b3 docs: fix typos in libs/core/langchain_core/utils/function_calling.py (#33873) 2025-11-07 10:34:28 -05:00
Lê Nam Khánh
1d04514354 docs: fix typos in libs/core/tests/unit_tests/utils/test_strings.py (#33875) 2025-11-07 10:34:12 -05:00
Lê Nam Khánh
c2324b8f3e docs: fix typos in libs/langchain/langchain_classic/chains/summarize/chain.py (#33877) 2025-11-07 10:33:53 -05:00
Lê Nam Khánh
957ea65d12 docs: fix typos in libs/core/tests/unit_tests/indexing/test_hashed_document.py (#33874) 2025-11-07 10:32:20 -05:00
Lê Nam Khánh
00fa38a295 docs: fix typos in libs/core/tests/unit_tests/test_tools.py (#33876) 2025-11-07 10:31:57 -05:00
Lê Nam Khánh
9d98c1b669 docs: fix typos in libs/partners/groq/langchain_groq/chat_models.py (#33878) 2025-11-07 10:31:35 -05:00
Mahmut CAVDAR
00cc9d421f fix(langchain): Update langchain-core dependency version (#33775) 2025-11-07 10:31:06 -05:00
Mohammad Mohtashim
65716cf590 feat(perplexity): Created Dedicated Output Parser to Support Reasoning Model Output for perplexity (#33670) 2025-11-07 10:17:35 -05:00
riunyfir
1b77a191f4 feat: The response.incomplete event is not handled when using stream_mode=['messages'] (#33871) 2025-11-07 09:46:11 -05:00
repeat-Q
ebfde9173c docs: expand "Why use LangChain?" section in README (#33846) 2025-11-07 09:09:05 -05:00
Lê Nam Khánh
2fe0369049 docs: fix typos in some files (#33867) 2025-11-07 09:04:29 -05:00
Mason Daugherty
e023201d42 style: some cleanup (#33857) 2025-11-06 23:50:46 -05:00
Mason Daugherty
d40e340479 chore: attribute package change versions (#33854)
Needed to disambiguate for within inherited docs
2025-11-06 16:57:30 -05:00
Sydney Runkle
9a09ed0659 fix: don't trace conditional edges and no todos in input state (#33842)
while experimenting w/ todo middleware

| Before | After |
|--------|-------|
| ![Screenshot 2025-11-05 at 1 56 21
PM](https://github.com/user-attachments/assets/63195ae4-8122-4662-8246-0fbc16cb1e22)
| ![Screenshot 2025-11-05 at 1 56 03
PM](https://github.com/user-attachments/assets/255e2fa8-e52d-4d1a-949a-33df52ee6668)
|
| Tracing conditional edges (verbose) | Not tracing conditional edges
(cleaner) |
| ![Screenshot 2025-11-05 at 1 57 56
PM](https://github.com/user-attachments/assets/449ccfe9-4c21-4c87-8e0e-6e89d7a97611)
| ![Screenshot 2025-11-05 at 1 56 58
PM](https://github.com/user-attachments/assets/c5c28d0e-2153-4572-af29-b2528761fec6)
|
| Todos in input state (cluttered) | No todos in input state (cleaner) |
2025-11-05 14:25:57 -05:00
Mason Daugherty
5f27b546dd chore: update README.md with deepagents (#33843) 2025-11-05 14:22:20 -05:00
Mason Daugherty
022fdd52c3 fix(core): handle missing dependency version information (#33844)
Follow up to #33347

This continues to make searching issues difficult
2025-11-05 14:19:55 -05:00
Sydney Runkle
7946a8f64e release: langchain v1.0.4 (#33839) 2025-11-05 12:37:58 -05:00
Sydney Runkle
7af79039fc fix: only increment thread count on successful executions (#33837)
* for run count + thread count overflow we should warn model not to call
again
* don't tally mocked tool calls in thread limit -- consider the
following
  * run limit is 1 
  * thread limit is 3
  * first run calls the tool 2 times, 1 executes, 1 is blocked
* we should only count the successful execution above towards the total
thread count
* raise more helpful warnings on invalid config
2025-11-05 10:00:07 -05:00
Sydney Runkle
1755750ca1 fix: more robust tool call limit middleware (#33817)
* improving typing (covariance)
* adding in support for continuing w/ tool calls not yet at threshold,
switching default to continue
* moving all logic into after model

```py
ExitBehavior = Literal["continue", "error", "end"]
"""How to handle execution when tool call limits are exceeded.
- `"continue"`: Block exceeded tools with error messages, let other tools continue (default)
- `"error"`: Raise a `ToolCallLimitExceededError` exception
- `"end"`: Stop execution immediately, injecting a ToolMessage and an AI message
    for the single tool call that exceeded the limit. Raises `NotImplementedError`
    if there are multiple tool calls
"""
```
2025-11-05 09:18:21 -05:00
Mason Daugherty
ddb53672e2 chore(infra): remove unused pr-title-labeler.yml (#33831) 2025-11-04 20:06:52 -05:00
Mason Daugherty
eeae34972f chore(infra): drop langchain_v1 pr lint (#33830)
Just use `langchain`
2025-11-04 19:46:05 -05:00
Mason Daugherty
47d89b1e47 fix(langchain): remove Tigris (#33829)
Removing this code as there is no possible way for it to work.

See https://github.com/langchain-ai/langchain-community/pull/159
2025-11-04 19:45:52 -05:00
Mason Daugherty
ee0bdaeb79 chore: correct langchain-community references (#33827)
fix docstrings that referenced community versions of now-native packages
2025-11-04 17:01:35 -05:00
Christophe Bornet
915c446c48 chore(core): add ruff rule PLR2004 (#33706)
Co-authored-by: Mason Daugherty <mason@langchain.dev>
2025-11-04 13:33:37 -05:00
Mason Daugherty
d1e2099408 chore(core): clean pyproject formatting (#33821) 2025-11-04 18:21:15 +00:00
Mason Daugherty
6ea15b9efa docs(model-profiles): fix typo (#33820) 2025-11-04 18:19:55 +00:00
Mason Daugherty
69f33aaff5 chore(infra): remove unused poetry_setup action (#33819) 2025-11-04 13:18:55 -05:00
Mason Daugherty
3f66f102d2 chore: update issue template xref url (#33818) 2025-11-04 13:17:42 -05:00
Mason Daugherty
c6547f58b7 style(standard-tests): refs pass (#33814) 2025-11-04 00:01:16 -05:00
Mason Daugherty
dfb05a7fa0 style: refs pass (#33813) 2025-11-03 22:11:10 -05:00
ccurme
2f67f9ddcb release(huggingface): 1.0.1 (#33803) 2025-11-03 14:49:52 -05:00
Hyejeong Jo
0e36185933 fix(huggingface): add stream_usage support for ChatHuggingFace invoke/stream (#32708) 2025-11-03 14:44:32 -05:00
Michael Li
6617865440 fix(core): add no colors check (#33780)
Patch edge case in get_color_mapping
2025-11-03 13:23:23 -05:00
ccurme
6dba4912be release(model-profiles): 0.0.3 (#33798) 2025-11-03 11:17:08 -05:00
ccurme
7a3827471b fix(model-profiles): fix pdf_inputs field (#33797) 2025-11-03 11:10:33 -05:00
ccurme
f006bc4c7e feat(langchain): add model-profiles as optional dependency (#33794) 2025-11-03 10:13:58 -05:00
Mason Daugherty
0a442644e3 test(anthropic): add vcr to test_search_result_tool_message (#33793)
To fix nondeterministic results causing integration testing to sometimes
fail

Also speeds up from 10s to 0.5

---------

Co-authored-by: ccurme <chester.curme@gmail.com>
2025-11-03 15:13:30 +00:00
repeat-Q
4960663546 docs: add Code of Conduct link to README (#33782)
**Description:** Add link to Code of Conduct in the Additional resources
section to make community guidelines more accessible for all
contributors.

**Rationale:** 
- **Community Health:** Making the Code of Conduct easily discoverable
helps set clear expectations for community behavior and fosters a more
inclusive, respectful environment
- **New Contributor Experience:** Many new contributors look to the
README as the primary source of project information. Having the Code of
Conduct readily available helps onboard them properly
- **Best Practices:** Prominent Code of Conduct links are considered a
best practice in open source projects and improve project accessibility
- **Low Impact:** This is a simple, non-breaking change that
significantly improves documentation completeness

**Issue:** N/A

**Dependencies:** None
2025-11-03 09:50:47 -05:00
ccurme
1381137c37 release(standard-tests): 1.0.1 (#33792) 2025-11-03 09:46:39 -05:00
ccurme
b4a042dfc4 release(core): 1.0.3 (#33768) 2025-11-03 09:19:32 -05:00
ccurme
81c4f21b52 fix(standard-tests): update multimodal tests (#33781) 2025-11-01 16:38:20 -04:00
Mason Daugherty
f2dab562a8 style: misc refs work (#33771) 2025-10-31 18:29:53 -04:00
ccurme
61196a8280 release(openai): 1.0.2 (#33769) 2025-10-31 14:21:32 -04:00
ccurme
7a97c31ac0 release(model-profiles): 0.0.2 (#33767) 2025-10-31 13:58:04 -04:00
ccurme
424214041e feat(model-profiles): support more providers (#33766) 2025-10-31 13:48:56 -04:00
ccurme
b06bd6a913 fix(model-profiles): add typing-extensions as explicit dep (#33762) 2025-10-31 11:21:55 -04:00
ccurme
1c762187e8 fix(model-profiles): remove langchain-core as a dependency (#33761) 2025-10-31 11:04:14 -04:00
Mason Daugherty
90aefc607f docs(core): improve tools module docstrings (#33755)
styling in `base.py`, content updates in
`libs/core/langchain_core/tools/convert.py`
2025-10-31 10:54:30 -04:00
ccurme
2ca73c479b fix(infra): fix release workflow for new packages (#33760) 2025-10-31 10:38:38 -04:00
ccurme
17c7c273b8 fix(infra): fix release workflow for new packages (#33759) 2025-10-31 10:21:12 -04:00
ccurme
493be259c3 feat(core): mint langchain-model-profiles and add profile property to BaseChatModel (#33728) 2025-10-31 09:44:46 -04:00
Mason Daugherty
106c6ac273 revert: "chore: skip anthropic tests while waiting on new anthropic release" (#33753)
Reverts langchain-ai/langchain#33739
2025-10-30 16:37:12 -04:00
Mason Daugherty
7aaaa371e7 release(anthropic): 1.0.1 (#33752) 2025-10-30 16:19:44 -04:00
Mason Daugherty
468dad1780 chore: use model IDs, latest anthropic models (#33747)
- standardize on using model IDs, no more aliases - makes future
maintenance easier
- use latest models in docstrings to highlight support
- remove remaining sonnet 3-7 usage due to deprecation

Depends on #33751
2025-10-30 16:13:28 -04:00
Mason Daugherty
32d294b89a fix(anthropic): clean up tests, update default model to use ID (#33751)
- use latest models in examples to highlight support
- standardize on using IDs in examples - no more aliases to improve
determinism in future tests
- bump lock
- in integration tests, fix stale cassettes and use `MODEL_NAME`
uniformly where possible
- add case for default max tokens for sonnet-4-5 (was missing)
2025-10-30 16:08:18 -04:00
Mason Daugherty
dc5b7dace8 test(openai): mark tests flaky (#33750)
see:
https://github.com/langchain-ai/langchain/actions/runs/18921929210/job/54020065079#step:10:560
2025-10-30 16:07:58 -04:00
Mason Daugherty
e00b7233cf chore(langchain): fix lint_imports paths (#33749) 2025-10-30 16:06:08 -04:00
Mason Daugherty
91f7e73c27 fix(langchain): use system_prompt in integration tests (#33748) 2025-10-30 16:05:57 -04:00
Shagun Gupta
75fff151e8 fix(openai): replace pytest.warns(None) with warnings.catch_warnings in ChatOpenAI test to resolve TypeError. Resolves issue #33705 (#33741) 2025-10-30 09:22:34 -04:00
Sydney Runkle
d05a0cb80d chore: skip anthropic tests while waiting on new anthropic release (#33739)
like https://github.com/langchain-ai/langchain/pull/33312/files

temporarily skip while waiting on new anthropic release

dependent on https://github.com/langchain-ai/langchain/pull/33737
2025-10-29 16:10:42 -07:00
Sydney Runkle
d24aa69ceb chore: don't pick up alphas for testing (#33738)
reverting change made in
eaa6dcce9e
2025-10-29 16:04:57 -07:00
Sydney Runkle
fabcacc3e5 chore: remove mentions of sonnet 3.5 (#33737)
see
https://docs.claude.com/en/docs/about-claude/model-deprecations#2025-08-13%3A-claude-sonnet-3-5-models
2025-10-29 15:49:27 -07:00
Christian Bromann
ac58d75113 fix(langchain_v1): remove thread_model_call_count and run_model_call_count from tool node test (#33725)
While working on ToolRuntime in TS I discovered that Python still uses
`thread_model_call_count` and `run_model_call_count` in ToolNode tests
which afaik we removed.
2025-10-29 15:36:18 -07:00
Sydney Runkle
28564ef94e release: core 1.0.2 and langchain 1.0.3 (#33736) 2025-10-29 15:30:17 -07:00
Christian Bromann
b62a9b57f3 fix(langchain_v1): removed unused functions in tool_call_limit middleware (#33735)
These functions seem unused and can be removed.
2025-10-29 15:21:38 -07:00
Sydney Runkle
76dd656f2a fix: filter out injected args from tracing (#33729)
this is CC generated and I want to do a thorough review + update the
tests. but should be able to ship today.

before eek

<img width="637" height="485" alt="Screenshot 2025-10-29 at 12 34 52 PM"
src="https://github.com/user-attachments/assets/121def87-fb7b-4847-b9e2-74f37b3b4763"
/>

now, woo

<img width="651" height="158" alt="Screenshot 2025-10-29 at 12 36 09 PM"
src="https://github.com/user-attachments/assets/1fc0e19e-a83f-417c-81e2-3aa0028630d6"
/>
2025-10-29 22:20:53 +00:00
ccurme
d218936763 fix(openai): update model used in test (#33733)
Co-authored-by: Mason Daugherty <mason@langchain.dev>
2025-10-29 17:09:18 -04:00
Mason Daugherty
123e29dc26 style: more refs fixes (#33730) 2025-10-29 16:34:46 -04:00
Sydney Runkle
6a1dca113e chore: move ToolNode improvements back to langgraph (#33634)
Moving all `ToolNode` related improvements back to LangGraph and
importing them in LC!
pairing w/ https://github.com/langchain-ai/langgraph/pull/6321

this fixes a couple of things:
1. `InjectedState`, store etc will continue to work as expected no
matter where the import is from
2. `ToolRuntime` is now usable w/in langgraph, woohoo!
2025-10-29 11:44:23 -07:00
Sydney Runkle
8aea6dd23a feat: support structured output retry middleware (#33663)
* attach the latest `AIMessage` to all `StructuredOutputError`s so that
relevant middleware can use as desired
* raise `StructuredOutputError` from `ProviderStrategy` logic in case of
failed parsing (so that we can retry from middleware)
* added a test suite w/ example custom middleware that retries for tool
+ provider strategy

Long term, we could add our own opinionated structured output retry
middleware, but this at least unblocks folks who want to use custom
retry logic in the short term :)

```py
class StructuredOutputRetryMiddleware(AgentMiddleware):
    """Retries model calls when structured output parsing fails."""

    def __init__(self, max_retries: int) -> None:
        self.max_retries = max_retries

    def wrap_model_call(
        self, request: ModelRequest, handler: Callable[[ModelRequest], ModelResponse]
    ) -> ModelResponse:
        for attempt in range(self.max_retries + 1):
            try:
                return handler(request)
            except StructuredOutputError as exc:
                if attempt == self.max_retries:
                    raise

                ai_content = exc.ai_message.content
                error_message = (
                    f"Your previous response was:\n{ai_content}\n\n"
                    f"Error: {exc}. Please try again with a valid response."
                )
                request.messages.append(HumanMessage(content=error_message))
```
2025-10-29 08:41:44 -07:00
Vincent Koc
78a2f86f70 fix(core): improve JSON get_format_instructions using Opik Agent Optimizer (#33718) 2025-10-29 11:05:24 -04:00
Mason Daugherty
b5e23e5823 fix(langchain_v1): correct ref url (#33715) 2025-10-28 23:29:19 -04:00
Mason Daugherty
7872643910 chore(standard-tests): Update API reference link in README (#33714) 2025-10-28 23:29:02 -04:00
Mason Daugherty
f15391f4fc chore(text-splitters): API reference link in README (#33713) 2025-10-28 23:28:48 -04:00
Mason Daugherty
ca9b81cc2e chore(infra): update README (#33712)
Updated the README to clarify LangChain's focus on building agents and
LLM-powered applications. Added a section for community discussions and
refined the ecosystem description.
2025-10-28 23:22:18 -04:00
Mason Daugherty
a2a9a02ecb style(core): more cleanup all around (#33711) 2025-10-28 22:58:19 -04:00
Mason Daugherty
e5e1d6c705 style: more refs work (#33707) 2025-10-28 14:43:28 -04:00
dependabot[bot]
6ee19473ba chore(infra): bump actions/download-artifact from 5 to 6 (#33682)
Bumps
[actions/download-artifact](https://github.com/actions/download-artifact)
from 5 to 6.
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a
href="https://github.com/actions/download-artifact/releases">actions/download-artifact's
releases</a>.</em></p>
<blockquote>
<h2>v6.0.0</h2>
<h2>What's Changed</h2>
<p><strong>BREAKING CHANGE:</strong> this update supports Node
<code>v24.x</code>. This is not a breaking change per-se but we're
treating it as such.</p>
<ul>
<li>Update README for download-artifact v5 changes by <a
href="https://github.com/yacaovsnc"><code>@​yacaovsnc</code></a> in <a
href="https://redirect.github.com/actions/download-artifact/pull/417">actions/download-artifact#417</a></li>
<li>Update README with artifact extraction details by <a
href="https://github.com/yacaovsnc"><code>@​yacaovsnc</code></a> in <a
href="https://redirect.github.com/actions/download-artifact/pull/424">actions/download-artifact#424</a></li>
<li>Readme: spell out the first use of GHES by <a
href="https://github.com/danwkennedy"><code>@​danwkennedy</code></a> in
<a
href="https://redirect.github.com/actions/download-artifact/pull/431">actions/download-artifact#431</a></li>
<li>Bump <code>@actions/artifact</code> to <code>v4.0.0</code></li>
<li>Prepare <code>v6.0.0</code> by <a
href="https://github.com/danwkennedy"><code>@​danwkennedy</code></a> in
<a
href="https://redirect.github.com/actions/download-artifact/pull/438">actions/download-artifact#438</a></li>
</ul>
<h2>New Contributors</h2>
<ul>
<li><a
href="https://github.com/danwkennedy"><code>@​danwkennedy</code></a>
made their first contribution in <a
href="https://redirect.github.com/actions/download-artifact/pull/431">actions/download-artifact#431</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a
href="https://github.com/actions/download-artifact/compare/v5...v6.0.0">https://github.com/actions/download-artifact/compare/v5...v6.0.0</a></p>
</blockquote>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a
href="018cc2cf5b"><code>018cc2c</code></a>
Merge pull request <a
href="https://redirect.github.com/actions/download-artifact/issues/438">#438</a>
from actions/danwkennedy/prepare-6.0.0</li>
<li><a
href="815651c680"><code>815651c</code></a>
Revert &quot;Remove <code>github.dep.yml</code>&quot;</li>
<li><a
href="bb3a066a8b"><code>bb3a066</code></a>
Remove <code>github.dep.yml</code></li>
<li><a
href="fa1ce46bbd"><code>fa1ce46</code></a>
Prepare <code>v6.0.0</code></li>
<li><a
href="4a24838f3d"><code>4a24838</code></a>
Merge pull request <a
href="https://redirect.github.com/actions/download-artifact/issues/431">#431</a>
from danwkennedy/patch-1</li>
<li><a
href="5e3251c4ff"><code>5e3251c</code></a>
Readme: spell out the first use of GHES</li>
<li><a
href="abefc31eaf"><code>abefc31</code></a>
Merge pull request <a
href="https://redirect.github.com/actions/download-artifact/issues/424">#424</a>
from actions/yacaovsnc/update_readme</li>
<li><a
href="ac43a6070a"><code>ac43a60</code></a>
Update README with artifact extraction details</li>
<li><a
href="de96f4613b"><code>de96f46</code></a>
Merge pull request <a
href="https://redirect.github.com/actions/download-artifact/issues/417">#417</a>
from actions/yacaovsnc/update_readme</li>
<li><a
href="7993cb44e9"><code>7993cb4</code></a>
Remove migration guide for artifact download changes</li>
<li>Additional commits viewable in <a
href="https://github.com/actions/download-artifact/compare/v5...v6">compare
view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility
score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=actions/download-artifact&package-manager=github_actions&previous-version=5&new-version=6)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't
alter it yourself. You can also trigger a rebase manually by commenting
`@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits
that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after
your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge
and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating
it. You can achieve the same result by closing it manually
- `@dependabot show <dependency name> ignore conditions` will show all
of the ignore conditions of the specified dependency
- `@dependabot ignore this major version` will close this PR and stop
Dependabot creating any more for this major version (unless you reopen
the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop
Dependabot creating any more for this minor version (unless you reopen
the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop
Dependabot creating any more for this dependency (unless you reopen the
PR or upgrade to it yourself)


</details>

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Mason Daugherty <mason@langchain.dev>
2025-10-28 14:07:16 -04:00
dependabot[bot]
a59551f3b4 chore(infra): bump actions/upload-artifact from 4 to 5 (#33681)
Bumps
[actions/upload-artifact](https://github.com/actions/upload-artifact)
from 4 to 5.
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a
href="https://github.com/actions/upload-artifact/releases">actions/upload-artifact's
releases</a>.</em></p>
<blockquote>
<h2>v5.0.0</h2>
<h2>What's Changed</h2>
<p><strong>BREAKING CHANGE:</strong> this update supports Node
<code>v24.x</code>. This is not a breaking change per-se but we're
treating it as such.</p>
<ul>
<li>Update README.md by <a
href="https://github.com/GhadimiR"><code>@​GhadimiR</code></a> in <a
href="https://redirect.github.com/actions/upload-artifact/pull/681">actions/upload-artifact#681</a></li>
<li>Update README.md by <a
href="https://github.com/nebuk89"><code>@​nebuk89</code></a> in <a
href="https://redirect.github.com/actions/upload-artifact/pull/712">actions/upload-artifact#712</a></li>
<li>Readme: spell out the first use of GHES by <a
href="https://github.com/danwkennedy"><code>@​danwkennedy</code></a> in
<a
href="https://redirect.github.com/actions/upload-artifact/pull/727">actions/upload-artifact#727</a></li>
<li>Update GHES guidance to include reference to Node 20 version by <a
href="https://github.com/patrikpolyak"><code>@​patrikpolyak</code></a>
in <a
href="https://redirect.github.com/actions/upload-artifact/pull/725">actions/upload-artifact#725</a></li>
<li>Bump <code>@actions/artifact</code> to <code>v4.0.0</code></li>
<li>Prepare <code>v5.0.0</code> by <a
href="https://github.com/danwkennedy"><code>@​danwkennedy</code></a> in
<a
href="https://redirect.github.com/actions/upload-artifact/pull/734">actions/upload-artifact#734</a></li>
</ul>
<h2>New Contributors</h2>
<ul>
<li><a href="https://github.com/GhadimiR"><code>@​GhadimiR</code></a>
made their first contribution in <a
href="https://redirect.github.com/actions/upload-artifact/pull/681">actions/upload-artifact#681</a></li>
<li><a href="https://github.com/nebuk89"><code>@​nebuk89</code></a> made
their first contribution in <a
href="https://redirect.github.com/actions/upload-artifact/pull/712">actions/upload-artifact#712</a></li>
<li><a
href="https://github.com/danwkennedy"><code>@​danwkennedy</code></a>
made their first contribution in <a
href="https://redirect.github.com/actions/upload-artifact/pull/727">actions/upload-artifact#727</a></li>
<li><a
href="https://github.com/patrikpolyak"><code>@​patrikpolyak</code></a>
made their first contribution in <a
href="https://redirect.github.com/actions/upload-artifact/pull/725">actions/upload-artifact#725</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a
href="https://github.com/actions/upload-artifact/compare/v4...v5.0.0">https://github.com/actions/upload-artifact/compare/v4...v5.0.0</a></p>
<h2>v4.6.2</h2>
<h2>What's Changed</h2>
<ul>
<li>Update to use artifact 2.3.2 package &amp; prepare for new
upload-artifact release by <a
href="https://github.com/salmanmkc"><code>@​salmanmkc</code></a> in <a
href="https://redirect.github.com/actions/upload-artifact/pull/685">actions/upload-artifact#685</a></li>
</ul>
<h2>New Contributors</h2>
<ul>
<li><a href="https://github.com/salmanmkc"><code>@​salmanmkc</code></a>
made their first contribution in <a
href="https://redirect.github.com/actions/upload-artifact/pull/685">actions/upload-artifact#685</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a
href="https://github.com/actions/upload-artifact/compare/v4...v4.6.2">https://github.com/actions/upload-artifact/compare/v4...v4.6.2</a></p>
<h2>v4.6.1</h2>
<h2>What's Changed</h2>
<ul>
<li>Update to use artifact 2.2.2 package by <a
href="https://github.com/yacaovsnc"><code>@​yacaovsnc</code></a> in <a
href="https://redirect.github.com/actions/upload-artifact/pull/673">actions/upload-artifact#673</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a
href="https://github.com/actions/upload-artifact/compare/v4...v4.6.1">https://github.com/actions/upload-artifact/compare/v4...v4.6.1</a></p>
<h2>v4.6.0</h2>
<h2>What's Changed</h2>
<ul>
<li>Expose env vars to control concurrency and timeout by <a
href="https://github.com/yacaovsnc"><code>@​yacaovsnc</code></a> in <a
href="https://redirect.github.com/actions/upload-artifact/pull/662">actions/upload-artifact#662</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a
href="https://github.com/actions/upload-artifact/compare/v4...v4.6.0">https://github.com/actions/upload-artifact/compare/v4...v4.6.0</a></p>
<h2>v4.5.0</h2>
<h2>What's Changed</h2>
<ul>
<li>fix: deprecated <code>Node.js</code> version in action by <a
href="https://github.com/hamirmahal"><code>@​hamirmahal</code></a> in <a
href="https://redirect.github.com/actions/upload-artifact/pull/578">actions/upload-artifact#578</a></li>
<li>Add new <code>artifact-digest</code> output by <a
href="https://github.com/bdehamer"><code>@​bdehamer</code></a> in <a
href="https://redirect.github.com/actions/upload-artifact/pull/656">actions/upload-artifact#656</a></li>
</ul>
<h2>New Contributors</h2>
<ul>
<li><a
href="https://github.com/hamirmahal"><code>@​hamirmahal</code></a> made
their first contribution in <a
href="https://redirect.github.com/actions/upload-artifact/pull/578">actions/upload-artifact#578</a></li>
</ul>
<!-- raw HTML omitted -->
</blockquote>
<p>... (truncated)</p>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a
href="330a01c490"><code>330a01c</code></a>
Merge pull request <a
href="https://redirect.github.com/actions/upload-artifact/issues/734">#734</a>
from actions/danwkennedy/prepare-5.0.0</li>
<li><a
href="03f2824452"><code>03f2824</code></a>
Update <code>github.dep.yml</code></li>
<li><a
href="905a1ecb59"><code>905a1ec</code></a>
Prepare <code>v5.0.0</code></li>
<li><a
href="2d9f9cdfa9"><code>2d9f9cd</code></a>
Merge pull request <a
href="https://redirect.github.com/actions/upload-artifact/issues/725">#725</a>
from patrikpolyak/patch-1</li>
<li><a
href="9687587dec"><code>9687587</code></a>
Merge branch 'main' into patch-1</li>
<li><a
href="2848b2cda0"><code>2848b2c</code></a>
Merge pull request <a
href="https://redirect.github.com/actions/upload-artifact/issues/727">#727</a>
from danwkennedy/patch-1</li>
<li><a
href="9b511775fd"><code>9b51177</code></a>
Spell out the first use of GHES</li>
<li><a
href="cd231ca1ed"><code>cd231ca</code></a>
Update GHES guidance to include reference to Node 20 version</li>
<li><a
href="de65e23aa2"><code>de65e23</code></a>
Merge pull request <a
href="https://redirect.github.com/actions/upload-artifact/issues/712">#712</a>
from actions/nebuk89-patch-1</li>
<li><a
href="8747d8cd76"><code>8747d8c</code></a>
Update README.md</li>
<li>Additional commits viewable in <a
href="https://github.com/actions/upload-artifact/compare/v4...v5">compare
view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility
score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=actions/upload-artifact&package-manager=github_actions&previous-version=4&new-version=5)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't
alter it yourself. You can also trigger a rebase manually by commenting
`@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits
that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after
your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge
and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating
it. You can achieve the same result by closing it manually
- `@dependabot show <dependency name> ignore conditions` will show all
of the ignore conditions of the specified dependency
- `@dependabot ignore this major version` will close this PR and stop
Dependabot creating any more for this major version (unless you reopen
the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop
Dependabot creating any more for this minor version (unless you reopen
the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop
Dependabot creating any more for this dependency (unless you reopen the
PR or upgrade to it yourself)


</details>

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Mason Daugherty <mason@langchain.dev>
2025-10-28 14:07:03 -04:00
ccurme
3286a98b27 fix(core): translate Google GenAI text blocks to v1 (#33699) 2025-10-28 09:53:01 -04:00
Mason Daugherty
62769a0dac feat(langchain): export UsageMetadata (#33692)
as well as `InputTokenDetails`, and `OutputTokenDetails` from
`langchain_core.messages`
2025-10-27 19:47:41 -04:00
Mason Daugherty
f94108b4bc fix: links (#33691)
* X-ref to new docs
* Formatting updates
2025-10-27 19:04:29 -04:00
ccurme
60a0ff8217 fix(standard-tests): fix tool description in agent loop test (#33690) 2025-10-27 15:02:13 -04:00
Christophe Bornet
b3dffc70e2 fix(core): fix PydanticOutputParser's get_format_instructions for v1 models (#32479) 2025-10-27 13:44:20 -04:00
Arun Prasad
86ac39e11f refactor(core): Minor refactor for code readability (#33674) 2025-10-27 11:39:36 -04:00
John Eismeier
6e036d38b2 fix(infra): add emacs backup files to gitignore (#33675) 2025-10-27 11:26:47 -04:00
Shanto Mathew
2d30ebb53b docs(langchain): clarify create_tool_calling_agent system_prompt formatting and add troubleshooting (#33679) 2025-10-27 11:18:10 -04:00
Arun Prasad
b3934b9580 refactor(anthropic): remove unnecessary url check (#33671)
The `if "url" in annotation:` check on line 15 already ensures that "url" is a key in
annotation, so there is no need to check again before setting the "url" key in the out object

---------

Co-authored-by: Mason Daugherty <mason@langchain.dev>
2025-10-27 11:13:54 -04:00
Mason Daugherty
09102a634a fix: update some links (#33686) 2025-10-27 11:12:11 -04:00
ccurme
95ff5901a1 chore(anthropic): update integration test cassette (#33685) 2025-10-27 10:43:36 -04:00
Mason Daugherty
f3d7152074 style(core): more refs work (#33664) 2025-10-24 16:06:24 -04:00
Christophe Bornet
dff37f6048 fix(nomic): support Python 3.14 (#33655)
Pyarrow just published 3.14 binaries

Co-authored-by: Mason Daugherty <mason@langchain.dev>
2025-10-24 13:32:07 -04:00
ccurme
832036ef0f chore(infra): remove openai from langchain-core release test matrix (#33661) 2025-10-24 11:55:33 -04:00
ccurme
f1742954ab fix(core): make handling of schemas more defensive (#33660) 2025-10-24 11:10:06 -04:00
ccurme
6ab0476676 fix(openai): update test (#33659) 2025-10-24 11:04:33 -04:00
ccurme
d36413c821 release(mistralai): 1.0.1 (#33657) 2025-10-24 09:50:23 -04:00
Romi45
99097f799c fix(mistralai): resolve duplicate tool calls when converting to mistral chat message (#33648) 2025-10-24 09:40:31 -04:00
Mohammad Mohtashim
0666571519 chore(perplexity): Added all keys for usage metadata (#33480) 2025-10-24 09:32:35 -04:00
ccurme
ef85161525 release(core): 1.0.1 (#33639) 2025-10-22 14:25:21 -04:00
ccurme
079eb808f8 release(qdrant): 1.1.0 (#33638) 2025-10-22 13:24:36 -04:00
Anush
39fb2d1a3b feat(qdrant): Use Qdrant's built-in MMR search (#32302) 2025-10-22 13:19:32 -04:00
Mason Daugherty
db7f2db1ae feat(infra): langchain docs MCP (#33636) 2025-10-22 11:50:35 -04:00
Yu Zhong
df46c82ae2 feat(core): automatic set required to include all properties in strict mode (#32930) 2025-10-22 11:31:08 -04:00
Eugene Yurtsev
f8adbbc461 chore(langchain_v1): bump version from 1.0.1 to 1.0.2 (#33629)
Release 1.0.2
2025-10-21 17:05:51 -04:00
Eugene Yurtsev
17f0716d6c fix(langchain_v1): remove non llm controllable params from tool message on invocation failure (#33625)
The LLM shouldn't be seeing parameters it cannot control in the
ToolMessage error it gets when it invokes a tool with incorrect args.

This fixes the behavior within langchain to address immediate issue.

We may want to change the behavior in langchain_core as well to prevent
validation of injected arguments. But this would be done in a separate
change
2025-10-21 15:40:30 -04:00
Ali Ismail
5acd34ae92 feat(openai): add unit test for streaming error in _generate (#33134) 2025-10-21 15:08:37 -04:00
Aaron Sequeira
84dbebac4f fix(langchain): correctly initialize huggingface models in init_chat_model (#33167) 2025-10-21 14:21:46 -04:00
Mohammad Mohtashim
eddfcd2c88 docs(core): Updated docs for mustache_template_vars (#33481) 2025-10-21 13:01:25 -04:00
noeliecherrier
9f470d297f feat(mistralai): remove tenacity retries for embeddings (#33491) 2025-10-21 12:35:10 -04:00
ccurme
2222470f69 release(openai): 1.0.1 (#33624) 2025-10-21 11:37:47 -04:00
Marlene
78175fcb96 feat(openai): add callable support for openai_api_key parameter (#33532) 2025-10-21 11:16:02 -04:00
523 changed files with 33004 additions and 19791 deletions

View File

@@ -8,16 +8,15 @@ body:
value: |
Thank you for taking the time to file a bug report.
Use this to report BUGS in LangChain. For usage questions, feature requests and general design questions, please use the [LangChain Forum](https://forum.langchain.com/).
For usage questions, feature requests and general design questions, please use the [LangChain Forum](https://forum.langchain.com/).
Relevant links to check before filing a bug report to see if your issue has already been reported, fixed or
if there's another way to solve your problem:
Check these before submitting to see if your issue has already been reported, fixed or if there's another way to solve your problem:
* [LangChain Forum](https://forum.langchain.com/),
* [LangChain documentation with the integrated search](https://docs.langchain.com/oss/python/langchain/overview),
* [API Reference](https://reference.langchain.com/python/),
* [Documentation](https://docs.langchain.com/oss/python/langchain/overview),
* [API Reference Documentation](https://reference.langchain.com/python/),
* [LangChain ChatBot](https://chat.langchain.com/)
* [GitHub search](https://github.com/langchain-ai/langchain),
* [LangChain Forum](https://forum.langchain.com/),
- type: checkboxes
id: checks
attributes:
@@ -36,16 +35,48 @@ body:
required: true
- label: This is not related to the langchain-community package.
required: true
- label: I read what a minimal reproducible example is (https://stackoverflow.com/help/minimal-reproducible-example).
required: true
- label: I posted a self-contained, minimal, reproducible example. A maintainer can copy it and run it AS IS.
required: true
- type: checkboxes
id: package
attributes:
label: Package (Required)
description: |
Which `langchain` package(s) is this bug related to? Select at least one.
Note that if the package you are reporting for is not listed here, it is not in this repository (e.g. `langchain-google-genai` is in [`langchain-ai/langchain-google`](https://github.com/langchain-ai/langchain-google/)).
Please report issues for other packages to their respective repositories.
options:
- label: langchain
- label: langchain-openai
- label: langchain-anthropic
- label: langchain-classic
- label: langchain-core
- label: langchain-cli
- label: langchain-model-profiles
- label: langchain-tests
- label: langchain-text-splitters
- label: langchain-chroma
- label: langchain-deepseek
- label: langchain-exa
- label: langchain-fireworks
- label: langchain-groq
- label: langchain-huggingface
- label: langchain-mistralai
- label: langchain-nomic
- label: langchain-ollama
- label: langchain-perplexity
- label: langchain-prompty
- label: langchain-qdrant
- label: langchain-xai
- label: Other / not sure / general
- type: textarea
id: reproduction
validations:
required: true
attributes:
label: Example Code
label: Example Code (Python)
description: |
Please add a self-contained, [minimal, reproducible, example](https://stackoverflow.com/help/minimal-reproducible-example) with your use case.
@@ -53,15 +84,12 @@ body:
**Important!**
* Avoid screenshots when possible, as they are hard to read and (more importantly) don't allow others to copy-and-paste your code.
* Reduce your code to the minimum required to reproduce the issue if possible. This makes it much easier for others to help you.
* Use code tags (e.g., ```python ... ```) to correctly [format your code](https://help.github.com/en/github/writing-on-github/creating-and-highlighting-code-blocks#syntax-highlighting).
* INCLUDE the language label (e.g. `python`) after the first three backticks to enable syntax highlighting. (e.g., ```python rather than ```).
* Avoid screenshots, as they are hard to read and (more importantly) don't allow others to copy-and-paste your code.
* Reduce your code to the minimum required to reproduce the issue if possible.
(This will be automatically formatted into code, so no need for backticks.)
render: python
placeholder: |
The following code:
```python
from langchain_core.runnables import RunnableLambda
def bad_code(inputs) -> int:
@@ -69,17 +97,14 @@ body:
chain = RunnableLambda(bad_code)
chain.invoke('Hello!')
```
- type: textarea
id: error
validations:
required: false
attributes:
label: Error Message and Stack Trace (if applicable)
description: |
If you are reporting an error, please include the full error message and stack trace.
placeholder: |
Exception + full stack trace
If you are reporting an error, please copy and paste the full error message and
stack trace.
(This will be automatically formatted into code, so no need for backticks.)
render: shell
- type: textarea
id: description
attributes:
@@ -99,9 +124,7 @@ body:
attributes:
label: System Info
description: |
Please share your system info with us. Do NOT skip this step and please don't trim
the output. Most users don't include enough information here and it makes it harder
for us to help you.
Please share your system info with us.
Run the following command in your terminal and paste the output here:
@@ -113,8 +136,6 @@ body:
from langchain_core import sys_info
sys_info.print_sys_info()
```
alternatively, put the entire output of `pip freeze` here.
placeholder: |
python -m langchain_core.sys_info
validations:

View File

@@ -1,9 +1,18 @@
blank_issues_enabled: false
version: 2.1
contact_links:
- name: 📚 Documentation
url: https://github.com/langchain-ai/docs/issues/new?template=langchain.yml
- name: 📚 Documentation issue
url: https://github.com/langchain-ai/docs/issues/new?template=01-langchain.yml
about: Report an issue related to the LangChain documentation
- name: 💬 LangChain Forum
url: https://forum.langchain.com/
about: General community discussions and support
- name: 📚 LangChain Documentation
url: https://docs.langchain.com/oss/python/langchain/overview
about: View the official LangChain documentation
- name: 📚 API Reference Documentation
url: https://reference.langchain.com/python/
about: View the official LangChain API reference documentation
- name: 💬 LangChain Forum
url: https://forum.langchain.com/
about: Ask questions and get help from the community

View File

@@ -13,11 +13,11 @@ body:
Relevant links to check before filing a feature request to see if your request has already been made or
if there's another way to achieve what you want:
* [LangChain Forum](https://forum.langchain.com/),
* [LangChain documentation with the integrated search](https://docs.langchain.com/oss/python/langchain/overview),
* [API Reference](https://reference.langchain.com/python/),
* [Documentation](https://docs.langchain.com/oss/python/langchain/overview),
* [API Reference Documentation](https://reference.langchain.com/python/),
* [LangChain ChatBot](https://chat.langchain.com/)
* [GitHub search](https://github.com/langchain-ai/langchain),
* [LangChain Forum](https://forum.langchain.com/),
- type: checkboxes
id: checks
attributes:
@@ -34,6 +34,40 @@ body:
required: true
- label: This is not related to the langchain-community package.
required: true
- type: checkboxes
id: package
attributes:
label: Package (Required)
description: |
Which `langchain` package(s) is this request related to? Select at least one.
Note that if the package you are requesting for is not listed here, it is not in this repository (e.g. `langchain-google-genai` is in [`langchain-ai/langchain-google`](https://github.com/langchain-ai/langchain-google/)).
Please submit feature requests for other packages to their respective repositories.
options:
- label: langchain
- label: langchain-openai
- label: langchain-anthropic
- label: langchain-classic
- label: langchain-core
- label: langchain-cli
- label: langchain-model-profiles
- label: langchain-tests
- label: langchain-text-splitters
- label: langchain-chroma
- label: langchain-deepseek
- label: langchain-exa
- label: langchain-fireworks
- label: langchain-groq
- label: langchain-huggingface
- label: langchain-mistralai
- label: langchain-nomic
- label: langchain-ollama
- label: langchain-perplexity
- label: langchain-prompty
- label: langchain-qdrant
- label: langchain-xai
- label: Other / not sure / general
- type: textarea
id: feature-description
validations:

View File

@@ -18,3 +18,33 @@ body:
attributes:
label: Issue Content
description: Add the content of the issue here.
- type: checkboxes
id: package
attributes:
label: Package (Required)
description: |
Please select package(s) that this issue is related to.
options:
- label: langchain
- label: langchain-openai
- label: langchain-anthropic
- label: langchain-classic
- label: langchain-core
- label: langchain-cli
- label: langchain-model-profiles
- label: langchain-tests
- label: langchain-text-splitters
- label: langchain-chroma
- label: langchain-deepseek
- label: langchain-exa
- label: langchain-fireworks
- label: langchain-groq
- label: langchain-huggingface
- label: langchain-mistralai
- label: langchain-nomic
- label: langchain-ollama
- label: langchain-perplexity
- label: langchain-prompty
- label: langchain-qdrant
- label: langchain-xai
- label: Other / not sure / general

View File

@@ -25,13 +25,13 @@ body:
label: Task Description
description: |
Provide a clear and detailed description of the task.
What needs to be done? Be specific about the scope and requirements.
placeholder: |
This task involves...
The goal is to...
Specific requirements:
- ...
- ...
@@ -43,7 +43,7 @@ body:
label: Acceptance Criteria
description: |
Define the criteria that must be met for this task to be considered complete.
What are the specific deliverables or outcomes expected?
placeholder: |
This task will be complete when:
@@ -58,15 +58,15 @@ body:
label: Context and Background
description: |
Provide any relevant context, background information, or links to related issues/PRs.
Why is this task needed? What problem does it solve?
placeholder: |
Background:
- ...
Related issues/PRs:
- #...
Additional context:
- ...
validations:
@@ -77,15 +77,45 @@ body:
label: Dependencies
description: |
List any dependencies or blockers for this task.
Are there other tasks, issues, or external factors that need to be completed first?
placeholder: |
This task depends on:
- [ ] Issue #...
- [ ] PR #...
- [ ] External dependency: ...
Blocked by:
- ...
validations:
required: false
- type: checkboxes
id: package
attributes:
label: Package (Required)
description: |
Please select package(s) that this task is related to.
options:
- label: langchain
- label: langchain-openai
- label: langchain-anthropic
- label: langchain-classic
- label: langchain-core
- label: langchain-cli
- label: langchain-model-profiles
- label: langchain-tests
- label: langchain-text-splitters
- label: langchain-chroma
- label: langchain-deepseek
- label: langchain-exa
- label: langchain-fireworks
- label: langchain-groq
- label: langchain-huggingface
- label: langchain-mistralai
- label: langchain-nomic
- label: langchain-ollama
- label: langchain-perplexity
- label: langchain-prompty
- label: langchain-qdrant
- label: langchain-xai
- label: Other / not sure / general

View File

@@ -1,28 +1,30 @@
(Replace this entire block of text)
Thank you for contributing to LangChain! Follow these steps to mark your pull request as ready for review. **If any of these steps are not completed, your PR will not be considered for review.**
Read the full contributing guidelines: https://docs.langchain.com/oss/python/contributing/overview
Thank you for contributing to LangChain! Follow these steps to have your pull request considered as ready for review.
1. PR title: Should follow the format: TYPE(SCOPE): DESCRIPTION
- [ ] **PR title**: Follows the format: {TYPE}({SCOPE}): {DESCRIPTION}
- Examples:
- fix(anthropic): resolve flag parsing error
- feat(core): add multi-tenant support
- fix(cli): resolve flag parsing error
- docs(openai): update API usage examples
- Allowed `{TYPE}` values:
- feat, fix, docs, style, refactor, perf, test, build, ci, chore, revert, release
- Allowed `{SCOPE}` values (optional):
- core, cli, langchain, standard-tests, text-splitters, docs, anthropic, chroma, deepseek, exa, fireworks, groq, huggingface, mistralai, nomic, ollama, openai, perplexity, prompty, qdrant, xai, infra
- Once you've written the title, please delete this checklist item; do not include it in the PR.
- test(openai): update API usage tests
- Allowed TYPE and SCOPE values: https://github.com/langchain-ai/langchain/blob/master/.github/workflows/pr_lint.yml#L15-L33
- [ ] **PR message**: ***Delete this entire checklist*** and replace with
- **Description:** a description of the change. Include a [closing keyword](https://docs.github.com/en/issues/tracking-your-work-with-issues/using-issues/linking-a-pull-request-to-an-issue#linking-a-pull-request-to-an-issue-using-a-keyword) if applicable to a relevant issue.
- **Issue:** the issue # it fixes, if applicable (e.g. Fixes #123)
- **Dependencies:** any dependencies required for this change
2. PR description:
- [ ] **Lint and test**: Run `make format`, `make lint` and `make test` from the root of the package(s) you've modified. **We will not consider a PR unless these three are passing in CI.** See [contribution guidelines](https://docs.langchain.com/oss/python/contributing) for more.
- Write 1-2 sentences summarizing the change.
- If this PR addresses a specific issue, please include "Fixes #ISSUE_NUMBER" in the description to automatically close the issue when the PR is merged.
- If there are any breaking changes, please clearly describe them.
- If this PR depends on another PR being merged first, please include "Depends on #PR_NUMBER" in the description.
3. Run `make format`, `make lint` and `make test` from the root of the package(s) you've modified.
- We will not consider a PR unless these three are passing in CI.
Additional guidelines:
- Most PRs should not touch more than one package.
- Please do not add dependencies to `pyproject.toml` files (even optional ones) unless they are **required** for unit tests. Likewise, please do not update the `uv.lock` files unless you are adding a required dependency.
- Changes should be backwards compatible.
- Make sure optional dependencies are imported within a function.
- We ask that if you use generative AI for your contribution, you include a disclaimer.
- PRs should not touch more than one package unless absolutely necessary.
- Do not update the `uv.lock` files or add dependencies to `pyproject.toml` files (even optional ones) unless you have explicit permission to do so from a maintainer.

View File

@@ -1,93 +0,0 @@
# An action for setting up poetry install with caching.
# Using a custom action since the default action does not
# take poetry install groups into account.
# Action code from:
# https://github.com/actions/setup-python/issues/505#issuecomment-1273013236
name: poetry-install-with-caching
description: Poetry install with support for caching of dependency groups.
inputs:
python-version:
description: Python version, supporting MAJOR.MINOR only
required: true
poetry-version:
description: Poetry version
required: true
cache-key:
description: Cache key to use for manual handling of caching
required: true
working-directory:
description: Directory whose poetry.lock file should be cached
required: true
runs:
using: composite
steps:
- uses: actions/setup-python@v5
name: Setup python ${{ inputs.python-version }}
id: setup-python
with:
python-version: ${{ inputs.python-version }}
- uses: actions/cache@v4
id: cache-bin-poetry
name: Cache Poetry binary - Python ${{ inputs.python-version }}
env:
SEGMENT_DOWNLOAD_TIMEOUT_MIN: "1"
with:
path: |
/opt/pipx/venvs/poetry
# This step caches the poetry installation, so make sure it's keyed on the poetry version as well.
key: bin-poetry-${{ runner.os }}-${{ runner.arch }}-py-${{ inputs.python-version }}-${{ inputs.poetry-version }}
- name: Refresh shell hashtable and fixup softlinks
if: steps.cache-bin-poetry.outputs.cache-hit == 'true'
shell: bash
env:
POETRY_VERSION: ${{ inputs.poetry-version }}
PYTHON_VERSION: ${{ inputs.python-version }}
run: |
set -eux
# Refresh the shell hashtable, to ensure correct `which` output.
hash -r
# `actions/cache@v3` doesn't always seem able to correctly unpack softlinks.
# Delete and recreate the softlinks pipx expects to have.
rm /opt/pipx/venvs/poetry/bin/python
cd /opt/pipx/venvs/poetry/bin
ln -s "$(which "python$PYTHON_VERSION")" python
chmod +x python
cd /opt/pipx_bin/
ln -s /opt/pipx/venvs/poetry/bin/poetry poetry
chmod +x poetry
# Ensure everything got set up correctly.
/opt/pipx/venvs/poetry/bin/python --version
/opt/pipx_bin/poetry --version
- name: Install poetry
if: steps.cache-bin-poetry.outputs.cache-hit != 'true'
shell: bash
env:
POETRY_VERSION: ${{ inputs.poetry-version }}
PYTHON_VERSION: ${{ inputs.python-version }}
# Install poetry using the python version installed by setup-python step.
run: pipx install "poetry==$POETRY_VERSION" --python '${{ steps.setup-python.outputs.python-path }}' --verbose
- name: Restore pip and poetry cached dependencies
uses: actions/cache@v4
env:
SEGMENT_DOWNLOAD_TIMEOUT_MIN: "4"
WORKDIR: ${{ inputs.working-directory == '' && '.' || inputs.working-directory }}
with:
path: |
~/.cache/pip
~/.cache/pypoetry/virtualenvs
~/.cache/pypoetry/cache
~/.cache/pypoetry/artifacts
${{ env.WORKDIR }}/.venv
key: py-deps-${{ runner.os }}-${{ runner.arch }}-py-${{ inputs.python-version }}-poetry-${{ inputs.poetry-version }}-${{ inputs.cache-key }}-${{ hashFiles(format('{0}/**/poetry.lock', env.WORKDIR)) }}

View File

@@ -7,13 +7,12 @@ core:
- any-glob-to-any-file:
- "libs/core/**/*"
langchain:
langchain-classic:
- changed-files:
- any-glob-to-any-file:
- "libs/langchain/**/*"
- "libs/langchain_v1/**/*"
v1:
langchain:
- changed-files:
- any-glob-to-any-file:
- "libs/langchain_v1/**/*"
@@ -28,6 +27,11 @@ standard-tests:
- any-glob-to-any-file:
- "libs/standard-tests/**/*"
model-profiles:
- changed-files:
- any-glob-to-any-file:
- "libs/model-profiles/**/*"
text-splitters:
- changed-files:
- any-glob-to-any-file:
@@ -39,6 +43,81 @@ integration:
- any-glob-to-any-file:
- "libs/partners/**/*"
anthropic:
- changed-files:
- any-glob-to-any-file:
- "libs/partners/anthropic/**/*"
chroma:
- changed-files:
- any-glob-to-any-file:
- "libs/partners/chroma/**/*"
deepseek:
- changed-files:
- any-glob-to-any-file:
- "libs/partners/deepseek/**/*"
exa:
- changed-files:
- any-glob-to-any-file:
- "libs/partners/exa/**/*"
fireworks:
- changed-files:
- any-glob-to-any-file:
- "libs/partners/fireworks/**/*"
groq:
- changed-files:
- any-glob-to-any-file:
- "libs/partners/groq/**/*"
huggingface:
- changed-files:
- any-glob-to-any-file:
- "libs/partners/huggingface/**/*"
mistralai:
- changed-files:
- any-glob-to-any-file:
- "libs/partners/mistralai/**/*"
nomic:
- changed-files:
- any-glob-to-any-file:
- "libs/partners/nomic/**/*"
ollama:
- changed-files:
- any-glob-to-any-file:
- "libs/partners/ollama/**/*"
openai:
- changed-files:
- any-glob-to-any-file:
- "libs/partners/openai/**/*"
perplexity:
- changed-files:
- any-glob-to-any-file:
- "libs/partners/perplexity/**/*"
prompty:
- changed-files:
- any-glob-to-any-file:
- "libs/partners/prompty/**/*"
qdrant:
- changed-files:
- any-glob-to-any-file:
- "libs/partners/qdrant/**/*"
xai:
- changed-files:
- any-glob-to-any-file:
- "libs/partners/xai/**/*"
# Infrastructure and DevOps
infra:
- changed-files:

View File

@@ -1,41 +0,0 @@
# PR title labeler config
#
# Labels PRs based on conventional commit patterns in titles
#
# Format: type(scope): description or type!: description (breaking)
add-missing-labels: true
clear-prexisting: false
include-commits: false
include-title: true
label-for-breaking-changes: breaking
label-mapping:
documentation: ["docs"]
feature: ["feat"]
fix: ["fix"]
infra: ["build", "ci", "chore"]
integration:
[
"anthropic",
"chroma",
"deepseek",
"exa",
"fireworks",
"groq",
"huggingface",
"mistralai",
"nomic",
"ollama",
"openai",
"perplexity",
"prompty",
"qdrant",
"xai",
]
linting: ["style"]
performance: ["perf"]
refactor: ["refactor"]
release: ["release"]
revert: ["revert"]
tests: ["test"]

View File

@@ -30,6 +30,7 @@ LANGCHAIN_DIRS = [
"libs/text-splitters",
"libs/langchain",
"libs/langchain_v1",
"libs/model-profiles",
]
# When set to True, we are ignoring core dependents
@@ -134,7 +135,7 @@ def _get_configs_for_single_dir(job: str, dir_: str) -> List[Dict[str, str]]:
elif dir_ == "libs/core":
py_versions = ["3.10", "3.11", "3.12", "3.13", "3.14"]
# custom logic for specific directories
elif dir_ in {"libs/partners/chroma", "libs/partners/nomic"}:
elif dir_ in {"libs/partners/chroma"}:
py_versions = ["3.10", "3.13"]
else:
py_versions = ["3.10", "3.14"]

View File

@@ -98,7 +98,7 @@ def _check_python_version_from_requirement(
return True
else:
marker_str = str(requirement.marker)
if "python_version" or "python_full_version" in marker_str:
if "python_version" in marker_str or "python_full_version" in marker_str:
python_version_str = "".join(
char
for char in marker_str

View File

@@ -35,7 +35,7 @@ jobs:
timeout-minutes: 20
name: "Python ${{ inputs.python-version }}"
steps:
- uses: actions/checkout@v5
- uses: actions/checkout@v6
- name: "🐍 Set up Python ${{ inputs.python-version }} + UV"
uses: "./.github/actions/uv_setup"

View File

@@ -38,7 +38,7 @@ jobs:
timeout-minutes: 20
steps:
- name: "📋 Checkout Code"
uses: actions/checkout@v5
uses: actions/checkout@v6
- name: "🐍 Set up Python ${{ inputs.python-version }} + UV"
uses: "./.github/actions/uv_setup"

View File

@@ -54,7 +54,7 @@ jobs:
version: ${{ steps.check-version.outputs.version }}
steps:
- uses: actions/checkout@v5
- uses: actions/checkout@v6
- name: Set up Python + uv
uses: "./.github/actions/uv_setup"
@@ -77,7 +77,7 @@ jobs:
working-directory: ${{ inputs.working-directory }}
- name: Upload build
uses: actions/upload-artifact@v4
uses: actions/upload-artifact@v5
with:
name: dist
path: ${{ inputs.working-directory }}/dist/
@@ -105,7 +105,7 @@ jobs:
outputs:
release-body: ${{ steps.generate-release-body.outputs.release-body }}
steps:
- uses: actions/checkout@v5
- uses: actions/checkout@v6
with:
repository: langchain-ai/langchain
path: langchain
@@ -149,8 +149,8 @@ jobs:
fi
fi
# if PREV_TAG is empty, let it be empty
if [ -z "$PREV_TAG" ]; then
# if PREV_TAG is empty or came out to 0.0.0, let it be empty
if [ -z "$PREV_TAG" ] || [ "$PREV_TAG" = "$PKG_NAME==0.0.0" ]; then
echo "No previous tag found - first release"
else
# confirm prev-tag actually exists in git repo with git tag
@@ -179,8 +179,8 @@ jobs:
PREV_TAG: ${{ steps.check-tags.outputs.prev-tag }}
run: |
PREAMBLE="Changes since $PREV_TAG"
# if PREV_TAG is empty, then we are releasing the first version
if [ -z "$PREV_TAG" ]; then
# if PREV_TAG is empty or 0.0.0, then we are releasing the first version
if [ -z "$PREV_TAG" ] || [ "$PREV_TAG" = "$PKG_NAME==0.0.0" ]; then
PREAMBLE="Initial release"
PREV_TAG=$(git rev-list --max-parents=0 HEAD)
fi
@@ -206,9 +206,9 @@ jobs:
id-token: write
steps:
- uses: actions/checkout@v5
- uses: actions/checkout@v6
- uses: actions/download-artifact@v5
- uses: actions/download-artifact@v6
with:
name: dist
path: ${{ inputs.working-directory }}/dist/
@@ -237,7 +237,7 @@ jobs:
contents: read
timeout-minutes: 20
steps:
- uses: actions/checkout@v5
- uses: actions/checkout@v6
# We explicitly *don't* set up caching here. This ensures our tests are
# maximally sensitive to catching breakage.
@@ -258,7 +258,7 @@ jobs:
with:
python-version: ${{ env.PYTHON_VERSION }}
- uses: actions/download-artifact@v5
- uses: actions/download-artifact@v6
with:
name: dist
path: ${{ inputs.working-directory }}/dist/
@@ -377,6 +377,7 @@ jobs:
XAI_API_KEY: ${{ secrets.XAI_API_KEY }}
DEEPSEEK_API_KEY: ${{ secrets.DEEPSEEK_API_KEY }}
PPLX_API_KEY: ${{ secrets.PPLX_API_KEY }}
LANGCHAIN_TESTS_USER_AGENT: ${{ secrets.LANGCHAIN_TESTS_USER_AGENT }}
run: make integration_tests
working-directory: ${{ inputs.working-directory }}
@@ -395,7 +396,7 @@ jobs:
contents: read
strategy:
matrix:
partner: [openai, anthropic]
partner: [anthropic]
fail-fast: false # Continue testing other partners if one fails
env:
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
@@ -409,8 +410,9 @@ jobs:
AZURE_OPENAI_LEGACY_CHAT_DEPLOYMENT_NAME: ${{ secrets.AZURE_OPENAI_LEGACY_CHAT_DEPLOYMENT_NAME }}
AZURE_OPENAI_LLM_DEPLOYMENT_NAME: ${{ secrets.AZURE_OPENAI_LLM_DEPLOYMENT_NAME }}
AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT_NAME: ${{ secrets.AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT_NAME }}
LANGCHAIN_TESTS_USER_AGENT: ${{ secrets.LANGCHAIN_TESTS_USER_AGENT }}
steps:
- uses: actions/checkout@v5
- uses: actions/checkout@v6
# We implement this conditional as Github Actions does not have good support
# for conditionally needing steps. https://github.com/actions/runner/issues/491
@@ -428,7 +430,7 @@ jobs:
with:
python-version: ${{ env.PYTHON_VERSION }}
- uses: actions/download-artifact@v5
- uses: actions/download-artifact@v6
if: startsWith(inputs.working-directory, 'libs/core')
with:
name: dist
@@ -442,7 +444,7 @@ jobs:
git ls-remote --tags origin "langchain-${{ matrix.partner }}*" \
| awk '{print $2}' \
| sed 's|refs/tags/||' \
| grep -E '[0-9]+\.[0-9]+\.[0-9]+([a-zA-Z]+[0-9]+)?$' \
| grep -E '[0-9]+\.[0-9]+\.[0-9]+$' \
| sort -Vr \
| head -n 1
)"
@@ -490,14 +492,14 @@ jobs:
working-directory: ${{ inputs.working-directory }}
steps:
- uses: actions/checkout@v5
- uses: actions/checkout@v6
- name: Set up Python + uv
uses: "./.github/actions/uv_setup"
with:
python-version: ${{ env.PYTHON_VERSION }}
- uses: actions/download-artifact@v5
- uses: actions/download-artifact@v6
with:
name: dist
path: ${{ inputs.working-directory }}/dist/
@@ -530,14 +532,14 @@ jobs:
working-directory: ${{ inputs.working-directory }}
steps:
- uses: actions/checkout@v5
- uses: actions/checkout@v6
- name: Set up Python + uv
uses: "./.github/actions/uv_setup"
with:
python-version: ${{ env.PYTHON_VERSION }}
- uses: actions/download-artifact@v5
- uses: actions/download-artifact@v6
with:
name: dist
path: ${{ inputs.working-directory }}/dist/

View File

@@ -33,7 +33,7 @@ jobs:
name: "Python ${{ inputs.python-version }}"
steps:
- name: "📋 Checkout Code"
uses: actions/checkout@v5
uses: actions/checkout@v6
- name: "🐍 Set up Python ${{ inputs.python-version }} + UV"
uses: "./.github/actions/uv_setup"

View File

@@ -36,7 +36,7 @@ jobs:
name: "Pydantic ~=${{ inputs.pydantic-version }}"
steps:
- name: "📋 Checkout Code"
uses: actions/checkout@v5
uses: actions/checkout@v6
- name: "🐍 Set up Python ${{ inputs.python-version }} + UV"
uses: "./.github/actions/uv_setup"

View File

@@ -0,0 +1,107 @@
name: Auto Label Issues by Package
on:
issues:
types: [opened, edited]
jobs:
label-by-package:
permissions:
issues: write
runs-on: ubuntu-latest
steps:
- name: Sync package labels
uses: actions/github-script@v8
with:
script: |
const body = context.payload.issue.body || "";
// Extract text under "### Package"
const match = body.match(/### Package\s+([\s\S]*?)\n###/i);
if (!match) return;
const packageSection = match[1].trim();
// Mapping table for package names to labels
const mapping = {
"langchain": "langchain",
"langchain-openai": "openai",
"langchain-anthropic": "anthropic",
"langchain-classic": "langchain-classic",
"langchain-core": "core",
"langchain-cli": "cli",
"langchain-model-profiles": "model-profiles",
"langchain-tests": "standard-tests",
"langchain-text-splitters": "text-splitters",
"langchain-chroma": "chroma",
"langchain-deepseek": "deepseek",
"langchain-exa": "exa",
"langchain-fireworks": "fireworks",
"langchain-groq": "groq",
"langchain-huggingface": "huggingface",
"langchain-mistralai": "mistralai",
"langchain-nomic": "nomic",
"langchain-ollama": "ollama",
"langchain-perplexity": "perplexity",
"langchain-prompty": "prompty",
"langchain-qdrant": "qdrant",
"langchain-xai": "xai",
};
// All possible package labels we manage
const allPackageLabels = Object.values(mapping);
const selectedLabels = [];
// Check if this is checkbox format (multiple selection)
const checkboxMatches = packageSection.match(/- \[x\]\s+([^\n\r]+)/gi);
if (checkboxMatches) {
// Handle checkbox format
for (const match of checkboxMatches) {
const packageName = match.replace(/- \[x\]\s+/i, '').trim();
const label = mapping[packageName];
if (label && !selectedLabels.includes(label)) {
selectedLabels.push(label);
}
}
} else {
// Handle dropdown format (single selection)
const label = mapping[packageSection];
if (label) {
selectedLabels.push(label);
}
}
// Get current issue labels
const issue = await github.rest.issues.get({
owner: context.repo.owner,
repo: context.repo.repo,
issue_number: context.issue.number
});
const currentLabels = issue.data.labels.map(label => label.name);
const currentPackageLabels = currentLabels.filter(label => allPackageLabels.includes(label));
// Determine labels to add and remove
const labelsToAdd = selectedLabels.filter(label => !currentPackageLabels.includes(label));
const labelsToRemove = currentPackageLabels.filter(label => !selectedLabels.includes(label));
// Add new labels
if (labelsToAdd.length > 0) {
await github.rest.issues.addLabels({
owner: context.repo.owner,
repo: context.repo.repo,
issue_number: context.issue.number,
labels: labelsToAdd
});
}
// Remove old labels
for (const label of labelsToRemove) {
await github.rest.issues.removeLabel({
owner: context.repo.owner,
repo: context.repo.repo,
issue_number: context.issue.number,
name: label
});
}

View File

@@ -18,7 +18,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v5
- uses: actions/checkout@v6
- name: "✅ Verify pyproject.toml & version.py Match"
run: |

View File

@@ -47,7 +47,7 @@ jobs:
if: ${{ !contains(github.event.pull_request.labels.*.name, 'ci-ignore') }}
steps:
- name: "📋 Checkout Code"
uses: actions/checkout@v5
uses: actions/checkout@v6
- name: "🐍 Setup Python 3.11"
uses: actions/setup-python@v6
with:
@@ -141,7 +141,7 @@ jobs:
run:
working-directory: ${{ matrix.job-configs.working-directory }}
steps:
- uses: actions/checkout@v5
- uses: actions/checkout@v6
- name: "🐍 Set up Python ${{ matrix.job-configs.python-version }} + UV"
uses: "./.github/actions/uv_setup"
@@ -182,7 +182,7 @@ jobs:
job-configs: ${{ fromJson(needs.build.outputs.codspeed) }}
fail-fast: false
steps:
- uses: actions/checkout@v5
- uses: actions/checkout@v6
- name: "📦 Install UV Package Manager"
uses: astral-sh/setup-uv@v7

View File

@@ -71,14 +71,14 @@ jobs:
working-directory: ${{ fromJSON(needs.compute-matrix.outputs.matrix).working-directory }}
steps:
- uses: actions/checkout@v5
- uses: actions/checkout@v6
with:
path: langchain
- uses: actions/checkout@v5
- uses: actions/checkout@v6
with:
repository: langchain-ai/langchain-google
path: langchain-google
- uses: actions/checkout@v5
- uses: actions/checkout@v6
with:
repository: langchain-ai/langchain-aws
path: langchain-aws
@@ -155,6 +155,7 @@ jobs:
WATSONX_APIKEY: ${{ secrets.WATSONX_APIKEY }}
WATSONX_PROJECT_ID: ${{ secrets.WATSONX_PROJECT_ID }}
XAI_API_KEY: ${{ secrets.XAI_API_KEY }}
LANGCHAIN_TESTS_USER_AGENT: ${{ secrets.LANGCHAIN_TESTS_USER_AGENT }}
run: |
cd langchain/${{ matrix.working-directory }}
make integration_tests

View File

@@ -26,11 +26,13 @@
# * revert — reverts a previous commit
# * release — prepare a new release
#
# Allowed Scopes (optional):
# core, cli, langchain, langchain_v1, langchain-classic, standard-tests,
# text-splitters, docs, anthropic, chroma, deepseek, exa, fireworks, groq,
# huggingface, mistralai, nomic, ollama, openai, perplexity, prompty, qdrant,
# xai, infra
# Allowed Scope(s) (optional):
# core, cli, langchain, langchain_v1, langchain-classic, model-profiles,
# standard-tests, text-splitters, docs, anthropic, chroma, deepseek, exa,
# fireworks, groq, huggingface, mistralai, nomic, ollama, openai,
# perplexity, prompty, qdrant, xai, infra, deps
#
# Multiple scopes can be used by separating them with a comma.
#
# Rules:
# 1. The 'Type' must start with a lowercase letter.
@@ -79,8 +81,8 @@ jobs:
core
cli
langchain
langchain_v1
langchain-classic
model-profiles
standard-tests
text-splitters
docs
@@ -100,6 +102,7 @@ jobs:
qdrant
xai
infra
deps
requireScope: false
disallowScopes: |
release

View File

@@ -23,12 +23,12 @@ jobs:
permissions:
contents: read
steps:
- uses: actions/checkout@v5
- uses: actions/checkout@v6
with:
ref: v0.3
path: langchain
- uses: actions/checkout@v5
- uses: actions/checkout@v6
with:
repository: langchain-ai/langchain-api-docs-html
path: langchain-api-docs-html

5
.gitignore vendored
View File

@@ -1,6 +1,8 @@
.vs/
.claude/
.idea/
#Emacs backup
*~
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
@@ -161,3 +163,6 @@ node_modules
prof
virtualenv/
scratch/
.langgraph_api/

8
.mcp.json Normal file
View File

@@ -0,0 +1,8 @@
{
"mcpServers": {
"docs-langchain": {
"type": "http",
"url": "https://docs.langchain.com/mcp"
}
}
}

405
AGENTS.md
View File

@@ -1,255 +1,58 @@
# Global Development Guidelines for LangChain Projects
# Global development guidelines for the LangChain monorepo
## Core Development Principles
This document provides context to understand the LangChain Python project and assist with development.
### 1. Maintain Stable Public Interfaces ⚠️ CRITICAL
## Project architecture and context
**Always attempt to preserve function signatures, argument positions, and names for exported/public methods.**
### Monorepo structure
**Bad - Breaking Change:**
This is a Python monorepo with multiple independently versioned packages that use `uv`.
```python
def get_user(id, verbose=False): # Changed from `user_id`
pass
```txt
langchain/
├── libs/
│ ├── core/ # `langchain-core` primitives and base abstractions
│ ├── langchain/ # `langchain-classic` (legacy, no new features)
│ ├── langchain_v1/ # Actively maintained `langchain` package
│ ├── partners/ # Third-party integrations
│ │ ├── openai/ # OpenAI models and embeddings
│ │ ├── anthropic/ # Anthropic (Claude) integration
│ │ ├── ollama/ # Local model support
│ │ └── ... (other integrations maintained by the LangChain team)
│ ├── text-splitters/ # Document chunking utilities
│ ├── standard-tests/ # Shared test suite for integrations
│ ├── model-profiles/ # Model configuration profiles
│ └── cli/ # Command-line interface tools
├── .github/ # CI/CD workflows and templates
├── .vscode/ # VSCode IDE standard settings and recommended extensions
└── README.md # Information about LangChain
```
**Good - Stable Interface:**
- **Core layer** (`langchain-core`): Base abstractions, interfaces, and protocols. Users should not need to know about this layer directly.
- **Implementation layer** (`langchain`): Concrete implementations and high-level public utilities
- **Integration layer** (`partners/`): Third-party service integrations. Note that this monorepo is not exhaustive of all LangChain integrations; some are maintained in separate repos, such as `langchain-ai/langchain-google` and `langchain-ai/langchain-aws`. Usually these repos are cloned at the same level as this monorepo, so if needed, you can refer to their code directly by navigating to `../langchain-google/` from this monorepo.
- **Testing layer** (`standard-tests/`): Standardized integration tests for partner integrations
```python
def get_user(user_id: str, verbose: bool = False) -> User:
"""Retrieve user by ID with optional verbose output."""
pass
```
### Development tools & commands
**Before making ANY changes to public APIs:**
- `uv` Fast Python package installer and resolver (replaces pip/poetry)
- `make` Task runner for common development commands. Feel free to look at the `Makefile` for available commands and usage patterns.
- `ruff` Fast Python linter and formatter
- `mypy` Static type checking
- `pytest` Testing framework
- Check if the function/class is exported in `__init__.py`
- Look for existing usage patterns in tests and examples
- Use keyword-only arguments for new parameters: `*, new_param: str = "default"`
- Mark experimental features clearly with docstring warnings (using MkDocs Material admonitions, like `!!! warning`)
This monorepo uses `uv` for dependency management. Local development uses editable installs: `[tool.uv.sources]`
🧠 *Ask yourself:* "Would this change break someone's code if they used it last week?"
### 2. Code Quality Standards
**All Python code MUST include type hints and return types.**
**Bad:**
```python
def p(u, d):
return [x for x in u if x not in d]
```
**Good:**
```python
def filter_unknown_users(users: list[str], known_users: set[str]) -> list[str]:
"""Filter out users that are not in the known users set.
Args:
users: List of user identifiers to filter.
known_users: Set of known/valid user identifiers.
Returns:
List of users that are not in the known_users set.
"""
return [user for user in users if user not in known_users]
```
**Style Requirements:**
- Use descriptive, **self-explanatory variable names**. Avoid overly short or cryptic identifiers.
- Attempt to break up complex functions (>20 lines) into smaller, focused functions where it makes sense
- Avoid unnecessary abstraction or premature optimization
- Follow existing patterns in the codebase you're modifying
### 3. Testing Requirements
**Every new feature or bugfix MUST be covered by unit tests.**
**Test Organization:**
- Unit tests: `tests/unit_tests/` (no network calls allowed)
- Integration tests: `tests/integration_tests/` (network calls permitted)
- Use `pytest` as the testing framework
**Test Quality Checklist:**
- [ ] Tests fail when your new logic is broken
- [ ] Happy path is covered
- [ ] Edge cases and error conditions are tested
- [ ] Use fixtures/mocks for external dependencies
- [ ] Tests are deterministic (no flaky tests)
Checklist questions:
- [ ] Does the test suite fail if your new logic is broken?
- [ ] Are all expected behaviors exercised (happy path, invalid input, etc)?
- [ ] Do tests use fixtures or mocks where needed?
```python
def test_filter_unknown_users():
"""Test filtering unknown users from a list."""
users = ["alice", "bob", "charlie"]
known_users = {"alice", "bob"}
result = filter_unknown_users(users, known_users)
assert result == ["charlie"]
assert len(result) == 1
```
### 4. Security and Risk Assessment
**Security Checklist:**
- No `eval()`, `exec()`, or `pickle` on user-controlled input
- Proper exception handling (no bare `except:`) and use a `msg` variable for error messages
- Remove unreachable/commented code before committing
- Race conditions or resource leaks (file handles, sockets, threads).
- Ensure proper resource cleanup (file handles, connections)
**Bad:**
```python
def load_config(path):
with open(path) as f:
return eval(f.read()) # ⚠️ Never eval config
```
**Good:**
```python
import json
def load_config(path: str) -> dict:
with open(path) as f:
return json.load(f)
```
### 5. Documentation Standards
**Use Google-style docstrings with Args section for all public functions.**
**Insufficient Documentation:**
```python
def send_email(to, msg):
"""Send an email to a recipient."""
```
**Complete Documentation:**
```python
def send_email(to: str, msg: str, *, priority: str = "normal") -> bool:
"""
Send an email to a recipient with specified priority.
Args:
to: The email address of the recipient.
msg: The message body to send.
priority: Email priority level (`'low'`, `'normal'`, `'high'`).
Returns:
`True` if email was sent successfully, `False` otherwise.
Raises:
`InvalidEmailError`: If the email address format is invalid.
`SMTPConnectionError`: If unable to connect to email server.
"""
```
**Documentation Guidelines:**
- Types go in function signatures, NOT in docstrings
- If a default is present, DO NOT repeat it in the docstring unless there is post-processing or it is set conditionally.
- Focus on "why" rather than "what" in descriptions
- Document all parameters, return values, and exceptions
- Keep descriptions concise but clear
- Ensure American English spelling (e.g., "behavior", not "behaviour")
📌 *Tip:* Keep descriptions concise but clear. Only document return values if non-obvious.
### 6. Architectural Improvements
**When you encounter code that could be improved, suggest better designs:**
**Poor Design:**
```python
def process_data(data, db_conn, email_client, logger):
# Function doing too many things
validated = validate_data(data)
result = db_conn.save(validated)
email_client.send_notification(result)
logger.log(f"Processed {len(data)} items")
return result
```
**Better Design:**
```python
@dataclass
class ProcessingResult:
"""Result of data processing operation."""
items_processed: int
success: bool
errors: List[str] = field(default_factory=list)
class DataProcessor:
"""Handles data validation, storage, and notification."""
def __init__(self, db_conn: Database, email_client: EmailClient):
self.db = db_conn
self.email = email_client
def process(self, data: List[dict]) -> ProcessingResult:
"""Process and store data with notifications."""
validated = self._validate_data(data)
result = self.db.save(validated)
self._notify_completion(result)
return result
```
**Design Improvement Areas:**
If there's a **cleaner**, **more scalable**, or **simpler** design, highlight it and suggest improvements that would:
- Reduce code duplication through shared utilities
- Make unit testing easier
- Improve separation of concerns (single responsibility)
- Make unit testing easier through dependency injection
- Add clarity without adding complexity
- Prefer dataclasses for structured data
## Development Tools & Commands
### Package Management
```bash
# Add package
uv add package-name
# Sync project dependencies
uv sync
uv lock
```
### Testing
Each package in `libs/` has its own `pyproject.toml` and `uv.lock`.
```bash
# Run unit tests (no network)
make test
# Don't run integration tests, as API keys must be set
# Run specific test file
uv run --group test pytest tests/unit_tests/test_specific.py
```
### Code Quality
```bash
# Lint code
make lint
@@ -261,66 +64,118 @@ make format
uv run --group lint mypy .
```
### Dependency Management Patterns
#### Key config files
**Local Development Dependencies:**
- pyproject.toml: Main workspace configuration with dependency groups
- uv.lock: Locked dependencies for reproducible builds
- Makefile: Development tasks
```toml
[tool.uv.sources]
langchain-core = { path = "../core", editable = true }
langchain-tests = { path = "../standard-tests", editable = true }
```
#### Commit standards
**For tools, use the `@tool` decorator from `langchain_core.tools`:**
Suggest PR titles that follow Conventional Commits format. Refer to .github/workflows/pr_lint for allowed types and scopes.
```python
from langchain_core.tools import tool
#### Pull request guidelines
@tool
def search_database(query: str) -> str:
"""Search the database for relevant information.
- Always add a disclaimer to the PR description mentioning how AI agents are involved with the contribution.
- Describe the "why" of the changes, why the proposed solution is the right one. Limit prose.
- Highlight areas of the proposed changes that require careful review.
## Core development principles
### Maintain stable public interfaces
CRITICAL: Always attempt to preserve function signatures, argument positions, and names for exported/public methods. Do not make breaking changes.
**Before making ANY changes to public APIs:**
- Check if the function/class is exported in `__init__.py`
- Look for existing usage patterns in tests and examples
- Use keyword-only arguments for new parameters: `*, new_param: str = "default"`
- Mark experimental features clearly with docstring warnings (using MkDocs Material admonitions, like `!!! warning`)
Ask: "Would this change break someone's code if they used it last week?"
### Code quality standards
All Python code MUST include type hints and return types.
```python title="Example"
def filter_unknown_users(users: list[str], known_users: set[str]) -> list[str]:
"""Single line description of the function.
Any additional context about the function can go here.
Args:
query: The search query string.
users: List of user identifiers to filter.
known_users: Set of known/valid user identifiers.
Returns:
List of users that are not in the known_users set.
"""
# Implementation here
return results
```
## Commit Standards
- Use descriptive, self-explanatory variable names.
- Follow existing patterns in the codebase you're modifying
- Attempt to break up complex functions (>20 lines) into smaller, focused functions where it makes sense
**Use Conventional Commits format for PR titles:**
### Testing requirements
- `feat(core): add multi-tenant support`
- `fix(cli): resolve flag parsing error`
- `docs: update API usage examples`
- `docs(openai): update API usage examples`
Every new feature or bugfix MUST be covered by unit tests.
## Framework-Specific Guidelines
- Unit tests: `tests/unit_tests/` (no network calls allowed)
- Integration tests: `tests/integration_tests/` (network calls permitted)
- We use `pytest` as the testing framework; if in doubt, check other existing tests for examples.
- The testing file structure should mirror the source code structure.
- Follow the existing patterns in `langchain-core` for base abstractions
- Use `langchain_core.callbacks` for execution tracking
- Implement proper streaming support where applicable
- Avoid deprecated components like legacy `LLMChain`
**Checklist:**
### Partner Integrations
- [ ] Tests fail when your new logic is broken
- [ ] Happy path is covered
- [ ] Edge cases and error conditions are tested
- [ ] Use fixtures/mocks for external dependencies
- [ ] Tests are deterministic (no flaky tests)
- [ ] Does the test suite fail if your new logic is broken?
- Follow the established patterns in existing partner libraries
- Implement standard interfaces (`BaseChatModel`, `BaseEmbeddings`, etc.)
- Include comprehensive integration tests
- Document API key requirements and authentication
### Security and risk assessment
---
- No `eval()`, `exec()`, or `pickle` on user-controlled input
- Proper exception handling (no bare `except:`) and use a `msg` variable for error messages
- Remove unreachable/commented code before committing
- Check for race conditions and resource leaks (file handles, sockets, threads)
- Ensure proper resource cleanup (file handles, connections)
## Quick Reference Checklist
### Documentation standards
Before submitting code changes:
Use Google-style docstrings with Args section for all public functions.
- [ ] **Breaking Changes**: Verified no public API changes
- [ ] **Type Hints**: All functions have complete type annotations
- [ ] **Tests**: New functionality is fully tested
- [ ] **Security**: No dangerous patterns (eval, silent failures, etc.)
- [ ] **Documentation**: Google-style docstrings for public functions
- [ ] **Code Quality**: `make lint` and `make format` pass
- [ ] **Architecture**: Suggested improvements where applicable
- [ ] **Commit Message**: Follows Conventional Commits format
```python title="Example"
def send_email(to: str, msg: str, *, priority: str = "normal") -> bool:
"""Send an email to a recipient with specified priority.
Any additional context about the function can go here.
Args:
to: The email address of the recipient.
msg: The message body to send.
priority: Email priority level.
Returns:
`True` if email was sent successfully, `False` otherwise.
Raises:
InvalidEmailError: If the email address format is invalid.
SMTPConnectionError: If unable to connect to email server.
"""
```
- Types go in function signatures, NOT in docstrings
- If a default is present, DO NOT repeat it in the docstring unless there is post-processing or it is set conditionally.
- Focus on "why" rather than "what" in descriptions
- Document all parameters, return values, and exceptions
- Keep descriptions concise but clear
- Ensure American English spelling (e.g., "behavior", not "behaviour")
## Additional resources
- **Documentation:** https://docs.langchain.com/oss/python/langchain/overview and source at https://github.com/langchain-ai/docs or `../docs/`. Prefer the local install and use file search tools for best results. If needed, use the docs MCP server as defined in `.mcp.json` for programmatic access.
- **Contributing Guide:** [`.github/CONTRIBUTING.md`](https://docs.langchain.com/oss/python/contributing/overview)

405
CLAUDE.md
View File

@@ -1,255 +1,58 @@
# Global Development Guidelines for LangChain Projects
# Global development guidelines for the LangChain monorepo
## Core Development Principles
This document provides context to understand the LangChain Python project and assist with development.
### 1. Maintain Stable Public Interfaces ⚠️ CRITICAL
## Project architecture and context
**Always attempt to preserve function signatures, argument positions, and names for exported/public methods.**
### Monorepo structure
**Bad - Breaking Change:**
This is a Python monorepo with multiple independently versioned packages that use `uv`.
```python
def get_user(id, verbose=False): # Changed from `user_id`
pass
```txt
langchain/
├── libs/
│ ├── core/ # `langchain-core` primitives and base abstractions
│ ├── langchain/ # `langchain-classic` (legacy, no new features)
│ ├── langchain_v1/ # Actively maintained `langchain` package
│ ├── partners/ # Third-party integrations
│ │ ├── openai/ # OpenAI models and embeddings
│ │ ├── anthropic/ # Anthropic (Claude) integration
│ │ ├── ollama/ # Local model support
│ │ └── ... (other integrations maintained by the LangChain team)
│ ├── text-splitters/ # Document chunking utilities
│ ├── standard-tests/ # Shared test suite for integrations
│ ├── model-profiles/ # Model configuration profiles
│ └── cli/ # Command-line interface tools
├── .github/ # CI/CD workflows and templates
├── .vscode/ # VSCode IDE standard settings and recommended extensions
└── README.md # Information about LangChain
```
**Good - Stable Interface:**
- **Core layer** (`langchain-core`): Base abstractions, interfaces, and protocols. Users should not need to know about this layer directly.
- **Implementation layer** (`langchain`): Concrete implementations and high-level public utilities
- **Integration layer** (`partners/`): Third-party service integrations. Note that this monorepo is not exhaustive of all LangChain integrations; some are maintained in separate repos, such as `langchain-ai/langchain-google` and `langchain-ai/langchain-aws`. Usually these repos are cloned at the same level as this monorepo, so if needed, you can refer to their code directly by navigating to `../langchain-google/` from this monorepo.
- **Testing layer** (`standard-tests/`): Standardized integration tests for partner integrations
```python
def get_user(user_id: str, verbose: bool = False) -> User:
"""Retrieve user by ID with optional verbose output."""
pass
```
### Development tools & commands
**Before making ANY changes to public APIs:**
- `uv` Fast Python package installer and resolver (replaces pip/poetry)
- `make` Task runner for common development commands. Feel free to look at the `Makefile` for available commands and usage patterns.
- `ruff` Fast Python linter and formatter
- `mypy` Static type checking
- `pytest` Testing framework
- Check if the function/class is exported in `__init__.py`
- Look for existing usage patterns in tests and examples
- Use keyword-only arguments for new parameters: `*, new_param: str = "default"`
- Mark experimental features clearly with docstring warnings (using MkDocs Material admonitions, like `!!! warning`)
This monorepo uses `uv` for dependency management. Local development uses editable installs: `[tool.uv.sources]`
🧠 *Ask yourself:* "Would this change break someone's code if they used it last week?"
### 2. Code Quality Standards
**All Python code MUST include type hints and return types.**
**Bad:**
```python
def p(u, d):
return [x for x in u if x not in d]
```
**Good:**
```python
def filter_unknown_users(users: list[str], known_users: set[str]) -> list[str]:
"""Filter out users that are not in the known users set.
Args:
users: List of user identifiers to filter.
known_users: Set of known/valid user identifiers.
Returns:
List of users that are not in the known_users set.
"""
return [user for user in users if user not in known_users]
```
**Style Requirements:**
- Use descriptive, **self-explanatory variable names**. Avoid overly short or cryptic identifiers.
- Attempt to break up complex functions (>20 lines) into smaller, focused functions where it makes sense
- Avoid unnecessary abstraction or premature optimization
- Follow existing patterns in the codebase you're modifying
### 3. Testing Requirements
**Every new feature or bugfix MUST be covered by unit tests.**
**Test Organization:**
- Unit tests: `tests/unit_tests/` (no network calls allowed)
- Integration tests: `tests/integration_tests/` (network calls permitted)
- Use `pytest` as the testing framework
**Test Quality Checklist:**
- [ ] Tests fail when your new logic is broken
- [ ] Happy path is covered
- [ ] Edge cases and error conditions are tested
- [ ] Use fixtures/mocks for external dependencies
- [ ] Tests are deterministic (no flaky tests)
Checklist questions:
- [ ] Does the test suite fail if your new logic is broken?
- [ ] Are all expected behaviors exercised (happy path, invalid input, etc)?
- [ ] Do tests use fixtures or mocks where needed?
```python
def test_filter_unknown_users():
"""Test filtering unknown users from a list."""
users = ["alice", "bob", "charlie"]
known_users = {"alice", "bob"}
result = filter_unknown_users(users, known_users)
assert result == ["charlie"]
assert len(result) == 1
```
### 4. Security and Risk Assessment
**Security Checklist:**
- No `eval()`, `exec()`, or `pickle` on user-controlled input
- Proper exception handling (no bare `except:`) and use a `msg` variable for error messages
- Remove unreachable/commented code before committing
- Race conditions or resource leaks (file handles, sockets, threads).
- Ensure proper resource cleanup (file handles, connections)
**Bad:**
```python
def load_config(path):
with open(path) as f:
return eval(f.read()) # ⚠️ Never eval config
```
**Good:**
```python
import json
def load_config(path: str) -> dict:
with open(path) as f:
return json.load(f)
```
### 5. Documentation Standards
**Use Google-style docstrings with Args section for all public functions.**
**Insufficient Documentation:**
```python
def send_email(to, msg):
"""Send an email to a recipient."""
```
**Complete Documentation:**
```python
def send_email(to: str, msg: str, *, priority: str = "normal") -> bool:
"""
Send an email to a recipient with specified priority.
Args:
to: The email address of the recipient.
msg: The message body to send.
priority: Email priority level (`'low'`, `'normal'`, `'high'`).
Returns:
`True` if email was sent successfully, `False` otherwise.
Raises:
`InvalidEmailError`: If the email address format is invalid.
`SMTPConnectionError`: If unable to connect to email server.
"""
```
**Documentation Guidelines:**
- Types go in function signatures, NOT in docstrings
- If a default is present, DO NOT repeat it in the docstring unless there is post-processing or it is set conditionally.
- Focus on "why" rather than "what" in descriptions
- Document all parameters, return values, and exceptions
- Keep descriptions concise but clear
- Ensure American English spelling (e.g., "behavior", not "behaviour")
📌 *Tip:* Keep descriptions concise but clear. Only document return values if non-obvious.
### 6. Architectural Improvements
**When you encounter code that could be improved, suggest better designs:**
**Poor Design:**
```python
def process_data(data, db_conn, email_client, logger):
# Function doing too many things
validated = validate_data(data)
result = db_conn.save(validated)
email_client.send_notification(result)
logger.log(f"Processed {len(data)} items")
return result
```
**Better Design:**
```python
@dataclass
class ProcessingResult:
"""Result of data processing operation."""
items_processed: int
success: bool
errors: List[str] = field(default_factory=list)
class DataProcessor:
"""Handles data validation, storage, and notification."""
def __init__(self, db_conn: Database, email_client: EmailClient):
self.db = db_conn
self.email = email_client
def process(self, data: List[dict]) -> ProcessingResult:
"""Process and store data with notifications."""
validated = self._validate_data(data)
result = self.db.save(validated)
self._notify_completion(result)
return result
```
**Design Improvement Areas:**
If there's a **cleaner**, **more scalable**, or **simpler** design, highlight it and suggest improvements that would:
- Reduce code duplication through shared utilities
- Make unit testing easier
- Improve separation of concerns (single responsibility)
- Make unit testing easier through dependency injection
- Add clarity without adding complexity
- Prefer dataclasses for structured data
## Development Tools & Commands
### Package Management
```bash
# Add package
uv add package-name
# Sync project dependencies
uv sync
uv lock
```
### Testing
Each package in `libs/` has its own `pyproject.toml` and `uv.lock`.
```bash
# Run unit tests (no network)
make test
# Don't run integration tests, as API keys must be set
# Run specific test file
uv run --group test pytest tests/unit_tests/test_specific.py
```
### Code Quality
```bash
# Lint code
make lint
@@ -261,66 +64,118 @@ make format
uv run --group lint mypy .
```
### Dependency Management Patterns
#### Key config files
**Local Development Dependencies:**
- pyproject.toml: Main workspace configuration with dependency groups
- uv.lock: Locked dependencies for reproducible builds
- Makefile: Development tasks
```toml
[tool.uv.sources]
langchain-core = { path = "../core", editable = true }
langchain-tests = { path = "../standard-tests", editable = true }
```
#### Commit standards
**For tools, use the `@tool` decorator from `langchain_core.tools`:**
Suggest PR titles that follow Conventional Commits format. Refer to .github/workflows/pr_lint for allowed types and scopes.
```python
from langchain_core.tools import tool
#### Pull request guidelines
@tool
def search_database(query: str) -> str:
"""Search the database for relevant information.
- Always add a disclaimer to the PR description mentioning how AI agents are involved with the contribution.
- Describe the "why" of the changes, why the proposed solution is the right one. Limit prose.
- Highlight areas of the proposed changes that require careful review.
## Core development principles
### Maintain stable public interfaces
CRITICAL: Always attempt to preserve function signatures, argument positions, and names for exported/public methods. Do not make breaking changes.
**Before making ANY changes to public APIs:**
- Check if the function/class is exported in `__init__.py`
- Look for existing usage patterns in tests and examples
- Use keyword-only arguments for new parameters: `*, new_param: str = "default"`
- Mark experimental features clearly with docstring warnings (using MkDocs Material admonitions, like `!!! warning`)
Ask: "Would this change break someone's code if they used it last week?"
### Code quality standards
All Python code MUST include type hints and return types.
```python title="Example"
def filter_unknown_users(users: list[str], known_users: set[str]) -> list[str]:
"""Single line description of the function.
Any additional context about the function can go here.
Args:
query: The search query string.
users: List of user identifiers to filter.
known_users: Set of known/valid user identifiers.
Returns:
List of users that are not in the known_users set.
"""
# Implementation here
return results
```
## Commit Standards
- Use descriptive, self-explanatory variable names.
- Follow existing patterns in the codebase you're modifying
- Attempt to break up complex functions (>20 lines) into smaller, focused functions where it makes sense
**Use Conventional Commits format for PR titles:**
### Testing requirements
- `feat(core): add multi-tenant support`
- `fix(cli): resolve flag parsing error`
- `docs: update API usage examples`
- `docs(openai): update API usage examples`
Every new feature or bugfix MUST be covered by unit tests.
## Framework-Specific Guidelines
- Unit tests: `tests/unit_tests/` (no network calls allowed)
- Integration tests: `tests/integration_tests/` (network calls permitted)
- We use `pytest` as the testing framework; if in doubt, check other existing tests for examples.
- The testing file structure should mirror the source code structure.
- Follow the existing patterns in `langchain-core` for base abstractions
- Use `langchain_core.callbacks` for execution tracking
- Implement proper streaming support where applicable
- Avoid deprecated components like legacy `LLMChain`
**Checklist:**
### Partner Integrations
- [ ] Tests fail when your new logic is broken
- [ ] Happy path is covered
- [ ] Edge cases and error conditions are tested
- [ ] Use fixtures/mocks for external dependencies
- [ ] Tests are deterministic (no flaky tests)
- [ ] Does the test suite fail if your new logic is broken?
- Follow the established patterns in existing partner libraries
- Implement standard interfaces (`BaseChatModel`, `BaseEmbeddings`, etc.)
- Include comprehensive integration tests
- Document API key requirements and authentication
### Security and risk assessment
---
- No `eval()`, `exec()`, or `pickle` on user-controlled input
- Proper exception handling (no bare `except:`) and use a `msg` variable for error messages
- Remove unreachable/commented code before committing
- Check for race conditions and resource leaks (file handles, sockets, threads)
- Ensure proper resource cleanup (file handles, connections)
## Quick Reference Checklist
### Documentation standards
Before submitting code changes:
Use Google-style docstrings with Args section for all public functions.
- [ ] **Breaking Changes**: Verified no public API changes
- [ ] **Type Hints**: All functions have complete type annotations
- [ ] **Tests**: New functionality is fully tested
- [ ] **Security**: No dangerous patterns (eval, silent failures, etc.)
- [ ] **Documentation**: Google-style docstrings for public functions
- [ ] **Code Quality**: `make lint` and `make format` pass
- [ ] **Architecture**: Suggested improvements where applicable
- [ ] **Commit Message**: Follows Conventional Commits format
```python title="Example"
def send_email(to: str, msg: str, *, priority: str = "normal") -> bool:
"""Send an email to a recipient with specified priority.
Any additional context about the function can go here.
Args:
to: The email address of the recipient.
msg: The message body to send.
priority: Email priority level.
Returns:
`True` if email was sent successfully, `False` otherwise.
Raises:
InvalidEmailError: If the email address format is invalid.
SMTPConnectionError: If unable to connect to email server.
"""
```
- Types go in function signatures, NOT in docstrings
- If a default is present, DO NOT repeat it in the docstring unless there is post-processing or it is set conditionally.
- Focus on "why" rather than "what" in descriptions
- Document all parameters, return values, and exceptions
- Keep descriptions concise but clear
- Ensure American English spelling (e.g., "behavior", not "behaviour")
## Additional resources
- **Documentation:** https://docs.langchain.com/oss/python/langchain/overview and source at https://github.com/langchain-ai/docs or `../docs/`. Prefer the local install and use file search tools for best results. If needed, use the docs MCP server as defined in `.mcp.json` for programmatic access.
- **Contributing Guide:** [`.github/CONTRIBUTING.md`](https://docs.langchain.com/oss/python/contributing/overview)

View File

@@ -1,9 +0,0 @@
# Migrating
Please see the following guides for migrating LangChain code:
* Migrate to [LangChain v1.0](https://docs.langchain.com/oss/python/migrate/langchain-v1)
* Migrate to [LangChain v0.3](https://python.langchain.com/docs/versions/v0_3/)
* Migrate to [LangChain v0.2](https://python.langchain.com/docs/versions/v0_2/)
* Migrating from [LangChain 0.0.x Chains](https://python.langchain.com/docs/versions/migrating_chains/)
* Upgrade to [LangGraph Memory](https://python.langchain.com/docs/versions/migrating_memory/)

View File

@@ -1,50 +1,43 @@
<p align="center">
<picture>
<source media="(prefers-color-scheme: light)" srcset=".github/images/logo-dark.svg">
<source media="(prefers-color-scheme: dark)" srcset=".github/images/logo-light.svg">
<img alt="LangChain Logo" src=".github/images/logo-dark.svg" width="80%">
</picture>
</p>
<div align="center">
<a href="https://www.langchain.com/">
<picture>
<source media="(prefers-color-scheme: light)" srcset=".github/images/logo-dark.svg">
<source media="(prefers-color-scheme: dark)" srcset=".github/images/logo-light.svg">
<img alt="LangChain Logo" src=".github/images/logo-dark.svg" width="80%">
</picture>
</a>
</div>
<p align="center">
The platform for reliable agents.
</p>
<div align="center">
<h3>The platform for reliable agents.</h3>
</div>
<p align="center">
<a href="https://opensource.org/licenses/MIT" target="_blank">
<img src="https://img.shields.io/pypi/l/langchain" alt="PyPI - License">
</a>
<a href="https://pypistats.org/packages/langchain" target="_blank">
<img src="https://img.shields.io/pepy/dt/langchain" alt="PyPI - Downloads">
</a>
<a href="https://pypi.org/project/langchain/#history" target="_blank">
<img src="https://img.shields.io/pypi/v/langchain?label=%20" alt="Version">
</a>
<a href="https://vscode.dev/redirect?url=vscode://ms-vscode-remote.remote-containers/cloneInVolume?url=https://github.com/langchain-ai/langchain" target="_blank">
<img src="https://img.shields.io/static/v1?label=Dev%20Containers&message=Open&color=blue&logo=visualstudiocode" alt="Open in Dev Containers">
</a>
<a href="https://codespaces.new/langchain-ai/langchain" target="_blank">
<img src="https://github.com/codespaces/badge.svg" alt="Open in Github Codespace" title="Open in Github Codespace" width="150" height="20">
</a>
<a href="https://codspeed.io/langchain-ai/langchain" target="_blank">
<img src="https://img.shields.io/endpoint?url=https://codspeed.io/badge.json" alt="CodSpeed Badge">
</a>
<a href="https://twitter.com/langchainai" target="_blank">
<img src="https://img.shields.io/twitter/url/https/twitter.com/langchainai.svg?style=social&label=Follow%20%40LangChainAI" alt="Twitter / X">
</a>
</p>
<div align="center">
<a href="https://opensource.org/licenses/MIT" target="_blank"><img src="https://img.shields.io/pypi/l/langchain" alt="PyPI - License"></a>
<a href="https://pypistats.org/packages/langchain" target="_blank"><img src="https://img.shields.io/pepy/dt/langchain" alt="PyPI - Downloads"></a>
<a href="https://pypi.org/project/langchain/#history" target="_blank"><img src="https://img.shields.io/pypi/v/langchain?label=%20" alt="Version"></a>
<a href="https://vscode.dev/redirect?url=vscode://ms-vscode-remote.remote-containers/cloneInVolume?url=https://github.com/langchain-ai/langchain" target="_blank"><img src="https://img.shields.io/static/v1?label=Dev%20Containers&message=Open&color=blue&logo=visualstudiocode" alt="Open in Dev Containers"></a>
<a href="https://codespaces.new/langchain-ai/langchain" target="_blank"><img src="https://github.com/codespaces/badge.svg" alt="Open in Github Codespace" title="Open in Github Codespace" width="150" height="20"></a>
<a href="https://codspeed.io/langchain-ai/langchain" target="_blank"><img src="https://img.shields.io/endpoint?url=https://codspeed.io/badge.json" alt="CodSpeed Badge"></a>
<a href="https://twitter.com/langchainai" target="_blank"><img src="https://img.shields.io/twitter/url/https/twitter.com/langchainai.svg?style=social&label=Follow%20%40LangChainAI" alt="Twitter / X"></a>
</div>
LangChain is a framework for building LLM-powered applications. It helps you chain together interoperable components and third-party integrations to simplify AI application development all while future-proofing decisions as the underlying technology evolves.
LangChain is a framework for building agents and LLM-powered applications. It helps you chain together interoperable components and third-party integrations to simplify AI application development all while future-proofing decisions as the underlying technology evolves.
```bash
pip install langchain
```
If you're looking for more advanced customization or agent orchestration, check out [LangGraph](https://docs.langchain.com/oss/python/langgraph/overview), our framework for building controllable agent workflows.
---
**Documentation**: To learn more about LangChain, check out [the docs](https://docs.langchain.com/oss/python/langchain/overview).
**Documentation**:
If you're looking for more advanced customization or agent orchestration, check out [LangGraph](https://docs.langchain.com/oss/python/langgraph/overview), our framework for building controllable agent workflows.
- [docs.langchain.com](https://docs.langchain.com/oss/python/langchain/overview) — Comprehensive documentation, including conceptual overviews and guides
- [reference.langchain.com/python](https://reference.langchain.com/python) — API reference docs for LangChain packages
**Discussions**: Visit the [LangChain Forum](https://forum.langchain.com) to connect with the community and share all of your technical questions, ideas, and feedback.
> [!NOTE]
> Looking for the JS/TS library? Check out [LangChain.js](https://github.com/langchain-ai/langchainjs).
@@ -55,24 +48,27 @@ LangChain helps developers build applications powered by LLMs through a standard
Use LangChain for:
- **Real-time data augmentation**. Easily connect LLMs to diverse data sources and external/internal systems, drawing from LangChains vast library of integrations with model providers, tools, vector stores, retrievers, and more.
- **Model interoperability**. Swap models in and out as your engineering team experiments to find the best choice for your applications needs. As the industry frontier evolves, adapt quickly LangChains abstractions keep you moving without losing momentum.
- **Real-time data augmentation**. Easily connect LLMs to diverse data sources and external/internal systems, drawing from LangChain's vast library of integrations with model providers, tools, vector stores, retrievers, and more.
- **Model interoperability**. Swap models in and out as your engineering team experiments to find the best choice for your application's needs. As the industry frontier evolves, adapt quickly — LangChain's abstractions keep you moving without losing momentum.
- **Rapid prototyping**. Quickly build and iterate on LLM applications with LangChain's modular, component-based architecture. Test different approaches and workflows without rebuilding from scratch, accelerating your development cycle.
- **Production-ready features**. Deploy reliable applications with built-in support for monitoring, evaluation, and debugging through integrations like LangSmith. Scale with confidence using battle-tested patterns and best practices.
- **Vibrant community and ecosystem**. Leverage a rich ecosystem of integrations, templates, and community-contributed components. Benefit from continuous improvements and stay up-to-date with the latest AI developments through an active open-source community.
- **Flexible abstraction layers**. Work at the level of abstraction that suits your needs - from high-level chains for quick starts to low-level components for fine-grained control. LangChain grows with your application's complexity.
## LangChains ecosystem
## LangChain ecosystem
While the LangChain framework can be used standalone, it also integrates seamlessly with any LangChain product, giving developers a full suite of tools when building LLM applications.
To improve your LLM application development, pair LangChain with:
- [LangSmith](https://www.langchain.com/langsmith) - Helpful for agent evals and observability. Debug poor-performing LLM app runs, evaluate agent trajectories, gain visibility in production, and improve performance over time.
- [LangGraph](https://docs.langchain.com/oss/python/langgraph/overview) - Build agents that can reliably handle complex tasks with LangGraph, our low-level agent orchestration framework. LangGraph offers customizable architecture, long-term memory, and human-in-the-loop workflows — and is trusted in production by companies like LinkedIn, Uber, Klarna, and GitLab.
- [LangGraph Platform](https://docs.langchain.com/langgraph-platform) - Deploy and scale agents effortlessly with a purpose-built deployment platform for long-running, stateful workflows. Discover, reuse, configure, and share agents across teams — and iterate quickly with visual prototyping in [LangGraph Studio](https://langchain-ai.github.io/langgraph/concepts/langgraph_studio).
- [LangGraph](https://docs.langchain.com/oss/python/langgraph/overview) — Build agents that can reliably handle complex tasks with LangGraph, our low-level agent orchestration framework. LangGraph offers customizable architecture, long-term memory, and human-in-the-loop workflows — and is trusted in production by companies like LinkedIn, Uber, Klarna, and GitLab.
- [Integrations](https://docs.langchain.com/oss/python/integrations/providers/overview) — List of LangChain integrations, including chat & embedding models, tools & toolkits, and more
- [LangSmith](https://www.langchain.com/langsmith) — Helpful for agent evals and observability. Debug poor-performing LLM app runs, evaluate agent trajectories, gain visibility in production, and improve performance over time.
- [LangSmith Deployment](https://docs.langchain.com/langsmith/deployments) — Deploy and scale agents effortlessly with a purpose-built deployment platform for long-running, stateful workflows. Discover, reuse, configure, and share agents across teams — and iterate quickly with visual prototyping in [LangSmith Studio](https://docs.langchain.com/langsmith/studio).
- [Deep Agents](https://github.com/langchain-ai/deepagents) *(new!)* — Build agents that can plan, use subagents, and leverage file systems for complex tasks
## Additional resources
- [Learn](https://docs.langchain.com/oss/python/learn): Use cases, conceptual overviews, and more.
- [API Reference](https://reference.langchain.com/python): Detailed reference on
navigating base packages and integrations for LangChain.
- [Contributing Guide](https://docs.langchain.com/oss/python/contributing/overview): Learn how to contribute to LangChain and find good first issues.
- [LangChain Forum](https://forum.langchain.com): Connect with the community and share all of your technical questions, ideas, and feedback.
- [Chat LangChain](https://chat.langchain.com): Ask questions & chat with our documentation.
- [API Reference](https://reference.langchain.com/python) — Detailed reference on navigating base packages and integrations for LangChain.
- [Contributing Guide](https://docs.langchain.com/oss/python/contributing/overview) — Learn how to contribute to LangChain projects and find good first issues.
- [Code of Conduct](https://github.com/langchain-ai/langchain/blob/master/.github/CODE_OF_CONDUCT.md) — Our community guidelines and standards for participation.

View File

@@ -55,10 +55,10 @@ All out of scope targets defined by huntr as well as:
* **langchain-experimental**: This repository is for experimental code and is not
eligible for bug bounties (see [package warning](https://pypi.org/project/langchain-experimental/)); bug reports to it will be marked as interesting or a waste of
time and published with no bounty attached.
* **tools**: Tools in either langchain or langchain-community are not eligible for bug
* **tools**: Tools in either `langchain` or `langchain-community` are not eligible for bug
bounties. This includes the following directories
* libs/langchain/langchain/tools
* libs/community/langchain_community/tools
* `libs/langchain/langchain/tools`
* `libs/community/langchain_community/tools`
* Please review the [Best Practices](#best-practices)
for more details, but generally tools interact with the real world. Developers are
expected to understand the security implications of their code and are responsible

View File

@@ -295,7 +295,7 @@
"source": [
"## TODO: Any functionality specific to this vector store\n",
"\n",
"E.g. creating a persisten database to save to your disk, etc."
"E.g. creating a persistent database to save to your disk, etc."
]
},
{

View File

@@ -6,9 +6,8 @@ import hashlib
import logging
import re
import shutil
from collections.abc import Sequence
from pathlib import Path
from typing import Any, TypedDict
from typing import TYPE_CHECKING, Any, TypedDict
from git import Repo
@@ -18,6 +17,9 @@ from langchain_cli.constants import (
DEFAULT_GIT_SUBDIRECTORY,
)
if TYPE_CHECKING:
from collections.abc import Sequence
logger = logging.getLogger(__name__)

View File

@@ -1,9 +1,11 @@
from __future__ import annotations
from dataclasses import dataclass
from typing import TYPE_CHECKING
from .file import File
from .folder import Folder
if TYPE_CHECKING:
from .file import File
from .folder import Folder
@dataclass

View File

@@ -1,9 +1,12 @@
from __future__ import annotations
from pathlib import Path
from typing import TYPE_CHECKING
from .file import File
if TYPE_CHECKING:
from pathlib import Path
class Folder:
def __init__(self, name: str, *files: Folder | File) -> None:

View File

@@ -34,7 +34,7 @@ The LangChain ecosystem is built on top of `langchain-core`. Some of the benefit
## 📖 Documentation
For full documentation, see the [API reference](https://reference.langchain.com/python/langchain_core/).
For full documentation, see the [API reference](https://reference.langchain.com/python/langchain_core/). For conceptual guides, tutorials, and examples on using LangChain, see the [LangChain Docs](https://docs.langchain.com/oss/python/langchain/overview).
## 📕 Releases & Versioning

View File

@@ -5,12 +5,10 @@
!!! warning
New agents should be built using the
[langgraph library](https://github.com/langchain-ai/langgraph), which provides a
[`langchain` library](https://pypi.org/project/langchain/), which provides a
simpler and more flexible way to define agents.
Please see the
[migration guide](https://python.langchain.com/docs/how_to/migrate_agent/) for
information on how to migrate existing agents to modern langgraph agents.
See docs on [building agents](https://docs.langchain.com/oss/python/langchain/agents).
Agents use language models to choose a sequence of actions to take.
@@ -54,31 +52,33 @@ class AgentAction(Serializable):
"""The input to pass in to the Tool."""
log: str
"""Additional information to log about the action.
This log can be used in a few ways. First, it can be used to audit
what exactly the LLM predicted to lead to this (tool, tool_input).
Second, it can be used in future iterations to show the LLMs prior
thoughts. This is useful when (tool, tool_input) does not contain
full information about the LLM prediction (for example, any `thought`
before the tool/tool_input)."""
This log can be used in a few ways. First, it can be used to audit what exactly the
LLM predicted to lead to this `(tool, tool_input)`.
Second, it can be used in future iterations to show the LLM's prior thoughts. This is
useful when `(tool, tool_input)` does not contain full information about the LLM
prediction (for example, any `thought` before the tool/tool_input).
"""
type: Literal["AgentAction"] = "AgentAction"
# Override init to support instantiation by position for backward compat.
def __init__(self, tool: str, tool_input: str | dict, log: str, **kwargs: Any):
"""Create an AgentAction.
"""Create an `AgentAction`.
Args:
tool: The name of the tool to execute.
tool_input: The input to pass in to the Tool.
tool_input: The input to pass in to the `Tool`.
log: Additional information to log about the action.
"""
super().__init__(tool=tool, tool_input=tool_input, log=log, **kwargs)
@classmethod
def is_lc_serializable(cls) -> bool:
"""AgentAction is serializable.
"""`AgentAction` is serializable.
Returns:
True
`True`
"""
return True
@@ -100,19 +100,23 @@ class AgentAction(Serializable):
class AgentActionMessageLog(AgentAction):
"""Representation of an action to be executed by an agent.
This is similar to AgentAction, but includes a message log consisting of
chat messages. This is useful when working with ChatModels, and is used
to reconstruct conversation history from the agent's perspective.
This is similar to `AgentAction`, but includes a message log consisting of
chat messages.
This is useful when working with `ChatModels`, and is used to reconstruct
conversation history from the agent's perspective.
"""
message_log: Sequence[BaseMessage]
"""Similar to log, this can be used to pass along extra
information about what exact messages were predicted by the LLM
before parsing out the (tool, tool_input). This is again useful
if (tool, tool_input) cannot be used to fully recreate the LLM
prediction, and you need that LLM prediction (for future agent iteration).
"""Similar to log, this can be used to pass along extra information about what exact
messages were predicted by the LLM before parsing out the `(tool, tool_input)`.
This is again useful if `(tool, tool_input)` cannot be used to fully recreate the
LLM prediction, and you need that LLM prediction (for future agent iteration).
Compared to `log`, this is useful when the underlying LLM is a
chat model (and therefore returns messages rather than a string)."""
chat model (and therefore returns messages rather than a string).
"""
# Ignoring type because we're overriding the type from AgentAction.
# And this is the correct thing to do in this case.
# The type literal is used for serialization purposes.
@@ -120,12 +124,12 @@ class AgentActionMessageLog(AgentAction):
class AgentStep(Serializable):
"""Result of running an AgentAction."""
"""Result of running an `AgentAction`."""
action: AgentAction
"""The AgentAction that was executed."""
"""The `AgentAction` that was executed."""
observation: Any
"""The result of the AgentAction."""
"""The result of the `AgentAction`."""
@property
def messages(self) -> Sequence[BaseMessage]:
@@ -134,19 +138,22 @@ class AgentStep(Serializable):
class AgentFinish(Serializable):
"""Final return value of an ActionAgent.
"""Final return value of an `ActionAgent`.
Agents return an AgentFinish when they have reached a stopping condition.
Agents return an `AgentFinish` when they have reached a stopping condition.
"""
return_values: dict
"""Dictionary of return values."""
log: str
"""Additional information to log about the return value.
This is used to pass along the full LLM prediction, not just the parsed out
return value. For example, if the full LLM prediction was
`Final Answer: 2` you may want to just return `2` as a return value, but pass
along the full string as a `log` (for debugging or observability purposes).
return value.
For example, if the full LLM prediction was `Final Answer: 2` you may want to just
return `2` as a return value, but pass along the full string as a `log` (for
debugging or observability purposes).
"""
type: Literal["AgentFinish"] = "AgentFinish"
@@ -156,7 +163,7 @@ class AgentFinish(Serializable):
@classmethod
def is_lc_serializable(cls) -> bool:
"""Return True as this class is serializable."""
"""Return `True` as this class is serializable."""
return True
@classmethod
@@ -204,7 +211,7 @@ def _convert_agent_observation_to_messages(
observation: Observation to convert to a message.
Returns:
AIMessage that corresponds to the original tool invocation.
`AIMessage` that corresponds to the original tool invocation.
"""
if isinstance(agent_action, AgentActionMessageLog):
return [_create_function_message(agent_action, observation)]
@@ -227,7 +234,7 @@ def _create_function_message(
observation: the result of the tool invocation.
Returns:
FunctionMessage that corresponds to the original tool invocation.
`FunctionMessage` that corresponds to the original tool invocation.
"""
if not isinstance(observation, str):
try:

View File

@@ -2,8 +2,8 @@
Distinct from provider-based [prompt caching](https://docs.langchain.com/oss/python/langchain/models#prompt-caching).
!!! warning
This is a beta feature! Please be wary of deploying experimental code to production
!!! warning "Beta feature"
This is a beta feature. Please be wary of deploying experimental code to production
unless you've taken appropriate precautions.
A cache is useful for two reasons:
@@ -49,17 +49,18 @@ class BaseCache(ABC):
"""Look up based on `prompt` and `llm_string`.
A cache implementation is expected to generate a key from the 2-tuple
of prompt and llm_string (e.g., by concatenating them with a delimiter).
of `prompt` and `llm_string` (e.g., by concatenating them with a delimiter).
Args:
prompt: A string representation of the prompt.
In the case of a chat model, the prompt is a non-trivial
serialization of the prompt into the language model.
llm_string: A string representation of the LLM configuration.
This is used to capture the invocation parameters of the LLM
(e.g., model name, temperature, stop tokens, max tokens, etc.).
These invocation parameters are serialized into a string
representation.
These invocation parameters are serialized into a string representation.
Returns:
On a cache miss, return `None`. On a cache hit, return the cached value.
@@ -78,8 +79,10 @@ class BaseCache(ABC):
In the case of a chat model, the prompt is a non-trivial
serialization of the prompt into the language model.
llm_string: A string representation of the LLM configuration.
This is used to capture the invocation parameters of the LLM
(e.g., model name, temperature, stop tokens, max tokens, etc.).
These invocation parameters are serialized into a string
representation.
return_val: The value to be cached. The value is a list of `Generation`
@@ -94,15 +97,17 @@ class BaseCache(ABC):
"""Async look up based on `prompt` and `llm_string`.
A cache implementation is expected to generate a key from the 2-tuple
of prompt and llm_string (e.g., by concatenating them with a delimiter).
of `prompt` and `llm_string` (e.g., by concatenating them with a delimiter).
Args:
prompt: A string representation of the prompt.
In the case of a chat model, the prompt is a non-trivial
serialization of the prompt into the language model.
llm_string: A string representation of the LLM configuration.
This is used to capture the invocation parameters of the LLM
(e.g., model name, temperature, stop tokens, max tokens, etc.).
These invocation parameters are serialized into a string
representation.
@@ -125,8 +130,10 @@ class BaseCache(ABC):
In the case of a chat model, the prompt is a non-trivial
serialization of the prompt into the language model.
llm_string: A string representation of the LLM configuration.
This is used to capture the invocation parameters of the LLM
(e.g., model name, temperature, stop tokens, max tokens, etc.).
These invocation parameters are serialized into a string
representation.
return_val: The value to be cached. The value is a list of `Generation`

View File

@@ -5,13 +5,12 @@ from __future__ import annotations
import logging
from typing import TYPE_CHECKING, Any
from typing_extensions import Self
if TYPE_CHECKING:
from collections.abc import Sequence
from uuid import UUID
from tenacity import RetryCallState
from typing_extensions import Self
from langchain_core.agents import AgentAction, AgentFinish
from langchain_core.documents import Document

View File

@@ -39,7 +39,6 @@ from langchain_core.tracers.context import (
tracing_v2_callback_var,
)
from langchain_core.tracers.langchain import LangChainTracer
from langchain_core.tracers.schemas import Run
from langchain_core.tracers.stdout import ConsoleCallbackHandler
from langchain_core.utils.env import env_var_is_set
@@ -52,6 +51,7 @@ if TYPE_CHECKING:
from langchain_core.documents import Document
from langchain_core.outputs import ChatGenerationChunk, GenerationChunk, LLMResult
from langchain_core.runnables.config import RunnableConfig
from langchain_core.tracers.schemas import Run
logger = logging.getLogger(__name__)
@@ -229,7 +229,24 @@ def shielded(func: Func) -> Func:
@functools.wraps(func)
async def wrapped(*args: Any, **kwargs: Any) -> Any:
return await asyncio.shield(func(*args, **kwargs))
# Capture the current context to preserve context variables
ctx = copy_context()
# Create the coroutine
coro = func(*args, **kwargs)
# For Python 3.11+, create task with explicit context
# For older versions, fallback to original behavior
try:
# Create a task with the captured context to preserve context variables
task = asyncio.create_task(coro, context=ctx) # type: ignore[call-arg, unused-ignore]
# `call-arg` used to not fail 3.9 or 3.10 tests
return await asyncio.shield(task)
except TypeError:
# Python < 3.11 fallback - create task normally then shield
# This won't preserve context perfectly but is better than nothing
task = asyncio.create_task(coro)
return await asyncio.shield(task)
return cast("Func", wrapped)

View File

@@ -24,7 +24,7 @@ class UsageMetadataCallbackHandler(BaseCallbackHandler):
from langchain_core.callbacks import UsageMetadataCallbackHandler
llm_1 = init_chat_model(model="openai:gpt-4o-mini")
llm_2 = init_chat_model(model="anthropic:claude-3-5-haiku-latest")
llm_2 = init_chat_model(model="anthropic:claude-3-5-haiku-20241022")
callback = UsageMetadataCallbackHandler()
result_1 = llm_1.invoke("Hello", config={"callbacks": [callback]})
@@ -43,7 +43,7 @@ class UsageMetadataCallbackHandler(BaseCallbackHandler):
'input_token_details': {'cache_read': 0, 'cache_creation': 0}}}
```
!!! version-added "Added in version 0.3.49"
!!! version-added "Added in `langchain-core` 0.3.49"
"""
@@ -109,7 +109,7 @@ def get_usage_metadata_callback(
from langchain_core.callbacks import get_usage_metadata_callback
llm_1 = init_chat_model(model="openai:gpt-4o-mini")
llm_2 = init_chat_model(model="anthropic:claude-3-5-haiku-latest")
llm_2 = init_chat_model(model="anthropic:claude-3-5-haiku-20241022")
with get_usage_metadata_callback() as cb:
llm_1.invoke("Hello")
@@ -134,7 +134,7 @@ def get_usage_metadata_callback(
}
```
!!! version-added "Added in version 0.3.49"
!!! version-added "Added in `langchain-core` 0.3.49"
"""
usage_metadata_callback_var: ContextVar[UsageMetadataCallbackHandler | None] = (

View File

@@ -121,7 +121,7 @@ class BaseChatMessageHistory(ABC):
This method may be deprecated in a future release.
Args:
message: The human message to add to the store.
message: The `HumanMessage` to add to the store.
"""
if isinstance(message, HumanMessage):
self.add_message(message)
@@ -129,7 +129,7 @@ class BaseChatMessageHistory(ABC):
self.add_message(HumanMessage(content=message))
def add_ai_message(self, message: AIMessage | str) -> None:
"""Convenience method for adding an AI message string to the store.
"""Convenience method for adding an `AIMessage` string to the store.
!!! note
This is a convenience method. Code should favor the bulk `add_messages`
@@ -138,7 +138,7 @@ class BaseChatMessageHistory(ABC):
This method may be deprecated in a future release.
Args:
message: The AI message to add.
message: The `AIMessage` to add.
"""
if isinstance(message, AIMessage):
self.add_message(message)
@@ -173,7 +173,7 @@ class BaseChatMessageHistory(ABC):
in an efficient manner to avoid unnecessary round-trips to the underlying store.
Args:
messages: A sequence of BaseMessage objects to store.
messages: A sequence of `BaseMessage` objects to store.
"""
for message in messages:
self.add_message(message)
@@ -182,7 +182,7 @@ class BaseChatMessageHistory(ABC):
"""Async add a list of messages.
Args:
messages: A sequence of BaseMessage objects to store.
messages: A sequence of `BaseMessage` objects to store.
"""
await run_in_executor(None, self.add_messages, messages)

View File

@@ -27,7 +27,7 @@ class BaseLoader(ABC): # noqa: B024
"""Interface for Document Loader.
Implementations should implement the lazy-loading method using generators
to avoid loading all Documents into memory at once.
to avoid loading all documents into memory at once.
`load` is provided just for user convenience and should not be overridden.
"""
@@ -53,9 +53,11 @@ class BaseLoader(ABC): # noqa: B024
def load_and_split(
self, text_splitter: TextSplitter | None = None
) -> list[Document]:
"""Load Documents and split into chunks. Chunks are returned as `Document`.
"""Load `Document` and split into chunks. Chunks are returned as `Document`.
Do not override this method. It should be considered to be deprecated!
!!! danger
Do not override this method. It should be considered to be deprecated!
Args:
text_splitter: `TextSplitter` instance to use for splitting documents.
@@ -135,7 +137,7 @@ class BaseBlobParser(ABC):
"""
def parse(self, blob: Blob) -> list[Document]:
"""Eagerly parse the blob into a `Document` or `Document` objects.
"""Eagerly parse the blob into a `Document` or list of `Document` objects.
This is a convenience method for interactive development environments.

View File

@@ -28,7 +28,7 @@ class BlobLoader(ABC):
def yield_blobs(
self,
) -> Iterable[Blob]:
"""A lazy loader for raw data represented by LangChain's Blob object.
"""A lazy loader for raw data represented by LangChain's `Blob` object.
Returns:
A generator over blobs

View File

@@ -14,13 +14,13 @@ from langchain_core.documents import Document
class LangSmithLoader(BaseLoader):
"""Load LangSmith Dataset examples as Documents.
"""Load LangSmith Dataset examples as `Document` objects.
Loads the example inputs as the Document page content and places the entire example
into the Document metadata. This allows you to easily create few-shot example
retrievers from the loaded documents.
Loads the example inputs as the `Document` page content and places the entire
example into the `Document` metadata. This allows you to easily create few-shot
example retrievers from the loaded documents.
??? note "Lazy load"
??? note "Lazy loading example"
```python
from langchain_core.document_loaders import LangSmithLoader
@@ -66,12 +66,11 @@ class LangSmithLoader(BaseLoader):
format_content: Function for converting the content extracted from the example
inputs into a string. Defaults to JSON-encoding the contents.
example_ids: The IDs of the examples to filter by.
as_of: The dataset version tag OR
timestamp to retrieve the examples as of.
Response examples will only be those that were present at the time
of the tagged (or timestamped) version.
as_of: The dataset version tag or timestamp to retrieve the examples as of.
Response examples will only be those that were present at the time of
the tagged (or timestamped) version.
splits: A list of dataset splits, which are
divisions of your dataset such as 'train', 'test', or 'validation'.
divisions of your dataset such as `train`, `test`, or `validation`.
Returns examples only from the specified splits.
inline_s3_urls: Whether to inline S3 URLs.
offset: The offset to start from.

View File

@@ -1,7 +1,28 @@
"""Documents module.
"""Documents module for data retrieval and processing workflows.
**Document** module is a collection of classes that handle documents
and their transformations.
This module provides core abstractions for handling data in retrieval-augmented
generation (RAG) pipelines, vector stores, and document processing workflows.
!!! warning "Documents vs. message content"
This module is distinct from `langchain_core.messages.content`, which provides
multimodal content blocks for **LLM chat I/O** (text, images, audio, etc. within
messages).
**Key distinction:**
- **Documents** (this module): For **data retrieval and processing workflows**
- Vector stores, retrievers, RAG pipelines
- Text chunking, embedding, and semantic search
- Example: Chunks of a PDF stored in a vector database
- **Content Blocks** (`messages.content`): For **LLM conversational I/O**
- Multimodal message content sent to/from models
- Tool calls, reasoning, citations within chat
- Example: An image sent to a vision model in a chat message (via
[`ImageContentBlock`][langchain.messages.ImageContentBlock])
While both can represent similar data types (text, files), they serve different
architectural purposes in LangChain applications.
"""
from typing import TYPE_CHECKING

View File

@@ -1,4 +1,16 @@
"""Base classes for media and documents."""
"""Base classes for media and documents.
This module contains core abstractions for **data retrieval and processing workflows**:
- `BaseMedia`: Base class providing `id` and `metadata` fields
- `Blob`: Raw data loading (files, binary data) - used by document loaders
- `Document`: Text content for retrieval (RAG, vector stores, semantic search)
!!! note "Not for LLM chat messages"
These classes are for data processing pipelines, not LLM I/O. For multimodal
content in chat messages (images, audio in conversations), see
`langchain.messages` content blocks instead.
"""
from __future__ import annotations
@@ -19,20 +31,18 @@ PathLike = str | PurePath
class BaseMedia(Serializable):
"""Use to represent media content.
"""Base class for content used in retrieval and data processing workflows.
Media objects can be used to represent raw data, such as text or binary data.
Provides common fields for content that needs to be stored, indexed, or searched.
LangChain Media objects allow associating metadata and an optional identifier
with the content.
The presence of an ID and metadata make it easier to store, index, and search
over the content in a structured way.
!!! note
For multimodal content in **chat messages** (images, audio sent to/from LLMs),
use `langchain.messages` content blocks instead.
"""
# The ID field is optional at the moment.
# It will likely become required in a future major release after
# it has been adopted by enough vectorstore implementations.
# it has been adopted by enough VectorStore implementations.
id: str | None = Field(default=None, coerce_numbers_to_str=True)
"""An optional identifier for the document.
@@ -45,71 +55,70 @@ class BaseMedia(Serializable):
class Blob(BaseMedia):
"""Blob represents raw data by either reference or value.
"""Raw data abstraction for document loading and file processing.
Provides an interface to materialize the blob in different representations, and
help to decouple the development of data loaders from the downstream parsing of
the raw data.
Represents raw bytes or text, either in-memory or by file reference. Used
primarily by document loaders to decouple data loading from parsing.
Inspired by: https://developer.mozilla.org/en-US/docs/Web/API/Blob
Inspired by [Mozilla's `Blob`](https://developer.mozilla.org/en-US/docs/Web/API/Blob)
Example: Initialize a blob from in-memory data
???+ example "Initialize a blob from in-memory data"
```python
from langchain_core.documents import Blob
```python
from langchain_core.documents import Blob
blob = Blob.from_data("Hello, world!")
blob = Blob.from_data("Hello, world!")
# Read the blob as a string
print(blob.as_string())
# Read the blob as a string
print(blob.as_string())
# Read the blob as bytes
print(blob.as_bytes())
# Read the blob as bytes
print(blob.as_bytes())
# Read the blob as a byte stream
with blob.as_bytes_io() as f:
print(f.read())
```
# Read the blob as a byte stream
with blob.as_bytes_io() as f:
print(f.read())
```
Example: Load from memory and specify mime-type and metadata
??? example "Load from memory and specify MIME type and metadata"
```python
from langchain_core.documents import Blob
```python
from langchain_core.documents import Blob
blob = Blob.from_data(
data="Hello, world!",
mime_type="text/plain",
metadata={"source": "https://example.com"},
)
```
blob = Blob.from_data(
data="Hello, world!",
mime_type="text/plain",
metadata={"source": "https://example.com"},
)
```
Example: Load the blob from a file
??? example "Load the blob from a file"
```python
from langchain_core.documents import Blob
```python
from langchain_core.documents import Blob
blob = Blob.from_path("path/to/file.txt")
blob = Blob.from_path("path/to/file.txt")
# Read the blob as a string
print(blob.as_string())
# Read the blob as a string
print(blob.as_string())
# Read the blob as bytes
print(blob.as_bytes())
# Read the blob as bytes
print(blob.as_bytes())
# Read the blob as a byte stream
with blob.as_bytes_io() as f:
print(f.read())
```
# Read the blob as a byte stream
with blob.as_bytes_io() as f:
print(f.read())
```
"""
data: bytes | str | None = None
"""Raw data associated with the blob."""
"""Raw data associated with the `Blob`."""
mimetype: str | None = None
"""MimeType not to be confused with a file extension."""
"""MIME type, not to be confused with a file extension."""
encoding: str = "utf-8"
"""Encoding to use if decoding the bytes into a string.
Use `utf-8` as default encoding, if decoding to string.
Uses `utf-8` as default encoding if decoding to string.
"""
path: PathLike | None = None
"""Location where the original content was found."""
@@ -123,9 +132,9 @@ class Blob(BaseMedia):
def source(self) -> str | None:
"""The source location of the blob as string if known otherwise none.
If a path is associated with the blob, it will default to the path location.
If a path is associated with the `Blob`, it will default to the path location.
Unless explicitly set via a metadata field called `"source"`, in which
Unless explicitly set via a metadata field called `'source'`, in which
case that value will be used instead.
"""
if self.metadata and "source" in self.metadata:
@@ -211,13 +220,13 @@ class Blob(BaseMedia):
Args:
path: Path-like object to file to be read
encoding: Encoding to use if decoding the bytes into a string
mime_type: If provided, will be set as the mime-type of the data
guess_type: If `True`, the mimetype will be guessed from the file extension,
if a mime-type was not provided
metadata: Metadata to associate with the blob
mime_type: If provided, will be set as the MIME type of the data
guess_type: If `True`, the MIME type will be guessed from the file
extension, if a MIME type was not provided
metadata: Metadata to associate with the `Blob`
Returns:
Blob instance
`Blob` instance
"""
if mime_type is None and guess_type:
mimetype = mimetypes.guess_type(path)[0] if guess_type else None
@@ -243,17 +252,17 @@ class Blob(BaseMedia):
path: str | None = None,
metadata: dict | None = None,
) -> Blob:
"""Initialize the blob from in-memory data.
"""Initialize the `Blob` from in-memory data.
Args:
data: The in-memory data associated with the blob
data: The in-memory data associated with the `Blob`
encoding: Encoding to use if decoding the bytes into a string
mime_type: If provided, will be set as the mime-type of the data
mime_type: If provided, will be set as the MIME type of the data
path: If provided, will be set as the source from which the data came
metadata: Metadata to associate with the blob
metadata: Metadata to associate with the `Blob`
Returns:
Blob instance
`Blob` instance
"""
return cls(
data=data,
@@ -274,6 +283,10 @@ class Blob(BaseMedia):
class Document(BaseMedia):
"""Class for storing a piece of text and associated metadata.
!!! note
`Document` is for **retrieval workflows**, not chat I/O. For sending text
to an LLM in a conversation, use message types from `langchain.messages`.
Example:
```python
from langchain_core.documents import Document
@@ -296,7 +309,7 @@ class Document(BaseMedia):
@classmethod
def is_lc_serializable(cls) -> bool:
"""Return True as this class is serializable."""
"""Return `True` as this class is serializable."""
return True
@classmethod
@@ -309,10 +322,10 @@ class Document(BaseMedia):
return ["langchain", "schema", "document"]
def __str__(self) -> str:
"""Override __str__ to restrict it to page_content and metadata.
"""Override `__str__` to restrict it to page_content and metadata.
Returns:
A string representation of the Document.
A string representation of the `Document`.
"""
# The format matches pydantic format for __str__.
#

View File

@@ -21,14 +21,14 @@ class BaseDocumentCompressor(BaseModel, ABC):
This abstraction is primarily used for post-processing of retrieved documents.
Documents matching a given query are first retrieved.
`Document` objects matching a given query are first retrieved.
Then the list of documents can be further processed.
For example, one could re-rank the retrieved documents using an LLM.
!!! note
Users should favor using a RunnableLambda instead of sub-classing from this
Users should favor using a `RunnableLambda` instead of sub-classing from this
interface.
"""
@@ -43,9 +43,9 @@ class BaseDocumentCompressor(BaseModel, ABC):
"""Compress retrieved documents given the query context.
Args:
documents: The retrieved documents.
documents: The retrieved `Document` objects.
query: The query context.
callbacks: Optional callbacks to run during compression.
callbacks: Optional `Callbacks` to run during compression.
Returns:
The compressed documents.
@@ -61,9 +61,9 @@ class BaseDocumentCompressor(BaseModel, ABC):
"""Async compress retrieved documents given the query context.
Args:
documents: The retrieved documents.
documents: The retrieved `Document` objects.
query: The query context.
callbacks: Optional callbacks to run during compression.
callbacks: Optional `Callbacks` to run during compression.
Returns:
The compressed documents.

View File

@@ -16,8 +16,8 @@ if TYPE_CHECKING:
class BaseDocumentTransformer(ABC):
"""Abstract base class for document transformation.
A document transformation takes a sequence of Documents and returns a
sequence of transformed Documents.
A document transformation takes a sequence of `Document` objects and returns a
sequence of transformed `Document` objects.
Example:
```python

View File

@@ -18,7 +18,7 @@ class FakeEmbeddings(Embeddings, BaseModel):
This embedding model creates embeddings by sampling from a normal distribution.
!!! warning
!!! danger "Toy model"
Do not use this outside of testing, as it is not a real embedding model.
Instantiate:
@@ -73,7 +73,7 @@ class DeterministicFakeEmbedding(Embeddings, BaseModel):
This embedding model creates embeddings by sampling from a normal distribution
with a seed based on the hash of the text.
!!! warning
!!! danger "Toy model"
Do not use this outside of testing, as it is not a real embedding model.
Instantiate:

View File

@@ -29,7 +29,7 @@ class LengthBasedExampleSelector(BaseExampleSelector, BaseModel):
max_length: int = 2048
"""Max length for the prompt, beyond which examples are cut."""
example_text_lengths: list[int] = Field(default_factory=list) # :meta private:
example_text_lengths: list[int] = Field(default_factory=list)
"""Length of each example."""
def add_example(self, example: dict[str, str]) -> None:

View File

@@ -41,7 +41,7 @@ class _VectorStoreExampleSelector(BaseExampleSelector, BaseModel, ABC):
"""Optional keys to filter input to. If provided, the search is based on
the input variables instead of all variables."""
vectorstore_kwargs: dict[str, Any] | None = None
"""Extra arguments passed to similarity_search function of the vectorstore."""
"""Extra arguments passed to similarity_search function of the `VectorStore`."""
model_config = ConfigDict(
arbitrary_types_allowed=True,
@@ -159,7 +159,7 @@ class SemanticSimilarityExampleSelector(_VectorStoreExampleSelector):
instead of all variables.
example_keys: If provided, keys to filter examples to.
vectorstore_kwargs: Extra arguments passed to similarity_search function
of the vectorstore.
of the `VectorStore`.
vectorstore_cls_kwargs: optional kwargs containing url for vector store
Returns:
@@ -203,7 +203,7 @@ class SemanticSimilarityExampleSelector(_VectorStoreExampleSelector):
instead of all variables.
example_keys: If provided, keys to filter examples to.
vectorstore_kwargs: Extra arguments passed to similarity_search function
of the vectorstore.
of the `VectorStore`.
vectorstore_cls_kwargs: optional kwargs containing url for vector store
Returns:
@@ -286,12 +286,12 @@ class MaxMarginalRelevanceExampleSelector(_VectorStoreExampleSelector):
embeddings: An initialized embedding API interface, e.g. OpenAIEmbeddings().
vectorstore_cls: A vector store DB interface class, e.g. FAISS.
k: Number of examples to select.
fetch_k: Number of Documents to fetch to pass to MMR algorithm.
fetch_k: Number of `Document` objects to fetch to pass to MMR algorithm.
input_keys: If provided, the search is based on the input variables
instead of all variables.
example_keys: If provided, keys to filter examples to.
vectorstore_kwargs: Extra arguments passed to similarity_search function
of the vectorstore.
of the `VectorStore`.
vectorstore_cls_kwargs: optional kwargs containing url for vector store
Returns:
@@ -333,12 +333,12 @@ class MaxMarginalRelevanceExampleSelector(_VectorStoreExampleSelector):
embeddings: An initialized embedding API interface, e.g. OpenAIEmbeddings().
vectorstore_cls: A vector store DB interface class, e.g. FAISS.
k: Number of examples to select.
fetch_k: Number of Documents to fetch to pass to MMR algorithm.
fetch_k: Number of `Document` objects to fetch to pass to MMR algorithm.
input_keys: If provided, the search is based on the input variables
instead of all variables.
example_keys: If provided, keys to filter examples to.
vectorstore_kwargs: Extra arguments passed to similarity_search function
of the vectorstore.
of the `VectorStore`.
vectorstore_cls_kwargs: optional kwargs containing url for vector store
Returns:

View File

@@ -16,9 +16,10 @@ class OutputParserException(ValueError, LangChainException): # noqa: N818
"""Exception that output parsers should raise to signify a parsing error.
This exists to differentiate parsing errors from other code or execution errors
that also may arise inside the output parser. `OutputParserException` will be
available to catch and handle in ways to fix the parsing error, while other
errors will be raised.
that also may arise inside the output parser.
`OutputParserException` will be available to catch and handle in ways to fix the
parsing error, while other errors will be raised.
"""
def __init__(
@@ -32,18 +33,19 @@ class OutputParserException(ValueError, LangChainException): # noqa: N818
Args:
error: The error that's being re-raised or an error message.
observation: String explanation of error which can be passed to a
model to try and remediate the issue.
observation: String explanation of error which can be passed to a model to
try and remediate the issue.
llm_output: String model output that caused the parsing error.
send_to_llm: Whether to send the observation and llm_output back to an Agent
after an `OutputParserException` has been raised.
This gives the underlying model driving the agent the context that the
previous output was improperly structured, in the hopes that it will
update the output to the correct format.
Raises:
ValueError: If `send_to_llm` is True but either observation or
ValueError: If `send_to_llm` is `True` but either observation or
`llm_output` are not provided.
"""
if isinstance(error, str):
@@ -66,11 +68,11 @@ class ErrorCode(Enum):
"""Error codes."""
INVALID_PROMPT_INPUT = "INVALID_PROMPT_INPUT"
INVALID_TOOL_RESULTS = "INVALID_TOOL_RESULTS"
INVALID_TOOL_RESULTS = "INVALID_TOOL_RESULTS" # Used in JS; not Py (yet)
MESSAGE_COERCION_FAILURE = "MESSAGE_COERCION_FAILURE"
MODEL_AUTHENTICATION = "MODEL_AUTHENTICATION"
MODEL_NOT_FOUND = "MODEL_NOT_FOUND"
MODEL_RATE_LIMIT = "MODEL_RATE_LIMIT"
MODEL_AUTHENTICATION = "MODEL_AUTHENTICATION" # Used in JS; not Py (yet)
MODEL_NOT_FOUND = "MODEL_NOT_FOUND" # Used in JS; not Py (yet)
MODEL_RATE_LIMIT = "MODEL_RATE_LIMIT" # Used in JS; not Py (yet)
OUTPUT_PARSING_FAILURE = "OUTPUT_PARSING_FAILURE"
@@ -86,6 +88,6 @@ def create_message(*, message: str, error_code: ErrorCode) -> str:
"""
return (
f"{message}\n"
"For troubleshooting, visit: https://python.langchain.com/docs/"
f"troubleshooting/errors/{error_code.value} "
"For troubleshooting, visit: https://docs.langchain.com/oss/python/langchain"
f"/errors/{error_code.value} "
)

View File

@@ -1,7 +1,7 @@
"""Code to help indexing data into a vectorstore.
This package contains helper logic to help deal with indexing data into
a vectorstore while avoiding duplicated content and over-writing content
a `VectorStore` while avoiding duplicated content and over-writing content
if it's unchanged.
"""

View File

@@ -6,16 +6,9 @@ import hashlib
import json
import uuid
import warnings
from collections.abc import (
AsyncIterable,
AsyncIterator,
Callable,
Iterable,
Iterator,
Sequence,
)
from itertools import islice
from typing import (
TYPE_CHECKING,
Any,
Literal,
TypedDict,
@@ -29,6 +22,16 @@ from langchain_core.exceptions import LangChainException
from langchain_core.indexing.base import DocumentIndex, RecordManager
from langchain_core.vectorstores import VectorStore
if TYPE_CHECKING:
from collections.abc import (
AsyncIterable,
AsyncIterator,
Callable,
Iterable,
Iterator,
Sequence,
)
# Magic UUID to use as a namespace for hashing.
# Used to try and generate a unique UUID for each document
# from hashing the document content and metadata.
@@ -298,48 +301,49 @@ def index(
For the time being, documents are indexed using their hashes, and users
are not able to specify the uid of the document.
!!! warning "Behavior changed in 0.3.25"
!!! warning "Behavior changed in `langchain-core` 0.3.25"
Added `scoped_full` cleanup mode.
!!! warning
* In full mode, the loader should be returning
the entire dataset, and not just a subset of the dataset.
Otherwise, the auto_cleanup will remove documents that it is not
supposed to.
the entire dataset, and not just a subset of the dataset.
Otherwise, the auto_cleanup will remove documents that it is not
supposed to.
* In incremental mode, if documents associated with a particular
source id appear across different batches, the indexing API
will do some redundant work. This will still result in the
correct end state of the index, but will unfortunately not be
100% efficient. For example, if a given document is split into 15
chunks, and we index them using a batch size of 5, we'll have 3 batches
all with the same source id. In general, to avoid doing too much
redundant work select as big a batch size as possible.
source id appear across different batches, the indexing API
will do some redundant work. This will still result in the
correct end state of the index, but will unfortunately not be
100% efficient. For example, if a given document is split into 15
chunks, and we index them using a batch size of 5, we'll have 3 batches
all with the same source id. In general, to avoid doing too much
redundant work, select as big a batch size as possible.
* The `scoped_full` mode is suitable if determining an appropriate batch size
is challenging or if your data loader cannot return the entire dataset at
once. This mode keeps track of source IDs in memory, which should be fine
for most use cases. If your dataset is large (10M+ docs), you will likely
need to parallelize the indexing process regardless.
is challenging or if your data loader cannot return the entire dataset at
once. This mode keeps track of source IDs in memory, which should be fine
for most use cases. If your dataset is large (10M+ docs), you will likely
need to parallelize the indexing process regardless.
Args:
docs_source: Data loader or iterable of documents to index.
record_manager: Timestamped set to keep track of which documents were
updated.
vector_store: VectorStore or DocumentIndex to index the documents into.
vector_store: `VectorStore` or DocumentIndex to index the documents into.
batch_size: Batch size to use when indexing.
cleanup: How to handle clean up of documents.
- incremental: Cleans up all documents that haven't been updated AND
that are associated with source ids that were seen during indexing.
Clean up is done continuously during indexing helping to minimize the
probability of users seeing duplicated content.
that are associated with source IDs that were seen during indexing.
Clean up is done continuously during indexing helping to minimize the
probability of users seeing duplicated content.
- full: Delete all documents that have not been returned by the loader
during this run of indexing.
Clean up runs after all documents have been indexed.
This means that users may see duplicated content during indexing.
during this run of indexing.
Clean up runs after all documents have been indexed.
This means that users may see duplicated content during indexing.
- scoped_full: Similar to Full, but only deletes all documents
that haven't been updated AND that are associated with
source ids that were seen during indexing.
that haven't been updated AND that are associated with
source IDs that were seen during indexing.
- None: Do not delete any documents.
source_id_key: Optional key that helps identify the original source
of the document.
@@ -349,7 +353,7 @@ def index(
key_encoder: Hashing algorithm to use for hashing the document content and
metadata. Options include "blake2b", "sha256", and "sha512".
!!! version-added "Added in version 0.3.66"
!!! version-added "Added in `langchain-core` 0.3.66"
key_encoder: Hashing algorithm to use for hashing the document.
If not provided, a default encoder using SHA-1 will be used.
@@ -363,10 +367,10 @@ def index(
When changing the key encoder, you must change the
index as well to avoid duplicated documents in the cache.
upsert_kwargs: Additional keyword arguments to pass to the add_documents
method of the VectorStore or the upsert method of the DocumentIndex.
method of the `VectorStore` or the upsert method of the DocumentIndex.
For example, you can use this to specify a custom vector_field:
upsert_kwargs={"vector_field": "embedding"}
!!! version-added "Added in version 0.3.10"
!!! version-added "Added in `langchain-core` 0.3.10"
Returns:
Indexing result which contains information about how many documents
@@ -375,10 +379,10 @@ def index(
Raises:
ValueError: If cleanup mode is not one of 'incremental', 'full', 'scoped_full' or None
ValueError: If cleanup mode is incremental and source_id_key is None.
ValueError: If vectorstore does not have
ValueError: If `VectorStore` does not have
"delete" and "add_documents" required methods.
ValueError: If source_id_key is not None, but is not a string or callable.
TypeError: If `vectorstore` is not a VectorStore or a DocumentIndex.
TypeError: If `vector_store` is not a `VectorStore` or a DocumentIndex.
AssertionError: If `source_id` is None when cleanup mode is incremental.
(should be unreachable code).
"""
@@ -415,7 +419,7 @@ def index(
raise ValueError(msg)
if type(destination).delete == VectorStore.delete:
# Checking if the vectorstore has overridden the default delete method
# Checking if the VectorStore has overridden the default delete method
# implementation which just raises a NotImplementedError
msg = "Vectorstore has not implemented the delete method"
raise ValueError(msg)
@@ -466,11 +470,11 @@ def index(
]
if cleanup in {"incremental", "scoped_full"}:
# source ids are required.
# Source IDs are required.
for source_id, hashed_doc in zip(source_ids, hashed_docs, strict=False):
if source_id is None:
msg = (
f"Source ids are required when cleanup mode is "
f"Source IDs are required when cleanup mode is "
f"incremental or scoped_full. "
f"Document that starts with "
f"content: {hashed_doc.page_content[:100]} "
@@ -479,7 +483,7 @@ def index(
raise ValueError(msg)
if cleanup == "scoped_full":
scoped_full_cleanup_source_ids.add(source_id)
# source ids cannot be None after for loop above.
# Source IDs cannot be None after for loop above.
source_ids = cast("Sequence[str]", source_ids)
exists_batch = record_manager.exists(
@@ -538,7 +542,7 @@ def index(
# If source IDs are provided, we can do the deletion incrementally!
if cleanup == "incremental":
# Get the uids of the documents that were not returned by the loader.
# mypy isn't good enough to determine that source ids cannot be None
# mypy isn't good enough to determine that source IDs cannot be None
# here due to a check that's happening above, so we check again.
for source_id in source_ids:
if source_id is None:
@@ -636,48 +640,49 @@ async def aindex(
For the time being, documents are indexed using their hashes, and users
are not able to specify the uid of the document.
!!! warning "Behavior changed in 0.3.25"
!!! warning "Behavior changed in `langchain-core` 0.3.25"
Added `scoped_full` cleanup mode.
!!! warning
* In full mode, the loader should be returning
the entire dataset, and not just a subset of the dataset.
Otherwise, the auto_cleanup will remove documents that it is not
supposed to.
the entire dataset, and not just a subset of the dataset.
Otherwise, the auto_cleanup will remove documents that it is not
supposed to.
* In incremental mode, if documents associated with a particular
source id appear across different batches, the indexing API
will do some redundant work. This will still result in the
correct end state of the index, but will unfortunately not be
100% efficient. For example, if a given document is split into 15
chunks, and we index them using a batch size of 5, we'll have 3 batches
all with the same source id. In general, to avoid doing too much
redundant work select as big a batch size as possible.
source id appear across different batches, the indexing API
will do some redundant work. This will still result in the
correct end state of the index, but will unfortunately not be
100% efficient. For example, if a given document is split into 15
chunks, and we index them using a batch size of 5, we'll have 3 batches
all with the same source id. In general, to avoid doing too much
redundant work, select as big a batch size as possible.
* The `scoped_full` mode is suitable if determining an appropriate batch size
is challenging or if your data loader cannot return the entire dataset at
once. This mode keeps track of source IDs in memory, which should be fine
for most use cases. If your dataset is large (10M+ docs), you will likely
need to parallelize the indexing process regardless.
is challenging or if your data loader cannot return the entire dataset at
once. This mode keeps track of source IDs in memory, which should be fine
for most use cases. If your dataset is large (10M+ docs), you will likely
need to parallelize the indexing process regardless.
Args:
docs_source: Data loader or iterable of documents to index.
record_manager: Timestamped set to keep track of which documents were
updated.
vector_store: VectorStore or DocumentIndex to index the documents into.
vector_store: `VectorStore` or DocumentIndex to index the documents into.
batch_size: Batch size to use when indexing.
cleanup: How to handle clean up of documents.
- incremental: Cleans up all documents that haven't been updated AND
that are associated with source ids that were seen during indexing.
Clean up is done continuously during indexing helping to minimize the
probability of users seeing duplicated content.
that are associated with source IDs that were seen during indexing.
Clean up is done continuously during indexing helping to minimize the
probability of users seeing duplicated content.
- full: Delete all documents that have not been returned by the loader
during this run of indexing.
Clean up runs after all documents have been indexed.
This means that users may see duplicated content during indexing.
during this run of indexing.
Clean up runs after all documents have been indexed.
This means that users may see duplicated content during indexing.
- scoped_full: Similar to Full, but only deletes all documents
that haven't been updated AND that are associated with
source ids that were seen during indexing.
that haven't been updated AND that are associated with
source IDs that were seen during indexing.
- None: Do not delete any documents.
source_id_key: Optional key that helps identify the original source
of the document.
@@ -687,7 +692,7 @@ async def aindex(
key_encoder: Hashing algorithm to use for hashing the document content and
metadata. Options include "blake2b", "sha256", and "sha512".
!!! version-added "Added in version 0.3.66"
!!! version-added "Added in `langchain-core` 0.3.66"
key_encoder: Hashing algorithm to use for hashing the document.
If not provided, a default encoder using SHA-1 will be used.
@@ -701,10 +706,10 @@ async def aindex(
When changing the key encoder, you must change the
index as well to avoid duplicated documents in the cache.
upsert_kwargs: Additional keyword arguments to pass to the add_documents
method of the VectorStore or the upsert method of the DocumentIndex.
method of the `VectorStore` or the upsert method of the DocumentIndex.
For example, you can use this to specify a custom vector_field:
upsert_kwargs={"vector_field": "embedding"}
!!! version-added "Added in version 0.3.10"
!!! version-added "Added in `langchain-core` 0.3.10"
Returns:
Indexing result which contains information about how many documents
@@ -713,10 +718,10 @@ async def aindex(
Raises:
ValueError: If cleanup mode is not one of 'incremental', 'full', 'scoped_full' or None
ValueError: If cleanup mode is incremental and source_id_key is None.
ValueError: If vectorstore does not have
ValueError: If `VectorStore` does not have
"adelete" and "aadd_documents" required methods.
ValueError: If source_id_key is not None, but is not a string or callable.
TypeError: If `vector_store` is not a VectorStore or DocumentIndex.
TypeError: If `vector_store` is not a `VectorStore` or DocumentIndex.
AssertionError: If `source_id_key` is None when cleanup mode is
incremental or `scoped_full` (should be unreachable).
"""
@@ -757,7 +762,7 @@ async def aindex(
type(destination).adelete == VectorStore.adelete
and type(destination).delete == VectorStore.delete
):
# Checking if the vectorstore has overridden the default adelete or delete
# Checking if the VectorStore has overridden the default adelete or delete
# methods implementation which just raises a NotImplementedError
msg = "Vectorstore has not implemented the adelete or delete method"
raise ValueError(msg)
@@ -815,11 +820,11 @@ async def aindex(
]
if cleanup in {"incremental", "scoped_full"}:
# If the cleanup mode is incremental, source ids are required.
# If the cleanup mode is incremental, source IDs are required.
for source_id, hashed_doc in zip(source_ids, hashed_docs, strict=False):
if source_id is None:
msg = (
f"Source ids are required when cleanup mode is "
f"Source IDs are required when cleanup mode is "
f"incremental or scoped_full. "
f"Document that starts with "
f"content: {hashed_doc.page_content[:100]} "
@@ -828,7 +833,7 @@ async def aindex(
raise ValueError(msg)
if cleanup == "scoped_full":
scoped_full_cleanup_source_ids.add(source_id)
# source ids cannot be None after for loop above.
# Source IDs cannot be None after for loop above.
source_ids = cast("Sequence[str]", source_ids)
exists_batch = await record_manager.aexists(
@@ -888,7 +893,7 @@ async def aindex(
if cleanup == "incremental":
# Get the uids of the documents that were not returned by the loader.
# mypy isn't good enough to determine that source ids cannot be None
# mypy isn't good enough to determine that source IDs cannot be None
# here due to a check that's happening above, so we check again.
for source_id in source_ids:
if source_id is None:

View File

@@ -25,7 +25,7 @@ class RecordManager(ABC):
The record manager abstraction is used by the langchain indexing API.
The record manager keeps track of which documents have been
written into a vectorstore and when they were written.
written into a `VectorStore` and when they were written.
The indexing API computes hashes for each document and stores the hash
together with the write time and the source id in the record manager.
@@ -37,7 +37,7 @@ class RecordManager(ABC):
already been indexed, and to only index new documents.
The main benefit of this abstraction is that it works across many vectorstores.
To be supported, a vectorstore needs to only support the ability to add and
To be supported, a `VectorStore` needs to only support the ability to add and
delete documents by ID. Using the record manager, the indexing API will
be able to delete outdated documents and avoid redundant indexing of documents
that have already been indexed.
@@ -45,13 +45,13 @@ class RecordManager(ABC):
The main constraints of this abstraction are:
1. It relies on the time-stamps to determine which documents have been
indexed and which have not. This means that the time-stamps must be
monotonically increasing. The timestamp should be the timestamp
as measured by the server to minimize issues.
indexed and which have not. This means that the time-stamps must be
monotonically increasing. The timestamp should be the timestamp
as measured by the server to minimize issues.
2. The record manager is currently implemented separately from the
vectorstore, which means that the overall system becomes distributed
and may create issues with consistency. For example, writing to
record manager succeeds, but corresponding writing to vectorstore fails.
vectorstore, which means that the overall system becomes distributed
and may create issues with consistency. For example, writing to
record manager succeeds, but corresponding writing to `VectorStore` fails.
"""
def __init__(
@@ -460,7 +460,7 @@ class UpsertResponse(TypedDict):
class DeleteResponse(TypedDict, total=False):
"""A generic response for delete operation.
The fields in this response are optional and whether the vectorstore
The fields in this response are optional and whether the `VectorStore`
returns them or not is up to the implementation.
"""
@@ -518,7 +518,7 @@ class DocumentIndex(BaseRetriever):
if it is provided. If the ID is not provided, the upsert method is free
to generate an ID for the content.
When an ID is specified and the content already exists in the vectorstore,
When an ID is specified and the content already exists in the `VectorStore`,
the upsert method should update the content with the new data. If the content
does not exist, the upsert method should add the item to the `VectorStore`.
@@ -528,20 +528,20 @@ class DocumentIndex(BaseRetriever):
Returns:
A response object that contains the list of IDs that were
successfully added or updated in the vectorstore and the list of IDs that
successfully added or updated in the `VectorStore` and the list of IDs that
failed to be added or updated.
"""
async def aupsert(
self, items: Sequence[Document], /, **kwargs: Any
) -> UpsertResponse:
"""Add or update documents in the vectorstore. Async version of upsert.
"""Add or update documents in the `VectorStore`. Async version of `upsert`.
The upsert functionality should utilize the ID field of the item
if it is provided. If the ID is not provided, the upsert method is free
to generate an ID for the item.
When an ID is specified and the item already exists in the vectorstore,
When an ID is specified and the item already exists in the `VectorStore`,
the upsert method should update the item with the new data. If the item
does not exist, the upsert method should add the item to the `VectorStore`.
@@ -551,7 +551,7 @@ class DocumentIndex(BaseRetriever):
Returns:
A response object that contains the list of IDs that were
successfully added or updated in the vectorstore and the list of IDs that
successfully added or updated in the `VectorStore` and the list of IDs that
failed to be added or updated.
"""
return await run_in_executor(
@@ -568,7 +568,7 @@ class DocumentIndex(BaseRetriever):
Calling delete without any input parameters should raise a ValueError!
Args:
ids: List of ids to delete.
ids: List of IDs to delete.
**kwargs: Additional keyword arguments. This is up to the implementation.
For example, can include an option to delete the entire index,
or else issue a non-blocking delete etc.
@@ -586,7 +586,7 @@ class DocumentIndex(BaseRetriever):
Calling adelete without any input parameters should raise a ValueError!
Args:
ids: List of ids to delete.
ids: List of IDs to delete.
**kwargs: Additional keyword arguments. This is up to the implementation.
For example, can include an option to delete the entire index.

View File

@@ -62,10 +62,10 @@ class InMemoryDocumentIndex(DocumentIndex):
"""Delete by IDs.
Args:
ids: List of ids to delete.
ids: List of IDs to delete.
Raises:
ValueError: If ids is None.
ValueError: If `ids` is `None`.
Returns:
A response object that contains the list of IDs that were successfully

View File

@@ -6,12 +6,13 @@ LangChain has two main classes to work with language models: chat models and
**Chat models**
Language models that use a sequence of messages as inputs and return chat messages
as outputs (as opposed to using plain text). Chat models support the assignment of
distinct roles to conversation messages, helping to distinguish messages from the AI,
users, and instructions such as system messages.
as outputs (as opposed to using plain text).
The key abstraction for chat models is `BaseChatModel`. Implementations
should inherit from this class.
Chat models support the assignment of distinct roles to conversation messages, helping
to distinguish messages from the AI, users, and instructions such as system messages.
The key abstraction for chat models is `BaseChatModel`. Implementations should inherit
from this class.
See existing [chat model integrations](https://docs.langchain.com/oss/python/integrations/chat).
@@ -52,6 +53,10 @@ if TYPE_CHECKING:
ParrotFakeChatModel,
)
from langchain_core.language_models.llms import LLM, BaseLLM
from langchain_core.language_models.model_profile import (
ModelProfile,
ModelProfileRegistry,
)
__all__ = (
"LLM",
@@ -67,6 +72,8 @@ __all__ = (
"LanguageModelInput",
"LanguageModelLike",
"LanguageModelOutput",
"ModelProfile",
"ModelProfileRegistry",
"ParrotFakeChatModel",
"SimpleChatModel",
"get_tokenizer",
@@ -89,6 +96,8 @@ _dynamic_imports = {
"GenericFakeChatModel": "fake_chat_models",
"ParrotFakeChatModel": "fake_chat_models",
"LLM": "llms",
"ModelProfile": "model_profile",
"ModelProfileRegistry": "model_profile",
"BaseLLM": "llms",
"is_openai_data_block": "_utils",
}

View File

@@ -139,7 +139,8 @@ def _normalize_messages(
directly; this may change in the future
- LangChain v0 standard content blocks for backward compatibility
!!! warning "Behavior changed in 1.0.0"
!!! warning "Behavior changed in `langchain-core` 1.0.0"
In previous versions, this function returned messages in LangChain v0 format.
Now, it returns messages in LangChain v1 format, which upgraded chat models now
expect to receive when passing back in message history. For backward

View File

@@ -131,14 +131,19 @@ class BaseLanguageModel(
Caching is not currently supported for streaming methods of models.
"""
verbose: bool = Field(default_factory=_get_verbosity, exclude=True, repr=False)
"""Whether to print out response text."""
callbacks: Callbacks = Field(default=None, exclude=True)
"""Callbacks to add to the run trace."""
tags: list[str] | None = Field(default=None, exclude=True)
"""Tags to add to the run trace."""
metadata: dict[str, Any] | None = Field(default=None, exclude=True)
"""Metadata to add to the run trace."""
custom_get_token_ids: Callable[[str], list[int]] | None = Field(
default=None, exclude=True
)
@@ -195,15 +200,22 @@ class BaseLanguageModel(
type (e.g., pure text completion models vs chat models).
Args:
prompts: List of `PromptValue` objects. A `PromptValue` is an object that
can be converted to match the format of any language model (string for
pure text generation models and `BaseMessage` objects for chat models).
stop: Stop words to use when generating. Model output is cut off at the
first occurrence of any of these substrings.
callbacks: `Callbacks` to pass through. Used for executing additional
functionality, such as logging or streaming, throughout generation.
**kwargs: Arbitrary additional keyword arguments. These are usually passed
to the model provider API call.
prompts: List of `PromptValue` objects.
A `PromptValue` is an object that can be converted to match the format
of any language model (string for pure text generation models and
`BaseMessage` objects for chat models).
stop: Stop words to use when generating.
Model output is cut off at the first occurrence of any of these
substrings.
callbacks: `Callbacks` to pass through.
Used for executing additional functionality, such as logging or
streaming, throughout generation.
**kwargs: Arbitrary additional keyword arguments.
These are usually passed to the model provider API call.
Returns:
An `LLMResult`, which contains a list of candidate `Generation` objects for
@@ -232,15 +244,22 @@ class BaseLanguageModel(
type (e.g., pure text completion models vs chat models).
Args:
prompts: List of `PromptValue` objects. A `PromptValue` is an object that
can be converted to match the format of any language model (string for
pure text generation models and `BaseMessage` objects for chat models).
stop: Stop words to use when generating. Model output is cut off at the
first occurrence of any of these substrings.
callbacks: `Callbacks` to pass through. Used for executing additional
functionality, such as logging or streaming, throughout generation.
**kwargs: Arbitrary additional keyword arguments. These are usually passed
to the model provider API call.
prompts: List of `PromptValue` objects.
A `PromptValue` is an object that can be converted to match the format
of any language model (string for pure text generation models and
`BaseMessage` objects for chat models).
stop: Stop words to use when generating.
Model output is cut off at the first occurrence of any of these
substrings.
callbacks: `Callbacks` to pass through.
Used for executing additional functionality, such as logging or
streaming, throughout generation.
**kwargs: Arbitrary additional keyword arguments.
These are usually passed to the model provider API call.
Returns:
An `LLMResult`, which contains a list of candidate `Generation` objects for
@@ -262,13 +281,13 @@ class BaseLanguageModel(
return self.lc_attributes
def get_token_ids(self, text: str) -> list[int]:
"""Return the ordered ids of the tokens in a text.
"""Return the ordered IDs of the tokens in a text.
Args:
text: The string input to tokenize.
Returns:
A list of ids corresponding to the tokens in the text, in order they occur
A list of IDs corresponding to the tokens in the text, in order they occur
in the text.
"""
if self.custom_get_token_ids is not None:
@@ -280,6 +299,9 @@ class BaseLanguageModel(
Useful for checking if an input fits in a model's context window.
This should be overridden by model-specific implementations to provide accurate
token counts via model-specific tokenizers.
Args:
text: The string input to tokenize.
@@ -298,9 +320,17 @@ class BaseLanguageModel(
Useful for checking if an input fits in a model's context window.
This should be overridden by model-specific implementations to provide accurate
token counts via model-specific tokenizers.
!!! note
The base implementation of `get_num_tokens_from_messages` ignores tool
schemas.
* The base implementation of `get_num_tokens_from_messages` ignores tool
schemas.
* The base implementation of `get_num_tokens_from_messages` adds additional
prefixes to messages to represent user roles, which will add to the
overall token count. Model-specific implementations may choose to
handle this differently.
Args:
messages: The message inputs to tokenize.

View File

@@ -33,6 +33,7 @@ from langchain_core.language_models.base import (
LangSmithParams,
LanguageModelInput,
)
from langchain_core.language_models.model_profile import ModelProfile
from langchain_core.load import dumpd, dumps
from langchain_core.messages import (
AIMessage,
@@ -88,7 +89,10 @@ def _generate_response_from_error(error: BaseException) -> list[ChatGeneration]:
try:
metadata["body"] = response.json()
except Exception:
metadata["body"] = getattr(response, "text", None)
try:
metadata["body"] = getattr(response, "text", None)
except Exception:
metadata["body"] = None
if hasattr(response, "headers"):
try:
metadata["headers"] = dict(response.headers)
@@ -329,10 +333,25 @@ class BaseChatModel(BaseLanguageModel[AIMessage], ABC):
[`langchain-openai`](https://pypi.org/project/langchain-openai)) can also use this
field to roll out new content formats in a backward-compatible way.
!!! version-added "Added in version 1.0"
!!! version-added "Added in `langchain-core` 1.0.0"
"""
profile: ModelProfile | None = Field(default=None, exclude=True)
"""Profile detailing model capabilities.
!!! warning "Beta feature"
This is a beta feature. The format of model profiles is subject to change.
If not specified, automatically loaded from the provider package on initialization
if data is available.
Example profile data includes context window sizes, supported modalities, or support
for tool calling, structured output, and other features.
!!! version-added "Added in `langchain-core` 1.1.0"
"""
model_config = ConfigDict(
arbitrary_types_allowed=True,
)
@@ -842,16 +861,21 @@ class BaseChatModel(BaseLanguageModel[AIMessage], ABC):
Args:
messages: List of list of messages.
stop: Stop words to use when generating. Model output is cut off at the
first occurrence of any of these substrings.
callbacks: `Callbacks` to pass through. Used for executing additional
functionality, such as logging or streaming, throughout generation.
stop: Stop words to use when generating.
Model output is cut off at the first occurrence of any of these
substrings.
callbacks: `Callbacks` to pass through.
Used for executing additional functionality, such as logging or
streaming, throughout generation.
tags: The tags to apply.
metadata: The metadata to apply.
run_name: The name of the run.
run_id: The ID of the run.
**kwargs: Arbitrary additional keyword arguments. These are usually passed
to the model provider API call.
**kwargs: Arbitrary additional keyword arguments.
These are usually passed to the model provider API call.
Returns:
An `LLMResult`, which contains a list of candidate `Generations` for each
@@ -960,16 +984,21 @@ class BaseChatModel(BaseLanguageModel[AIMessage], ABC):
Args:
messages: List of list of messages.
stop: Stop words to use when generating. Model output is cut off at the
first occurrence of any of these substrings.
callbacks: `Callbacks` to pass through. Used for executing additional
functionality, such as logging or streaming, throughout generation.
stop: Stop words to use when generating.
Model output is cut off at the first occurrence of any of these
substrings.
callbacks: `Callbacks` to pass through.
Used for executing additional functionality, such as logging or
streaming, throughout generation.
tags: The tags to apply.
metadata: The metadata to apply.
run_name: The name of the run.
run_id: The ID of the run.
**kwargs: Arbitrary additional keyword arguments. These are usually passed
to the model provider API call.
**kwargs: Arbitrary additional keyword arguments.
These are usually passed to the model provider API call.
Returns:
An `LLMResult`, which contains a list of candidate `Generations` for each
@@ -1502,10 +1531,10 @@ class BaseChatModel(BaseLanguageModel[AIMessage], ABC):
Args:
schema: The output schema. Can be passed in as:
- an OpenAI function/tool schema,
- a JSON Schema,
- a `TypedDict` class,
- or a Pydantic class.
- An OpenAI function/tool schema,
- A JSON Schema,
- A `TypedDict` class,
- Or a Pydantic class.
If `schema` is a Pydantic class then the model output will be a
Pydantic instance of that class, and the model-generated fields will be
@@ -1517,11 +1546,15 @@ class BaseChatModel(BaseLanguageModel[AIMessage], ABC):
when specifying a Pydantic or `TypedDict` class.
include_raw:
If `False` then only the parsed structured output is returned. If
an error occurs during model output parsing it will be raised. If `True`
then both the raw model response (a `BaseMessage`) and the parsed model
response will be returned. If an error occurs during output parsing it
will be caught and returned as well.
If `False` then only the parsed structured output is returned.
If an error occurs during model output parsing it will be raised.
If `True` then both the raw model response (a `BaseMessage`) and the
parsed model response will be returned.
If an error occurs during output parsing it will be caught and returned
as well.
The final output is always a `dict` with keys `'raw'`, `'parsed'`, and
`'parsing_error'`.
@@ -1599,7 +1632,7 @@ class BaseChatModel(BaseLanguageModel[AIMessage], ABC):
# }
```
Example: `dict` schema (`include_raw=False`):
Example: Dictionary schema (`include_raw=False`):
```python
from pydantic import BaseModel
@@ -1626,8 +1659,9 @@ class BaseChatModel(BaseLanguageModel[AIMessage], ABC):
# }
```
!!! warning "Behavior changed in 0.2.26"
Added support for TypedDict class.
!!! warning "Behavior changed in `langchain-core` 0.2.26"
Added support for `TypedDict` class.
""" # noqa: E501
_ = kwargs.pop("method", None)
@@ -1726,9 +1760,12 @@ def _gen_info_and_msg_metadata(
}
_MAX_CLEANUP_DEPTH = 100
def _cleanup_llm_representation(serialized: Any, depth: int) -> None:
"""Remove non-serializable objects from a serialized object."""
if depth > 100: # Don't cooperate for pathological cases
if depth > _MAX_CLEANUP_DEPTH: # Don't cooperate for pathological cases
return
if not isinstance(serialized, dict):

View File

@@ -1,4 +1,4 @@
"""Fake chat model for testing purposes."""
"""Fake chat models for testing purposes."""
import asyncio
import re

View File

@@ -1,4 +1,7 @@
"""Base interface for large language models to expose."""
"""Base interface for traditional large language models (LLMs) to expose.
These are traditionally older models (newer models generally are chat models).
"""
from __future__ import annotations
@@ -648,9 +651,12 @@ class BaseLLM(BaseLanguageModel[str], ABC):
Args:
prompts: The prompts to generate from.
stop: Stop words to use when generating. Model output is cut off at the
first occurrence of any of the stop substrings.
If stop tokens are not supported consider raising NotImplementedError.
stop: Stop words to use when generating.
Model output is cut off at the first occurrence of any of these
substrings.
If stop tokens are not supported consider raising `NotImplementedError`.
run_manager: Callback manager for the run.
Returns:
@@ -668,9 +674,12 @@ class BaseLLM(BaseLanguageModel[str], ABC):
Args:
prompts: The prompts to generate from.
stop: Stop words to use when generating. Model output is cut off at the
first occurrence of any of the stop substrings.
If stop tokens are not supported consider raising NotImplementedError.
stop: Stop words to use when generating.
Model output is cut off at the first occurrence of any of these
substrings.
If stop tokens are not supported consider raising `NotImplementedError`.
run_manager: Callback manager for the run.
Returns:
@@ -702,11 +711,14 @@ class BaseLLM(BaseLanguageModel[str], ABC):
Args:
prompt: The prompt to generate from.
stop: Stop words to use when generating. Model output is cut off at the
first occurrence of any of these substrings.
stop: Stop words to use when generating.
Model output is cut off at the first occurrence of any of these
substrings.
run_manager: Callback manager for the run.
**kwargs: Arbitrary additional keyword arguments. These are usually passed
to the model provider API call.
**kwargs: Arbitrary additional keyword arguments.
These are usually passed to the model provider API call.
Yields:
Generation chunks.
@@ -728,11 +740,14 @@ class BaseLLM(BaseLanguageModel[str], ABC):
Args:
prompt: The prompt to generate from.
stop: Stop words to use when generating. Model output is cut off at the
first occurrence of any of these substrings.
stop: Stop words to use when generating.
Model output is cut off at the first occurrence of any of these
substrings.
run_manager: Callback manager for the run.
**kwargs: Arbitrary additional keyword arguments. These are usually passed
to the model provider API call.
**kwargs: Arbitrary additional keyword arguments.
These are usually passed to the model provider API call.
Yields:
Generation chunks.
@@ -843,10 +858,14 @@ class BaseLLM(BaseLanguageModel[str], ABC):
Args:
prompts: List of string prompts.
stop: Stop words to use when generating. Model output is cut off at the
first occurrence of any of these substrings.
callbacks: `Callbacks` to pass through. Used for executing additional
functionality, such as logging or streaming, throughout generation.
stop: Stop words to use when generating.
Model output is cut off at the first occurrence of any of these
substrings.
callbacks: `Callbacks` to pass through.
Used for executing additional functionality, such as logging or
streaming, throughout generation.
tags: List of tags to associate with each prompt. If provided, the length
of the list must match the length of the prompts list.
metadata: List of metadata dictionaries to associate with each prompt. If
@@ -856,8 +875,9 @@ class BaseLLM(BaseLanguageModel[str], ABC):
length of the list must match the length of the prompts list.
run_id: List of run IDs to associate with each prompt. If provided, the
length of the list must match the length of the prompts list.
**kwargs: Arbitrary additional keyword arguments. These are usually passed
to the model provider API call.
**kwargs: Arbitrary additional keyword arguments.
These are usually passed to the model provider API call.
Raises:
ValueError: If prompts is not a list.
@@ -1113,10 +1133,14 @@ class BaseLLM(BaseLanguageModel[str], ABC):
Args:
prompts: List of string prompts.
stop: Stop words to use when generating. Model output is cut off at the
first occurrence of any of these substrings.
callbacks: `Callbacks` to pass through. Used for executing additional
functionality, such as logging or streaming, throughout generation.
stop: Stop words to use when generating.
Model output is cut off at the first occurrence of any of these
substrings.
callbacks: `Callbacks` to pass through.
Used for executing additional functionality, such as logging or
streaming, throughout generation.
tags: List of tags to associate with each prompt. If provided, the length
of the list must match the length of the prompts list.
metadata: List of metadata dictionaries to associate with each prompt. If
@@ -1126,8 +1150,9 @@ class BaseLLM(BaseLanguageModel[str], ABC):
length of the list must match the length of the prompts list.
run_id: List of run IDs to associate with each prompt. If provided, the
length of the list must match the length of the prompts list.
**kwargs: Arbitrary additional keyword arguments. These are usually passed
to the model provider API call.
**kwargs: Arbitrary additional keyword arguments.
These are usually passed to the model provider API call.
Raises:
ValueError: If the length of `callbacks`, `tags`, `metadata`, or
@@ -1391,11 +1416,6 @@ class LLM(BaseLLM):
`astream` will use `_astream` if provided, otherwise it will implement
a fallback behavior that will use `_stream` if `_stream` is implemented,
and use `_acall` if `_stream` is not implemented.
Please see the following guide for more information on how to
implement a custom LLM:
https://python.langchain.com/docs/how_to/custom_llm/
"""
@abstractmethod
@@ -1412,12 +1432,16 @@ class LLM(BaseLLM):
Args:
prompt: The prompt to generate from.
stop: Stop words to use when generating. Model output is cut off at the
first occurrence of any of the stop substrings.
If stop tokens are not supported consider raising NotImplementedError.
stop: Stop words to use when generating.
Model output is cut off at the first occurrence of any of these
substrings.
If stop tokens are not supported consider raising `NotImplementedError`.
run_manager: Callback manager for the run.
**kwargs: Arbitrary additional keyword arguments. These are usually passed
to the model provider API call.
**kwargs: Arbitrary additional keyword arguments.
These are usually passed to the model provider API call.
Returns:
The model output as a string. SHOULD NOT include the prompt.
@@ -1438,12 +1462,16 @@ class LLM(BaseLLM):
Args:
prompt: The prompt to generate from.
stop: Stop words to use when generating. Model output is cut off at the
first occurrence of any of the stop substrings.
If stop tokens are not supported consider raising NotImplementedError.
stop: Stop words to use when generating.
Model output is cut off at the first occurrence of any of these
substrings.
If stop tokens are not supported consider raising `NotImplementedError`.
run_manager: Callback manager for the run.
**kwargs: Arbitrary additional keyword arguments. These are usually passed
to the model provider API call.
**kwargs: Arbitrary additional keyword arguments.
These are usually passed to the model provider API call.
Returns:
The model output as a string. SHOULD NOT include the prompt.

View File

@@ -0,0 +1,84 @@
"""Model profile types and utilities."""
from typing_extensions import TypedDict
class ModelProfile(TypedDict, total=False):
"""Capability profile for a chat model.
!!! warning "Beta feature"
This is a beta feature. The format of model profiles is subject to change.
Provides information about chat model capabilities, such as context window sizes
and supported features.
Every key is optional (the `TypedDict` is declared with `total=False`), so a
profile may contain only a subset of the fields below.
"""
# --- Input constraints ---
max_input_tokens: int
"""Maximum context window (tokens)"""
image_inputs: bool
"""Whether image inputs are supported."""
# TODO: add more detail about formats?
image_url_inputs: bool
"""Whether [image URL inputs](https://docs.langchain.com/oss/python/langchain/models#multimodal)
are supported."""
pdf_inputs: bool
"""Whether [PDF inputs](https://docs.langchain.com/oss/python/langchain/models#multimodal)
are supported."""
# TODO: add more detail about formats? e.g. bytes or base64
audio_inputs: bool
"""Whether [audio inputs](https://docs.langchain.com/oss/python/langchain/models#multimodal)
are supported."""
# TODO: add more detail about formats? e.g. bytes or base64
video_inputs: bool
"""Whether [video inputs](https://docs.langchain.com/oss/python/langchain/models#multimodal)
are supported."""
# TODO: add more detail about formats? e.g. bytes or base64
image_tool_message: bool
"""Whether images can be included in tool messages."""
pdf_tool_message: bool
"""Whether PDFs can be included in tool messages."""
# --- Output constraints ---
max_output_tokens: int
"""Maximum output tokens"""
reasoning_output: bool
"""Whether the model supports [reasoning / chain-of-thought](https://docs.langchain.com/oss/python/langchain/models#reasoning)"""
image_outputs: bool
"""Whether [image outputs](https://docs.langchain.com/oss/python/langchain/models#multimodal)
are supported."""
audio_outputs: bool
"""Whether [audio outputs](https://docs.langchain.com/oss/python/langchain/models#multimodal)
are supported."""
video_outputs: bool
"""Whether [video outputs](https://docs.langchain.com/oss/python/langchain/models#multimodal)
are supported."""
# --- Tool calling ---
tool_calling: bool
"""Whether the model supports [tool calling](https://docs.langchain.com/oss/python/langchain/models#tool-calling)"""
tool_choice: bool
"""Whether the model supports [tool choice](https://docs.langchain.com/oss/python/langchain/models#forcing-tool-calls)"""
# --- Structured output ---
structured_output: bool
"""Whether the model supports a native [structured output](https://docs.langchain.com/oss/python/langchain/models#structured-outputs)
feature"""
ModelProfileRegistry = dict[str, ModelProfile]
"""Registry mapping model identifiers or names to their ModelProfile."""

View File

@@ -17,7 +17,7 @@ def default(obj: Any) -> Any:
obj: The object to serialize to json if it is a Serializable object.
Returns:
A json serializable object or a SerializedNotImplemented object.
A JSON serializable object or a SerializedNotImplemented object.
"""
if isinstance(obj, Serializable):
return obj.to_json()

View File

@@ -61,13 +61,15 @@ class Reviver:
"""Initialize the reviver.
Args:
secrets_map: A map of secrets to load. If a secret is not found in
the map, it will be loaded from the environment if `secrets_from_env`
is True.
secrets_map: A map of secrets to load.
If a secret is not found in the map, it will be loaded from the
environment if `secrets_from_env` is `True`.
valid_namespaces: A list of additional namespaces (modules)
to allow to be deserialized.
secrets_from_env: Whether to load secrets from the environment.
additional_import_mappings: A dictionary of additional namespace mappings.
You can use this to override default mappings or add new mappings.
ignore_unserializable_fields: Whether to ignore unserializable fields.
"""
@@ -195,13 +197,15 @@ def loads(
Args:
text: The string to load.
secrets_map: A map of secrets to load. If a secret is not found in
the map, it will be loaded from the environment if `secrets_from_env`
is True.
secrets_map: A map of secrets to load.
If a secret is not found in the map, it will be loaded from the environment
if `secrets_from_env` is `True`.
valid_namespaces: A list of additional namespaces (modules)
to allow to be deserialized.
secrets_from_env: Whether to load secrets from the environment.
additional_import_mappings: A dictionary of additional namespace mappings.
You can use this to override default mappings or add new mappings.
ignore_unserializable_fields: Whether to ignore unserializable fields.
@@ -237,13 +241,15 @@ def load(
Args:
obj: The object to load.
secrets_map: A map of secrets to load. If a secret is not found in
the map, it will be loaded from the environment if `secrets_from_env`
is True.
secrets_map: A map of secrets to load.
If a secret is not found in the map, it will be loaded from the environment
if `secrets_from_env` is `True`.
valid_namespaces: A list of additional namespaces (modules)
to allow to be deserialized.
secrets_from_env: Whether to load secrets from the environment.
additional_import_mappings: A dictionary of additional namespace mappings.
You can use this to override default mappings or add new mappings.
ignore_unserializable_fields: Whether to ignore unserializable fields.

View File

@@ -97,11 +97,14 @@ class Serializable(BaseModel, ABC):
by default. This is to prevent accidental serialization of objects that should
not be serialized.
- `get_lc_namespace`: Get the namespace of the LangChain object.
During deserialization, this namespace is used to identify
the correct class to instantiate.
Please see the `Reviver` class in `langchain_core.load.load` for more details.
During deserialization an additional mapping is used to handle classes that have moved
or been renamed across package versions.
- `lc_secrets`: A map of constructor argument names to secret ids.
- `lc_attributes`: List of additional attribute names that should be included
as part of the serialized representation.
@@ -194,7 +197,7 @@ class Serializable(BaseModel, ABC):
ValueError: If the class has deprecated attributes.
Returns:
A json serializable object or a `SerializedNotImplemented` object.
A JSON serializable object or a `SerializedNotImplemented` object.
"""
if not self.is_lc_serializable():
return self.to_json_not_implemented()

View File

@@ -9,6 +9,9 @@ if TYPE_CHECKING:
from langchain_core.messages.ai import (
AIMessage,
AIMessageChunk,
InputTokenDetails,
OutputTokenDetails,
UsageMetadata,
)
from langchain_core.messages.base import (
BaseMessage,
@@ -87,10 +90,12 @@ __all__ = (
"HumanMessage",
"HumanMessageChunk",
"ImageContentBlock",
"InputTokenDetails",
"InvalidToolCall",
"MessageLikeRepresentation",
"NonStandardAnnotation",
"NonStandardContentBlock",
"OutputTokenDetails",
"PlainTextContentBlock",
"ReasoningContentBlock",
"RemoveMessage",
@@ -104,6 +109,7 @@ __all__ = (
"ToolCallChunk",
"ToolMessage",
"ToolMessageChunk",
"UsageMetadata",
"VideoContentBlock",
"_message_from_dict",
"convert_to_messages",
@@ -145,6 +151,7 @@ _dynamic_imports = {
"HumanMessageChunk": "human",
"NonStandardAnnotation": "content",
"NonStandardContentBlock": "content",
"OutputTokenDetails": "ai",
"PlainTextContentBlock": "content",
"ReasoningContentBlock": "content",
"RemoveMessage": "modifier",
@@ -154,12 +161,14 @@ _dynamic_imports = {
"SystemMessage": "system",
"SystemMessageChunk": "system",
"ImageContentBlock": "content",
"InputTokenDetails": "ai",
"InvalidToolCall": "tool",
"TextContentBlock": "content",
"ToolCall": "tool",
"ToolCallChunk": "tool",
"ToolMessage": "tool",
"ToolMessageChunk": "tool",
"UsageMetadata": "ai",
"VideoContentBlock": "content",
"AnyMessage": "utils",
"MessageLikeRepresentation": "utils",

View File

@@ -48,10 +48,10 @@ class InputTokenDetails(TypedDict, total=False):
}
```
!!! version-added "Added in version 0.3.9"
May also hold extra provider-specific keys.
!!! version-added "Added in `langchain-core` 0.3.9"
"""
audio: int
@@ -83,7 +83,9 @@ class OutputTokenDetails(TypedDict, total=False):
}
```
!!! version-added "Added in version 0.3.9"
May also hold extra provider-specific keys.
!!! version-added "Added in `langchain-core` 0.3.9"
"""
@@ -121,9 +123,15 @@ class UsageMetadata(TypedDict):
}
```
!!! warning "Behavior changed in 0.3.9"
!!! warning "Behavior changed in `langchain-core` 0.3.9"
Added `input_token_details` and `output_token_details`.
!!! note "LangSmith SDK"
The LangSmith SDK also has a `UsageMetadata` class. While the two share fields,
LangSmith's `UsageMetadata` has additional fields to capture cost information
used by the LangSmith platform.
"""
input_tokens: int
@@ -131,7 +139,7 @@ class UsageMetadata(TypedDict):
output_tokens: int
"""Count of output (or completion) tokens. Sum of all output token types."""
total_tokens: int
"""Total token count. Sum of input_tokens + output_tokens."""
"""Total token count. Sum of `input_tokens` + `output_tokens`."""
input_token_details: NotRequired[InputTokenDetails]
"""Breakdown of input token counts.
@@ -141,7 +149,6 @@ class UsageMetadata(TypedDict):
"""Breakdown of output token counts.
Does *not* need to sum to full output token count. Does *not* need to have all keys.
"""
@@ -153,7 +160,6 @@ class AIMessage(BaseMessage):
This message represents the output of the model and consists of both
the raw output as returned by the model and standardized fields
(e.g., tool calls, usage metadata) added by the LangChain framework.
"""
tool_calls: list[ToolCall] = []
@@ -651,13 +657,13 @@ def add_ai_message_chunks(
chunk_id = id_
break
else:
# second pass: prefer lc_run-* ids over lc_* ids
# second pass: prefer lc_run-* IDs over lc_* IDs
for id_ in candidates:
if id_ and id_.startswith(LC_ID_PREFIX):
chunk_id = id_
break
else:
# third pass: take any remaining id (auto-generated lc_* ids)
# third pass: take any remaining ID (auto-generated lc_* IDs)
for id_ in candidates:
if id_:
chunk_id = id_

View File

@@ -5,11 +5,9 @@ from __future__ import annotations
from typing import TYPE_CHECKING, Any, cast, overload
from pydantic import ConfigDict, Field
from typing_extensions import Self
from langchain_core._api.deprecation import warn_deprecated
from langchain_core.load.serializable import Serializable
from langchain_core.messages import content as types
from langchain_core.utils import get_bolded_text
from langchain_core.utils._merge import merge_dicts, merge_lists
from langchain_core.utils.interactive_env import is_interactive_env
@@ -17,6 +15,9 @@ from langchain_core.utils.interactive_env import is_interactive_env
if TYPE_CHECKING:
from collections.abc import Sequence
from typing_extensions import Self
from langchain_core.messages import content as types
from langchain_core.prompts.chat import ChatPromptTemplate
@@ -93,6 +94,10 @@ class BaseMessage(Serializable):
"""Base abstract message class.
Messages are the inputs and outputs of a chat model.
Examples include [`HumanMessage`][langchain.messages.HumanMessage],
[`AIMessage`][langchain.messages.AIMessage], and
[`SystemMessage`][langchain.messages.SystemMessage].
"""
content: str | list[str | dict]
@@ -195,7 +200,7 @@ class BaseMessage(Serializable):
def content_blocks(self) -> list[types.ContentBlock]:
r"""Load content blocks from the message content.
!!! version-added "Added in version 1.0.0"
!!! version-added "Added in `langchain-core` 1.0.0"
"""
# Needed here to avoid circular import, as these classes import BaseMessages

View File

@@ -12,10 +12,11 @@ the implementation in `BaseMessage`.
from __future__ import annotations
from collections.abc import Callable
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from collections.abc import Callable
from langchain_core.messages import AIMessage, AIMessageChunk
from langchain_core.messages import content as types

View File

@@ -368,7 +368,7 @@ def _convert_to_v1_from_genai(message: AIMessage) -> list[types.ContentBlock]:
else:
# Assume it's raw base64 without data URI
try:
# Validate base64 and decode for mime type detection
# Validate base64 and decode for MIME type detection
decoded_bytes = base64.b64decode(url, validate=True)
image_url_b64_block = {
@@ -379,7 +379,7 @@ def _convert_to_v1_from_genai(message: AIMessage) -> list[types.ContentBlock]:
try:
import filetype # type: ignore[import-not-found] # noqa: PLC0415
# Guess mime type based on file bytes
# Guess MIME type based on file bytes
mime_type = None
kind = filetype.guess(decoded_bytes)
if kind:
@@ -458,6 +458,8 @@ def _convert_to_v1_from_genai(message: AIMessage) -> list[types.ContentBlock]:
if outcome is not None:
server_tool_result_block["extras"]["outcome"] = outcome
converted_blocks.append(server_tool_result_block)
elif item_type == "text":
converted_blocks.append(cast("types.TextContentBlock", item))
else:
# Unknown type, preserve as non-standard
converted_blocks.append({"type": "non_standard", "value": item})

View File

@@ -4,7 +4,6 @@ from __future__ import annotations
import json
import warnings
from collections.abc import Iterable
from typing import TYPE_CHECKING, Any, Literal, cast
from langchain_core.language_models._utils import (
@@ -14,6 +13,8 @@ from langchain_core.language_models._utils import (
from langchain_core.messages import content as types
if TYPE_CHECKING:
from collections.abc import Iterable
from langchain_core.messages import AIMessage, AIMessageChunk

View File

@@ -644,7 +644,7 @@ class AudioContentBlock(TypedDict):
class PlainTextContentBlock(TypedDict):
"""Plaintext data (e.g., from a document).
"""Plaintext data (e.g., from a `.txt` or `.md` document).
!!! note
A `PlainTextContentBlock` existed in `langchain-core<1.0.0`. Although the
@@ -654,7 +654,7 @@ class PlainTextContentBlock(TypedDict):
!!! note
Title and context are optional fields that may be passed to the model. See
Anthropic [example](https://docs.claude.com/en/docs/build-with-claude/citations#citable-vs-non-citable-content).
Anthropic [example](https://platform.claude.com/docs/en/build-with-claude/citations#citable-vs-non-citable-content).
!!! note "Factory function"
`create_plaintext_block` may also be used as a factory to create a
@@ -767,7 +767,7 @@ class FileContentBlock(TypedDict):
class NonStandardContentBlock(TypedDict):
"""Provider-specific data.
"""Provider-specific content data.
This block contains data for which there is not yet a standard type.
@@ -802,7 +802,7 @@ class NonStandardContentBlock(TypedDict):
"""
value: dict[str, Any]
"""Provider-specific data."""
"""Provider-specific content data."""
index: NotRequired[int | str]
"""Index of block in aggregate response. Used during streaming."""
@@ -867,7 +867,7 @@ def _get_data_content_block_types() -> tuple[str, ...]:
Example: ("image", "video", "audio", "text-plain", "file")
Note that old style multimodal blocks share type literals with new style blocks.
Speficially, "image", "audio", and "file".
Specifically, "image", "audio", and "file".
See the docstring of `_normalize_messages` in `language_models._utils` for details.
"""
@@ -906,7 +906,7 @@ def is_data_content_block(block: dict) -> bool:
# 'text' is checked to support v0 PlainTextContentBlock types
# We must guard against new style TextContentBlock which also has 'text' `type`
# by ensuring the presense of `source_type`
# by ensuring the presence of `source_type`
if block["type"] == "text" and "source_type" not in block: # noqa: SIM103 # This is more readable
return False
@@ -1399,7 +1399,7 @@ def create_non_standard_block(
"""Create a `NonStandardContentBlock`.
Args:
value: Provider-specific data.
value: Provider-specific content data.
id: Content block identifier. Generated automatically if not provided.
index: Index of block in aggregate response. Used during streaming.

View File

@@ -86,7 +86,7 @@ AnyMessage = Annotated[
| Annotated[ToolMessageChunk, Tag(tag="ToolMessageChunk")],
Field(discriminator=Discriminator(_get_type)),
]
""""A type representing any defined `Message` or `MessageChunk` type."""
"""A type representing any defined `Message` or `MessageChunk` type."""
def get_buffer_string(
@@ -328,12 +328,16 @@ def _convert_to_message(message: MessageLikeRepresentation) -> BaseMessage:
"""
if isinstance(message, BaseMessage):
message_ = message
elif isinstance(message, str):
message_ = _create_message_from_message_type("human", message)
elif isinstance(message, Sequence) and len(message) == 2:
# mypy doesn't realise this can't be a string given the previous branch
message_type_str, template = message # type: ignore[misc]
message_ = _create_message_from_message_type(message_type_str, template)
elif isinstance(message, Sequence):
if isinstance(message, str):
message_ = _create_message_from_message_type("human", message)
else:
try:
message_type_str, template = message
except ValueError as e:
msg = "Message as a sequence must be (role string, template)"
raise NotImplementedError(msg) from e
message_ = _create_message_from_message_type(message_type_str, template)
elif isinstance(message, dict):
msg_kwargs = message.copy()
try:
@@ -734,8 +738,10 @@ def trim_messages(
Set to `len` to count the number of **messages** in the chat history.
!!! note
Use `count_tokens_approximately` to get fast, approximate token
counts.
This is recommended for using `trim_messages` on the hot path, where
exact token counting is not necessary.
@@ -1025,18 +1031,18 @@ def convert_to_openai_messages(
messages: Message-like object or iterable of objects whose contents are
in OpenAI, Anthropic, Bedrock Converse, or VertexAI formats.
text_format: How to format string or text block contents:
- `'string'`:
If a message has a string content, this is left as a string. If
a message has content blocks that are all of type `'text'`, these
are joined with a newline to make a single string. If a message has
content blocks and at least one isn't of type `'text'`, then
all blocks are left as dicts.
- `'block'`:
If a message has a string content, this is turned into a list
with a single content block of type `'text'`. If a message has
content blocks these are left as is.
include_id: Whether to include message ids in the openai messages, if they
are present in the source messages.
- `'string'`:
If a message has a string content, this is left as a string. If
a message has content blocks that are all of type `'text'`, these
are joined with a newline to make a single string. If a message has
content blocks and at least one isn't of type `'text'`, then
all blocks are left as dicts.
- `'block'`:
If a message has a string content, this is turned into a list
with a single content block of type `'text'`. If a message has
content blocks these are left as is.
include_id: Whether to include message IDs in the OpenAI messages, if they
are present in the source messages.
Raises:
ValueError: if an unrecognized `text_format` is specified, or if a message
@@ -1097,7 +1103,7 @@ def convert_to_openai_messages(
# ]
```
!!! version-added "Added in version 0.3.11"
!!! version-added "Added in `langchain-core` 0.3.11"
""" # noqa: E501
if text_format not in {"string", "block"}:
@@ -1697,7 +1703,7 @@ def count_tokens_approximately(
Warning:
This function does not currently support counting image tokens.
!!! version-added "Added in version 0.3.46"
!!! version-added "Added in `langchain-core` 0.3.46"
"""
token_count = 0.0

View File

@@ -1,4 +1,20 @@
"""**OutputParser** classes parse the output of an LLM call."""
"""`OutputParser` classes parse the output of an LLM call into structured data.
!!! tip "Structured output"
Output parsers emerged as an early solution to the challenge of obtaining structured
output from LLMs.
Today, most LLMs support [structured output](https://docs.langchain.com/oss/python/langchain/models#structured-outputs)
natively. In such cases, using output parsers may be unnecessary, and you should
leverage the model's built-in capabilities for structured output. Refer to the
[documentation of your chosen model](https://docs.langchain.com/oss/python/integrations/providers/overview)
for guidance on how to achieve structured output directly.
Output parsers remain valuable when working with models that do not support
structured output natively, or when you require additional processing or validation
of the model's output beyond its inherent capabilities.
"""
from typing import TYPE_CHECKING

View File

@@ -135,6 +135,9 @@ class BaseOutputParser(
Example:
```python
# Implement a simple boolean output parser
class BooleanOutputParser(BaseOutputParser[bool]):
true_val: str = "YES"
false_val: str = "NO"

View File

@@ -1,11 +1,16 @@
"""Format instructions."""
JSON_FORMAT_INSTRUCTIONS = """The output should be formatted as a JSON instance that conforms to the JSON schema below.
JSON_FORMAT_INSTRUCTIONS = """STRICT OUTPUT FORMAT:
- Return only the JSON value that conforms to the schema. Do not include any additional text, explanations, headings, or separators.
- Do not wrap the JSON in Markdown or code fences (no ``` or ```json).
- Do not prepend or append any text (e.g., do not write "Here is the JSON:").
- The response must be a single top-level JSON value exactly as required by the schema (object/array/etc.), with no trailing commas or comments.
As an example, for the schema {{"properties": {{"foo": {{"title": "Foo", "description": "a list of strings", "type": "array", "items": {{"type": "string"}}}}}}, "required": ["foo"]}}
the object {{"foo": ["bar", "baz"]}} is a well-formatted instance of the schema. The object {{"properties": {{"foo": ["bar", "baz"]}}}} is not well-formatted.
The output should be formatted as a JSON instance that conforms to the JSON schema below.
Here is the output schema:
As an example, for the schema {{"properties": {{"foo": {{"title": "Foo", "description": "a list of strings", "type": "array", "items": {{"type": "string"}}}}}}, "required": ["foo"]}} the object {{"foo": ["bar", "baz"]}} is a well-formatted instance of the schema. The object {{"properties": {{"foo": ["bar", "baz"]}}}} is not well-formatted.
Here is the output schema (shown in a code block for readability only — do not include any backticks or Markdown in your output):
```
{schema}
```""" # noqa: E501

View File

@@ -31,11 +31,14 @@ TBaseModel = TypeVar("TBaseModel", bound=PydanticBaseModel)
class JsonOutputParser(BaseCumulativeTransformOutputParser[Any]):
"""Parse the output of an LLM call to a JSON object.
Probably the most reliable output parser for getting structured data that does *not*
use function calling.
When used in streaming mode, it will yield partial JSON objects containing
all the keys that have been returned so far.
In streaming, if `diff` is set to `True`, yields JSONPatch operations
describing the difference between the previous and the current object.
In streaming, if `diff` is set to `True`, yields JSONPatch operations describing the
difference between the previous and the current object.
"""
pydantic_object: Annotated[type[TBaseModel] | None, SkipValidation()] = None # type: ignore[valid-type]

View File

@@ -41,7 +41,7 @@ def droplastn(
class ListOutputParser(BaseTransformOutputParser[list[str]]):
"""Parse the output of an LLM call to a list."""
"""Parse the output of a model to a list."""
@property
def _type(self) -> str:
@@ -74,30 +74,30 @@ class ListOutputParser(BaseTransformOutputParser[list[str]]):
buffer = ""
for chunk in input:
if isinstance(chunk, BaseMessage):
# extract text
# Extract text
chunk_content = chunk.content
if not isinstance(chunk_content, str):
continue
buffer += chunk_content
else:
# add current chunk to buffer
# Add current chunk to buffer
buffer += chunk
# parse buffer into a list of parts
# Parse buffer into a list of parts
try:
done_idx = 0
# yield only complete parts
# Yield only complete parts
for m in droplastn(self.parse_iter(buffer), 1):
done_idx = m.end()
yield [m.group(1)]
buffer = buffer[done_idx:]
except NotImplementedError:
parts = self.parse(buffer)
# yield only complete parts
# Yield only complete parts
if len(parts) > 1:
for part in parts[:-1]:
yield [part]
buffer = parts[-1]
# yield the last part
# Yield the last part
for part in self.parse(buffer):
yield [part]
@@ -108,40 +108,40 @@ class ListOutputParser(BaseTransformOutputParser[list[str]]):
buffer = ""
async for chunk in input:
if isinstance(chunk, BaseMessage):
# extract text
# Extract text
chunk_content = chunk.content
if not isinstance(chunk_content, str):
continue
buffer += chunk_content
else:
# add current chunk to buffer
# Add current chunk to buffer
buffer += chunk
# parse buffer into a list of parts
# Parse buffer into a list of parts
try:
done_idx = 0
# yield only complete parts
# Yield only complete parts
for m in droplastn(self.parse_iter(buffer), 1):
done_idx = m.end()
yield [m.group(1)]
buffer = buffer[done_idx:]
except NotImplementedError:
parts = self.parse(buffer)
# yield only complete parts
# Yield only complete parts
if len(parts) > 1:
for part in parts[:-1]:
yield [part]
buffer = parts[-1]
# yield the last part
# Yield the last part
for part in self.parse(buffer):
yield [part]
class CommaSeparatedListOutputParser(ListOutputParser):
"""Parse the output of an LLM call to a comma-separated list."""
"""Parse the output of a model to a comma-separated list."""
@classmethod
def is_lc_serializable(cls) -> bool:
"""Return True as this class is serializable."""
"""Return `True` as this class is serializable."""
return True
@classmethod
@@ -177,7 +177,7 @@ class CommaSeparatedListOutputParser(ListOutputParser):
)
return [item for sublist in reader for item in sublist]
except csv.Error:
# keep old logic for backup
# Keep old logic for backup
return [part.strip() for part in text.split(",")]
@property

View File

@@ -15,7 +15,11 @@ from langchain_core.messages.tool import tool_call as create_tool_call
from langchain_core.output_parsers.transform import BaseCumulativeTransformOutputParser
from langchain_core.outputs import ChatGeneration, Generation
from langchain_core.utils.json import parse_partial_json
from langchain_core.utils.pydantic import TypeBaseModel
from langchain_core.utils.pydantic import (
TypeBaseModel,
is_pydantic_v1_subclass,
is_pydantic_v2_subclass,
)
logger = logging.getLogger(__name__)
@@ -224,7 +228,7 @@ class JsonOutputKeyToolsParser(JsonOutputToolsParser):
result: The result of the LLM call.
partial: Whether to parse partial JSON.
If `True`, the output will be a JSON object containing
all the keys that have been returned so far.
all the keys that have been returned so far.
If `False`, the output will be the full JSON object.
Raises:
@@ -307,7 +311,7 @@ class PydanticToolsParser(JsonOutputToolsParser):
result: The result of the LLM call.
partial: Whether to parse partial JSON.
If `True`, the output will be a JSON object containing
all the keys that have been returned so far.
all the keys that have been returned so far.
If `False`, the output will be the full JSON object.
Returns:
@@ -323,7 +327,15 @@ class PydanticToolsParser(JsonOutputToolsParser):
return None if self.first_tool_only else []
json_results = [json_results] if self.first_tool_only else json_results
name_dict = {tool.__name__: tool for tool in self.tools}
name_dict_v2: dict[str, TypeBaseModel] = {
tool.model_config.get("title") or tool.__name__: tool
for tool in self.tools
if is_pydantic_v2_subclass(tool)
}
name_dict_v1: dict[str, TypeBaseModel] = {
tool.__name__: tool for tool in self.tools if is_pydantic_v1_subclass(tool)
}
name_dict: dict[str, TypeBaseModel] = {**name_dict_v2, **name_dict_v1}
pydantic_objects = []
for res in json_results:
if not isinstance(res["args"], dict):

View File

@@ -37,7 +37,7 @@ class PydanticOutputParser(JsonOutputParser, Generic[TBaseModel]):
def _parser_exception(
self, e: Exception, json_object: dict
) -> OutputParserException:
json_string = json.dumps(json_object)
json_string = json.dumps(json_object, ensure_ascii=False)
name = self.pydantic_object.__name__
msg = f"Failed to parse {name} from completion {json_string}. Got: {e}"
return OutputParserException(msg, llm_output=json_string)
@@ -86,7 +86,7 @@ class PydanticOutputParser(JsonOutputParser, Generic[TBaseModel]):
The format instructions for the JSON output.
"""
# Copy schema to avoid altering original Pydantic schema.
schema = dict(self.pydantic_object.model_json_schema().items())
schema = dict(self._get_schema(self.pydantic_object).items())
# Remove extraneous fields.
reduced_schema = schema

View File

@@ -6,14 +6,14 @@ from langchain_core.output_parsers.transform import BaseTransformOutputParser
class StrOutputParser(BaseTransformOutputParser[str]):
"""OutputParser that parses LLMResult into the top likely string."""
"""OutputParser that parses `LLMResult` into the top likely string."""
@classmethod
def is_lc_serializable(cls) -> bool:
"""StrOutputParser is serializable.
"""`StrOutputParser` is serializable.
Returns:
True
`True`
"""
return True

View File

@@ -43,19 +43,19 @@ class _StreamingParser:
"""Streaming parser for XML.
This implementation is pulled into a class to avoid implementation
drift between transform and atransform of the XMLOutputParser.
drift between transform and atransform of the `XMLOutputParser`.
"""
def __init__(self, parser: Literal["defusedxml", "xml"]) -> None:
"""Initialize the streaming parser.
Args:
parser: Parser to use for XML parsing. Can be either 'defusedxml' or 'xml'.
See documentation in XMLOutputParser for more information.
parser: Parser to use for XML parsing. Can be either `'defusedxml'` or
`'xml'`. See documentation in `XMLOutputParser` for more information.
Raises:
ImportError: If defusedxml is not installed and the defusedxml
parser is requested.
ImportError: If `defusedxml` is not installed and the `defusedxml` parser is
requested.
"""
if parser == "defusedxml":
if not _HAS_DEFUSEDXML:
@@ -79,10 +79,10 @@ class _StreamingParser:
"""Parse a chunk of text.
Args:
chunk: A chunk of text to parse. This can be a string or a BaseMessage.
chunk: A chunk of text to parse. This can be a `str` or a `BaseMessage`.
Yields:
A dictionary representing the parsed XML element.
A `dict` representing the parsed XML element.
Raises:
xml.etree.ElementTree.ParseError: If the XML is not well-formed.
@@ -147,46 +147,49 @@ class _StreamingParser:
class XMLOutputParser(BaseTransformOutputParser):
"""Parse an output using xml format."""
"""Parse an output using XML format.
Returns a dictionary of tags.
"""
tags: list[str] | None = None
"""Tags to tell the LLM to expect in the XML output.
Note this may not be perfect depending on the LLM implementation.
For example, with tags=["foo", "bar", "baz"]:
For example, with `tags=["foo", "bar", "baz"]`:
1. A well-formatted XML instance:
"<foo>\n <bar>\n <baz></baz>\n </bar>\n</foo>"
`"<foo>\n <bar>\n <baz></baz>\n </bar>\n</foo>"`
2. A badly-formatted XML instance (missing closing tag for 'bar'):
"<foo>\n <bar>\n </foo>"
`"<foo>\n <bar>\n </foo>"`
3. A badly-formatted XML instance (unexpected 'tag' element):
"<foo>\n <tag>\n </tag>\n</foo>"
`"<foo>\n <tag>\n </tag>\n</foo>"`
"""
encoding_matcher: re.Pattern = re.compile(
r"<([^>]*encoding[^>]*)>\n(.*)", re.MULTILINE | re.DOTALL
)
parser: Literal["defusedxml", "xml"] = "defusedxml"
"""Parser to use for XML parsing. Can be either 'defusedxml' or 'xml'.
"""Parser to use for XML parsing. Can be either `'defusedxml'` or `'xml'`.
* 'defusedxml' is the default parser and is used to prevent XML vulnerabilities
present in some distributions of Python's standard library xml.
`defusedxml` is a wrapper around the standard library parser that
sets up the parser with secure defaults.
* 'xml' is the standard library parser.
* `'defusedxml'` is the default parser and is used to prevent XML vulnerabilities
present in some distributions of Python's standard library xml.
`defusedxml` is a wrapper around the standard library parser that
sets up the parser with secure defaults.
* `'xml'` is the standard library parser.
Use `xml` only if you are sure that your distribution of the standard library
is not vulnerable to XML vulnerabilities.
Use `xml` only if you are sure that your distribution of the standard library is not
vulnerable to XML vulnerabilities.
Please review the following resources for more information:
* https://docs.python.org/3/library/xml.html#xml-vulnerabilities
* https://github.com/tiran/defusedxml
The standard library relies on libexpat for parsing XML:
https://github.com/libexpat/libexpat
The standard library relies on [`libexpat`](https://github.com/libexpat/libexpat)
for parsing XML.
"""
def get_format_instructions(self) -> str:
@@ -200,12 +203,12 @@ class XMLOutputParser(BaseTransformOutputParser):
text: The output of an LLM call.
Returns:
A dictionary representing the parsed XML.
A `dict` representing the parsed XML.
Raises:
OutputParserException: If the XML is not well-formed.
ImportError: If defusedxml is not installed and the defusedxml
parser is requested.
ImportError: If `defusedxml` is not installed and the `defusedxml` parser is
requested.
"""
# Try to find XML string within triple backticks
# Imports are temporarily placed here to avoid issue with caching on CI

View File

@@ -2,15 +2,17 @@
from __future__ import annotations
from typing import Literal
from typing import TYPE_CHECKING, Literal
from pydantic import model_validator
from typing_extensions import Self
from langchain_core.messages import BaseMessage, BaseMessageChunk
from langchain_core.outputs.generation import Generation
from langchain_core.utils._merge import merge_dicts
if TYPE_CHECKING:
from typing_extensions import Self
class ChatGeneration(Generation):
"""A single chat generation output.

View File

@@ -11,9 +11,8 @@ from langchain_core.utils._merge import merge_dicts
class Generation(Serializable):
"""A single text generation output.
Generation represents the response from an
`"old-fashioned" LLM <https://python.langchain.com/docs/concepts/text_llms/>__` that
generates regular text (not chat messages).
Generation represents the response from an "old-fashioned" LLM (string-in,
string-out) that generates regular text (not chat messages).
This model is used internally by chat model and will eventually
be mapped to a more general `LLMResult` object, and then projected into
@@ -21,8 +20,7 @@ class Generation(Serializable):
LangChain users working with chat models will usually access information via
`AIMessage` (returned from runnable interfaces) or `LLMResult` (available
via callbacks). Please refer the `AIMessage` and `LLMResult` schema documentation
for more information.
via callbacks). Please refer to `AIMessage` and `LLMResult` for more information.
"""
text: str
@@ -35,11 +33,13 @@ class Generation(Serializable):
"""
type: Literal["Generation"] = "Generation"
"""Type is used exclusively for serialization purposes.
Set to "Generation" for this class."""
Set to "Generation" for this class.
"""
@classmethod
def is_lc_serializable(cls) -> bool:
"""Return True as this class is serializable."""
"""Return `True` as this class is serializable."""
return True
@classmethod
@@ -53,7 +53,7 @@ class Generation(Serializable):
class GenerationChunk(Generation):
"""Generation chunk, which can be concatenated with other Generation chunks."""
"""`GenerationChunk`, which can be concatenated with other Generation chunks."""
def __add__(self, other: GenerationChunk) -> GenerationChunk:
"""Concatenate two `GenerationChunk`s.

View File

@@ -30,15 +30,13 @@ class PromptValue(Serializable, ABC):
@classmethod
def is_lc_serializable(cls) -> bool:
"""Return True as this class is serializable."""
"""Return `True` as this class is serializable."""
return True
@classmethod
def get_lc_namespace(cls) -> list[str]:
"""Get the namespace of the LangChain object.
This is used to determine the namespace of the object when serializing.
Returns:
`["langchain", "schema", "prompt"]`
"""
@@ -50,7 +48,7 @@ class PromptValue(Serializable, ABC):
@abstractmethod
def to_messages(self) -> list[BaseMessage]:
"""Return prompt as a list of Messages."""
"""Return prompt as a list of messages."""
class StringPromptValue(PromptValue):
@@ -64,8 +62,6 @@ class StringPromptValue(PromptValue):
def get_lc_namespace(cls) -> list[str]:
"""Get the namespace of the LangChain object.
This is used to determine the namespace of the object when serializing.
Returns:
`["langchain", "prompts", "base"]`
"""
@@ -101,8 +97,6 @@ class ChatPromptValue(PromptValue):
def get_lc_namespace(cls) -> list[str]:
"""Get the namespace of the LangChain object.
This is used to determine the namespace of the object when serializing.
Returns:
`["langchain", "prompts", "chat"]`
"""

View File

@@ -6,7 +6,7 @@ import contextlib
import json
import typing
from abc import ABC, abstractmethod
from collections.abc import Callable, Mapping
from collections.abc import Mapping
from functools import cached_property
from pathlib import Path
from typing import (
@@ -33,6 +33,8 @@ from langchain_core.runnables.config import ensure_config
from langchain_core.utils.pydantic import create_model_v2
if TYPE_CHECKING:
from collections.abc import Callable
from langchain_core.documents import Document
@@ -46,21 +48,27 @@ class BasePromptTemplate(
input_variables: list[str]
"""A list of the names of the variables whose values are required as inputs to the
prompt."""
prompt.
"""
optional_variables: list[str] = Field(default=[])
"""optional_variables: A list of the names of the variables for placeholder
or MessagePlaceholder that are optional. These variables are auto inferred
from the prompt and user need not provide them."""
"""A list of the names of the variables for placeholder or `MessagePlaceholder` that
are optional.
These variables are auto-inferred from the prompt, and the user need not provide them.
"""
input_types: typing.Dict[str, Any] = Field(default_factory=dict, exclude=True) # noqa: UP006
"""A dictionary of the types of the variables the prompt template expects.
If not provided, all variables are assumed to be strings."""
If not provided, all variables are assumed to be strings.
"""
output_parser: BaseOutputParser | None = None
"""How to parse the output of calling an LLM on this formatted prompt."""
partial_variables: Mapping[str, Any] = Field(default_factory=dict)
"""A dictionary of the partial variables the prompt template carries.
Partial variables populate the template so that you don't need to
pass them in every time you call the prompt."""
Partial variables populate the template so that you don't need to pass them in every
time you call the prompt.
"""
metadata: typing.Dict[str, Any] | None = None # noqa: UP006
"""Metadata to be used for tracing."""
tags: list[str] | None = None
@@ -105,7 +113,7 @@ class BasePromptTemplate(
@classmethod
def is_lc_serializable(cls) -> bool:
"""Return True as this class is serializable."""
"""Return `True` as this class is serializable."""
return True
model_config = ConfigDict(
@@ -127,7 +135,7 @@ class BasePromptTemplate(
"""Get the input schema for the prompt.
Args:
config: configuration for the prompt.
config: Configuration for the prompt.
Returns:
The input schema for the prompt.
@@ -195,8 +203,8 @@ class BasePromptTemplate(
"""Invoke the prompt.
Args:
input: Dict, input to the prompt.
config: RunnableConfig, configuration for the prompt.
input: Input to the prompt.
config: Configuration for the prompt.
Returns:
The output of the prompt.
@@ -221,8 +229,8 @@ class BasePromptTemplate(
"""Async invoke the prompt.
Args:
input: Dict, input to the prompt.
config: RunnableConfig, configuration for the prompt.
input: Input to the prompt.
config: Configuration for the prompt.
Returns:
The output of the prompt.
@@ -242,7 +250,7 @@ class BasePromptTemplate(
@abstractmethod
def format_prompt(self, **kwargs: Any) -> PromptValue:
"""Create Prompt Value.
"""Create `PromptValue`.
Args:
**kwargs: Any arguments to be passed to the prompt template.
@@ -252,7 +260,7 @@ class BasePromptTemplate(
"""
async def aformat_prompt(self, **kwargs: Any) -> PromptValue:
"""Async create Prompt Value.
"""Async create `PromptValue`.
Args:
**kwargs: Any arguments to be passed to the prompt template.
@@ -266,7 +274,7 @@ class BasePromptTemplate(
"""Return a partial of the prompt template.
Args:
**kwargs: partial variables to set.
**kwargs: Partial variables to set.
Returns:
A partial of the prompt template.
@@ -296,9 +304,9 @@ class BasePromptTemplate(
A formatted string.
Example:
```python
prompt.format(variable1="foo")
```
```python
prompt.format(variable1="foo")
```
"""
async def aformat(self, **kwargs: Any) -> FormatOutputType:
@@ -311,9 +319,9 @@ class BasePromptTemplate(
A formatted string.
Example:
```python
await prompt.aformat(variable1="foo")
```
```python
await prompt.aformat(variable1="foo")
```
"""
return self.format(**kwargs)
@@ -348,9 +356,9 @@ class BasePromptTemplate(
NotImplementedError: If the prompt type is not implemented.
Example:
```python
prompt.save(file_path="path/prompt.yaml")
```
```python
prompt.save(file_path="path/prompt.yaml")
```
"""
if self.partial_variables:
msg = "Cannot save prompt with partial variables."
@@ -402,23 +410,23 @@ def format_document(doc: Document, prompt: BasePromptTemplate[str]) -> str:
First, this pulls information from the document from two sources:
1. page_content:
This takes the information from the `document.page_content`
and assigns it to a variable named `page_content`.
2. metadata:
This takes information from `document.metadata` and assigns
it to variables of the same name.
1. `page_content`:
This takes the information from the `document.page_content` and assigns it to a
variable named `page_content`.
2. `metadata`:
This takes information from `document.metadata` and assigns it to variables of
the same name.
Those variables are then passed into the `prompt` to produce a formatted string.
Args:
doc: Document, the page_content and metadata will be used to create
doc: `Document`, the `page_content` and `metadata` will be used to create
the final string.
prompt: BasePromptTemplate, will be used to format the page_content
and metadata into the final string.
prompt: `BasePromptTemplate`, will be used to format the `page_content`
and `metadata` into the final string.
Returns:
string of the document formatted.
String of the document formatted.
Example:
```python
@@ -429,7 +437,6 @@ def format_document(doc: Document, prompt: BasePromptTemplate[str]) -> str:
prompt = PromptTemplate.from_template("Page {page}: {page_content}")
format_document(doc, prompt)
>>> "Page 1: This is a joke"
```
"""
return prompt.format(**_get_document_info(doc, prompt))
@@ -440,22 +447,22 @@ async def aformat_document(doc: Document, prompt: BasePromptTemplate[str]) -> st
First, this pulls information from the document from two sources:
1. page_content:
This takes the information from the `document.page_content`
and assigns it to a variable named `page_content`.
2. metadata:
This takes information from `document.metadata` and assigns
it to variables of the same name.
1. `page_content`:
This takes the information from the `document.page_content` and assigns it to a
variable named `page_content`.
2. `metadata`:
This takes information from `document.metadata` and assigns it to variables of
the same name.
Those variables are then passed into the `prompt` to produce a formatted string.
Args:
doc: Document, the page_content and metadata will be used to create
doc: `Document`, the `page_content` and `metadata` will be used to create
the final string.
prompt: BasePromptTemplate, will be used to format the page_content
and metadata into the final string.
prompt: `BasePromptTemplate`, will be used to format the `page_content`
and `metadata` into the final string.
Returns:
string of the document formatted.
String of the document formatted.
"""
return await prompt.aformat(**_get_document_info(doc, prompt))

View File

@@ -587,14 +587,15 @@ class _StringImageMessagePromptTemplate(BaseMessagePromptTemplate):
for prompt in self.prompt:
inputs = {var: kwargs[var] for var in prompt.input_variables}
if isinstance(prompt, StringPromptTemplate):
formatted: str | ImageURL | dict[str, Any] = prompt.format(**inputs)
content.append({"type": "text", "text": formatted})
formatted_text: str = prompt.format(**inputs)
if formatted_text != "":
content.append({"type": "text", "text": formatted_text})
elif isinstance(prompt, ImagePromptTemplate):
formatted = prompt.format(**inputs)
content.append({"type": "image_url", "image_url": formatted})
formatted_image: ImageURL = prompt.format(**inputs)
content.append({"type": "image_url", "image_url": formatted_image})
elif isinstance(prompt, DictPromptTemplate):
formatted = prompt.format(**inputs)
content.append(formatted)
formatted_dict: dict[str, Any] = prompt.format(**inputs)
content.append(formatted_dict)
return self._msg_class(
content=content, additional_kwargs=self.additional_kwargs
)
@@ -617,16 +618,15 @@ class _StringImageMessagePromptTemplate(BaseMessagePromptTemplate):
for prompt in self.prompt:
inputs = {var: kwargs[var] for var in prompt.input_variables}
if isinstance(prompt, StringPromptTemplate):
formatted: str | ImageURL | dict[str, Any] = await prompt.aformat(
**inputs
)
content.append({"type": "text", "text": formatted})
formatted_text: str = await prompt.aformat(**inputs)
if formatted_text != "":
content.append({"type": "text", "text": formatted_text})
elif isinstance(prompt, ImagePromptTemplate):
formatted = await prompt.aformat(**inputs)
content.append({"type": "image_url", "image_url": formatted})
formatted_image: ImageURL = await prompt.aformat(**inputs)
content.append({"type": "image_url", "image_url": formatted_image})
elif isinstance(prompt, DictPromptTemplate):
formatted = prompt.format(**inputs)
content.append(formatted)
formatted_dict: dict[str, Any] = prompt.format(**inputs)
content.append(formatted_dict)
return self._msg_class(
content=content, additional_kwargs=self.additional_kwargs
)
@@ -776,42 +776,36 @@ class ChatPromptTemplate(BaseChatPromptTemplate):
Use to create flexible templated prompts for chat models.
Examples:
!!! warning "Behavior changed in 0.2.24"
You can pass any Message-like formats supported by
`ChatPromptTemplate.from_messages()` directly to `ChatPromptTemplate()`
init.
```python
from langchain_core.prompts import ChatPromptTemplate
```python
from langchain_core.prompts import ChatPromptTemplate
template = ChatPromptTemplate(
[
("system", "You are a helpful AI bot. Your name is {name}."),
("human", "Hello, how are you doing?"),
("ai", "I'm doing well, thanks!"),
("human", "{user_input}"),
]
)
template = ChatPromptTemplate(
[
("system", "You are a helpful AI bot. Your name is {name}."),
("human", "Hello, how are you doing?"),
("ai", "I'm doing well, thanks!"),
("human", "{user_input}"),
]
)
prompt_value = template.invoke(
{
"name": "Bob",
"user_input": "What is your name?",
}
)
# Output:
# ChatPromptValue(
# messages=[
# SystemMessage(content='You are a helpful AI bot. Your name is Bob.'),
# HumanMessage(content='Hello, how are you doing?'),
# AIMessage(content="I'm doing well, thanks!"),
# HumanMessage(content='What is your name?')
# ]
# )
```
prompt_value = template.invoke(
{
"name": "Bob",
"user_input": "What is your name?",
}
)
# Output:
# ChatPromptValue(
# messages=[
# SystemMessage(content='You are a helpful AI bot. Your name is Bob.'),
# HumanMessage(content='Hello, how are you doing?'),
# AIMessage(content="I'm doing well, thanks!"),
# HumanMessage(content='What is your name?')
# ]
# )
```
Messages Placeholder:
!!! note "Messages Placeholder"
```python
# In addition to Human/AI/Tool/Function messages,
@@ -852,13 +846,12 @@ class ChatPromptTemplate(BaseChatPromptTemplate):
# )
```
Single-variable template:
!!! note "Single-variable template"
If your prompt has only a single input variable (i.e., 1 instance of "{variable_name}"),
and you invoke the template with a non-dict object, the prompt template will
inject the provided argument into that variable location.
```python
from langchain_core.prompts import ChatPromptTemplate
@@ -898,25 +891,40 @@ class ChatPromptTemplate(BaseChatPromptTemplate):
"""Create a chat prompt template from a variety of message formats.
Args:
messages: sequence of message representations.
messages: Sequence of message representations.
A message can be represented using the following formats:
(1) BaseMessagePromptTemplate, (2) BaseMessage, (3) 2-tuple of
(message type, template); e.g., ("human", "{user_input}"),
(4) 2-tuple of (message class, template), (5) a string which is
shorthand for ("human", template); e.g., "{user_input}".
template_format: format of the template.
input_variables: A list of the names of the variables whose values are
required as inputs to the prompt.
optional_variables: A list of the names of the variables for placeholder
or MessagePlaceholder that are optional.
These variables are auto inferred from the prompt and user need not
provide them.
partial_variables: A dictionary of the partial variables the prompt
template carries. Partial variables populate the template so that you
don't need to pass them in every time you call the prompt.
validate_template: Whether to validate the template.
input_types: A dictionary of the types of the variables the prompt template
expects. If not provided, all variables are assumed to be strings.
1. `BaseMessagePromptTemplate`
2. `BaseMessage`
3. 2-tuple of `(message type, template)`; e.g.,
`("human", "{user_input}")`
4. 2-tuple of `(message class, template)`
5. A string which is shorthand for `("human", template)`; e.g.,
`"{user_input}"`
template_format: Format of the template.
**kwargs: Additional keyword arguments passed to `BasePromptTemplate`,
including (but not limited to):
- `input_variables`: A list of the names of the variables whose values
are required as inputs to the prompt.
- `optional_variables`: A list of the names of the variables for
placeholder or `MessagesPlaceholder` that are optional.
These variables are auto inferred from the prompt and user need not
provide them.
- `partial_variables`: A dictionary of the partial variables the prompt
template carries.
Partial variables populate the template so that you don't need to
pass them in every time you call the prompt.
- `validate_template`: Whether to validate the template.
- `input_types`: A dictionary of the types of the variables the prompt
template expects.
If not provided, all variables are assumed to be strings.
Examples:
Instantiation from a list of message templates:
@@ -1121,12 +1129,17 @@ class ChatPromptTemplate(BaseChatPromptTemplate):
)
```
Args:
messages: sequence of message representations.
messages: Sequence of message representations.
A message can be represented using the following formats:
(1) BaseMessagePromptTemplate, (2) BaseMessage, (3) 2-tuple of
(message type, template); e.g., ("human", "{user_input}"),
(4) 2-tuple of (message class, template), (5) a string which is
shorthand for ("human", template); e.g., "{user_input}".
1. `BaseMessagePromptTemplate`
2. `BaseMessage`
3. 2-tuple of `(message type, template)`; e.g.,
`("human", "{user_input}")`
4. 2-tuple of `(message class, template)`
5. A string which is shorthand for `("human", template)`; e.g.,
`"{user_input}"`
template_format: format of the template.
Returns:
@@ -1238,7 +1251,7 @@ class ChatPromptTemplate(BaseChatPromptTemplate):
"""Extend the chat template with a sequence of messages.
Args:
messages: sequence of message representations to append.
messages: Sequence of message representations to append.
"""
self.messages.extend(
[_convert_to_message_template(message) for message in messages]
@@ -1335,11 +1348,25 @@ def _create_template_from_message_type(
raise ValueError(msg)
var_name = template[1:-1]
message = MessagesPlaceholder(variable_name=var_name, optional=True)
elif len(template) == 2 and isinstance(template[1], bool):
var_name_wrapped, is_optional = template
else:
try:
var_name_wrapped, is_optional = template
except ValueError as e:
msg = (
"Unexpected arguments for placeholder message type."
" Expected either a single string variable name"
" or a list of [variable_name: str, is_optional: bool]."
f" Got: {template}"
)
raise ValueError(msg) from e
if not isinstance(is_optional, bool):
msg = f"Expected is_optional to be a boolean. Got: {is_optional}"
raise ValueError(msg) # noqa: TRY004
if not isinstance(var_name_wrapped, str):
msg = f"Expected variable name to be a string. Got: {var_name_wrapped}"
raise ValueError(msg) # noqa:TRY004
raise ValueError(msg) # noqa: TRY004
if var_name_wrapped[0] != "{" or var_name_wrapped[-1] != "}":
msg = (
f"Invalid placeholder template: {var_name_wrapped}."
@@ -1349,14 +1376,6 @@ def _create_template_from_message_type(
var_name = var_name_wrapped[1:-1]
message = MessagesPlaceholder(variable_name=var_name, optional=is_optional)
else:
msg = (
"Unexpected arguments for placeholder message type."
" Expected either a single string variable name"
" or a list of [variable_name: str, is_optional: bool]."
f" Got: {template}"
)
raise ValueError(msg)
else:
msg = (
f"Unexpected message type: {message_type}. Use one of 'human',"
@@ -1410,10 +1429,11 @@ def _convert_to_message_template(
)
raise ValueError(msg)
message = (message["role"], message["content"])
if len(message) != 2:
try:
message_type_str, template = message
except ValueError as e:
msg = f"Expected 2-tuple of (role, template), got {message}"
raise ValueError(msg)
message_type_str, template = message
raise ValueError(msg) from e
if isinstance(message_type_str, str):
message_ = _create_template_from_message_type(
message_type_str, template, template_format=template_format

View File

@@ -69,7 +69,7 @@ class DictPromptTemplate(RunnableSerializable[dict, dict]):
@classmethod
def is_lc_serializable(cls) -> bool:
"""Return True as this class is serializable."""
"""Return `True` as this class is serializable."""
return True
@classmethod

View File

@@ -6,10 +6,10 @@ from abc import ABC, abstractmethod
from typing import TYPE_CHECKING, Any
from langchain_core.load import Serializable
from langchain_core.messages import BaseMessage
from langchain_core.utils.interactive_env import is_interactive_env
if TYPE_CHECKING:
from langchain_core.messages import BaseMessage
from langchain_core.prompts.chat import ChatPromptTemplate
@@ -18,7 +18,7 @@ class BaseMessagePromptTemplate(Serializable, ABC):
@classmethod
def is_lc_serializable(cls) -> bool:
"""Return True as this class is serializable."""
"""Return `True` as this class is serializable."""
return True
@classmethod
@@ -32,13 +32,13 @@ class BaseMessagePromptTemplate(Serializable, ABC):
@abstractmethod
def format_messages(self, **kwargs: Any) -> list[BaseMessage]:
"""Format messages from kwargs. Should return a list of BaseMessages.
"""Format messages from kwargs. Should return a list of `BaseMessage` objects.
Args:
**kwargs: Keyword arguments to use for formatting.
Returns:
List of BaseMessages.
List of `BaseMessage` objects.
"""
async def aformat_messages(self, **kwargs: Any) -> list[BaseMessage]:
@@ -48,7 +48,7 @@ class BaseMessagePromptTemplate(Serializable, ABC):
**kwargs: Keyword arguments to use for formatting.
Returns:
List of BaseMessages.
List of `BaseMessage` objects.
"""
return self.format_messages(**kwargs)

View File

@@ -4,9 +4,8 @@ from __future__ import annotations
import warnings
from abc import ABC
from collections.abc import Callable, Sequence
from string import Formatter
from typing import Any, Literal
from typing import TYPE_CHECKING, Any, Literal
from pydantic import BaseModel, create_model
@@ -16,10 +15,70 @@ from langchain_core.utils import get_colored_text, mustache
from langchain_core.utils.formatting import formatter
from langchain_core.utils.interactive_env import is_interactive_env
if TYPE_CHECKING:
from collections.abc import Callable, Sequence
try:
from jinja2 import Environment, meta
from jinja2 import meta
from jinja2.exceptions import SecurityError
from jinja2.sandbox import SandboxedEnvironment
class _RestrictedSandboxedEnvironment(SandboxedEnvironment):
    """Jinja2 sandbox that permits plain variable lookups and nothing else.

    Every attribute lookup and every call is refused, so a template can only
    reference context variables by name. The intent is to close off template
    injection through methods such as `parse_raw()`.
    """

    def is_safe_attribute(self, _obj: Any, _attr: str, _value: Any) -> bool:
        """Report every attribute as unsafe.

        Templates may read variables straight from the context dict, but may
        not reach into those objects.

        Args:
            _obj: Object whose attribute is requested (ignored).
            _attr: Name of the requested attribute (ignored).
            _value: Value of the requested attribute (ignored).

        Returns:
            Always `False`.
        """
        return False

    def is_safe_callable(self, _obj: Any) -> bool:
        """Report every callable as unsafe.

        Args:
            _obj: Object the template wants to call (ignored).

        Returns:
            Always `False`.
        """
        return False

    def getattr(self, obj: Any, attribute: str) -> Any:
        """Reject attribute access unconditionally.

        Args:
            obj: Object the template tried to read from.
            attribute: Name of the attribute the template asked for.

        Returns:
            Nothing; this method always raises.

        Raises:
            SecurityError: On every call, naming the attribute and the type of
                the object it was requested on.
        """
        owner = type(obj).__name__
        # Built as a local first, matching the file's msg-then-raise convention.
        msg = (
            "Access to attributes is not allowed in templates. "
            f"Attempted to access '{attribute}' on {owner}. "
            "Use only simple variable names like {{variable}} "
            "without dots or methods."
        )
        raise SecurityError(msg)
_HAS_JINJA2 = True
except ImportError:
_HAS_JINJA2 = False
@@ -59,14 +118,10 @@ def jinja2_formatter(template: str, /, **kwargs: Any) -> str:
)
raise ImportError(msg)
# This uses a sandboxed environment to prevent arbitrary code execution.
# Jinja2 uses an opt-out rather than opt-in approach for sand-boxing.
# Please treat this sand-boxing as a best-effort approach rather than
# a guarantee of security.
# We recommend to never use jinja2 templates with untrusted inputs.
# https://jinja.palletsprojects.com/en/3.1.x/sandbox/
# approach not a guarantee of security.
return SandboxedEnvironment().from_string(template).render(**kwargs)
# Use a restricted sandbox that blocks ALL attribute/method access
# Only simple variable lookups like {{variable}} are allowed
# Attribute access like {{variable.attr}} or {{variable.method()}} is blocked
return _RestrictedSandboxedEnvironment().from_string(template).render(**kwargs)
def validate_jinja2(template: str, input_variables: list[str]) -> None:
@@ -101,7 +156,7 @@ def _get_jinja2_variables_from_template(template: str) -> set[str]:
"Please install it with `pip install jinja2`."
)
raise ImportError(msg)
env = Environment() # noqa: S701
env = _RestrictedSandboxedEnvironment()
ast = env.parse(template)
return meta.find_undeclared_variables(ast)
@@ -122,13 +177,16 @@ def mustache_formatter(template: str, /, **kwargs: Any) -> str:
def mustache_template_vars(
template: str,
) -> set[str]:
"""Get the variables from a mustache template.
"""Get the top-level variables from a mustache template.
For nested variables like `{{person.name}}`, only the top-level
key (`person`) is returned.
Args:
template: The template string.
Returns:
The variables from the template.
The top-level variables from the template.
"""
variables: set[str] = set()
section_depth = 0
@@ -268,6 +326,30 @@ def get_template_variables(template: str, template_format: str) -> list[str]:
msg = f"Unsupported template format: {template_format}"
raise ValueError(msg)
# For f-strings, block attribute access and indexing syntax
# This prevents template injection attacks via accessing dangerous attributes
if template_format == "f-string":
for var in input_variables:
# Formatter().parse() returns field names with dots/brackets if present
# e.g., "obj.attr" or "obj[0]" - we need to block these
if "." in var or "[" in var or "]" in var:
msg = (
f"Invalid variable name {var!r} in f-string template. "
f"Variable names cannot contain attribute "
f"access (.) or indexing ([])."
)
raise ValueError(msg)
# Block variable names that are all digits (e.g., "0", "100")
# These are interpreted as positional arguments, not keyword arguments
if var.isdigit():
msg = (
f"Invalid variable name {var!r} in f-string template. "
f"Variable names cannot be all digits as they are interpreted "
f"as positional arguments."
)
raise ValueError(msg)
return sorted(input_variables)

View File

@@ -49,7 +49,13 @@ class StructuredPrompt(ChatPromptTemplate):
structured_output_kwargs: additional kwargs for structured output.
template_format: template format for the prompt.
"""
schema_ = schema_ or kwargs.pop("schema")
schema_ = schema_ or kwargs.pop("schema", None)
if not schema_:
err_msg = (
"Must pass in a non-empty structured output schema. Received: "
f"{schema_}"
)
raise ValueError(err_msg)
structured_output_kwargs = structured_output_kwargs or {}
for k in set(kwargs).difference(get_pydantic_field_names(self.__class__)):
structured_output_kwargs[k] = kwargs.pop(k)
@@ -104,19 +110,23 @@ class StructuredPrompt(ChatPromptTemplate):
)
```
Args:
messages: sequence of message representations.
messages: Sequence of message representations.
A message can be represented using the following formats:
(1) BaseMessagePromptTemplate, (2) BaseMessage, (3) 2-tuple of
(message type, template); e.g., ("human", "{user_input}"),
(4) 2-tuple of (message class, template), (5) a string which is
shorthand for ("human", template); e.g., "{user_input}"
schema: a dictionary representation of function call, or a Pydantic model.
1. `BaseMessagePromptTemplate`
2. `BaseMessage`
3. 2-tuple of `(message type, template)`; e.g.,
`("human", "{user_input}")`
4. 2-tuple of `(message class, template)`
5. A string which is shorthand for `("human", template)`; e.g.,
`"{user_input}"`
schema: A dictionary representation of function call, or a Pydantic model.
**kwargs: Any additional kwargs to pass through to
`ChatModel.with_structured_output(schema, **kwargs)`.
Returns:
a structured prompt template
A structured prompt template
"""
return cls(messages, schema, **kwargs)

View File

@@ -105,7 +105,9 @@ class InMemoryRateLimiter(BaseRateLimiter):
from langchain_anthropic import ChatAnthropic
model = ChatAnthropic(model_name="claude-sonnet-4-5", rate_limiter=rate_limiter)
model = ChatAnthropic(
model_name="claude-sonnet-4-5-20250929", rate_limiter=rate_limiter
)
for _ in range(5):
tic = time.time()

View File

@@ -50,65 +50,65 @@ class LangSmithRetrieverParams(TypedDict, total=False):
class BaseRetriever(RunnableSerializable[RetrieverInput, RetrieverOutput], ABC):
"""Abstract base class for a Document retrieval system.
"""Abstract base class for a document retrieval system.
A retrieval system is defined as something that can take string queries and return
the most 'relevant' Documents from some source.
the most 'relevant' documents from some source.
Usage:
A retriever follows the standard Runnable interface, and should be used
via the standard Runnable methods of `invoke`, `ainvoke`, `batch`, `abatch`.
A retriever follows the standard `Runnable` interface, and should be used via the
standard `Runnable` methods of `invoke`, `ainvoke`, `batch`, `abatch`.
Implementation:
When implementing a custom retriever, the class should implement
the `_get_relevant_documents` method to define the logic for retrieving documents.
When implementing a custom retriever, the class should implement the
`_get_relevant_documents` method to define the logic for retrieving documents.
Optionally, an async native implementation can be provided by overriding the
`_aget_relevant_documents` method.
Example: A retriever that returns the first 5 documents from a list of documents
!!! example "Retriever that returns the first 5 documents from a list of documents"
```python
from langchain_core.documents import Document
from langchain_core.retrievers import BaseRetriever
```python
from langchain_core.documents import Document
from langchain_core.retrievers import BaseRetriever
class SimpleRetriever(BaseRetriever):
docs: list[Document]
k: int = 5
class SimpleRetriever(BaseRetriever):
docs: list[Document]
k: int = 5
def _get_relevant_documents(self, query: str) -> list[Document]:
\"\"\"Return the first k documents from the list of documents\"\"\"
return self.docs[:self.k]
def _get_relevant_documents(self, query: str) -> list[Document]:
\"\"\"Return the first k documents from the list of documents\"\"\"
return self.docs[:self.k]
async def _aget_relevant_documents(self, query: str) -> list[Document]:
\"\"\"(Optional) async native implementation.\"\"\"
return self.docs[:self.k]
```
async def _aget_relevant_documents(self, query: str) -> list[Document]:
\"\"\"(Optional) async native implementation.\"\"\"
return self.docs[:self.k]
```
Example: A simple retriever based on a scikit-learn vectorizer
!!! example "Simple retriever based on a scikit-learn vectorizer"
```python
from sklearn.metrics.pairwise import cosine_similarity
```python
from sklearn.metrics.pairwise import cosine_similarity
class TFIDFRetriever(BaseRetriever, BaseModel):
vectorizer: Any
docs: list[Document]
tfidf_array: Any
k: int = 4
class TFIDFRetriever(BaseRetriever, BaseModel):
vectorizer: Any
docs: list[Document]
tfidf_array: Any
k: int = 4
class Config:
arbitrary_types_allowed = True
class Config:
arbitrary_types_allowed = True
def _get_relevant_documents(self, query: str) -> list[Document]:
# Ip -- (n_docs,x), Op -- (n_docs,n_Feats)
query_vec = self.vectorizer.transform([query])
# Op -- (n_docs,1) -- Cosine Sim with each doc
results = cosine_similarity(self.tfidf_array, query_vec).reshape((-1,))
return [self.docs[i] for i in results.argsort()[-self.k :][::-1]]
```
def _get_relevant_documents(self, query: str) -> list[Document]:
# Ip -- (n_docs,x), Op -- (n_docs,n_Feats)
query_vec = self.vectorizer.transform([query])
# Op -- (n_docs,1) -- Cosine Sim with each doc
results = cosine_similarity(self.tfidf_array, query_vec).reshape((-1,))
return [self.docs[i] for i in results.argsort()[-self.k :][::-1]]
```
"""
model_config = ConfigDict(
@@ -119,15 +119,19 @@ class BaseRetriever(RunnableSerializable[RetrieverInput, RetrieverOutput], ABC):
_expects_other_args: bool = False
tags: list[str] | None = None
"""Optional list of tags associated with the retriever.
These tags will be associated with each call to this retriever,
and passed as arguments to the handlers defined in `callbacks`.
You can use these to, e.g., identify a specific instance of a retriever with its
use case.
"""
metadata: dict[str, Any] | None = None
"""Optional metadata associated with the retriever.
This metadata will be associated with each call to this retriever,
and passed as arguments to the handlers defined in `callbacks`.
You can use these to, e.g., identify a specific instance of a retriever with its
use case.
"""

View File

@@ -118,6 +118,8 @@ if TYPE_CHECKING:
Other = TypeVar("Other")
_RUNNABLE_GENERIC_NUM_ARGS = 2 # Input and Output
class Runnable(ABC, Generic[Input, Output]):
"""A unit of work that can be invoked, batched, streamed, transformed and composed.
@@ -147,11 +149,11 @@ class Runnable(ABC, Generic[Input, Output]):
the `input_schema` property, the `output_schema` property and `config_schema`
method.
LCEL and Composition
====================
Composition
===========
Runnable objects can be composed together to create chains in a declarative way.
The LangChain Expression Language (LCEL) is a declarative way to compose
`Runnable` objects into chains.
Any chain constructed this way will automatically have sync, async, batch, and
streaming support.
@@ -235,21 +237,21 @@ class Runnable(ABC, Generic[Input, Output]):
You can set the global debug flag to True to enable debug output for all chains:
```python
from langchain_core.globals import set_debug
```python
from langchain_core.globals import set_debug
set_debug(True)
```
set_debug(True)
```
Alternatively, you can pass existing or custom callbacks to any given chain:
```python
from langchain_core.tracers import ConsoleCallbackHandler
```python
from langchain_core.tracers import ConsoleCallbackHandler
chain.invoke(..., config={"callbacks": [ConsoleCallbackHandler()]})
```
chain.invoke(..., config={"callbacks": [ConsoleCallbackHandler()]})
```
For a UI (and much more) checkout [LangSmith](https://docs.smith.langchain.com/).
For a UI (and much more) check out [LangSmith](https://docs.langchain.com/langsmith/home).
"""
@@ -309,7 +311,10 @@ class Runnable(ABC, Generic[Input, Output]):
for base in self.__class__.mro():
if hasattr(base, "__pydantic_generic_metadata__"):
metadata = base.__pydantic_generic_metadata__
if "args" in metadata and len(metadata["args"]) == 2:
if (
"args" in metadata
and len(metadata["args"]) == _RUNNABLE_GENERIC_NUM_ARGS
):
return metadata["args"][0]
# If we didn't find a Pydantic model in the parent classes,
@@ -317,7 +322,7 @@ class Runnable(ABC, Generic[Input, Output]):
# Runnables that are not pydantic models.
for cls in self.__class__.__orig_bases__: # type: ignore[attr-defined]
type_args = get_args(cls)
if type_args and len(type_args) == 2:
if type_args and len(type_args) == _RUNNABLE_GENERIC_NUM_ARGS:
return type_args[0]
msg = (
@@ -340,12 +345,15 @@ class Runnable(ABC, Generic[Input, Output]):
for base in self.__class__.mro():
if hasattr(base, "__pydantic_generic_metadata__"):
metadata = base.__pydantic_generic_metadata__
if "args" in metadata and len(metadata["args"]) == 2:
if (
"args" in metadata
and len(metadata["args"]) == _RUNNABLE_GENERIC_NUM_ARGS
):
return metadata["args"][1]
for cls in self.__class__.__orig_bases__: # type: ignore[attr-defined]
type_args = get_args(cls)
if type_args and len(type_args) == 2:
if type_args and len(type_args) == _RUNNABLE_GENERIC_NUM_ARGS:
return type_args[1]
msg = (
@@ -424,7 +432,7 @@ class Runnable(ABC, Generic[Input, Output]):
print(runnable.get_input_jsonschema())
```
!!! version-added "Added in version 0.3.0"
!!! version-added "Added in `langchain-core` 0.3.0"
"""
return self.get_input_schema(config).model_json_schema()
@@ -502,7 +510,7 @@ class Runnable(ABC, Generic[Input, Output]):
print(runnable.get_output_jsonschema())
```
!!! version-added "Added in version 0.3.0"
!!! version-added "Added in `langchain-core` 0.3.0"
"""
return self.get_output_schema(config).model_json_schema()
@@ -566,7 +574,7 @@ class Runnable(ABC, Generic[Input, Output]):
Returns:
A JSON schema that represents the config of the `Runnable`.
!!! version-added "Added in version 0.3.0"
!!! version-added "Added in `langchain-core` 0.3.0"
"""
return self.config_schema(include=include).model_json_schema()
@@ -699,51 +707,53 @@ class Runnable(ABC, Generic[Input, Output]):
def pick(self, keys: str | list[str]) -> RunnableSerializable[Any, Any]:
"""Pick keys from the output `dict` of this `Runnable`.
Pick a single key:
!!! example "Pick a single key"
```python
import json
```python
import json
from langchain_core.runnables import RunnableLambda, RunnableMap
from langchain_core.runnables import RunnableLambda, RunnableMap
as_str = RunnableLambda(str)
as_json = RunnableLambda(json.loads)
chain = RunnableMap(str=as_str, json=as_json)
as_str = RunnableLambda(str)
as_json = RunnableLambda(json.loads)
chain = RunnableMap(str=as_str, json=as_json)
chain.invoke("[1, 2, 3]")
# -> {"str": "[1, 2, 3]", "json": [1, 2, 3]}
chain.invoke("[1, 2, 3]")
# -> {"str": "[1, 2, 3]", "json": [1, 2, 3]}
json_only_chain = chain.pick("json")
json_only_chain.invoke("[1, 2, 3]")
# -> [1, 2, 3]
```
json_only_chain = chain.pick("json")
json_only_chain.invoke("[1, 2, 3]")
# -> [1, 2, 3]
```
Pick a list of keys:
!!! example "Pick a list of keys"
```python
from typing import Any
```python
from typing import Any
import json
import json
from langchain_core.runnables import RunnableLambda, RunnableMap
from langchain_core.runnables import RunnableLambda, RunnableMap
as_str = RunnableLambda(str)
as_json = RunnableLambda(json.loads)
as_str = RunnableLambda(str)
as_json = RunnableLambda(json.loads)
def as_bytes(x: Any) -> bytes:
return bytes(x, "utf-8")
def as_bytes(x: Any) -> bytes:
return bytes(x, "utf-8")
chain = RunnableMap(str=as_str, json=as_json, bytes=RunnableLambda(as_bytes))
chain = RunnableMap(
str=as_str, json=as_json, bytes=RunnableLambda(as_bytes)
)
chain.invoke("[1, 2, 3]")
# -> {"str": "[1, 2, 3]", "json": [1, 2, 3], "bytes": b"[1, 2, 3]"}
chain.invoke("[1, 2, 3]")
# -> {"str": "[1, 2, 3]", "json": [1, 2, 3], "bytes": b"[1, 2, 3]"}
json_and_bytes_chain = chain.pick(["json", "bytes"])
json_and_bytes_chain.invoke("[1, 2, 3]")
# -> {"json": [1, 2, 3], "bytes": b"[1, 2, 3]"}
```
json_and_bytes_chain = chain.pick(["json", "bytes"])
json_and_bytes_chain.invoke("[1, 2, 3]")
# -> {"json": [1, 2, 3], "bytes": b"[1, 2, 3]"}
```
Args:
keys: A key or list of keys to pick from the output dict.
@@ -766,7 +776,7 @@ class Runnable(ABC, Generic[Input, Output]):
"""Assigns new fields to the `dict` output of this `Runnable`.
```python
from langchain_community.llms.fake import FakeStreamingListLLM
from langchain_core.language_models.fake import FakeStreamingListLLM
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import SystemMessagePromptTemplate
from langchain_core.runnables import Runnable
@@ -818,10 +828,12 @@ class Runnable(ABC, Generic[Input, Output]):
Args:
input: The input to the `Runnable`.
config: A config to use when invoking the `Runnable`.
The config supports standard keys like `'tags'`, `'metadata'` for
tracing purposes, `'max_concurrency'` for controlling how much work to
do in parallel, and other keys. Please refer to the `RunnableConfig`
for more details.
do in parallel, and other keys.
Please refer to `RunnableConfig` for more details.
Returns:
The output of the `Runnable`.
@@ -838,10 +850,12 @@ class Runnable(ABC, Generic[Input, Output]):
Args:
input: The input to the `Runnable`.
config: A config to use when invoking the `Runnable`.
The config supports standard keys like `'tags'`, `'metadata'` for
tracing purposes, `'max_concurrency'` for controlling how much work to
do in parallel, and other keys. Please refer to the `RunnableConfig`
for more details.
do in parallel, and other keys.
Please refer to `RunnableConfig` for more details.
Returns:
The output of the `Runnable`.
@@ -868,8 +882,9 @@ class Runnable(ABC, Generic[Input, Output]):
config: A config to use when invoking the `Runnable`. The config supports
standard keys like `'tags'`, `'metadata'` for
tracing purposes, `'max_concurrency'` for controlling how much work
to do in parallel, and other keys. Please refer to the
`RunnableConfig` for more details.
to do in parallel, and other keys.
Please refer to `RunnableConfig` for more details.
return_exceptions: Whether to return exceptions instead of raising them.
**kwargs: Additional keyword arguments to pass to the `Runnable`.
@@ -932,10 +947,12 @@ class Runnable(ABC, Generic[Input, Output]):
Args:
inputs: A list of inputs to the `Runnable`.
config: A config to use when invoking the `Runnable`.
The config supports standard keys like `'tags'`, `'metadata'` for
tracing purposes, `'max_concurrency'` for controlling how much work to
do in parallel, and other keys. Please refer to the `RunnableConfig`
for more details.
do in parallel, and other keys.
Please refer to `RunnableConfig` for more details.
return_exceptions: Whether to return exceptions instead of raising them.
**kwargs: Additional keyword arguments to pass to the `Runnable`.
@@ -998,10 +1015,12 @@ class Runnable(ABC, Generic[Input, Output]):
Args:
inputs: A list of inputs to the `Runnable`.
config: A config to use when invoking the `Runnable`.
The config supports standard keys like `'tags'`, `'metadata'` for
tracing purposes, `'max_concurrency'` for controlling how much work to
do in parallel, and other keys. Please refer to the `RunnableConfig`
for more details.
do in parallel, and other keys.
Please refer to `RunnableConfig` for more details.
return_exceptions: Whether to return exceptions instead of raising them.
**kwargs: Additional keyword arguments to pass to the `Runnable`.
@@ -1061,10 +1080,12 @@ class Runnable(ABC, Generic[Input, Output]):
Args:
inputs: A list of inputs to the `Runnable`.
config: A config to use when invoking the `Runnable`.
The config supports standard keys like `'tags'`, `'metadata'` for
tracing purposes, `'max_concurrency'` for controlling how much work to
do in parallel, and other keys. Please refer to the `RunnableConfig`
for more details.
do in parallel, and other keys.
Please refer to `RunnableConfig` for more details.
return_exceptions: Whether to return exceptions instead of raising them.
**kwargs: Additional keyword arguments to pass to the `Runnable`.
@@ -1353,48 +1374,50 @@ class Runnable(ABC, Generic[Input, Output]):
).with_config({"run_name": "my_template", "tags": ["my_template"]})
```
For instance:
!!! example
```python
from langchain_core.runnables import RunnableLambda
```python
from langchain_core.runnables import RunnableLambda
async def reverse(s: str) -> str:
return s[::-1]
async def reverse(s: str) -> str:
return s[::-1]
chain = RunnableLambda(func=reverse)
chain = RunnableLambda(func=reverse)
events = [event async for event in chain.astream_events("hello", version="v2")]
events = [
event async for event in chain.astream_events("hello", version="v2")
]
# Will produce the following events
# (run_id and parent_ids have been omitted for brevity):
[
{
"data": {"input": "hello"},
"event": "on_chain_start",
"metadata": {},
"name": "reverse",
"tags": [],
},
{
"data": {"chunk": "olleh"},
"event": "on_chain_stream",
"metadata": {},
"name": "reverse",
"tags": [],
},
{
"data": {"output": "olleh"},
"event": "on_chain_end",
"metadata": {},
"name": "reverse",
"tags": [],
},
]
```
# Will produce the following events
# (run_id and parent_ids have been omitted for brevity):
[
{
"data": {"input": "hello"},
"event": "on_chain_start",
"metadata": {},
"name": "reverse",
"tags": [],
},
{
"data": {"chunk": "olleh"},
"event": "on_chain_stream",
"metadata": {},
"name": "reverse",
"tags": [],
},
{
"data": {"output": "olleh"},
"event": "on_chain_end",
"metadata": {},
"name": "reverse",
"tags": [],
},
]
```
```python title="Example: Dispatch Custom Event"
```python title="Dispatch custom event"
from langchain_core.callbacks.manager import (
adispatch_custom_event,
)
@@ -1428,10 +1451,13 @@ class Runnable(ABC, Generic[Input, Output]):
Args:
input: The input to the `Runnable`.
config: The config to use for the `Runnable`.
version: The version of the schema to use either `'v2'` or `'v1'`.
version: The version of the schema to use, either `'v2'` or `'v1'`.
Users should use `'v2'`.
`'v1'` is for backwards compatibility and will be deprecated
in `0.4.0`.
No default will be assigned until the API is stabilized.
Custom events will only be surfaced in `'v2'`.
include_names: Only include events from `Runnable` objects with matching names.
@@ -1441,6 +1467,7 @@ class Runnable(ABC, Generic[Input, Output]):
exclude_types: Exclude events from `Runnable` objects with matching types.
exclude_tags: Exclude events from `Runnable` objects with matching tags.
**kwargs: Additional keyword arguments to pass to the `Runnable`.
These will be passed to `astream_log` as this implementation
of `astream_events` is built on top of `astream_log`.
@@ -1742,46 +1769,52 @@ class Runnable(ABC, Generic[Input, Output]):
import time
import asyncio
def format_t(timestamp: float) -> str:
return datetime.fromtimestamp(timestamp, tz=timezone.utc).isoformat()
async def test_runnable(time_to_sleep: int):
print(f"Runnable[{time_to_sleep}s]: starts at {format_t(time.time())}")
await asyncio.sleep(time_to_sleep)
print(f"Runnable[{time_to_sleep}s]: ends at {format_t(time.time())}")
async def fn_start(run_obj: Runnable):
print(f"on start callback starts at {format_t(time.time())}")
await asyncio.sleep(3)
print(f"on start callback ends at {format_t(time.time())}")
async def fn_end(run_obj: Runnable):
print(f"on end callback starts at {format_t(time.time())}")
await asyncio.sleep(2)
print(f"on end callback ends at {format_t(time.time())}")
runnable = RunnableLambda(test_runnable).with_alisteners(
on_start=fn_start,
on_end=fn_end
on_start=fn_start, on_end=fn_end
)
async def concurrent_runs():
await asyncio.gather(runnable.ainvoke(2), runnable.ainvoke(3))
asyncio.run(concurrent_runs())
Result:
on start callback starts at 2025-03-01T07:05:22.875378+00:00
on start callback starts at 2025-03-01T07:05:22.875495+00:00
on start callback ends at 2025-03-01T07:05:25.878862+00:00
on start callback ends at 2025-03-01T07:05:25.878947+00:00
Runnable[2s]: starts at 2025-03-01T07:05:25.879392+00:00
Runnable[3s]: starts at 2025-03-01T07:05:25.879804+00:00
Runnable[2s]: ends at 2025-03-01T07:05:27.881998+00:00
on end callback starts at 2025-03-01T07:05:27.882360+00:00
Runnable[3s]: ends at 2025-03-01T07:05:28.881737+00:00
on end callback starts at 2025-03-01T07:05:28.882428+00:00
on end callback ends at 2025-03-01T07:05:29.883893+00:00
on end callback ends at 2025-03-01T07:05:30.884831+00:00
asyncio.run(concurrent_runs())
# Result:
# on start callback starts at 2025-03-01T07:05:22.875378+00:00
# on start callback starts at 2025-03-01T07:05:22.875495+00:00
# on start callback ends at 2025-03-01T07:05:25.878862+00:00
# on start callback ends at 2025-03-01T07:05:25.878947+00:00
# Runnable[2s]: starts at 2025-03-01T07:05:25.879392+00:00
# Runnable[3s]: starts at 2025-03-01T07:05:25.879804+00:00
# Runnable[2s]: ends at 2025-03-01T07:05:27.881998+00:00
# on end callback starts at 2025-03-01T07:05:27.882360+00:00
# Runnable[3s]: ends at 2025-03-01T07:05:28.881737+00:00
# on end callback starts at 2025-03-01T07:05:28.882428+00:00
# on end callback ends at 2025-03-01T07:05:29.883893+00:00
# on end callback ends at 2025-03-01T07:05:30.884831+00:00
```
"""
return RunnableBinding(
@@ -1843,7 +1876,7 @@ class Runnable(ABC, Generic[Input, Output]):
`exp_base`, and `jitter` (all `float` values).
Returns:
A new Runnable that retries the original Runnable on exceptions.
A new `Runnable` that retries the original `Runnable` on exceptions.
Example:
```python
@@ -1927,7 +1960,9 @@ class Runnable(ABC, Generic[Input, Output]):
exceptions_to_handle: A tuple of exception types to handle.
exception_key: If `string` is specified then handled exceptions will be
passed to fallbacks as part of the input under the specified key.
If `None`, exceptions will not be passed to fallbacks.
If used, the base `Runnable` and its fallbacks must accept a
dictionary as input.
@@ -1963,7 +1998,9 @@ class Runnable(ABC, Generic[Input, Output]):
exceptions_to_handle: A tuple of exception types to handle.
exception_key: If `string` is specified then handled exceptions will be
passed to fallbacks as part of the input under the specified key.
If `None`, exceptions will not be passed to fallbacks.
If used, the base `Runnable` and its fallbacks must accept a
dictionary as input.
@@ -2429,10 +2466,14 @@ class Runnable(ABC, Generic[Input, Output]):
`as_tool` will instantiate a `BaseTool` with a name, description, and
`args_schema` from a `Runnable`. Where possible, schemas are inferred
from `runnable.get_input_schema`. Alternatively (e.g., if the
`Runnable` takes a dict as input and the specific dict keys are not typed),
the schema can be specified directly with `args_schema`. You can also
pass `arg_types` to just specify the required arguments and their types.
from `runnable.get_input_schema`.
Alternatively (e.g., if the `Runnable` takes a dict as input and the specific
`dict` keys are not typed), the schema can be specified directly with
`args_schema`.
You can also pass `arg_types` to just specify the required arguments and their
types.
Args:
args_schema: The schema for the tool.
@@ -2443,82 +2484,82 @@ class Runnable(ABC, Generic[Input, Output]):
Returns:
A `BaseTool` instance.
Typed dict input:
!!! example "`TypedDict` input"
```python
from typing_extensions import TypedDict
from langchain_core.runnables import RunnableLambda
```python
from typing_extensions import TypedDict
from langchain_core.runnables import RunnableLambda
class Args(TypedDict):
a: int
b: list[int]
class Args(TypedDict):
a: int
b: list[int]
def f(x: Args) -> str:
return str(x["a"] * max(x["b"]))
def f(x: Args) -> str:
return str(x["a"] * max(x["b"]))
runnable = RunnableLambda(f)
as_tool = runnable.as_tool()
as_tool.invoke({"a": 3, "b": [1, 2]})
```
runnable = RunnableLambda(f)
as_tool = runnable.as_tool()
as_tool.invoke({"a": 3, "b": [1, 2]})
```
`dict` input, specifying schema via `args_schema`:
!!! example "`dict` input, specifying schema via `args_schema`"
```python
from typing import Any
from pydantic import BaseModel, Field
from langchain_core.runnables import RunnableLambda
```python
from typing import Any
from pydantic import BaseModel, Field
from langchain_core.runnables import RunnableLambda
def f(x: dict[str, Any]) -> str:
return str(x["a"] * max(x["b"]))
def f(x: dict[str, Any]) -> str:
return str(x["a"] * max(x["b"]))
class FSchema(BaseModel):
\"\"\"Apply a function to an integer and list of integers.\"\"\"
class FSchema(BaseModel):
\"\"\"Apply a function to an integer and list of integers.\"\"\"
a: int = Field(..., description="Integer")
b: list[int] = Field(..., description="List of ints")
a: int = Field(..., description="Integer")
b: list[int] = Field(..., description="List of ints")
runnable = RunnableLambda(f)
as_tool = runnable.as_tool(FSchema)
as_tool.invoke({"a": 3, "b": [1, 2]})
```
runnable = RunnableLambda(f)
as_tool = runnable.as_tool(FSchema)
as_tool.invoke({"a": 3, "b": [1, 2]})
```
`dict` input, specifying schema via `arg_types`:
!!! example "`dict` input, specifying schema via `arg_types`"
```python
from typing import Any
from langchain_core.runnables import RunnableLambda
```python
from typing import Any
from langchain_core.runnables import RunnableLambda
def f(x: dict[str, Any]) -> str:
return str(x["a"] * max(x["b"]))
def f(x: dict[str, Any]) -> str:
return str(x["a"] * max(x["b"]))
runnable = RunnableLambda(f)
as_tool = runnable.as_tool(arg_types={"a": int, "b": list[int]})
as_tool.invoke({"a": 3, "b": [1, 2]})
```
runnable = RunnableLambda(f)
as_tool = runnable.as_tool(arg_types={"a": int, "b": list[int]})
as_tool.invoke({"a": 3, "b": [1, 2]})
```
String input:
!!! example "`str` input"
```python
from langchain_core.runnables import RunnableLambda
```python
from langchain_core.runnables import RunnableLambda
def f(x: str) -> str:
return x + "a"
def f(x: str) -> str:
return x + "a"
def g(x: str) -> str:
return x + "z"
def g(x: str) -> str:
return x + "z"
runnable = RunnableLambda(f) | g
as_tool = runnable.as_tool()
as_tool.invoke("b")
```
runnable = RunnableLambda(f) | g
as_tool = runnable.as_tool()
as_tool.invoke("b")
```
"""
# Avoid circular import
from langchain_core.tools import convert_runnable_to_tool # noqa: PLC0415
@@ -2570,29 +2611,33 @@ class RunnableSerializable(Serializable, Runnable[Input, Output]):
Returns:
A new `Runnable` with the fields configured.
```python
from langchain_core.runnables import ConfigurableField
from langchain_openai import ChatOpenAI
!!! example
model = ChatOpenAI(max_tokens=20).configurable_fields(
max_tokens=ConfigurableField(
id="output_token_number",
name="Max tokens in the output",
description="The maximum number of tokens in the output",
```python
from langchain_core.runnables import ConfigurableField
from langchain_openai import ChatOpenAI
model = ChatOpenAI(max_tokens=20).configurable_fields(
max_tokens=ConfigurableField(
id="output_token_number",
name="Max tokens in the output",
description="The maximum number of tokens in the output",
)
)
)
# max_tokens = 20
print("max_tokens_20: ", model.invoke("tell me something about chess").content)
# max_tokens = 20
print(
"max_tokens_20: ", model.invoke("tell me something about chess").content
)
# max_tokens = 200
print(
"max_tokens_200: ",
model.with_config(configurable={"output_token_number": 200})
.invoke("tell me something about chess")
.content,
)
```
# max_tokens = 200
print(
"max_tokens_200: ",
model.with_config(configurable={"output_token_number": 200})
.invoke("tell me something about chess")
.content,
)
```
"""
# Import locally to prevent circular import
from langchain_core.runnables.configurable import ( # noqa: PLC0415
@@ -2631,29 +2676,31 @@ class RunnableSerializable(Serializable, Runnable[Input, Output]):
Returns:
A new `Runnable` with the alternatives configured.
```python
from langchain_anthropic import ChatAnthropic
from langchain_core.runnables.utils import ConfigurableField
from langchain_openai import ChatOpenAI
!!! example
model = ChatAnthropic(
model_name="claude-3-7-sonnet-20250219"
).configurable_alternatives(
ConfigurableField(id="llm"),
default_key="anthropic",
openai=ChatOpenAI(),
)
```python
from langchain_anthropic import ChatAnthropic
from langchain_core.runnables.utils import ConfigurableField
from langchain_openai import ChatOpenAI
# uses the default model ChatAnthropic
print(model.invoke("which organization created you?").content)
model = ChatAnthropic(
model_name="claude-sonnet-4-5-20250929"
).configurable_alternatives(
ConfigurableField(id="llm"),
default_key="anthropic",
openai=ChatOpenAI(),
)
# uses ChatOpenAI
print(
model.with_config(configurable={"llm": "openai"})
.invoke("which organization created you?")
.content
)
```
# uses the default model ChatAnthropic
print(model.invoke("which organization created you?").content)
# uses ChatOpenAI
print(
model.with_config(configurable={"llm": "openai"})
.invoke("which organization created you?")
.content
)
```
"""
# Import locally to prevent circular import
from langchain_core.runnables.configurable import ( # noqa: PLC0415
@@ -2750,6 +2797,9 @@ def _seq_output_schema(
return last.get_output_schema(config)
_RUNNABLE_SEQUENCE_MIN_STEPS = 2
class RunnableSequence(RunnableSerializable[Input, Output]):
"""Sequence of `Runnable` objects, where the output of one is the input of the next.
@@ -2859,7 +2909,7 @@ class RunnableSequence(RunnableSerializable[Input, Output]):
name: The name of the `Runnable`.
first: The first `Runnable` in the sequence.
middle: The middle `Runnable` objects in the sequence.
last: The last Runnable in the sequence.
last: The last `Runnable` in the sequence.
Raises:
ValueError: If the sequence has less than 2 steps.
@@ -2872,8 +2922,11 @@ class RunnableSequence(RunnableSerializable[Input, Output]):
steps_flat.extend(step.steps)
else:
steps_flat.append(coerce_to_runnable(step))
if len(steps_flat) < 2:
msg = f"RunnableSequence must have at least 2 steps, got {len(steps_flat)}"
if len(steps_flat) < _RUNNABLE_SEQUENCE_MIN_STEPS:
msg = (
f"RunnableSequence must have at least {_RUNNABLE_SEQUENCE_MIN_STEPS} "
f"steps, got {len(steps_flat)}"
)
raise ValueError(msg)
super().__init__(
first=steps_flat[0],
@@ -2904,7 +2957,7 @@ class RunnableSequence(RunnableSerializable[Input, Output]):
@classmethod
@override
def is_lc_serializable(cls) -> bool:
"""Return True as this class is serializable."""
"""Return `True` as this class is serializable."""
return True
model_config = ConfigDict(
@@ -3500,7 +3553,7 @@ class RunnableParallel(RunnableSerializable[Input, dict[str, Any]]):
Returns a mapping of their outputs.
`RunnableParallel` is one of the two main composition primitives for the LCEL,
`RunnableParallel` is one of the two main composition primitives,
alongside `RunnableSequence`. It invokes `Runnable`s concurrently, providing the
same input to each.
@@ -3610,7 +3663,7 @@ class RunnableParallel(RunnableSerializable[Input, dict[str, Any]]):
@classmethod
@override
def is_lc_serializable(cls) -> bool:
"""Return True as this class is serializable."""
"""Return `True` as this class is serializable."""
return True
@classmethod
@@ -3668,6 +3721,12 @@ class RunnableParallel(RunnableSerializable[Input, dict[str, Any]]):
== "object"
for s in self.steps__.values()
):
for step in self.steps__.values():
fields = step.get_input_schema(config).model_fields
root_field = fields.get("root")
if root_field is not None and root_field.annotation != Any:
return super().get_input_schema(config)
# This is correct, but pydantic typings/mypy don't think so.
return create_model_v2(
self.get_name("Input"),
@@ -4477,7 +4536,7 @@ class RunnableLambda(Runnable[Input, Output]):
# on itemgetter objects, so we have to parse the repr
items = str(func).replace("operator.itemgetter(", "")[:-1].split(", ")
if all(
item[0] == "'" and item[-1] == "'" and len(item) > 2 for item in items
item[0] == "'" and item[-1] == "'" and item != "''" for item in items
):
fields = {item[1:-1]: (Any, ...) for item in items}
# It's a dict, lol
@@ -5139,7 +5198,7 @@ class RunnableEachBase(RunnableSerializable[list[Input], list[Output]]):
@classmethod
@override
def is_lc_serializable(cls) -> bool:
"""Return True as this class is serializable."""
"""Return `True` as this class is serializable."""
return True
@classmethod
@@ -5322,7 +5381,7 @@ class RunnableEach(RunnableEachBase[Input, Output]):
class RunnableBindingBase(RunnableSerializable[Input, Output]): # type: ignore[no-redef]
"""`Runnable` that delegates calls to another `Runnable` with a set of kwargs.
"""`Runnable` that delegates calls to another `Runnable` with a set of `**kwargs`.
Use only if creating a new `RunnableBinding` subclass with different `__init__`
args.
@@ -5462,7 +5521,7 @@ class RunnableBindingBase(RunnableSerializable[Input, Output]): # type: ignore[
@classmethod
@override
def is_lc_serializable(cls) -> bool:
"""Return True as this class is serializable."""
"""Return `True` as this class is serializable."""
return True
@classmethod
@@ -5752,7 +5811,7 @@ class RunnableBinding(RunnableBindingBase[Input, Output]): # type: ignore[no-re
```python
# Create a Runnable binding that invokes the chat model with the
# additional kwarg `stop=['-']` when running it.
from langchain_community.chat_models import ChatOpenAI
from langchain_openai import ChatOpenAI
model = ChatOpenAI()
model.invoke('Say "Parrot-MAGIC"', stop=["-"]) # Should return `Parrot`

View File

@@ -36,11 +36,13 @@ from langchain_core.runnables.utils import (
get_unique_config_specs,
)
_MIN_BRANCHES = 2
class RunnableBranch(RunnableSerializable[Input, Output]):
"""Runnable that selects which branch to run based on a condition.
"""`Runnable` that selects which branch to run based on a condition.
The Runnable is initialized with a list of `(condition, Runnable)` pairs and
The `Runnable` is initialized with a list of `(condition, Runnable)` pairs and
a default branch.
When operating on an input, the first condition that evaluates to True is
@@ -86,12 +88,12 @@ class RunnableBranch(RunnableSerializable[Input, Output]):
The default is a `Runnable` to run if no condition is met.
Raises:
ValueError: If the number of branches is less than 2.
ValueError: If the number of branches is less than `2`.
TypeError: If the default branch is not `Runnable`, `Callable` or `Mapping`.
TypeError: If a branch is not a tuple or list.
ValueError: If a branch is not of length 2.
TypeError: If a branch is not a `tuple` or `list`.
ValueError: If a branch is not of length `2`.
"""
if len(branches) < 2:
if len(branches) < _MIN_BRANCHES:
msg = "RunnableBranch requires at least two branches"
raise ValueError(msg)
@@ -118,7 +120,7 @@ class RunnableBranch(RunnableSerializable[Input, Output]):
)
raise TypeError(msg)
if len(branch) != 2:
if len(branch) != _MIN_BRANCHES:
msg = (
f"RunnableBranch branches must be "
f"tuples or lists of length 2, not {len(branch)}"
@@ -140,7 +142,7 @@ class RunnableBranch(RunnableSerializable[Input, Output]):
@classmethod
def is_lc_serializable(cls) -> bool:
"""Return True as this class is serializable."""
"""Return `True` as this class is serializable."""
return True
@classmethod
@@ -187,12 +189,12 @@ class RunnableBranch(RunnableSerializable[Input, Output]):
def invoke(
self, input: Input, config: RunnableConfig | None = None, **kwargs: Any
) -> Output:
"""First evaluates the condition, then delegate to true or false branch.
"""First evaluates the condition, then delegate to `True` or `False` branch.
Args:
input: The input to the Runnable.
config: The configuration for the Runnable.
**kwargs: Additional keyword arguments to pass to the Runnable.
input: The input to the `Runnable`.
config: The configuration for the `Runnable`.
**kwargs: Additional keyword arguments to pass to the `Runnable`.
Returns:
The output of the branch that was run.
@@ -297,12 +299,12 @@ class RunnableBranch(RunnableSerializable[Input, Output]):
config: RunnableConfig | None = None,
**kwargs: Any | None,
) -> Iterator[Output]:
"""First evaluates the condition, then delegate to true or false branch.
"""First evaluates the condition, then delegate to `True` or `False` branch.
Args:
input: The input to the Runnable.
config: The configuration for the Runnable.
**kwargs: Additional keyword arguments to pass to the Runnable.
input: The input to the `Runnable`.
config: The configuration for the `Runnable`.
**kwargs: Additional keyword arguments to pass to the `Runnable`.
Yields:
The output of the branch that was run.
@@ -381,12 +383,12 @@ class RunnableBranch(RunnableSerializable[Input, Output]):
config: RunnableConfig | None = None,
**kwargs: Any | None,
) -> AsyncIterator[Output]:
"""First evaluates the condition, then delegate to true or false branch.
"""First evaluates the condition, then delegate to `True` or `False` branch.
Args:
input: The input to the Runnable.
config: The configuration for the Runnable.
**kwargs: Additional keyword arguments to pass to the Runnable.
input: The input to the `Runnable`.
config: The configuration for the `Runnable`.
**kwargs: Additional keyword arguments to pass to the `Runnable`.
Yields:
The output of the branch that was run.

View File

@@ -47,54 +47,59 @@ class EmptyDict(TypedDict, total=False):
class RunnableConfig(TypedDict, total=False):
"""Configuration for a Runnable."""
"""Configuration for a `Runnable`.
See the [reference docs](https://reference.langchain.com/python/langchain_core/runnables/#langchain_core.runnables.RunnableConfig)
for more details.
"""
tags: list[str]
"""
Tags for this call and any sub-calls (eg. a Chain calling an LLM).
"""Tags for this call and any sub-calls (e.g. a Chain calling an LLM).
You can use these to filter calls.
"""
metadata: dict[str, Any]
"""
Metadata for this call and any sub-calls (eg. a Chain calling an LLM).
"""Metadata for this call and any sub-calls (e.g. a Chain calling an LLM).
Keys should be strings, values should be JSON-serializable.
"""
callbacks: Callbacks
"""
Callbacks for this call and any sub-calls (eg. a Chain calling an LLM).
"""Callbacks for this call and any sub-calls (e.g. a Chain calling an LLM).
Tags are passed to all callbacks, metadata is passed to handle*Start callbacks.
"""
run_name: str
"""
Name for the tracer run for this call. Defaults to the name of the class.
"""
"""Name for the tracer run for this call.
Defaults to the name of the class."""
max_concurrency: int | None
"""
Maximum number of parallel calls to make. If not provided, defaults to
`ThreadPoolExecutor`'s default.
"""Maximum number of parallel calls to make.
If not provided, defaults to `ThreadPoolExecutor`'s default.
"""
recursion_limit: int
"""
Maximum number of times a call can recurse. If not provided, defaults to `25`.
"""Maximum number of times a call can recurse.
If not provided, defaults to `25`.
"""
configurable: dict[str, Any]
"""
Runtime values for attributes previously made configurable on this `Runnable`,
"""Runtime values for attributes previously made configurable on this `Runnable`,
or sub-Runnables, through `configurable_fields` or `configurable_alternatives`.
Check `output_schema` for a description of the attributes that have been made
configurable.
"""
run_id: uuid.UUID | None
"""
Unique identifier for the tracer run for this call. If not provided, a new UUID
will be generated.
"""Unique identifier for the tracer run for this call.
If not provided, a new UUID will be generated.
"""

View File

@@ -1,4 +1,4 @@
"""Runnables that can be dynamically configured."""
"""`Runnable` objects that can be dynamically configured."""
from __future__ import annotations
@@ -47,14 +47,14 @@ if TYPE_CHECKING:
class DynamicRunnable(RunnableSerializable[Input, Output]):
"""Serializable Runnable that can be dynamically configured.
"""Serializable `Runnable` that can be dynamically configured.
A DynamicRunnable should be initiated using the `configurable_fields` or
`configurable_alternatives` method of a Runnable.
A `DynamicRunnable` should be initiated using the `configurable_fields` or
`configurable_alternatives` method of a `Runnable`.
"""
default: RunnableSerializable[Input, Output]
"""The default Runnable to use."""
"""The default `Runnable` to use."""
config: RunnableConfig | None = None
"""The configuration to use."""
@@ -66,7 +66,7 @@ class DynamicRunnable(RunnableSerializable[Input, Output]):
@classmethod
@override
def is_lc_serializable(cls) -> bool:
"""Return True as this class is serializable."""
"""Return `True` as this class is serializable."""
return True
@classmethod
@@ -120,13 +120,13 @@ class DynamicRunnable(RunnableSerializable[Input, Output]):
def prepare(
self, config: RunnableConfig | None = None
) -> tuple[Runnable[Input, Output], RunnableConfig]:
"""Prepare the Runnable for invocation.
"""Prepare the `Runnable` for invocation.
Args:
config: The configuration to use.
Returns:
The prepared Runnable and configuration.
The prepared `Runnable` and configuration.
"""
runnable: Runnable[Input, Output] = self
while isinstance(runnable, DynamicRunnable):
@@ -316,12 +316,12 @@ class DynamicRunnable(RunnableSerializable[Input, Output]):
class RunnableConfigurableFields(DynamicRunnable[Input, Output]):
"""Runnable that can be dynamically configured.
"""`Runnable` that can be dynamically configured.
A RunnableConfigurableFields should be initiated using the
`configurable_fields` method of a Runnable.
A `RunnableConfigurableFields` should be initiated using the
`configurable_fields` method of a `Runnable`.
Here is an example of using a RunnableConfigurableFields with LLMs:
Here is an example of using a `RunnableConfigurableFields` with LLMs:
```python
from langchain_core.prompts import PromptTemplate
@@ -348,7 +348,7 @@ class RunnableConfigurableFields(DynamicRunnable[Input, Output]):
chain.invoke({"x": 0}, config={"configurable": {"temperature": 0.9}})
```
Here is an example of using a RunnableConfigurableFields with HubRunnables:
Here is an example of using a `RunnableConfigurableFields` with `HubRunnables`:
```python
from langchain_core.prompts import PromptTemplate
@@ -380,7 +380,7 @@ class RunnableConfigurableFields(DynamicRunnable[Input, Output]):
@property
def config_specs(self) -> list[ConfigurableFieldSpec]:
"""Get the configuration specs for the RunnableConfigurableFields.
"""Get the configuration specs for the `RunnableConfigurableFields`.
Returns:
The configuration specs.
@@ -473,13 +473,13 @@ _enums_for_spec_lock = threading.Lock()
class RunnableConfigurableAlternatives(DynamicRunnable[Input, Output]):
"""Runnable that can be dynamically configured.
"""`Runnable` that can be dynamically configured.
A RunnableConfigurableAlternatives should be initiated using the
`configurable_alternatives` method of a Runnable or can be
A `RunnableConfigurableAlternatives` should be initiated using the
`configurable_alternatives` method of a `Runnable` or can be
initiated directly as well.
Here is an example of using a RunnableConfigurableAlternatives that uses
Here is an example of using a `RunnableConfigurableAlternatives` that uses
alternative prompts to illustrate its functionality:
```python
@@ -506,7 +506,7 @@ class RunnableConfigurableAlternatives(DynamicRunnable[Input, Output]):
chain.with_config(configurable={"prompt": "poem"}).invoke({"topic": "bears"})
```
Equivalently, you can initialize RunnableConfigurableAlternatives directly
Equivalently, you can initialize `RunnableConfigurableAlternatives` directly
and use it in LCEL in the same way:
```python
@@ -531,7 +531,7 @@ class RunnableConfigurableAlternatives(DynamicRunnable[Input, Output]):
"""
which: ConfigurableField
"""The ConfigurableField to use to choose between alternatives."""
"""The `ConfigurableField` to use to choose between alternatives."""
alternatives: dict[
str,
@@ -544,8 +544,9 @@ class RunnableConfigurableAlternatives(DynamicRunnable[Input, Output]):
prefix_keys: bool
"""Whether to prefix configurable fields of each alternative with a namespace
of the form <which.id>==<alternative_key>, eg. a key named "temperature" used by
the alternative named "gpt3" becomes "model==gpt3/temperature"."""
of the form <which.id>==<alternative_key>, e.g. a key named "temperature" used by
the alternative named "gpt3" becomes "model==gpt3/temperature".
"""
@property
@override
@@ -638,24 +639,24 @@ class RunnableConfigurableAlternatives(DynamicRunnable[Input, Output]):
def _strremoveprefix(s: str, prefix: str) -> str:
"""str.removeprefix() is only available in Python 3.9+."""
"""`str.removeprefix()` is only available in Python 3.9+."""
return s.replace(prefix, "", 1) if s.startswith(prefix) else s
def prefix_config_spec(
spec: ConfigurableFieldSpec, prefix: str
) -> ConfigurableFieldSpec:
"""Prefix the id of a ConfigurableFieldSpec.
"""Prefix the id of a `ConfigurableFieldSpec`.
This is useful when a RunnableConfigurableAlternatives is used as a
ConfigurableField of another RunnableConfigurableAlternatives.
This is useful when a `RunnableConfigurableAlternatives` is used as a
`ConfigurableField` of another `RunnableConfigurableAlternatives`.
Args:
spec: The ConfigurableFieldSpec to prefix.
spec: The `ConfigurableFieldSpec` to prefix.
prefix: The prefix to add.
Returns:
The prefixed ConfigurableFieldSpec.
The prefixed `ConfigurableFieldSpec`.
"""
return (
ConfigurableFieldSpec(
@@ -677,15 +678,15 @@ def make_options_spec(
) -> ConfigurableFieldSpec:
"""Make options spec.
Make a ConfigurableFieldSpec for a ConfigurableFieldSingleOption or
ConfigurableFieldMultiOption.
Make a `ConfigurableFieldSpec` for a `ConfigurableFieldSingleOption` or
`ConfigurableFieldMultiOption`.
Args:
spec: The ConfigurableFieldSingleOption or ConfigurableFieldMultiOption.
spec: The `ConfigurableFieldSingleOption` or `ConfigurableFieldMultiOption`.
description: The description to use if the spec does not have one.
Returns:
The ConfigurableFieldSpec.
The `ConfigurableFieldSpec`.
"""
with _enums_for_spec_lock:
if enum := _enums_for_spec.get(spec):

View File

@@ -35,20 +35,20 @@ if TYPE_CHECKING:
class RunnableWithFallbacks(RunnableSerializable[Input, Output]):
"""Runnable that can fallback to other Runnables if it fails.
"""`Runnable` that can fall back to other `Runnable`s if it fails.
External APIs (e.g., APIs for a language model) may at times experience
degraded performance or even downtime.
In these cases, it can be useful to have a fallback Runnable that can be
used in place of the original Runnable (e.g., fallback to another LLM provider).
In these cases, it can be useful to have a fallback `Runnable` that can be
used in place of the original `Runnable` (e.g., fallback to another LLM provider).
Fallbacks can be defined at the level of a single Runnable, or at the level
of a chain of Runnables. Fallbacks are tried in order until one succeeds or
Fallbacks can be defined at the level of a single `Runnable`, or at the level
of a chain of `Runnable`s. Fallbacks are tried in order until one succeeds or
all fail.
While you can instantiate a `RunnableWithFallbacks` directly, it is usually
more convenient to use the `with_fallbacks` method on a Runnable.
more convenient to use the `with_fallbacks` method on a `Runnable`.
Example:
```python
@@ -87,7 +87,7 @@ class RunnableWithFallbacks(RunnableSerializable[Input, Output]):
"""
runnable: Runnable[Input, Output]
"""The Runnable to run first."""
"""The `Runnable` to run first."""
fallbacks: Sequence[Runnable[Input, Output]]
"""A sequence of fallbacks to try."""
exceptions_to_handle: tuple[type[BaseException], ...] = (Exception,)
@@ -97,9 +97,12 @@ class RunnableWithFallbacks(RunnableSerializable[Input, Output]):
"""
exception_key: str | None = None
"""If `string` is specified then handled exceptions will be passed to fallbacks as
part of the input under the specified key. If `None`, exceptions
will not be passed to fallbacks. If used, the base Runnable and its fallbacks
must accept a dictionary as input."""
part of the input under the specified key.
If `None`, exceptions will not be passed to fallbacks.
If used, the base `Runnable` and its fallbacks must accept a dictionary as input.
"""
model_config = ConfigDict(
arbitrary_types_allowed=True,
@@ -137,7 +140,7 @@ class RunnableWithFallbacks(RunnableSerializable[Input, Output]):
@classmethod
@override
def is_lc_serializable(cls) -> bool:
"""Return True as this class is serializable."""
"""Return `True` as this class is serializable."""
return True
@classmethod
@@ -152,10 +155,10 @@ class RunnableWithFallbacks(RunnableSerializable[Input, Output]):
@property
def runnables(self) -> Iterator[Runnable[Input, Output]]:
"""Iterator over the Runnable and its fallbacks.
"""Iterator over the `Runnable` and its fallbacks.
Yields:
The Runnable then its fallbacks.
The `Runnable` then its fallbacks.
"""
yield self.runnable
yield from self.fallbacks
@@ -589,14 +592,14 @@ class RunnableWithFallbacks(RunnableSerializable[Input, Output]):
await run_manager.on_chain_end(output)
def __getattr__(self, name: str) -> Any:
"""Get an attribute from the wrapped Runnable and its fallbacks.
"""Get an attribute from the wrapped `Runnable` and its fallbacks.
Returns:
If the attribute is anything other than a method that outputs a Runnable,
returns getattr(self.runnable, name). If the attribute is a method that
does return a new Runnable (e.g. model.bind_tools([...]) outputs a new
RunnableBinding) then self.runnable and each of the runnables in
self.fallbacks is replaced with getattr(x, name).
If the attribute is anything other than a method that outputs a `Runnable`,
returns `getattr(self.runnable, name)`. If the attribute is a method that
does return a new `Runnable` (e.g. `model.bind_tools([...])` outputs a new
`RunnableBinding`) then `self.runnable` and each of the runnables in
`self.fallbacks` is replaced with `getattr(x, name)`.
Example:
```python
@@ -604,7 +607,7 @@ class RunnableWithFallbacks(RunnableSerializable[Input, Output]):
from langchain_anthropic import ChatAnthropic
gpt_4o = ChatOpenAI(model="gpt-4o")
claude_3_sonnet = ChatAnthropic(model="claude-3-7-sonnet-20250219")
claude_3_sonnet = ChatAnthropic(model="claude-sonnet-4-5-20250929")
model = gpt_4o.with_fallbacks([claude_3_sonnet])
model.model_name
@@ -618,7 +621,6 @@ class RunnableWithFallbacks(RunnableSerializable[Input, Output]):
runnable=RunnableBinding(bound=ChatOpenAI(...), kwargs={"tools": [...]}),
fallbacks=[RunnableBinding(bound=ChatAnthropic(...), kwargs={"tools": [...]})],
)
```
""" # noqa: E501
attr = getattr(self.runnable, name)

View File

@@ -4,7 +4,6 @@ from __future__ import annotations
import inspect
from collections import defaultdict
from collections.abc import Callable
from dataclasses import dataclass, field
from enum import Enum
from typing import (
@@ -22,7 +21,7 @@ from langchain_core.runnables.base import Runnable, RunnableSerializable
from langchain_core.utils.pydantic import _IgnoreUnserializable, is_basemodel_subclass
if TYPE_CHECKING:
from collections.abc import Sequence
from collections.abc import Callable, Sequence
from pydantic import BaseModel
@@ -132,7 +131,7 @@ class Branch(NamedTuple):
condition: Callable[..., str]
"""A callable that returns a string representation of the condition."""
ends: dict[str, str] | None
"""Optional dictionary of end node ids for the branches. """
"""Optional dictionary of end node IDs for the branches. """
class CurveStyle(Enum):
@@ -642,6 +641,7 @@ class Graph:
retry_delay: float = 1.0,
frontmatter_config: dict[str, Any] | None = None,
base_url: str | None = None,
proxies: dict[str, str] | None = None,
) -> bytes:
"""Draw the graph as a PNG image using Mermaid.
@@ -674,11 +674,10 @@ class Graph:
}
```
base_url: The base URL of the Mermaid server for rendering via API.
proxies: HTTP/HTTPS proxies for requests (e.g. `{"http": "http://127.0.0.1:7890"}`).
Returns:
The PNG image as bytes.
"""
# Import locally to prevent circular import
from langchain_core.runnables.graph_mermaid import ( # noqa: PLC0415
@@ -699,6 +698,7 @@ class Graph:
padding=padding,
max_retries=max_retries,
retry_delay=retry_delay,
proxies=proxies,
base_url=base_url,
)
@@ -706,8 +706,10 @@ class Graph:
def _first_node(graph: Graph, exclude: Sequence[str] = ()) -> Node | None:
"""Find the single node that is not a target of any edge.
Exclude nodes/sources with ids in the exclude list.
Exclude nodes/sources with IDs in the exclude list.
If there is no such node, or there are multiple, return `None`.
When drawing the graph, this node would be the origin.
"""
targets = {edge.target for edge in graph.edges if edge.source not in exclude}
@@ -722,8 +724,10 @@ def _first_node(graph: Graph, exclude: Sequence[str] = ()) -> Node | None:
def _last_node(graph: Graph, exclude: Sequence[str] = ()) -> Node | None:
"""Find the single node that is not a source of any edge.
Exclude nodes/targets with ids in the exclude list.
Exclude nodes/targets with IDs in the exclude list.
If there is no such node, or there are multiple, return `None`.
When drawing the graph, this node would be the destination.
"""
sources = {edge.source for edge in graph.edges if edge.target not in exclude}

View File

@@ -7,7 +7,6 @@ from __future__ import annotations
import math
import os
from collections.abc import Mapping, Sequence
from typing import TYPE_CHECKING, Any
try:
@@ -20,6 +19,8 @@ except ImportError:
_HAS_GRANDALF = False
if TYPE_CHECKING:
from collections.abc import Mapping, Sequence
from langchain_core.runnables.graph import Edge as LangEdge

Some files were not shown because too many files have changed in this diff Show More