Compare commits


1 Commit

Author: Sydney Runkle
SHA1: 7230be12bb
Message: cc pass at fixing tool message bug
Date: 2025-10-20 13:14:45 -04:00
400 changed files with 9356 additions and 27295 deletions

View File

@@ -2,7 +2,7 @@ blank_issues_enabled: false
version: 2.1
contact_links:
- name: 📚 Documentation
url: https://github.com/langchain-ai/docs/issues/new?template=01-langchain.yml
url: https://github.com/langchain-ai/docs/issues/new?template=langchain.yml
about: Report an issue related to the LangChain documentation
- name: 💬 LangChain Forum
url: https://forum.langchain.com/

93
.github/actions/poetry_setup/action.yml vendored Normal file
View File

@@ -0,0 +1,93 @@
# An action for setting up poetry install with caching.
# Using a custom action since the default action does not
# take poetry install groups into account.
# Action code from:
# https://github.com/actions/setup-python/issues/505#issuecomment-1273013236
name: poetry-install-with-caching
description: Poetry install with support for caching of dependency groups.
inputs:
python-version:
description: Python version, supporting MAJOR.MINOR only
required: true
poetry-version:
description: Poetry version
required: true
cache-key:
description: Cache key to use for manual handling of caching
required: true
working-directory:
description: Directory whose poetry.lock file should be cached
required: true
runs:
using: composite
steps:
- uses: actions/setup-python@v5
name: Setup python ${{ inputs.python-version }}
id: setup-python
with:
python-version: ${{ inputs.python-version }}
- uses: actions/cache@v4
id: cache-bin-poetry
name: Cache Poetry binary - Python ${{ inputs.python-version }}
env:
SEGMENT_DOWNLOAD_TIMEOUT_MIN: "1"
with:
path: |
/opt/pipx/venvs/poetry
# This step caches the poetry installation, so make sure it's keyed on the poetry version as well.
key: bin-poetry-${{ runner.os }}-${{ runner.arch }}-py-${{ inputs.python-version }}-${{ inputs.poetry-version }}
- name: Refresh shell hashtable and fixup softlinks
if: steps.cache-bin-poetry.outputs.cache-hit == 'true'
shell: bash
env:
POETRY_VERSION: ${{ inputs.poetry-version }}
PYTHON_VERSION: ${{ inputs.python-version }}
run: |
set -eux
# Refresh the shell hashtable, to ensure correct `which` output.
hash -r
# `actions/cache@v3` doesn't always seem able to correctly unpack softlinks.
# Delete and recreate the softlinks pipx expects to have.
rm /opt/pipx/venvs/poetry/bin/python
cd /opt/pipx/venvs/poetry/bin
ln -s "$(which "python$PYTHON_VERSION")" python
chmod +x python
cd /opt/pipx_bin/
ln -s /opt/pipx/venvs/poetry/bin/poetry poetry
chmod +x poetry
# Ensure everything got set up correctly.
/opt/pipx/venvs/poetry/bin/python --version
/opt/pipx_bin/poetry --version
- name: Install poetry
if: steps.cache-bin-poetry.outputs.cache-hit != 'true'
shell: bash
env:
POETRY_VERSION: ${{ inputs.poetry-version }}
PYTHON_VERSION: ${{ inputs.python-version }}
# Install poetry using the python version installed by setup-python step.
run: pipx install "poetry==$POETRY_VERSION" --python '${{ steps.setup-python.outputs.python-path }}' --verbose
- name: Restore pip and poetry cached dependencies
uses: actions/cache@v4
env:
SEGMENT_DOWNLOAD_TIMEOUT_MIN: "4"
WORKDIR: ${{ inputs.working-directory == '' && '.' || inputs.working-directory }}
with:
path: |
~/.cache/pip
~/.cache/pypoetry/virtualenvs
~/.cache/pypoetry/cache
~/.cache/pypoetry/artifacts
${{ env.WORKDIR }}/.venv
key: py-deps-${{ runner.os }}-${{ runner.arch }}-py-${{ inputs.python-version }}-poetry-${{ inputs.poetry-version }}-${{ inputs.cache-key }}-${{ hashFiles(format('{0}/**/poetry.lock', env.WORKDIR)) }}

View File

@@ -7,12 +7,13 @@ core:
- any-glob-to-any-file:
- "libs/core/**/*"
langchain-classic:
langchain:
- changed-files:
- any-glob-to-any-file:
- "libs/langchain/**/*"
- "libs/langchain_v1/**/*"
langchain:
v1:
- changed-files:
- any-glob-to-any-file:
- "libs/langchain_v1/**/*"
@@ -27,11 +28,6 @@ standard-tests:
- any-glob-to-any-file:
- "libs/standard-tests/**/*"
model-profiles:
- changed-files:
- any-glob-to-any-file:
- "libs/model-profiles/**/*"
text-splitters:
- changed-files:
- any-glob-to-any-file:
@@ -43,81 +39,6 @@ integration:
- any-glob-to-any-file:
- "libs/partners/**/*"
anthropic:
- changed-files:
- any-glob-to-any-file:
- "libs/partners/anthropic/**/*"
chroma:
- changed-files:
- any-glob-to-any-file:
- "libs/partners/chroma/**/*"
deepseek:
- changed-files:
- any-glob-to-any-file:
- "libs/partners/deepseek/**/*"
exa:
- changed-files:
- any-glob-to-any-file:
- "libs/partners/exa/**/*"
fireworks:
- changed-files:
- any-glob-to-any-file:
- "libs/partners/fireworks/**/*"
groq:
- changed-files:
- any-glob-to-any-file:
- "libs/partners/groq/**/*"
huggingface:
- changed-files:
- any-glob-to-any-file:
- "libs/partners/huggingface/**/*"
mistralai:
- changed-files:
- any-glob-to-any-file:
- "libs/partners/mistralai/**/*"
nomic:
- changed-files:
- any-glob-to-any-file:
- "libs/partners/nomic/**/*"
ollama:
- changed-files:
- any-glob-to-any-file:
- "libs/partners/ollama/**/*"
openai:
- changed-files:
- any-glob-to-any-file:
- "libs/partners/openai/**/*"
perplexity:
- changed-files:
- any-glob-to-any-file:
- "libs/partners/perplexity/**/*"
prompty:
- changed-files:
- any-glob-to-any-file:
- "libs/partners/prompty/**/*"
qdrant:
- changed-files:
- any-glob-to-any-file:
- "libs/partners/qdrant/**/*"
xai:
- changed-files:
- any-glob-to-any-file:
- "libs/partners/xai/**/*"
# Infrastructure and DevOps
infra:
- changed-files:

41
.github/pr-title-labeler.yml vendored Normal file
View File

@@ -0,0 +1,41 @@
# PR title labeler config
#
# Labels PRs based on conventional commit patterns in titles
#
# Format: type(scope): description or type!: description (breaking)
add-missing-labels: true
clear-prexisting: false
include-commits: false
include-title: true
label-for-breaking-changes: breaking
label-mapping:
documentation: ["docs"]
feature: ["feat"]
fix: ["fix"]
infra: ["build", "ci", "chore"]
integration:
[
"anthropic",
"chroma",
"deepseek",
"exa",
"fireworks",
"groq",
"huggingface",
"mistralai",
"nomic",
"ollama",
"openai",
"perplexity",
"prompty",
"qdrant",
"xai",
]
linting: ["style"]
performance: ["perf"]
refactor: ["refactor"]
release: ["release"]
revert: ["revert"]
tests: ["test"]
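Below is a small, hypothetical Python sketch (not part of this repository; the real labeling is performed by the GitHub Action that consumes this config) showing how the mapping above could translate a conventional-commit PR title into labels. The helper name `labels_for_title`, the truncated `LABEL_MAPPING`, and the sample titles are illustrative assumptions only.

```python
import re

# Truncated copy of the label-mapping shown above (illustration only).
LABEL_MAPPING = {
    "documentation": ["docs"],
    "feature": ["feat"],
    "fix": ["fix"],
    "infra": ["build", "ci", "chore"],
    "integration": ["anthropic", "openai", "ollama"],
}


def labels_for_title(title: str) -> list[str]:
    """Map a conventional-commit title (type(scope)!: description) to labels."""
    match = re.match(r"^(?P<type>[a-z]+)(\((?P<scope>[^)]+)\))?(?P<breaking>!)?:", title)
    if not match:
        return []
    labels = []
    for label, keys in LABEL_MAPPING.items():
        if match["type"] in keys or (match["scope"] or "") in keys:
            labels.append(label)
    if match["breaking"]:
        labels.append("breaking")  # label-for-breaking-changes
    return labels


print(labels_for_title("feat(openai): add responses API"))  # ['feature', 'integration']
print(labels_for_title("ci!: drop Python 3.9"))              # ['infra', 'breaking']
```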

View File

@@ -30,7 +30,6 @@ LANGCHAIN_DIRS = [
"libs/text-splitters",
"libs/langchain",
"libs/langchain_v1",
"libs/model-profiles",
]
# When set to True, we are ignoring core dependents
@@ -135,7 +134,7 @@ def _get_configs_for_single_dir(job: str, dir_: str) -> List[Dict[str, str]]:
elif dir_ == "libs/core":
py_versions = ["3.10", "3.11", "3.12", "3.13", "3.14"]
# custom logic for specific directories
elif dir_ in {"libs/partners/chroma"}:
elif dir_ in {"libs/partners/chroma", "libs/partners/nomic"}:
py_versions = ["3.10", "3.13"]
else:
py_versions = ["3.10", "3.14"]

View File

@@ -98,7 +98,7 @@ def _check_python_version_from_requirement(
return True
else:
marker_str = str(requirement.marker)
if "python_version" in marker_str or "python_full_version" in marker_str:
if "python_version" or "python_full_version" in marker_str:
python_version_str = "".join(
char
for char in marker_str

View File

@@ -77,7 +77,7 @@ jobs:
working-directory: ${{ inputs.working-directory }}
- name: Upload build
uses: actions/upload-artifact@v5
uses: actions/upload-artifact@v4
with:
name: dist
path: ${{ inputs.working-directory }}/dist/
@@ -149,8 +149,8 @@ jobs:
fi
fi
# if PREV_TAG is empty or came out to 0.0.0, let it be empty
if [ -z "$PREV_TAG" ] || [ "$PREV_TAG" = "$PKG_NAME==0.0.0" ]; then
# if PREV_TAG is empty, let it be empty
if [ -z "$PREV_TAG" ]; then
echo "No previous tag found - first release"
else
# confirm prev-tag actually exists in git repo with git tag
@@ -179,8 +179,8 @@ jobs:
PREV_TAG: ${{ steps.check-tags.outputs.prev-tag }}
run: |
PREAMBLE="Changes since $PREV_TAG"
# if PREV_TAG is empty or 0.0.0, then we are releasing the first version
if [ -z "$PREV_TAG" ] || [ "$PREV_TAG" = "$PKG_NAME==0.0.0" ]; then
# if PREV_TAG is empty, then we are releasing the first version
if [ -z "$PREV_TAG" ]; then
PREAMBLE="Initial release"
PREV_TAG=$(git rev-list --max-parents=0 HEAD)
fi
@@ -208,7 +208,7 @@ jobs:
steps:
- uses: actions/checkout@v5
- uses: actions/download-artifact@v6
- uses: actions/download-artifact@v5
with:
name: dist
path: ${{ inputs.working-directory }}/dist/
@@ -258,7 +258,7 @@ jobs:
with:
python-version: ${{ env.PYTHON_VERSION }}
- uses: actions/download-artifact@v6
- uses: actions/download-artifact@v5
with:
name: dist
path: ${{ inputs.working-directory }}/dist/
@@ -377,7 +377,6 @@ jobs:
XAI_API_KEY: ${{ secrets.XAI_API_KEY }}
DEEPSEEK_API_KEY: ${{ secrets.DEEPSEEK_API_KEY }}
PPLX_API_KEY: ${{ secrets.PPLX_API_KEY }}
LANGCHAIN_TESTS_USER_AGENT: ${{ secrets.LANGCHAIN_TESTS_USER_AGENT }}
run: make integration_tests
working-directory: ${{ inputs.working-directory }}
@@ -410,7 +409,6 @@ jobs:
AZURE_OPENAI_LEGACY_CHAT_DEPLOYMENT_NAME: ${{ secrets.AZURE_OPENAI_LEGACY_CHAT_DEPLOYMENT_NAME }}
AZURE_OPENAI_LLM_DEPLOYMENT_NAME: ${{ secrets.AZURE_OPENAI_LLM_DEPLOYMENT_NAME }}
AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT_NAME: ${{ secrets.AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT_NAME }}
LANGCHAIN_TESTS_USER_AGENT: ${{ secrets.LANGCHAIN_TESTS_USER_AGENT }}
steps:
- uses: actions/checkout@v5
@@ -430,7 +428,7 @@ jobs:
with:
python-version: ${{ env.PYTHON_VERSION }}
- uses: actions/download-artifact@v6
- uses: actions/download-artifact@v5
if: startsWith(inputs.working-directory, 'libs/core')
with:
name: dist
@@ -444,7 +442,7 @@ jobs:
git ls-remote --tags origin "langchain-${{ matrix.partner }}*" \
| awk '{print $2}' \
| sed 's|refs/tags/||' \
| grep -E '[0-9]+\.[0-9]+\.[0-9]+$' \
| grep -E '[0-9]+\.[0-9]+\.[0-9]+([a-zA-Z]+[0-9]+)?$' \
| sort -Vr \
| head -n 1
)"
@@ -499,7 +497,7 @@ jobs:
with:
python-version: ${{ env.PYTHON_VERSION }}
- uses: actions/download-artifact@v6
- uses: actions/download-artifact@v5
with:
name: dist
path: ${{ inputs.working-directory }}/dist/
@@ -539,7 +537,7 @@ jobs:
with:
python-version: ${{ env.PYTHON_VERSION }}
- uses: actions/download-artifact@v6
- uses: actions/download-artifact@v5
with:
name: dist
path: ${{ inputs.working-directory }}/dist/

View File

@@ -155,7 +155,6 @@ jobs:
WATSONX_APIKEY: ${{ secrets.WATSONX_APIKEY }}
WATSONX_PROJECT_ID: ${{ secrets.WATSONX_PROJECT_ID }}
XAI_API_KEY: ${{ secrets.XAI_API_KEY }}
LANGCHAIN_TESTS_USER_AGENT: ${{ secrets.LANGCHAIN_TESTS_USER_AGENT }}
run: |
cd langchain/${{ matrix.working-directory }}
make integration_tests

View File

@@ -30,7 +30,7 @@
# core, cli, langchain, langchain_v1, langchain-classic, standard-tests,
# text-splitters, docs, anthropic, chroma, deepseek, exa, fireworks, groq,
# huggingface, mistralai, nomic, ollama, openai, perplexity, prompty, qdrant,
# xai, infra, deps
# xai, infra
#
# Rules:
# 1. The 'Type' must start with a lowercase letter.
@@ -79,8 +79,8 @@ jobs:
core
cli
langchain
langchain_v1
langchain-classic
model-profiles
standard-tests
text-splitters
docs

2
.gitignore vendored
View File

@@ -1,8 +1,6 @@
.vs/
.claude/
.idea/
#Emacs backup
*~
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]

View File

@@ -1,8 +0,0 @@
{
"mcpServers": {
"docs-langchain": {
"type": "http",
"url": "https://docs.langchain.com/mcp"
}
}
}

View File

@@ -34,22 +34,17 @@
</a>
</p>
LangChain is a framework for building agents and LLM-powered applications. It helps you chain together interoperable components and third-party integrations to simplify AI application development all while future-proofing decisions as the underlying technology evolves.
LangChain is a framework for building LLM-powered applications. It helps you chain together interoperable components and third-party integrations to simplify AI application development all while future-proofing decisions as the underlying technology evolves.
```bash
pip install langchain
```
If you're looking for more advanced customization or agent orchestration, check out [LangGraph](https://docs.langchain.com/oss/python/langgraph/overview), our framework for building controllable agent workflows.
---
**Documentation**:
**Documentation**: To learn more about LangChain, check out [the docs](https://docs.langchain.com/oss/python/langchain/overview).
- [docs.langchain.com](https://docs.langchain.com/oss/python/langchain/overview) Comprehensive documentation, including conceptual overviews and guides
- [reference.langchain.com/python](https://reference.langchain.com/python) API reference docs for LangChain packages
**Discussions**: Visit the [LangChain Forum](https://forum.langchain.com) to connect with the community and share all of your technical questions, ideas, and feedback.
If you're looking for more advanced customization or agent orchestration, check out [LangGraph](https://docs.langchain.com/oss/python/langgraph/overview), our framework for building controllable agent workflows.
> [!NOTE]
> Looking for the JS/TS library? Check out [LangChain.js](https://github.com/langchain-ai/langchainjs).
@@ -60,27 +55,24 @@ LangChain helps developers build applications powered by LLMs through a standard
Use LangChain for:
- **Real-time data augmentation**. Easily connect LLMs to diverse data sources and external/internal systems, drawing from LangChain's vast library of integrations with model providers, tools, vector stores, retrievers, and more.
- **Model interoperability**. Swap models in and out as your engineering team experiments to find the best choice for your application's needs. As the industry frontier evolves, adapt quickly LangChain's abstractions keep you moving without losing momentum.
- **Rapid prototyping**. Quickly build and iterate on LLM applications with LangChain's modular, component-based architecture. Test different approaches and workflows without rebuilding from scratch, accelerating your development cycle.
- **Production-ready features**. Deploy reliable applications with built-in support for monitoring, evaluation, and debugging through integrations like LangSmith. Scale with confidence using battle-tested patterns and best practices.
- **Vibrant community and ecosystem**. Leverage a rich ecosystem of integrations, templates, and community-contributed components. Benefit from continuous improvements and stay up-to-date with the latest AI developments through an active open-source community.
- **Flexible abstraction layers**. Work at the level of abstraction that suits your needs - from high-level chains for quick starts to low-level components for fine-grained control. LangChain grows with your application's complexity.
- **Real-time data augmentation**. Easily connect LLMs to diverse data sources and external/internal systems, drawing from LangChain’s vast library of integrations with model providers, tools, vector stores, retrievers, and more.
- **Model interoperability**. Swap models in and out as your engineering team experiments to find the best choice for your application’s needs. As the industry frontier evolves, adapt quickly LangChain’s abstractions keep you moving without losing momentum.
## LangChain ecosystem
## LangChain’s ecosystem
While the LangChain framework can be used standalone, it also integrates seamlessly with any LangChain product, giving developers a full suite of tools when building LLM applications.
To improve your LLM application development, pair LangChain with:
- [LangGraph](https://docs.langchain.com/oss/python/langgraph/overview) Build agents that can reliably handle complex tasks with LangGraph, our low-level agent orchestration framework. LangGraph offers customizable architecture, long-term memory, and human-in-the-loop workflows and is trusted in production by companies like LinkedIn, Uber, Klarna, and GitLab.
- [Integrations](https://docs.langchain.com/oss/python/integrations/providers/overview) List of LangChain integrations, including chat & embedding models, tools & toolkits, and more
- [LangSmith](https://www.langchain.com/langsmith) Helpful for agent evals and observability. Debug poor-performing LLM app runs, evaluate agent trajectories, gain visibility in production, and improve performance over time.
- [LangSmith Deployment](https://docs.langchain.com/langsmith/deployments) Deploy and scale agents effortlessly with a purpose-built deployment platform for long-running, stateful workflows. Discover, reuse, configure, and share agents across teams and iterate quickly with visual prototyping in [LangSmith Studio](https://docs.langchain.com/langsmith/studio).
- [Deep Agents](https://github.com/langchain-ai/deepagents) *(new!)* Build agents that can plan, use subagents, and leverage file systems for complex tasks
- [LangSmith](https://www.langchain.com/langsmith) - Helpful for agent evals and observability. Debug poor-performing LLM app runs, evaluate agent trajectories, gain visibility in production, and improve performance over time.
- [LangGraph](https://docs.langchain.com/oss/python/langgraph/overview) - Build agents that can reliably handle complex tasks with LangGraph, our low-level agent orchestration framework. LangGraph offers customizable architecture, long-term memory, and human-in-the-loop workflows — and is trusted in production by companies like LinkedIn, Uber, Klarna, and GitLab.
- [LangGraph Platform](https://docs.langchain.com/langgraph-platform) - Deploy and scale agents effortlessly with a purpose-built deployment platform for long-running, stateful workflows. Discover, reuse, configure, and share agents across teams — and iterate quickly with visual prototyping in [LangGraph Studio](https://langchain-ai.github.io/langgraph/concepts/langgraph_studio).
## Additional resources
- [API Reference](https://reference.langchain.com/python) Detailed reference on navigating base packages and integrations for LangChain.
- [Contributing Guide](https://docs.langchain.com/oss/python/contributing/overview) Learn how to contribute to LangChain projects and find good first issues.
- [Code of Conduct](https://github.com/langchain-ai/langchain/blob/master/.github/CODE_OF_CONDUCT.md) Our community guidelines and standards for participation.
- [Learn](https://docs.langchain.com/oss/python/learn): Use cases, conceptual overviews, and more.
- [API Reference](https://reference.langchain.com/python): Detailed reference on
navigating base packages and integrations for LangChain.
- [Contributing Guide](https://docs.langchain.com/oss/python/contributing/overview): Learn how to contribute to LangChain and find good first issues.
- [LangChain Forum](https://forum.langchain.com): Connect with the community and share all of your technical questions, ideas, and feedback.
- [Chat LangChain](https://chat.langchain.com): Ask questions & chat with our documentation.

View File

@@ -55,10 +55,10 @@ All out of scope targets defined by huntr as well as:
* **langchain-experimental**: This repository is for experimental code and is not
eligible for bug bounties (see [package warning](https://pypi.org/project/langchain-experimental/)), bug reports to it will be marked as interesting or waste of
time and published with no bounty attached.
* **tools**: Tools in either `langchain` or `langchain-community` are not eligible for bug
* **tools**: Tools in either langchain or langchain-community are not eligible for bug
bounties. This includes the following directories
* `libs/langchain/langchain/tools`
* `libs/community/langchain_community/tools`
* libs/langchain/langchain/tools
* libs/community/langchain_community/tools
* Please review the [Best Practices](#best-practices)
for more details, but generally tools interact with the real world. Developers are
expected to understand the security implications of their code and are responsible

View File

@@ -295,7 +295,7 @@
"source": [
"## TODO: Any functionality specific to this vector store\n",
"\n",
"E.g. creating a persistent database to save to your disk, etc."
"E.g. creating a persisten database to save to your disk, etc."
]
},
{

View File

@@ -5,10 +5,12 @@
!!! warning
New agents should be built using the
[`langchain` library](https://pypi.org/project/langchain/), which provides a
[langgraph library](https://github.com/langchain-ai/langgraph), which provides a
simpler and more flexible way to define agents.
See docs on [building agents](https://docs.langchain.com/oss/python/langchain/agents).
Please see the
[migration guide](https://python.langchain.com/docs/how_to/migrate_agent/) for
information on how to migrate existing agents to modern langgraph agents.
Agents use language models to choose a sequence of actions to take.
@@ -52,33 +54,31 @@ class AgentAction(Serializable):
"""The input to pass in to the Tool."""
log: str
"""Additional information to log about the action.
This log can be used in a few ways. First, it can be used to audit what exactly the
LLM predicted to lead to this `(tool, tool_input)`.
Second, it can be used in future iterations to show the LLMs prior thoughts. This is
useful when `(tool, tool_input)` does not contain full information about the LLM
prediction (for example, any `thought` before the tool/tool_input).
"""
This log can be used in a few ways. First, it can be used to audit
what exactly the LLM predicted to lead to this (tool, tool_input).
Second, it can be used in future iterations to show the LLMs prior
thoughts. This is useful when (tool, tool_input) does not contain
full information about the LLM prediction (for example, any `thought`
before the tool/tool_input)."""
type: Literal["AgentAction"] = "AgentAction"
# Override init to support instantiation by position for backward compat.
def __init__(self, tool: str, tool_input: str | dict, log: str, **kwargs: Any):
"""Create an `AgentAction`.
"""Create an AgentAction.
Args:
tool: The name of the tool to execute.
tool_input: The input to pass in to the `Tool`.
tool_input: The input to pass in to the Tool.
log: Additional information to log about the action.
"""
super().__init__(tool=tool, tool_input=tool_input, log=log, **kwargs)
@classmethod
def is_lc_serializable(cls) -> bool:
"""`AgentAction` is serializable.
"""AgentAction is serializable.
Returns:
`True`
True
"""
return True
@@ -100,23 +100,19 @@ class AgentAction(Serializable):
class AgentActionMessageLog(AgentAction):
"""Representation of an action to be executed by an agent.
This is similar to `AgentAction`, but includes a message log consisting of
chat messages.
This is useful when working with `ChatModels`, and is used to reconstruct
conversation history from the agent's perspective.
This is similar to AgentAction, but includes a message log consisting of
chat messages. This is useful when working with ChatModels, and is used
to reconstruct conversation history from the agent's perspective.
"""
message_log: Sequence[BaseMessage]
"""Similar to log, this can be used to pass along extra information about what exact
messages were predicted by the LLM before parsing out the `(tool, tool_input)`.
This is again useful if `(tool, tool_input)` cannot be used to fully recreate the
LLM prediction, and you need that LLM prediction (for future agent iteration).
"""Similar to log, this can be used to pass along extra
information about what exact messages were predicted by the LLM
before parsing out the (tool, tool_input). This is again useful
if (tool, tool_input) cannot be used to fully recreate the LLM
prediction, and you need that LLM prediction (for future agent iteration).
Compared to `log`, this is useful when the underlying LLM is a
chat model (and therefore returns messages rather than a string).
"""
chat model (and therefore returns messages rather than a string)."""
# Ignoring type because we're overriding the type from AgentAction.
# And this is the correct thing to do in this case.
# The type literal is used for serialization purposes.
@@ -124,12 +120,12 @@ class AgentActionMessageLog(AgentAction):
class AgentStep(Serializable):
"""Result of running an `AgentAction`."""
"""Result of running an AgentAction."""
action: AgentAction
"""The `AgentAction` that was executed."""
"""The AgentAction that was executed."""
observation: Any
"""The result of the `AgentAction`."""
"""The result of the AgentAction."""
@property
def messages(self) -> Sequence[BaseMessage]:
@@ -138,22 +134,19 @@ class AgentStep(Serializable):
class AgentFinish(Serializable):
"""Final return value of an `ActionAgent`.
"""Final return value of an ActionAgent.
Agents return an `AgentFinish` when they have reached a stopping condition.
Agents return an AgentFinish when they have reached a stopping condition.
"""
return_values: dict
"""Dictionary of return values."""
log: str
"""Additional information to log about the return value.
This is used to pass along the full LLM prediction, not just the parsed out
return value.
For example, if the full LLM prediction was `Final Answer: 2` you may want to just
return `2` as a return value, but pass along the full string as a `log` (for
debugging or observability purposes).
return value. For example, if the full LLM prediction was
`Final Answer: 2` you may want to just return `2` as a return value, but pass
along the full string as a `log` (for debugging or observability purposes).
"""
type: Literal["AgentFinish"] = "AgentFinish"
@@ -163,7 +156,7 @@ class AgentFinish(Serializable):
@classmethod
def is_lc_serializable(cls) -> bool:
"""Return `True` as this class is serializable."""
"""Return True as this class is serializable."""
return True
@classmethod
@@ -211,7 +204,7 @@ def _convert_agent_observation_to_messages(
observation: Observation to convert to a message.
Returns:
`AIMessage` that corresponds to the original tool invocation.
AIMessage that corresponds to the original tool invocation.
"""
if isinstance(agent_action, AgentActionMessageLog):
return [_create_function_message(agent_action, observation)]
@@ -234,7 +227,7 @@ def _create_function_message(
observation: the result of the tool invocation.
Returns:
`FunctionMessage` that corresponds to the original tool invocation.
FunctionMessage that corresponds to the original tool invocation.
"""
if not isinstance(observation, str):
try:

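For orientation, here is a brief, hedged sketch of how the structures documented above fit together. It assumes the `langchain_core.agents` import path used by current releases and is illustrative rather than canonical.

```python
from langchain_core.agents import AgentAction, AgentFinish, AgentStep

# Positional construction is kept for backward compatibility (see __init__ above).
action = AgentAction("search", "weather in SF", "I should look up the weather.")
step = AgentStep(action=action, observation="Sunny.")
finish = AgentFinish(return_values={"output": "2"}, log="Final Answer: 2")

print(step.action.tool, step.observation)   # search Sunny.
print(finish.return_values["output"])       # 2
```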
View File

@@ -2,8 +2,8 @@
Distinct from provider-based [prompt caching](https://docs.langchain.com/oss/python/langchain/models#prompt-caching).
!!! warning "Beta feature"
This is a beta feature. Please be wary of deploying experimental code to production
!!! warning
This is a beta feature! Please be wary of deploying experimental code to production
unless you've taken appropriate precautions.
A cache is useful for two reasons:
@@ -49,18 +49,17 @@ class BaseCache(ABC):
"""Look up based on `prompt` and `llm_string`.
A cache implementation is expected to generate a key from the 2-tuple
of `prompt` and `llm_string` (e.g., by concatenating them with a delimiter).
of prompt and llm_string (e.g., by concatenating them with a delimiter).
Args:
prompt: A string representation of the prompt.
In the case of a chat model, the prompt is a non-trivial
serialization of the prompt into the language model.
llm_string: A string representation of the LLM configuration.
This is used to capture the invocation parameters of the LLM
(e.g., model name, temperature, stop tokens, max tokens, etc.).
These invocation parameters are serialized into a string representation.
These invocation parameters are serialized into a string
representation.
Returns:
On a cache miss, return `None`. On a cache hit, return the cached value.
@@ -79,10 +78,8 @@ class BaseCache(ABC):
In the case of a chat model, the prompt is a non-trivial
serialization of the prompt into the language model.
llm_string: A string representation of the LLM configuration.
This is used to capture the invocation parameters of the LLM
(e.g., model name, temperature, stop tokens, max tokens, etc.).
These invocation parameters are serialized into a string
representation.
return_val: The value to be cached. The value is a list of `Generation`
@@ -97,17 +94,15 @@ class BaseCache(ABC):
"""Async look up based on `prompt` and `llm_string`.
A cache implementation is expected to generate a key from the 2-tuple
of `prompt` and `llm_string` (e.g., by concatenating them with a delimiter).
of prompt and llm_string (e.g., by concatenating them with a delimiter).
Args:
prompt: A string representation of the prompt.
In the case of a chat model, the prompt is a non-trivial
serialization of the prompt into the language model.
llm_string: A string representation of the LLM configuration.
This is used to capture the invocation parameters of the LLM
(e.g., model name, temperature, stop tokens, max tokens, etc.).
These invocation parameters are serialized into a string
representation.
@@ -130,10 +125,8 @@ class BaseCache(ABC):
In the case of a chat model, the prompt is a non-trivial
serialization of the prompt into the language model.
llm_string: A string representation of the LLM configuration.
This is used to capture the invocation parameters of the LLM
(e.g., model name, temperature, stop tokens, max tokens, etc.).
These invocation parameters are serialized into a string
representation.
return_val: The value to be cached. The value is a list of `Generation`

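As a rough illustration of the contract described above, the sketch below implements a dictionary-backed cache keyed on the `(prompt, llm_string)` pair. It assumes `BaseCache` and `RETURN_VAL_TYPE` are importable from `langchain_core.caches` as in current releases (which also ship an `InMemoryCache`); this is a teaching sketch, not the library implementation.

```python
from langchain_core.caches import RETURN_VAL_TYPE, BaseCache


class SimpleDictCache(BaseCache):
    """Illustrative dict-backed cache; not the library's InMemoryCache."""

    def __init__(self) -> None:
        self._store: dict[tuple[str, str], RETURN_VAL_TYPE] = {}

    def lookup(self, prompt: str, llm_string: str) -> RETURN_VAL_TYPE | None:
        # Cache key is derived from the (prompt, llm_string) 2-tuple.
        return self._store.get((prompt, llm_string))

    def update(self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE) -> None:
        self._store[(prompt, llm_string)] = return_val

    def clear(self, **kwargs) -> None:
        self._store.clear()

    async def alookup(self, prompt: str, llm_string: str) -> RETURN_VAL_TYPE | None:
        return self.lookup(prompt, llm_string)

    async def aupdate(self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE) -> None:
        self.update(prompt, llm_string, return_val)

    async def aclear(self, **kwargs) -> None:
        self.clear()
```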
View File

@@ -420,6 +420,8 @@ class RunManagerMixin:
(includes inherited tags).
metadata: The metadata associated with the custom event
(includes inherited metadata).
!!! version-added "Added in version 0.2.15"
"""
@@ -880,6 +882,8 @@ class AsyncCallbackHandler(BaseCallbackHandler):
(includes inherited tags).
metadata: The metadata associated with the custom event
(includes inherited metadata).
!!! version-added "Added in version 0.2.15"
"""

View File

@@ -229,24 +229,7 @@ def shielded(func: Func) -> Func:
@functools.wraps(func)
async def wrapped(*args: Any, **kwargs: Any) -> Any:
# Capture the current context to preserve context variables
ctx = copy_context()
# Create the coroutine
coro = func(*args, **kwargs)
# For Python 3.11+, create task with explicit context
# For older versions, fallback to original behavior
try:
# Create a task with the captured context to preserve context variables
task = asyncio.create_task(coro, context=ctx) # type: ignore[call-arg, unused-ignore]
# `call-arg` used to not fail 3.9 or 3.10 tests
return await asyncio.shield(task)
except TypeError:
# Python < 3.11 fallback - create task normally then shield
# This won't preserve context perfectly but is better than nothing
task = asyncio.create_task(coro)
return await asyncio.shield(task)
return await asyncio.shield(func(*args, **kwargs))
return cast("Func", wrapped)
@@ -1583,6 +1566,9 @@ class CallbackManager(BaseCallbackManager):
Raises:
ValueError: If additional keyword arguments are passed.
!!! version-added "Added in version 0.2.14"
"""
if not self.handlers:
return
@@ -2056,6 +2042,8 @@ class AsyncCallbackManager(BaseCallbackManager):
Raises:
ValueError: If additional keyword arguments are passed.
!!! version-added "Added in version 0.2.14"
"""
if not self.handlers:
return
@@ -2567,6 +2555,9 @@ async def adispatch_custom_event(
This is due to a limitation in asyncio for python <= 3.10 that prevents
LangChain from automatically propagating the config object on the user's
behalf.
!!! version-added "Added in version 0.2.15"
"""
# Import locally to prevent circular imports.
from langchain_core.runnables.config import ( # noqa: PLC0415
@@ -2639,6 +2630,9 @@ def dispatch_custom_event(
foo_ = RunnableLambda(foo)
foo_.invoke({"a": "1"}, {"callbacks": [CustomCallbackManager()]})
```
!!! version-added "Added in version 0.2.15"
"""
# Import locally to prevent circular imports.
from langchain_core.runnables.config import ( # noqa: PLC0415

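The docstrings above describe `on_custom_event` handlers and `dispatch_custom_event`. A compact, hedged sketch of the documented usage pattern (a custom handler plus a `RunnableLambda` that emits an event) might look like the following; import paths are assumed to match current `langchain-core` releases.

```python
from typing import Any

from langchain_core.callbacks import BaseCallbackHandler
from langchain_core.callbacks.manager import dispatch_custom_event
from langchain_core.runnables import RunnableLambda


class PrintCustomEvents(BaseCallbackHandler):
    def on_custom_event(self, name: str, data: Any, **kwargs: Any) -> None:
        # run_id, tags and metadata arrive via kwargs; only name/data are printed here.
        print(f"custom event {name!r}: {data}")


def step(inputs: dict) -> dict:
    # Emits an ad-hoc event attributed to the current run.
    dispatch_custom_event("progress", {"stage": "halfway"})
    return inputs


RunnableLambda(step).invoke({"a": 1}, {"callbacks": [PrintCustomEvents()]})
```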
View File

@@ -24,7 +24,7 @@ class UsageMetadataCallbackHandler(BaseCallbackHandler):
from langchain_core.callbacks import UsageMetadataCallbackHandler
llm_1 = init_chat_model(model="openai:gpt-4o-mini")
llm_2 = init_chat_model(model="anthropic:claude-3-5-haiku-20241022")
llm_2 = init_chat_model(model="anthropic:claude-3-5-haiku-latest")
callback = UsageMetadataCallbackHandler()
result_1 = llm_1.invoke("Hello", config={"callbacks": [callback]})
@@ -43,7 +43,7 @@ class UsageMetadataCallbackHandler(BaseCallbackHandler):
'input_token_details': {'cache_read': 0, 'cache_creation': 0}}}
```
!!! version-added "Added in `langchain-core` 0.3.49"
!!! version-added "Added in version 0.3.49"
"""
@@ -109,7 +109,7 @@ def get_usage_metadata_callback(
from langchain_core.callbacks import get_usage_metadata_callback
llm_1 = init_chat_model(model="openai:gpt-4o-mini")
llm_2 = init_chat_model(model="anthropic:claude-3-5-haiku-20241022")
llm_2 = init_chat_model(model="anthropic:claude-3-5-haiku-latest")
with get_usage_metadata_callback() as cb:
llm_1.invoke("Hello")
@@ -134,7 +134,7 @@ def get_usage_metadata_callback(
}
```
!!! version-added "Added in `langchain-core` 0.3.49"
!!! version-added "Added in version 0.3.49"
"""
usage_metadata_callback_var: ContextVar[UsageMetadataCallbackHandler | None] = (

View File

@@ -121,7 +121,7 @@ class BaseChatMessageHistory(ABC):
This method may be deprecated in a future release.
Args:
message: The `HumanMessage` to add to the store.
message: The human message to add to the store.
"""
if isinstance(message, HumanMessage):
self.add_message(message)
@@ -129,7 +129,7 @@ class BaseChatMessageHistory(ABC):
self.add_message(HumanMessage(content=message))
def add_ai_message(self, message: AIMessage | str) -> None:
"""Convenience method for adding an `AIMessage` string to the store.
"""Convenience method for adding an AI message string to the store.
!!! note
This is a convenience method. Code should favor the bulk `add_messages`
@@ -138,7 +138,7 @@ class BaseChatMessageHistory(ABC):
This method may be deprecated in a future release.
Args:
message: The `AIMessage` to add.
message: The AI message to add.
"""
if isinstance(message, AIMessage):
self.add_message(message)
@@ -173,7 +173,7 @@ class BaseChatMessageHistory(ABC):
in an efficient manner to avoid unnecessary round-trips to the underlying store.
Args:
messages: A sequence of `BaseMessage` objects to store.
messages: A sequence of BaseMessage objects to store.
"""
for message in messages:
self.add_message(message)
@@ -182,7 +182,7 @@ class BaseChatMessageHistory(ABC):
"""Async add a list of messages.
Args:
messages: A sequence of `BaseMessage` objects to store.
messages: A sequence of BaseMessage objects to store.
"""
await run_in_executor(None, self.add_messages, messages)
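A short, hedged illustration of the convenience methods documented above, using the `InMemoryChatMessageHistory` implementation that ships with `langchain-core` (assumed import path shown below):

```python
from langchain_core.chat_history import InMemoryChatMessageHistory

history = InMemoryChatMessageHistory()
history.add_user_message("What is LangChain?")               # wraps a HumanMessage
history.add_ai_message("A framework for LLM applications.")  # wraps an AIMessage

for message in history.messages:
    print(type(message).__name__, message.content)
```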

View File

@@ -27,7 +27,7 @@ class BaseLoader(ABC): # noqa: B024
"""Interface for Document Loader.
Implementations should implement the lazy-loading method using generators
to avoid loading all documents into memory at once.
to avoid loading all Documents into memory at once.
`load` is provided just for user convenience and should not be overridden.
"""
@@ -53,11 +53,9 @@ class BaseLoader(ABC): # noqa: B024
def load_and_split(
self, text_splitter: TextSplitter | None = None
) -> list[Document]:
"""Load `Document` and split into chunks. Chunks are returned as `Document`.
"""Load Documents and split into chunks. Chunks are returned as `Document`.
!!! danger
Do not override this method. It should be considered to be deprecated!
Do not override this method. It should be considered to be deprecated!
Args:
text_splitter: `TextSplitter` instance to use for splitting documents.
@@ -137,7 +135,7 @@ class BaseBlobParser(ABC):
"""
def parse(self, blob: Blob) -> list[Document]:
"""Eagerly parse the blob into a `Document` or list of `Document` objects.
"""Eagerly parse the blob into a `Document` or `Document` objects.
This is a convenience method for interactive development environment.
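To make the lazy-loading guidance concrete, here is a minimal, hypothetical loader that yields one `Document` per line of a text file and leaves the inherited `load()` untouched; the class name and file path are invented for illustration.

```python
from collections.abc import Iterator

from langchain_core.document_loaders import BaseLoader
from langchain_core.documents import Document


class LinesLoader(BaseLoader):
    """Yields one Document per line of a text file (illustrative only)."""

    def __init__(self, path: str) -> None:
        self.path = path

    def lazy_load(self) -> Iterator[Document]:
        # Generator keeps memory flat; load() (inherited, not overridden) collects it.
        with open(self.path, encoding="utf-8") as f:
            for i, line in enumerate(f):
                yield Document(
                    page_content=line.rstrip("\n"),
                    metadata={"source": self.path, "line": i},
                )


# loader = LinesLoader("notes.txt")   # hypothetical file
# docs = loader.load()
```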

View File

@@ -28,7 +28,7 @@ class BlobLoader(ABC):
def yield_blobs(
self,
) -> Iterable[Blob]:
"""A lazy loader for raw data represented by LangChain's `Blob` object.
"""A lazy loader for raw data represented by LangChain's Blob object.
Returns:
A generator over blobs

View File

@@ -14,13 +14,13 @@ from langchain_core.documents import Document
class LangSmithLoader(BaseLoader):
"""Load LangSmith Dataset examples as `Document` objects.
"""Load LangSmith Dataset examples as Documents.
Loads the example inputs as the `Document` page content and places the entire
example into the `Document` metadata. This allows you to easily create few-shot
example retrievers from the loaded documents.
Loads the example inputs as the Document page content and places the entire example
into the Document metadata. This allows you to easily create few-shot example
retrievers from the loaded documents.
??? note "Lazy loading example"
??? note "Lazy load"
```python
from langchain_core.document_loaders import LangSmithLoader
@@ -34,6 +34,9 @@ class LangSmithLoader(BaseLoader):
```python
# -> [Document("...", metadata={"inputs": {...}, "outputs": {...}, ...}), ...]
```
!!! version-added "Added in version 0.2.34"
"""
def __init__(
@@ -66,11 +69,12 @@ class LangSmithLoader(BaseLoader):
format_content: Function for converting the content extracted from the example
inputs into a string. Defaults to JSON-encoding the contents.
example_ids: The IDs of the examples to filter by.
as_of: The dataset version tag or timestamp to retrieve the examples as of.
Response examples will only be those that were present at the time of
the tagged (or timestamped) version.
as_of: The dataset version tag OR
timestamp to retrieve the examples as of.
Response examples will only be those that were present at the time
of the tagged (or timestamped) version.
splits: A list of dataset splits, which are
divisions of your dataset such as `train`, `test`, or `validation`.
divisions of your dataset such as 'train', 'test', or 'validation'.
Returns examples only from the specified splits.
inline_s3_urls: Whether to inline S3 URLs.
offset: The offset to start from.

View File

@@ -1,28 +1,7 @@
"""Documents module for data retrieval and processing workflows.
"""Documents module.
This module provides core abstractions for handling data in retrieval-augmented
generation (RAG) pipelines, vector stores, and document processing workflows.
!!! warning "Documents vs. message content"
This module is distinct from `langchain_core.messages.content`, which provides
multimodal content blocks for **LLM chat I/O** (text, images, audio, etc. within
messages).
**Key distinction:**
- **Documents** (this module): For **data retrieval and processing workflows**
- Vector stores, retrievers, RAG pipelines
- Text chunking, embedding, and semantic search
- Example: Chunks of a PDF stored in a vector database
- **Content Blocks** (`messages.content`): For **LLM conversational I/O**
- Multimodal message content sent to/from models
- Tool calls, reasoning, citations within chat
- Example: An image sent to a vision model in a chat message (via
[`ImageContentBlock`][langchain.messages.ImageContentBlock])
While both can represent similar data types (text, files), they serve different
architectural purposes in LangChain applications.
**Document** module is a collection of classes that handle documents
and their transformations.
"""
from typing import TYPE_CHECKING

View File

@@ -1,16 +1,4 @@
"""Base classes for media and documents.
This module contains core abstractions for **data retrieval and processing workflows**:
- `BaseMedia`: Base class providing `id` and `metadata` fields
- `Blob`: Raw data loading (files, binary data) - used by document loaders
- `Document`: Text content for retrieval (RAG, vector stores, semantic search)
!!! note "Not for LLM chat messages"
These classes are for data processing pipelines, not LLM I/O. For multimodal
content in chat messages (images, audio in conversations), see
`langchain.messages` content blocks instead.
"""
"""Base classes for media and documents."""
from __future__ import annotations
@@ -31,23 +19,27 @@ PathLike = str | PurePath
class BaseMedia(Serializable):
"""Base class for content used in retrieval and data processing workflows.
"""Use to represent media content.
Provides common fields for content that needs to be stored, indexed, or searched.
Media objects can be used to represent raw data, such as text or binary data.
!!! note
For multimodal content in **chat messages** (images, audio sent to/from LLMs),
use `langchain.messages` content blocks instead.
LangChain Media objects allow associating metadata and an optional identifier
with the content.
The presence of an ID and metadata make it easier to store, index, and search
over the content in a structured way.
"""
# The ID field is optional at the moment.
# It will likely become required in a future major release after
# it has been adopted by enough VectorStore implementations.
# it has been adopted by enough vectorstore implementations.
id: str | None = Field(default=None, coerce_numbers_to_str=True)
"""An optional identifier for the document.
Ideally this should be unique across the document collection and formatted
as a UUID, but this will not be enforced.
!!! version-added "Added in version 0.2.11"
"""
metadata: dict = Field(default_factory=dict)
@@ -55,70 +47,71 @@ class BaseMedia(Serializable):
class Blob(BaseMedia):
"""Raw data abstraction for document loading and file processing.
"""Blob represents raw data by either reference or value.
Represents raw bytes or text, either in-memory or by file reference. Used
primarily by document loaders to decouple data loading from parsing.
Provides an interface to materialize the blob in different representations, and
help to decouple the development of data loaders from the downstream parsing of
the raw data.
Inspired by [Mozilla's `Blob`](https://developer.mozilla.org/en-US/docs/Web/API/Blob)
Inspired by: https://developer.mozilla.org/en-US/docs/Web/API/Blob
???+ example "Initialize a blob from in-memory data"
Example: Initialize a blob from in-memory data
```python
from langchain_core.documents import Blob
```python
from langchain_core.documents import Blob
blob = Blob.from_data("Hello, world!")
blob = Blob.from_data("Hello, world!")
# Read the blob as a string
print(blob.as_string())
# Read the blob as a string
print(blob.as_string())
# Read the blob as bytes
print(blob.as_bytes())
# Read the blob as bytes
print(blob.as_bytes())
# Read the blob as a byte stream
with blob.as_bytes_io() as f:
print(f.read())
```
# Read the blob as a byte stream
with blob.as_bytes_io() as f:
print(f.read())
```
??? example "Load from memory and specify MIME type and metadata"
Example: Load from memory and specify mime-type and metadata
```python
from langchain_core.documents import Blob
```python
from langchain_core.documents import Blob
blob = Blob.from_data(
data="Hello, world!",
mime_type="text/plain",
metadata={"source": "https://example.com"},
)
```
blob = Blob.from_data(
data="Hello, world!",
mime_type="text/plain",
metadata={"source": "https://example.com"},
)
```
??? example "Load the blob from a file"
Example: Load the blob from a file
```python
from langchain_core.documents import Blob
```python
from langchain_core.documents import Blob
blob = Blob.from_path("path/to/file.txt")
blob = Blob.from_path("path/to/file.txt")
# Read the blob as a string
print(blob.as_string())
# Read the blob as a string
print(blob.as_string())
# Read the blob as bytes
print(blob.as_bytes())
# Read the blob as bytes
print(blob.as_bytes())
# Read the blob as a byte stream
with blob.as_bytes_io() as f:
print(f.read())
```
# Read the blob as a byte stream
with blob.as_bytes_io() as f:
print(f.read())
```
"""
data: bytes | str | None = None
"""Raw data associated with the `Blob`."""
"""Raw data associated with the blob."""
mimetype: str | None = None
"""MIME type, not to be confused with a file extension."""
"""MimeType not to be confused with a file extension."""
encoding: str = "utf-8"
"""Encoding to use if decoding the bytes into a string.
Uses `utf-8` as default encoding if decoding to string.
Use `utf-8` as default encoding, if decoding to string.
"""
path: PathLike | None = None
"""Location where the original content was found."""
@@ -132,9 +125,9 @@ class Blob(BaseMedia):
def source(self) -> str | None:
"""The source location of the blob as string if known otherwise none.
If a path is associated with the `Blob`, it will default to the path location.
If a path is associated with the blob, it will default to the path location.
Unless explicitly set via a metadata field called `'source'`, in which
Unless explicitly set via a metadata field called `"source"`, in which
case that value will be used instead.
"""
if self.metadata and "source" in self.metadata:
@@ -220,13 +213,13 @@ class Blob(BaseMedia):
Args:
path: Path-like object to file to be read
encoding: Encoding to use if decoding the bytes into a string
mime_type: If provided, will be set as the MIME type of the data
guess_type: If `True`, the MIME type will be guessed from the file
extension, if a MIME type was not provided
metadata: Metadata to associate with the `Blob`
mime_type: If provided, will be set as the mime-type of the data
guess_type: If `True`, the mimetype will be guessed from the file extension,
if a mime-type was not provided
metadata: Metadata to associate with the blob
Returns:
`Blob` instance
Blob instance
"""
if mime_type is None and guess_type:
mimetype = mimetypes.guess_type(path)[0] if guess_type else None
@@ -252,17 +245,17 @@ class Blob(BaseMedia):
path: str | None = None,
metadata: dict | None = None,
) -> Blob:
"""Initialize the `Blob` from in-memory data.
"""Initialize the blob from in-memory data.
Args:
data: The in-memory data associated with the `Blob`
data: The in-memory data associated with the blob
encoding: Encoding to use if decoding the bytes into a string
mime_type: If provided, will be set as the MIME type of the data
mime_type: If provided, will be set as the mime-type of the data
path: If provided, will be set as the source from which the data came
metadata: Metadata to associate with the `Blob`
metadata: Metadata to associate with the blob
Returns:
`Blob` instance
Blob instance
"""
return cls(
data=data,
@@ -283,10 +276,6 @@ class Blob(BaseMedia):
class Document(BaseMedia):
"""Class for storing a piece of text and associated metadata.
!!! note
`Document` is for **retrieval workflows**, not chat I/O. For sending text
to an LLM in a conversation, use message types from `langchain.messages`.
Example:
```python
from langchain_core.documents import Document
@@ -309,7 +298,7 @@ class Document(BaseMedia):
@classmethod
def is_lc_serializable(cls) -> bool:
"""Return `True` as this class is serializable."""
"""Return True as this class is serializable."""
return True
@classmethod
@@ -322,10 +311,10 @@ class Document(BaseMedia):
return ["langchain", "schema", "document"]
def __str__(self) -> str:
"""Override `__str__` to restrict it to page_content and metadata.
"""Override __str__ to restrict it to page_content and metadata.
Returns:
A string representation of the `Document`.
A string representation of the Document.
"""
# The format matches pydantic format for __str__.
#

View File

@@ -21,14 +21,14 @@ class BaseDocumentCompressor(BaseModel, ABC):
This abstraction is primarily used for post-processing of retrieved documents.
`Document` objects matching a given query are first retrieved.
Documents matching a given query are first retrieved.
Then the list of documents can be further processed.
For example, one could re-rank the retrieved documents using an LLM.
!!! note
Users should favor using a `RunnableLambda` instead of sub-classing from this
Users should favor using a RunnableLambda instead of sub-classing from this
interface.
"""
@@ -43,9 +43,9 @@ class BaseDocumentCompressor(BaseModel, ABC):
"""Compress retrieved documents given the query context.
Args:
documents: The retrieved `Document` objects.
documents: The retrieved documents.
query: The query context.
callbacks: Optional `Callbacks` to run during compression.
callbacks: Optional callbacks to run during compression.
Returns:
The compressed documents.
@@ -61,9 +61,9 @@ class BaseDocumentCompressor(BaseModel, ABC):
"""Async compress retrieved documents given the query context.
Args:
documents: The retrieved `Document` objects.
documents: The retrieved documents.
query: The query context.
callbacks: Optional `Callbacks` to run during compression.
callbacks: Optional callbacks to run during compression.
Returns:
The compressed documents.

View File

@@ -16,8 +16,8 @@ if TYPE_CHECKING:
class BaseDocumentTransformer(ABC):
"""Abstract base class for document transformation.
A document transformation takes a sequence of `Document` objects and returns a
sequence of transformed `Document` objects.
A document transformation takes a sequence of Documents and returns a
sequence of transformed Documents.
Example:
```python
@@ -57,10 +57,10 @@ class BaseDocumentTransformer(ABC):
"""Transform a list of documents.
Args:
documents: A sequence of `Document` objects to be transformed.
documents: A sequence of Documents to be transformed.
Returns:
A sequence of transformed `Document` objects.
A sequence of transformed Documents.
"""
async def atransform_documents(
@@ -69,10 +69,10 @@ class BaseDocumentTransformer(ABC):
"""Asynchronously transform a list of documents.
Args:
documents: A sequence of `Document` objects to be transformed.
documents: A sequence of Documents to be transformed.
Returns:
A sequence of transformed `Document` objects.
A sequence of transformed Documents.
"""
return await run_in_executor(
None, self.transform_documents, documents, **kwargs

View File

@@ -18,7 +18,7 @@ class FakeEmbeddings(Embeddings, BaseModel):
This embedding model creates embeddings by sampling from a normal distribution.
!!! danger "Toy model"
!!! warning
Do not use this outside of testing, as it is not a real embedding model.
Instantiate:
@@ -73,7 +73,7 @@ class DeterministicFakeEmbedding(Embeddings, BaseModel):
This embedding model creates embeddings by sampling from a normal distribution
with a seed based on the hash of the text.
!!! danger "Toy model"
!!! warning
Do not use this outside of testing, as it is not a real embedding model.
Instantiate:

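A quick, hedged usage sketch of the fake embedding models described above (testing only); it assumes the `DeterministicFakeEmbedding` export and its `size` field as found in current `langchain-core`:

```python
from langchain_core.embeddings import DeterministicFakeEmbedding

embeddings = DeterministicFakeEmbedding(size=8)
first = embeddings.embed_query("hello")
second = embeddings.embed_query("hello")

assert first == second   # same text -> same vector (seeded by the text hash)
print(len(first))        # 8
```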
View File

@@ -29,7 +29,7 @@ class LengthBasedExampleSelector(BaseExampleSelector, BaseModel):
max_length: int = 2048
"""Max length for the prompt, beyond which examples are cut."""
example_text_lengths: list[int] = Field(default_factory=list)
example_text_lengths: list[int] = Field(default_factory=list) # :meta private:
"""Length of each example."""
def add_example(self, example: dict[str, str]) -> None:

View File

@@ -41,7 +41,7 @@ class _VectorStoreExampleSelector(BaseExampleSelector, BaseModel, ABC):
"""Optional keys to filter input to. If provided, the search is based on
the input variables instead of all variables."""
vectorstore_kwargs: dict[str, Any] | None = None
"""Extra arguments passed to similarity_search function of the `VectorStore`."""
"""Extra arguments passed to similarity_search function of the vectorstore."""
model_config = ConfigDict(
arbitrary_types_allowed=True,
@@ -159,7 +159,7 @@ class SemanticSimilarityExampleSelector(_VectorStoreExampleSelector):
instead of all variables.
example_keys: If provided, keys to filter examples to.
vectorstore_kwargs: Extra arguments passed to similarity_search function
of the `VectorStore`.
of the vectorstore.
vectorstore_cls_kwargs: optional kwargs containing url for vector store
Returns:
@@ -203,7 +203,7 @@ class SemanticSimilarityExampleSelector(_VectorStoreExampleSelector):
instead of all variables.
example_keys: If provided, keys to filter examples to.
vectorstore_kwargs: Extra arguments passed to similarity_search function
of the `VectorStore`.
of the vectorstore.
vectorstore_cls_kwargs: optional kwargs containing url for vector store
Returns:
@@ -286,12 +286,12 @@ class MaxMarginalRelevanceExampleSelector(_VectorStoreExampleSelector):
embeddings: An initialized embedding API interface, e.g. OpenAIEmbeddings().
vectorstore_cls: A vector store DB interface class, e.g. FAISS.
k: Number of examples to select.
fetch_k: Number of `Document` objects to fetch to pass to MMR algorithm.
fetch_k: Number of Documents to fetch to pass to MMR algorithm.
input_keys: If provided, the search is based on the input variables
instead of all variables.
example_keys: If provided, keys to filter examples to.
vectorstore_kwargs: Extra arguments passed to similarity_search function
of the `VectorStore`.
of the vectorstore.
vectorstore_cls_kwargs: optional kwargs containing url for vector store
Returns:
@@ -333,12 +333,12 @@ class MaxMarginalRelevanceExampleSelector(_VectorStoreExampleSelector):
embeddings: An initialized embedding API interface, e.g. OpenAIEmbeddings().
vectorstore_cls: A vector store DB interface class, e.g. FAISS.
k: Number of examples to select.
fetch_k: Number of `Document` objects to fetch to pass to MMR algorithm.
fetch_k: Number of Documents to fetch to pass to MMR algorithm.
input_keys: If provided, the search is based on the input variables
instead of all variables.
example_keys: If provided, keys to filter examples to.
vectorstore_kwargs: Extra arguments passed to similarity_search function
of the `VectorStore`.
of the vectorstore.
vectorstore_cls_kwargs: optional kwargs containing url for vector store
Returns:

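As a rough end-to-end sketch of the vector-store-backed selectors above, the example below wires `SemanticSimilarityExampleSelector` to the in-memory vector store and fake embeddings from `langchain-core` so it runs without external services; the example data is invented.

```python
from langchain_core.embeddings import DeterministicFakeEmbedding
from langchain_core.example_selectors import SemanticSimilarityExampleSelector
from langchain_core.vectorstores import InMemoryVectorStore

examples = [
    {"input": "happy", "output": "sad"},
    {"input": "tall", "output": "short"},
]

selector = SemanticSimilarityExampleSelector.from_examples(
    examples,
    DeterministicFakeEmbedding(size=16),
    InMemoryVectorStore,  # vectorstore_cls; any VectorStore class with from_texts works
    k=1,
)

print(selector.select_examples({"input": "joyful"}))
```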
View File

@@ -16,10 +16,9 @@ class OutputParserException(ValueError, LangChainException): # noqa: N818
"""Exception that output parsers should raise to signify a parsing error.
This exists to differentiate parsing errors from other code or execution errors
that also may arise inside the output parser.
`OutputParserException` will be available to catch and handle in ways to fix the
parsing error, while other errors will be raised.
that also may arise inside the output parser. `OutputParserException` will be
available to catch and handle in ways to fix the parsing error, while other
errors will be raised.
"""
def __init__(
@@ -33,19 +32,18 @@ class OutputParserException(ValueError, LangChainException): # noqa: N818
Args:
error: The error that's being re-raised or an error message.
observation: String explanation of error which can be passed to a model to
try and remediate the issue.
observation: String explanation of error which can be passed to a
model to try and remediate the issue.
llm_output: String model output which is error-ing.
send_to_llm: Whether to send the observation and llm_output back to an Agent
after an `OutputParserException` has been raised.
This gives the underlying model driving the agent the context that the
previous output was improperly structured, in the hopes that it will
update the output to the correct format.
Raises:
ValueError: If `send_to_llm` is `True` but either observation or
ValueError: If `send_to_llm` is True but either observation or
`llm_output` are not provided.
"""
if isinstance(error, str):
@@ -68,11 +66,11 @@ class ErrorCode(Enum):
"""Error codes."""
INVALID_PROMPT_INPUT = "INVALID_PROMPT_INPUT"
INVALID_TOOL_RESULTS = "INVALID_TOOL_RESULTS" # Used in JS; not Py (yet)
INVALID_TOOL_RESULTS = "INVALID_TOOL_RESULTS"
MESSAGE_COERCION_FAILURE = "MESSAGE_COERCION_FAILURE"
MODEL_AUTHENTICATION = "MODEL_AUTHENTICATION" # Used in JS; not Py (yet)
MODEL_NOT_FOUND = "MODEL_NOT_FOUND" # Used in JS; not Py (yet)
MODEL_RATE_LIMIT = "MODEL_RATE_LIMIT" # Used in JS; not Py (yet)
MODEL_AUTHENTICATION = "MODEL_AUTHENTICATION"
MODEL_NOT_FOUND = "MODEL_NOT_FOUND"
MODEL_RATE_LIMIT = "MODEL_RATE_LIMIT"
OUTPUT_PARSING_FAILURE = "OUTPUT_PARSING_FAILURE"
@@ -88,6 +86,6 @@ def create_message(*, message: str, error_code: ErrorCode) -> str:
"""
return (
f"{message}\n"
"For troubleshooting, visit: https://docs.langchain.com/oss/python/langchain"
f"/errors/{error_code.value} "
"For troubleshooting, visit: https://python.langchain.com/docs/"
f"troubleshooting/errors/{error_code.value} "
)
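A brief, hedged sketch of the exception contract described above: a toy parser raises `OutputParserException` on malformed model output so callers can catch parsing failures separately from other errors. The parser and inputs are illustrative.

```python
from langchain_core.exceptions import OutputParserException


def parse_rating(text: str) -> int:
    try:
        return int(text.strip())
    except ValueError as exc:
        raise OutputParserException(
            f"Expected an integer rating, got: {text!r}",
            llm_output=text,
        ) from exc


try:
    parse_rating("four out of five")
except OutputParserException as err:
    # Parsing errors can be handled (e.g. retried with a fix-up prompt)
    # while other exceptions still propagate.
    print("parsing failed:", err)
```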

View File

@@ -1,7 +1,7 @@
"""Code to help indexing data into a vectorstore.
This package contains helper logic to help deal with indexing data into
a `VectorStore` while avoiding duplicated content and over-writing content
a vectorstore while avoiding duplicated content and over-writing content
if it's unchanged.
"""

View File

@@ -298,48 +298,48 @@ def index(
For the time being, documents are indexed using their hashes, and users
are not able to specify the uid of the document.
!!! warning "Behavior changed in `langchain-core` 0.3.25"
!!! warning "Behavior changed in 0.3.25"
Added `scoped_full` cleanup mode.
!!! warning
* In full mode, the loader should be returning
the entire dataset, and not just a subset of the dataset.
Otherwise, the auto_cleanup will remove documents that it is not
supposed to.
the entire dataset, and not just a subset of the dataset.
Otherwise, the auto_cleanup will remove documents that it is not
supposed to.
* In incremental mode, if documents associated with a particular
source id appear across different batches, the indexing API
will do some redundant work. This will still result in the
correct end state of the index, but will unfortunately not be
100% efficient. For example, if a given document is split into 15
chunks, and we index them using a batch size of 5, we'll have 3 batches
all with the same source id. In general, to avoid doing too much
redundant work select as big a batch size as possible.
source id appear across different batches, the indexing API
will do some redundant work. This will still result in the
correct end state of the index, but will unfortunately not be
100% efficient. For example, if a given document is split into 15
chunks, and we index them using a batch size of 5, we'll have 3 batches
all with the same source id. In general, to avoid doing too much
redundant work select as big a batch size as possible.
* The `scoped_full` mode is suitable if determining an appropriate batch size
is challenging or if your data loader cannot return the entire dataset at
once. This mode keeps track of source IDs in memory, which should be fine
for most use cases. If your dataset is large (10M+ docs), you will likely
need to parallelize the indexing process regardless.
is challenging or if your data loader cannot return the entire dataset at
once. This mode keeps track of source IDs in memory, which should be fine
for most use cases. If your dataset is large (10M+ docs), you will likely
need to parallelize the indexing process regardless.
Args:
docs_source: Data loader or iterable of documents to index.
record_manager: Timestamped set to keep track of which documents were
updated.
vector_store: `VectorStore` or DocumentIndex to index the documents into.
vector_store: VectorStore or DocumentIndex to index the documents into.
batch_size: Batch size to use when indexing.
cleanup: How to handle clean up of documents.
- incremental: Cleans up all documents that haven't been updated AND
that are associated with source IDs that were seen during indexing.
Clean up is done continuously during indexing helping to minimize the
probability of users seeing duplicated content.
that are associated with source ids that were seen during indexing.
Clean up is done continuously during indexing helping to minimize the
probability of users seeing duplicated content.
- full: Delete all documents that have not been returned by the loader
during this run of indexing.
Clean up runs after all documents have been indexed.
This means that users may see duplicated content during indexing.
during this run of indexing.
Clean up runs after all documents have been indexed.
This means that users may see duplicated content during indexing.
- scoped_full: Similar to Full, but only deletes all documents
that haven't been updated AND that are associated with
source IDs that were seen during indexing.
that haven't been updated AND that are associated with
source ids that were seen during indexing.
- None: Do not delete any documents.
source_id_key: Optional key that helps identify the original source
of the document.
@@ -349,7 +349,7 @@ def index(
key_encoder: Hashing algorithm to use for hashing the document content and
metadata. Options include "blake2b", "sha256", and "sha512".
!!! version-added "Added in `langchain-core` 0.3.66"
!!! version-added "Added in version 0.3.66"
key_encoder: Hashing algorithm to use for hashing the document.
If not provided, a default encoder using SHA-1 will be used.
@@ -363,10 +363,10 @@ def index(
When changing the key encoder, you must change the
index as well to avoid duplicated documents in the cache.
upsert_kwargs: Additional keyword arguments to pass to the add_documents
method of the `VectorStore` or the upsert method of the DocumentIndex.
method of the VectorStore or the upsert method of the DocumentIndex.
For example, you can use this to specify a custom vector_field:
upsert_kwargs={"vector_field": "embedding"}
!!! version-added "Added in `langchain-core` 0.3.10"
!!! version-added "Added in version 0.3.10"
Returns:
Indexing result which contains information about how many documents
@@ -375,10 +375,10 @@ def index(
Raises:
ValueError: If cleanup mode is not one of 'incremental', 'full' or None
ValueError: If cleanup mode is incremental and source_id_key is None.
ValueError: If `VectorStore` does not have
ValueError: If vectorstore does not have
"delete" and "add_documents" required methods.
ValueError: If source_id_key is not None, but is not a string or callable.
TypeError: If `vectorstore` is not a `VectorStore` or a DocumentIndex.
TypeError: If `vectorstore` is not a VectorStore or a DocumentIndex.
AssertionError: If `source_id` is None when cleanup mode is incremental.
(should be unreachable code).
"""
@@ -415,7 +415,7 @@ def index(
raise ValueError(msg)
if type(destination).delete == VectorStore.delete:
# Checking if the VectorStore has overridden the default delete method
# Checking if the vectorstore has overridden the default delete method
# implementation which just raises a NotImplementedError
msg = "Vectorstore has not implemented the delete method"
raise ValueError(msg)
@@ -466,11 +466,11 @@ def index(
]
if cleanup in {"incremental", "scoped_full"}:
# Source IDs are required.
# source ids are required.
for source_id, hashed_doc in zip(source_ids, hashed_docs, strict=False):
if source_id is None:
msg = (
f"Source IDs are required when cleanup mode is "
f"Source ids are required when cleanup mode is "
f"incremental or scoped_full. "
f"Document that starts with "
f"content: {hashed_doc.page_content[:100]} "
@@ -479,7 +479,7 @@ def index(
raise ValueError(msg)
if cleanup == "scoped_full":
scoped_full_cleanup_source_ids.add(source_id)
# Source IDs cannot be None after for loop above.
# source ids cannot be None after for loop above.
source_ids = cast("Sequence[str]", source_ids)
exists_batch = record_manager.exists(
@@ -538,7 +538,7 @@ def index(
# If source IDs are provided, we can do the deletion incrementally!
if cleanup == "incremental":
# Get the uids of the documents that were not returned by the loader.
# mypy isn't good enough to determine that source IDs cannot be None
# mypy isn't good enough to determine that source ids cannot be None
# here due to a check that's happening above, so we check again.
for source_id in source_ids:
if source_id is None:
@@ -636,48 +636,48 @@ async def aindex(
For the time being, documents are indexed using their hashes, and users
are not able to specify the uid of the document.
!!! warning "Behavior changed in `langchain-core` 0.3.25"
!!! warning "Behavior changed in 0.3.25"
Added `scoped_full` cleanup mode.
!!! warning
* In full mode, the loader should be returning
the entire dataset, and not just a subset of the dataset.
Otherwise, the auto_cleanup will remove documents that it is not
supposed to.
the entire dataset, and not just a subset of the dataset.
Otherwise, the auto_cleanup will remove documents that it is not
supposed to.
* In incremental mode, if documents associated with a particular
source id appear across different batches, the indexing API
will do some redundant work. This will still result in the
correct end state of the index, but will unfortunately not be
100% efficient. For example, if a given document is split into 15
chunks, and we index them using a batch size of 5, we'll have 3 batches
all with the same source id. In general, to avoid doing too much
redundant work select as big a batch size as possible.
source id appear across different batches, the indexing API
will do some redundant work. This will still result in the
correct end state of the index, but will unfortunately not be
100% efficient. For example, if a given document is split into 15
chunks, and we index them using a batch size of 5, we'll have 3 batches
all with the same source id. In general, to avoid doing too much
redundant work select as big a batch size as possible.
* The `scoped_full` mode is suitable if determining an appropriate batch size
is challenging or if your data loader cannot return the entire dataset at
once. This mode keeps track of source IDs in memory, which should be fine
for most use cases. If your dataset is large (10M+ docs), you will likely
need to parallelize the indexing process regardless.
is challenging or if your data loader cannot return the entire dataset at
once. This mode keeps track of source IDs in memory, which should be fine
for most use cases. If your dataset is large (10M+ docs), you will likely
need to parallelize the indexing process regardless.
Args:
docs_source: Data loader or iterable of documents to index.
record_manager: Timestamped set to keep track of which documents were
updated.
vector_store: `VectorStore` or DocumentIndex to index the documents into.
vector_store: VectorStore or DocumentIndex to index the documents into.
batch_size: Batch size to use when indexing.
cleanup: How to handle clean up of documents.
- incremental: Cleans up all documents that haven't been updated AND
that are associated with source IDs that were seen during indexing.
Clean up is done continuously during indexing helping to minimize the
probability of users seeing duplicated content.
that are associated with source ids that were seen during indexing.
Clean up is done continuously during indexing helping to minimize the
probability of users seeing duplicated content.
- full: Delete all documents that have not been returned by the loader
during this run of indexing.
Clean up runs after all documents have been indexed.
This means that users may see duplicated content during indexing.
during this run of indexing.
Clean up runs after all documents have been indexed.
This means that users may see duplicated content during indexing.
- scoped_full: Similar to Full, but only deletes all documents
that haven't been updated AND that are associated with
source IDs that were seen during indexing.
that haven't been updated AND that are associated with
source ids that were seen during indexing.
- None: Do not delete any documents.
source_id_key: Optional key that helps identify the original source
of the document.
@@ -687,7 +687,7 @@ async def aindex(
key_encoder: Hashing algorithm to use for hashing the document content and
metadata. Options include "blake2b", "sha256", and "sha512".
!!! version-added "Added in `langchain-core` 0.3.66"
!!! version-added "Added in version 0.3.66"
key_encoder: Hashing algorithm to use for hashing the document.
If not provided, a default encoder using SHA-1 will be used.
@@ -701,10 +701,10 @@ async def aindex(
When changing the key encoder, you must change the
index as well to avoid duplicated documents in the cache.
upsert_kwargs: Additional keyword arguments to pass to the add_documents
method of the `VectorStore` or the upsert method of the DocumentIndex.
method of the VectorStore or the upsert method of the DocumentIndex.
For example, you can use this to specify a custom vector_field:
upsert_kwargs={"vector_field": "embedding"}
!!! version-added "Added in `langchain-core` 0.3.10"
!!! version-added "Added in version 0.3.10"
Returns:
Indexing result which contains information about how many documents
@@ -713,10 +713,10 @@ async def aindex(
Raises:
ValueError: If cleanup mode is not one of 'incremental', 'full' or None
ValueError: If cleanup mode is incremental and source_id_key is None.
ValueError: If `VectorStore` does not have
ValueError: If vectorstore does not have
"adelete" and "aadd_documents" required methods.
ValueError: If source_id_key is not None, but is not a string or callable.
TypeError: If `vector_store` is not a `VectorStore` or DocumentIndex.
TypeError: If `vector_store` is not a VectorStore or DocumentIndex.
AssertionError: If `source_id_key` is None when cleanup mode is
incremental or `scoped_full` (should be unreachable).
"""
@@ -757,7 +757,7 @@ async def aindex(
type(destination).adelete == VectorStore.adelete
and type(destination).delete == VectorStore.delete
):
# Checking if the VectorStore has overridden the default adelete or delete
# Checking if the vectorstore has overridden the default adelete or delete
# methods implementation which just raises a NotImplementedError
msg = "Vectorstore has not implemented the adelete or delete method"
raise ValueError(msg)
@@ -815,11 +815,11 @@ async def aindex(
]
if cleanup in {"incremental", "scoped_full"}:
# If the cleanup mode is incremental, source IDs are required.
# If the cleanup mode is incremental, source ids are required.
for source_id, hashed_doc in zip(source_ids, hashed_docs, strict=False):
if source_id is None:
msg = (
f"Source IDs are required when cleanup mode is "
f"Source ids are required when cleanup mode is "
f"incremental or scoped_full. "
f"Document that starts with "
f"content: {hashed_doc.page_content[:100]} "
@@ -828,7 +828,7 @@ async def aindex(
raise ValueError(msg)
if cleanup == "scoped_full":
scoped_full_cleanup_source_ids.add(source_id)
# Source IDs cannot be None after for loop above.
# source ids cannot be None after for loop above.
source_ids = cast("Sequence[str]", source_ids)
exists_batch = await record_manager.aexists(
@@ -888,7 +888,7 @@ async def aindex(
if cleanup == "incremental":
# Get the uids of the documents that were not returned by the loader.
# mypy isn't good enough to determine that source IDs cannot be None
# mypy isn't good enough to determine that source ids cannot be None
# here due to a check that's happening above, so we check again.
for source_id in source_ids:
if source_id is None:

View File

@@ -25,7 +25,7 @@ class RecordManager(ABC):
The record manager abstraction is used by the langchain indexing API.
The record manager keeps track of which documents have been
written into a `VectorStore` and when they were written.
written into a vectorstore and when they were written.
The indexing API computes hashes for each document and stores the hash
together with the write time and the source id in the record manager.
@@ -37,7 +37,7 @@ class RecordManager(ABC):
already been indexed, and to only index new documents.
The main benefit of this abstraction is that it works across many vectorstores.
To be supported, a `VectorStore` needs to only support the ability to add and
To be supported, a vectorstore needs to only support the ability to add and
delete documents by ID. Using the record manager, the indexing API will
be able to delete outdated documents and avoid redundant indexing of documents
that have already been indexed.
@@ -45,13 +45,13 @@ class RecordManager(ABC):
The main constraints of this abstraction are:
1. It relies on the time-stamps to determine which documents have been
indexed and which have not. This means that the time-stamps must be
monotonically increasing. The timestamp should be the timestamp
as measured by the server to minimize issues.
indexed and which have not. This means that the time-stamps must be
monotonically increasing. The timestamp should be the timestamp
as measured by the server to minimize issues.
2. The record manager is currently implemented separately from the
vectorstore, which means that the overall system becomes distributed
and may create issues with consistency. For example, writing to
record manager succeeds, but corresponding writing to `VectorStore` fails.
vectorstore, which means that the overall system becomes distributed
and may create issues with consistency. For example, writing to
record manager succeeds, but corresponding writing to vectorstore fails.
"""
def __init__(
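For context, a hedged sketch of the record-manager contract described above, using the in-memory implementation purely for illustration:

```python
from langchain_core.indexing import InMemoryRecordManager

manager = InMemoryRecordManager(namespace="demo/records")
manager.create_schema()

# Record that two document hashes were written, tagging each with its source id.
manager.update(["hash-a", "hash-b"], group_ids=["cats.txt", "dogs.txt"])

print(manager.exists(["hash-a", "hash-missing"]))  # [True, False]

# Keys belonging to a group (or written before a timestamp) can be listed and
# deleted; this is how the indexing API cleans up stale documents.
stale = manager.list_keys(group_ids=["cats.txt"])
manager.delete_keys(stale)
```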
@@ -460,7 +460,7 @@ class UpsertResponse(TypedDict):
class DeleteResponse(TypedDict, total=False):
"""A generic response for delete operation.
The fields in this response are optional and whether the `VectorStore`
The fields in this response are optional and whether the vectorstore
returns them or not is up to the implementation.
"""
@@ -508,6 +508,8 @@ class DocumentIndex(BaseRetriever):
1. Storing document in the index.
2. Fetching document by ID.
3. Searching for document using a query.
!!! version-added "Added in version 0.2.29"
"""
@abc.abstractmethod
@@ -518,7 +520,7 @@ class DocumentIndex(BaseRetriever):
if it is provided. If the ID is not provided, the upsert method is free
to generate an ID for the content.
When an ID is specified and the content already exists in the `VectorStore`,
When an ID is specified and the content already exists in the vectorstore,
the upsert method should update the content with the new data. If the content
does not exist, the upsert method should add the item to the `VectorStore`.
@@ -528,20 +530,20 @@ class DocumentIndex(BaseRetriever):
Returns:
A response object that contains the list of IDs that were
successfully added or updated in the `VectorStore` and the list of IDs that
successfully added or updated in the vectorstore and the list of IDs that
failed to be added or updated.
"""
async def aupsert(
self, items: Sequence[Document], /, **kwargs: Any
) -> UpsertResponse:
"""Add or update documents in the `VectorStore`. Async version of `upsert`.
"""Add or update documents in the vectorstore. Async version of upsert.
The upsert functionality should utilize the ID field of the item
if it is provided. If the ID is not provided, the upsert method is free
to generate an ID for the item.
When an ID is specified and the item already exists in the `VectorStore`,
When an ID is specified and the item already exists in the vectorstore,
the upsert method should update the item with the new data. If the item
does not exist, the upsert method should add the item to the `VectorStore`.
@@ -551,7 +553,7 @@ class DocumentIndex(BaseRetriever):
Returns:
A response object that contains the list of IDs that were
successfully added or updated in the `VectorStore` and the list of IDs that
successfully added or updated in the vectorstore and the list of IDs that
failed to be added or updated.
"""
return await run_in_executor(
@@ -568,7 +570,7 @@ class DocumentIndex(BaseRetriever):
Calling delete without any input parameters should raise a ValueError!
Args:
ids: List of IDs to delete.
ids: List of ids to delete.
**kwargs: Additional keyword arguments. This is up to the implementation.
For example, can include an option to delete the entire index,
or else issue a non-blocking delete etc.
@@ -586,7 +588,7 @@ class DocumentIndex(BaseRetriever):
Calling adelete without any input parameters should raise a ValueError!
Args:
ids: List of IDs to delete.
ids: List of ids to delete.
**kwargs: Additional keyword arguments. This is up to the implementation.
For example, can include an option to delete the entire index.

View File

@@ -23,6 +23,8 @@ class InMemoryDocumentIndex(DocumentIndex):
It provides a simple search API that returns documents by the number of
counts the given query appears in the document.
!!! version-added "Added in version 0.2.29"
"""
store: dict[str, Document] = Field(default_factory=dict)
@@ -62,10 +64,10 @@ class InMemoryDocumentIndex(DocumentIndex):
"""Delete by IDs.
Args:
ids: List of IDs to delete.
ids: List of ids to delete.
Raises:
ValueError: If IDs is None.
ValueError: If ids is None.
Returns:
A response object that contains the list of IDs that were successfully

View File

@@ -6,13 +6,12 @@ LangChain has two main classes to work with language models: chat models and
**Chat models**
Language models that use a sequence of messages as inputs and return chat messages
as outputs (as opposed to using plain text).
as outputs (as opposed to using plain text). Chat models support the assignment of
distinct roles to conversation messages, helping to distinguish messages from the AI,
users, and instructions such as system messages.
Chat models support the assignment of distinct roles to conversation messages, helping
to distinguish messages from the AI, users, and instructions such as system messages.
The key abstraction for chat models is `BaseChatModel`. Implementations should inherit
from this class.
The key abstraction for chat models is `BaseChatModel`. Implementations
should inherit from this class.
See existing [chat model integrations](https://docs.langchain.com/oss/python/integrations/chat).

View File

@@ -139,7 +139,7 @@ def _normalize_messages(
directly; this may change in the future
- LangChain v0 standard content blocks for backward compatibility
!!! warning "Behavior changed in `langchain-core` 1.0.0"
!!! warning "Behavior changed in 1.0.0"
In previous versions, this function returned messages in LangChain v0 format.
Now, it returns messages in LangChain v1 format, which upgraded chat models now
expect to receive when passing back in message history. For backward

View File

@@ -131,19 +131,14 @@ class BaseLanguageModel(
Caching is not currently supported for streaming methods of models.
"""
verbose: bool = Field(default_factory=_get_verbosity, exclude=True, repr=False)
"""Whether to print out response text."""
callbacks: Callbacks = Field(default=None, exclude=True)
"""Callbacks to add to the run trace."""
tags: list[str] | None = Field(default=None, exclude=True)
"""Tags to add to the run trace."""
metadata: dict[str, Any] | None = Field(default=None, exclude=True)
"""Metadata to add to the run trace."""
custom_get_token_ids: Callable[[str], list[int]] | None = Field(
default=None, exclude=True
)
@@ -200,22 +195,15 @@ class BaseLanguageModel(
type (e.g., pure text completion models vs chat models).
Args:
prompts: List of `PromptValue` objects.
A `PromptValue` is an object that can be converted to match the format
of any language model (string for pure text generation models and
`BaseMessage` objects for chat models).
stop: Stop words to use when generating.
Model output is cut off at the first occurrence of any of these
substrings.
callbacks: `Callbacks` to pass through.
Used for executing additional functionality, such as logging or
streaming, throughout generation.
**kwargs: Arbitrary additional keyword arguments.
These are usually passed to the model provider API call.
prompts: List of `PromptValue` objects. A `PromptValue` is an object that
can be converted to match the format of any language model (string for
pure text generation models and `BaseMessage` objects for chat models).
stop: Stop words to use when generating. Model output is cut off at the
first occurrence of any of these substrings.
callbacks: `Callbacks` to pass through. Used for executing additional
functionality, such as logging or streaming, throughout generation.
**kwargs: Arbitrary additional keyword arguments. These are usually passed
to the model provider API call.
Returns:
An `LLMResult`, which contains a list of candidate `Generation` objects for
@@ -244,22 +232,15 @@ class BaseLanguageModel(
type (e.g., pure text completion models vs chat models).
Args:
prompts: List of `PromptValue` objects.
A `PromptValue` is an object that can be converted to match the format
of any language model (string for pure text generation models and
`BaseMessage` objects for chat models).
stop: Stop words to use when generating.
Model output is cut off at the first occurrence of any of these
substrings.
callbacks: `Callbacks` to pass through.
Used for executing additional functionality, such as logging or
streaming, throughout generation.
**kwargs: Arbitrary additional keyword arguments.
These are usually passed to the model provider API call.
prompts: List of `PromptValue` objects. A `PromptValue` is an object that
can be converted to match the format of any language model (string for
pure text generation models and `BaseMessage` objects for chat models).
stop: Stop words to use when generating. Model output is cut off at the
first occurrence of any of these substrings.
callbacks: `Callbacks` to pass through. Used for executing additional
functionality, such as logging or streaming, throughout generation.
**kwargs: Arbitrary additional keyword arguments. These are usually passed
to the model provider API call.
Returns:
An `LLMResult`, which contains a list of candidate `Generation` objects for
@@ -281,13 +262,13 @@ class BaseLanguageModel(
return self.lc_attributes
def get_token_ids(self, text: str) -> list[int]:
"""Return the ordered IDs of the tokens in a text.
"""Return the ordered ids of the tokens in a text.
Args:
text: The string input to tokenize.
Returns:
A list of IDs corresponding to the tokens in the text, in order they occur
A list of ids corresponding to the tokens in the text, in order they occur
in the text.
"""
if self.custom_get_token_ids is not None:
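Illustrative only: a sketch of `get_token_ids` with a `custom_get_token_ids` override, using a fake chat model from `langchain-core` so the example has no external dependencies.

```python
from langchain_core.language_models import FakeListChatModel


def whitespace_token_ids(text: str) -> list[int]:
    # Toy "tokenizer": one id per whitespace-separated word, by position.
    return list(range(len(text.split())))


model = FakeListChatModel(
    responses=["ok"],
    custom_get_token_ids=whitespace_token_ids,
)

print(model.get_token_ids("the cat sat"))   # [0, 1, 2]
print(model.get_num_tokens("the cat sat"))  # 3
```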

View File

@@ -15,7 +15,6 @@ from typing import TYPE_CHECKING, Any, Literal, cast
from pydantic import BaseModel, ConfigDict, Field
from typing_extensions import override
from langchain_core._api.beta_decorator import beta
from langchain_core.caches import BaseCache
from langchain_core.callbacks import (
AsyncCallbackManager,
@@ -76,8 +75,6 @@ from langchain_core.utils.utils import LC_ID_PREFIX, from_env
if TYPE_CHECKING:
import uuid
from langchain_model_profiles import ModelProfile # type: ignore[import-untyped]
from langchain_core.output_parsers.base import OutputParserLike
from langchain_core.runnables import Runnable, RunnableConfig
from langchain_core.tools import BaseTool
@@ -332,7 +329,7 @@ class BaseChatModel(BaseLanguageModel[AIMessage], ABC):
[`langchain-openai`](https://pypi.org/project/langchain-openai)) can also use this
field to roll out new content formats in a backward-compatible way.
!!! version-added "Added in `langchain-core` 1.0"
!!! version-added "Added in version 1.0"
"""
@@ -845,21 +842,16 @@ class BaseChatModel(BaseLanguageModel[AIMessage], ABC):
Args:
messages: List of list of messages.
stop: Stop words to use when generating.
Model output is cut off at the first occurrence of any of these
substrings.
callbacks: `Callbacks` to pass through.
Used for executing additional functionality, such as logging or
streaming, throughout generation.
stop: Stop words to use when generating. Model output is cut off at the
first occurrence of any of these substrings.
callbacks: `Callbacks` to pass through. Used for executing additional
functionality, such as logging or streaming, throughout generation.
tags: The tags to apply.
metadata: The metadata to apply.
run_name: The name of the run.
run_id: The ID of the run.
**kwargs: Arbitrary additional keyword arguments.
These are usually passed to the model provider API call.
**kwargs: Arbitrary additional keyword arguments. These are usually passed
to the model provider API call.
Returns:
An `LLMResult`, which contains a list of candidate `Generations` for each
@@ -968,21 +960,16 @@ class BaseChatModel(BaseLanguageModel[AIMessage], ABC):
Args:
messages: List of list of messages.
stop: Stop words to use when generating.
Model output is cut off at the first occurrence of any of these
substrings.
callbacks: `Callbacks` to pass through.
Used for executing additional functionality, such as logging or
streaming, throughout generation.
stop: Stop words to use when generating. Model output is cut off at the
first occurrence of any of these substrings.
callbacks: `Callbacks` to pass through. Used for executing additional
functionality, such as logging or streaming, throughout generation.
tags: The tags to apply.
metadata: The metadata to apply.
run_name: The name of the run.
run_id: The ID of the run.
**kwargs: Arbitrary additional keyword arguments.
These are usually passed to the model provider API call.
**kwargs: Arbitrary additional keyword arguments. These are usually passed
to the model provider API call.
Returns:
An `LLMResult`, which contains a list of candidate `Generations` for each
@@ -1515,10 +1502,10 @@ class BaseChatModel(BaseLanguageModel[AIMessage], ABC):
Args:
schema: The output schema. Can be passed in as:
- An OpenAI function/tool schema,
- A JSON Schema,
- A `TypedDict` class,
- Or a Pydantic class.
- an OpenAI function/tool schema,
- a JSON Schema,
- a `TypedDict` class,
- or a Pydantic class.
If `schema` is a Pydantic class then the model output will be a
Pydantic instance of that class, and the model-generated fields will be
@@ -1530,15 +1517,11 @@ class BaseChatModel(BaseLanguageModel[AIMessage], ABC):
when specifying a Pydantic or `TypedDict` class.
include_raw:
If `False` then only the parsed structured output is returned.
If an error occurs during model output parsing it will be raised.
If `True` then both the raw model response (a `BaseMessage`) and the
parsed model response will be returned.
If an error occurs during output parsing it will be caught and returned
as well.
If `False` then only the parsed structured output is returned. If
an error occurs during model output parsing it will be raised. If `True`
then both the raw model response (a `BaseMessage`) and the parsed model
response will be returned. If an error occurs during output parsing it
will be caught and returned as well.
The final output is always a `dict` with keys `'raw'`, `'parsed'`, and
`'parsing_error'`.
@@ -1643,8 +1626,8 @@ class BaseChatModel(BaseLanguageModel[AIMessage], ABC):
# }
```
!!! warning "Behavior changed in `langchain-core` 0.2.26"
Added support for `TypedDict` class.
!!! warning "Behavior changed in 0.2.26"
Added support for TypedDict class.
""" # noqa: E501
_ = kwargs.pop("method", None)
@@ -1685,40 +1668,6 @@ class BaseChatModel(BaseLanguageModel[AIMessage], ABC):
return RunnableMap(raw=llm) | parser_with_fallback
return llm | output_parser
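For context, a hedged sketch of `with_structured_output` with a Pydantic schema; the chat model is left as a parameter, since any provider integration supporting tool calling or native structured output could be substituted.

```python
from pydantic import BaseModel, Field

from langchain_core.language_models.chat_models import BaseChatModel


class Joke(BaseModel):
    """A joke to tell the user."""

    setup: str = Field(description="The setup of the joke")
    punchline: str = Field(description="The punchline of the joke")


def tell_joke(model: BaseChatModel) -> None:
    structured = model.with_structured_output(Joke, include_raw=True)
    result = structured.invoke("Tell me a joke about cats")
    # With include_raw=True the result is a dict with 'raw', 'parsed', and
    # 'parsing_error' keys.
    print(result["parsed"].setup, "/", result["parsed"].punchline)
```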
@property
@beta()
def profile(self) -> ModelProfile:
"""Return profiling information for the model.
This property relies on the `langchain-model-profiles` package to retrieve chat
model capabilities, such as context window sizes and supported features.
Raises:
ImportError: If `langchain-model-profiles` is not installed.
Returns:
A `ModelProfile` object containing profiling information for the model.
"""
try:
from langchain_model_profiles import get_model_profile # noqa: PLC0415
except ImportError as err:
informative_error_message = (
"To access model profiling information, please install the "
"`langchain-model-profiles` package: "
"`pip install langchain-model-profiles`."
)
raise ImportError(informative_error_message) from err
provider_id = self._llm_type
model_name = (
# Model name is not standardized across integrations. New integrations
# should prefer `model`.
getattr(self, "model", None)
or getattr(self, "model_name", None)
or getattr(self, "model_id", "")
)
return get_model_profile(provider_id, model_name) or {}
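A brief, illustrative sketch of the (beta) `profile` property shown in this hunk; it depends on the optional `langchain-model-profiles` package and a concrete chat model, so treat it as a sketch only.

```python
def describe_model(model) -> None:
    try:
        profile = model.profile  # raises ImportError without langchain-model-profiles
    except ImportError:
        print("Install `langchain-model-profiles` to inspect model capabilities.")
        return
    print(profile)  # e.g. context window size and supported features
```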
class SimpleChatModel(BaseChatModel):
"""Simplified implementation for a chat model to inherit from.
@@ -1777,12 +1726,9 @@ def _gen_info_and_msg_metadata(
}
_MAX_CLEANUP_DEPTH = 100
def _cleanup_llm_representation(serialized: Any, depth: int) -> None:
"""Remove non-serializable objects from a serialized object."""
if depth > _MAX_CLEANUP_DEPTH: # Don't cooperate for pathological cases
if depth > 100: # Don't cooperate for pathological cases
return
if not isinstance(serialized, dict):

View File

@@ -1,4 +1,4 @@
"""Fake chat models for testing purposes."""
"""Fake chat model for testing purposes."""
import asyncio
import re

View File

@@ -1,7 +1,4 @@
"""Base interface for traditional large language models (LLMs) to expose.
These are traditionally older models (newer models generally are chat models).
"""
"""Base interface for large language models to expose."""
from __future__ import annotations
@@ -651,12 +648,9 @@ class BaseLLM(BaseLanguageModel[str], ABC):
Args:
prompts: The prompts to generate from.
stop: Stop words to use when generating.
Model output is cut off at the first occurrence of any of these
substrings.
If stop tokens are not supported consider raising `NotImplementedError`.
stop: Stop words to use when generating. Model output is cut off at the
first occurrence of any of the stop substrings.
If stop tokens are not supported consider raising NotImplementedError.
run_manager: Callback manager for the run.
Returns:
@@ -674,12 +668,9 @@ class BaseLLM(BaseLanguageModel[str], ABC):
Args:
prompts: The prompts to generate from.
stop: Stop words to use when generating.
Model output is cut off at the first occurrence of any of these
substrings.
If stop tokens are not supported consider raising `NotImplementedError`.
stop: Stop words to use when generating. Model output is cut off at the
first occurrence of any of the stop substrings.
If stop tokens are not supported consider raising NotImplementedError.
run_manager: Callback manager for the run.
Returns:
@@ -711,14 +702,11 @@ class BaseLLM(BaseLanguageModel[str], ABC):
Args:
prompt: The prompt to generate from.
stop: Stop words to use when generating.
Model output is cut off at the first occurrence of any of these
substrings.
stop: Stop words to use when generating. Model output is cut off at the
first occurrence of any of these substrings.
run_manager: Callback manager for the run.
**kwargs: Arbitrary additional keyword arguments.
These are usually passed to the model provider API call.
**kwargs: Arbitrary additional keyword arguments. These are usually passed
to the model provider API call.
Yields:
Generation chunks.
@@ -740,14 +728,11 @@ class BaseLLM(BaseLanguageModel[str], ABC):
Args:
prompt: The prompt to generate from.
stop: Stop words to use when generating.
Model output is cut off at the first occurrence of any of these
substrings.
stop: Stop words to use when generating. Model output is cut off at the
first occurrence of any of these substrings.
run_manager: Callback manager for the run.
**kwargs: Arbitrary additional keyword arguments.
These are usually passed to the model provider API call.
**kwargs: Arbitrary additional keyword arguments. These are usually passed
to the model provider API call.
Yields:
Generation chunks.
@@ -858,14 +843,10 @@ class BaseLLM(BaseLanguageModel[str], ABC):
Args:
prompts: List of string prompts.
stop: Stop words to use when generating.
Model output is cut off at the first occurrence of any of these
substrings.
callbacks: `Callbacks` to pass through.
Used for executing additional functionality, such as logging or
streaming, throughout generation.
stop: Stop words to use when generating. Model output is cut off at the
first occurrence of any of these substrings.
callbacks: `Callbacks` to pass through. Used for executing additional
functionality, such as logging or streaming, throughout generation.
tags: List of tags to associate with each prompt. If provided, the length
of the list must match the length of the prompts list.
metadata: List of metadata dictionaries to associate with each prompt. If
@@ -875,9 +856,8 @@ class BaseLLM(BaseLanguageModel[str], ABC):
length of the list must match the length of the prompts list.
run_id: List of run IDs to associate with each prompt. If provided, the
length of the list must match the length of the prompts list.
**kwargs: Arbitrary additional keyword arguments.
These are usually passed to the model provider API call.
**kwargs: Arbitrary additional keyword arguments. These are usually passed
to the model provider API call.
Raises:
ValueError: If prompts is not a list.
@@ -1133,14 +1113,10 @@ class BaseLLM(BaseLanguageModel[str], ABC):
Args:
prompts: List of string prompts.
stop: Stop words to use when generating.
Model output is cut off at the first occurrence of any of these
substrings.
callbacks: `Callbacks` to pass through.
Used for executing additional functionality, such as logging or
streaming, throughout generation.
stop: Stop words to use when generating. Model output is cut off at the
first occurrence of any of these substrings.
callbacks: `Callbacks` to pass through. Used for executing additional
functionality, such as logging or streaming, throughout generation.
tags: List of tags to associate with each prompt. If provided, the length
of the list must match the length of the prompts list.
metadata: List of metadata dictionaries to associate with each prompt. If
@@ -1150,9 +1126,8 @@ class BaseLLM(BaseLanguageModel[str], ABC):
length of the list must match the length of the prompts list.
run_id: List of run IDs to associate with each prompt. If provided, the
length of the list must match the length of the prompts list.
**kwargs: Arbitrary additional keyword arguments.
These are usually passed to the model provider API call.
**kwargs: Arbitrary additional keyword arguments. These are usually passed
to the model provider API call.
Raises:
ValueError: If the length of `callbacks`, `tags`, `metadata`, or
@@ -1416,6 +1391,11 @@ class LLM(BaseLLM):
`astream` will use `_astream` if provided, otherwise it will implement
a fallback behavior that will use `_stream` if `_stream` is implemented,
and use `_acall` if `_stream` is not implemented.
Please see the following guide for more information on how to
implement a custom LLM:
https://python.langchain.com/docs/how_to/custom_llm/
"""
@abstractmethod
@@ -1432,16 +1412,12 @@ class LLM(BaseLLM):
Args:
prompt: The prompt to generate from.
stop: Stop words to use when generating.
Model output is cut off at the first occurrence of any of these
substrings.
If stop tokens are not supported consider raising `NotImplementedError`.
stop: Stop words to use when generating. Model output is cut off at the
first occurrence of any of the stop substrings.
If stop tokens are not supported consider raising NotImplementedError.
run_manager: Callback manager for the run.
**kwargs: Arbitrary additional keyword arguments.
These are usually passed to the model provider API call.
**kwargs: Arbitrary additional keyword arguments. These are usually passed
to the model provider API call.
Returns:
The model output as a string. SHOULD NOT include the prompt.
@@ -1462,16 +1438,12 @@ class LLM(BaseLLM):
Args:
prompt: The prompt to generate from.
stop: Stop words to use when generating.
Model output is cut off at the first occurrence of any of these
substrings.
If stop tokens are not supported consider raising `NotImplementedError`.
stop: Stop words to use when generating. Model output is cut off at the
first occurrence of any of the stop substrings.
If stop tokens are not supported consider raising NotImplementedError.
run_manager: Callback manager for the run.
**kwargs: Arbitrary additional keyword arguments.
These are usually passed to the model provider API call.
**kwargs: Arbitrary additional keyword arguments. These are usually passed
to the model provider API call.
Returns:
The model output as a string. SHOULD NOT include the prompt.

View File

@@ -17,7 +17,7 @@ def default(obj: Any) -> Any:
obj: The object to serialize to json if it is a Serializable object.
Returns:
A JSON serializable object or a SerializedNotImplemented object.
A json serializable object or a SerializedNotImplemented object.
"""
if isinstance(obj, Serializable):
return obj.to_json()
@@ -38,7 +38,7 @@ def _dump_pydantic_models(obj: Any) -> Any:
def dumps(obj: Any, *, pretty: bool = False, **kwargs: Any) -> str:
"""Return a JSON string representation of an object.
"""Return a json string representation of an object.
Args:
obj: The object to dump.
@@ -47,7 +47,7 @@ def dumps(obj: Any, *, pretty: bool = False, **kwargs: Any) -> str:
**kwargs: Additional arguments to pass to `json.dumps`
Returns:
A JSON string representation of the object.
A json string representation of the object.
Raises:
ValueError: If `default` is passed as a kwarg.
@@ -71,12 +71,14 @@ def dumps(obj: Any, *, pretty: bool = False, **kwargs: Any) -> str:
def dumpd(obj: Any) -> Any:
"""Return a dict representation of an object.
!!! note
Unfortunately this function is not as efficient as it could be because it first
dumps the object to a json string and then loads it back into a dictionary.
Args:
obj: The object to dump.
Returns:
Dictionary that can be serialized to json using `json.dumps`.
dictionary that can be serialized to json using json.dumps
"""
# Unfortunately this function is not as efficient as it could be because it first
# dumps the object to a json string and then loads it back into a dictionary.
return json.loads(dumps(obj))
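A small round-trip sketch of the serialization helpers in this module; the prompt object is just an arbitrary serializable LangChain object.

```python
from langchain_core.load import dumpd, dumps, load
from langchain_core.prompts import ChatPromptTemplate

prompt = ChatPromptTemplate.from_messages(
    [("system", "You are terse."), ("human", "{question}")]
)

as_dict = dumpd(prompt)                # plain dict, suitable for json.dumps
as_json = dumps(prompt, pretty=True)   # JSON string

restored = load(as_dict)               # reconstructs the object from the dict
print(restored == prompt)              # True for an equivalent reconstruction
```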

View File

@@ -265,8 +265,6 @@ def load(
return reviver(loaded_obj)
if isinstance(obj, list):
return [_load(o) for o in obj]
if isinstance(obj, str) and obj in reviver.secrets_map:
return reviver.secrets_map[obj]
return obj
return _load(obj)

View File

@@ -97,14 +97,11 @@ class Serializable(BaseModel, ABC):
by default. This is to prevent accidental serialization of objects that should
not be serialized.
- `get_lc_namespace`: Get the namespace of the LangChain object.
During deserialization, this namespace is used to identify
the correct class to instantiate.
Please see the `Reviver` class in `langchain_core.load.load` for more details.
During deserialization an additional mapping is used to handle classes that have moved
or been renamed across package versions.
- `lc_secrets`: A map of constructor argument names to secret ids.
- `lc_attributes`: List of additional attribute names that should be included
as part of the serialized representation.
@@ -197,7 +194,7 @@ class Serializable(BaseModel, ABC):
ValueError: If the class has deprecated attributes.
Returns:
A JSON serializable object or a `SerializedNotImplemented` object.
A json serializable object or a `SerializedNotImplemented` object.
"""
if not self.is_lc_serializable():
return self.to_json_not_implemented()
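Illustrative only: a toy `Serializable` subclass exercising the hooks listed above (`is_lc_serializable`, `get_lc_namespace`, `lc_secrets`); the package namespace and secret id are made up for the example.

```python
from langchain_core.load.serializable import Serializable


class MyClient(Serializable):
    endpoint: str
    api_key: str

    @classmethod
    def is_lc_serializable(cls) -> bool:
        # Opt in to serialization (False by default).
        return True

    @classmethod
    def get_lc_namespace(cls) -> list[str]:
        # Where the deserializer should look for this class.
        return ["my_package", "clients"]

    @property
    def lc_secrets(self) -> dict[str, str]:
        # Map constructor arg -> secret id so the key is never serialized in plain text.
        return {"api_key": "MY_API_KEY"}


client = MyClient(endpoint="https://example.com", api_key="sk-123")
print(client.to_json()["kwargs"])  # api_key is replaced by a secret reference
```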

View File

@@ -9,9 +9,6 @@ if TYPE_CHECKING:
from langchain_core.messages.ai import (
AIMessage,
AIMessageChunk,
InputTokenDetails,
OutputTokenDetails,
UsageMetadata,
)
from langchain_core.messages.base import (
BaseMessage,
@@ -90,12 +87,10 @@ __all__ = (
"HumanMessage",
"HumanMessageChunk",
"ImageContentBlock",
"InputTokenDetails",
"InvalidToolCall",
"MessageLikeRepresentation",
"NonStandardAnnotation",
"NonStandardContentBlock",
"OutputTokenDetails",
"PlainTextContentBlock",
"ReasoningContentBlock",
"RemoveMessage",
@@ -109,7 +104,6 @@ __all__ = (
"ToolCallChunk",
"ToolMessage",
"ToolMessageChunk",
"UsageMetadata",
"VideoContentBlock",
"_message_from_dict",
"convert_to_messages",
@@ -151,7 +145,6 @@ _dynamic_imports = {
"HumanMessageChunk": "human",
"NonStandardAnnotation": "content",
"NonStandardContentBlock": "content",
"OutputTokenDetails": "ai",
"PlainTextContentBlock": "content",
"ReasoningContentBlock": "content",
"RemoveMessage": "modifier",
@@ -161,14 +154,12 @@ _dynamic_imports = {
"SystemMessage": "system",
"SystemMessageChunk": "system",
"ImageContentBlock": "content",
"InputTokenDetails": "ai",
"InvalidToolCall": "tool",
"TextContentBlock": "content",
"ToolCall": "tool",
"ToolCallChunk": "tool",
"ToolMessage": "tool",
"ToolMessageChunk": "tool",
"UsageMetadata": "ai",
"VideoContentBlock": "content",
"AnyMessage": "utils",
"MessageLikeRepresentation": "utils",

View File

@@ -48,9 +48,9 @@ class InputTokenDetails(TypedDict, total=False):
}
```
May also hold extra provider-specific keys.
!!! version-added "Added in version 0.3.9"
!!! version-added "Added in `langchain-core` 0.3.9"
May also hold extra provider-specific keys.
"""
@@ -83,9 +83,7 @@ class OutputTokenDetails(TypedDict, total=False):
}
```
May also hold extra provider-specific keys.
!!! version-added "Added in `langchain-core` 0.3.9"
!!! version-added "Added in version 0.3.9"
"""
@@ -123,13 +121,9 @@ class UsageMetadata(TypedDict):
}
```
!!! warning "Behavior changed in `langchain-core` 0.3.9"
!!! warning "Behavior changed in 0.3.9"
Added `input_token_details` and `output_token_details`.
!!! note "LangSmith SDK"
The LangSmith SDK also has a `UsageMetadata` class. While the two share fields,
LangSmith's `UsageMetadata` has additional fields to capture cost information
used by the LangSmith platform.
"""
input_tokens: int
@@ -137,7 +131,7 @@ class UsageMetadata(TypedDict):
output_tokens: int
"""Count of output (or completion) tokens. Sum of all output token types."""
total_tokens: int
"""Total token count. Sum of `input_tokens` + `output_tokens`."""
"""Total token count. Sum of input_tokens + output_tokens."""
input_token_details: NotRequired[InputTokenDetails]
"""Breakdown of input token counts.
@@ -147,6 +141,7 @@ class UsageMetadata(TypedDict):
"""Breakdown of output token counts.
Does *not* need to sum to full output token count. Does *not* need to have all keys.
"""
@@ -158,6 +153,7 @@ class AIMessage(BaseMessage):
This message represents the output of the model and consists of both
the raw output as returned by the model and standardized fields
(e.g., tool calls, usage metadata) added by the LangChain framework.
"""
tool_calls: list[ToolCall] = []
@@ -655,13 +651,13 @@ def add_ai_message_chunks(
chunk_id = id_
break
else:
# second pass: prefer lc_run-* IDs over lc_* IDs
# second pass: prefer lc_run-* ids over lc_* ids
for id_ in candidates:
if id_ and id_.startswith(LC_ID_PREFIX):
chunk_id = id_
break
else:
# third pass: take any remaining ID (auto-generated lc_* IDs)
# third pass: take any remaining id (auto-generated lc_* ids)
for id_ in candidates:
if id_:
chunk_id = id_

View File

@@ -93,10 +93,6 @@ class BaseMessage(Serializable):
"""Base abstract message class.
Messages are the inputs and outputs of a chat model.
Examples include [`HumanMessage`][langchain.messages.HumanMessage],
[`AIMessage`][langchain.messages.AIMessage], and
[`SystemMessage`][langchain.messages.SystemMessage].
"""
content: str | list[str | dict]
@@ -199,7 +195,7 @@ class BaseMessage(Serializable):
def content_blocks(self) -> list[types.ContentBlock]:
r"""Load content blocks from the message content.
!!! version-added "Added in `langchain-core` 1.0.0"
!!! version-added "Added in version 1.0.0"
"""
# Needed here to avoid circular import, as these classes import BaseMessages

View File

@@ -368,7 +368,7 @@ def _convert_to_v1_from_genai(message: AIMessage) -> list[types.ContentBlock]:
else:
# Assume it's raw base64 without data URI
try:
# Validate base64 and decode for MIME type detection
# Validate base64 and decode for mime type detection
decoded_bytes = base64.b64decode(url, validate=True)
image_url_b64_block = {
@@ -379,7 +379,7 @@ def _convert_to_v1_from_genai(message: AIMessage) -> list[types.ContentBlock]:
try:
import filetype # type: ignore[import-not-found] # noqa: PLC0415
# Guess MIME type based on file bytes
# Guess mime type based on file bytes
mime_type = None
kind = filetype.guess(decoded_bytes)
if kind:
@@ -458,8 +458,6 @@ def _convert_to_v1_from_genai(message: AIMessage) -> list[types.ContentBlock]:
if outcome is not None:
server_tool_result_block["extras"]["outcome"] = outcome
converted_blocks.append(server_tool_result_block)
elif item_type == "text":
converted_blocks.append(cast("types.TextContentBlock", item))
else:
# Unknown type, preserve as non-standard
converted_blocks.append({"type": "non_standard", "value": item})

View File

@@ -644,7 +644,7 @@ class AudioContentBlock(TypedDict):
class PlainTextContentBlock(TypedDict):
"""Plaintext data (e.g., from a `.txt` or `.md` document).
"""Plaintext data (e.g., from a document).
!!! note
A `PlainTextContentBlock` existed in `langchain-core<1.0.0`. Although the
@@ -767,7 +767,7 @@ class FileContentBlock(TypedDict):
class NonStandardContentBlock(TypedDict):
"""Provider-specific content data.
"""Provider-specific data.
This block contains data for which there is not yet a standard type.
@@ -802,7 +802,7 @@ class NonStandardContentBlock(TypedDict):
"""
value: dict[str, Any]
"""Provider-specific content data."""
"""Provider-specific data."""
index: NotRequired[int | str]
"""Index of block in aggregate response. Used during streaming."""
@@ -867,7 +867,7 @@ def _get_data_content_block_types() -> tuple[str, ...]:
Example: ("image", "video", "audio", "text-plain", "file")
Note that old style multimodal blocks type literals with new style blocks.
Specifically, "image", "audio", and "file".
Specifically, "image", "audio", and "file".
See the docstring of `_normalize_messages` in `language_models._utils` for details.
"""
@@ -906,7 +906,7 @@ def is_data_content_block(block: dict) -> bool:
# 'text' is checked to support v0 PlainTextContentBlock types
# We must guard against new style TextContentBlock which also has 'text' `type`
# by ensuring the presence of `source_type`
# by ensuring the presence of `source_type`
if block["type"] == "text" and "source_type" not in block: # noqa: SIM103 # This is more readable
return False
@@ -1399,7 +1399,7 @@ def create_non_standard_block(
"""Create a `NonStandardContentBlock`.
Args:
value: Provider-specific content data.
value: Provider-specific data.
id: Content block identifier. Generated automatically if not provided.
index: Index of block in aggregate response. Used during streaming.

View File

@@ -86,7 +86,7 @@ AnyMessage = Annotated[
| Annotated[ToolMessageChunk, Tag(tag="ToolMessageChunk")],
Field(discriminator=Discriminator(_get_type)),
]
"""A type representing any defined `Message` or `MessageChunk` type."""
""""A type representing any defined `Message` or `MessageChunk` type."""
def get_buffer_string(
@@ -328,16 +328,12 @@ def _convert_to_message(message: MessageLikeRepresentation) -> BaseMessage:
"""
if isinstance(message, BaseMessage):
message_ = message
elif isinstance(message, Sequence):
if isinstance(message, str):
message_ = _create_message_from_message_type("human", message)
else:
try:
message_type_str, template = message
except ValueError as e:
msg = "Message as a sequence must be (role string, template)"
raise NotImplementedError(msg) from e
message_ = _create_message_from_message_type(message_type_str, template)
elif isinstance(message, str):
message_ = _create_message_from_message_type("human", message)
elif isinstance(message, Sequence) and len(message) == 2:
# mypy doesn't realise this can't be a string given the previous branch
message_type_str, template = message # type: ignore[misc]
message_ = _create_message_from_message_type(message_type_str, template)
elif isinstance(message, dict):
msg_kwargs = message.copy()
try:
@@ -443,8 +439,8 @@ def filter_messages(
exclude_ids: Message IDs to exclude.
exclude_tool_calls: Tool call IDs to exclude.
Can be one of the following:
- `True`: All `AIMessage` objects with tool calls and all `ToolMessage`
objects will be excluded.
- `True`: all `AIMessage`s with tool calls and all
`ToolMessage` objects will be excluded.
- a sequence of tool call IDs to exclude:
- `ToolMessage` objects with the corresponding tool call ID will be
excluded.
@@ -1029,18 +1025,18 @@ def convert_to_openai_messages(
messages: Message-like object or iterable of objects whose contents are
in OpenAI, Anthropic, Bedrock Converse, or VertexAI formats.
text_format: How to format string or text block contents:
- `'string'`:
If a message has a string content, this is left as a string. If
a message has content blocks that are all of type `'text'`, these
are joined with a newline to make a single string. If a message has
content blocks and at least one isn't of type `'text'`, then
all blocks are left as dicts.
- `'block'`:
If a message has a string content, this is turned into a list
with a single content block of type `'text'`. If a message has
content blocks these are left as is.
include_id: Whether to include message IDs in the openai messages, if they
are present in the source messages.
- `'string'`:
If a message has a string content, this is left as a string. If
a message has content blocks that are all of type `'text'`, these
are joined with a newline to make a single string. If a message has
content blocks and at least one isn't of type `'text'`, then
all blocks are left as dicts.
- `'block'`:
If a message has a string content, this is turned into a list
with a single content block of type `'text'`. If a message has
content blocks these are left as is.
include_id: Whether to include message ids in the openai messages, if they
are present in the source messages.
Raises:
ValueError: if an unrecognized `text_format` is specified, or if a message
@@ -1101,7 +1097,7 @@ def convert_to_openai_messages(
# ]
```
!!! version-added "Added in `langchain-core` 0.3.11"
!!! version-added "Added in version 0.3.11"
""" # noqa: E501
if text_format not in {"string", "block"}:
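For context, a hedged sketch of `convert_to_openai_messages` on a small message history:

```python
from langchain_core.messages import (
    AIMessage,
    HumanMessage,
    SystemMessage,
    convert_to_openai_messages,
)

history = [
    SystemMessage("You are terse."),
    HumanMessage("What is 2 + 2?"),
    AIMessage("4"),
]

print(convert_to_openai_messages(history, text_format="string"))
# [{'role': 'system', 'content': 'You are terse.'},
#  {'role': 'user', 'content': 'What is 2 + 2?'},
#  {'role': 'assistant', 'content': '4'}]
```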
@@ -1701,7 +1697,7 @@ def count_tokens_approximately(
Warning:
This function does not currently support counting image tokens.
!!! version-added "Added in `langchain-core` 0.3.46"
!!! version-added "Added in version 0.3.46"
"""
token_count = 0.0
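A quick illustration of the approximate token counter named above:

```python
from langchain_core.messages import HumanMessage
from langchain_core.messages.utils import count_tokens_approximately

messages = [HumanMessage("The quick brown fox jumps over the lazy dog.")]
print(count_tokens_approximately(messages))  # rough chars-per-token estimate, not exact
```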

View File

@@ -1,20 +1,4 @@
"""`OutputParser` classes parse the output of an LLM call into structured data.
!!! tip "Structured output"
Output parsers emerged as an early solution to the challenge of obtaining structured
output from LLMs.
Today, most LLMs support [structured output](https://docs.langchain.com/oss/python/langchain/models#structured-outputs)
natively. In such cases, using output parsers may be unnecessary, and you should
leverage the model's built-in capabilities for structured output. Refer to the
[documentation of your chosen model](https://docs.langchain.com/oss/python/integrations/providers/overview)
for guidance on how to achieve structured output directly.
Output parsers remain valuable when working with models that do not support
structured output natively, or when you require additional processing or validation
of the model's output beyond its inherent capabilities.
"""
"""**OutputParser** classes parse the output of an LLM call."""
from typing import TYPE_CHECKING

View File

@@ -135,9 +135,6 @@ class BaseOutputParser(
Example:
```python
# Implement a simple boolean output parser
class BooleanOutputParser(BaseOutputParser[bool]):
true_val: str = "YES"
false_val: str = "NO"

View File

@@ -1,16 +1,11 @@
"""Format instructions."""
JSON_FORMAT_INSTRUCTIONS = """STRICT OUTPUT FORMAT:
- Return only the JSON value that conforms to the schema. Do not include any additional text, explanations, headings, or separators.
- Do not wrap the JSON in Markdown or code fences (no ``` or ```json).
- Do not prepend or append any text (e.g., do not write "Here is the JSON:").
- The response must be a single top-level JSON value exactly as required by the schema (object/array/etc.), with no trailing commas or comments.
JSON_FORMAT_INSTRUCTIONS = """The output should be formatted as a JSON instance that conforms to the JSON schema below.
The output should be formatted as a JSON instance that conforms to the JSON schema below.
As an example, for the schema {{"properties": {{"foo": {{"title": "Foo", "description": "a list of strings", "type": "array", "items": {{"type": "string"}}}}}}, "required": ["foo"]}}
the object {{"foo": ["bar", "baz"]}} is a well-formatted instance of the schema. The object {{"properties": {{"foo": ["bar", "baz"]}}}} is not well-formatted.
As an example, for the schema {{"properties": {{"foo": {{"title": "Foo", "description": "a list of strings", "type": "array", "items": {{"type": "string"}}}}}}, "required": ["foo"]}} the object {{"foo": ["bar", "baz"]}} is a well-formatted instance of the schema. The object {{"properties": {{"foo": ["bar", "baz"]}}}} is not well-formatted.
Here is the output schema (shown in a code block for readability only — do not include any backticks or Markdown in your output):
Here is the output schema:
```
{schema}
```""" # noqa: E501

View File

@@ -31,14 +31,11 @@ TBaseModel = TypeVar("TBaseModel", bound=PydanticBaseModel)
class JsonOutputParser(BaseCumulativeTransformOutputParser[Any]):
"""Parse the output of an LLM call to a JSON object.
Probably the most reliable output parser for getting structured data that does *not*
use function calling.
When used in streaming mode, it will yield partial JSON objects containing
all the keys that have been returned so far.
In streaming, if `diff` is set to `True`, yields JSONPatch operations describing the
difference between the previous and the current object.
In streaming, if `diff` is set to `True`, yields JSONPatch operations
describing the difference between the previous and the current object.
"""
pydantic_object: Annotated[type[TBaseModel] | None, SkipValidation()] = None # type: ignore[valid-type]
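Illustrative only: a sketch of `JsonOutputParser` yielding partial objects as output streams in; the string chunks stand in for streamed model output.

```python
from langchain_core.output_parsers import JsonOutputParser

parser = JsonOutputParser()

chunks = ['{"name": "La', 'ngChain", "age"', ': 2}']
for partial in parser.transform(iter(chunks)):
    print(partial)
# Roughly:
# {'name': 'La'}
# {'name': 'LangChain'}
# {'name': 'LangChain', 'age': 2}
```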

View File

@@ -41,7 +41,7 @@ def droplastn(
class ListOutputParser(BaseTransformOutputParser[list[str]]):
"""Parse the output of a model to a list."""
"""Parse the output of an LLM call to a list."""
@property
def _type(self) -> str:
@@ -74,30 +74,30 @@ class ListOutputParser(BaseTransformOutputParser[list[str]]):
buffer = ""
for chunk in input:
if isinstance(chunk, BaseMessage):
# Extract text
# extract text
chunk_content = chunk.content
if not isinstance(chunk_content, str):
continue
buffer += chunk_content
else:
# Add current chunk to buffer
# add current chunk to buffer
buffer += chunk
# Parse buffer into a list of parts
# parse buffer into a list of parts
try:
done_idx = 0
# Yield only complete parts
# yield only complete parts
for m in droplastn(self.parse_iter(buffer), 1):
done_idx = m.end()
yield [m.group(1)]
buffer = buffer[done_idx:]
except NotImplementedError:
parts = self.parse(buffer)
# Yield only complete parts
# yield only complete parts
if len(parts) > 1:
for part in parts[:-1]:
yield [part]
buffer = parts[-1]
# Yield the last part
# yield the last part
for part in self.parse(buffer):
yield [part]
@@ -108,40 +108,40 @@ class ListOutputParser(BaseTransformOutputParser[list[str]]):
buffer = ""
async for chunk in input:
if isinstance(chunk, BaseMessage):
# Extract text
# extract text
chunk_content = chunk.content
if not isinstance(chunk_content, str):
continue
buffer += chunk_content
else:
# Add current chunk to buffer
# add current chunk to buffer
buffer += chunk
# Parse buffer into a list of parts
# parse buffer into a list of parts
try:
done_idx = 0
# Yield only complete parts
# yield only complete parts
for m in droplastn(self.parse_iter(buffer), 1):
done_idx = m.end()
yield [m.group(1)]
buffer = buffer[done_idx:]
except NotImplementedError:
parts = self.parse(buffer)
# Yield only complete parts
# yield only complete parts
if len(parts) > 1:
for part in parts[:-1]:
yield [part]
buffer = parts[-1]
# Yield the last part
# yield the last part
for part in self.parse(buffer):
yield [part]
class CommaSeparatedListOutputParser(ListOutputParser):
"""Parse the output of a model to a comma-separated list."""
"""Parse the output of an LLM call to a comma-separated list."""
@classmethod
def is_lc_serializable(cls) -> bool:
"""Return `True` as this class is serializable."""
"""Return True as this class is serializable."""
return True
@classmethod
@@ -177,7 +177,7 @@ class CommaSeparatedListOutputParser(ListOutputParser):
)
return [item for sublist in reader for item in sublist]
except csv.Error:
# Keep old logic for backup
# keep old logic for backup
return [part.strip() for part in text.split(",")]
@property
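A short, hedged usage sketch for `CommaSeparatedListOutputParser` (the input string is illustrative):

```python
from langchain_core.output_parsers import CommaSeparatedListOutputParser

parser = CommaSeparatedListOutputParser()

# Instructions telling the model to answer as a comma-separated list.
print(parser.get_format_instructions())

# csv-based parsing with a plain split(",") fallback, as shown above.
print(parser.parse("red, green, blue"))  # ['red', 'green', 'blue']
```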

View File

@@ -15,11 +15,7 @@ from langchain_core.messages.tool import tool_call as create_tool_call
from langchain_core.output_parsers.transform import BaseCumulativeTransformOutputParser
from langchain_core.outputs import ChatGeneration, Generation
from langchain_core.utils.json import parse_partial_json
from langchain_core.utils.pydantic import (
TypeBaseModel,
is_pydantic_v1_subclass,
is_pydantic_v2_subclass,
)
from langchain_core.utils.pydantic import TypeBaseModel
logger = logging.getLogger(__name__)
@@ -228,7 +224,7 @@ class JsonOutputKeyToolsParser(JsonOutputToolsParser):
result: The result of the LLM call.
partial: Whether to parse partial JSON.
If `True`, the output will be a JSON object containing
all the keys that have been returned so far.
all the keys that have been returned so far.
If `False`, the output will be the full JSON object.
Raises:
@@ -311,7 +307,7 @@ class PydanticToolsParser(JsonOutputToolsParser):
result: The result of the LLM call.
partial: Whether to parse partial JSON.
If `True`, the output will be a JSON object containing
all the keys that have been returned so far.
all the keys that have been returned so far.
If `False`, the output will be the full JSON object.
Returns:
@@ -327,15 +323,7 @@ class PydanticToolsParser(JsonOutputToolsParser):
return None if self.first_tool_only else []
json_results = [json_results] if self.first_tool_only else json_results
name_dict_v2: dict[str, TypeBaseModel] = {
tool.model_config.get("title") or tool.__name__: tool
for tool in self.tools
if is_pydantic_v2_subclass(tool)
}
name_dict_v1: dict[str, TypeBaseModel] = {
tool.__name__: tool for tool in self.tools if is_pydantic_v1_subclass(tool)
}
name_dict: dict[str, TypeBaseModel] = {**name_dict_v2, **name_dict_v1}
name_dict = {tool.__name__: tool for tool in self.tools}
pydantic_objects = []
for res in json_results:
if not isinstance(res["args"], dict):
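To illustrate the simplified `tool.__name__` lookup above, a hedged sketch with a hand-built tool call (the `Add` model and the fake message are illustrative, not part of this diff):

```python
from pydantic import BaseModel

from langchain_core.messages import AIMessage
from langchain_core.output_parsers.openai_tools import PydanticToolsParser
from langchain_core.outputs import ChatGeneration


class Add(BaseModel):
    a: int
    b: int


parser = PydanticToolsParser(tools=[Add], first_tool_only=True)

# A hand-built AIMessage standing in for a chat model's tool-call response.
msg = AIMessage(
    content="",
    tool_calls=[{"name": "Add", "args": {"a": 1, "b": 2}, "id": "call_1", "type": "tool_call"}],
)
print(parser.parse_result([ChatGeneration(message=msg)]))  # Add(a=1, b=2)
```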

View File

@@ -86,7 +86,7 @@ class PydanticOutputParser(JsonOutputParser, Generic[TBaseModel]):
The format instructions for the JSON output.
"""
# Copy schema to avoid altering original Pydantic schema.
schema = dict(self._get_schema(self.pydantic_object).items())
schema = dict(self.pydantic_object.model_json_schema().items())
# Remove extraneous fields.
reduced_schema = schema
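Since the schema now comes straight from `pydantic_object.model_json_schema()`, a hedged round-trip sketch (the `Joke` model is illustrative):

```python
from pydantic import BaseModel, Field

from langchain_core.output_parsers import PydanticOutputParser


class Joke(BaseModel):
    setup: str = Field(description="question that sets up the joke")
    punchline: str = Field(description="answer that resolves the joke")


parser = PydanticOutputParser(pydantic_object=Joke)

# Format instructions are built from Joke.model_json_schema().
print(parser.get_format_instructions())

# Parsing validates the JSON against the Joke model.
joke = parser.parse('{"setup": "Why?", "punchline": "Because."}')
print(joke.punchline)  # "Because."
```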

View File

@@ -6,14 +6,14 @@ from langchain_core.output_parsers.transform import BaseTransformOutputParser
class StrOutputParser(BaseTransformOutputParser[str]):
"""OutputParser that parses `LLMResult` into the top likely string."""
"""OutputParser that parses LLMResult into the top likely string."""
@classmethod
def is_lc_serializable(cls) -> bool:
"""`StrOutputParser` is serializable.
"""StrOutputParser is serializable.
Returns:
`True`
True
"""
return True
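A tiny sketch of `StrOutputParser`, which accepts both plain strings and chat messages:

```python
from langchain_core.messages import AIMessage
from langchain_core.output_parsers import StrOutputParser

parser = StrOutputParser()
print(parser.invoke("hello"))                     # "hello"
print(parser.invoke(AIMessage(content="hello")))  # "hello"
```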

View File

@@ -43,19 +43,19 @@ class _StreamingParser:
"""Streaming parser for XML.
This implementation is pulled into a class to avoid implementation
drift between transform and atransform of the `XMLOutputParser`.
drift between transform and atransform of the XMLOutputParser.
"""
def __init__(self, parser: Literal["defusedxml", "xml"]) -> None:
"""Initialize the streaming parser.
Args:
parser: Parser to use for XML parsing. Can be either `'defusedxml'` or
`'xml'`. See documentation in `XMLOutputParser` for more information.
parser: Parser to use for XML parsing. Can be either 'defusedxml' or 'xml'.
See documentation in XMLOutputParser for more information.
Raises:
ImportError: If `defusedxml` is not installed and the `defusedxml` parser is
requested.
ImportError: If defusedxml is not installed and the defusedxml
parser is requested.
"""
if parser == "defusedxml":
if not _HAS_DEFUSEDXML:
@@ -79,10 +79,10 @@ class _StreamingParser:
"""Parse a chunk of text.
Args:
chunk: A chunk of text to parse. This can be a `str` or a `BaseMessage`.
chunk: A chunk of text to parse. This can be a string or a BaseMessage.
Yields:
A `dict` representing the parsed XML element.
A dictionary representing the parsed XML element.
Raises:
xml.etree.ElementTree.ParseError: If the XML is not well-formed.
@@ -147,49 +147,46 @@ class _StreamingParser:
class XMLOutputParser(BaseTransformOutputParser):
"""Parse an output using xml format.
Returns a dictionary of tags.
"""
"""Parse an output using xml format."""
tags: list[str] | None = None
"""Tags to tell the LLM to expect in the XML output.
Note this may not be perfect depending on the LLM implementation.
For example, with `tags=["foo", "bar", "baz"]`:
For example, with tags=["foo", "bar", "baz"]:
1. A well-formatted XML instance:
`"<foo>\n <bar>\n <baz></baz>\n </bar>\n</foo>"`
"<foo>\n <bar>\n <baz></baz>\n </bar>\n</foo>"
2. A badly-formatted XML instance (missing closing tag for 'bar'):
`"<foo>\n <bar>\n </foo>"`
"<foo>\n <bar>\n </foo>"
3. A badly-formatted XML instance (unexpected 'tag' element):
`"<foo>\n <tag>\n </tag>\n</foo>"`
"<foo>\n <tag>\n </tag>\n</foo>"
"""
encoding_matcher: re.Pattern = re.compile(
r"<([^>]*encoding[^>]*)>\n(.*)", re.MULTILINE | re.DOTALL
)
parser: Literal["defusedxml", "xml"] = "defusedxml"
"""Parser to use for XML parsing. Can be either `'defusedxml'` or `'xml'`.
"""Parser to use for XML parsing. Can be either 'defusedxml' or 'xml'.
* `'defusedxml'` is the default parser and is used to prevent XML vulnerabilities
present in some distributions of Python's standard library xml.
`defusedxml` is a wrapper around the standard library parser that
sets up the parser with secure defaults.
* `'xml'` is the standard library parser.
* 'defusedxml' is the default parser and is used to prevent XML vulnerabilities
present in some distributions of Python's standard library xml.
`defusedxml` is a wrapper around the standard library parser that
sets up the parser with secure defaults.
* 'xml' is the standard library parser.
Use `xml` only if you are sure that your distribution of the standard library is not
vulnerable to XML vulnerabilities.
Use `xml` only if you are sure that your distribution of the standard library
is not vulnerable to XML vulnerabilities.
Please review the following resources for more information:
* https://docs.python.org/3/library/xml.html#xml-vulnerabilities
* https://github.com/tiran/defusedxml
The standard library relies on [`libexpat`](https://github.com/libexpat/libexpat)
for parsing XML.
The standard library relies on libexpat for parsing XML:
https://github.com/libexpat/libexpat
"""
def get_format_instructions(self) -> str:
@@ -203,12 +200,12 @@ class XMLOutputParser(BaseTransformOutputParser):
text: The output of an LLM call.
Returns:
A `dict` representing the parsed XML.
A dictionary representing the parsed XML.
Raises:
OutputParserException: If the XML is not well-formed.
ImportError: If `defusedxml` is not installed and the `defusedxml` parser is
requested.
ImportError: If defusedxml is not installed and the defusedxml
parser is requested.
"""
# Try to find XML string within triple backticks
# Imports are temporarily placed here to avoid issue with caching on CI
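A hedged sketch of `XMLOutputParser` usage (the sample XML is illustrative; the default `defusedxml` parser requires the `defusedxml` package, otherwise pass `parser="xml"`):

```python
from langchain_core.output_parsers import XMLOutputParser

# tags only shape the format instructions sent to the model;
# parsing accepts any well-formed XML.
parser = XMLOutputParser(tags=["movies", "actor", "film"])
print(parser.get_format_instructions())

xml = "<movies><actor><name>Tom Hanks</name><film>Forrest Gump</film></actor></movies>"
print(parser.parse(xml))
# e.g. {'movies': [{'actor': [{'name': 'Tom Hanks'}, {'film': 'Forrest Gump'}]}]}
```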

View File

@@ -11,8 +11,9 @@ from langchain_core.utils._merge import merge_dicts
class Generation(Serializable):
"""A single text generation output.
Generation represents the response from an "old-fashioned" LLM (string-in,
string-out) that generates regular text (not chat messages).
Generation represents the response from an
`"old-fashioned" LLM <https://python.langchain.com/docs/concepts/text_llms/>__` that
generates regular text (not chat messages).
This model is used internally by chat models and will eventually
be mapped to a more general `LLMResult` object, and then projected into
@@ -20,7 +21,8 @@ class Generation(Serializable):
LangChain users working with chat models will usually access information via
`AIMessage` (returned from runnable interfaces) or `LLMResult` (available
via callbacks). Please refer to `AIMessage` and `LLMResult` for more information.
via callbacks). Please refer to the `AIMessage` and `LLMResult` schema documentation
for more information.
"""
text: str
@@ -33,13 +35,11 @@ class Generation(Serializable):
"""
type: Literal["Generation"] = "Generation"
"""Type is used exclusively for serialization purposes.
Set to "Generation" for this class.
"""
Set to "Generation" for this class."""
@classmethod
def is_lc_serializable(cls) -> bool:
"""Return `True` as this class is serializable."""
"""Return True as this class is serializable."""
return True
@classmethod
@@ -53,7 +53,7 @@ class Generation(Serializable):
class GenerationChunk(Generation):
"""`GenerationChunk`, which can be concatenated with other Generation chunks."""
"""Generation chunk, which can be concatenated with other Generation chunks."""
def __add__(self, other: GenerationChunk) -> GenerationChunk:
"""Concatenate two `GenerationChunk`s.

View File

@@ -30,13 +30,15 @@ class PromptValue(Serializable, ABC):
@classmethod
def is_lc_serializable(cls) -> bool:
"""Return `True` as this class is serializable."""
"""Return True as this class is serializable."""
return True
@classmethod
def get_lc_namespace(cls) -> list[str]:
"""Get the namespace of the LangChain object.
This is used to determine the namespace of the object when serializing.
Returns:
`["langchain", "schema", "prompt"]`
"""
@@ -48,7 +50,7 @@ class PromptValue(Serializable, ABC):
@abstractmethod
def to_messages(self) -> list[BaseMessage]:
"""Return prompt as a list of messages."""
"""Return prompt as a list of Messages."""
class StringPromptValue(PromptValue):
@@ -62,6 +64,8 @@ class StringPromptValue(PromptValue):
def get_lc_namespace(cls) -> list[str]:
"""Get the namespace of the LangChain object.
This is used to determine the namespace of the object when serializing.
Returns:
`["langchain", "prompts", "base"]`
"""
@@ -97,6 +101,8 @@ class ChatPromptValue(PromptValue):
def get_lc_namespace(cls) -> list[str]:
"""Get the namespace of the LangChain object.
This is used to determine the namespace of the object when serializing.
Returns:
`["langchain", "prompts", "chat"]`
"""

View File

@@ -46,27 +46,21 @@ class BasePromptTemplate(
input_variables: list[str]
"""A list of the names of the variables whose values are required as inputs to the
prompt.
"""
prompt."""
optional_variables: list[str] = Field(default=[])
"""A list of the names of the variables for placeholder or `MessagePlaceholder` that
are optional.
These variables are auto inferred from the prompt and user need not provide them.
"""
"""optional_variables: A list of the names of the variables for placeholder
or MessagePlaceholder that are optional. These variables are auto inferred
from the prompt and user need not provide them."""
input_types: typing.Dict[str, Any] = Field(default_factory=dict, exclude=True) # noqa: UP006
"""A dictionary of the types of the variables the prompt template expects.
If not provided, all variables are assumed to be strings.
"""
If not provided, all variables are assumed to be strings."""
output_parser: BaseOutputParser | None = None
"""How to parse the output of calling an LLM on this formatted prompt."""
partial_variables: Mapping[str, Any] = Field(default_factory=dict)
"""A dictionary of the partial variables the prompt template carries.
Partial variables populate the template so that you don't need to pass them in every
time you call the prompt.
"""
Partial variables populate the template so that you don't need to
pass them in every time you call the prompt."""
metadata: typing.Dict[str, Any] | None = None # noqa: UP006
"""Metadata to be used for tracing."""
tags: list[str] | None = None
@@ -111,7 +105,7 @@ class BasePromptTemplate(
@classmethod
def is_lc_serializable(cls) -> bool:
"""Return `True` as this class is serializable."""
"""Return True as this class is serializable."""
return True
model_config = ConfigDict(
@@ -133,7 +127,7 @@ class BasePromptTemplate(
"""Get the input schema for the prompt.
Args:
config: Configuration for the prompt.
config: configuration for the prompt.
Returns:
The input schema for the prompt.
@@ -201,8 +195,8 @@ class BasePromptTemplate(
"""Invoke the prompt.
Args:
input: Input to the prompt.
config: Configuration for the prompt.
input: Dict, input to the prompt.
config: RunnableConfig, configuration for the prompt.
Returns:
The output of the prompt.
@@ -227,8 +221,8 @@ class BasePromptTemplate(
"""Async invoke the prompt.
Args:
input: Input to the prompt.
config: Configuration for the prompt.
input: Dict, input to the prompt.
config: RunnableConfig, configuration for the prompt.
Returns:
The output of the prompt.
@@ -248,7 +242,7 @@ class BasePromptTemplate(
@abstractmethod
def format_prompt(self, **kwargs: Any) -> PromptValue:
"""Create `PromptValue`.
"""Create Prompt Value.
Args:
**kwargs: Any arguments to be passed to the prompt template.
@@ -258,7 +252,7 @@ class BasePromptTemplate(
"""
async def aformat_prompt(self, **kwargs: Any) -> PromptValue:
"""Async create `PromptValue`.
"""Async create Prompt Value.
Args:
**kwargs: Any arguments to be passed to the prompt template.
@@ -272,7 +266,7 @@ class BasePromptTemplate(
"""Return a partial of the prompt template.
Args:
**kwargs: Partial variables to set.
**kwargs: partial variables to set.
Returns:
A partial of the prompt template.
@@ -302,9 +296,9 @@ class BasePromptTemplate(
A formatted string.
Example:
```python
prompt.format(variable1="foo")
```
```python
prompt.format(variable1="foo")
```
"""
async def aformat(self, **kwargs: Any) -> FormatOutputType:
@@ -317,9 +311,9 @@ class BasePromptTemplate(
A formatted string.
Example:
```python
await prompt.aformat(variable1="foo")
```
```python
await prompt.aformat(variable1="foo")
```
"""
return self.format(**kwargs)
@@ -354,9 +348,9 @@ class BasePromptTemplate(
NotImplementedError: If the prompt type is not implemented.
Example:
```python
prompt.save(file_path="path/prompt.yaml")
```
```python
prompt.save(file_path="path/prompt.yaml")
```
"""
if self.partial_variables:
msg = "Cannot save prompt with partial variables."
@@ -408,23 +402,23 @@ def format_document(doc: Document, prompt: BasePromptTemplate[str]) -> str:
First, this pulls information from the document from two sources:
1. `page_content`:
This takes the information from the `document.page_content` and assigns it to a
variable named `page_content`.
2. `metadata`:
This takes information from `document.metadata` and assigns it to variables of
the same name.
1. page_content:
This takes the information from the `document.page_content`
and assigns it to a variable named `page_content`.
2. metadata:
This takes information from `document.metadata` and assigns
it to variables of the same name.
Those variables are then passed into the `prompt` to produce a formatted string.
Args:
doc: `Document`, the `page_content` and `metadata` will be used to create
doc: Document, the page_content and metadata will be used to create
the final string.
prompt: `BasePromptTemplate`, will be used to format the `page_content`
and `metadata` into the final string.
prompt: BasePromptTemplate, will be used to format the page_content
and metadata into the final string.
Returns:
String of the document formatted.
string of the document formatted.
Example:
```python
@@ -435,6 +429,7 @@ def format_document(doc: Document, prompt: BasePromptTemplate[str]) -> str:
prompt = PromptTemplate.from_template("Page {page}: {page_content}")
format_document(doc, prompt)
>>> "Page 1: This is a joke"
```
"""
return prompt.format(**_get_document_info(doc, prompt))
@@ -445,22 +440,22 @@ async def aformat_document(doc: Document, prompt: BasePromptTemplate[str]) -> st
First, this pulls information from the document from two sources:
1. `page_content`:
This takes the information from the `document.page_content` and assigns it to a
variable named `page_content`.
2. `metadata`:
This takes information from `document.metadata` and assigns it to variables of
the same name.
1. page_content:
This takes the information from the `document.page_content`
and assigns it to a variable named `page_content`.
2. metadata:
This takes information from `document.metadata` and assigns
it to variables of the same name.
Those variables are then passed into the `prompt` to produce a formatted string.
Args:
doc: `Document`, the `page_content` and `metadata` will be used to create
doc: Document, the page_content and metadata will be used to create
the final string.
prompt: `BasePromptTemplate`, will be used to format the `page_content`
and `metadata` into the final string.
prompt: BasePromptTemplate, will be used to format the page_content
and metadata into the final string.
Returns:
String of the document formatted.
string of the document formatted.
"""
return await prompt.aformat(**_get_document_info(doc, prompt))
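The async helper mirrors the sync `format_document` example above; a hedged sketch:

```python
import asyncio

from langchain_core.documents import Document
from langchain_core.prompts import PromptTemplate
from langchain_core.prompts.base import aformat_document

doc = Document(page_content="This is a joke", metadata={"page": "1"})
prompt = PromptTemplate.from_template("Page {page}: {page_content}")

print(asyncio.run(aformat_document(doc, prompt)))  # "Page 1: This is a joke"
```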

View File

@@ -587,15 +587,14 @@ class _StringImageMessagePromptTemplate(BaseMessagePromptTemplate):
for prompt in self.prompt:
inputs = {var: kwargs[var] for var in prompt.input_variables}
if isinstance(prompt, StringPromptTemplate):
formatted_text: str = prompt.format(**inputs)
if formatted_text != "":
content.append({"type": "text", "text": formatted_text})
formatted: str | ImageURL | dict[str, Any] = prompt.format(**inputs)
content.append({"type": "text", "text": formatted})
elif isinstance(prompt, ImagePromptTemplate):
formatted_image: ImageURL = prompt.format(**inputs)
content.append({"type": "image_url", "image_url": formatted_image})
formatted = prompt.format(**inputs)
content.append({"type": "image_url", "image_url": formatted})
elif isinstance(prompt, DictPromptTemplate):
formatted_dict: dict[str, Any] = prompt.format(**inputs)
content.append(formatted_dict)
formatted = prompt.format(**inputs)
content.append(formatted)
return self._msg_class(
content=content, additional_kwargs=self.additional_kwargs
)
@@ -618,15 +617,16 @@ class _StringImageMessagePromptTemplate(BaseMessagePromptTemplate):
for prompt in self.prompt:
inputs = {var: kwargs[var] for var in prompt.input_variables}
if isinstance(prompt, StringPromptTemplate):
formatted_text: str = await prompt.aformat(**inputs)
if formatted_text != "":
content.append({"type": "text", "text": formatted_text})
formatted: str | ImageURL | dict[str, Any] = await prompt.aformat(
**inputs
)
content.append({"type": "text", "text": formatted})
elif isinstance(prompt, ImagePromptTemplate):
formatted_image: ImageURL = await prompt.aformat(**inputs)
content.append({"type": "image_url", "image_url": formatted_image})
formatted = await prompt.aformat(**inputs)
content.append({"type": "image_url", "image_url": formatted})
elif isinstance(prompt, DictPromptTemplate):
formatted_dict: dict[str, Any] = prompt.format(**inputs)
content.append(formatted_dict)
formatted = prompt.format(**inputs)
content.append(formatted)
return self._msg_class(
content=content, additional_kwargs=self.additional_kwargs
)
@@ -776,36 +776,42 @@ class ChatPromptTemplate(BaseChatPromptTemplate):
Use to create flexible templated prompts for chat models.
```python
from langchain_core.prompts import ChatPromptTemplate
Examples:
!!! warning "Behavior changed in 0.2.24"
You can pass any Message-like formats supported by
`ChatPromptTemplate.from_messages()` directly to `ChatPromptTemplate()`
init.
template = ChatPromptTemplate(
[
("system", "You are a helpful AI bot. Your name is {name}."),
("human", "Hello, how are you doing?"),
("ai", "I'm doing well, thanks!"),
("human", "{user_input}"),
]
)
```python
from langchain_core.prompts import ChatPromptTemplate
prompt_value = template.invoke(
{
"name": "Bob",
"user_input": "What is your name?",
}
)
# Output:
# ChatPromptValue(
# messages=[
# SystemMessage(content='You are a helpful AI bot. Your name is Bob.'),
# HumanMessage(content='Hello, how are you doing?'),
# AIMessage(content="I'm doing well, thanks!"),
# HumanMessage(content='What is your name?')
# ]
# )
```
template = ChatPromptTemplate(
[
("system", "You are a helpful AI bot. Your name is {name}."),
("human", "Hello, how are you doing?"),
("ai", "I'm doing well, thanks!"),
("human", "{user_input}"),
]
)
!!! note "Messages Placeholder"
prompt_value = template.invoke(
{
"name": "Bob",
"user_input": "What is your name?",
}
)
# Output:
# ChatPromptValue(
# messages=[
# SystemMessage(content='You are a helpful AI bot. Your name is Bob.'),
# HumanMessage(content='Hello, how are you doing?'),
# AIMessage(content="I'm doing well, thanks!"),
# HumanMessage(content='What is your name?')
# ]
# )
```
Messages Placeholder:
```python
# In addition to Human/AI/Tool/Function messages,
@@ -846,12 +852,13 @@ class ChatPromptTemplate(BaseChatPromptTemplate):
# )
```
!!! note "Single-variable template"
Single-variable template:
If your prompt has only a single input variable (i.e., 1 instance of "{variable_name}"),
and you invoke the template with a non-dict object, the prompt template will
inject the provided argument into that variable location.
```python
from langchain_core.prompts import ChatPromptTemplate
@@ -891,35 +898,25 @@ class ChatPromptTemplate(BaseChatPromptTemplate):
"""Create a chat prompt template from a variety of message formats.
Args:
messages: Sequence of message representations.
messages: sequence of message representations.
A message can be represented using the following formats:
1. `BaseMessagePromptTemplate`
2. `BaseMessage`
3. 2-tuple of `(message type, template)`; e.g.,
`("human", "{user_input}")`
4. 2-tuple of `(message class, template)`
5. A string which is shorthand for `("human", template)`; e.g.,
`"{user_input}"`
template_format: Format of the template.
(1) BaseMessagePromptTemplate, (2) BaseMessage, (3) 2-tuple of
(message type, template); e.g., ("human", "{user_input}"),
(4) 2-tuple of (message class, template), (5) a string which is
shorthand for ("human", template); e.g., "{user_input}".
template_format: format of the template.
input_variables: A list of the names of the variables whose values are
required as inputs to the prompt.
optional_variables: A list of the names of the variables for placeholder
or MessagePlaceholder that are optional.
These variables are auto inferred from the prompt and user need not
provide them.
partial_variables: A dictionary of the partial variables the prompt
template carries.
Partial variables populate the template so that you don't need to pass
them in every time you call the prompt.
template carries. Partial variables populate the template so that you
don't need to pass them in every time you call the prompt.
validate_template: Whether to validate the template.
input_types: A dictionary of the types of the variables the prompt template
expects.
If not provided, all variables are assumed to be strings.
expects. If not provided, all variables are assumed to be strings.
Examples:
Instantiation from a list of message templates:
@@ -1124,17 +1121,12 @@ class ChatPromptTemplate(BaseChatPromptTemplate):
)
```
Args:
messages: Sequence of message representations.
messages: sequence of message representations.
A message can be represented using the following formats:
1. `BaseMessagePromptTemplate`
2. `BaseMessage`
3. 2-tuple of `(message type, template)`; e.g.,
`("human", "{user_input}")`
4. 2-tuple of `(message class, template)`
5. A string which is shorthand for `("human", template)`; e.g.,
`"{user_input}"`
(1) BaseMessagePromptTemplate, (2) BaseMessage, (3) 2-tuple of
(message type, template); e.g., ("human", "{user_input}"),
(4) 2-tuple of (message class, template), (5) a string which is
shorthand for ("human", template); e.g., "{user_input}".
template_format: format of the template.
Returns:
@@ -1246,7 +1238,7 @@ class ChatPromptTemplate(BaseChatPromptTemplate):
"""Extend the chat template with a sequence of messages.
Args:
messages: Sequence of message representations to append.
messages: sequence of message representations to append.
"""
self.messages.extend(
[_convert_to_message_template(message) for message in messages]
@@ -1343,25 +1335,11 @@ def _create_template_from_message_type(
raise ValueError(msg)
var_name = template[1:-1]
message = MessagesPlaceholder(variable_name=var_name, optional=True)
else:
try:
var_name_wrapped, is_optional = template
except ValueError as e:
msg = (
"Unexpected arguments for placeholder message type."
" Expected either a single string variable name"
" or a list of [variable_name: str, is_optional: bool]."
f" Got: {template}"
)
raise ValueError(msg) from e
if not isinstance(is_optional, bool):
msg = f"Expected is_optional to be a boolean. Got: {is_optional}"
raise ValueError(msg) # noqa: TRY004
elif len(template) == 2 and isinstance(template[1], bool):
var_name_wrapped, is_optional = template
if not isinstance(var_name_wrapped, str):
msg = f"Expected variable name to be a string. Got: {var_name_wrapped}"
raise ValueError(msg) # noqa: TRY004
raise ValueError(msg) # noqa:TRY004
if var_name_wrapped[0] != "{" or var_name_wrapped[-1] != "}":
msg = (
f"Invalid placeholder template: {var_name_wrapped}."
@@ -1371,6 +1349,14 @@ def _create_template_from_message_type(
var_name = var_name_wrapped[1:-1]
message = MessagesPlaceholder(variable_name=var_name, optional=is_optional)
else:
msg = (
"Unexpected arguments for placeholder message type."
" Expected either a single string variable name"
" or a list of [variable_name: str, is_optional: bool]."
f" Got: {template}"
)
raise ValueError(msg)
else:
msg = (
f"Unexpected message type: {message_type}. Use one of 'human',"
@@ -1424,11 +1410,10 @@ def _convert_to_message_template(
)
raise ValueError(msg)
message = (message["role"], message["content"])
try:
message_type_str, template = message
except ValueError as e:
if len(message) != 2:
msg = f"Expected 2-tuple of (role, template), got {message}"
raise ValueError(msg) from e
raise ValueError(msg)
message_type_str, template = message
if isinstance(message_type_str, str):
message_ = _create_template_from_message_type(
message_type_str, template, template_format=template_format
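To illustrate the placeholder handling above, a hedged sketch (`history` and `question` are illustrative variable names):

```python
from langchain_core.prompts import ChatPromptTemplate

# "placeholder" entries become MessagesPlaceholder instances; the list form
# ("placeholder", ["{history}", False]) would make the variable required.
template = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a helpful assistant."),
        ("placeholder", "{history}"),  # optional by default
        ("human", "{question}"),
    ]
)

prompt_value = template.invoke({"question": "What is LangChain?"})
print(prompt_value.to_messages())
```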

View File

@@ -69,7 +69,7 @@ class DictPromptTemplate(RunnableSerializable[dict, dict]):
@classmethod
def is_lc_serializable(cls) -> bool:
"""Return `True` as this class is serializable."""
"""Return True as this class is serializable."""
return True
@classmethod

View File

@@ -18,7 +18,7 @@ class BaseMessagePromptTemplate(Serializable, ABC):
@classmethod
def is_lc_serializable(cls) -> bool:
"""Return `True` as this class is serializable."""
"""Return True as this class is serializable."""
return True
@classmethod
@@ -32,13 +32,13 @@ class BaseMessagePromptTemplate(Serializable, ABC):
@abstractmethod
def format_messages(self, **kwargs: Any) -> list[BaseMessage]:
"""Format messages from kwargs. Should return a list of `BaseMessage` objects.
"""Format messages from kwargs. Should return a list of BaseMessages.
Args:
**kwargs: Keyword arguments to use for formatting.
Returns:
List of `BaseMessage` objects.
List of BaseMessages.
"""
async def aformat_messages(self, **kwargs: Any) -> list[BaseMessage]:
@@ -48,7 +48,7 @@ class BaseMessagePromptTemplate(Serializable, ABC):
**kwargs: Keyword arguments to use for formatting.
Returns:
List of `BaseMessage` objects.
List of BaseMessages.
"""
return self.format_messages(**kwargs)

View File

@@ -122,16 +122,13 @@ def mustache_formatter(template: str, /, **kwargs: Any) -> str:
def mustache_template_vars(
template: str,
) -> set[str]:
"""Get the top-level variables from a mustache template.
For nested variables like `{{person.name}}`, only the top-level
key (`person`) is returned.
"""Get the variables from a mustache template.
Args:
template: The template string.
Returns:
The top-level variables from the template.
The variables from the template.
"""
variables: set[str] = set()
section_depth = 0
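A hedged sketch of mustache variable extraction in practice (the template text is illustrative):

```python
from langchain_core.prompts import PromptTemplate

# Only the top-level key ("person") of a nested reference is reported.
prompt = PromptTemplate.from_template(
    "Hello {{person.name}} from {{city}}!", template_format="mustache"
)
print(prompt.input_variables)  # ['city', 'person'] (order may vary)
print(prompt.format(person={"name": "Ada"}, city="London"))  # "Hello Ada from London!"
```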

View File

@@ -104,23 +104,19 @@ class StructuredPrompt(ChatPromptTemplate):
)
```
Args:
messages: Sequence of message representations.
messages: sequence of message representations.
A message can be represented using the following formats:
1. `BaseMessagePromptTemplate`
2. `BaseMessage`
3. 2-tuple of `(message type, template)`; e.g.,
`("human", "{user_input}")`
4. 2-tuple of `(message class, template)`
5. A string which is shorthand for `("human", template)`; e.g.,
`"{user_input}"`
schema: A dictionary representation of function call, or a Pydantic model.
(1) BaseMessagePromptTemplate, (2) BaseMessage, (3) 2-tuple of
(message type, template); e.g., ("human", "{user_input}"),
(4) 2-tuple of (message class, template), (5) a string which is
shorthand for ("human", template); e.g., "{user_input}"
schema: a dictionary representation of function call, or a Pydantic model.
**kwargs: Any additional kwargs to pass through to
`ChatModel.with_structured_output(schema, **kwargs)`.
Returns:
A structured prompt template
a structured prompt template
"""
return cls(messages, schema, **kwargs)

View File

@@ -105,9 +105,7 @@ class InMemoryRateLimiter(BaseRateLimiter):
from langchain_anthropic import ChatAnthropic
model = ChatAnthropic(
model_name="claude-sonnet-4-5-20250929", rate_limiter=rate_limiter
)
model = ChatAnthropic(model_name="claude-sonnet-4-5", rate_limiter=rate_limiter)
for _ in range(5):
tic = time.time()
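A standalone, hedged sketch of constructing the rate limiter used in the snippet above:

```python
import time

from langchain_core.rate_limiters import InMemoryRateLimiter

# Roughly one request every 10 seconds, with the bucket checked every 100 ms.
rate_limiter = InMemoryRateLimiter(
    requests_per_second=0.1,
    check_every_n_seconds=0.1,
    max_bucket_size=10,
)

rate_limiter.acquire()  # blocks until a token is available
print("token acquired at", time.time())
```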

View File

@@ -50,65 +50,65 @@ class LangSmithRetrieverParams(TypedDict, total=False):
class BaseRetriever(RunnableSerializable[RetrieverInput, RetrieverOutput], ABC):
"""Abstract base class for a document retrieval system.
"""Abstract base class for a Document retrieval system.
A retrieval system is defined as something that can take string queries and return
the most 'relevant' documents from some source.
the most 'relevant' Documents from some source.
Usage:
A retriever follows the standard `Runnable` interface, and should be used via the
standard `Runnable` methods of `invoke`, `ainvoke`, `batch`, `abatch`.
A retriever follows the standard Runnable interface, and should be used
via the standard Runnable methods of `invoke`, `ainvoke`, `batch`, `abatch`.
Implementation:
When implementing a custom retriever, the class should implement the
`_get_relevant_documents` method to define the logic for retrieving documents.
When implementing a custom retriever, the class should implement
the `_get_relevant_documents` method to define the logic for retrieving documents.
Optionally, an async native implementation can be provided by overriding the
`_aget_relevant_documents` method.
!!! example "Retriever that returns the first 5 documents from a list of documents"
Example: A retriever that returns the first 5 documents from a list of documents
```python
from langchain_core.documents import Document
from langchain_core.retrievers import BaseRetriever
```python
from langchain_core.documents import Document
from langchain_core.retrievers import BaseRetriever
class SimpleRetriever(BaseRetriever):
docs: list[Document]
k: int = 5
class SimpleRetriever(BaseRetriever):
docs: list[Document]
k: int = 5
def _get_relevant_documents(self, query: str) -> list[Document]:
\"\"\"Return the first k documents from the list of documents\"\"\"
return self.docs[:self.k]
def _get_relevant_documents(self, query: str) -> list[Document]:
\"\"\"Return the first k documents from the list of documents\"\"\"
return self.docs[:self.k]
async def _aget_relevant_documents(self, query: str) -> list[Document]:
\"\"\"(Optional) async native implementation.\"\"\"
return self.docs[:self.k]
```
async def _aget_relevant_documents(self, query: str) -> list[Document]:
\"\"\"(Optional) async native implementation.\"\"\"
return self.docs[:self.k]
```
!!! example "Simple retriever based on a scikit-learn vectorizer"
Example: A simple retriever based on a scikit-learn vectorizer
```python
from sklearn.metrics.pairwise import cosine_similarity
```python
from sklearn.metrics.pairwise import cosine_similarity
class TFIDFRetriever(BaseRetriever, BaseModel):
vectorizer: Any
docs: list[Document]
tfidf_array: Any
k: int = 4
class TFIDFRetriever(BaseRetriever, BaseModel):
vectorizer: Any
docs: list[Document]
tfidf_array: Any
k: int = 4
class Config:
arbitrary_types_allowed = True
class Config:
arbitrary_types_allowed = True
def _get_relevant_documents(self, query: str) -> list[Document]:
# Ip -- (n_docs,x), Op -- (n_docs,n_Feats)
query_vec = self.vectorizer.transform([query])
# Op -- (n_docs,1) -- Cosine Sim with each doc
results = cosine_similarity(self.tfidf_array, query_vec).reshape((-1,))
return [self.docs[i] for i in results.argsort()[-self.k :][::-1]]
```
def _get_relevant_documents(self, query: str) -> list[Document]:
# Ip -- (n_docs,x), Op -- (n_docs,n_Feats)
query_vec = self.vectorizer.transform([query])
# Op -- (n_docs,1) -- Cosine Sim with each doc
results = cosine_similarity(self.tfidf_array, query_vec).reshape((-1,))
return [self.docs[i] for i in results.argsort()[-self.k :][::-1]]
```
"""
model_config = ConfigDict(
@@ -119,19 +119,15 @@ class BaseRetriever(RunnableSerializable[RetrieverInput, RetrieverOutput], ABC):
_expects_other_args: bool = False
tags: list[str] | None = None
"""Optional list of tags associated with the retriever.
These tags will be associated with each call to this retriever,
and passed as arguments to the handlers defined in `callbacks`.
You can use these to e.g. identify a specific instance of a retriever with its
use case.
"""
metadata: dict[str, Any] | None = None
"""Optional metadata associated with the retriever.
This metadata will be associated with each call to this retriever,
and passed as arguments to the handlers defined in `callbacks`.
You can use these to e.g. identify a specific instance of a retriever with its
use case.
"""

View File

@@ -118,8 +118,6 @@ if TYPE_CHECKING:
Other = TypeVar("Other")
_RUNNABLE_GENERIC_NUM_ARGS = 2 # Input and Output
class Runnable(ABC, Generic[Input, Output]):
"""A unit of work that can be invoked, batched, streamed, transformed and composed.
@@ -149,11 +147,11 @@ class Runnable(ABC, Generic[Input, Output]):
the `input_schema` property, the `output_schema` property and `config_schema`
method.
Composition
===========
Runnable objects can be composed together to create chains in a declarative way.
LCEL and Composition
====================
The LangChain Expression Language (LCEL) is a declarative way to compose
`Runnable` objects into chains.
Any chain constructed this way will automatically have sync, async, batch, and
streaming support.
@@ -237,21 +235,21 @@ class Runnable(ABC, Generic[Input, Output]):
You can set the global debug flag to True to enable debug output for all chains:
```python
from langchain_core.globals import set_debug
```python
from langchain_core.globals import set_debug
set_debug(True)
```
set_debug(True)
```
Alternatively, you can pass existing or custom callbacks to any given chain:
```python
from langchain_core.tracers import ConsoleCallbackHandler
```python
from langchain_core.tracers import ConsoleCallbackHandler
chain.invoke(..., config={"callbacks": [ConsoleCallbackHandler()]})
```
chain.invoke(..., config={"callbacks": [ConsoleCallbackHandler()]})
```
For a UI (and much more) check out [LangSmith](https://docs.langchain.com/langsmith/home).
For a UI (and much more) check out [LangSmith](https://docs.smith.langchain.com/).
"""
@@ -311,10 +309,7 @@ class Runnable(ABC, Generic[Input, Output]):
for base in self.__class__.mro():
if hasattr(base, "__pydantic_generic_metadata__"):
metadata = base.__pydantic_generic_metadata__
if (
"args" in metadata
and len(metadata["args"]) == _RUNNABLE_GENERIC_NUM_ARGS
):
if "args" in metadata and len(metadata["args"]) == 2:
return metadata["args"][0]
# If we didn't find a Pydantic model in the parent classes,
@@ -322,7 +317,7 @@ class Runnable(ABC, Generic[Input, Output]):
# Runnables that are not pydantic models.
for cls in self.__class__.__orig_bases__: # type: ignore[attr-defined]
type_args = get_args(cls)
if type_args and len(type_args) == _RUNNABLE_GENERIC_NUM_ARGS:
if type_args and len(type_args) == 2:
return type_args[0]
msg = (
@@ -345,15 +340,12 @@ class Runnable(ABC, Generic[Input, Output]):
for base in self.__class__.mro():
if hasattr(base, "__pydantic_generic_metadata__"):
metadata = base.__pydantic_generic_metadata__
if (
"args" in metadata
and len(metadata["args"]) == _RUNNABLE_GENERIC_NUM_ARGS
):
if "args" in metadata and len(metadata["args"]) == 2:
return metadata["args"][1]
for cls in self.__class__.__orig_bases__: # type: ignore[attr-defined]
type_args = get_args(cls)
if type_args and len(type_args) == _RUNNABLE_GENERIC_NUM_ARGS:
if type_args and len(type_args) == 2:
return type_args[1]
msg = (
@@ -432,7 +424,7 @@ class Runnable(ABC, Generic[Input, Output]):
print(runnable.get_input_jsonschema())
```
!!! version-added "Added in `langchain-core` 0.3.0"
!!! version-added "Added in version 0.3.0"
"""
return self.get_input_schema(config).model_json_schema()
@@ -510,7 +502,7 @@ class Runnable(ABC, Generic[Input, Output]):
print(runnable.get_output_jsonschema())
```
!!! version-added "Added in `langchain-core` 0.3.0"
!!! version-added "Added in version 0.3.0"
"""
return self.get_output_schema(config).model_json_schema()
@@ -574,7 +566,7 @@ class Runnable(ABC, Generic[Input, Output]):
Returns:
A JSON schema that represents the config of the `Runnable`.
!!! version-added "Added in `langchain-core` 0.3.0"
!!! version-added "Added in version 0.3.0"
"""
return self.config_schema(include=include).model_json_schema()
@@ -774,7 +766,7 @@ class Runnable(ABC, Generic[Input, Output]):
"""Assigns new fields to the `dict` output of this `Runnable`.
```python
from langchain_core.language_models.fake import FakeStreamingListLLM
from langchain_community.llms.fake import FakeStreamingListLLM
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import SystemMessagePromptTemplate
from langchain_core.runnables import Runnable
@@ -826,12 +818,10 @@ class Runnable(ABC, Generic[Input, Output]):
Args:
input: The input to the `Runnable`.
config: A config to use when invoking the `Runnable`.
The config supports standard keys like `'tags'`, `'metadata'` for
tracing purposes, `'max_concurrency'` for controlling how much work to
do in parallel, and other keys.
Please refer to `RunnableConfig` for more details.
do in parallel, and other keys. Please refer to the `RunnableConfig`
for more details.
Returns:
The output of the `Runnable`.
@@ -848,12 +838,10 @@ class Runnable(ABC, Generic[Input, Output]):
Args:
input: The input to the `Runnable`.
config: A config to use when invoking the `Runnable`.
The config supports standard keys like `'tags'`, `'metadata'` for
tracing purposes, `'max_concurrency'` for controlling how much work to
do in parallel, and other keys.
Please refer to `RunnableConfig` for more details.
do in parallel, and other keys. Please refer to the `RunnableConfig`
for more details.
Returns:
The output of the `Runnable`.
@@ -880,9 +868,8 @@ class Runnable(ABC, Generic[Input, Output]):
config: A config to use when invoking the `Runnable`. The config supports
standard keys like `'tags'`, `'metadata'` for
tracing purposes, `'max_concurrency'` for controlling how much work
to do in parallel, and other keys.
Please refer to `RunnableConfig` for more details.
to do in parallel, and other keys. Please refer to the
`RunnableConfig` for more details.
return_exceptions: Whether to return exceptions instead of raising them.
**kwargs: Additional keyword arguments to pass to the `Runnable`.
@@ -945,12 +932,10 @@ class Runnable(ABC, Generic[Input, Output]):
Args:
inputs: A list of inputs to the `Runnable`.
config: A config to use when invoking the `Runnable`.
The config supports standard keys like `'tags'`, `'metadata'` for
tracing purposes, `'max_concurrency'` for controlling how much work to
do in parallel, and other keys.
Please refer to `RunnableConfig` for more details.
do in parallel, and other keys. Please refer to the `RunnableConfig`
for more details.
return_exceptions: Whether to return exceptions instead of raising them.
**kwargs: Additional keyword arguments to pass to the `Runnable`.
@@ -1013,12 +998,10 @@ class Runnable(ABC, Generic[Input, Output]):
Args:
inputs: A list of inputs to the `Runnable`.
config: A config to use when invoking the `Runnable`.
The config supports standard keys like `'tags'`, `'metadata'` for
tracing purposes, `'max_concurrency'` for controlling how much work to
do in parallel, and other keys.
Please refer to `RunnableConfig` for more details.
do in parallel, and other keys. Please refer to the `RunnableConfig`
for more details.
return_exceptions: Whether to return exceptions instead of raising them.
**kwargs: Additional keyword arguments to pass to the `Runnable`.
@@ -1078,12 +1061,10 @@ class Runnable(ABC, Generic[Input, Output]):
Args:
inputs: A list of inputs to the `Runnable`.
config: A config to use when invoking the `Runnable`.
The config supports standard keys like `'tags'`, `'metadata'` for
tracing purposes, `'max_concurrency'` for controlling how much work to
do in parallel, and other keys.
Please refer to `RunnableConfig` for more details.
do in parallel, and other keys. Please refer to the `RunnableConfig`
for more details.
return_exceptions: Whether to return exceptions instead of raising them.
**kwargs: Additional keyword arguments to pass to the `Runnable`.
@@ -1761,52 +1742,46 @@ class Runnable(ABC, Generic[Input, Output]):
import time
import asyncio
def format_t(timestamp: float) -> str:
return datetime.fromtimestamp(timestamp, tz=timezone.utc).isoformat()
async def test_runnable(time_to_sleep: int):
print(f"Runnable[{time_to_sleep}s]: starts at {format_t(time.time())}")
await asyncio.sleep(time_to_sleep)
print(f"Runnable[{time_to_sleep}s]: ends at {format_t(time.time())}")
async def fn_start(run_obj: Runnable):
print(f"on start callback starts at {format_t(time.time())}")
await asyncio.sleep(3)
print(f"on start callback ends at {format_t(time.time())}")
async def fn_end(run_obj: Runnable):
print(f"on end callback starts at {format_t(time.time())}")
await asyncio.sleep(2)
print(f"on end callback ends at {format_t(time.time())}")
runnable = RunnableLambda(test_runnable).with_alisteners(
on_start=fn_start, on_end=fn_end
on_start=fn_start,
on_end=fn_end
)
async def concurrent_runs():
await asyncio.gather(runnable.ainvoke(2), runnable.ainvoke(3))
asyncio.run(concurrent_runs())
# Result:
# on start callback starts at 2025-03-01T07:05:22.875378+00:00
# on start callback starts at 2025-03-01T07:05:22.875495+00:00
# on start callback ends at 2025-03-01T07:05:25.878862+00:00
# on start callback ends at 2025-03-01T07:05:25.878947+00:00
# Runnable[2s]: starts at 2025-03-01T07:05:25.879392+00:00
# Runnable[3s]: starts at 2025-03-01T07:05:25.879804+00:00
# Runnable[2s]: ends at 2025-03-01T07:05:27.881998+00:00
# on end callback starts at 2025-03-01T07:05:27.882360+00:00
# Runnable[3s]: ends at 2025-03-01T07:05:28.881737+00:00
# on end callback starts at 2025-03-01T07:05:28.882428+00:00
# on end callback ends at 2025-03-01T07:05:29.883893+00:00
# on end callback ends at 2025-03-01T07:05:30.884831+00:00
Result:
on start callback starts at 2025-03-01T07:05:22.875378+00:00
on start callback starts at 2025-03-01T07:05:22.875495+00:00
on start callback ends at 2025-03-01T07:05:25.878862+00:00
on start callback ends at 2025-03-01T07:05:25.878947+00:00
Runnable[2s]: starts at 2025-03-01T07:05:25.879392+00:00
Runnable[3s]: starts at 2025-03-01T07:05:25.879804+00:00
Runnable[2s]: ends at 2025-03-01T07:05:27.881998+00:00
on end callback starts at 2025-03-01T07:05:27.882360+00:00
Runnable[3s]: ends at 2025-03-01T07:05:28.881737+00:00
on end callback starts at 2025-03-01T07:05:28.882428+00:00
on end callback ends at 2025-03-01T07:05:29.883893+00:00
on end callback ends at 2025-03-01T07:05:30.884831+00:00
```
"""
return RunnableBinding(
@@ -1868,7 +1843,7 @@ class Runnable(ABC, Generic[Input, Output]):
`exp_base`, and `jitter` (all `float` values).
Returns:
A new `Runnable` that retries the original `Runnable` on exceptions.
A new Runnable that retries the original Runnable on exceptions.
Example:
```python
@@ -1952,9 +1927,7 @@ class Runnable(ABC, Generic[Input, Output]):
exceptions_to_handle: A tuple of exception types to handle.
exception_key: If `string` is specified then handled exceptions will be
passed to fallbacks as part of the input under the specified key.
If `None`, exceptions will not be passed to fallbacks.
If used, the base `Runnable` and its fallbacks must accept a
dictionary as input.
@@ -1990,9 +1963,7 @@ class Runnable(ABC, Generic[Input, Output]):
exceptions_to_handle: A tuple of exception types to handle.
exception_key: If `string` is specified then handled exceptions will be
passed to fallbacks as part of the input under the specified key.
If `None`, exceptions will not be passed to fallbacks.
If used, the base `Runnable` and its fallbacks must accept a
dictionary as input.
@@ -2458,14 +2429,10 @@ class Runnable(ABC, Generic[Input, Output]):
`as_tool` will instantiate a `BaseTool` with a name, description, and
`args_schema` from a `Runnable`. Where possible, schemas are inferred
from `runnable.get_input_schema`.
Alternatively (e.g., if the `Runnable` takes a dict as input and the specific
`dict` keys are not typed), the schema can be specified directly with
`args_schema`.
You can also pass `arg_types` to just specify the required arguments and their
types.
from `runnable.get_input_schema`. Alternatively (e.g., if the
`Runnable` takes a dict as input and the specific dict keys are not typed),
the schema can be specified directly with `args_schema`. You can also
pass `arg_types` to just specify the required arguments and their types.
Args:
args_schema: The schema for the tool.
@@ -2534,7 +2501,7 @@ class Runnable(ABC, Generic[Input, Output]):
as_tool.invoke({"a": 3, "b": [1, 2]})
```
`str` input:
String input:
```python
from langchain_core.runnables import RunnableLambda
@@ -2552,6 +2519,9 @@ class Runnable(ABC, Generic[Input, Output]):
as_tool = runnable.as_tool()
as_tool.invoke("b")
```
!!! version-added "Added in version 0.2.14"
"""
# Avoid circular import
from langchain_core.tools import convert_runnable_to_tool # noqa: PLC0415
@@ -2670,7 +2640,7 @@ class RunnableSerializable(Serializable, Runnable[Input, Output]):
from langchain_openai import ChatOpenAI
model = ChatAnthropic(
model_name="claude-sonnet-4-5-20250929"
model_name="claude-3-7-sonnet-20250219"
).configurable_alternatives(
ConfigurableField(id="llm"),
default_key="anthropic",
@@ -2783,9 +2753,6 @@ def _seq_output_schema(
return last.get_output_schema(config)
_RUNNABLE_SEQUENCE_MIN_STEPS = 2
class RunnableSequence(RunnableSerializable[Input, Output]):
"""Sequence of `Runnable` objects, where the output of one is the input of the next.
@@ -2895,7 +2862,7 @@ class RunnableSequence(RunnableSerializable[Input, Output]):
name: The name of the `Runnable`.
first: The first `Runnable` in the sequence.
middle: The middle `Runnable` objects in the sequence.
last: The last `Runnable` in the sequence.
last: The last Runnable in the sequence.
Raises:
ValueError: If the sequence has less than 2 steps.
@@ -2908,11 +2875,8 @@ class RunnableSequence(RunnableSerializable[Input, Output]):
steps_flat.extend(step.steps)
else:
steps_flat.append(coerce_to_runnable(step))
if len(steps_flat) < _RUNNABLE_SEQUENCE_MIN_STEPS:
msg = (
f"RunnableSequence must have at least {_RUNNABLE_SEQUENCE_MIN_STEPS} "
f"steps, got {len(steps_flat)}"
)
if len(steps_flat) < 2:
msg = f"RunnableSequence must have at least 2 steps, got {len(steps_flat)}"
raise ValueError(msg)
super().__init__(
first=steps_flat[0],
@@ -2943,7 +2907,7 @@ class RunnableSequence(RunnableSerializable[Input, Output]):
@classmethod
@override
def is_lc_serializable(cls) -> bool:
"""Return `True` as this class is serializable."""
"""Return True as this class is serializable."""
return True
model_config = ConfigDict(
@@ -3539,7 +3503,7 @@ class RunnableParallel(RunnableSerializable[Input, dict[str, Any]]):
Returns a mapping of their outputs.
`RunnableParallel` is one of the two main composition primitives,
`RunnableParallel` is one of the two main composition primitives for the LCEL,
alongside `RunnableSequence`. It invokes `Runnable`s concurrently, providing the
same input to each.
@@ -3649,7 +3613,7 @@ class RunnableParallel(RunnableSerializable[Input, dict[str, Any]]):
@classmethod
@override
def is_lc_serializable(cls) -> bool:
"""Return `True` as this class is serializable."""
"""Return True as this class is serializable."""
return True
@classmethod
@@ -3707,12 +3671,6 @@ class RunnableParallel(RunnableSerializable[Input, dict[str, Any]]):
== "object"
for s in self.steps__.values()
):
for step in self.steps__.values():
fields = step.get_input_schema(config).model_fields
root_field = fields.get("root")
if root_field is not None and root_field.annotation != Any:
return super().get_input_schema(config)
# This is correct, but pydantic typings/mypy don't think so.
return create_model_v2(
self.get_name("Input"),
@@ -4522,7 +4480,7 @@ class RunnableLambda(Runnable[Input, Output]):
# on itemgetter objects, so we have to parse the repr
items = str(func).replace("operator.itemgetter(", "")[:-1].split(", ")
if all(
item[0] == "'" and item[-1] == "'" and item != "''" for item in items
item[0] == "'" and item[-1] == "'" and len(item) > 2 for item in items
):
fields = {item[1:-1]: (Any, ...) for item in items}
# It's a dict, lol
@@ -5184,7 +5142,7 @@ class RunnableEachBase(RunnableSerializable[list[Input], list[Output]]):
@classmethod
@override
def is_lc_serializable(cls) -> bool:
"""Return `True` as this class is serializable."""
"""Return True as this class is serializable."""
return True
@classmethod
@@ -5367,7 +5325,7 @@ class RunnableEach(RunnableEachBase[Input, Output]):
class RunnableBindingBase(RunnableSerializable[Input, Output]): # type: ignore[no-redef]
"""`Runnable` that delegates calls to another `Runnable` with a set of `**kwargs`.
"""`Runnable` that delegates calls to another `Runnable` with a set of kwargs.
Use only if creating a new `RunnableBinding` subclass with different `__init__`
args.
@@ -5507,7 +5465,7 @@ class RunnableBindingBase(RunnableSerializable[Input, Output]): # type: ignore[
@classmethod
@override
def is_lc_serializable(cls) -> bool:
"""Return `True` as this class is serializable."""
"""Return True as this class is serializable."""
return True
@classmethod
@@ -5797,7 +5755,7 @@ class RunnableBinding(RunnableBindingBase[Input, Output]): # type: ignore[no-re
```python
# Create a Runnable binding that invokes the chat model with the
# additional kwarg `stop=['-']` when running it.
from langchain_openai import ChatOpenAI
from langchain_community.chat_models import ChatOpenAI
model = ChatOpenAI()
model.invoke('Say "Parrot-MAGIC"', stop=["-"]) # Should return `Parrot`

View File

@@ -36,19 +36,17 @@ from langchain_core.runnables.utils import (
get_unique_config_specs,
)
_MIN_BRANCHES = 2
class RunnableBranch(RunnableSerializable[Input, Output]):
"""`Runnable` that selects which branch to run based on a condition.
"""Runnable that selects which branch to run based on a condition.
The `Runnable` is initialized with a list of `(condition, Runnable)` pairs and
The Runnable is initialized with a list of (condition, Runnable) pairs and
a default branch.
When operating on an input, the first condition that evaluates to True is
selected, and the corresponding `Runnable` is run on the input.
selected, and the corresponding Runnable is run on the input.
If no condition evaluates to `True`, the default branch is run on the input.
If no condition evaluates to True, the default branch is run on the input.
Examples:
```python
@@ -67,9 +65,9 @@ class RunnableBranch(RunnableSerializable[Input, Output]):
"""
branches: Sequence[tuple[Runnable[Input, bool], Runnable[Input, Output]]]
"""A list of `(condition, Runnable)` pairs."""
"""A list of (condition, Runnable) pairs."""
default: Runnable[Input, Output]
"""A `Runnable` to run if no condition is met."""
"""A Runnable to run if no condition is met."""
def __init__(
self,
@@ -81,19 +79,19 @@ class RunnableBranch(RunnableSerializable[Input, Output]):
]
| RunnableLike,
) -> None:
"""A `Runnable` that runs one of two branches based on a condition.
"""A Runnable that runs one of two branches based on a condition.
Args:
*branches: A list of `(condition, Runnable)` pairs.
Defaults a `Runnable` to run if no condition is met.
*branches: A list of (condition, Runnable) pairs.
Defaults a Runnable to run if no condition is met.
Raises:
ValueError: If the number of branches is less than `2`.
TypeError: If the default branch is not `Runnable`, `Callable` or `Mapping`.
TypeError: If a branch is not a `tuple` or `list`.
ValueError: If a branch is not of length `2`.
ValueError: If the number of branches is less than 2.
TypeError: If the default branch is not Runnable, Callable or Mapping.
TypeError: If a branch is not a tuple or list.
ValueError: If a branch is not of length 2.
"""
if len(branches) < _MIN_BRANCHES:
if len(branches) < 2:
msg = "RunnableBranch requires at least two branches"
raise ValueError(msg)
@@ -120,7 +118,7 @@ class RunnableBranch(RunnableSerializable[Input, Output]):
)
raise TypeError(msg)
if len(branch) != _MIN_BRANCHES:
if len(branch) != 2:
msg = (
f"RunnableBranch branches must be "
f"tuples or lists of length 2, not {len(branch)}"
@@ -142,7 +140,7 @@ class RunnableBranch(RunnableSerializable[Input, Output]):
@classmethod
def is_lc_serializable(cls) -> bool:
"""Return `True` as this class is serializable."""
"""Return True as this class is serializable."""
return True
@classmethod
@@ -189,12 +187,12 @@ class RunnableBranch(RunnableSerializable[Input, Output]):
def invoke(
self, input: Input, config: RunnableConfig | None = None, **kwargs: Any
) -> Output:
"""First evaluates the condition, then delegate to `True` or `False` branch.
"""First evaluates the condition, then delegate to true or false branch.
Args:
input: The input to the `Runnable`.
config: The configuration for the `Runnable`.
**kwargs: Additional keyword arguments to pass to the `Runnable`.
input: The input to the Runnable.
config: The configuration for the Runnable.
**kwargs: Additional keyword arguments to pass to the Runnable.
Returns:
The output of the branch that was run.
@@ -299,12 +297,12 @@ class RunnableBranch(RunnableSerializable[Input, Output]):
config: RunnableConfig | None = None,
**kwargs: Any | None,
) -> Iterator[Output]:
"""First evaluates the condition, then delegate to `True` or `False` branch.
"""First evaluates the condition, then delegate to true or false branch.
Args:
input: The input to the `Runnable`.
config: The configuration for the `Runnable`.
**kwargs: Additional keyword arguments to pass to the `Runnable`.
input: The input to the Runnable.
config: The configuration for the Runnable.
**kwargs: Additional keyword arguments to pass to the Runnable.
Yields:
The output of the branch that was run.
@@ -383,12 +381,12 @@ class RunnableBranch(RunnableSerializable[Input, Output]):
config: RunnableConfig | None = None,
**kwargs: Any | None,
) -> AsyncIterator[Output]:
"""First evaluates the condition, then delegate to `True` or `False` branch.
"""First evaluates the condition, then delegate to true or false branch.
Args:
input: The input to the `Runnable`.
config: The configuration for the `Runnable`.
**kwargs: Additional keyword arguments to pass to the `Runnable`.
input: The input to the Runnable.
config: The configuration for the Runnable.
**kwargs: Additional keyword arguments to pass to the Runnable.
Yields:
The output of the branch that was run.
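A hedged `RunnableBranch` sketch matching the behavior described above (the branch bodies are illustrative):

```python
from langchain_core.runnables import RunnableBranch, RunnableLambda

branch = RunnableBranch(
    (lambda x: isinstance(x, str), RunnableLambda(lambda x: x.upper())),
    (lambda x: isinstance(x, int), RunnableLambda(lambda x: x + 1)),
    RunnableLambda(lambda x: "goodbye"),  # default branch
)

print(branch.invoke("hello"))  # "HELLO"
print(branch.invoke(2))        # 3
print(branch.invoke(None))     # "goodbye"
```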

View File

@@ -1,4 +1,4 @@
"""`Runnable` objects that can be dynamically configured."""
"""Runnables that can be dynamically configured."""
from __future__ import annotations
@@ -47,14 +47,14 @@ if TYPE_CHECKING:
class DynamicRunnable(RunnableSerializable[Input, Output]):
"""Serializable `Runnable` that can be dynamically configured.
"""Serializable Runnable that can be dynamically configured.
A `DynamicRunnable` should be initiated using the `configurable_fields` or
`configurable_alternatives` method of a `Runnable`.
A DynamicRunnable should be initiated using the `configurable_fields` or
`configurable_alternatives` method of a Runnable.
"""
default: RunnableSerializable[Input, Output]
"""The default `Runnable` to use."""
"""The default Runnable to use."""
config: RunnableConfig | None = None
"""The configuration to use."""
@@ -66,7 +66,7 @@ class DynamicRunnable(RunnableSerializable[Input, Output]):
@classmethod
@override
def is_lc_serializable(cls) -> bool:
"""Return `True` as this class is serializable."""
"""Return True as this class is serializable."""
return True
@classmethod
@@ -120,13 +120,13 @@ class DynamicRunnable(RunnableSerializable[Input, Output]):
def prepare(
self, config: RunnableConfig | None = None
) -> tuple[Runnable[Input, Output], RunnableConfig]:
"""Prepare the `Runnable` for invocation.
"""Prepare the Runnable for invocation.
Args:
config: The configuration to use.
Returns:
The prepared `Runnable` and configuration.
The prepared Runnable and configuration.
"""
runnable: Runnable[Input, Output] = self
while isinstance(runnable, DynamicRunnable):
@@ -316,12 +316,12 @@ class DynamicRunnable(RunnableSerializable[Input, Output]):
class RunnableConfigurableFields(DynamicRunnable[Input, Output]):
"""`Runnable` that can be dynamically configured.
"""Runnable that can be dynamically configured.
A `RunnableConfigurableFields` should be initiated using the
`configurable_fields` method of a `Runnable`.
A RunnableConfigurableFields should be initiated using the
`configurable_fields` method of a Runnable.
Here is an example of using a `RunnableConfigurableFields` with LLMs:
Here is an example of using a RunnableConfigurableFields with LLMs:
```python
from langchain_core.prompts import PromptTemplate
@@ -348,7 +348,7 @@ class RunnableConfigurableFields(DynamicRunnable[Input, Output]):
chain.invoke({"x": 0}, config={"configurable": {"temperature": 0.9}})
```
Here is an example of using a `RunnableConfigurableFields` with `HubRunnables`:
Here is an example of using a RunnableConfigurableFields with HubRunnables:
```python
from langchain_core.prompts import PromptTemplate
@@ -380,7 +380,7 @@ class RunnableConfigurableFields(DynamicRunnable[Input, Output]):
@property
def config_specs(self) -> list[ConfigurableFieldSpec]:
"""Get the configuration specs for the `RunnableConfigurableFields`.
"""Get the configuration specs for the RunnableConfigurableFields.
Returns:
The configuration specs.
@@ -473,13 +473,13 @@ _enums_for_spec_lock = threading.Lock()
class RunnableConfigurableAlternatives(DynamicRunnable[Input, Output]):
"""`Runnable` that can be dynamically configured.
"""Runnable that can be dynamically configured.
A `RunnableConfigurableAlternatives` should be initiated using the
`configurable_alternatives` method of a `Runnable` or can be
A RunnableConfigurableAlternatives should be initiated using the
`configurable_alternatives` method of a Runnable or can be
initiated directly as well.
Here is an example of using a `RunnableConfigurableAlternatives` that uses
Here is an example of using a RunnableConfigurableAlternatives that uses
alternative prompts to illustrate its functionality:
```python
@@ -506,7 +506,7 @@ class RunnableConfigurableAlternatives(DynamicRunnable[Input, Output]):
chain.with_config(configurable={"prompt": "poem"}).invoke({"topic": "bears"})
```
Equivalently, you can initialize `RunnableConfigurableAlternatives` directly
Equivalently, you can initialize RunnableConfigurableAlternatives directly
and use in LCEL in the same way:
```python
@@ -531,7 +531,7 @@ class RunnableConfigurableAlternatives(DynamicRunnable[Input, Output]):
"""
which: ConfigurableField
"""The `ConfigurableField` to use to choose between alternatives."""
"""The ConfigurableField to use to choose between alternatives."""
alternatives: dict[
str,
@@ -544,9 +544,8 @@ class RunnableConfigurableAlternatives(DynamicRunnable[Input, Output]):
prefix_keys: bool
"""Whether to prefix configurable fields of each alternative with a namespace
of the form <which.id>==<alternative_key>, e.g. a key named "temperature" used by
the alternative named "gpt3" becomes "model==gpt3/temperature".
"""
of the form <which.id>==<alternative_key>, e.g. a key named "temperature" used by
the alternative named "gpt3" becomes "model==gpt3/temperature"."""
@property
@override
@@ -639,24 +638,24 @@ class RunnableConfigurableAlternatives(DynamicRunnable[Input, Output]):
def _strremoveprefix(s: str, prefix: str) -> str:
"""`str.removeprefix()` is only available in Python 3.9+."""
"""str.removeprefix() is only available in Python 3.9+."""
return s.replace(prefix, "", 1) if s.startswith(prefix) else s
def prefix_config_spec(
spec: ConfigurableFieldSpec, prefix: str
) -> ConfigurableFieldSpec:
"""Prefix the id of a `ConfigurableFieldSpec`.
"""Prefix the id of a ConfigurableFieldSpec.
This is useful when a `RunnableConfigurableAlternatives` is used as a
`ConfigurableField` of another `RunnableConfigurableAlternatives`.
This is useful when a RunnableConfigurableAlternatives is used as a
ConfigurableField of another RunnableConfigurableAlternatives.
Args:
spec: The `ConfigurableFieldSpec` to prefix.
spec: The ConfigurableFieldSpec to prefix.
prefix: The prefix to add.
Returns:
The prefixed `ConfigurableFieldSpec`.
The prefixed ConfigurableFieldSpec.
"""
return (
ConfigurableFieldSpec(
@@ -678,15 +677,15 @@ def make_options_spec(
) -> ConfigurableFieldSpec:
"""Make options spec.
Make a `ConfigurableFieldSpec` for a `ConfigurableFieldSingleOption` or
`ConfigurableFieldMultiOption`.
Make a ConfigurableFieldSpec for a ConfigurableFieldSingleOption or
ConfigurableFieldMultiOption.
Args:
spec: The `ConfigurableFieldSingleOption` or `ConfigurableFieldMultiOption`.
spec: The ConfigurableFieldSingleOption or ConfigurableFieldMultiOption.
description: The description to use if the spec does not have one.
Returns:
The `ConfigurableFieldSpec`.
The ConfigurableFieldSpec.
"""
with _enums_for_spec_lock:
if enum := _enums_for_spec.get(spec):

View File

@@ -35,20 +35,20 @@ if TYPE_CHECKING:
class RunnableWithFallbacks(RunnableSerializable[Input, Output]):
"""`Runnable` that can fallback to other `Runnable`s if it fails.
"""Runnable that can fallback to other Runnables if it fails.
External APIs (e.g., APIs for a language model) may at times experience
degraded performance or even downtime.
In these cases, it can be useful to have a fallback `Runnable` that can be
used in place of the original `Runnable` (e.g., fallback to another LLM provider).
In these cases, it can be useful to have a fallback Runnable that can be
used in place of the original Runnable (e.g., fallback to another LLM provider).
Fallbacks can be defined at the level of a single `Runnable`, or at the level
of a chain of `Runnable`s. Fallbacks are tried in order until one succeeds or
Fallbacks can be defined at the level of a single Runnable, or at the level
of a chain of Runnables. Fallbacks are tried in order until one succeeds or
all fail.
While you can instantiate a `RunnableWithFallbacks` directly, it is usually
more convenient to use the `with_fallbacks` method on a `Runnable`.
more convenient to use the `with_fallbacks` method on a Runnable.
Example:
```python
@@ -87,7 +87,7 @@ class RunnableWithFallbacks(RunnableSerializable[Input, Output]):
"""
runnable: Runnable[Input, Output]
"""The `Runnable` to run first."""
"""The Runnable to run first."""
fallbacks: Sequence[Runnable[Input, Output]]
"""A sequence of fallbacks to try."""
exceptions_to_handle: tuple[type[BaseException], ...] = (Exception,)
@@ -97,12 +97,9 @@ class RunnableWithFallbacks(RunnableSerializable[Input, Output]):
"""
exception_key: str | None = None
"""If `string` is specified then handled exceptions will be passed to fallbacks as
part of the input under the specified key.
If `None`, exceptions will not be passed to fallbacks.
If used, the base `Runnable` and its fallbacks must accept a dictionary as input.
"""
part of the input under the specified key. If `None`, exceptions
will not be passed to fallbacks. If used, the base Runnable and its fallbacks
must accept a dictionary as input."""
model_config = ConfigDict(
arbitrary_types_allowed=True,
@@ -140,7 +137,7 @@ class RunnableWithFallbacks(RunnableSerializable[Input, Output]):
@classmethod
@override
def is_lc_serializable(cls) -> bool:
"""Return `True` as this class is serializable."""
"""Return True as this class is serializable."""
return True
@classmethod
@@ -155,10 +152,10 @@ class RunnableWithFallbacks(RunnableSerializable[Input, Output]):
@property
def runnables(self) -> Iterator[Runnable[Input, Output]]:
"""Iterator over the `Runnable` and its fallbacks.
"""Iterator over the Runnable and its fallbacks.
Yields:
The `Runnable` then its fallbacks.
The Runnable then its fallbacks.
"""
yield self.runnable
yield from self.fallbacks
@@ -592,14 +589,14 @@ class RunnableWithFallbacks(RunnableSerializable[Input, Output]):
await run_manager.on_chain_end(output)
def __getattr__(self, name: str) -> Any:
"""Get an attribute from the wrapped `Runnable` and its fallbacks.
"""Get an attribute from the wrapped Runnable and its fallbacks.
Returns:
If the attribute is anything other than a method that outputs a `Runnable`,
returns `getattr(self.runnable, name)`. If the attribute is a method that
does return a new `Runnable` (e.g. `model.bind_tools([...])` outputs a new
`RunnableBinding`) then `self.runnable` and each of the runnables in
`self.fallbacks` is replaced with `getattr(x, name)`.
If the attribute is anything other than a method that outputs a Runnable,
returns getattr(self.runnable, name). If the attribute is a method that
does return a new Runnable (e.g. model.bind_tools([...]) outputs a new
RunnableBinding) then self.runnable and each of the runnables in
self.fallbacks is replaced with getattr(x, name).
Example:
```python
@@ -607,7 +604,7 @@ class RunnableWithFallbacks(RunnableSerializable[Input, Output]):
from langchain_anthropic import ChatAnthropic
gpt_4o = ChatOpenAI(model="gpt-4o")
claude_3_sonnet = ChatAnthropic(model="claude-sonnet-4-5-20250929")
claude_3_sonnet = ChatAnthropic(model="claude-3-7-sonnet-20250219")
model = gpt_4o.with_fallbacks([claude_3_sonnet])
model.model_name
@@ -621,6 +618,7 @@ class RunnableWithFallbacks(RunnableSerializable[Input, Output]):
runnable=RunnableBinding(bound=ChatOpenAI(...), kwargs={"tools": [...]}),
fallbacks=[RunnableBinding(bound=ChatAnthropic(...), kwargs={"tools": [...]})],
)
```
""" # noqa: E501
attr = getattr(self.runnable, name)

View File
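A minimal sketch of the fallback pattern with `exception_key`, using stand-in lambdas instead of real chat models (the names and failure mode are illustrative):

```python
from langchain_core.runnables import RunnableLambda

def flaky(inputs: dict) -> str:
    # Simulates a primary runnable that always fails.
    raise ValueError("primary is down")

def backup(inputs: dict) -> str:
    # Receives the original input plus the handled exception under "error".
    return f"fallback handled: {inputs.get('error')}"

chain = RunnableLambda(flaky).with_fallbacks(
    [RunnableLambda(backup)],
    exception_key="error",  # pass the caught exception into the fallback's input dict
)

print(chain.invoke({"question": "hi"}))
```

Because `exception_key` is set, both the primary runnable and its fallbacks must take a dictionary as input.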

@@ -132,7 +132,7 @@ class Branch(NamedTuple):
condition: Callable[..., str]
"""A callable that returns a string representation of the condition."""
ends: dict[str, str] | None
"""Optional dictionary of end node IDs for the branches. """
"""Optional dictionary of end node ids for the branches. """
class CurveStyle(Enum):
@@ -706,10 +706,8 @@ class Graph:
def _first_node(graph: Graph, exclude: Sequence[str] = ()) -> Node | None:
"""Find the single node that is not a target of any edge.
Exclude nodes/sources with IDs in the exclude list.
Exclude nodes/sources with ids in the exclude list.
If there is no such node, or there are multiple, return `None`.
When drawing the graph, this node would be the origin.
"""
targets = {edge.target for edge in graph.edges if edge.source not in exclude}
@@ -724,10 +722,8 @@ def _first_node(graph: Graph, exclude: Sequence[str] = ()) -> Node | None:
def _last_node(graph: Graph, exclude: Sequence[str] = ()) -> Node | None:
"""Find the single node that is not a source of any edge.
Exclude nodes/targets with IDs in the exclude list.
Exclude nodes/targets with ids in the exclude list.
If there is no such node, or there are multiple, return `None`.
When drawing the graph, this node would be the destination.
"""
sources = {edge.source for edge in graph.edges if edge.target not in exclude}

View File

@@ -454,10 +454,7 @@ def _render_mermaid_using_api(
return img_bytes
# If we get a server error (5xx), retry
if (
requests.codes.internal_server_error <= response.status_code
and attempt < max_retries
):
if 500 <= response.status_code < 600 and attempt < max_retries:
# Exponential backoff with jitter
sleep_time = retry_delay * (2**attempt) * (0.5 + 0.5 * random.random()) # noqa: S311 not used for crypto
time.sleep(sleep_time)

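For intuition, the same jittered exponential backoff formula as a standalone snippet (the base delay value is assumed, not taken from this diff):

```python
import random

retry_delay = 1.0  # assumed base delay in seconds
for attempt in range(3):
    # Same shape as above: base * 2**attempt, scaled by a jitter factor in [0.5, 1.0).
    sleep_time = retry_delay * (2**attempt) * (0.5 + 0.5 * random.random())
    print(f"attempt {attempt}: sleep ~{sleep_time:.2f}s")
```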
View File

@@ -1,6 +1,5 @@
"""Helper class to draw a state graph into a PNG file."""
from itertools import groupby
from typing import Any
from langchain_core.runnables.graph import Graph, LabelsDict
@@ -142,7 +141,6 @@ class PngDrawer:
# Add nodes, conditional edges, and edges to the graph
self.add_nodes(viz, graph)
self.add_edges(viz, graph)
self.add_subgraph(viz, [node.split(":") for node in graph.nodes])
# Update entrypoint and END styles
self.update_styles(viz, graph)
@@ -163,32 +161,6 @@ class PngDrawer:
for node in graph.nodes:
self.add_node(viz, node)
def add_subgraph(
self,
viz: Any,
nodes: list[list[str]],
parent_prefix: list[str] | None = None,
) -> None:
"""Add subgraphs to the graph.
Args:
viz: The graphviz object.
nodes: The nodes to add.
parent_prefix: The prefix of the parent subgraph.
"""
for prefix, grouped in groupby(
[node[:] for node in sorted(nodes)],
key=lambda x: x.pop(0),
):
current_prefix = (parent_prefix or []) + [prefix]
grouped_nodes = list(grouped)
if len(grouped_nodes) > 1:
subgraph = viz.add_subgraph(
[":".join(current_prefix + node) for node in grouped_nodes],
name="cluster_" + ":".join(current_prefix),
)
self.add_subgraph(subgraph, grouped_nodes, current_prefix)
def add_edges(self, viz: Any, graph: Graph) -> None:
"""Add edges to the graph.

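The removed `add_subgraph` helper groups colon-delimited node ids by their leading prefix before creating clusters; a self-contained sketch of just that grouping step (node names are made up, no graphviz required):

```python
from itertools import groupby

nodes = [node.split(":") for node in ["a:x", "a:y", "b:z"]]

for prefix, grouped in groupby(sorted(nodes), key=lambda parts: parts[0]):
    members = [":".join(parts) for parts in grouped]
    if len(members) > 1:
        print(f"cluster_{prefix}: {members}")  # -> cluster_a: ['a:x', 'a:y']
    else:
        print(f"standalone: {members[0]}")     # -> standalone: b:z
```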
View File

@@ -36,23 +36,23 @@ GetSessionHistoryCallable = Callable[..., BaseChatMessageHistory]
class RunnableWithMessageHistory(RunnableBindingBase): # type: ignore[no-redef]
"""`Runnable` that manages chat message history for another `Runnable`.
"""Runnable that manages chat message history for another Runnable.
A chat message history is a sequence of messages that represent a conversation.
`RunnableWithMessageHistory` wraps another `Runnable` and manages the chat message
RunnableWithMessageHistory wraps another Runnable and manages the chat message
history for it; it is responsible for reading and updating the chat message
history.
The formats supported for the inputs and outputs of the wrapped `Runnable`
The formats supported for the inputs and outputs of the wrapped Runnable
are described below.
`RunnableWithMessageHistory` must always be called with a config that contains
RunnableWithMessageHistory must always be called with a config that contains
the appropriate parameters for the chat message history factory.
By default, the `Runnable` is expected to take a single configuration parameter
By default, the Runnable is expected to take a single configuration parameter
called `session_id` which is a string. This parameter is used to create a new
or look up an existing chat message history that matches the given `session_id`.
or look up an existing chat message history that matches the given session_id.
In this case, the invocation would look like this:
@@ -117,12 +117,12 @@ class RunnableWithMessageHistory(RunnableBindingBase): # type: ignore[no-redef]
```
Example where the wrapped `Runnable` takes a dictionary input:
Example where the wrapped Runnable takes a dictionary input:
```python
from typing import Optional
from langchain_anthropic import ChatAnthropic
from langchain_community.chat_models import ChatAnthropic
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.runnables.history import RunnableWithMessageHistory
@@ -166,7 +166,7 @@ class RunnableWithMessageHistory(RunnableBindingBase): # type: ignore[no-redef]
print(store) # noqa: T201
```
Example where the session factory takes two keys (`user_id` and `conversation_id`):
Example where the session factory takes two keys (user_id and conversation_id):
```python
store = {}
@@ -223,28 +223,21 @@ class RunnableWithMessageHistory(RunnableBindingBase): # type: ignore[no-redef]
"""
get_session_history: GetSessionHistoryCallable
"""Function that returns a new `BaseChatMessageHistory`.
"""Function that returns a new BaseChatMessageHistory.
This function should either take a single positional argument `session_id` of type
string and return a corresponding chat message history instance
"""
string and return a corresponding chat message history instance"""
input_messages_key: str | None = None
"""Must be specified if the base `Runnable` accepts a `dict` as input.
The key in the input `dict` that contains the messages.
"""
"""Must be specified if the base runnable accepts a dict as input.
The key in the input dict that contains the messages."""
output_messages_key: str | None = None
"""Must be specified if the base `Runnable` returns a `dict` as output.
The key in the output `dict` that contains the messages.
"""
"""Must be specified if the base Runnable returns a dict as output.
The key in the output dict that contains the messages."""
history_messages_key: str | None = None
"""Must be specified if the base `Runnable` accepts a `dict` as input and expects a
separate key for historical messages.
"""
"""Must be specified if the base runnable accepts a dict as input and expects a
separate key for historical messages."""
history_factory_config: Sequence[ConfigurableFieldSpec]
"""Configure fields that should be passed to the chat history factory.
See `ConfigurableFieldSpec` for more details.
"""
See `ConfigurableFieldSpec` for more details."""
def __init__(
self,
@@ -261,16 +254,15 @@ class RunnableWithMessageHistory(RunnableBindingBase): # type: ignore[no-redef]
history_factory_config: Sequence[ConfigurableFieldSpec] | None = None,
**kwargs: Any,
) -> None:
"""Initialize `RunnableWithMessageHistory`.
"""Initialize RunnableWithMessageHistory.
Args:
runnable: The base `Runnable` to be wrapped.
runnable: The base Runnable to be wrapped.
Must take as input one of:
1. A list of `BaseMessage`
2. A `dict` with one key for all messages
3. A `dict` with one key for the current input string/message(s) and
2. A dict with one key for all messages
3. A dict with one key for the current input string/message(s) and
a separate key for historical messages. If the input key points
to a string, it will be treated as a `HumanMessage` in history.
@@ -278,15 +270,13 @@ class RunnableWithMessageHistory(RunnableBindingBase): # type: ignore[no-redef]
1. A string which can be treated as an `AIMessage`
2. A `BaseMessage` or sequence of `BaseMessage`
3. A `dict` with a key for a `BaseMessage` or sequence of
3. A dict with a key for a `BaseMessage` or sequence of
`BaseMessage`
get_session_history: Function that returns a new `BaseChatMessageHistory`.
get_session_history: Function that returns a new BaseChatMessageHistory.
This function should either take a single positional argument
`session_id` of type string and return a corresponding
chat message history instance.
```python
def get_session_history(
session_id: str, *, user_id: str | None = None
@@ -305,17 +295,16 @@ class RunnableWithMessageHistory(RunnableBindingBase): # type: ignore[no-redef]
) -> BaseChatMessageHistory: ...
```
input_messages_key: Must be specified if the base runnable accepts a `dict`
input_messages_key: Must be specified if the base runnable accepts a dict
as input.
output_messages_key: Must be specified if the base runnable returns a `dict`
output_messages_key: Must be specified if the base runnable returns a dict
as output.
history_messages_key: Must be specified if the base runnable accepts a
`dict` as input and expects a separate key for historical messages.
history_messages_key: Must be specified if the base runnable accepts a dict
as input and expects a separate key for historical messages.
history_factory_config: Configure fields that should be passed to the
chat history factory. See `ConfigurableFieldSpec` for more details.
Specifying these allows you to pass multiple config keys into the
`get_session_history` factory.
Specifying these allows you to pass multiple config keys
into the get_session_history factory.
**kwargs: Arbitrary additional kwargs to pass to parent class
`RunnableBindingBase` init.
@@ -375,7 +364,7 @@ class RunnableWithMessageHistory(RunnableBindingBase): # type: ignore[no-redef]
@property
@override
def config_specs(self) -> list[ConfigurableFieldSpec]:
"""Get the configuration specs for the `RunnableWithMessageHistory`."""
"""Get the configuration specs for the RunnableWithMessageHistory."""
return get_unique_config_specs(
super().config_specs + list(self.history_factory_config)
)
@@ -617,6 +606,6 @@ class RunnableWithMessageHistory(RunnableBindingBase): # type: ignore[no-redef]
def _get_parameter_names(callable_: GetSessionHistoryCallable) -> list[str]:
"""Get the parameter names of the `Callable`."""
"""Get the parameter names of the callable."""
sig = inspect.signature(callable_)
return list(sig.parameters.keys())

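A minimal end-to-end sketch of the wrapper described above, assuming the in-memory chat history shipped with `langchain_core` and a stand-in runnable in place of a chat model:

```python
from langchain_core.chat_history import InMemoryChatMessageHistory
from langchain_core.runnables import RunnableLambda
from langchain_core.runnables.history import RunnableWithMessageHistory

store = {}  # session_id -> chat history

def get_session_history(session_id: str) -> InMemoryChatMessageHistory:
    if session_id not in store:
        store[session_id] = InMemoryChatMessageHistory()
    return store[session_id]

# Stand-in "model": reports how many messages it received (history + new input).
echo = RunnableLambda(lambda messages: f"seen {len(messages)} message(s)")

chain = RunnableWithMessageHistory(echo, get_session_history)

config = {"configurable": {"session_id": "abc"}}
print(chain.invoke("hi", config=config))     # seen 1 message(s)
print(chain.invoke("again", config=config))  # seen 3 message(s): prior turn, reply, new input
```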
View File

@@ -51,10 +51,10 @@ def identity(x: Other) -> Other:
"""Identity function.
Args:
x: Input.
x: input.
Returns:
Output.
output.
"""
return x
@@ -63,10 +63,10 @@ async def aidentity(x: Other) -> Other:
"""Async identity function.
Args:
x: Input.
x: input.
Returns:
Output.
output.
"""
return x
@@ -74,11 +74,11 @@ async def aidentity(x: Other) -> Other:
class RunnablePassthrough(RunnableSerializable[Other, Other]):
"""Runnable to passthrough inputs unchanged or with additional keys.
This `Runnable` behaves almost like the identity function, except that it
This Runnable behaves almost like the identity function, except that it
can be configured to add additional keys to the output, if the input is a
dict.
The examples below demonstrate this `Runnable` works using a few simple
The examples below demonstrate this Runnable works using a few simple
chains. The chains rely on simple lambdas to make the examples easy to execute
and experiment with.
@@ -164,7 +164,7 @@ class RunnablePassthrough(RunnableSerializable[Other, Other]):
input_type: type[Other] | None = None,
**kwargs: Any,
) -> None:
"""Create a `RunnablePassthrough`.
"""Create e RunnablePassthrough.
Args:
func: Function to be called with the input.
@@ -180,7 +180,7 @@ class RunnablePassthrough(RunnableSerializable[Other, Other]):
@classmethod
@override
def is_lc_serializable(cls) -> bool:
"""Return `True` as this class is serializable."""
"""Return True as this class is serializable."""
return True
@classmethod
@@ -213,11 +213,11 @@ class RunnablePassthrough(RunnableSerializable[Other, Other]):
"""Merge the Dict input with the output produced by the mapping argument.
Args:
**kwargs: `Runnable`, `Callable` or a `Mapping` from keys to `Runnable`
objects or `Callable`s.
**kwargs: Runnable, Callable or a Mapping from keys to Runnables
or Callables.
Returns:
A `Runnable` that merges the `dict` input with the output produced by the
A Runnable that merges the Dict input with the output produced by the
mapping argument.
"""
return RunnableAssign(RunnableParallel[dict[str, Any]](kwargs))
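A small sketch of the `assign` behavior described above (the keys and lambda are illustrative):

```python
from langchain_core.runnables import RunnablePassthrough

# Keeps the original dict and adds a computed "total" key.
chain = RunnablePassthrough.assign(total=lambda d: d["a"] + d["b"])

print(chain.invoke({"a": 1, "b": 2}))  # -> {'a': 1, 'b': 2, 'total': 3}
```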
@@ -350,7 +350,7 @@ _graph_passthrough: RunnablePassthrough = RunnablePassthrough()
class RunnableAssign(RunnableSerializable[dict[str, Any], dict[str, Any]]):
"""Runnable that assigns key-value pairs to `dict[str, Any]` inputs.
"""Runnable that assigns key-value pairs to dict[str, Any] inputs.
The `RunnableAssign` class takes input dictionaries and, through a
`RunnableParallel` instance, applies transformations, then combines
@@ -392,7 +392,7 @@ class RunnableAssign(RunnableSerializable[dict[str, Any], dict[str, Any]]):
mapper: RunnableParallel
def __init__(self, mapper: RunnableParallel[dict[str, Any]], **kwargs: Any) -> None:
"""Create a `RunnableAssign`.
"""Create a RunnableAssign.
Args:
mapper: A `RunnableParallel` instance that will be used to transform the
@@ -403,7 +403,7 @@ class RunnableAssign(RunnableSerializable[dict[str, Any], dict[str, Any]]):
@classmethod
@override
def is_lc_serializable(cls) -> bool:
"""Return `True` as this class is serializable."""
"""Return True as this class is serializable."""
return True
@classmethod
@@ -668,19 +668,13 @@ class RunnableAssign(RunnableSerializable[dict[str, Any], dict[str, Any]]):
yield chunk
class RunnablePick(RunnableSerializable[dict[str, Any], Any]):
"""`Runnable` that picks keys from `dict[str, Any]` inputs.
class RunnablePick(RunnableSerializable[dict[str, Any], dict[str, Any]]):
"""Runnable that picks keys from dict[str, Any] inputs.
`RunnablePick` class represents a `Runnable` that selectively picks keys from a
RunnablePick class represents a Runnable that selectively picks keys from a
dictionary input. It allows you to specify one or more keys to extract
from the input dictionary.
!!! note "Return Type Behavior"
The return type depends on the `keys` parameter:
- When `keys` is a `str`: Returns the single value associated with that key
- When `keys` is a `list`: Returns a dictionary containing only the selected
keys
from the input dictionary. It returns a new dictionary containing only
the selected keys.
Example:
```python
@@ -693,22 +687,18 @@ class RunnablePick(RunnableSerializable[dict[str, Any], Any]):
"country": "USA",
}
# Single key - returns the value directly
runnable_single = RunnablePick(keys="name")
result_single = runnable_single.invoke(input_data)
print(result_single) # Output: "John"
runnable = RunnablePick(keys=["name", "age"])
# Multiple keys - returns a dictionary
runnable_multiple = RunnablePick(keys=["name", "age"])
result_multiple = runnable_multiple.invoke(input_data)
print(result_multiple) # Output: {'name': 'John', 'age': 30}
output_data = runnable.invoke(input_data)
print(output_data) # Output: {'name': 'John', 'age': 30}
```
"""
keys: str | list[str]
def __init__(self, keys: str | list[str], **kwargs: Any) -> None:
"""Create a `RunnablePick`.
"""Create a RunnablePick.
Args:
keys: A single key or a list of keys to pick from the input dictionary.
@@ -718,7 +708,7 @@ class RunnablePick(RunnableSerializable[dict[str, Any], Any]):
@classmethod
@override
def is_lc_serializable(cls) -> bool:
"""Return `True` as this class is serializable."""
"""Return True as this class is serializable."""
return True
@classmethod

View File

@@ -40,11 +40,11 @@ class RouterInput(TypedDict):
key: str
"""The key to route on."""
input: Any
"""The input to pass to the selected `Runnable`."""
"""The input to pass to the selected Runnable."""
class RouterRunnable(RunnableSerializable[RouterInput, Output]):
"""`Runnable` that routes to a set of `Runnable` based on `Input['key']`.
"""Runnable that routes to a set of Runnables based on Input['key'].
Returns the output of the selected Runnable.
@@ -74,10 +74,10 @@ class RouterRunnable(RunnableSerializable[RouterInput, Output]):
self,
runnables: Mapping[str, Runnable[Any, Output] | Callable[[Any], Output]],
) -> None:
"""Create a `RouterRunnable`.
"""Create a RouterRunnable.
Args:
runnables: A mapping of keys to `Runnable` objects.
runnables: A mapping of keys to Runnables.
"""
super().__init__(
runnables={key: coerce_to_runnable(r) for key, r in runnables.items()}
@@ -90,7 +90,7 @@ class RouterRunnable(RunnableSerializable[RouterInput, Output]):
@classmethod
@override
def is_lc_serializable(cls) -> bool:
"""Return `True` as this class is serializable."""
"""Return True as this class is serializable."""
return True
@classmethod

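A rough sketch of routing on `Input['key']` (the runnables and keys below are made up for illustration):

```python
from langchain_core.runnables import RouterRunnable

router = RouterRunnable(
    runnables={
        "upper": lambda text: text.upper(),
        "reverse": lambda text: text[::-1],
    }
)

# The "key" field selects which runnable handles the "input" field.
print(router.invoke({"key": "upper", "input": "hello"}))    # -> HELLO
print(router.invoke({"key": "reverse", "input": "hello"}))  # -> olleh
```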
View File

@@ -28,7 +28,7 @@ class EventData(TypedDict, total=False):
This field is only available if the `Runnable` raised an exception.
!!! version-added "Added in `langchain-core` 1.0.0"
!!! version-added "Added in version 1.0.0"
"""
output: Any
"""The output of the `Runnable` that generated the event.
@@ -168,7 +168,10 @@ class StandardStreamEvent(BaseStreamEvent):
class CustomStreamEvent(BaseStreamEvent):
"""Custom stream event created by the user."""
"""Custom stream event created by the user.
!!! version-added "Added in version 0.2.15"
"""
# Overwrite the event field to be more specific.
event: Literal["on_custom_event"] # type: ignore[misc]

View File

@@ -86,7 +86,7 @@ class BaseStore(ABC, Generic[K, V]):
Returns:
A sequence of optional values associated with the keys.
If a key is not found, the corresponding value will be `None`.
If a key is not found, the corresponding value will be None.
"""
async def amget(self, keys: Sequence[K]) -> list[V | None]:
@@ -97,7 +97,7 @@ class BaseStore(ABC, Generic[K, V]):
Returns:
A sequence of optional values associated with the keys.
If a key is not found, the corresponding value will be `None`.
If a key is not found, the corresponding value will be None.
"""
return await run_in_executor(None, self.mget, keys)
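A small sketch of the `None`-for-missing-keys behavior described above, assuming the in-memory store from `langchain_core` (keys and values are illustrative):

```python
from langchain_core.stores import InMemoryStore

store = InMemoryStore()
store.mset([("k1", "v1"), ("k2", "v2")])

# Missing keys come back as None rather than raising.
print(store.mget(["k1", "missing", "k2"]))  # -> ['v1', None, 'v2']
```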
@@ -243,7 +243,8 @@ class InMemoryStore(InMemoryBaseStore[Any]):
"""In-memory store for any type of data.
Attributes:
store: The underlying dictionary that stores the key-value pairs.
store (dict[str, Any]): The underlying dictionary that stores
the key-value pairs.
Examples:
```python
@@ -266,7 +267,8 @@ class InMemoryByteStore(InMemoryBaseStore[bytes]):
"""In-memory store for bytes.
Attributes:
store: The underlying dictionary that stores the key-value pairs.
store (dict[str, bytes]): The underlying dictionary that stores
the key-value pairs.
Examples:
```python

View File

@@ -125,11 +125,9 @@ def print_sys_info(*, additional_pkgs: Sequence[str] = ()) -> None:
for dep in sub_dependencies:
try:
dep_version = metadata.version(dep)
except Exception:
dep_version = None
if dep_version is not None:
print(f"> {dep}: {dep_version}")
except Exception:
print(f"> {dep}: Installed. No version info available.")
if __name__ == "__main__":

View File

@@ -391,7 +391,6 @@ class BaseTool(RunnableSerializable[str | dict | ToolCall, Any]):
"""Base class for all LangChain tools.
This abstract class defines the interface that all LangChain tools must implement.
Tools are components that can be called by agents to perform specific actions.
"""
@@ -402,7 +401,7 @@ class BaseTool(RunnableSerializable[str | dict | ToolCall, Any]):
**kwargs: Additional keyword arguments passed to the parent class.
Raises:
SchemaAnnotationError: If `args_schema` has incorrect type annotation.
SchemaAnnotationError: If args_schema has incorrect type annotation.
"""
super().__init_subclass__(**kwargs)
@@ -443,15 +442,15 @@ class ChildTool(BaseTool):
Args schema should be either:
- A subclass of `pydantic.BaseModel`.
- A subclass of `pydantic.v1.BaseModel` if accessing v1 namespace in pydantic 2
- A JSON schema dict
- A subclass of pydantic.BaseModel.
- A subclass of pydantic.v1.BaseModel if accessing v1 namespace in pydantic 2
- a JSON schema dict
"""
return_direct: bool = False
"""Whether to return the tool's output directly.
Setting this to `True` means that after the tool is called, the `AgentExecutor` will
stop looping.
Setting this to True means
that after the tool is called, the AgentExecutor will stop looping.
"""
verbose: bool = False
"""Whether to log the tool's progress."""
@@ -461,37 +460,31 @@ class ChildTool(BaseTool):
tags: list[str] | None = None
"""Optional list of tags associated with the tool.
These tags will be associated with each call to this tool,
and passed as arguments to the handlers defined in `callbacks`.
You can use these to, e.g., identify a specific instance of a tool with its use
case.
You can use these to, e.g., identify a specific instance of a tool with its use case.
"""
metadata: dict[str, Any] | None = None
"""Optional metadata associated with the tool.
This metadata will be associated with each call to this tool,
and passed as arguments to the handlers defined in `callbacks`.
You can use these to, e.g., identify a specific instance of a tool with its use
case.
You can use these to, e.g., identify a specific instance of a tool with its use case.
"""
handle_tool_error: bool | str | Callable[[ToolException], str] | None = False
"""Handle the content of the `ToolException` thrown."""
"""Handle the content of the ToolException thrown."""
handle_validation_error: (
bool | str | Callable[[ValidationError | ValidationErrorV1], str] | None
) = False
"""Handle the content of the `ValidationError` thrown."""
"""Handle the content of the ValidationError thrown."""
response_format: Literal["content", "content_and_artifact"] = "content"
"""The tool response format.
If `'content'` then the output of the tool is interpreted as the contents of a
`ToolMessage`. If `'content_and_artifact'` then the output is expected to be a
two-tuple corresponding to the `(content, artifact)` of a `ToolMessage`.
If `"content"` then the output of the tool is interpreted as the contents of a
`ToolMessage`. If `"content_and_artifact"` then the output is expected to be a
two-tuple corresponding to the (content, artifact) of a `ToolMessage`.
"""
def __init__(self, **kwargs: Any) -> None:
@@ -499,7 +492,7 @@ class ChildTool(BaseTool):
Raises:
TypeError: If `args_schema` is not a subclass of pydantic `BaseModel` or
`dict`.
dict.
"""
if (
"args_schema" in kwargs
@@ -533,7 +526,7 @@ class ChildTool(BaseTool):
"""Get the tool's input arguments schema.
Returns:
`dict` containing the tool's argument properties.
Dictionary containing the tool's argument properties.
"""
if isinstance(self.args_schema, dict):
json_schema = self.args_schema
@@ -623,9 +616,9 @@ class ChildTool(BaseTool):
Raises:
ValueError: If `string` input is provided with JSON schema `args_schema`.
ValueError: If `InjectedToolCallId` is required but `tool_call_id` is not
ValueError: If InjectedToolCallId is required but `tool_call_id` is not
provided.
TypeError: If `args_schema` is not a Pydantic `BaseModel` or dict.
TypeError: If args_schema is not a Pydantic `BaseModel` or dict.
"""
input_args = self.args_schema
if isinstance(tool_input, str):
@@ -714,35 +707,6 @@ class ChildTool(BaseTool):
kwargs["run_manager"] = kwargs["run_manager"].get_sync()
return await run_in_executor(None, self._run, *args, **kwargs)
def _filter_injected_args(self, tool_input: dict) -> dict:
"""Filter out injected tool arguments from the input dictionary.
Injected arguments are those annotated with `InjectedToolArg` or its
subclasses, or arguments in `FILTERED_ARGS` like `run_manager` and callbacks.
Args:
tool_input: The tool input dictionary to filter.
Returns:
A filtered dictionary with injected arguments removed.
"""
# Start with filtered args from the constant
filtered_keys = set[str](FILTERED_ARGS)
# If we have an args_schema, use it to identify injected args
if self.args_schema is not None:
try:
annotations = get_all_basemodel_annotations(self.args_schema)
for field_name, field_type in annotations.items():
if _is_injected_arg_type(field_type):
filtered_keys.add(field_name)
except Exception: # noqa: S110
# If we can't get annotations, just use FILTERED_ARGS
pass
# Filter out the injected keys from tool_input
return {k: v for k, v in tool_input.items() if k not in filtered_keys}
def _to_args_and_kwargs(
self, tool_input: str | dict, tool_call_id: str | None
) -> tuple[tuple, dict]:
@@ -753,7 +717,7 @@ class ChildTool(BaseTool):
tool_call_id: The ID of the tool call, if available.
Returns:
A tuple of `(positional_args, keyword_args)` for the tool.
A tuple of (positional_args, keyword_args) for the tool.
Raises:
TypeError: If the tool input type is invalid.
@@ -830,29 +794,17 @@ class ChildTool(BaseTool):
self.metadata,
)
# Filter out injected arguments from callback inputs
filtered_tool_input = (
self._filter_injected_args(tool_input)
if isinstance(tool_input, dict)
else None
)
# Use filtered inputs for the input_str parameter as well
tool_input_str = (
tool_input
if isinstance(tool_input, str)
else str(
filtered_tool_input if filtered_tool_input is not None else tool_input
)
)
run_manager = callback_manager.on_tool_start(
{"name": self.name, "description": self.description},
tool_input_str,
tool_input if isinstance(tool_input, str) else str(tool_input),
color=start_color,
name=run_name,
run_id=run_id,
inputs=filtered_tool_input,
# Inputs by definition should always be dicts.
# For now, it's unclear whether this assumption is ever violated,
# but if it is we will send a `None` value to the callback instead
# TODO: will need to address issue via a patch.
inputs=tool_input if isinstance(tool_input, dict) else None,
**kwargs,
)
@@ -872,19 +824,16 @@ class ChildTool(BaseTool):
tool_kwargs |= {config_param: config}
response = context.run(self._run, *tool_args, **tool_kwargs)
if self.response_format == "content_and_artifact":
msg = (
"Since response_format='content_and_artifact' "
"a two-tuple of the message content and raw tool output is "
f"expected. Instead, generated response is of type: "
f"{type(response)}."
)
if not isinstance(response, tuple):
if not isinstance(response, tuple) or len(response) != 2:
msg = (
"Since response_format='content_and_artifact' "
"a two-tuple of the message content and raw tool output is "
f"expected. Instead generated response of type: "
f"{type(response)}."
)
error_to_raise = ValueError(msg)
else:
try:
content, artifact = response
except ValueError:
error_to_raise = ValueError(msg)
content, artifact = response
else:
content = response
except (ValidationError, ValidationErrorV1) as e:
@@ -956,30 +905,17 @@ class ChildTool(BaseTool):
metadata,
self.metadata,
)
# Filter out injected arguments from callback inputs
filtered_tool_input = (
self._filter_injected_args(tool_input)
if isinstance(tool_input, dict)
else None
)
# Use filtered inputs for the input_str parameter as well
tool_input_str = (
tool_input
if isinstance(tool_input, str)
else str(
filtered_tool_input if filtered_tool_input is not None else tool_input
)
)
run_manager = await callback_manager.on_tool_start(
{"name": self.name, "description": self.description},
tool_input_str,
tool_input if isinstance(tool_input, str) else str(tool_input),
color=start_color,
name=run_name,
run_id=run_id,
inputs=filtered_tool_input,
# Inputs by definition should always be dicts.
# For now, it's unclear whether this assumption is ever violated,
# but if it is we will send a `None` value to the callback instead
# TODO: will need to address issue via a patch.
inputs=tool_input if isinstance(tool_input, dict) else None,
**kwargs,
)
content = None
@@ -1001,19 +937,16 @@ class ChildTool(BaseTool):
coro = self._arun(*tool_args, **tool_kwargs)
response = await coro_with_context(coro, context)
if self.response_format == "content_and_artifact":
msg = (
"Since response_format='content_and_artifact' "
"a two-tuple of the message content and raw tool output is "
f"expected. Instead, generated response is of type: "
f"{type(response)}."
)
if not isinstance(response, tuple):
if not isinstance(response, tuple) or len(response) != 2:
msg = (
"Since response_format='content_and_artifact' "
"a two-tuple of the message content and raw tool output is "
f"expected. Instead generated response of type: "
f"{type(response)}."
)
error_to_raise = ValueError(msg)
else:
try:
content, artifact = response
except ValueError:
error_to_raise = ValueError(msg)
content, artifact = response
else:
content = response
except ValidationError as e:
@@ -1061,7 +994,7 @@ def _handle_validation_error(
Args:
e: The validation error that occurred.
flag: How to handle the error (`bool`, `str`, or `Callable`).
flag: How to handle the error (bool, string, or callable).
Returns:
The error message to return.
@@ -1093,7 +1026,7 @@ def _handle_tool_error(
Args:
e: The tool exception that occurred.
flag: How to handle the error (`bool`, `str`, or `Callable`).
flag: How to handle the error (bool, string, or callable).
Returns:
The error message to return.
@@ -1124,12 +1057,12 @@ def _prep_run_args(
"""Prepare arguments for tool execution.
Args:
value: The input value (`str`, `dict`, or `ToolCall`).
value: The input value (string, dict, or ToolCall).
config: The runnable configuration.
**kwargs: Additional keyword arguments.
Returns:
A tuple of `(tool_input, run_kwargs)`.
A tuple of (tool_input, run_kwargs).
"""
config = ensure_config(config)
if _is_tool_call(value):
@@ -1160,7 +1093,7 @@ def _format_output(
name: str,
status: str,
) -> ToolOutputMixin | Any:
"""Format tool output as a `ToolMessage` if appropriate.
"""Format tool output as a ToolMessage if appropriate.
Args:
content: The main content of the tool output.
@@ -1170,7 +1103,7 @@ def _format_output(
status: The execution status.
Returns:
The formatted output, either as a `ToolMessage` or the original content.
The formatted output, either as a ToolMessage or the original content.
"""
if isinstance(content, ToolOutputMixin) or tool_call_id is None:
return content
@@ -1241,7 +1174,7 @@ def _get_type_hints(func: Callable) -> dict[str, type] | None:
func: The function to get type hints from.
Returns:
`dict` of type hints, or `None` if extraction fails.
Dictionary of type hints, or None if extraction fails.
"""
if isinstance(func, functools.partial):
func = func.func
@@ -1252,13 +1185,13 @@ def _get_type_hints(func: Callable) -> dict[str, type] | None:
def _get_runnable_config_param(func: Callable) -> str | None:
"""Find the parameter name for `RunnableConfig` in a function.
"""Find the parameter name for RunnableConfig in a function.
Args:
func: The function to check.
Returns:
The parameter name for `RunnableConfig`, or `None` if not found.
The parameter name for RunnableConfig, or None if not found.
"""
type_hints = _get_type_hints(func)
if not type_hints:
@@ -1282,11 +1215,9 @@ class _DirectlyInjectedToolArg:
Injected via direct type annotation, rather than annotated metadata.
For example, `ToolRuntime` is a directly injected argument.
For example, ToolRuntime is a directly injected argument.
Note the direct annotation rather than the verbose alternative:
`Annotated[ToolRuntime, InjectedRuntime]`
Annotated[ToolRuntime, InjectedRuntime]
```python
from langchain_core.tools import tool, ToolRuntime
@@ -1329,11 +1260,11 @@ class InjectedToolCallId(InjectedToolArg):
def _is_directly_injected_arg_type(type_: Any) -> bool:
"""Check if a type annotation indicates a directly injected argument.
This is currently only used for `ToolRuntime`.
Checks if either the annotation itself is a subclass of `_DirectlyInjectedToolArg`
or the origin of the annotation is a subclass of `_DirectlyInjectedToolArg`.
This is currently only used for ToolRuntime.
Checks if either the annotation itself is a subclass of _DirectlyInjectedToolArg
or the origin of the annotation is a subclass of _DirectlyInjectedToolArg.
Ex: `ToolRuntime` or `ToolRuntime[ContextT, StateT]` would both return `True`.
Ex: ToolRuntime or ToolRuntime[ContextT, StateT] would both return True.
"""
return (
isinstance(type_, type) and issubclass(type_, _DirectlyInjectedToolArg)
@@ -1375,14 +1306,14 @@ def _is_injected_arg_type(
def get_all_basemodel_annotations(
cls: TypeBaseModel | Any, *, default_to_bound: bool = True
) -> dict[str, type | TypeVar]:
"""Get all annotations from a Pydantic `BaseModel` and its parents.
"""Get all annotations from a Pydantic BaseModel and its parents.
Args:
cls: The Pydantic `BaseModel` class.
default_to_bound: Whether to default to the bound of a `TypeVar` if it exists.
cls: The Pydantic BaseModel class.
default_to_bound: Whether to default to the bound of a TypeVar if it exists.
Returns:
`dict` of field names to their type annotations.
A dictionary of field names to their type annotations.
"""
# cls has no subscript: cls = FooBar
if isinstance(cls, type):
@@ -1448,15 +1379,15 @@ def _replace_type_vars(
*,
default_to_bound: bool = True,
) -> type | TypeVar:
"""Replace `TypeVar`s in a type annotation with concrete types.
"""Replace TypeVars in a type annotation with concrete types.
Args:
type_: The type annotation to process.
generic_map: Mapping of `TypeVar`s to concrete types.
default_to_bound: Whether to use `TypeVar` bounds as defaults.
generic_map: Mapping of TypeVars to concrete types.
default_to_bound: Whether to use TypeVar bounds as defaults.
Returns:
The type with `TypeVar`s replaced.
The type with TypeVars replaced.
"""
generic_map = generic_map or {}
if isinstance(type_, TypeVar):

View File

@@ -81,72 +81,57 @@ def tool(
parse_docstring: bool = False,
error_on_invalid_docstring: bool = True,
) -> BaseTool | Callable[[Callable | Runnable], BaseTool]:
"""Convert Python functions and `Runnables` to LangChain tools.
Can be used as a decorator with or without arguments to create tools from functions.
Functions can have any signature - the tool will automatically infer input schemas
unless disabled.
!!! note "Requirements"
- Functions must have type hints for proper schema inference
- When `infer_schema=False`, functions must be `(str) -> str` and have
docstrings
- When using with `Runnable`, a string name must be provided
"""Make tools out of Python functions, can be used with or without arguments.
Args:
name_or_callable: Optional name of the tool or the `Callable` to be
converted to a tool. Overrides the function's name.
Must be provided as a positional argument.
runnable: Optional `Runnable` to convert to a tool.
Must be provided as a positional argument.
name_or_callable: Optional name of the tool or the callable to be
converted to a tool. Must be provided as a positional argument.
runnable: Optional runnable to convert to a tool. Must be provided as a
positional argument.
description: Optional description for the tool.
Precedence for the tool description value is as follows:
- This `description` argument
- `description` argument
(used even if docstring and/or `args_schema` are provided)
- Tool function docstring
(used even if `args_schema` is provided)
- `args_schema` description
(used only if `description` and docstring are not provided)
(used only if `description` / docstring are not provided)
*args: Extra positional arguments. Must be empty.
return_direct: Whether to return directly from the tool rather than continuing
the agent loop.
return_direct: Whether to return directly from the tool rather
than continuing the agent loop.
args_schema: Optional argument schema for user to specify.
infer_schema: Whether to infer the schema of the arguments from the function's
signature. This also makes the resultant tool accept a dictionary input to
its `run()` function.
response_format: The tool response format.
If `'content'`, then the output of the tool is interpreted as the contents
of a `ToolMessage`.
If `'content_and_artifact'`, then the output is expected to be a two-tuple
infer_schema: Whether to infer the schema of the arguments from
the function's signature. This also makes the resultant tool
accept a dictionary input to its `run()` function.
response_format: The tool response format. If `"content"` then the output of
the tool is interpreted as the contents of a `ToolMessage`. If
`"content_and_artifact"` then the output is expected to be a two-tuple
corresponding to the `(content, artifact)` of a `ToolMessage`.
parse_docstring: If `infer_schema` and `parse_docstring`, will attempt to
parse_docstring: if `infer_schema` and `parse_docstring`, will attempt to
parse parameter descriptions from Google Style function docstrings.
error_on_invalid_docstring: If `parse_docstring` is provided, configure
error_on_invalid_docstring: if `parse_docstring` is provided, configure
whether to raise `ValueError` on invalid Google Style docstrings.
Raises:
ValueError: If too many positional arguments are provided (e.g. violating the
`*args` constraint).
ValueError: If a `Runnable` is provided without a string name. When using `tool`
with a `Runnable`, a `str` name must be provided as the `name_or_callable`.
ValueError: If too many positional arguments are provided.
ValueError: If a runnable is provided without a string name.
ValueError: If the first argument is not a string or callable with
a `__name__` attribute.
ValueError: If the function does not have a docstring and description
is not provided and `infer_schema` is `False`.
ValueError: If `parse_docstring` is `True` and the function has an invalid
Google-style docstring and `error_on_invalid_docstring` is True.
ValueError: If a `Runnable` is provided that does not have an object schema.
ValueError: If a Runnable is provided that does not have an object schema.
Returns:
The tool.
Requires:
- Function must be of type `(str) -> str`
- Function must have a docstring
Examples:
```python
@tool
@@ -166,6 +151,8 @@ def tool(
return "partial json of results", {"full": "object of results"}
```
!!! version-added "Added in version 0.2.14"
Parse Google-style docstrings:
```python

View File

@@ -83,12 +83,11 @@ def create_retriever_tool(
model, so should be descriptive.
document_prompt: The prompt to use for the document.
document_separator: The separator to use between documents.
response_format: The tool response format.
If `"content"` then the output of the tool is interpreted as the contents of
a `ToolMessage`. If `"content_and_artifact"` then the output is expected to
be a two-tuple corresponding to the `(content, artifact)` of a `ToolMessage`
(artifact being a list of documents in this case).
response_format: The tool response format. If `"content"` then the output of
the tool is interpreted as the contents of a `ToolMessage`. If
`"content_and_artifact"` then the output is expected to be a two-tuple
corresponding to the `(content, artifact)` of a `ToolMessage` (artifact
being a list of documents in this case).
Returns:
Tool class to pass to an agent.

View File

@@ -151,13 +151,11 @@ class StructuredTool(BaseTool):
return_direct: Whether to return the result directly or as a callback.
args_schema: The schema of the tool's input arguments.
infer_schema: Whether to infer the schema from the function's signature.
response_format: The tool response format.
If `"content"` then the output of the tool is interpreted as the
contents of a `ToolMessage`. If `"content_and_artifact"` then the output
is expected to be a two-tuple corresponding to the `(content, artifact)`
of a `ToolMessage`.
parse_docstring: If `infer_schema` and `parse_docstring`, will attempt
response_format: The tool response format. If `"content"` then the output of
the tool is interpreted as the contents of a `ToolMessage`. If
`"content_and_artifact"` then the output is expected to be a two-tuple
corresponding to the `(content, artifact)` of a `ToolMessage`.
parse_docstring: if `infer_schema` and `parse_docstring`, will attempt
to parse parameter descriptions from Google Style function docstrings.
error_on_invalid_docstring: if `parse_docstring` is provided, configure
whether to raise `ValueError` on invalid Google Style docstrings.

View File

@@ -96,10 +96,10 @@ class RunLogPatch:
"""Patch to the run log."""
ops: list[dict[str, Any]]
"""List of JSONPatch operations, which describe how to create the run state
"""List of jsonpatch operations, which describe how to create the run state
from an empty dict. This is the minimal representation of the log, designed to
be serialized as JSON and sent over the wire to reconstruct the log on the other
side. Reconstruction of the state can be done with any JSONPatch-compliant library,
side. Reconstruction of the state can be done with any jsonpatch-compliant library,
see https://jsonpatch.com for more information."""
def __init__(self, *ops: dict[str, Any]) -> None:

View File

@@ -351,7 +351,7 @@ def convert_to_openai_function(
Raises:
ValueError: If function is not in a supported format.
!!! warning "Behavior changed in `langchain-core` 0.3.16"
!!! warning "Behavior changed in 0.3.16"
`description` and `parameters` keys are now optional. Only `name` is
required and guaranteed to be part of the output.
"""
@@ -412,7 +412,7 @@ def convert_to_openai_function(
if strict is not None:
if "strict" in oai_function and oai_function["strict"] != strict:
msg = (
f"Tool/function already has a 'strict' key with value "
f"Tool/function already has a 'strict' key wth value "
f"{oai_function['strict']} which is different from the explicit "
f"`strict` arg received {strict=}."
)
@@ -425,14 +425,6 @@ def convert_to_openai_function(
oai_function["parameters"] = _recursive_set_additional_properties_false(
oai_function["parameters"]
)
# All fields must be `required`
parameters = oai_function.get("parameters")
if isinstance(parameters, dict):
fields = parameters.get("properties")
if isinstance(fields, dict) and fields:
parameters = dict(parameters)
parameters["required"] = list(fields.keys())
oai_function["parameters"] = parameters
return oai_function
@@ -475,16 +467,16 @@ def convert_to_openai_tool(
A dict version of the passed in tool which is compatible with the
OpenAI tool-calling API.
!!! warning "Behavior changed in `langchain-core` 0.3.16"
!!! warning "Behavior changed in 0.3.16"
`description` and `parameters` keys are now optional. Only `name` is
required and guaranteed to be part of the output.
!!! warning "Behavior changed in `langchain-core` 0.3.44"
!!! warning "Behavior changed in 0.3.44"
Return OpenAI Responses API-style tools unchanged. This includes
any dict with `"type"` in `"file_search"`, `"function"`,
`"computer_use_preview"`, `"web_search_preview"`.
!!! warning "Behavior changed in `langchain-core` 0.3.63"
!!! warning "Behavior changed in 0.3.63"
Added support for OpenAI's image generation built-in tool.
"""
# Import locally to prevent circular import
@@ -653,9 +645,6 @@ def tool_example_to_messages(
return messages
_MIN_DOCSTRING_BLOCKS = 2
def _parse_google_docstring(
docstring: str | None,
args: list[str],
@@ -674,7 +663,7 @@ def _parse_google_docstring(
arg for arg in args if arg not in {"run_manager", "callbacks", "return"}
}
if filtered_annotations and (
len(docstring_blocks) < _MIN_DOCSTRING_BLOCKS
len(docstring_blocks) < 2
or not any(block.startswith("Args:") for block in docstring_blocks[1:])
):
msg = "Found invalid Google-Style docstring."

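For reference, a hypothetical function with the kind of Google-style docstring the parser expects: a summary block followed by an `Args:` block listing each parameter:

```python
def multiply(a: int, b: int) -> int:
    """Multiply two numbers.

    Args:
        a: The first number.
        b: The second number.
    """
    return a * b
```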
View File

@@ -26,9 +26,6 @@ def get_color_mapping(
colors = list(_TEXT_COLOR_MAPPING.keys())
if excluded_colors is not None:
colors = [c for c in colors if c not in excluded_colors]
if not colors:
msg = "No colors available after applying exclusions."
raise ValueError(msg)
return {item: colors[i % len(colors)] for i, item in enumerate(items)}

View File

@@ -65,8 +65,8 @@ def get_pydantic_major_version() -> int:
PYDANTIC_MAJOR_VERSION = PYDANTIC_VERSION.major
PYDANTIC_MINOR_VERSION = PYDANTIC_VERSION.minor
IS_PYDANTIC_V1 = False
IS_PYDANTIC_V2 = True
IS_PYDANTIC_V1 = PYDANTIC_VERSION.major == 1
IS_PYDANTIC_V2 = PYDANTIC_VERSION.major == 2
PydanticBaseModel = BaseModel
TypeBaseModel = type[BaseModel]

View File

@@ -30,7 +30,10 @@ def stringify_dict(data: dict) -> str:
Returns:
The stringified dictionary.
"""
return "".join(f"{key}: {stringify_value(value)}\n" for key, value in data.items())
text = ""
for key, value in data.items():
text += key + ": " + stringify_value(value) + "\n"
return text
def comma_list(items: list[Any]) -> str:

View File

@@ -218,7 +218,7 @@ def _build_model_kwargs(
values: dict[str, Any],
all_required_field_names: set[str],
) -> dict[str, Any]:
"""Build `model_kwargs` param from Pydantic constructor values.
"""Build "model_kwargs" param from Pydantic constructor values.
Args:
values: All init args passed in by user.
@@ -228,8 +228,8 @@ def _build_model_kwargs(
Extra kwargs.
Raises:
ValueError: If a field is specified in both `values` and `extra_kwargs`.
ValueError: If a field is specified in `model_kwargs`.
ValueError: If a field is specified in both values and extra_kwargs.
ValueError: If a field is specified in model_kwargs.
"""
extra_kwargs = values.get("model_kwargs", {})
for field_name in list(values):
@@ -267,10 +267,6 @@ def build_extra_kwargs(
) -> dict[str, Any]:
"""Build extra kwargs from values and extra_kwargs.
!!! danger "DON'T USE"
Kept for backwards-compatibility but should never have been public. Use the
internal `_build_model_kwargs` function instead.
Args:
extra_kwargs: Extra kwargs passed in by user.
values: Values passed in by user.
@@ -280,10 +276,9 @@ def build_extra_kwargs(
Extra kwargs.
Raises:
ValueError: If a field is specified in both `values` and `extra_kwargs`.
ValueError: If a field is specified in `model_kwargs`.
ValueError: If a field is specified in both values and extra_kwargs.
ValueError: If a field is specified in model_kwargs.
"""
# DON'T USE! Kept for backwards-compatibility but should never have been public.
for field_name in list(values):
if field_name in extra_kwargs:
msg = f"Found {field_name} supplied twice."
@@ -297,7 +292,6 @@ def build_extra_kwargs(
)
extra_kwargs[field_name] = values.pop(field_name)
# DON'T USE! Kept for backwards-compatibility but should never have been public.
invalid_model_kwargs = all_required_field_names.intersection(extra_kwargs.keys())
if invalid_model_kwargs:
msg = (
@@ -306,7 +300,6 @@ def build_extra_kwargs(
)
raise ValueError(msg)
# DON'T USE! Kept for backwards-compatibility but should never have been public.
return extra_kwargs

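A hypothetical stand-in illustrating the contract described above (this is not the private helper itself; names and logic are a sketch of the documented behavior):

```python
def build_model_kwargs(values: dict, field_names: set) -> dict:
    # Anything that is not a declared field is moved under "model_kwargs".
    extra = dict(values.pop("model_kwargs", {}))
    for name in list(values):
        if name not in field_names:
            if name in extra:
                raise ValueError(f"Found {name} supplied twice.")
            extra[name] = values.pop(name)
    # Declared fields must not be smuggled in via model_kwargs.
    overlap = field_names & extra.keys()
    if overlap:
        raise ValueError(f"Parameters {overlap} should be specified explicitly.")
    values["model_kwargs"] = extra
    return values

print(build_model_kwargs({"temperature": 0.2, "model": "x"}, {"model"}))
# -> {'model': 'x', 'model_kwargs': {'temperature': 0.2}}
```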
View File

@@ -58,16 +58,16 @@ class VectorStore(ABC):
texts: Iterable of strings to add to the `VectorStore`.
metadatas: Optional list of metadatas associated with the texts.
ids: Optional list of IDs associated with the texts.
**kwargs: `VectorStore` specific parameters.
**kwargs: vectorstore specific parameters.
One of the kwargs should be `ids` which is a list of ids
associated with the texts.
Returns:
List of IDs from adding the texts into the `VectorStore`.
List of ids from adding the texts into the `VectorStore`.
Raises:
ValueError: If the number of metadatas does not match the number of texts.
ValueError: If the number of IDs does not match the number of texts.
ValueError: If the number of ids does not match the number of texts.
"""
if type(self).add_documents != VectorStore.add_documents:
# This condition is triggered if the subclass has provided
@@ -109,12 +109,11 @@ class VectorStore(ABC):
"""Delete by vector ID or other criteria.
Args:
ids: List of IDs to delete. If `None`, delete all.
ids: List of ids to delete. If `None`, delete all.
**kwargs: Other keyword arguments that subclasses might use.
Returns:
`True` if deletion is successful, `False` otherwise, `None` if not
implemented.
True if deletion is successful, False otherwise, None if not implemented.
"""
msg = "delete method must be implemented by subclass."
raise NotImplementedError(msg)
@@ -136,10 +135,12 @@ class VectorStore(ABC):
some IDs.
Args:
ids: List of IDs to retrieve.
ids: List of ids to retrieve.
Returns:
List of `Document` objects.
List of Documents.
!!! version-added "Added in version 0.2.11"
"""
msg = f"{self.__class__.__name__} does not yet support get_by_ids."
raise NotImplementedError(msg)
@@ -162,10 +163,12 @@ class VectorStore(ABC):
some IDs.
Args:
ids: List of IDs to retrieve.
ids: List of ids to retrieve.
Returns:
List of `Document` objects.
List of Documents.
!!! version-added "Added in version 0.2.11"
"""
return await run_in_executor(None, self.get_by_ids, ids)
@@ -173,12 +176,11 @@ class VectorStore(ABC):
"""Async delete by vector ID or other criteria.
Args:
ids: List of IDs to delete. If `None`, delete all.
ids: List of ids to delete. If `None`, delete all.
**kwargs: Other keyword arguments that subclasses might use.
Returns:
`True` if deletion is successful, `False` otherwise, `None` if not
implemented.
True if deletion is successful, False otherwise, None if not implemented.
"""
return await run_in_executor(None, self.delete, ids, **kwargs)
@@ -196,14 +198,14 @@ class VectorStore(ABC):
texts: Iterable of strings to add to the `VectorStore`.
metadatas: Optional list of metadatas associated with the texts.
ids: Optional list
**kwargs: `VectorStore` specific parameters.
**kwargs: vectorstore specific parameters.
Returns:
List of IDs from adding the texts into the `VectorStore`.
List of ids from adding the texts into the `VectorStore`.
Raises:
ValueError: If the number of metadatas does not match the number of texts.
ValueError: If the number of IDs does not match the number of texts.
ValueError: If the number of ids does not match the number of texts.
"""
if ids is not None:
# For backward compatibility
@@ -232,14 +234,13 @@ class VectorStore(ABC):
return await run_in_executor(None, self.add_texts, texts, metadatas, **kwargs)
def add_documents(self, documents: list[Document], **kwargs: Any) -> list[str]:
"""Add or update documents in the `VectorStore`.
"""Add or update documents in the vectorstore.
Args:
documents: Documents to add to the `VectorStore`.
**kwargs: Additional keyword arguments.
If kwargs contains IDs and documents contain ids, the IDs in the kwargs
will receive precedence.
if kwargs contains ids and documents contain ids,
the ids in the kwargs will receive precedence.
Returns:
List of IDs of the added texts.
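
Illustrative usage (editor's sketch) of the precedence rule described above, assuming langchain_core's InMemoryVectorStore and DeterministicFakeEmbedding as stand-ins for a real store and embedding:

# Editor's sketch: ids passed as a kwarg win over Document.id, per the
# docstring above. Store and embedding are stand-ins for the example.
from langchain_core.documents import Document
from langchain_core.embeddings import DeterministicFakeEmbedding
from langchain_core.vectorstores import InMemoryVectorStore

store = InMemoryVectorStore(embedding=DeterministicFakeEmbedding(size=8))
returned_ids = store.add_documents(
    [Document(page_content="hello world", id="doc-1")],
    ids=["override-1"],
)
print(returned_ids)  # expected: ['override-1']
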
@@ -294,17 +295,17 @@ class VectorStore(ABC):
"""Return docs most similar to query using a specified search type.
Args:
query: Input text.
search_type: Type of search to perform. Can be `'similarity'`, `'mmr'`, or
`'similarity_score_threshold'`.
query: Input text
search_type: Type of search to perform. Can be "similarity",
"mmr", or "similarity_score_threshold".
**kwargs: Arguments to pass to the search method.
Returns:
List of `Document` objects most similar to the query.
Raises:
ValueError: If `search_type` is not one of `'similarity'`,
`'mmr'`, or `'similarity_score_threshold'`.
ValueError: If search_type is not one of "similarity",
"mmr", or "similarity_score_threshold".
"""
if search_type == "similarity":
return self.similarity_search(query, **kwargs)
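
A small usage sketch of the search_type dispatch described above; the in-memory store and fake embedding are assumptions for the example only.

# Editor's sketch of search_type dispatch; store/embedding are examples only.
from langchain_core.embeddings import DeterministicFakeEmbedding
from langchain_core.vectorstores import InMemoryVectorStore

store = InMemoryVectorStore(embedding=DeterministicFakeEmbedding(size=8))
store.add_texts(["alpha", "beta", "gamma"])
print(store.search("alpha", search_type="similarity", k=2))
print(store.search("alpha", search_type="mmr", k=2))
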
@@ -329,16 +330,16 @@ class VectorStore(ABC):
Args:
query: Input text.
search_type: Type of search to perform. Can be `'similarity'`, `'mmr'`, or
`'similarity_score_threshold'`.
search_type: Type of search to perform. Can be "similarity",
"mmr", or "similarity_score_threshold".
**kwargs: Arguments to pass to the search method.
Returns:
List of `Document` objects most similar to the query.
Raises:
ValueError: If `search_type` is not one of `'similarity'`,
`'mmr'`, or `'similarity_score_threshold'`.
ValueError: If search_type is not one of "similarity",
"mmr", or "similarity_score_threshold".
"""
if search_type == "similarity":
return await self.asimilarity_search(query, **kwargs)
@@ -363,7 +364,7 @@ class VectorStore(ABC):
Args:
query: Input text.
k: Number of `Document` objects to return.
k: Number of Documents to return.
**kwargs: Arguments to pass to the search method.
Returns:
@@ -422,7 +423,7 @@ class VectorStore(ABC):
**kwargs: Arguments to pass to the search method.
Returns:
List of tuples of `(doc, similarity_score)`.
List of Tuples of `(doc, similarity_score)`.
"""
raise NotImplementedError
@@ -436,7 +437,7 @@ class VectorStore(ABC):
**kwargs: Arguments to pass to the search method.
Returns:
List of tuples of `(doc, similarity_score)`.
List of Tuples of `(doc, similarity_score)`.
"""
# This is a temporary workaround to make the similarity search
# asynchronous. The proper solution is to make the similarity search
@@ -460,13 +461,13 @@ class VectorStore(ABC):
Args:
query: Input text.
k: Number of `Document` objects to return.
k: Number of Documents to return.
**kwargs: kwargs to be passed to similarity search. Should include
`score_threshold`, An optional floating point value between `0` to `1`
to filter the resulting set of retrieved docs
Returns:
List of tuples of `(doc, similarity_score)`
List of Tuples of `(doc, similarity_score)`
"""
relevance_score_fn = self._select_relevance_score_fn()
docs_and_scores = self.similarity_search_with_score(query, k, **kwargs)
@@ -487,13 +488,13 @@ class VectorStore(ABC):
Args:
query: Input text.
k: Number of `Document` objects to return.
k: Number of Documents to return.
**kwargs: kwargs to be passed to similarity search. Should include
`score_threshold`, An optional floating point value between `0` to `1`
to filter the resulting set of retrieved docs
Returns:
List of tuples of `(doc, similarity_score)`
List of Tuples of `(doc, similarity_score)`
"""
relevance_score_fn = self._select_relevance_score_fn()
docs_and_scores = await self.asimilarity_search_with_score(query, k, **kwargs)
@@ -511,13 +512,13 @@ class VectorStore(ABC):
Args:
query: Input text.
k: Number of `Document` objects to return.
k: Number of Documents to return.
**kwargs: kwargs to be passed to similarity search. Should include
`score_threshold`, An optional floating point value between `0` to `1`
to filter the resulting set of retrieved docs
Returns:
List of tuples of `(doc, similarity_score)`.
List of Tuples of `(doc, similarity_score)`.
"""
score_threshold = kwargs.pop("score_threshold", None)
@@ -560,13 +561,13 @@ class VectorStore(ABC):
Args:
query: Input text.
k: Number of `Document` objects to return.
k: Number of Documents to return.
**kwargs: kwargs to be passed to similarity search. Should include
`score_threshold`, An optional floating point value between `0` to `1`
to filter the resulting set of retrieved docs
Returns:
List of tuples of `(doc, similarity_score)`
List of Tuples of `(doc, similarity_score)`
"""
score_threshold = kwargs.pop("score_threshold", None)
@@ -604,7 +605,7 @@ class VectorStore(ABC):
Args:
query: Input text.
k: Number of `Document` objects to return.
k: Number of Documents to return.
**kwargs: Arguments to pass to the search method.
Returns:
@@ -622,7 +623,7 @@ class VectorStore(ABC):
Args:
embedding: Embedding to look up documents similar to.
k: Number of `Document` objects to return.
k: Number of Documents to return.
**kwargs: Arguments to pass to the search method.
Returns:
@@ -637,7 +638,7 @@ class VectorStore(ABC):
Args:
embedding: Embedding to look up documents similar to.
k: Number of `Document` objects to return.
k: Number of Documents to return.
**kwargs: Arguments to pass to the search method.
Returns:
@@ -665,11 +666,11 @@ class VectorStore(ABC):
Args:
query: Text to look up documents similar to.
k: Number of `Document` objects to return.
fetch_k: Number of `Document` objects to fetch to pass to MMR algorithm.
lambda_mult: Number between `0` and `1` that determines the degree
of diversity among the results with `0` corresponding
to maximum diversity and `1` to minimum diversity.
k: Number of Documents to return.
fetch_k: Number of Documents to fetch to pass to MMR algorithm.
lambda_mult: Number between 0 and 1 that determines the degree
of diversity among the results with 0 corresponding
to maximum diversity and 1 to minimum diversity.
**kwargs: Arguments to pass to the search method.
Returns:
@@ -692,11 +693,11 @@ class VectorStore(ABC):
Args:
query: Text to look up documents similar to.
k: Number of `Document` objects to return.
fetch_k: Number of `Document` objects to fetch to pass to MMR algorithm.
lambda_mult: Number between `0` and `1` that determines the degree
of diversity among the results with `0` corresponding
to maximum diversity and `1` to minimum diversity.
k: Number of Documents to return.
fetch_k: Number of Documents to fetch to pass to MMR algorithm.
lambda_mult: Number between 0 and 1 that determines the degree
of diversity among the results with 0 corresponding
to maximum diversity and 1 to minimum diversity.
**kwargs: Arguments to pass to the search method.
Returns:
@@ -730,11 +731,11 @@ class VectorStore(ABC):
Args:
embedding: Embedding to look up documents similar to.
k: Number of `Document` objects to return.
fetch_k: Number of `Document` objects to fetch to pass to MMR algorithm.
lambda_mult: Number between `0` and `1` that determines the degree
of diversity among the results with `0` corresponding
to maximum diversity and `1` to minimum diversity.
k: Number of Documents to return.
fetch_k: Number of Documents to fetch to pass to MMR algorithm.
lambda_mult: Number between 0 and 1 that determines the degree
of diversity among the results with 0 corresponding
to maximum diversity and 1 to minimum diversity.
**kwargs: Arguments to pass to the search method.
Returns:
@@ -757,11 +758,11 @@ class VectorStore(ABC):
Args:
embedding: Embedding to look up documents similar to.
k: Number of `Document` objects to return.
fetch_k: Number of `Document` objects to fetch to pass to MMR algorithm.
lambda_mult: Number between `0` and `1` that determines the degree
of diversity among the results with `0` corresponding
to maximum diversity and `1` to minimum diversity.
k: Number of Documents to return.
fetch_k: Number of Documents to fetch to pass to MMR algorithm.
lambda_mult: Number between 0 and 1 that determines the degree
of diversity among the results with 0 corresponding
to maximum diversity and 1 to minimum diversity.
**kwargs: Arguments to pass to the search method.
Returns:
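
For the MMR parameters above, a usage sketch (editor's example, assuming langchain_core's in-memory store, which implements MMR and needs numpy available): fetch_k candidates are retrieved, then re-ranked for diversity controlled by lambda_mult.

# Editor's sketch of an MMR call; store, embedding, and texts are examples.
from langchain_core.embeddings import DeterministicFakeEmbedding
from langchain_core.vectorstores import InMemoryVectorStore

store = InMemoryVectorStore(embedding=DeterministicFakeEmbedding(size=8))
store.add_texts(["apples", "apples and pears", "oranges", "bicycles"])
docs = store.max_marginal_relevance_search(
    "apples", k=2, fetch_k=4, lambda_mult=0.5
)
print([d.page_content for d in docs])
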
@@ -848,7 +849,7 @@ class VectorStore(ABC):
ids: list[str] | None = None,
**kwargs: Any,
) -> VST:
"""Return `VectorStore` initialized from texts and embeddings.
"""Return VectorStore initialized from texts and embeddings.
Args:
texts: Texts to add to the `VectorStore`.
@@ -858,7 +859,7 @@ class VectorStore(ABC):
**kwargs: Additional keyword arguments.
Returns:
`VectorStore` initialized from texts and embeddings.
VectorStore initialized from texts and embeddings.
"""
@classmethod
@@ -871,7 +872,7 @@ class VectorStore(ABC):
ids: list[str] | None = None,
**kwargs: Any,
) -> Self:
"""Async return `VectorStore` initialized from texts and embeddings.
"""Async return VectorStore initialized from texts and embeddings.
Args:
texts: Texts to add to the `VectorStore`.
@@ -881,7 +882,7 @@ class VectorStore(ABC):
**kwargs: Additional keyword arguments.
Returns:
`VectorStore` initialized from texts and embeddings.
VectorStore initialized from texts and embeddings.
"""
if ids is not None:
kwargs["ids"] = ids
@@ -902,21 +903,19 @@ class VectorStore(ABC):
Args:
**kwargs: Keyword arguments to pass to the search function.
Can include:
* `search_type`: Defines the type of search that the Retriever should
perform. Can be `'similarity'` (default), `'mmr'`, or
`'similarity_score_threshold'`.
* `search_kwargs`: Keyword arguments to pass to the search function. Can
search_type: Defines the type of search that the Retriever should
perform. Can be "similarity" (default), "mmr", or
"similarity_score_threshold".
search_kwargs: Keyword arguments to pass to the search function. Can
include things like:
* `k`: Amount of documents to return (Default: `4`)
* `score_threshold`: Minimum relevance threshold
for `similarity_score_threshold`
* `fetch_k`: Amount of documents to pass to MMR algorithm
(Default: `20`)
* `lambda_mult`: Diversity of results returned by MMR;
`1` for minimum diversity and 0 for maximum. (Default: `0.5`)
* `filter`: Filter by document metadata
k: Amount of documents to return (Default: 4)
score_threshold: Minimum relevance threshold
for similarity_score_threshold
fetch_k: Amount of documents to pass to MMR algorithm
(Default: 20)
lambda_mult: Diversity of results returned by MMR;
1 for minimum diversity and 0 for maximum. (Default: 0.5)
filter: Filter by document metadata
Returns:
Retriever class for `VectorStore`.
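
A usage sketch for as_retriever with the search_kwargs listed above (editor's example; the in-memory store and fake embedding are assumptions, not part of the diff):

# Editor's sketch: build a retriever and query it as a Runnable.
from langchain_core.embeddings import DeterministicFakeEmbedding
from langchain_core.vectorstores import InMemoryVectorStore

store = InMemoryVectorStore(embedding=DeterministicFakeEmbedding(size=8))
store.add_texts(["alpha", "beta", "gamma"])
retriever = store.as_retriever(
    search_type="mmr", search_kwargs={"k": 2, "fetch_k": 3}
)
print([d.page_content for d in retriever.invoke("alpha")])
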
@@ -959,7 +958,7 @@ class VectorStoreRetriever(BaseRetriever):
vectorstore: VectorStore
"""VectorStore to use for retrieval."""
search_type: str = "similarity"
"""Type of search to perform."""
"""Type of search to perform. Defaults to "similarity"."""
search_kwargs: dict = Field(default_factory=dict)
"""Keyword arguments to pass to the search function."""
allowed_search_types: ClassVar[Collection[str]] = (
@@ -984,8 +983,8 @@ class VectorStoreRetriever(BaseRetriever):
Validated values.
Raises:
ValueError: If `search_type` is not one of the allowed search types.
ValueError: If `score_threshold` is not specified with a float value(`0~1`)
ValueError: If search_type is not one of the allowed search types.
ValueError: If score_threshold is not specified with a float value(0~1)
"""
search_type = values.get("search_type", "similarity")
if search_type not in cls.allowed_search_types:

View File

@@ -257,10 +257,10 @@ class InMemoryVectorStore(VectorStore):
"""Get documents by their ids.
Args:
ids: The IDs of the documents to get.
ids: The ids of the documents to get.
Returns:
A list of `Document` objects.
A list of Document objects.
"""
documents = []
@@ -281,10 +281,10 @@ class InMemoryVectorStore(VectorStore):
"""Async get documents by their ids.
Args:
ids: The IDs of the documents to get.
ids: The ids of the documents to get.
Returns:
A list of `Document` objects.
A list of Document objects.
"""
return self.get_by_ids(ids)
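
A round-trip sketch for the hunk above (editor's example; the fake embedding is only for illustration): results come back in the order the ids were requested.

# Editor's sketch: add documents with explicit ids, then fetch by id.
from langchain_core.documents import Document
from langchain_core.embeddings import DeterministicFakeEmbedding
from langchain_core.vectorstores import InMemoryVectorStore

store = InMemoryVectorStore(embedding=DeterministicFakeEmbedding(size=8))
store.add_documents([
    Document(page_content="first", id="a"),
    Document(page_content="second", id="b"),
])
print([d.page_content for d in store.get_by_ids(["b", "a"])])
# expected: ['second', 'first']
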

View File

@@ -1,3 +1,3 @@
"""langchain-core version information and utilities."""
VERSION = "1.0.4"
VERSION = "1.0.0"

View File

@@ -3,13 +3,8 @@ requires = ["hatchling"]
build-backend = "hatchling.build"
[project]
name = "langchain-core"
description = "Building applications with LLMs through composability"
license = {text = "MIT"}
readme = "README.md"
authors = []
version = "1.0.4"
license = {text = "MIT"}
requires-python = ">=3.10.0,<4.0.0"
dependencies = [
"langsmith>=0.3.45,<1.0.0",
@@ -20,6 +15,10 @@ dependencies = [
"packaging>=23.2.0,<26.0.0",
"pydantic>=2.7.4,<3.0.0",
]
name = "langchain-core"
version = "1.0.0"
description = "Building applications with LLMs through composability"
readme = "README.md"
[project.urls]
Homepage = "https://docs.langchain.com/"
@@ -36,7 +35,6 @@ typing = [
"mypy>=1.18.1,<1.19.0",
"types-pyyaml>=6.0.12.2,<7.0.0.0",
"types-requests>=2.28.11.5,<3.0.0.0",
"langchain-model-profiles",
"langchain-text-splitters",
]
dev = [
@@ -58,7 +56,6 @@ test = [
"blockbuster>=1.5.18,<1.6.0",
"numpy>=1.26.4; python_version<'3.13'",
"numpy>=2.1.0; python_version>='3.13'",
"langchain-model-profiles",
"langchain-tests",
"pytest-benchmark",
"pytest-codspeed",
@@ -66,7 +63,6 @@ test = [
test_integration = []
[tool.uv.sources]
langchain-model-profiles = { path = "../model-profiles" }
langchain-tests = { path = "../standard-tests" }
langchain-text-splitters = { path = "../text-splitters" }
@@ -105,6 +101,7 @@ ignore = [
"ANN401", # No Any types
"BLE", # Blind exceptions
"ERA", # No commented-out code
"PLR2004", # Comparison to magic number
]
unfixable = [
"B028", # People should intentionally tune the stacklevel
@@ -125,7 +122,7 @@ ignore-var-parameters = true # ignore missing documentation for *args and **kwa
"langchain_core/utils/mustache.py" = [ "PLW0603",]
"langchain_core/sys_info.py" = [ "T201",]
"tests/unit_tests/test_tools.py" = [ "ARG",]
"tests/**" = [ "D1", "PLR2004", "S", "SLF",]
"tests/**" = [ "D1", "S", "SLF",]
"scripts/**" = [ "INP", "S",]
[tool.coverage.run]
@@ -133,10 +130,7 @@ omit = [ "tests/*",]
[tool.pytest.ini_options]
addopts = "--snapshot-warn-unused --strict-markers --strict-config --durations=5"
markers = [
"requires: mark tests as requiring a specific library",
"compile: mark placeholder test used to compile integration tests without running them",
]
markers = [ "requires: mark tests as requiring a specific library", "compile: mark placeholder test used to compile integration tests without running them", ]
asyncio_mode = "auto"
asyncio_default_fixture_loop_scope = "function"
filterwarnings = [ "ignore::langchain_core._api.beta_decorator.LangChainBetaWarning",]
asyncio_default_fixture_loop_scope = "function"

View File

@@ -148,65 +148,4 @@ async def test_inline_handlers_share_parent_context_multiple() -> None:
2,
3,
3,
]
async def test_shielded_callback_context_preservation() -> None:
"""Verify that shielded callbacks preserve context variables.
This test specifically addresses the issue where async callbacks decorated
with @shielded do not properly preserve context variables, breaking
instrumentation and other context-dependent functionality.
The issue manifests in callbacks that use the @shielded decorator:
* on_llm_end
* on_llm_error
* on_chain_end
* on_chain_error
* And other shielded callback methods
"""
context_var: contextvars.ContextVar[str] = contextvars.ContextVar("test_context")
class ContextTestHandler(AsyncCallbackHandler):
"""Handler that reads context variables in shielded callbacks."""
def __init__(self) -> None:
self.run_inline = False
self.context_values: list[str] = []
@override
async def on_llm_end(self, response: Any, **kwargs: Any) -> None:
"""This method is decorated with @shielded in the run manager."""
# This should preserve the context variable value
self.context_values.append(context_var.get("not_found"))
@override
async def on_chain_end(self, outputs: Any, **kwargs: Any) -> None:
"""This method is decorated with @shielded in the run manager."""
# This should preserve the context variable value
self.context_values.append(context_var.get("not_found"))
# Set up the test context
context_var.set("test_value")
handler = ContextTestHandler()
manager = AsyncCallbackManager(handlers=[handler])
# Create run managers that have the shielded methods
llm_managers = await manager.on_llm_start({}, ["test prompt"])
llm_run_manager = llm_managers[0]
chain_run_manager = await manager.on_chain_start({}, {"test": "input"})
# Test LLM end callback (which is shielded)
await llm_run_manager.on_llm_end({"response": "test"}) # type: ignore[arg-type]
# Test Chain end callback (which is shielded)
await chain_run_manager.on_chain_end({"output": "test"})
# The context should be preserved in shielded callbacks
# This was the main issue - shielded decorators were not preserving context
assert handler.context_values == ["test_value", "test_value"], (
f"Expected context values ['test_value', 'test_value'], "
f"but got {handler.context_values}. "
f"This indicates the shielded decorator is not preserving context variables."
)
], f"Expected order of states was broken due to context loss. Got {states}"

View File

@@ -33,7 +33,7 @@ def test_hashing() -> None:
# hash should be deterministic
assert hashed_document.id == "fd1dc827-051b-537d-a1fe-1fa043e8b276"
# Verify that hashing with sha1 is deterministic
# Verify that hashing with sha1 is determinstic
another_hashed_document = _get_document_with_hash(document, key_encoder="sha1")
assert another_hashed_document.id == hashed_document.id

View File

@@ -604,7 +604,7 @@ def test_incremental_fails_with_bad_source_ids(
with pytest.raises(
ValueError,
match="Source IDs are required when cleanup mode is incremental or scoped_full",
match="Source ids are required when cleanup mode is incremental or scoped_full",
):
# Should raise an error because no source id function was specified
index(
@@ -654,7 +654,7 @@ async def test_aincremental_fails_with_bad_source_ids(
with pytest.raises(
ValueError,
match="Source IDs are required when cleanup mode is incremental or scoped_full",
match="Source ids are required when cleanup mode is incremental or scoped_full",
):
# Should raise an error because no source id function was specified
await aindex(
@@ -956,7 +956,7 @@ def test_scoped_full_fails_with_bad_source_ids(
with pytest.raises(
ValueError,
match="Source IDs are required when cleanup mode is incremental or scoped_full",
match="Source ids are required when cleanup mode is incremental or scoped_full",
):
# Should raise an error because no source id function was specified
index(
@@ -1006,7 +1006,7 @@ async def test_ascoped_full_fails_with_bad_source_ids(
with pytest.raises(
ValueError,
match="Source IDs are required when cleanup mode is incremental or scoped_full",
match="Source ids are required when cleanup mode is incremental or scoped_full",
):
# Should raise an error because no source id function was specified
await aindex(
@@ -2801,7 +2801,7 @@ def test_index_with_upsert_kwargs(
]
assert [doc.metadata for doc in args[0]] == [{"source": "1"}, {"source": "2"}]
# Check that IDs are present
# Check that ids are present
assert "ids" in kwargs
assert isinstance(kwargs["ids"], list)
assert len(kwargs["ids"]) == 2
@@ -2932,7 +2932,7 @@ async def test_aindex_with_upsert_kwargs(
]
assert [doc.metadata for doc in args[0]] == [{"source": "1"}, {"source": "2"}]
# Check that IDs are present
# Check that ids are present
assert "ids" in kwargs
assert isinstance(kwargs["ids"], list)
assert len(kwargs["ids"]) == 2

View File

@@ -1217,20 +1217,3 @@ def test_get_ls_params() -> None:
ls_params = llm._get_ls_params(stop=["stop"])
assert ls_params["ls_stop"] == ["stop"]
def test_model_profiles() -> None:
model = GenericFakeChatModel(messages=iter([]))
profile = model.profile
assert profile == {}
class MyModel(GenericFakeChatModel):
model: str = "gpt-5"
@property
def _llm_type(self) -> str:
return "openai-chat"
model = MyModel(messages=iter([]))
profile = model.profile
assert profile

Some files were not shown because too many files have changed in this diff.