mirror of
https://github.com/hwchase17/langchain.git
synced 2026-02-05 08:40:36 +00:00
Compare commits
1 Commits
langchain=
...
sr/fix-too
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
7230be12bb |
2
.github/ISSUE_TEMPLATE/config.yml
vendored
2
.github/ISSUE_TEMPLATE/config.yml
vendored
@@ -2,7 +2,7 @@ blank_issues_enabled: false
|
||||
version: 2.1
|
||||
contact_links:
|
||||
- name: 📚 Documentation
|
||||
url: https://github.com/langchain-ai/docs/issues/new?template=01-langchain.yml
|
||||
url: https://github.com/langchain-ai/docs/issues/new?template=langchain.yml
|
||||
about: Report an issue related to the LangChain documentation
|
||||
- name: 💬 LangChain Forum
|
||||
url: https://forum.langchain.com/
|
||||
|
||||
93
.github/actions/poetry_setup/action.yml
vendored
Normal file
93
.github/actions/poetry_setup/action.yml
vendored
Normal file
@@ -0,0 +1,93 @@
|
||||
# An action for setting up poetry install with caching.
|
||||
# Using a custom action since the default action does not
|
||||
# take poetry install groups into account.
|
||||
# Action code from:
|
||||
# https://github.com/actions/setup-python/issues/505#issuecomment-1273013236
|
||||
name: poetry-install-with-caching
|
||||
description: Poetry install with support for caching of dependency groups.
|
||||
|
||||
inputs:
|
||||
python-version:
|
||||
description: Python version, supporting MAJOR.MINOR only
|
||||
required: true
|
||||
|
||||
poetry-version:
|
||||
description: Poetry version
|
||||
required: true
|
||||
|
||||
cache-key:
|
||||
description: Cache key to use for manual handling of caching
|
||||
required: true
|
||||
|
||||
working-directory:
|
||||
description: Directory whose poetry.lock file should be cached
|
||||
required: true
|
||||
|
||||
runs:
|
||||
using: composite
|
||||
steps:
|
||||
- uses: actions/setup-python@v5
|
||||
name: Setup python ${{ inputs.python-version }}
|
||||
id: setup-python
|
||||
with:
|
||||
python-version: ${{ inputs.python-version }}
|
||||
|
||||
- uses: actions/cache@v4
|
||||
id: cache-bin-poetry
|
||||
name: Cache Poetry binary - Python ${{ inputs.python-version }}
|
||||
env:
|
||||
SEGMENT_DOWNLOAD_TIMEOUT_MIN: "1"
|
||||
with:
|
||||
path: |
|
||||
/opt/pipx/venvs/poetry
|
||||
# This step caches the poetry installation, so make sure it's keyed on the poetry version as well.
|
||||
key: bin-poetry-${{ runner.os }}-${{ runner.arch }}-py-${{ inputs.python-version }}-${{ inputs.poetry-version }}
|
||||
|
||||
- name: Refresh shell hashtable and fixup softlinks
|
||||
if: steps.cache-bin-poetry.outputs.cache-hit == 'true'
|
||||
shell: bash
|
||||
env:
|
||||
POETRY_VERSION: ${{ inputs.poetry-version }}
|
||||
PYTHON_VERSION: ${{ inputs.python-version }}
|
||||
run: |
|
||||
set -eux
|
||||
|
||||
# Refresh the shell hashtable, to ensure correct `which` output.
|
||||
hash -r
|
||||
|
||||
# `actions/cache@v3` doesn't always seem able to correctly unpack softlinks.
|
||||
# Delete and recreate the softlinks pipx expects to have.
|
||||
rm /opt/pipx/venvs/poetry/bin/python
|
||||
cd /opt/pipx/venvs/poetry/bin
|
||||
ln -s "$(which "python$PYTHON_VERSION")" python
|
||||
chmod +x python
|
||||
cd /opt/pipx_bin/
|
||||
ln -s /opt/pipx/venvs/poetry/bin/poetry poetry
|
||||
chmod +x poetry
|
||||
|
||||
# Ensure everything got set up correctly.
|
||||
/opt/pipx/venvs/poetry/bin/python --version
|
||||
/opt/pipx_bin/poetry --version
|
||||
|
||||
- name: Install poetry
|
||||
if: steps.cache-bin-poetry.outputs.cache-hit != 'true'
|
||||
shell: bash
|
||||
env:
|
||||
POETRY_VERSION: ${{ inputs.poetry-version }}
|
||||
PYTHON_VERSION: ${{ inputs.python-version }}
|
||||
# Install poetry using the python version installed by setup-python step.
|
||||
run: pipx install "poetry==$POETRY_VERSION" --python '${{ steps.setup-python.outputs.python-path }}' --verbose
|
||||
|
||||
- name: Restore pip and poetry cached dependencies
|
||||
uses: actions/cache@v4
|
||||
env:
|
||||
SEGMENT_DOWNLOAD_TIMEOUT_MIN: "4"
|
||||
WORKDIR: ${{ inputs.working-directory == '' && '.' || inputs.working-directory }}
|
||||
with:
|
||||
path: |
|
||||
~/.cache/pip
|
||||
~/.cache/pypoetry/virtualenvs
|
||||
~/.cache/pypoetry/cache
|
||||
~/.cache/pypoetry/artifacts
|
||||
${{ env.WORKDIR }}/.venv
|
||||
key: py-deps-${{ runner.os }}-${{ runner.arch }}-py-${{ inputs.python-version }}-poetry-${{ inputs.poetry-version }}-${{ inputs.cache-key }}-${{ hashFiles(format('{0}/**/poetry.lock', env.WORKDIR)) }}
|
||||
85
.github/pr-file-labeler.yml
vendored
85
.github/pr-file-labeler.yml
vendored
@@ -7,12 +7,13 @@ core:
|
||||
- any-glob-to-any-file:
|
||||
- "libs/core/**/*"
|
||||
|
||||
langchain-classic:
|
||||
langchain:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
- "libs/langchain/**/*"
|
||||
- "libs/langchain_v1/**/*"
|
||||
|
||||
langchain:
|
||||
v1:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
- "libs/langchain_v1/**/*"
|
||||
@@ -27,11 +28,6 @@ standard-tests:
|
||||
- any-glob-to-any-file:
|
||||
- "libs/standard-tests/**/*"
|
||||
|
||||
model-profiles:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
- "libs/model-profiles/**/*"
|
||||
|
||||
text-splitters:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
@@ -43,81 +39,6 @@ integration:
|
||||
- any-glob-to-any-file:
|
||||
- "libs/partners/**/*"
|
||||
|
||||
anthropic:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
- "libs/partners/anthropic/**/*"
|
||||
|
||||
chroma:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
- "libs/partners/chroma/**/*"
|
||||
|
||||
deepseek:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
- "libs/partners/deepseek/**/*"
|
||||
|
||||
exa:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
- "libs/partners/exa/**/*"
|
||||
|
||||
fireworks:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
- "libs/partners/fireworks/**/*"
|
||||
|
||||
groq:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
- "libs/partners/groq/**/*"
|
||||
|
||||
huggingface:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
- "libs/partners/huggingface/**/*"
|
||||
|
||||
mistralai:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
- "libs/partners/mistralai/**/*"
|
||||
|
||||
nomic:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
- "libs/partners/nomic/**/*"
|
||||
|
||||
ollama:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
- "libs/partners/ollama/**/*"
|
||||
|
||||
openai:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
- "libs/partners/openai/**/*"
|
||||
|
||||
perplexity:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
- "libs/partners/perplexity/**/*"
|
||||
|
||||
prompty:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
- "libs/partners/prompty/**/*"
|
||||
|
||||
qdrant:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
- "libs/partners/qdrant/**/*"
|
||||
|
||||
xai:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
- "libs/partners/xai/**/*"
|
||||
|
||||
# Infrastructure and DevOps
|
||||
infra:
|
||||
- changed-files:
|
||||
|
||||
41
.github/pr-title-labeler.yml
vendored
Normal file
41
.github/pr-title-labeler.yml
vendored
Normal file
@@ -0,0 +1,41 @@
|
||||
# PR title labeler config
|
||||
#
|
||||
# Labels PRs based on conventional commit patterns in titles
|
||||
#
|
||||
# Format: type(scope): description or type!: description (breaking)
|
||||
|
||||
add-missing-labels: true
|
||||
clear-prexisting: false
|
||||
include-commits: false
|
||||
include-title: true
|
||||
label-for-breaking-changes: breaking
|
||||
|
||||
label-mapping:
|
||||
documentation: ["docs"]
|
||||
feature: ["feat"]
|
||||
fix: ["fix"]
|
||||
infra: ["build", "ci", "chore"]
|
||||
integration:
|
||||
[
|
||||
"anthropic",
|
||||
"chroma",
|
||||
"deepseek",
|
||||
"exa",
|
||||
"fireworks",
|
||||
"groq",
|
||||
"huggingface",
|
||||
"mistralai",
|
||||
"nomic",
|
||||
"ollama",
|
||||
"openai",
|
||||
"perplexity",
|
||||
"prompty",
|
||||
"qdrant",
|
||||
"xai",
|
||||
]
|
||||
linting: ["style"]
|
||||
performance: ["perf"]
|
||||
refactor: ["refactor"]
|
||||
release: ["release"]
|
||||
revert: ["revert"]
|
||||
tests: ["test"]
|
||||
3
.github/scripts/check_diff.py
vendored
3
.github/scripts/check_diff.py
vendored
@@ -30,7 +30,6 @@ LANGCHAIN_DIRS = [
|
||||
"libs/text-splitters",
|
||||
"libs/langchain",
|
||||
"libs/langchain_v1",
|
||||
"libs/model-profiles",
|
||||
]
|
||||
|
||||
# When set to True, we are ignoring core dependents
|
||||
@@ -135,7 +134,7 @@ def _get_configs_for_single_dir(job: str, dir_: str) -> List[Dict[str, str]]:
|
||||
elif dir_ == "libs/core":
|
||||
py_versions = ["3.10", "3.11", "3.12", "3.13", "3.14"]
|
||||
# custom logic for specific directories
|
||||
elif dir_ in {"libs/partners/chroma"}:
|
||||
elif dir_ in {"libs/partners/chroma", "libs/partners/nomic"}:
|
||||
py_versions = ["3.10", "3.13"]
|
||||
else:
|
||||
py_versions = ["3.10", "3.14"]
|
||||
|
||||
2
.github/scripts/get_min_versions.py
vendored
2
.github/scripts/get_min_versions.py
vendored
@@ -98,7 +98,7 @@ def _check_python_version_from_requirement(
|
||||
return True
|
||||
else:
|
||||
marker_str = str(requirement.marker)
|
||||
if "python_version" in marker_str or "python_full_version" in marker_str:
|
||||
if "python_version" or "python_full_version" in marker_str:
|
||||
python_version_str = "".join(
|
||||
char
|
||||
for char in marker_str
|
||||
|
||||
24
.github/workflows/_release.yml
vendored
24
.github/workflows/_release.yml
vendored
@@ -77,7 +77,7 @@ jobs:
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
|
||||
- name: Upload build
|
||||
uses: actions/upload-artifact@v5
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: dist
|
||||
path: ${{ inputs.working-directory }}/dist/
|
||||
@@ -149,8 +149,8 @@ jobs:
|
||||
fi
|
||||
fi
|
||||
|
||||
# if PREV_TAG is empty or came out to 0.0.0, let it be empty
|
||||
if [ -z "$PREV_TAG" ] || [ "$PREV_TAG" = "$PKG_NAME==0.0.0" ]; then
|
||||
# if PREV_TAG is empty, let it be empty
|
||||
if [ -z "$PREV_TAG" ]; then
|
||||
echo "No previous tag found - first release"
|
||||
else
|
||||
# confirm prev-tag actually exists in git repo with git tag
|
||||
@@ -179,8 +179,8 @@ jobs:
|
||||
PREV_TAG: ${{ steps.check-tags.outputs.prev-tag }}
|
||||
run: |
|
||||
PREAMBLE="Changes since $PREV_TAG"
|
||||
# if PREV_TAG is empty or 0.0.0, then we are releasing the first version
|
||||
if [ -z "$PREV_TAG" ] || [ "$PREV_TAG" = "$PKG_NAME==0.0.0" ]; then
|
||||
# if PREV_TAG is empty, then we are releasing the first version
|
||||
if [ -z "$PREV_TAG" ]; then
|
||||
PREAMBLE="Initial release"
|
||||
PREV_TAG=$(git rev-list --max-parents=0 HEAD)
|
||||
fi
|
||||
@@ -208,7 +208,7 @@ jobs:
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
|
||||
- uses: actions/download-artifact@v6
|
||||
- uses: actions/download-artifact@v5
|
||||
with:
|
||||
name: dist
|
||||
path: ${{ inputs.working-directory }}/dist/
|
||||
@@ -258,7 +258,7 @@ jobs:
|
||||
with:
|
||||
python-version: ${{ env.PYTHON_VERSION }}
|
||||
|
||||
- uses: actions/download-artifact@v6
|
||||
- uses: actions/download-artifact@v5
|
||||
with:
|
||||
name: dist
|
||||
path: ${{ inputs.working-directory }}/dist/
|
||||
@@ -377,7 +377,6 @@ jobs:
|
||||
XAI_API_KEY: ${{ secrets.XAI_API_KEY }}
|
||||
DEEPSEEK_API_KEY: ${{ secrets.DEEPSEEK_API_KEY }}
|
||||
PPLX_API_KEY: ${{ secrets.PPLX_API_KEY }}
|
||||
LANGCHAIN_TESTS_USER_AGENT: ${{ secrets.LANGCHAIN_TESTS_USER_AGENT }}
|
||||
run: make integration_tests
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
|
||||
@@ -410,7 +409,6 @@ jobs:
|
||||
AZURE_OPENAI_LEGACY_CHAT_DEPLOYMENT_NAME: ${{ secrets.AZURE_OPENAI_LEGACY_CHAT_DEPLOYMENT_NAME }}
|
||||
AZURE_OPENAI_LLM_DEPLOYMENT_NAME: ${{ secrets.AZURE_OPENAI_LLM_DEPLOYMENT_NAME }}
|
||||
AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT_NAME: ${{ secrets.AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT_NAME }}
|
||||
LANGCHAIN_TESTS_USER_AGENT: ${{ secrets.LANGCHAIN_TESTS_USER_AGENT }}
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
|
||||
@@ -430,7 +428,7 @@ jobs:
|
||||
with:
|
||||
python-version: ${{ env.PYTHON_VERSION }}
|
||||
|
||||
- uses: actions/download-artifact@v6
|
||||
- uses: actions/download-artifact@v5
|
||||
if: startsWith(inputs.working-directory, 'libs/core')
|
||||
with:
|
||||
name: dist
|
||||
@@ -444,7 +442,7 @@ jobs:
|
||||
git ls-remote --tags origin "langchain-${{ matrix.partner }}*" \
|
||||
| awk '{print $2}' \
|
||||
| sed 's|refs/tags/||' \
|
||||
| grep -E '[0-9]+\.[0-9]+\.[0-9]+$' \
|
||||
| grep -E '[0-9]+\.[0-9]+\.[0-9]+([a-zA-Z]+[0-9]+)?$' \
|
||||
| sort -Vr \
|
||||
| head -n 1
|
||||
)"
|
||||
@@ -499,7 +497,7 @@ jobs:
|
||||
with:
|
||||
python-version: ${{ env.PYTHON_VERSION }}
|
||||
|
||||
- uses: actions/download-artifact@v6
|
||||
- uses: actions/download-artifact@v5
|
||||
with:
|
||||
name: dist
|
||||
path: ${{ inputs.working-directory }}/dist/
|
||||
@@ -539,7 +537,7 @@ jobs:
|
||||
with:
|
||||
python-version: ${{ env.PYTHON_VERSION }}
|
||||
|
||||
- uses: actions/download-artifact@v6
|
||||
- uses: actions/download-artifact@v5
|
||||
with:
|
||||
name: dist
|
||||
path: ${{ inputs.working-directory }}/dist/
|
||||
|
||||
1
.github/workflows/integration_tests.yml
vendored
1
.github/workflows/integration_tests.yml
vendored
@@ -155,7 +155,6 @@ jobs:
|
||||
WATSONX_APIKEY: ${{ secrets.WATSONX_APIKEY }}
|
||||
WATSONX_PROJECT_ID: ${{ secrets.WATSONX_PROJECT_ID }}
|
||||
XAI_API_KEY: ${{ secrets.XAI_API_KEY }}
|
||||
LANGCHAIN_TESTS_USER_AGENT: ${{ secrets.LANGCHAIN_TESTS_USER_AGENT }}
|
||||
run: |
|
||||
cd langchain/${{ matrix.working-directory }}
|
||||
make integration_tests
|
||||
|
||||
4
.github/workflows/pr_lint.yml
vendored
4
.github/workflows/pr_lint.yml
vendored
@@ -30,7 +30,7 @@
|
||||
# core, cli, langchain, langchain_v1, langchain-classic, standard-tests,
|
||||
# text-splitters, docs, anthropic, chroma, deepseek, exa, fireworks, groq,
|
||||
# huggingface, mistralai, nomic, ollama, openai, perplexity, prompty, qdrant,
|
||||
# xai, infra, deps
|
||||
# xai, infra
|
||||
#
|
||||
# Rules:
|
||||
# 1. The 'Type' must start with a lowercase letter.
|
||||
@@ -79,8 +79,8 @@ jobs:
|
||||
core
|
||||
cli
|
||||
langchain
|
||||
langchain_v1
|
||||
langchain-classic
|
||||
model-profiles
|
||||
standard-tests
|
||||
text-splitters
|
||||
docs
|
||||
|
||||
2
.gitignore
vendored
2
.gitignore
vendored
@@ -1,8 +1,6 @@
|
||||
.vs/
|
||||
.claude/
|
||||
.idea/
|
||||
#Emacs backup
|
||||
*~
|
||||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
|
||||
@@ -1,8 +0,0 @@
|
||||
{
|
||||
"mcpServers": {
|
||||
"docs-langchain": {
|
||||
"type": "http",
|
||||
"url": "https://docs.langchain.com/mcp"
|
||||
}
|
||||
}
|
||||
}
|
||||
38
README.md
38
README.md
@@ -34,22 +34,17 @@
|
||||
</a>
|
||||
</p>
|
||||
|
||||
LangChain is a framework for building agents and LLM-powered applications. It helps you chain together interoperable components and third-party integrations to simplify AI application development – all while future-proofing decisions as the underlying technology evolves.
|
||||
LangChain is a framework for building LLM-powered applications. It helps you chain together interoperable components and third-party integrations to simplify AI application development — all while future-proofing decisions as the underlying technology evolves.
|
||||
|
||||
```bash
|
||||
pip install langchain
|
||||
```
|
||||
|
||||
If you're looking for more advanced customization or agent orchestration, check out [LangGraph](https://docs.langchain.com/oss/python/langgraph/overview), our framework for building controllable agent workflows.
|
||||
|
||||
---
|
||||
|
||||
**Documentation**:
|
||||
**Documentation**: To learn more about LangChain, check out [the docs](https://docs.langchain.com/oss/python/langchain/overview).
|
||||
|
||||
- [docs.langchain.com](https://docs.langchain.com/oss/python/langchain/overview) – Comprehensive documentation, including conceptual overviews and guides
|
||||
- [reference.langchain.com/python](https://reference.langchain.com/python) – API reference docs for LangChain packages
|
||||
|
||||
**Discussions**: Visit the [LangChain Forum](https://forum.langchain.com) to connect with the community and share all of your technical questions, ideas, and feedback.
|
||||
If you're looking for more advanced customization or agent orchestration, check out [LangGraph](https://docs.langchain.com/oss/python/langgraph/overview), our framework for building controllable agent workflows.
|
||||
|
||||
> [!NOTE]
|
||||
> Looking for the JS/TS library? Check out [LangChain.js](https://github.com/langchain-ai/langchainjs).
|
||||
@@ -60,27 +55,24 @@ LangChain helps developers build applications powered by LLMs through a standard
|
||||
|
||||
Use LangChain for:
|
||||
|
||||
- **Real-time data augmentation**. Easily connect LLMs to diverse data sources and external/internal systems, drawing from LangChain's vast library of integrations with model providers, tools, vector stores, retrievers, and more.
|
||||
- **Model interoperability**. Swap models in and out as your engineering team experiments to find the best choice for your application's needs. As the industry frontier evolves, adapt quickly – LangChain's abstractions keep you moving without losing momentum.
|
||||
- **Rapid prototyping**. Quickly build and iterate on LLM applications with LangChain's modular, component-based architecture. Test different approaches and workflows without rebuilding from scratch, accelerating your development cycle.
|
||||
- **Production-ready features**. Deploy reliable applications with built-in support for monitoring, evaluation, and debugging through integrations like LangSmith. Scale with confidence using battle-tested patterns and best practices.
|
||||
- **Vibrant community and ecosystem**. Leverage a rich ecosystem of integrations, templates, and community-contributed components. Benefit from continuous improvements and stay up-to-date with the latest AI developments through an active open-source community.
|
||||
- **Flexible abstraction layers**. Work at the level of abstraction that suits your needs - from high-level chains for quick starts to low-level components for fine-grained control. LangChain grows with your application's complexity.
|
||||
- **Real-time data augmentation**. Easily connect LLMs to diverse data sources and external/internal systems, drawing from LangChain’s vast library of integrations with model providers, tools, vector stores, retrievers, and more.
|
||||
- **Model interoperability**. Swap models in and out as your engineering team experiments to find the best choice for your application’s needs. As the industry frontier evolves, adapt quickly — LangChain’s abstractions keep you moving without losing momentum.
|
||||
|
||||
## LangChain ecosystem
|
||||
## LangChain’s ecosystem
|
||||
|
||||
While the LangChain framework can be used standalone, it also integrates seamlessly with any LangChain product, giving developers a full suite of tools when building LLM applications.
|
||||
|
||||
To improve your LLM application development, pair LangChain with:
|
||||
|
||||
- [LangGraph](https://docs.langchain.com/oss/python/langgraph/overview) – Build agents that can reliably handle complex tasks with LangGraph, our low-level agent orchestration framework. LangGraph offers customizable architecture, long-term memory, and human-in-the-loop workflows – and is trusted in production by companies like LinkedIn, Uber, Klarna, and GitLab.
|
||||
- [Integrations](https://docs.langchain.com/oss/python/integrations/providers/overview) – List of LangChain integrations, including chat & embedding models, tools & toolkits, and more
|
||||
- [LangSmith](https://www.langchain.com/langsmith) – Helpful for agent evals and observability. Debug poor-performing LLM app runs, evaluate agent trajectories, gain visibility in production, and improve performance over time.
|
||||
- [LangSmith Deployment](https://docs.langchain.com/langsmith/deployments) – Deploy and scale agents effortlessly with a purpose-built deployment platform for long-running, stateful workflows. Discover, reuse, configure, and share agents across teams – and iterate quickly with visual prototyping in [LangSmith Studio](https://docs.langchain.com/langsmith/studio).
|
||||
- [Deep Agents](https://github.com/langchain-ai/deepagents) *(new!)* – Build agents that can plan, use subagents, and leverage file systems for complex tasks
|
||||
- [LangSmith](https://www.langchain.com/langsmith) - Helpful for agent evals and observability. Debug poor-performing LLM app runs, evaluate agent trajectories, gain visibility in production, and improve performance over time.
|
||||
- [LangGraph](https://docs.langchain.com/oss/python/langgraph/overview) - Build agents that can reliably handle complex tasks with LangGraph, our low-level agent orchestration framework. LangGraph offers customizable architecture, long-term memory, and human-in-the-loop workflows — and is trusted in production by companies like LinkedIn, Uber, Klarna, and GitLab.
|
||||
- [LangGraph Platform](https://docs.langchain.com/langgraph-platform) - Deploy and scale agents effortlessly with a purpose-built deployment platform for long-running, stateful workflows. Discover, reuse, configure, and share agents across teams — and iterate quickly with visual prototyping in [LangGraph Studio](https://langchain-ai.github.io/langgraph/concepts/langgraph_studio).
|
||||
|
||||
## Additional resources
|
||||
|
||||
- [API Reference](https://reference.langchain.com/python) – Detailed reference on navigating base packages and integrations for LangChain.
|
||||
- [Contributing Guide](https://docs.langchain.com/oss/python/contributing/overview) – Learn how to contribute to LangChain projects and find good first issues.
|
||||
- [Code of Conduct](https://github.com/langchain-ai/langchain/blob/master/.github/CODE_OF_CONDUCT.md) – Our community guidelines and standards for participation.
|
||||
- [Learn](https://docs.langchain.com/oss/python/learn): Use cases, conceptual overviews, and more.
|
||||
- [API Reference](https://reference.langchain.com/python): Detailed reference on
|
||||
navigating base packages and integrations for LangChain.
|
||||
- [Contributing Guide](https://docs.langchain.com/oss/python/contributing/overview): Learn how to contribute to LangChain and find good first issues.
|
||||
- [LangChain Forum](https://forum.langchain.com): Connect with the community and share all of your technical questions, ideas, and feedback.
|
||||
- [Chat LangChain](https://chat.langchain.com): Ask questions & chat with our documentation.
|
||||
|
||||
@@ -55,10 +55,10 @@ All out of scope targets defined by huntr as well as:
|
||||
* **langchain-experimental**: This repository is for experimental code and is not
|
||||
eligible for bug bounties (see [package warning](https://pypi.org/project/langchain-experimental/)), bug reports to it will be marked as interesting or waste of
|
||||
time and published with no bounty attached.
|
||||
* **tools**: Tools in either `langchain` or `langchain-community` are not eligible for bug
|
||||
* **tools**: Tools in either langchain or langchain-community are not eligible for bug
|
||||
bounties. This includes the following directories
|
||||
* `libs/langchain/langchain/tools`
|
||||
* `libs/community/langchain_community/tools`
|
||||
* libs/langchain/langchain/tools
|
||||
* libs/community/langchain_community/tools
|
||||
* Please review the [Best Practices](#best-practices)
|
||||
for more details, but generally tools interact with the real world. Developers are
|
||||
expected to understand the security implications of their code and are responsible
|
||||
|
||||
@@ -295,7 +295,7 @@
|
||||
"source": [
|
||||
"## TODO: Any functionality specific to this vector store\n",
|
||||
"\n",
|
||||
"E.g. creating a persistent database to save to your disk, etc."
|
||||
"E.g. creating a persisten database to save to your disk, etc."
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -5,10 +5,12 @@
|
||||
|
||||
!!! warning
|
||||
New agents should be built using the
|
||||
[`langchain` library](https://pypi.org/project/langchain/), which provides a
|
||||
[langgraph library](https://github.com/langchain-ai/langgraph), which provides a
|
||||
simpler and more flexible way to define agents.
|
||||
|
||||
See docs on [building agents](https://docs.langchain.com/oss/python/langchain/agents).
|
||||
Please see the
|
||||
[migration guide](https://python.langchain.com/docs/how_to/migrate_agent/) for
|
||||
information on how to migrate existing agents to modern langgraph agents.
|
||||
|
||||
Agents use language models to choose a sequence of actions to take.
|
||||
|
||||
@@ -52,33 +54,31 @@ class AgentAction(Serializable):
|
||||
"""The input to pass in to the Tool."""
|
||||
log: str
|
||||
"""Additional information to log about the action.
|
||||
|
||||
This log can be used in a few ways. First, it can be used to audit what exactly the
|
||||
LLM predicted to lead to this `(tool, tool_input)`.
|
||||
|
||||
Second, it can be used in future iterations to show the LLMs prior thoughts. This is
|
||||
useful when `(tool, tool_input)` does not contain full information about the LLM
|
||||
prediction (for example, any `thought` before the tool/tool_input).
|
||||
"""
|
||||
This log can be used in a few ways. First, it can be used to audit
|
||||
what exactly the LLM predicted to lead to this (tool, tool_input).
|
||||
Second, it can be used in future iterations to show the LLMs prior
|
||||
thoughts. This is useful when (tool, tool_input) does not contain
|
||||
full information about the LLM prediction (for example, any `thought`
|
||||
before the tool/tool_input)."""
|
||||
type: Literal["AgentAction"] = "AgentAction"
|
||||
|
||||
# Override init to support instantiation by position for backward compat.
|
||||
def __init__(self, tool: str, tool_input: str | dict, log: str, **kwargs: Any):
|
||||
"""Create an `AgentAction`.
|
||||
"""Create an AgentAction.
|
||||
|
||||
Args:
|
||||
tool: The name of the tool to execute.
|
||||
tool_input: The input to pass in to the `Tool`.
|
||||
tool_input: The input to pass in to the Tool.
|
||||
log: Additional information to log about the action.
|
||||
"""
|
||||
super().__init__(tool=tool, tool_input=tool_input, log=log, **kwargs)
|
||||
|
||||
@classmethod
|
||||
def is_lc_serializable(cls) -> bool:
|
||||
"""`AgentAction` is serializable.
|
||||
"""AgentAction is serializable.
|
||||
|
||||
Returns:
|
||||
`True`
|
||||
True
|
||||
"""
|
||||
return True
|
||||
|
||||
@@ -100,23 +100,19 @@ class AgentAction(Serializable):
|
||||
class AgentActionMessageLog(AgentAction):
|
||||
"""Representation of an action to be executed by an agent.
|
||||
|
||||
This is similar to `AgentAction`, but includes a message log consisting of
|
||||
chat messages.
|
||||
|
||||
This is useful when working with `ChatModels`, and is used to reconstruct
|
||||
conversation history from the agent's perspective.
|
||||
This is similar to AgentAction, but includes a message log consisting of
|
||||
chat messages. This is useful when working with ChatModels, and is used
|
||||
to reconstruct conversation history from the agent's perspective.
|
||||
"""
|
||||
|
||||
message_log: Sequence[BaseMessage]
|
||||
"""Similar to log, this can be used to pass along extra information about what exact
|
||||
messages were predicted by the LLM before parsing out the `(tool, tool_input)`.
|
||||
|
||||
This is again useful if `(tool, tool_input)` cannot be used to fully recreate the
|
||||
LLM prediction, and you need that LLM prediction (for future agent iteration).
|
||||
|
||||
"""Similar to log, this can be used to pass along extra
|
||||
information about what exact messages were predicted by the LLM
|
||||
before parsing out the (tool, tool_input). This is again useful
|
||||
if (tool, tool_input) cannot be used to fully recreate the LLM
|
||||
prediction, and you need that LLM prediction (for future agent iteration).
|
||||
Compared to `log`, this is useful when the underlying LLM is a
|
||||
chat model (and therefore returns messages rather than a string).
|
||||
"""
|
||||
chat model (and therefore returns messages rather than a string)."""
|
||||
# Ignoring type because we're overriding the type from AgentAction.
|
||||
# And this is the correct thing to do in this case.
|
||||
# The type literal is used for serialization purposes.
|
||||
@@ -124,12 +120,12 @@ class AgentActionMessageLog(AgentAction):
|
||||
|
||||
|
||||
class AgentStep(Serializable):
|
||||
"""Result of running an `AgentAction`."""
|
||||
"""Result of running an AgentAction."""
|
||||
|
||||
action: AgentAction
|
||||
"""The `AgentAction` that was executed."""
|
||||
"""The AgentAction that was executed."""
|
||||
observation: Any
|
||||
"""The result of the `AgentAction`."""
|
||||
"""The result of the AgentAction."""
|
||||
|
||||
@property
|
||||
def messages(self) -> Sequence[BaseMessage]:
|
||||
@@ -138,22 +134,19 @@ class AgentStep(Serializable):
|
||||
|
||||
|
||||
class AgentFinish(Serializable):
|
||||
"""Final return value of an `ActionAgent`.
|
||||
"""Final return value of an ActionAgent.
|
||||
|
||||
Agents return an `AgentFinish` when they have reached a stopping condition.
|
||||
Agents return an AgentFinish when they have reached a stopping condition.
|
||||
"""
|
||||
|
||||
return_values: dict
|
||||
"""Dictionary of return values."""
|
||||
log: str
|
||||
"""Additional information to log about the return value.
|
||||
|
||||
This is used to pass along the full LLM prediction, not just the parsed out
|
||||
return value.
|
||||
|
||||
For example, if the full LLM prediction was `Final Answer: 2` you may want to just
|
||||
return `2` as a return value, but pass along the full string as a `log` (for
|
||||
debugging or observability purposes).
|
||||
return value. For example, if the full LLM prediction was
|
||||
`Final Answer: 2` you may want to just return `2` as a return value, but pass
|
||||
along the full string as a `log` (for debugging or observability purposes).
|
||||
"""
|
||||
type: Literal["AgentFinish"] = "AgentFinish"
|
||||
|
||||
@@ -163,7 +156,7 @@ class AgentFinish(Serializable):
|
||||
|
||||
@classmethod
|
||||
def is_lc_serializable(cls) -> bool:
|
||||
"""Return `True` as this class is serializable."""
|
||||
"""Return True as this class is serializable."""
|
||||
return True
|
||||
|
||||
@classmethod
|
||||
@@ -211,7 +204,7 @@ def _convert_agent_observation_to_messages(
|
||||
observation: Observation to convert to a message.
|
||||
|
||||
Returns:
|
||||
`AIMessage` that corresponds to the original tool invocation.
|
||||
AIMessage that corresponds to the original tool invocation.
|
||||
"""
|
||||
if isinstance(agent_action, AgentActionMessageLog):
|
||||
return [_create_function_message(agent_action, observation)]
|
||||
@@ -234,7 +227,7 @@ def _create_function_message(
|
||||
observation: the result of the tool invocation.
|
||||
|
||||
Returns:
|
||||
`FunctionMessage` that corresponds to the original tool invocation.
|
||||
FunctionMessage that corresponds to the original tool invocation.
|
||||
"""
|
||||
if not isinstance(observation, str):
|
||||
try:
|
||||
|
||||
@@ -2,8 +2,8 @@
|
||||
|
||||
Distinct from provider-based [prompt caching](https://docs.langchain.com/oss/python/langchain/models#prompt-caching).
|
||||
|
||||
!!! warning "Beta feature"
|
||||
This is a beta feature. Please be wary of deploying experimental code to production
|
||||
!!! warning
|
||||
This is a beta feature! Please be wary of deploying experimental code to production
|
||||
unless you've taken appropriate precautions.
|
||||
|
||||
A cache is useful for two reasons:
|
||||
@@ -49,18 +49,17 @@ class BaseCache(ABC):
|
||||
"""Look up based on `prompt` and `llm_string`.
|
||||
|
||||
A cache implementation is expected to generate a key from the 2-tuple
|
||||
of `prompt` and `llm_string` (e.g., by concatenating them with a delimiter).
|
||||
of prompt and llm_string (e.g., by concatenating them with a delimiter).
|
||||
|
||||
Args:
|
||||
prompt: A string representation of the prompt.
|
||||
In the case of a chat model, the prompt is a non-trivial
|
||||
serialization of the prompt into the language model.
|
||||
llm_string: A string representation of the LLM configuration.
|
||||
|
||||
This is used to capture the invocation parameters of the LLM
|
||||
(e.g., model name, temperature, stop tokens, max tokens, etc.).
|
||||
|
||||
These invocation parameters are serialized into a string representation.
|
||||
These invocation parameters are serialized into a string
|
||||
representation.
|
||||
|
||||
Returns:
|
||||
On a cache miss, return `None`. On a cache hit, return the cached value.
|
||||
@@ -79,10 +78,8 @@ class BaseCache(ABC):
|
||||
In the case of a chat model, the prompt is a non-trivial
|
||||
serialization of the prompt into the language model.
|
||||
llm_string: A string representation of the LLM configuration.
|
||||
|
||||
This is used to capture the invocation parameters of the LLM
|
||||
(e.g., model name, temperature, stop tokens, max tokens, etc.).
|
||||
|
||||
These invocation parameters are serialized into a string
|
||||
representation.
|
||||
return_val: The value to be cached. The value is a list of `Generation`
|
||||
@@ -97,17 +94,15 @@ class BaseCache(ABC):
|
||||
"""Async look up based on `prompt` and `llm_string`.
|
||||
|
||||
A cache implementation is expected to generate a key from the 2-tuple
|
||||
of `prompt` and `llm_string` (e.g., by concatenating them with a delimiter).
|
||||
of prompt and llm_string (e.g., by concatenating them with a delimiter).
|
||||
|
||||
Args:
|
||||
prompt: A string representation of the prompt.
|
||||
In the case of a chat model, the prompt is a non-trivial
|
||||
serialization of the prompt into the language model.
|
||||
llm_string: A string representation of the LLM configuration.
|
||||
|
||||
This is used to capture the invocation parameters of the LLM
|
||||
(e.g., model name, temperature, stop tokens, max tokens, etc.).
|
||||
|
||||
These invocation parameters are serialized into a string
|
||||
representation.
|
||||
|
||||
@@ -130,10 +125,8 @@ class BaseCache(ABC):
|
||||
In the case of a chat model, the prompt is a non-trivial
|
||||
serialization of the prompt into the language model.
|
||||
llm_string: A string representation of the LLM configuration.
|
||||
|
||||
This is used to capture the invocation parameters of the LLM
|
||||
(e.g., model name, temperature, stop tokens, max tokens, etc.).
|
||||
|
||||
These invocation parameters are serialized into a string
|
||||
representation.
|
||||
return_val: The value to be cached. The value is a list of `Generation`
|
||||
|
||||
@@ -420,6 +420,8 @@ class RunManagerMixin:
|
||||
(includes inherited tags).
|
||||
metadata: The metadata associated with the custom event
|
||||
(includes inherited metadata).
|
||||
|
||||
!!! version-added "Added in version 0.2.15"
|
||||
"""
|
||||
|
||||
|
||||
@@ -880,6 +882,8 @@ class AsyncCallbackHandler(BaseCallbackHandler):
|
||||
(includes inherited tags).
|
||||
metadata: The metadata associated with the custom event
|
||||
(includes inherited metadata).
|
||||
|
||||
!!! version-added "Added in version 0.2.15"
|
||||
"""
|
||||
|
||||
|
||||
|
||||
@@ -229,24 +229,7 @@ def shielded(func: Func) -> Func:
|
||||
|
||||
@functools.wraps(func)
|
||||
async def wrapped(*args: Any, **kwargs: Any) -> Any:
|
||||
# Capture the current context to preserve context variables
|
||||
ctx = copy_context()
|
||||
|
||||
# Create the coroutine
|
||||
coro = func(*args, **kwargs)
|
||||
|
||||
# For Python 3.11+, create task with explicit context
|
||||
# For older versions, fallback to original behavior
|
||||
try:
|
||||
# Create a task with the captured context to preserve context variables
|
||||
task = asyncio.create_task(coro, context=ctx) # type: ignore[call-arg, unused-ignore]
|
||||
# `call-arg` used to not fail 3.9 or 3.10 tests
|
||||
return await asyncio.shield(task)
|
||||
except TypeError:
|
||||
# Python < 3.11 fallback - create task normally then shield
|
||||
# This won't preserve context perfectly but is better than nothing
|
||||
task = asyncio.create_task(coro)
|
||||
return await asyncio.shield(task)
|
||||
return await asyncio.shield(func(*args, **kwargs))
|
||||
|
||||
return cast("Func", wrapped)
|
||||
|
||||
@@ -1583,6 +1566,9 @@ class CallbackManager(BaseCallbackManager):
|
||||
|
||||
Raises:
|
||||
ValueError: If additional keyword arguments are passed.
|
||||
|
||||
!!! version-added "Added in version 0.2.14"
|
||||
|
||||
"""
|
||||
if not self.handlers:
|
||||
return
|
||||
@@ -2056,6 +2042,8 @@ class AsyncCallbackManager(BaseCallbackManager):
|
||||
|
||||
Raises:
|
||||
ValueError: If additional keyword arguments are passed.
|
||||
|
||||
!!! version-added "Added in version 0.2.14"
|
||||
"""
|
||||
if not self.handlers:
|
||||
return
|
||||
@@ -2567,6 +2555,9 @@ async def adispatch_custom_event(
|
||||
This is due to a limitation in asyncio for python <= 3.10 that prevents
|
||||
LangChain from automatically propagating the config object on the user's
|
||||
behalf.
|
||||
|
||||
!!! version-added "Added in version 0.2.15"
|
||||
|
||||
"""
|
||||
# Import locally to prevent circular imports.
|
||||
from langchain_core.runnables.config import ( # noqa: PLC0415
|
||||
@@ -2639,6 +2630,9 @@ def dispatch_custom_event(
|
||||
foo_ = RunnableLambda(foo)
|
||||
foo_.invoke({"a": "1"}, {"callbacks": [CustomCallbackManager()]})
|
||||
```
|
||||
|
||||
!!! version-added "Added in version 0.2.15"
|
||||
|
||||
"""
|
||||
# Import locally to prevent circular imports.
|
||||
from langchain_core.runnables.config import ( # noqa: PLC0415
|
||||
|
||||
@@ -24,7 +24,7 @@ class UsageMetadataCallbackHandler(BaseCallbackHandler):
|
||||
from langchain_core.callbacks import UsageMetadataCallbackHandler
|
||||
|
||||
llm_1 = init_chat_model(model="openai:gpt-4o-mini")
|
||||
llm_2 = init_chat_model(model="anthropic:claude-3-5-haiku-20241022")
|
||||
llm_2 = init_chat_model(model="anthropic:claude-3-5-haiku-latest")
|
||||
|
||||
callback = UsageMetadataCallbackHandler()
|
||||
result_1 = llm_1.invoke("Hello", config={"callbacks": [callback]})
|
||||
@@ -43,7 +43,7 @@ class UsageMetadataCallbackHandler(BaseCallbackHandler):
|
||||
'input_token_details': {'cache_read': 0, 'cache_creation': 0}}}
|
||||
```
|
||||
|
||||
!!! version-added "Added in `langchain-core` 0.3.49"
|
||||
!!! version-added "Added in version 0.3.49"
|
||||
|
||||
"""
|
||||
|
||||
@@ -109,7 +109,7 @@ def get_usage_metadata_callback(
|
||||
from langchain_core.callbacks import get_usage_metadata_callback
|
||||
|
||||
llm_1 = init_chat_model(model="openai:gpt-4o-mini")
|
||||
llm_2 = init_chat_model(model="anthropic:claude-3-5-haiku-20241022")
|
||||
llm_2 = init_chat_model(model="anthropic:claude-3-5-haiku-latest")
|
||||
|
||||
with get_usage_metadata_callback() as cb:
|
||||
llm_1.invoke("Hello")
|
||||
@@ -134,7 +134,7 @@ def get_usage_metadata_callback(
|
||||
}
|
||||
```
|
||||
|
||||
!!! version-added "Added in `langchain-core` 0.3.49"
|
||||
!!! version-added "Added in version 0.3.49"
|
||||
|
||||
"""
|
||||
usage_metadata_callback_var: ContextVar[UsageMetadataCallbackHandler | None] = (
|
||||
|
||||
@@ -121,7 +121,7 @@ class BaseChatMessageHistory(ABC):
|
||||
This method may be deprecated in a future release.
|
||||
|
||||
Args:
|
||||
message: The `HumanMessage` to add to the store.
|
||||
message: The human message to add to the store.
|
||||
"""
|
||||
if isinstance(message, HumanMessage):
|
||||
self.add_message(message)
|
||||
@@ -129,7 +129,7 @@ class BaseChatMessageHistory(ABC):
|
||||
self.add_message(HumanMessage(content=message))
|
||||
|
||||
def add_ai_message(self, message: AIMessage | str) -> None:
|
||||
"""Convenience method for adding an `AIMessage` string to the store.
|
||||
"""Convenience method for adding an AI message string to the store.
|
||||
|
||||
!!! note
|
||||
This is a convenience method. Code should favor the bulk `add_messages`
|
||||
@@ -138,7 +138,7 @@ class BaseChatMessageHistory(ABC):
|
||||
This method may be deprecated in a future release.
|
||||
|
||||
Args:
|
||||
message: The `AIMessage` to add.
|
||||
message: The AI message to add.
|
||||
"""
|
||||
if isinstance(message, AIMessage):
|
||||
self.add_message(message)
|
||||
@@ -173,7 +173,7 @@ class BaseChatMessageHistory(ABC):
|
||||
in an efficient manner to avoid unnecessary round-trips to the underlying store.
|
||||
|
||||
Args:
|
||||
messages: A sequence of `BaseMessage` objects to store.
|
||||
messages: A sequence of BaseMessage objects to store.
|
||||
"""
|
||||
for message in messages:
|
||||
self.add_message(message)
|
||||
@@ -182,7 +182,7 @@ class BaseChatMessageHistory(ABC):
|
||||
"""Async add a list of messages.
|
||||
|
||||
Args:
|
||||
messages: A sequence of `BaseMessage` objects to store.
|
||||
messages: A sequence of BaseMessage objects to store.
|
||||
"""
|
||||
await run_in_executor(None, self.add_messages, messages)
|
||||
|
||||
|
||||
@@ -27,7 +27,7 @@ class BaseLoader(ABC): # noqa: B024
|
||||
"""Interface for Document Loader.
|
||||
|
||||
Implementations should implement the lazy-loading method using generators
|
||||
to avoid loading all documents into memory at once.
|
||||
to avoid loading all Documents into memory at once.
|
||||
|
||||
`load` is provided just for user convenience and should not be overridden.
|
||||
"""
|
||||
@@ -53,11 +53,9 @@ class BaseLoader(ABC): # noqa: B024
|
||||
def load_and_split(
|
||||
self, text_splitter: TextSplitter | None = None
|
||||
) -> list[Document]:
|
||||
"""Load `Document` and split into chunks. Chunks are returned as `Document`.
|
||||
"""Load Documents and split into chunks. Chunks are returned as `Document`.
|
||||
|
||||
!!! danger
|
||||
|
||||
Do not override this method. It should be considered to be deprecated!
|
||||
Do not override this method. It should be considered to be deprecated!
|
||||
|
||||
Args:
|
||||
text_splitter: `TextSplitter` instance to use for splitting documents.
|
||||
@@ -137,7 +135,7 @@ class BaseBlobParser(ABC):
|
||||
"""
|
||||
|
||||
def parse(self, blob: Blob) -> list[Document]:
|
||||
"""Eagerly parse the blob into a `Document` or list of `Document` objects.
|
||||
"""Eagerly parse the blob into a `Document` or `Document` objects.
|
||||
|
||||
This is a convenience method for interactive development environment.
|
||||
|
||||
|
||||
@@ -28,7 +28,7 @@ class BlobLoader(ABC):
|
||||
def yield_blobs(
|
||||
self,
|
||||
) -> Iterable[Blob]:
|
||||
"""A lazy loader for raw data represented by LangChain's `Blob` object.
|
||||
"""A lazy loader for raw data represented by LangChain's Blob object.
|
||||
|
||||
Returns:
|
||||
A generator over blobs
|
||||
|
||||
@@ -14,13 +14,13 @@ from langchain_core.documents import Document
|
||||
|
||||
|
||||
class LangSmithLoader(BaseLoader):
|
||||
"""Load LangSmith Dataset examples as `Document` objects.
|
||||
"""Load LangSmith Dataset examples as Documents.
|
||||
|
||||
Loads the example inputs as the `Document` page content and places the entire
|
||||
example into the `Document` metadata. This allows you to easily create few-shot
|
||||
example retrievers from the loaded documents.
|
||||
Loads the example inputs as the Document page content and places the entire example
|
||||
into the Document metadata. This allows you to easily create few-shot example
|
||||
retrievers from the loaded documents.
|
||||
|
||||
??? note "Lazy loading example"
|
||||
??? note "Lazy load"
|
||||
|
||||
```python
|
||||
from langchain_core.document_loaders import LangSmithLoader
|
||||
@@ -34,6 +34,9 @@ class LangSmithLoader(BaseLoader):
|
||||
```python
|
||||
# -> [Document("...", metadata={"inputs": {...}, "outputs": {...}, ...}), ...]
|
||||
```
|
||||
|
||||
!!! version-added "Added in version 0.2.34"
|
||||
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
@@ -66,11 +69,12 @@ class LangSmithLoader(BaseLoader):
|
||||
format_content: Function for converting the content extracted from the example
|
||||
inputs into a string. Defaults to JSON-encoding the contents.
|
||||
example_ids: The IDs of the examples to filter by.
|
||||
as_of: The dataset version tag or timestamp to retrieve the examples as of.
|
||||
Response examples will only be those that were present at the time of
|
||||
the tagged (or timestamped) version.
|
||||
as_of: The dataset version tag OR
|
||||
timestamp to retrieve the examples as of.
|
||||
Response examples will only be those that were present at the time
|
||||
of the tagged (or timestamped) version.
|
||||
splits: A list of dataset splits, which are
|
||||
divisions of your dataset such as `train`, `test`, or `validation`.
|
||||
divisions of your dataset such as 'train', 'test', or 'validation'.
|
||||
Returns examples only from the specified splits.
|
||||
inline_s3_urls: Whether to inline S3 URLs.
|
||||
offset: The offset to start from.
|
||||
|
||||
@@ -1,28 +1,7 @@
|
||||
"""Documents module for data retrieval and processing workflows.
|
||||
"""Documents module.
|
||||
|
||||
This module provides core abstractions for handling data in retrieval-augmented
|
||||
generation (RAG) pipelines, vector stores, and document processing workflows.
|
||||
|
||||
!!! warning "Documents vs. message content"
|
||||
This module is distinct from `langchain_core.messages.content`, which provides
|
||||
multimodal content blocks for **LLM chat I/O** (text, images, audio, etc. within
|
||||
messages).
|
||||
|
||||
**Key distinction:**
|
||||
|
||||
- **Documents** (this module): For **data retrieval and processing workflows**
|
||||
- Vector stores, retrievers, RAG pipelines
|
||||
- Text chunking, embedding, and semantic search
|
||||
- Example: Chunks of a PDF stored in a vector database
|
||||
|
||||
- **Content Blocks** (`messages.content`): For **LLM conversational I/O**
|
||||
- Multimodal message content sent to/from models
|
||||
- Tool calls, reasoning, citations within chat
|
||||
- Example: An image sent to a vision model in a chat message (via
|
||||
[`ImageContentBlock`][langchain.messages.ImageContentBlock])
|
||||
|
||||
While both can represent similar data types (text, files), they serve different
|
||||
architectural purposes in LangChain applications.
|
||||
**Document** module is a collection of classes that handle documents
|
||||
and their transformations.
|
||||
"""
|
||||
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
@@ -1,16 +1,4 @@
|
||||
"""Base classes for media and documents.
|
||||
|
||||
This module contains core abstractions for **data retrieval and processing workflows**:
|
||||
|
||||
- `BaseMedia`: Base class providing `id` and `metadata` fields
|
||||
- `Blob`: Raw data loading (files, binary data) - used by document loaders
|
||||
- `Document`: Text content for retrieval (RAG, vector stores, semantic search)
|
||||
|
||||
!!! note "Not for LLM chat messages"
|
||||
These classes are for data processing pipelines, not LLM I/O. For multimodal
|
||||
content in chat messages (images, audio in conversations), see
|
||||
`langchain.messages` content blocks instead.
|
||||
"""
|
||||
"""Base classes for media and documents."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
@@ -31,23 +19,27 @@ PathLike = str | PurePath
|
||||
|
||||
|
||||
class BaseMedia(Serializable):
|
||||
"""Base class for content used in retrieval and data processing workflows.
|
||||
"""Use to represent media content.
|
||||
|
||||
Provides common fields for content that needs to be stored, indexed, or searched.
|
||||
Media objects can be used to represent raw data, such as text or binary data.
|
||||
|
||||
!!! note
|
||||
For multimodal content in **chat messages** (images, audio sent to/from LLMs),
|
||||
use `langchain.messages` content blocks instead.
|
||||
LangChain Media objects allow associating metadata and an optional identifier
|
||||
with the content.
|
||||
|
||||
The presence of an ID and metadata make it easier to store, index, and search
|
||||
over the content in a structured way.
|
||||
"""
|
||||
|
||||
# The ID field is optional at the moment.
|
||||
# It will likely become required in a future major release after
|
||||
# it has been adopted by enough VectorStore implementations.
|
||||
# it has been adopted by enough vectorstore implementations.
|
||||
id: str | None = Field(default=None, coerce_numbers_to_str=True)
|
||||
"""An optional identifier for the document.
|
||||
|
||||
Ideally this should be unique across the document collection and formatted
|
||||
as a UUID, but this will not be enforced.
|
||||
|
||||
!!! version-added "Added in version 0.2.11"
|
||||
"""
|
||||
|
||||
metadata: dict = Field(default_factory=dict)
|
||||
@@ -55,70 +47,71 @@ class BaseMedia(Serializable):
|
||||
|
||||
|
||||
class Blob(BaseMedia):
|
||||
"""Raw data abstraction for document loading and file processing.
|
||||
"""Blob represents raw data by either reference or value.
|
||||
|
||||
Represents raw bytes or text, either in-memory or by file reference. Used
|
||||
primarily by document loaders to decouple data loading from parsing.
|
||||
Provides an interface to materialize the blob in different representations, and
|
||||
help to decouple the development of data loaders from the downstream parsing of
|
||||
the raw data.
|
||||
|
||||
Inspired by [Mozilla's `Blob`](https://developer.mozilla.org/en-US/docs/Web/API/Blob)
|
||||
Inspired by: https://developer.mozilla.org/en-US/docs/Web/API/Blob
|
||||
|
||||
???+ example "Initialize a blob from in-memory data"
|
||||
Example: Initialize a blob from in-memory data
|
||||
|
||||
```python
|
||||
from langchain_core.documents import Blob
|
||||
```python
|
||||
from langchain_core.documents import Blob
|
||||
|
||||
blob = Blob.from_data("Hello, world!")
|
||||
blob = Blob.from_data("Hello, world!")
|
||||
|
||||
# Read the blob as a string
|
||||
print(blob.as_string())
|
||||
# Read the blob as a string
|
||||
print(blob.as_string())
|
||||
|
||||
# Read the blob as bytes
|
||||
print(blob.as_bytes())
|
||||
# Read the blob as bytes
|
||||
print(blob.as_bytes())
|
||||
|
||||
# Read the blob as a byte stream
|
||||
with blob.as_bytes_io() as f:
|
||||
print(f.read())
|
||||
```
|
||||
# Read the blob as a byte stream
|
||||
with blob.as_bytes_io() as f:
|
||||
print(f.read())
|
||||
```
|
||||
|
||||
??? example "Load from memory and specify MIME type and metadata"
|
||||
Example: Load from memory and specify mime-type and metadata
|
||||
|
||||
```python
|
||||
from langchain_core.documents import Blob
|
||||
```python
|
||||
from langchain_core.documents import Blob
|
||||
|
||||
blob = Blob.from_data(
|
||||
data="Hello, world!",
|
||||
mime_type="text/plain",
|
||||
metadata={"source": "https://example.com"},
|
||||
)
|
||||
```
|
||||
blob = Blob.from_data(
|
||||
data="Hello, world!",
|
||||
mime_type="text/plain",
|
||||
metadata={"source": "https://example.com"},
|
||||
)
|
||||
```
|
||||
|
||||
??? example "Load the blob from a file"
|
||||
Example: Load the blob from a file
|
||||
|
||||
```python
|
||||
from langchain_core.documents import Blob
|
||||
```python
|
||||
from langchain_core.documents import Blob
|
||||
|
||||
blob = Blob.from_path("path/to/file.txt")
|
||||
blob = Blob.from_path("path/to/file.txt")
|
||||
|
||||
# Read the blob as a string
|
||||
print(blob.as_string())
|
||||
# Read the blob as a string
|
||||
print(blob.as_string())
|
||||
|
||||
# Read the blob as bytes
|
||||
print(blob.as_bytes())
|
||||
# Read the blob as bytes
|
||||
print(blob.as_bytes())
|
||||
|
||||
# Read the blob as a byte stream
|
||||
with blob.as_bytes_io() as f:
|
||||
print(f.read())
|
||||
```
|
||||
# Read the blob as a byte stream
|
||||
with blob.as_bytes_io() as f:
|
||||
print(f.read())
|
||||
```
|
||||
"""
|
||||
|
||||
data: bytes | str | None = None
|
||||
"""Raw data associated with the `Blob`."""
|
||||
"""Raw data associated with the blob."""
|
||||
mimetype: str | None = None
|
||||
"""MIME type, not to be confused with a file extension."""
|
||||
"""MimeType not to be confused with a file extension."""
|
||||
encoding: str = "utf-8"
|
||||
"""Encoding to use if decoding the bytes into a string.
|
||||
|
||||
Uses `utf-8` as default encoding if decoding to string.
|
||||
Use `utf-8` as default encoding, if decoding to string.
|
||||
"""
|
||||
path: PathLike | None = None
|
||||
"""Location where the original content was found."""
|
||||
@@ -132,9 +125,9 @@ class Blob(BaseMedia):
|
||||
def source(self) -> str | None:
|
||||
"""The source location of the blob as string if known otherwise none.
|
||||
|
||||
If a path is associated with the `Blob`, it will default to the path location.
|
||||
If a path is associated with the blob, it will default to the path location.
|
||||
|
||||
Unless explicitly set via a metadata field called `'source'`, in which
|
||||
Unless explicitly set via a metadata field called `"source"`, in which
|
||||
case that value will be used instead.
|
||||
"""
|
||||
if self.metadata and "source" in self.metadata:
|
||||
@@ -220,13 +213,13 @@ class Blob(BaseMedia):
|
||||
Args:
|
||||
path: Path-like object to file to be read
|
||||
encoding: Encoding to use if decoding the bytes into a string
|
||||
mime_type: If provided, will be set as the MIME type of the data
|
||||
guess_type: If `True`, the MIME type will be guessed from the file
|
||||
extension, if a MIME type was not provided
|
||||
metadata: Metadata to associate with the `Blob`
|
||||
mime_type: If provided, will be set as the mime-type of the data
|
||||
guess_type: If `True`, the mimetype will be guessed from the file extension,
|
||||
if a mime-type was not provided
|
||||
metadata: Metadata to associate with the blob
|
||||
|
||||
Returns:
|
||||
`Blob` instance
|
||||
Blob instance
|
||||
"""
|
||||
if mime_type is None and guess_type:
|
||||
mimetype = mimetypes.guess_type(path)[0] if guess_type else None
|
||||
@@ -252,17 +245,17 @@ class Blob(BaseMedia):
|
||||
path: str | None = None,
|
||||
metadata: dict | None = None,
|
||||
) -> Blob:
|
||||
"""Initialize the `Blob` from in-memory data.
|
||||
"""Initialize the blob from in-memory data.
|
||||
|
||||
Args:
|
||||
data: The in-memory data associated with the `Blob`
|
||||
data: The in-memory data associated with the blob
|
||||
encoding: Encoding to use if decoding the bytes into a string
|
||||
mime_type: If provided, will be set as the MIME type of the data
|
||||
mime_type: If provided, will be set as the mime-type of the data
|
||||
path: If provided, will be set as the source from which the data came
|
||||
metadata: Metadata to associate with the `Blob`
|
||||
metadata: Metadata to associate with the blob
|
||||
|
||||
Returns:
|
||||
`Blob` instance
|
||||
Blob instance
|
||||
"""
|
||||
return cls(
|
||||
data=data,
|
||||
@@ -283,10 +276,6 @@ class Blob(BaseMedia):
|
||||
class Document(BaseMedia):
|
||||
"""Class for storing a piece of text and associated metadata.
|
||||
|
||||
!!! note
|
||||
`Document` is for **retrieval workflows**, not chat I/O. For sending text
|
||||
to an LLM in a conversation, use message types from `langchain.messages`.
|
||||
|
||||
Example:
|
||||
```python
|
||||
from langchain_core.documents import Document
|
||||
@@ -309,7 +298,7 @@ class Document(BaseMedia):
|
||||
|
||||
@classmethod
|
||||
def is_lc_serializable(cls) -> bool:
|
||||
"""Return `True` as this class is serializable."""
|
||||
"""Return True as this class is serializable."""
|
||||
return True
|
||||
|
||||
@classmethod
|
||||
@@ -322,10 +311,10 @@ class Document(BaseMedia):
|
||||
return ["langchain", "schema", "document"]
|
||||
|
||||
def __str__(self) -> str:
|
||||
"""Override `__str__` to restrict it to page_content and metadata.
|
||||
"""Override __str__ to restrict it to page_content and metadata.
|
||||
|
||||
Returns:
|
||||
A string representation of the `Document`.
|
||||
A string representation of the Document.
|
||||
"""
|
||||
# The format matches pydantic format for __str__.
|
||||
#
@@ -21,14 +21,14 @@ class BaseDocumentCompressor(BaseModel, ABC):

This abstraction is primarily used for post-processing of retrieved documents.

`Document` objects matching a given query are first retrieved.
Documents matching a given query are first retrieved.

Then the list of documents can be further processed.

For example, one could re-rank the retrieved documents using an LLM.

!!! note
Users should favor using a `RunnableLambda` instead of sub-classing from this
Users should favor using a RunnableLambda instead of sub-classing from this
interface.

"""
@@ -43,9 +43,9 @@ class BaseDocumentCompressor(BaseModel, ABC):
"""Compress retrieved documents given the query context.

Args:
documents: The retrieved `Document` objects.
documents: The retrieved documents.
query: The query context.
callbacks: Optional `Callbacks` to run during compression.
callbacks: Optional callbacks to run during compression.

Returns:
The compressed documents.
@@ -61,9 +61,9 @@ class BaseDocumentCompressor(BaseModel, ABC):
"""Async compress retrieved documents given the query context.

Args:
documents: The retrieved `Document` objects.
documents: The retrieved documents.
query: The query context.
callbacks: Optional `Callbacks` to run during compression.
callbacks: Optional callbacks to run during compression.

Returns:
The compressed documents.
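
The note in these hunks points at `RunnableLambda` as the preferred route over subclassing; a minimal sketch of what that looks like (the filtering logic is a made-up stand-in for a real re-ranker):

```python
from langchain_core.documents import Document
from langchain_core.runnables import RunnableLambda


def keep_matching(inputs: dict) -> list[Document]:
    # Toy "compression": keep only documents that mention the query.
    return [doc for doc in inputs["documents"] if inputs["query"] in doc.page_content]


compressor = RunnableLambda(keep_matching)
docs = [Document(page_content="cats and dogs"), Document(page_content="fish")]
print(compressor.invoke({"documents": docs, "query": "cats"}))
```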
@@ -16,8 +16,8 @@ if TYPE_CHECKING:
class BaseDocumentTransformer(ABC):
"""Abstract base class for document transformation.

A document transformation takes a sequence of `Document` objects and returns a
sequence of transformed `Document` objects.
A document transformation takes a sequence of Documents and returns a
sequence of transformed Documents.

Example:
```python
@@ -57,10 +57,10 @@ class BaseDocumentTransformer(ABC):
"""Transform a list of documents.

Args:
documents: A sequence of `Document` objects to be transformed.
documents: A sequence of Documents to be transformed.

Returns:
A sequence of transformed `Document` objects.
A sequence of transformed Documents.
"""

async def atransform_documents(
@@ -69,10 +69,10 @@ class BaseDocumentTransformer(ABC):
"""Asynchronously transform a list of documents.

Args:
documents: A sequence of `Document` objects to be transformed.
documents: A sequence of Documents to be transformed.

Returns:
A sequence of transformed `Document` objects.
A sequence of transformed Documents.
"""
return await run_in_executor(
None, self.transform_documents, documents, **kwargs
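
A minimal concrete subclass of the interface described here (the upper-casing transformation is an assumed toy example; only `transform_documents` needs implementing, since `atransform_documents` falls back to the executor as shown above):

```python
from collections.abc import Sequence
from typing import Any

from langchain_core.documents import BaseDocumentTransformer, Document


class UpperCaseTransformer(BaseDocumentTransformer):
    """Toy transformer: upper-cases each document's text."""

    def transform_documents(
        self, documents: Sequence[Document], **kwargs: Any
    ) -> Sequence[Document]:
        return [
            Document(page_content=doc.page_content.upper(), metadata=doc.metadata)
            for doc in documents
        ]
```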
@@ -18,7 +18,7 @@ class FakeEmbeddings(Embeddings, BaseModel):

This embedding model creates embeddings by sampling from a normal distribution.

!!! danger "Toy model"
!!! warning
Do not use this outside of testing, as it is not a real embedding model.

Instantiate:
@@ -73,7 +73,7 @@ class DeterministicFakeEmbedding(Embeddings, BaseModel):
This embedding model creates embeddings by sampling from a normal distribution
with a seed based on the hash of the text.

!!! danger "Toy model"
!!! warning
Do not use this outside of testing, as it is not a real embedding model.

Instantiate:

@@ -29,7 +29,7 @@ class LengthBasedExampleSelector(BaseExampleSelector, BaseModel):
max_length: int = 2048
"""Max length for the prompt, beyond which examples are cut."""

example_text_lengths: list[int] = Field(default_factory=list)
example_text_lengths: list[int] = Field(default_factory=list)  # :meta private:
"""Length of each example."""

def add_example(self, example: dict[str, str]) -> None:

@@ -41,7 +41,7 @@ class _VectorStoreExampleSelector(BaseExampleSelector, BaseModel, ABC):
"""Optional keys to filter input to. If provided, the search is based on
the input variables instead of all variables."""
vectorstore_kwargs: dict[str, Any] | None = None
"""Extra arguments passed to similarity_search function of the `VectorStore`."""
"""Extra arguments passed to similarity_search function of the vectorstore."""

model_config = ConfigDict(
arbitrary_types_allowed=True,
@@ -159,7 +159,7 @@ class SemanticSimilarityExampleSelector(_VectorStoreExampleSelector):
instead of all variables.
example_keys: If provided, keys to filter examples to.
vectorstore_kwargs: Extra arguments passed to similarity_search function
of the `VectorStore`.
of the vectorstore.
vectorstore_cls_kwargs: optional kwargs containing url for vector store

Returns:
@@ -203,7 +203,7 @@ class SemanticSimilarityExampleSelector(_VectorStoreExampleSelector):
instead of all variables.
example_keys: If provided, keys to filter examples to.
vectorstore_kwargs: Extra arguments passed to similarity_search function
of the `VectorStore`.
of the vectorstore.
vectorstore_cls_kwargs: optional kwargs containing url for vector store

Returns:
@@ -286,12 +286,12 @@ class MaxMarginalRelevanceExampleSelector(_VectorStoreExampleSelector):
embeddings: An initialized embedding API interface, e.g. OpenAIEmbeddings().
vectorstore_cls: A vector store DB interface class, e.g. FAISS.
k: Number of examples to select.
fetch_k: Number of `Document` objects to fetch to pass to MMR algorithm.
fetch_k: Number of Documents to fetch to pass to MMR algorithm.
input_keys: If provided, the search is based on the input variables
instead of all variables.
example_keys: If provided, keys to filter examples to.
vectorstore_kwargs: Extra arguments passed to similarity_search function
of the `VectorStore`.
of the vectorstore.
vectorstore_cls_kwargs: optional kwargs containing url for vector store

Returns:
@@ -333,12 +333,12 @@ class MaxMarginalRelevanceExampleSelector(_VectorStoreExampleSelector):
embeddings: An initialized embedding API interface, e.g. OpenAIEmbeddings().
vectorstore_cls: A vector store DB interface class, e.g. FAISS.
k: Number of examples to select.
fetch_k: Number of `Document` objects to fetch to pass to MMR algorithm.
fetch_k: Number of Documents to fetch to pass to MMR algorithm.
input_keys: If provided, the search is based on the input variables
instead of all variables.
example_keys: If provided, keys to filter examples to.
vectorstore_kwargs: Extra arguments passed to similarity_search function
of the `VectorStore`.
of the vectorstore.
vectorstore_cls_kwargs: optional kwargs containing url for vector store

Returns:
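
For orientation, roughly how the selector documented in these hunks is constructed. A sketch only: `FAISS` and `OpenAIEmbeddings` are the integrations the docstring itself names as examples, assumed installed from `langchain_community` and `langchain_openai`, and the example list is invented:

```python
from langchain_community.vectorstores import FAISS
from langchain_core.example_selectors import SemanticSimilarityExampleSelector
from langchain_openai import OpenAIEmbeddings

selector = SemanticSimilarityExampleSelector.from_examples(
    examples=[{"input": "happy", "output": "sad"}],
    embeddings=OpenAIEmbeddings(),
    vectorstore_cls=FAISS,
    k=1,
)
print(selector.select_examples({"input": "joyful"}))
```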

@@ -16,10 +16,9 @@ class OutputParserException(ValueError, LangChainException):  # noqa: N818
"""Exception that output parsers should raise to signify a parsing error.

This exists to differentiate parsing errors from other code or execution errors
that also may arise inside the output parser.

`OutputParserException` will be available to catch and handle in ways to fix the
parsing error, while other errors will be raised.
that also may arise inside the output parser. `OutputParserException` will be
available to catch and handle in ways to fix the parsing error, while other
errors will be raised.
"""

def __init__(
@@ -33,19 +32,18 @@ class OutputParserException(ValueError, LangChainException):  # noqa: N818

Args:
error: The error that's being re-raised or an error message.
observation: String explanation of error which can be passed to a model to
try and remediate the issue.
observation: String explanation of error which can be passed to a
model to try and remediate the issue.
llm_output: String model output which is error-ing.

send_to_llm: Whether to send the observation and llm_output back to an Agent
after an `OutputParserException` has been raised.

This gives the underlying model driving the agent the context that the
previous output was improperly structured, in the hopes that it will
update the output to the correct format.

Raises:
ValueError: If `send_to_llm` is `True` but either observation or
ValueError: If `send_to_llm` is True but either observation or
`llm_output` are not provided.
"""
if isinstance(error, str):
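
A small sketch of the catch-and-remediate pattern this docstring describes (the message strings are invented):

```python
from langchain_core.exceptions import OutputParserException

try:
    # An output parser would raise this when the model's text can't be parsed.
    raise OutputParserException(
        "Got invalid JSON.",
        observation="Respond with a valid JSON object.",
        llm_output="not-json",
        send_to_llm=True,
    )
except OutputParserException as exc:
    # Parsing errors can be handled (e.g. retried) while other errors propagate.
    print(exc.observation, exc.llm_output)
```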
@@ -68,11 +66,11 @@ class ErrorCode(Enum):
"""Error codes."""

INVALID_PROMPT_INPUT = "INVALID_PROMPT_INPUT"
INVALID_TOOL_RESULTS = "INVALID_TOOL_RESULTS"  # Used in JS; not Py (yet)
INVALID_TOOL_RESULTS = "INVALID_TOOL_RESULTS"
MESSAGE_COERCION_FAILURE = "MESSAGE_COERCION_FAILURE"
MODEL_AUTHENTICATION = "MODEL_AUTHENTICATION"  # Used in JS; not Py (yet)
MODEL_NOT_FOUND = "MODEL_NOT_FOUND"  # Used in JS; not Py (yet)
MODEL_RATE_LIMIT = "MODEL_RATE_LIMIT"  # Used in JS; not Py (yet)
MODEL_AUTHENTICATION = "MODEL_AUTHENTICATION"
MODEL_NOT_FOUND = "MODEL_NOT_FOUND"
MODEL_RATE_LIMIT = "MODEL_RATE_LIMIT"
OUTPUT_PARSING_FAILURE = "OUTPUT_PARSING_FAILURE"


@@ -88,6 +86,6 @@ def create_message(*, message: str, error_code: ErrorCode) -> str:
"""
return (
f"{message}\n"
"For troubleshooting, visit: https://docs.langchain.com/oss/python/langchain"
f"/errors/{error_code.value} "
"For troubleshooting, visit: https://python.langchain.com/docs/"
f"troubleshooting/errors/{error_code.value} "
)
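
An illustrative call of the helper shown above, assuming both names are importable from `langchain_core.exceptions` (the message text is invented):

```python
from langchain_core.exceptions import ErrorCode, create_message

msg = create_message(
    message="Found AIMessages with unmatched tool calls.",
    error_code=ErrorCode.INVALID_TOOL_RESULTS,
)
print(msg)  # message plus a troubleshooting URL built from the error code
```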

@@ -1,7 +1,7 @@
"""Code to help indexing data into a vectorstore.

This package contains helper logic to help deal with indexing data into
a `VectorStore` while avoiding duplicated content and over-writing content
a vectorstore while avoiding duplicated content and over-writing content
if it's unchanged.
"""


@@ -298,48 +298,48 @@ def index(
For the time being, documents are indexed using their hashes, and users
are not able to specify the uid of the document.

!!! warning "Behavior changed in `langchain-core` 0.3.25"
!!! warning "Behavior changed in 0.3.25"
Added `scoped_full` cleanup mode.

!!! warning

* In full mode, the loader should be returning
the entire dataset, and not just a subset of the dataset.
Otherwise, the auto_cleanup will remove documents that it is not
supposed to.
the entire dataset, and not just a subset of the dataset.
Otherwise, the auto_cleanup will remove documents that it is not
supposed to.
* In incremental mode, if documents associated with a particular
source id appear across different batches, the indexing API
will do some redundant work. This will still result in the
correct end state of the index, but will unfortunately not be
100% efficient. For example, if a given document is split into 15
chunks, and we index them using a batch size of 5, we'll have 3 batches
all with the same source id. In general, to avoid doing too much
redundant work select as big a batch size as possible.
source id appear across different batches, the indexing API
will do some redundant work. This will still result in the
correct end state of the index, but will unfortunately not be
100% efficient. For example, if a given document is split into 15
chunks, and we index them using a batch size of 5, we'll have 3 batches
all with the same source id. In general, to avoid doing too much
redundant work select as big a batch size as possible.
* The `scoped_full` mode is suitable if determining an appropriate batch size
is challenging or if your data loader cannot return the entire dataset at
once. This mode keeps track of source IDs in memory, which should be fine
for most use cases. If your dataset is large (10M+ docs), you will likely
need to parallelize the indexing process regardless.
is challenging or if your data loader cannot return the entire dataset at
once. This mode keeps track of source IDs in memory, which should be fine
for most use cases. If your dataset is large (10M+ docs), you will likely
need to parallelize the indexing process regardless.

Args:
docs_source: Data loader or iterable of documents to index.
record_manager: Timestamped set to keep track of which documents were
updated.
vector_store: `VectorStore` or DocumentIndex to index the documents into.
vector_store: VectorStore or DocumentIndex to index the documents into.
batch_size: Batch size to use when indexing.
cleanup: How to handle clean up of documents.

- incremental: Cleans up all documents that haven't been updated AND
that are associated with source IDs that were seen during indexing.
Clean up is done continuously during indexing helping to minimize the
probability of users seeing duplicated content.
that are associated with source ids that were seen during indexing.
Clean up is done continuously during indexing helping to minimize the
probability of users seeing duplicated content.
- full: Delete all documents that have not been returned by the loader
during this run of indexing.
Clean up runs after all documents have been indexed.
This means that users may see duplicated content during indexing.
during this run of indexing.
Clean up runs after all documents have been indexed.
This means that users may see duplicated content during indexing.
- scoped_full: Similar to Full, but only deletes all documents
that haven't been updated AND that are associated with
source IDs that were seen during indexing.
that haven't been updated AND that are associated with
source ids that were seen during indexing.
- None: Do not delete any documents.
source_id_key: Optional key that helps identify the original source
of the document.
@@ -349,7 +349,7 @@ def index(
key_encoder: Hashing algorithm to use for hashing the document content and
metadata. Options include "blake2b", "sha256", and "sha512".

!!! version-added "Added in `langchain-core` 0.3.66"
!!! version-added "Added in version 0.3.66"

key_encoder: Hashing algorithm to use for hashing the document.
If not provided, a default encoder using SHA-1 will be used.
@@ -363,10 +363,10 @@ def index(
When changing the key encoder, you must change the
index as well to avoid duplicated documents in the cache.
upsert_kwargs: Additional keyword arguments to pass to the add_documents
method of the `VectorStore` or the upsert method of the DocumentIndex.
method of the VectorStore or the upsert method of the DocumentIndex.
For example, you can use this to specify a custom vector_field:
upsert_kwargs={"vector_field": "embedding"}
!!! version-added "Added in `langchain-core` 0.3.10"
!!! version-added "Added in version 0.3.10"

Returns:
Indexing result which contains information about how many documents
@@ -375,10 +375,10 @@ def index(
Raises:
ValueError: If cleanup mode is not one of 'incremental', 'full' or None
ValueError: If cleanup mode is incremental and source_id_key is None.
ValueError: If `VectorStore` does not have
ValueError: If vectorstore does not have
"delete" and "add_documents" required methods.
ValueError: If source_id_key is not None, but is not a string or callable.
TypeError: If `vectorstore` is not a `VectorStore` or a DocumentIndex.
TypeError: If `vectorstore` is not a VectorStore or a DocumentIndex.
AssertionError: If `source_id` is None when cleanup mode is incremental.
(should be unreachable code).
"""
@@ -415,7 +415,7 @@ def index(
raise ValueError(msg)

if type(destination).delete == VectorStore.delete:
# Checking if the VectorStore has overridden the default delete method
# Checking if the vectorstore has overridden the default delete method
# implementation which just raises a NotImplementedError
msg = "Vectorstore has not implemented the delete method"
raise ValueError(msg)
@@ -466,11 +466,11 @@ def index(
]

if cleanup in {"incremental", "scoped_full"}:
# Source IDs are required.
# source ids are required.
for source_id, hashed_doc in zip(source_ids, hashed_docs, strict=False):
if source_id is None:
msg = (
f"Source IDs are required when cleanup mode is "
f"Source ids are required when cleanup mode is "
f"incremental or scoped_full. "
f"Document that starts with "
f"content: {hashed_doc.page_content[:100]} "
@@ -479,7 +479,7 @@ def index(
raise ValueError(msg)
if cleanup == "scoped_full":
scoped_full_cleanup_source_ids.add(source_id)
# Source IDs cannot be None after for loop above.
# source ids cannot be None after for loop above.
source_ids = cast("Sequence[str]", source_ids)

exists_batch = record_manager.exists(
@@ -538,7 +538,7 @@ def index(
# If source IDs are provided, we can do the deletion incrementally!
if cleanup == "incremental":
# Get the uids of the documents that were not returned by the loader.
# mypy isn't good enough to determine that source IDs cannot be None
# mypy isn't good enough to determine that source ids cannot be None
# here due to a check that's happening above, so we check again.
for source_id in source_ids:
if source_id is None:

@@ -636,48 +636,48 @@ async def aindex(
For the time being, documents are indexed using their hashes, and users
are not able to specify the uid of the document.

!!! warning "Behavior changed in `langchain-core` 0.3.25"
!!! warning "Behavior changed in 0.3.25"
Added `scoped_full` cleanup mode.

!!! warning

* In full mode, the loader should be returning
the entire dataset, and not just a subset of the dataset.
Otherwise, the auto_cleanup will remove documents that it is not
supposed to.
the entire dataset, and not just a subset of the dataset.
Otherwise, the auto_cleanup will remove documents that it is not
supposed to.
* In incremental mode, if documents associated with a particular
source id appear across different batches, the indexing API
will do some redundant work. This will still result in the
correct end state of the index, but will unfortunately not be
100% efficient. For example, if a given document is split into 15
chunks, and we index them using a batch size of 5, we'll have 3 batches
all with the same source id. In general, to avoid doing too much
redundant work select as big a batch size as possible.
source id appear across different batches, the indexing API
will do some redundant work. This will still result in the
correct end state of the index, but will unfortunately not be
100% efficient. For example, if a given document is split into 15
chunks, and we index them using a batch size of 5, we'll have 3 batches
all with the same source id. In general, to avoid doing too much
redundant work select as big a batch size as possible.
* The `scoped_full` mode is suitable if determining an appropriate batch size
is challenging or if your data loader cannot return the entire dataset at
once. This mode keeps track of source IDs in memory, which should be fine
for most use cases. If your dataset is large (10M+ docs), you will likely
need to parallelize the indexing process regardless.
is challenging or if your data loader cannot return the entire dataset at
once. This mode keeps track of source IDs in memory, which should be fine
for most use cases. If your dataset is large (10M+ docs), you will likely
need to parallelize the indexing process regardless.

Args:
docs_source: Data loader or iterable of documents to index.
record_manager: Timestamped set to keep track of which documents were
updated.
vector_store: `VectorStore` or DocumentIndex to index the documents into.
vector_store: VectorStore or DocumentIndex to index the documents into.
batch_size: Batch size to use when indexing.
cleanup: How to handle clean up of documents.

- incremental: Cleans up all documents that haven't been updated AND
that are associated with source IDs that were seen during indexing.
Clean up is done continuously during indexing helping to minimize the
probability of users seeing duplicated content.
that are associated with source ids that were seen during indexing.
Clean up is done continuously during indexing helping to minimize the
probability of users seeing duplicated content.
- full: Delete all documents that have not been returned by the loader
during this run of indexing.
Clean up runs after all documents have been indexed.
This means that users may see duplicated content during indexing.
during this run of indexing.
Clean up runs after all documents have been indexed.
This means that users may see duplicated content during indexing.
- scoped_full: Similar to Full, but only deletes all documents
that haven't been updated AND that are associated with
source IDs that were seen during indexing.
that haven't been updated AND that are associated with
source ids that were seen during indexing.
- None: Do not delete any documents.
source_id_key: Optional key that helps identify the original source
of the document.
@@ -687,7 +687,7 @@ async def aindex(
key_encoder: Hashing algorithm to use for hashing the document content and
metadata. Options include "blake2b", "sha256", and "sha512".

!!! version-added "Added in `langchain-core` 0.3.66"
!!! version-added "Added in version 0.3.66"

key_encoder: Hashing algorithm to use for hashing the document.
If not provided, a default encoder using SHA-1 will be used.
@@ -701,10 +701,10 @@ async def aindex(
When changing the key encoder, you must change the
index as well to avoid duplicated documents in the cache.
upsert_kwargs: Additional keyword arguments to pass to the add_documents
method of the `VectorStore` or the upsert method of the DocumentIndex.
method of the VectorStore or the upsert method of the DocumentIndex.
For example, you can use this to specify a custom vector_field:
upsert_kwargs={"vector_field": "embedding"}
!!! version-added "Added in `langchain-core` 0.3.10"
!!! version-added "Added in version 0.3.10"

Returns:
Indexing result which contains information about how many documents
@@ -713,10 +713,10 @@ async def aindex(
Raises:
ValueError: If cleanup mode is not one of 'incremental', 'full' or None
ValueError: If cleanup mode is incremental and source_id_key is None.
ValueError: If `VectorStore` does not have
ValueError: If vectorstore does not have
"adelete" and "aadd_documents" required methods.
ValueError: If source_id_key is not None, but is not a string or callable.
TypeError: If `vector_store` is not a `VectorStore` or DocumentIndex.
TypeError: If `vector_store` is not a VectorStore or DocumentIndex.
AssertionError: If `source_id_key` is None when cleanup mode is
incremental or `scoped_full` (should be unreachable).
"""
@@ -757,7 +757,7 @@ async def aindex(
type(destination).adelete == VectorStore.adelete
and type(destination).delete == VectorStore.delete
):
# Checking if the VectorStore has overridden the default adelete or delete
# Checking if the vectorstore has overridden the default adelete or delete
# methods implementation which just raises a NotImplementedError
msg = "Vectorstore has not implemented the adelete or delete method"
raise ValueError(msg)
@@ -815,11 +815,11 @@ async def aindex(
]

if cleanup in {"incremental", "scoped_full"}:
# If the cleanup mode is incremental, source IDs are required.
# If the cleanup mode is incremental, source ids are required.
for source_id, hashed_doc in zip(source_ids, hashed_docs, strict=False):
if source_id is None:
msg = (
f"Source IDs are required when cleanup mode is "
f"Source ids are required when cleanup mode is "
f"incremental or scoped_full. "
f"Document that starts with "
f"content: {hashed_doc.page_content[:100]} "
@@ -828,7 +828,7 @@ async def aindex(
raise ValueError(msg)
if cleanup == "scoped_full":
scoped_full_cleanup_source_ids.add(source_id)
# Source IDs cannot be None after for loop above.
# source ids cannot be None after for loop above.
source_ids = cast("Sequence[str]", source_ids)

exists_batch = await record_manager.aexists(
@@ -888,7 +888,7 @@ async def aindex(
if cleanup == "incremental":
# Get the uids of the documents that were not returned by the loader.

# mypy isn't good enough to determine that source IDs cannot be None
# mypy isn't good enough to determine that source ids cannot be None
# here due to a check that's happening above, so we check again.
for source_id in source_ids:
if source_id is None:

@@ -25,7 +25,7 @@ class RecordManager(ABC):
The record manager abstraction is used by the langchain indexing API.

The record manager keeps track of which documents have been
written into a `VectorStore` and when they were written.
written into a vectorstore and when they were written.

The indexing API computes hashes for each document and stores the hash
together with the write time and the source id in the record manager.
@@ -37,7 +37,7 @@ class RecordManager(ABC):
already been indexed, and to only index new documents.

The main benefit of this abstraction is that it works across many vectorstores.
To be supported, a `VectorStore` needs to only support the ability to add and
To be supported, a vectorstore needs to only support the ability to add and
delete documents by ID. Using the record manager, the indexing API will
be able to delete outdated documents and avoid redundant indexing of documents
that have already been indexed.
@@ -45,13 +45,13 @@ class RecordManager(ABC):
The main constraints of this abstraction are:

1. It relies on the time-stamps to determine which documents have been
indexed and which have not. This means that the time-stamps must be
monotonically increasing. The timestamp should be the timestamp
as measured by the server to minimize issues.
indexed and which have not. This means that the time-stamps must be
monotonically increasing. The timestamp should be the timestamp
as measured by the server to minimize issues.
2. The record manager is currently implemented separately from the
vectorstore, which means that the overall system becomes distributed
and may create issues with consistency. For example, writing to
record manager succeeds, but corresponding writing to `VectorStore` fails.
vectorstore, which means that the overall system becomes distributed
and may create issues with consistency. For example, writing to
record manager succeeds, but corresponding writing to vectorstore fails.
"""

def __init__(
@@ -460,7 +460,7 @@ class UpsertResponse(TypedDict):
class DeleteResponse(TypedDict, total=False):
"""A generic response for delete operation.

The fields in this response are optional and whether the `VectorStore`
The fields in this response are optional and whether the vectorstore
returns them or not is up to the implementation.
"""

@@ -508,6 +508,8 @@ class DocumentIndex(BaseRetriever):
1. Storing document in the index.
2. Fetching document by ID.
3. Searching for document using a query.

!!! version-added "Added in version 0.2.29"
"""

@abc.abstractmethod
@@ -518,7 +520,7 @@ class DocumentIndex(BaseRetriever):
if it is provided. If the ID is not provided, the upsert method is free
to generate an ID for the content.

When an ID is specified and the content already exists in the `VectorStore`,
When an ID is specified and the content already exists in the vectorstore,
the upsert method should update the content with the new data. If the content
does not exist, the upsert method should add the item to the `VectorStore`.

@@ -528,20 +530,20 @@ class DocumentIndex(BaseRetriever):

Returns:
A response object that contains the list of IDs that were
successfully added or updated in the `VectorStore` and the list of IDs that
successfully added or updated in the vectorstore and the list of IDs that
failed to be added or updated.
"""

async def aupsert(
self, items: Sequence[Document], /, **kwargs: Any
) -> UpsertResponse:
"""Add or update documents in the `VectorStore`. Async version of `upsert`.
"""Add or update documents in the vectorstore. Async version of upsert.

The upsert functionality should utilize the ID field of the item
if it is provided. If the ID is not provided, the upsert method is free
to generate an ID for the item.

When an ID is specified and the item already exists in the `VectorStore`,
When an ID is specified and the item already exists in the vectorstore,
the upsert method should update the item with the new data. If the item
does not exist, the upsert method should add the item to the `VectorStore`.

@@ -551,7 +553,7 @@ class DocumentIndex(BaseRetriever):

Returns:
A response object that contains the list of IDs that were
successfully added or updated in the `VectorStore` and the list of IDs that
successfully added or updated in the vectorstore and the list of IDs that
failed to be added or updated.
"""
return await run_in_executor(
@@ -568,7 +570,7 @@ class DocumentIndex(BaseRetriever):
Calling delete without any input parameters should raise a ValueError!

Args:
ids: List of IDs to delete.
ids: List of ids to delete.
**kwargs: Additional keyword arguments. This is up to the implementation.
For example, can include an option to delete the entire index,
or else issue a non-blocking delete etc.
@@ -586,7 +588,7 @@ class DocumentIndex(BaseRetriever):
Calling adelete without any input parameters should raise a ValueError!

Args:
ids: List of IDs to delete.
ids: List of ids to delete.
**kwargs: Additional keyword arguments. This is up to the implementation.
For example, can include an option to delete the entire index.


@@ -23,6 +23,8 @@ class InMemoryDocumentIndex(DocumentIndex):

It provides a simple search API that returns documents by the number of
counts the given query appears in the document.

!!! version-added "Added in version 0.2.29"
"""

store: dict[str, Document] = Field(default_factory=dict)
@@ -62,10 +64,10 @@ class InMemoryDocumentIndex(DocumentIndex):
"""Delete by IDs.

Args:
ids: List of IDs to delete.
ids: List of ids to delete.

Raises:
ValueError: If IDs is None.
ValueError: If ids is None.

Returns:
A response object that contains the list of IDs that were successfully
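
A minimal sketch of the upsert/delete contract described in these hunks, using the in-memory implementation. The import path and the exact response keys are assumptions based on the docstrings above:

```python
from langchain_core.documents import Document
from langchain_core.indexing.in_memory import InMemoryDocumentIndex

doc_index = InMemoryDocumentIndex()
response = doc_index.upsert([Document(id="1", page_content="hello world")])
print(response["succeeded"])  # expected: ['1'] (ID reused since it was provided)
print(doc_index.delete(["1"]))  # a DeleteResponse; its optional fields vary by implementation
```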

@@ -6,13 +6,12 @@ LangChain has two main classes to work with language models: chat models and
**Chat models**

Language models that use a sequence of messages as inputs and return chat messages
as outputs (as opposed to using plain text).
as outputs (as opposed to using plain text). Chat models support the assignment of
distinct roles to conversation messages, helping to distinguish messages from the AI,
users, and instructions such as system messages.

Chat models support the assignment of distinct roles to conversation messages, helping
to distinguish messages from the AI, users, and instructions such as system messages.

The key abstraction for chat models is `BaseChatModel`. Implementations should inherit
from this class.
The key abstraction for chat models is `BaseChatModel`. Implementations
should inherit from this class.

See existing [chat model integrations](https://docs.langchain.com/oss/python/integrations/chat).


@@ -139,7 +139,7 @@ def _normalize_messages(
directly; this may change in the future
- LangChain v0 standard content blocks for backward compatibility

!!! warning "Behavior changed in `langchain-core` 1.0.0"
!!! warning "Behavior changed in 1.0.0"
In previous versions, this function returned messages in LangChain v0 format.
Now, it returns messages in LangChain v1 format, which upgraded chat models now
expect to receive when passing back in message history. For backward

@@ -131,19 +131,14 @@ class BaseLanguageModel(

Caching is not currently supported for streaming methods of models.
"""

verbose: bool = Field(default_factory=_get_verbosity, exclude=True, repr=False)
"""Whether to print out response text."""

callbacks: Callbacks = Field(default=None, exclude=True)
"""Callbacks to add to the run trace."""

tags: list[str] | None = Field(default=None, exclude=True)
"""Tags to add to the run trace."""

metadata: dict[str, Any] | None = Field(default=None, exclude=True)
"""Metadata to add to the run trace."""

custom_get_token_ids: Callable[[str], list[int]] | None = Field(
default=None, exclude=True
)
@@ -200,22 +195,15 @@ class BaseLanguageModel(
type (e.g., pure text completion models vs chat models).

Args:
prompts: List of `PromptValue` objects.

A `PromptValue` is an object that can be converted to match the format
of any language model (string for pure text generation models and
`BaseMessage` objects for chat models).
stop: Stop words to use when generating.

Model output is cut off at the first occurrence of any of these
substrings.
callbacks: `Callbacks` to pass through.

Used for executing additional functionality, such as logging or
streaming, throughout generation.
**kwargs: Arbitrary additional keyword arguments.

These are usually passed to the model provider API call.
prompts: List of `PromptValue` objects. A `PromptValue` is an object that
can be converted to match the format of any language model (string for
pure text generation models and `BaseMessage` objects for chat models).
stop: Stop words to use when generating. Model output is cut off at the
first occurrence of any of these substrings.
callbacks: `Callbacks` to pass through. Used for executing additional
functionality, such as logging or streaming, throughout generation.
**kwargs: Arbitrary additional keyword arguments. These are usually passed
to the model provider API call.

Returns:
An `LLMResult`, which contains a list of candidate `Generation` objects for
@@ -244,22 +232,15 @@ class BaseLanguageModel(
type (e.g., pure text completion models vs chat models).

Args:
prompts: List of `PromptValue` objects.

A `PromptValue` is an object that can be converted to match the format
of any language model (string for pure text generation models and
`BaseMessage` objects for chat models).
stop: Stop words to use when generating.

Model output is cut off at the first occurrence of any of these
substrings.
callbacks: `Callbacks` to pass through.

Used for executing additional functionality, such as logging or
streaming, throughout generation.
**kwargs: Arbitrary additional keyword arguments.

These are usually passed to the model provider API call.
prompts: List of `PromptValue` objects. A `PromptValue` is an object that
can be converted to match the format of any language model (string for
pure text generation models and `BaseMessage` objects for chat models).
stop: Stop words to use when generating. Model output is cut off at the
first occurrence of any of these substrings.
callbacks: `Callbacks` to pass through. Used for executing additional
functionality, such as logging or streaming, throughout generation.
**kwargs: Arbitrary additional keyword arguments. These are usually passed
to the model provider API call.

Returns:
An `LLMResult`, which contains a list of candidate `Generation` objects for
@@ -281,13 +262,13 @@ class BaseLanguageModel(
return self.lc_attributes

def get_token_ids(self, text: str) -> list[int]:
"""Return the ordered IDs of the tokens in a text.
"""Return the ordered ids of the tokens in a text.

Args:
text: The string input to tokenize.

Returns:
A list of IDs corresponding to the tokens in the text, in order they occur
A list of ids corresponding to the tokens in the text, in order they occur
in the text.
"""
if self.custom_get_token_ids is not None:
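
Since `custom_get_token_ids` is the first branch checked here, a sketch of overriding tokenization, using the fake chat model that `langchain-core` ships for testing (the one-character-per-token scheme is a deliberately toy assumption):

```python
from langchain_core.language_models import FakeListChatModel

# Any BaseLanguageModel accepts a custom tokenizer at construction time.
model = FakeListChatModel(
    responses=["ok"],
    custom_get_token_ids=lambda text: [ord(ch) for ch in text],
)
print(model.get_token_ids("abc"))  # [97, 98, 99]
```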

@@ -15,7 +15,6 @@ from typing import TYPE_CHECKING, Any, Literal, cast
from pydantic import BaseModel, ConfigDict, Field
from typing_extensions import override

from langchain_core._api.beta_decorator import beta
from langchain_core.caches import BaseCache
from langchain_core.callbacks import (
AsyncCallbackManager,
@@ -76,8 +75,6 @@ from langchain_core.utils.utils import LC_ID_PREFIX, from_env
if TYPE_CHECKING:
import uuid

from langchain_model_profiles import ModelProfile  # type: ignore[import-untyped]

from langchain_core.output_parsers.base import OutputParserLike
from langchain_core.runnables import Runnable, RunnableConfig
from langchain_core.tools import BaseTool
@@ -332,7 +329,7 @@ class BaseChatModel(BaseLanguageModel[AIMessage], ABC):
[`langchain-openai`](https://pypi.org/project/langchain-openai)) can also use this
field to roll out new content formats in a backward-compatible way.

!!! version-added "Added in `langchain-core` 1.0"
!!! version-added "Added in version 1.0"

"""

@@ -845,21 +842,16 @@ class BaseChatModel(BaseLanguageModel[AIMessage], ABC):

Args:
messages: List of list of messages.
stop: Stop words to use when generating.

Model output is cut off at the first occurrence of any of these
substrings.
callbacks: `Callbacks` to pass through.

Used for executing additional functionality, such as logging or
streaming, throughout generation.
stop: Stop words to use when generating. Model output is cut off at the
first occurrence of any of these substrings.
callbacks: `Callbacks` to pass through. Used for executing additional
functionality, such as logging or streaming, throughout generation.
tags: The tags to apply.
metadata: The metadata to apply.
run_name: The name of the run.
run_id: The ID of the run.
**kwargs: Arbitrary additional keyword arguments.

These are usually passed to the model provider API call.
**kwargs: Arbitrary additional keyword arguments. These are usually passed
to the model provider API call.

Returns:
An `LLMResult`, which contains a list of candidate `Generations` for each
@@ -968,21 +960,16 @@ class BaseChatModel(BaseLanguageModel[AIMessage], ABC):

Args:
messages: List of list of messages.
stop: Stop words to use when generating.

Model output is cut off at the first occurrence of any of these
substrings.
callbacks: `Callbacks` to pass through.

Used for executing additional functionality, such as logging or
streaming, throughout generation.
stop: Stop words to use when generating. Model output is cut off at the
first occurrence of any of these substrings.
callbacks: `Callbacks` to pass through. Used for executing additional
functionality, such as logging or streaming, throughout generation.
tags: The tags to apply.
metadata: The metadata to apply.
run_name: The name of the run.
run_id: The ID of the run.
**kwargs: Arbitrary additional keyword arguments.

These are usually passed to the model provider API call.
**kwargs: Arbitrary additional keyword arguments. These are usually passed
to the model provider API call.

Returns:
An `LLMResult`, which contains a list of candidate `Generations` for each
@@ -1515,10 +1502,10 @@ class BaseChatModel(BaseLanguageModel[AIMessage], ABC):
Args:
schema: The output schema. Can be passed in as:

- An OpenAI function/tool schema,
- A JSON Schema,
- A `TypedDict` class,
- Or a Pydantic class.
- an OpenAI function/tool schema,
- a JSON Schema,
- a `TypedDict` class,
- or a Pydantic class.

If `schema` is a Pydantic class then the model output will be a
Pydantic instance of that class, and the model-generated fields will be
@@ -1530,15 +1517,11 @@ class BaseChatModel(BaseLanguageModel[AIMessage], ABC):
when specifying a Pydantic or `TypedDict` class.

include_raw:
If `False` then only the parsed structured output is returned.

If an error occurs during model output parsing it will be raised.

If `True` then both the raw model response (a `BaseMessage`) and the
parsed model response will be returned.

If an error occurs during output parsing it will be caught and returned
as well.
If `False` then only the parsed structured output is returned. If
an error occurs during model output parsing it will be raised. If `True`
then both the raw model response (a `BaseMessage`) and the parsed model
response will be returned. If an error occurs during output parsing it
will be caught and returned as well.

The final output is always a `dict` with keys `'raw'`, `'parsed'`, and
`'parsing_error'`.
@@ -1643,8 +1626,8 @@ class BaseChatModel(BaseLanguageModel[AIMessage], ABC):
# }
```

!!! warning "Behavior changed in `langchain-core` 0.2.26"
Added support for `TypedDict` class.
!!! warning "Behavior changed in 0.2.26"
Added support for TypedDict class.

"""  # noqa: E501
_ = kwargs.pop("method", None)
@@ -1685,40 +1668,6 @@ class BaseChatModel(BaseLanguageModel[AIMessage], ABC):
return RunnableMap(raw=llm) | parser_with_fallback
return llm | output_parser

@property
@beta()
def profile(self) -> ModelProfile:
"""Return profiling information for the model.

This property relies on the `langchain-model-profiles` package to retrieve chat
model capabilities, such as context window sizes and supported features.

Raises:
ImportError: If `langchain-model-profiles` is not installed.

Returns:
A `ModelProfile` object containing profiling information for the model.
"""
try:
from langchain_model_profiles import get_model_profile  # noqa: PLC0415
except ImportError as err:
informative_error_message = (
"To access model profiling information, please install the "
"`langchain-model-profiles` package: "
"`pip install langchain-model-profiles`."
)
raise ImportError(informative_error_message) from err

provider_id = self._llm_type
model_name = (
# Model name is not standardized across integrations. New integrations
# should prefer `model`.
getattr(self, "model", None)
or getattr(self, "model_name", None)
or getattr(self, "model_id", "")
)
return get_model_profile(provider_id, model_name) or {}
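
For reference, how the beta `profile` property removed in this hunk was meant to be used. A sketch under stated assumptions: it requires `pip install langchain-model-profiles`, `model` is a hypothetical chat model instance, and `"max_input_tokens"` is an assumed profile key:

```python
# Hypothetical usage of the (beta) profile property shown above.
profile = model.profile  # raises ImportError without langchain-model-profiles
print(profile.get("max_input_tokens"))  # key name is an assumption
```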
|
||||
|
||||
|
||||
class SimpleChatModel(BaseChatModel):
|
||||
"""Simplified implementation for a chat model to inherit from.
|
||||
@@ -1777,12 +1726,9 @@ def _gen_info_and_msg_metadata(
|
||||
}
|
||||
|
||||
|
||||
_MAX_CLEANUP_DEPTH = 100
|
||||
|
||||
|
||||
def _cleanup_llm_representation(serialized: Any, depth: int) -> None:
|
||||
"""Remove non-serializable objects from a serialized object."""
|
||||
if depth > _MAX_CLEANUP_DEPTH: # Don't cooperate for pathological cases
|
||||
if depth > 100: # Don't cooperate for pathological cases
|
||||
return
|
||||
|
||||
if not isinstance(serialized, dict):
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
"""Fake chat models for testing purposes."""
|
||||
"""Fake chat model for testing purposes."""
|
||||
|
||||
import asyncio
|
||||
import re
|
||||
|
||||
@@ -1,7 +1,4 @@
|
||||
"""Base interface for traditional large language models (LLMs) to expose.
|
||||
|
||||
These are traditionally older models (newer models generally are chat models).
|
||||
"""
|
||||
"""Base interface for large language models to expose."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
@@ -651,12 +648,9 @@ class BaseLLM(BaseLanguageModel[str], ABC):
|
||||
|
||||
Args:
|
||||
prompts: The prompts to generate from.
|
||||
stop: Stop words to use when generating.
|
||||
|
||||
Model output is cut off at the first occurrence of any of these
|
||||
substrings.
|
||||
|
||||
If stop tokens are not supported consider raising `NotImplementedError`.
|
||||
stop: Stop words to use when generating. Model output is cut off at the
|
||||
first occurrence of any of the stop substrings.
|
||||
If stop tokens are not supported consider raising NotImplementedError.
|
||||
run_manager: Callback manager for the run.
|
||||
|
||||
Returns:
|
||||
@@ -674,12 +668,9 @@ class BaseLLM(BaseLanguageModel[str], ABC):
|
||||
|
||||
Args:
|
||||
prompts: The prompts to generate from.
|
||||
stop: Stop words to use when generating.
|
||||
|
||||
Model output is cut off at the first occurrence of any of these
|
||||
substrings.
|
||||
|
||||
If stop tokens are not supported consider raising `NotImplementedError`.
|
||||
stop: Stop words to use when generating. Model output is cut off at the
|
||||
first occurrence of any of the stop substrings.
|
||||
If stop tokens are not supported consider raising NotImplementedError.
|
||||
run_manager: Callback manager for the run.
|
||||
|
||||
Returns:
|
||||
@@ -711,14 +702,11 @@ class BaseLLM(BaseLanguageModel[str], ABC):
|
||||
|
||||
Args:
|
||||
prompt: The prompt to generate from.
|
||||
stop: Stop words to use when generating.
|
||||
|
||||
Model output is cut off at the first occurrence of any of these
|
||||
substrings.
|
||||
stop: Stop words to use when generating. Model output is cut off at the
|
||||
first occurrence of any of these substrings.
|
||||
run_manager: Callback manager for the run.
|
||||
**kwargs: Arbitrary additional keyword arguments.
|
||||
|
||||
These are usually passed to the model provider API call.
|
||||
**kwargs: Arbitrary additional keyword arguments. These are usually passed
|
||||
to the model provider API call.
|
||||
|
||||
Yields:
|
||||
Generation chunks.
|
||||
@@ -740,14 +728,11 @@ class BaseLLM(BaseLanguageModel[str], ABC):
|
||||
|
||||
Args:
|
||||
prompt: The prompt to generate from.
|
||||
stop: Stop words to use when generating.
|
||||
|
||||
Model output is cut off at the first occurrence of any of these
|
||||
substrings.
|
||||
stop: Stop words to use when generating. Model output is cut off at the
|
||||
first occurrence of any of these substrings.
|
||||
run_manager: Callback manager for the run.
|
||||
**kwargs: Arbitrary additional keyword arguments.
|
||||
|
||||
These are usually passed to the model provider API call.
|
||||
**kwargs: Arbitrary additional keyword arguments. These are usually passed
|
||||
to the model provider API call.
|
||||
|
||||
Yields:
|
||||
Generation chunks.
|
||||
@@ -858,14 +843,10 @@ class BaseLLM(BaseLanguageModel[str], ABC):
|
||||
|
||||
Args:
|
||||
prompts: List of string prompts.
|
||||
stop: Stop words to use when generating.
|
||||
|
||||
Model output is cut off at the first occurrence of any of these
|
||||
substrings.
|
||||
callbacks: `Callbacks` to pass through.
|
||||
|
||||
Used for executing additional functionality, such as logging or
|
||||
streaming, throughout generation.
|
||||
stop: Stop words to use when generating. Model output is cut off at the
|
||||
first occurrence of any of these substrings.
|
||||
callbacks: `Callbacks` to pass through. Used for executing additional
|
||||
functionality, such as logging or streaming, throughout generation.
|
||||
tags: List of tags to associate with each prompt. If provided, the length
|
||||
of the list must match the length of the prompts list.
|
||||
metadata: List of metadata dictionaries to associate with each prompt. If
|
||||
@@ -875,9 +856,8 @@ class BaseLLM(BaseLanguageModel[str], ABC):
|
||||
length of the list must match the length of the prompts list.
|
||||
run_id: List of run IDs to associate with each prompt. If provided, the
|
||||
length of the list must match the length of the prompts list.
|
||||
**kwargs: Arbitrary additional keyword arguments.
|
||||
|
||||
These are usually passed to the model provider API call.
|
||||
**kwargs: Arbitrary additional keyword arguments. These are usually passed
|
||||
to the model provider API call.
|
||||
|
||||
Raises:
|
||||
ValueError: If prompts is not a list.
|
||||
@@ -1133,14 +1113,10 @@ class BaseLLM(BaseLanguageModel[str], ABC):
|
||||
|
||||
Args:
|
||||
prompts: List of string prompts.
|
||||
stop: Stop words to use when generating.
|
||||
|
||||
Model output is cut off at the first occurrence of any of these
|
||||
substrings.
|
||||
callbacks: `Callbacks` to pass through.
|
||||
|
||||
Used for executing additional functionality, such as logging or
|
||||
streaming, throughout generation.
|
||||
stop: Stop words to use when generating. Model output is cut off at the
|
||||
first occurrence of any of these substrings.
|
||||
callbacks: `Callbacks` to pass through. Used for executing additional
|
||||
functionality, such as logging or streaming, throughout generation.
|
||||
tags: List of tags to associate with each prompt. If provided, the length
|
||||
of the list must match the length of the prompts list.
|
||||
metadata: List of metadata dictionaries to associate with each prompt. If
|
||||
@@ -1150,9 +1126,8 @@ class BaseLLM(BaseLanguageModel[str], ABC):
|
||||
length of the list must match the length of the prompts list.
|
||||
run_id: List of run IDs to associate with each prompt. If provided, the
|
||||
length of the list must match the length of the prompts list.
|
||||
**kwargs: Arbitrary additional keyword arguments.
|
||||
|
||||
These are usually passed to the model provider API call.
|
||||
**kwargs: Arbitrary additional keyword arguments. These are usually passed
|
||||
to the model provider API call.
|
||||
|
||||
Raises:
|
||||
ValueError: If the length of `callbacks`, `tags`, `metadata`, or
|
||||
@@ -1416,6 +1391,11 @@ class LLM(BaseLLM):
|
||||
`astream` will use `_astream` if provided, otherwise it will implement
|
||||
a fallback behavior that will use `_stream` if `_stream` is implemented,
|
||||
and use `_acall` if `_stream` is not implemented.
|
||||
|
||||
Please see the following guide for more information on how to
|
||||
implement a custom LLM:
|
||||
|
||||
https://python.langchain.com/docs/how_to/custom_llm/
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
@@ -1432,16 +1412,12 @@ class LLM(BaseLLM):
|
||||
|
||||
Args:
|
||||
prompt: The prompt to generate from.
|
||||
stop: Stop words to use when generating.
|
||||
|
||||
Model output is cut off at the first occurrence of any of these
|
||||
substrings.
|
||||
|
||||
If stop tokens are not supported consider raising `NotImplementedError`.
|
||||
stop: Stop words to use when generating. Model output is cut off at the
|
||||
first occurrence of any of the stop substrings.
|
||||
If stop tokens are not supported consider raising NotImplementedError.
|
||||
run_manager: Callback manager for the run.
|
||||
**kwargs: Arbitrary additional keyword arguments.
|
||||
|
||||
These are usually passed to the model provider API call.
|
||||
**kwargs: Arbitrary additional keyword arguments. These are usually passed
|
||||
to the model provider API call.
|
||||
|
||||
Returns:
|
||||
The model output as a string. SHOULD NOT include the prompt.
|
||||
@@ -1462,16 +1438,12 @@ class LLM(BaseLLM):
|
||||
|
||||
Args:
|
||||
prompt: The prompt to generate from.
|
||||
stop: Stop words to use when generating.
|
||||
|
||||
Model output is cut off at the first occurrence of any of these
|
||||
substrings.
|
||||
|
||||
If stop tokens are not supported consider raising `NotImplementedError`.
|
||||
stop: Stop words to use when generating. Model output is cut off at the
|
||||
first occurrence of any of the stop substrings.
|
||||
If stop tokens are not supported consider raising NotImplementedError.
|
||||
run_manager: Callback manager for the run.
|
||||
**kwargs: Arbitrary additional keyword arguments.
|
||||
|
||||
These are usually passed to the model provider API call.
|
||||
**kwargs: Arbitrary additional keyword arguments. These are usually passed
|
||||
to the model provider API call.
|
||||
|
||||
Returns:
|
||||
The model output as a string. SHOULD NOT include the prompt.
|
||||
|
||||
@@ -17,7 +17,7 @@ def default(obj: Any) -> Any:
obj: The object to serialize to json if it is a Serializable object.

Returns:
A JSON serializable object or a SerializedNotImplemented object.
A json serializable object or a SerializedNotImplemented object.
"""
if isinstance(obj, Serializable):
return obj.to_json()
@@ -38,7 +38,7 @@ def _dump_pydantic_models(obj: Any) -> Any:


def dumps(obj: Any, *, pretty: bool = False, **kwargs: Any) -> str:
"""Return a JSON string representation of an object.
"""Return a json string representation of an object.

Args:
obj: The object to dump.
@@ -47,7 +47,7 @@ def dumps(obj: Any, *, pretty: bool = False, **kwargs: Any) -> str:
**kwargs: Additional arguments to pass to `json.dumps`

Returns:
A JSON string representation of the object.
A json string representation of the object.

Raises:
ValueError: If `default` is passed as a kwarg.
@@ -71,12 +71,14 @@ def dumps(obj: Any, *, pretty: bool = False, **kwargs: Any) -> str:
def dumpd(obj: Any) -> Any:
"""Return a dict representation of an object.

!!! note
Unfortunately this function is not as efficient as it could be because it first
dumps the object to a json string and then loads it back into a dictionary.

Args:
obj: The object to dump.

Returns:
Dictionary that can be serialized to json using `json.dumps`.
dictionary that can be serialized to json using json.dumps
"""
# Unfortunately this function is not as efficient as it could be because it first
# dumps the object to a json string and then loads it back into a dictionary.
return json.loads(dumps(obj))
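For context, `dumps`/`dumpd` round-trip any `Serializable` LangChain object; a small usage sketch (assumes `langchain-core` is installed):

```python
# Round-tripping a message through the serialization helpers.
from langchain_core.load import dumpd, dumps, load
from langchain_core.messages import HumanMessage

msg = HumanMessage(content="hello")

as_json = dumps(msg, pretty=True)  # JSON string
as_dict = dumpd(msg)               # plain dict, json.dumps-able

restored = load(as_dict)           # back to a HumanMessage
assert restored == msg
```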
|
||||
|
||||
@@ -265,8 +265,6 @@ def load(
|
||||
return reviver(loaded_obj)
|
||||
if isinstance(obj, list):
|
||||
return [_load(o) for o in obj]
|
||||
if isinstance(obj, str) and obj in reviver.secrets_map:
|
||||
return reviver.secrets_map[obj]
|
||||
return obj
|
||||
|
||||
return _load(obj)
|
||||
|
||||
@@ -97,14 +97,11 @@ class Serializable(BaseModel, ABC):
|
||||
by default. This is to prevent accidental serialization of objects that should
|
||||
not be serialized.
|
||||
- `get_lc_namespace`: Get the namespace of the LangChain object.
|
||||
|
||||
During deserialization, this namespace is used to identify
|
||||
the correct class to instantiate.
|
||||
|
||||
Please see the `Reviver` class in `langchain_core.load.load` for more details.
|
||||
During deserialization an additional mapping is handle classes that have moved
|
||||
or been renamed across package versions.
|
||||
|
||||
- `lc_secrets`: A map of constructor argument names to secret ids.
|
||||
- `lc_attributes`: List of additional attribute names that should be included
|
||||
as part of the serialized representation.
|
||||
@@ -197,7 +194,7 @@ class Serializable(BaseModel, ABC):
|
||||
ValueError: If the class has deprecated attributes.
|
||||
|
||||
Returns:
|
||||
A JSON serializable object or a `SerializedNotImplemented` object.
|
||||
A json serializable object or a `SerializedNotImplemented` object.
|
||||
"""
|
||||
if not self.is_lc_serializable():
|
||||
return self.to_json_not_implemented()
|
||||
|
||||
@@ -9,9 +9,6 @@ if TYPE_CHECKING:
|
||||
from langchain_core.messages.ai import (
|
||||
AIMessage,
|
||||
AIMessageChunk,
|
||||
InputTokenDetails,
|
||||
OutputTokenDetails,
|
||||
UsageMetadata,
|
||||
)
|
||||
from langchain_core.messages.base import (
|
||||
BaseMessage,
|
||||
@@ -90,12 +87,10 @@ __all__ = (
|
||||
"HumanMessage",
|
||||
"HumanMessageChunk",
|
||||
"ImageContentBlock",
|
||||
"InputTokenDetails",
|
||||
"InvalidToolCall",
|
||||
"MessageLikeRepresentation",
|
||||
"NonStandardAnnotation",
|
||||
"NonStandardContentBlock",
|
||||
"OutputTokenDetails",
|
||||
"PlainTextContentBlock",
|
||||
"ReasoningContentBlock",
|
||||
"RemoveMessage",
|
||||
@@ -109,7 +104,6 @@ __all__ = (
|
||||
"ToolCallChunk",
|
||||
"ToolMessage",
|
||||
"ToolMessageChunk",
|
||||
"UsageMetadata",
|
||||
"VideoContentBlock",
|
||||
"_message_from_dict",
|
||||
"convert_to_messages",
|
||||
@@ -151,7 +145,6 @@ _dynamic_imports = {
|
||||
"HumanMessageChunk": "human",
|
||||
"NonStandardAnnotation": "content",
|
||||
"NonStandardContentBlock": "content",
|
||||
"OutputTokenDetails": "ai",
|
||||
"PlainTextContentBlock": "content",
|
||||
"ReasoningContentBlock": "content",
|
||||
"RemoveMessage": "modifier",
|
||||
@@ -161,14 +154,12 @@ _dynamic_imports = {
|
||||
"SystemMessage": "system",
|
||||
"SystemMessageChunk": "system",
|
||||
"ImageContentBlock": "content",
|
||||
"InputTokenDetails": "ai",
|
||||
"InvalidToolCall": "tool",
|
||||
"TextContentBlock": "content",
|
||||
"ToolCall": "tool",
|
||||
"ToolCallChunk": "tool",
|
||||
"ToolMessage": "tool",
|
||||
"ToolMessageChunk": "tool",
|
||||
"UsageMetadata": "ai",
|
||||
"VideoContentBlock": "content",
|
||||
"AnyMessage": "utils",
|
||||
"MessageLikeRepresentation": "utils",
|
||||
|
||||
@@ -48,9 +48,9 @@ class InputTokenDetails(TypedDict, total=False):
|
||||
}
|
||||
```
|
||||
|
||||
May also hold extra provider-specific keys.
|
||||
!!! version-added "Added in version 0.3.9"
|
||||
|
||||
!!! version-added "Added in `langchain-core` 0.3.9"
|
||||
May also hold extra provider-specific keys.
|
||||
|
||||
"""
|
||||
|
||||
@@ -83,9 +83,7 @@ class OutputTokenDetails(TypedDict, total=False):
|
||||
}
|
||||
```
|
||||
|
||||
May also hold extra provider-specific keys.
|
||||
|
||||
!!! version-added "Added in `langchain-core` 0.3.9"
|
||||
!!! version-added "Added in version 0.3.9"
|
||||
|
||||
"""
|
||||
|
||||
@@ -123,13 +121,9 @@ class UsageMetadata(TypedDict):
}
```

!!! warning "Behavior changed in `langchain-core` 0.3.9"
!!! warning "Behavior changed in 0.3.9"
Added `input_token_details` and `output_token_details`.

!!! note "LangSmith SDK"
The LangSmith SDK also has a `UsageMetadata` class. While the two share fields,
LangSmith's `UsageMetadata` has additional fields to capture cost information
used by the LangSmith platform.
"""

input_tokens: int
@@ -137,7 +131,7 @@ class UsageMetadata(TypedDict):
output_tokens: int
"""Count of output (or completion) tokens. Sum of all output token types."""
total_tokens: int
"""Total token count. Sum of `input_tokens` + `output_tokens`."""
"""Total token count. Sum of input_tokens + output_tokens."""
input_token_details: NotRequired[InputTokenDetails]
"""Breakdown of input token counts.

@@ -147,6 +141,7 @@ class UsageMetadata(TypedDict):
"""Breakdown of output token counts.

Does *not* need to sum to full output token count. Does *not* need to have all keys.

"""
|
||||
|
||||
|
||||
@@ -158,6 +153,7 @@ class AIMessage(BaseMessage):
|
||||
This message represents the output of the model and consists of both
|
||||
the raw output as returned by the model and standardized fields
|
||||
(e.g., tool calls, usage metadata) added by the LangChain framework.
|
||||
|
||||
"""
|
||||
|
||||
tool_calls: list[ToolCall] = []
|
||||
@@ -655,13 +651,13 @@ def add_ai_message_chunks(
|
||||
chunk_id = id_
|
||||
break
|
||||
else:
|
||||
# second pass: prefer lc_run-* IDs over lc_* IDs
|
||||
# second pass: prefer lc_run-* ids over lc_* ids
|
||||
for id_ in candidates:
|
||||
if id_ and id_.startswith(LC_ID_PREFIX):
|
||||
chunk_id = id_
|
||||
break
|
||||
else:
|
||||
# third pass: take any remaining ID (auto-generated lc_* IDs)
|
||||
# third pass: take any remaining id (auto-generated lc_* ids)
|
||||
for id_ in candidates:
|
||||
if id_:
|
||||
chunk_id = id_
|
||||
|
||||
@@ -93,10 +93,6 @@ class BaseMessage(Serializable):
|
||||
"""Base abstract message class.
|
||||
|
||||
Messages are the inputs and outputs of a chat model.
|
||||
|
||||
Examples include [`HumanMessage`][langchain.messages.HumanMessage],
|
||||
[`AIMessage`][langchain.messages.AIMessage], and
|
||||
[`SystemMessage`][langchain.messages.SystemMessage].
|
||||
"""
|
||||
|
||||
content: str | list[str | dict]
|
||||
@@ -199,7 +195,7 @@ class BaseMessage(Serializable):
|
||||
def content_blocks(self) -> list[types.ContentBlock]:
|
||||
r"""Load content blocks from the message content.
|
||||
|
||||
!!! version-added "Added in `langchain-core` 1.0.0"
|
||||
!!! version-added "Added in version 1.0.0"
|
||||
|
||||
"""
|
||||
# Needed here to avoid circular import, as these classes import BaseMessages
|
||||
|
||||
@@ -368,7 +368,7 @@ def _convert_to_v1_from_genai(message: AIMessage) -> list[types.ContentBlock]:
|
||||
else:
|
||||
# Assume it's raw base64 without data URI
|
||||
try:
|
||||
# Validate base64 and decode for MIME type detection
|
||||
# Validate base64 and decode for mime type detection
|
||||
decoded_bytes = base64.b64decode(url, validate=True)
|
||||
|
||||
image_url_b64_block = {
|
||||
@@ -379,7 +379,7 @@ def _convert_to_v1_from_genai(message: AIMessage) -> list[types.ContentBlock]:
|
||||
try:
|
||||
import filetype # type: ignore[import-not-found] # noqa: PLC0415
|
||||
|
||||
# Guess MIME type based on file bytes
|
||||
# Guess mime type based on file bytes
|
||||
mime_type = None
|
||||
kind = filetype.guess(decoded_bytes)
|
||||
if kind:
|
||||
@@ -458,8 +458,6 @@ def _convert_to_v1_from_genai(message: AIMessage) -> list[types.ContentBlock]:
|
||||
if outcome is not None:
|
||||
server_tool_result_block["extras"]["outcome"] = outcome
|
||||
converted_blocks.append(server_tool_result_block)
|
||||
elif item_type == "text":
|
||||
converted_blocks.append(cast("types.TextContentBlock", item))
|
||||
else:
|
||||
# Unknown type, preserve as non-standard
|
||||
converted_blocks.append({"type": "non_standard", "value": item})
|
||||
|
||||
@@ -644,7 +644,7 @@ class AudioContentBlock(TypedDict):
|
||||
|
||||
|
||||
class PlainTextContentBlock(TypedDict):
|
||||
"""Plaintext data (e.g., from a `.txt` or `.md` document).
|
||||
"""Plaintext data (e.g., from a document).
|
||||
|
||||
!!! note
|
||||
A `PlainTextContentBlock` existed in `langchain-core<1.0.0`. Although the
|
||||
@@ -767,7 +767,7 @@ class FileContentBlock(TypedDict):
|
||||
|
||||
|
||||
class NonStandardContentBlock(TypedDict):
|
||||
"""Provider-specific content data.
|
||||
"""Provider-specific data.
|
||||
|
||||
This block contains data for which there is not yet a standard type.
|
||||
|
||||
@@ -802,7 +802,7 @@ class NonStandardContentBlock(TypedDict):
|
||||
"""
|
||||
|
||||
value: dict[str, Any]
|
||||
"""Provider-specific content data."""
|
||||
"""Provider-specific data."""
|
||||
|
||||
index: NotRequired[int | str]
|
||||
"""Index of block in aggregate response. Used during streaming."""
|
||||
@@ -867,7 +867,7 @@ def _get_data_content_block_types() -> tuple[str, ...]:
|
||||
Example: ("image", "video", "audio", "text-plain", "file")
|
||||
|
||||
Note that old style multimodal blocks type literals with new style blocks.
|
||||
Specifically, "image", "audio", and "file".
|
||||
Speficially, "image", "audio", and "file".
|
||||
|
||||
See the docstring of `_normalize_messages` in `language_models._utils` for details.
|
||||
"""
|
||||
@@ -906,7 +906,7 @@ def is_data_content_block(block: dict) -> bool:
|
||||
|
||||
# 'text' is checked to support v0 PlainTextContentBlock types
|
||||
# We must guard against new style TextContentBlock which also has 'text' `type`
|
||||
# by ensuring the presence of `source_type`
|
||||
# by ensuring the presense of `source_type`
|
||||
if block["type"] == "text" and "source_type" not in block: # noqa: SIM103 # This is more readable
|
||||
return False
|
||||
|
||||
@@ -1399,7 +1399,7 @@ def create_non_standard_block(
|
||||
"""Create a `NonStandardContentBlock`.
|
||||
|
||||
Args:
|
||||
value: Provider-specific content data.
|
||||
value: Provider-specific data.
|
||||
id: Content block identifier. Generated automatically if not provided.
|
||||
index: Index of block in aggregate response. Used during streaming.
|
||||
|
||||
|
||||
@@ -86,7 +86,7 @@ AnyMessage = Annotated[
|
||||
| Annotated[ToolMessageChunk, Tag(tag="ToolMessageChunk")],
|
||||
Field(discriminator=Discriminator(_get_type)),
|
||||
]
|
||||
"""A type representing any defined `Message` or `MessageChunk` type."""
|
||||
""""A type representing any defined `Message` or `MessageChunk` type."""
|
||||
|
||||
|
||||
def get_buffer_string(
|
||||
@@ -328,16 +328,12 @@ def _convert_to_message(message: MessageLikeRepresentation) -> BaseMessage:
|
||||
"""
|
||||
if isinstance(message, BaseMessage):
|
||||
message_ = message
|
||||
elif isinstance(message, Sequence):
|
||||
if isinstance(message, str):
|
||||
message_ = _create_message_from_message_type("human", message)
|
||||
else:
|
||||
try:
|
||||
message_type_str, template = message
|
||||
except ValueError as e:
|
||||
msg = "Message as a sequence must be (role string, template)"
|
||||
raise NotImplementedError(msg) from e
|
||||
message_ = _create_message_from_message_type(message_type_str, template)
|
||||
elif isinstance(message, str):
|
||||
message_ = _create_message_from_message_type("human", message)
|
||||
elif isinstance(message, Sequence) and len(message) == 2:
|
||||
# mypy doesn't realise this can't be a string given the previous branch
|
||||
message_type_str, template = message # type: ignore[misc]
|
||||
message_ = _create_message_from_message_type(message_type_str, template)
|
||||
elif isinstance(message, dict):
|
||||
msg_kwargs = message.copy()
|
||||
try:
|
||||
@@ -443,8 +439,8 @@ def filter_messages(
exclude_ids: Message IDs to exclude.
exclude_tool_calls: Tool call IDs to exclude.
Can be one of the following:
- `True`: All `AIMessage` objects with tool calls and all `ToolMessage`
objects will be excluded.
- `True`: all `AIMessage`s with tool calls and all
`ToolMessage` objects will be excluded.
- a sequence of tool call IDs to exclude:
- `ToolMessage` objects with the corresponding tool call ID will be
excluded.
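A short usage sketch of `filter_messages` with `exclude_tool_calls` (the message and tool-call IDs are hypothetical):

```python
# Dropping one tool-call round trip from a conversation.
from langchain_core.messages import (
    AIMessage,
    HumanMessage,
    ToolMessage,
    filter_messages,
)

messages = [
    HumanMessage("What is 2 + 2?", id="m1"),
    AIMessage(
        "",
        id="m2",
        tool_calls=[{"name": "add", "args": {"a": 2, "b": 2}, "id": "call_1"}],
    ),
    ToolMessage("4", tool_call_id="call_1", id="m3"),
    AIMessage("2 + 2 = 4", id="m4"),
]

# The ToolMessage for call_1 is dropped; the matching tool call
# is removed from the AIMessage that issued it.
trimmed = filter_messages(messages, exclude_tool_calls=["call_1"])
```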
|
||||
@@ -1029,18 +1025,18 @@ def convert_to_openai_messages(
messages: Message-like object or iterable of objects whose contents are
in OpenAI, Anthropic, Bedrock Converse, or VertexAI formats.
text_format: How to format string or text block contents:
- `'string'`:
If a message has a string content, this is left as a string. If
a message has content blocks that are all of type `'text'`, these
are joined with a newline to make a single string. If a message has
content blocks and at least one isn't of type `'text'`, then
all blocks are left as dicts.
- `'block'`:
If a message has a string content, this is turned into a list
with a single content block of type `'text'`. If a message has
content blocks these are left as is.
include_id: Whether to include message IDs in the openai messages, if they
are present in the source messages.
- `'string'`:
If a message has a string content, this is left as a string. If
a message has content blocks that are all of type `'text'`, these
are joined with a newline to make a single string. If a message has
content blocks and at least one isn't of type `'text'`, then
all blocks are left as dicts.
- `'block'`:
If a message has a string content, this is turned into a list
with a single content block of type `'text'`. If a message has
content blocks these are left as is.
include_id: Whether to include message ids in the openai messages, if they
are present in the source messages.

Raises:
ValueError: if an unrecognized `text_format` is specified, or if a message
@@ -1101,7 +1097,7 @@ def convert_to_openai_messages(
# ]
```

!!! version-added "Added in `langchain-core` 0.3.11"
!!! version-added "Added in version 0.3.11"

""" # noqa: E501
|
||||
if text_format not in {"string", "block"}:
|
||||
@@ -1701,7 +1697,7 @@ def count_tokens_approximately(
|
||||
Warning:
|
||||
This function does not currently support counting image tokens.
|
||||
|
||||
!!! version-added "Added in `langchain-core` 0.3.46"
|
||||
!!! version-added "Added in version 0.3.46"
|
||||
|
||||
"""
|
||||
token_count = 0.0
|
||||
|
||||
@@ -1,20 +1,4 @@

"""`OutputParser` classes parse the output of an LLM call into structured data.

!!! tip "Structured output"

Output parsers emerged as an early solution to the challenge of obtaining structured
output from LLMs.

Today, most LLMs support [structured output](https://docs.langchain.com/oss/python/langchain/models#structured-outputs)
natively. In such cases, using output parsers may be unnecessary, and you should
leverage the model's built-in capabilities for structured output. Refer to the
[documentation of your chosen model](https://docs.langchain.com/oss/python/integrations/providers/overview)
for guidance on how to achieve structured output directly.

Output parsers remain valuable when working with models that do not support
structured output natively, or when you require additional processing or validation
of the model's output beyond its inherent capabilities.
"""
"""**OutputParser** classes parse the output of an LLM call."""
|
||||
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
|
||||
@@ -135,9 +135,6 @@ class BaseOutputParser(

Example:
```python
# Implement a simple boolean output parser


class BooleanOutputParser(BaseOutputParser[bool]):
true_val: str = "YES"
false_val: str = "NO"
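The example above is cut off in this hunk; for reference, a complete version of the same parser (a sketch, with the `parse` logic assumed):

```python
# Simple boolean output parser built on BaseOutputParser.
from langchain_core.output_parsers import BaseOutputParser


class BooleanOutputParser(BaseOutputParser[bool]):
    """Parse 'YES'/'NO' model output into a bool."""

    true_val: str = "YES"
    false_val: str = "NO"

    def parse(self, text: str) -> bool:
        cleaned = text.strip().upper()
        if cleaned not in (self.true_val.upper(), self.false_val.upper()):
            msg = f"Expected {self.true_val} or {self.false_val}, got {text!r}"
            raise ValueError(msg)
        return cleaned == self.true_val.upper()

    @property
    def _type(self) -> str:
        return "boolean_output_parser"
```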
|
||||
@@ -1,16 +1,11 @@

"""Format instructions."""

JSON_FORMAT_INSTRUCTIONS = """STRICT OUTPUT FORMAT:
- Return only the JSON value that conforms to the schema. Do not include any additional text, explanations, headings, or separators.
- Do not wrap the JSON in Markdown or code fences (no ``` or ```json).
- Do not prepend or append any text (e.g., do not write "Here is the JSON:").
- The response must be a single top-level JSON value exactly as required by the schema (object/array/etc.), with no trailing commas or comments.
JSON_FORMAT_INSTRUCTIONS = """The output should be formatted as a JSON instance that conforms to the JSON schema below.

The output should be formatted as a JSON instance that conforms to the JSON schema below.
As an example, for the schema {{"properties": {{"foo": {{"title": "Foo", "description": "a list of strings", "type": "array", "items": {{"type": "string"}}}}}}, "required": ["foo"]}}
the object {{"foo": ["bar", "baz"]}} is a well-formatted instance of the schema. The object {{"properties": {{"foo": ["bar", "baz"]}}}} is not well-formatted.

As an example, for the schema {{"properties": {{"foo": {{"title": "Foo", "description": "a list of strings", "type": "array", "items": {{"type": "string"}}}}}}, "required": ["foo"]}} the object {{"foo": ["bar", "baz"]}} is a well-formatted instance of the schema. The object {{"properties": {{"foo": ["bar", "baz"]}}}} is not well-formatted.

Here is the output schema (shown in a code block for readability only — do not include any backticks or Markdown in your output):
Here is the output schema:
```
{schema}
```""" # noqa: E501
|
||||
|
||||
@@ -31,14 +31,11 @@ TBaseModel = TypeVar("TBaseModel", bound=PydanticBaseModel)
class JsonOutputParser(BaseCumulativeTransformOutputParser[Any]):
"""Parse the output of an LLM call to a JSON object.

Probably the most reliable output parser for getting structured data that does *not*
use function calling.

When used in streaming mode, it will yield partial JSON objects containing
all the keys that have been returned so far.

In streaming, if `diff` is set to `True`, yields JSONPatch operations describing the
difference between the previous and the current object.
In streaming, if `diff` is set to `True`, yields JSONPatch operations
describing the difference between the previous and the current object.
"""
|
||||
|
||||
pydantic_object: Annotated[type[TBaseModel] | None, SkipValidation()] = None # type: ignore[valid-type]
|
||||
|
||||
@@ -41,7 +41,7 @@ def droplastn(
|
||||
|
||||
|
||||
class ListOutputParser(BaseTransformOutputParser[list[str]]):
|
||||
"""Parse the output of a model to a list."""
|
||||
"""Parse the output of an LLM call to a list."""
|
||||
|
||||
@property
|
||||
def _type(self) -> str:
|
||||
@@ -74,30 +74,30 @@ class ListOutputParser(BaseTransformOutputParser[list[str]]):
|
||||
buffer = ""
|
||||
for chunk in input:
|
||||
if isinstance(chunk, BaseMessage):
|
||||
# Extract text
|
||||
# extract text
|
||||
chunk_content = chunk.content
|
||||
if not isinstance(chunk_content, str):
|
||||
continue
|
||||
buffer += chunk_content
|
||||
else:
|
||||
# Add current chunk to buffer
|
||||
# add current chunk to buffer
|
||||
buffer += chunk
|
||||
# Parse buffer into a list of parts
|
||||
# parse buffer into a list of parts
|
||||
try:
|
||||
done_idx = 0
|
||||
# Yield only complete parts
|
||||
# yield only complete parts
|
||||
for m in droplastn(self.parse_iter(buffer), 1):
|
||||
done_idx = m.end()
|
||||
yield [m.group(1)]
|
||||
buffer = buffer[done_idx:]
|
||||
except NotImplementedError:
|
||||
parts = self.parse(buffer)
|
||||
# Yield only complete parts
|
||||
# yield only complete parts
|
||||
if len(parts) > 1:
|
||||
for part in parts[:-1]:
|
||||
yield [part]
|
||||
buffer = parts[-1]
|
||||
# Yield the last part
|
||||
# yield the last part
|
||||
for part in self.parse(buffer):
|
||||
yield [part]
|
||||
|
||||
@@ -108,40 +108,40 @@ class ListOutputParser(BaseTransformOutputParser[list[str]]):
|
||||
buffer = ""
|
||||
async for chunk in input:
|
||||
if isinstance(chunk, BaseMessage):
|
||||
# Extract text
|
||||
# extract text
|
||||
chunk_content = chunk.content
|
||||
if not isinstance(chunk_content, str):
|
||||
continue
|
||||
buffer += chunk_content
|
||||
else:
|
||||
# Add current chunk to buffer
|
||||
# add current chunk to buffer
|
||||
buffer += chunk
|
||||
# Parse buffer into a list of parts
|
||||
# parse buffer into a list of parts
|
||||
try:
|
||||
done_idx = 0
|
||||
# Yield only complete parts
|
||||
# yield only complete parts
|
||||
for m in droplastn(self.parse_iter(buffer), 1):
|
||||
done_idx = m.end()
|
||||
yield [m.group(1)]
|
||||
buffer = buffer[done_idx:]
|
||||
except NotImplementedError:
|
||||
parts = self.parse(buffer)
|
||||
# Yield only complete parts
|
||||
# yield only complete parts
|
||||
if len(parts) > 1:
|
||||
for part in parts[:-1]:
|
||||
yield [part]
|
||||
buffer = parts[-1]
|
||||
# Yield the last part
|
||||
# yield the last part
|
||||
for part in self.parse(buffer):
|
||||
yield [part]
|
||||
|
||||
|
||||
class CommaSeparatedListOutputParser(ListOutputParser):
"""Parse the output of a model to a comma-separated list."""
"""Parse the output of an LLM call to a comma-separated list."""

@classmethod
def is_lc_serializable(cls) -> bool:
"""Return `True` as this class is serializable."""
"""Return True as this class is serializable."""
return True

@classmethod
@@ -177,7 +177,7 @@ class CommaSeparatedListOutputParser(ListOutputParser):
)
return [item for sublist in reader for item in sublist]
except csv.Error:
# Keep old logic for backup
# keep old logic for backup
return [part.strip() for part in text.split(",")]
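Usage sketch:

```python
from langchain_core.output_parsers import CommaSeparatedListOutputParser

parser = CommaSeparatedListOutputParser()
parser.invoke("red, green, blue")
# -> ['red', 'green', 'blue']
```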
|
||||
|
||||
@property
|
||||
|
||||
@@ -15,11 +15,7 @@ from langchain_core.messages.tool import tool_call as create_tool_call
|
||||
from langchain_core.output_parsers.transform import BaseCumulativeTransformOutputParser
|
||||
from langchain_core.outputs import ChatGeneration, Generation
|
||||
from langchain_core.utils.json import parse_partial_json
|
||||
from langchain_core.utils.pydantic import (
|
||||
TypeBaseModel,
|
||||
is_pydantic_v1_subclass,
|
||||
is_pydantic_v2_subclass,
|
||||
)
|
||||
from langchain_core.utils.pydantic import TypeBaseModel
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -228,7 +224,7 @@ class JsonOutputKeyToolsParser(JsonOutputToolsParser):
|
||||
result: The result of the LLM call.
|
||||
partial: Whether to parse partial JSON.
|
||||
If `True`, the output will be a JSON object containing
|
||||
all the keys that have been returned so far.
|
||||
all the keys that have been returned so far.
|
||||
If `False`, the output will be the full JSON object.
|
||||
|
||||
Raises:
|
||||
@@ -311,7 +307,7 @@ class PydanticToolsParser(JsonOutputToolsParser):
|
||||
result: The result of the LLM call.
|
||||
partial: Whether to parse partial JSON.
|
||||
If `True`, the output will be a JSON object containing
|
||||
all the keys that have been returned so far.
|
||||
all the keys that have been returned so far.
|
||||
If `False`, the output will be the full JSON object.
|
||||
|
||||
Returns:
|
||||
@@ -327,15 +323,7 @@ class PydanticToolsParser(JsonOutputToolsParser):
|
||||
return None if self.first_tool_only else []
|
||||
|
||||
json_results = [json_results] if self.first_tool_only else json_results
|
||||
name_dict_v2: dict[str, TypeBaseModel] = {
|
||||
tool.model_config.get("title") or tool.__name__: tool
|
||||
for tool in self.tools
|
||||
if is_pydantic_v2_subclass(tool)
|
||||
}
|
||||
name_dict_v1: dict[str, TypeBaseModel] = {
|
||||
tool.__name__: tool for tool in self.tools if is_pydantic_v1_subclass(tool)
|
||||
}
|
||||
name_dict: dict[str, TypeBaseModel] = {**name_dict_v2, **name_dict_v1}
|
||||
name_dict = {tool.__name__: tool for tool in self.tools}
|
||||
pydantic_objects = []
|
||||
for res in json_results:
|
||||
if not isinstance(res["args"], dict):
|
||||
|
||||
@@ -86,7 +86,7 @@ class PydanticOutputParser(JsonOutputParser, Generic[TBaseModel]):
|
||||
The format instructions for the JSON output.
|
||||
"""
|
||||
# Copy schema to avoid altering original Pydantic schema.
|
||||
schema = dict(self._get_schema(self.pydantic_object).items())
|
||||
schema = dict(self.pydantic_object.model_json_schema().items())
|
||||
|
||||
# Remove extraneous fields.
|
||||
reduced_schema = schema
|
||||
|
||||
@@ -6,14 +6,14 @@ from langchain_core.output_parsers.transform import BaseTransformOutputParser
|
||||
|
||||
|
||||
class StrOutputParser(BaseTransformOutputParser[str]):
|
||||
"""OutputParser that parses `LLMResult` into the top likely string."""
|
||||
"""OutputParser that parses LLMResult into the top likely string."""
|
||||
|
||||
@classmethod
|
||||
def is_lc_serializable(cls) -> bool:
|
||||
"""`StrOutputParser` is serializable.
|
||||
"""StrOutputParser is serializable.
|
||||
|
||||
Returns:
|
||||
`True`
|
||||
True
|
||||
"""
|
||||
return True
|
||||
|
||||
|
||||
@@ -43,19 +43,19 @@ class _StreamingParser:
|
||||
"""Streaming parser for XML.
|
||||
|
||||
This implementation is pulled into a class to avoid implementation
|
||||
drift between transform and atransform of the `XMLOutputParser`.
|
||||
drift between transform and atransform of the XMLOutputParser.
|
||||
"""
|
||||
|
||||
def __init__(self, parser: Literal["defusedxml", "xml"]) -> None:
|
||||
"""Initialize the streaming parser.
|
||||
|
||||
Args:
|
||||
parser: Parser to use for XML parsing. Can be either `'defusedxml'` or
|
||||
`'xml'`. See documentation in `XMLOutputParser` for more information.
|
||||
parser: Parser to use for XML parsing. Can be either 'defusedxml' or 'xml'.
|
||||
See documentation in XMLOutputParser for more information.
|
||||
|
||||
Raises:
|
||||
ImportError: If `defusedxml` is not installed and the `defusedxml` parser is
|
||||
requested.
|
||||
ImportError: If defusedxml is not installed and the defusedxml
|
||||
parser is requested.
|
||||
"""
|
||||
if parser == "defusedxml":
|
||||
if not _HAS_DEFUSEDXML:
|
||||
@@ -79,10 +79,10 @@ class _StreamingParser:
|
||||
"""Parse a chunk of text.
|
||||
|
||||
Args:
|
||||
chunk: A chunk of text to parse. This can be a `str` or a `BaseMessage`.
|
||||
chunk: A chunk of text to parse. This can be a string or a BaseMessage.
|
||||
|
||||
Yields:
|
||||
A `dict` representing the parsed XML element.
|
||||
A dictionary representing the parsed XML element.
|
||||
|
||||
Raises:
|
||||
xml.etree.ElementTree.ParseError: If the XML is not well-formed.
|
||||
@@ -147,49 +147,46 @@ class _StreamingParser:
|
||||
|
||||
|
||||
class XMLOutputParser(BaseTransformOutputParser):
"""Parse an output using xml format.

Returns a dictionary of tags.
"""
"""Parse an output using xml format."""

tags: list[str] | None = None
"""Tags to tell the LLM to expect in the XML output.

Note this may not be perfect depending on the LLM implementation.

For example, with `tags=["foo", "bar", "baz"]`:
For example, with tags=["foo", "bar", "baz"]:

1. A well-formatted XML instance:
`"<foo>\n <bar>\n <baz></baz>\n </bar>\n</foo>"`
"<foo>\n <bar>\n <baz></baz>\n </bar>\n</foo>"

2. A badly-formatted XML instance (missing closing tag for 'bar'):
`"<foo>\n <bar>\n </foo>"`
"<foo>\n <bar>\n </foo>"

3. A badly-formatted XML instance (unexpected 'tag' element):
`"<foo>\n <tag>\n </tag>\n</foo>"`
"<foo>\n <tag>\n </tag>\n</foo>"
"""
encoding_matcher: re.Pattern = re.compile(
r"<([^>]*encoding[^>]*)>\n(.*)", re.MULTILINE | re.DOTALL
)
parser: Literal["defusedxml", "xml"] = "defusedxml"
"""Parser to use for XML parsing. Can be either `'defusedxml'` or `'xml'`.
"""Parser to use for XML parsing. Can be either 'defusedxml' or 'xml'.

* `'defusedxml'` is the default parser and is used to prevent XML vulnerabilities
present in some distributions of Python's standard library xml.
`defusedxml` is a wrapper around the standard library parser that
sets up the parser with secure defaults.
* `'xml'` is the standard library parser.
* 'defusedxml' is the default parser and is used to prevent XML vulnerabilities
present in some distributions of Python's standard library xml.
`defusedxml` is a wrapper around the standard library parser that
sets up the parser with secure defaults.
* 'xml' is the standard library parser.

Use `xml` only if you are sure that your distribution of the standard library is not
vulnerable to XML vulnerabilities.
Use `xml` only if you are sure that your distribution of the standard library
is not vulnerable to XML vulnerabilities.

Please review the following resources for more information:

* https://docs.python.org/3/library/xml.html#xml-vulnerabilities
* https://github.com/tiran/defusedxml

The standard library relies on [`libexpat`](https://github.com/libexpat/libexpat)
for parsing XML.
The standard library relies on libexpat for parsing XML:
https://github.com/libexpat/libexpat
"""
|
||||
|
||||
def get_format_instructions(self) -> str:
|
||||
@@ -203,12 +200,12 @@ class XMLOutputParser(BaseTransformOutputParser):
text: The output of an LLM call.

Returns:
A `dict` representing the parsed XML.
A dictionary representing the parsed XML.

Raises:
OutputParserException: If the XML is not well-formed.
ImportError: If `defusedxml` is not installed and the `defusedxml` parser is
requested.
ImportError: If defusedxml is not installed and the defusedxml
parser is requested.
"""
# Try to find XML string within triple backticks
# Imports are temporarily placed here to avoid issue with caching on CI
|
||||
|
||||
@@ -11,8 +11,9 @@ from langchain_core.utils._merge import merge_dicts
|
||||
class Generation(Serializable):
|
||||
"""A single text generation output.
|
||||
|
||||
Generation represents the response from an "old-fashioned" LLM (string-in,
|
||||
string-out) that generates regular text (not chat messages).
|
||||
Generation represents the response from an
|
||||
`"old-fashioned" LLM <https://python.langchain.com/docs/concepts/text_llms/>__` that
|
||||
generates regular text (not chat messages).
|
||||
|
||||
This model is used internally by chat model and will eventually
|
||||
be mapped to a more general `LLMResult` object, and then projected into
|
||||
@@ -20,7 +21,8 @@ class Generation(Serializable):
|
||||
|
||||
LangChain users working with chat models will usually access information via
|
||||
`AIMessage` (returned from runnable interfaces) or `LLMResult` (available
|
||||
via callbacks). Please refer to `AIMessage` and `LLMResult` for more information.
|
||||
via callbacks). Please refer the `AIMessage` and `LLMResult` schema documentation
|
||||
for more information.
|
||||
"""
|
||||
|
||||
text: str
|
||||
@@ -33,13 +35,11 @@ class Generation(Serializable):
|
||||
"""
|
||||
type: Literal["Generation"] = "Generation"
|
||||
"""Type is used exclusively for serialization purposes.
|
||||
|
||||
Set to "Generation" for this class.
|
||||
"""
|
||||
Set to "Generation" for this class."""
|
||||
|
||||
@classmethod
|
||||
def is_lc_serializable(cls) -> bool:
|
||||
"""Return `True` as this class is serializable."""
|
||||
"""Return True as this class is serializable."""
|
||||
return True
|
||||
|
||||
@classmethod
|
||||
@@ -53,7 +53,7 @@ class Generation(Serializable):
|
||||
|
||||
|
||||
class GenerationChunk(Generation):
|
||||
"""`GenerationChunk`, which can be concatenated with other Generation chunks."""
|
||||
"""Generation chunk, which can be concatenated with other Generation chunks."""
|
||||
|
||||
def __add__(self, other: GenerationChunk) -> GenerationChunk:
|
||||
"""Concatenate two `GenerationChunk`s.
|
||||
|
||||
@@ -30,13 +30,15 @@ class PromptValue(Serializable, ABC):
|
||||
|
||||
@classmethod
|
||||
def is_lc_serializable(cls) -> bool:
|
||||
"""Return `True` as this class is serializable."""
|
||||
"""Return True as this class is serializable."""
|
||||
return True
|
||||
|
||||
@classmethod
|
||||
def get_lc_namespace(cls) -> list[str]:
|
||||
"""Get the namespace of the LangChain object.
|
||||
|
||||
This is used to determine the namespace of the object when serializing.
|
||||
|
||||
Returns:
|
||||
`["langchain", "schema", "prompt"]`
|
||||
"""
|
||||
@@ -48,7 +50,7 @@ class PromptValue(Serializable, ABC):
|
||||
|
||||
@abstractmethod
|
||||
def to_messages(self) -> list[BaseMessage]:
|
||||
"""Return prompt as a list of messages."""
|
||||
"""Return prompt as a list of Messages."""
|
||||
|
||||
|
||||
class StringPromptValue(PromptValue):
|
||||
@@ -62,6 +64,8 @@ class StringPromptValue(PromptValue):
|
||||
def get_lc_namespace(cls) -> list[str]:
|
||||
"""Get the namespace of the LangChain object.
|
||||
|
||||
This is used to determine the namespace of the object when serializing.
|
||||
|
||||
Returns:
|
||||
`["langchain", "prompts", "base"]`
|
||||
"""
|
||||
@@ -97,6 +101,8 @@ class ChatPromptValue(PromptValue):
|
||||
def get_lc_namespace(cls) -> list[str]:
|
||||
"""Get the namespace of the LangChain object.
|
||||
|
||||
This is used to determine the namespace of the object when serializing.
|
||||
|
||||
Returns:
|
||||
`["langchain", "prompts", "chat"]`
|
||||
"""
|
||||
|
||||
@@ -46,27 +46,21 @@ class BasePromptTemplate(
|
||||
|
||||
input_variables: list[str]
|
||||
"""A list of the names of the variables whose values are required as inputs to the
|
||||
prompt.
|
||||
"""
|
||||
prompt."""
|
||||
optional_variables: list[str] = Field(default=[])
|
||||
"""A list of the names of the variables for placeholder or `MessagePlaceholder` that
|
||||
are optional.
|
||||
|
||||
These variables are auto inferred from the prompt and user need not provide them.
|
||||
"""
|
||||
"""optional_variables: A list of the names of the variables for placeholder
|
||||
or MessagePlaceholder that are optional. These variables are auto inferred
|
||||
from the prompt and user need not provide them."""
|
||||
input_types: typing.Dict[str, Any] = Field(default_factory=dict, exclude=True) # noqa: UP006
|
||||
"""A dictionary of the types of the variables the prompt template expects.
|
||||
|
||||
If not provided, all variables are assumed to be strings.
|
||||
"""
|
||||
If not provided, all variables are assumed to be strings."""
|
||||
output_parser: BaseOutputParser | None = None
|
||||
"""How to parse the output of calling an LLM on this formatted prompt."""
|
||||
partial_variables: Mapping[str, Any] = Field(default_factory=dict)
|
||||
"""A dictionary of the partial variables the prompt template carries.
|
||||
|
||||
Partial variables populate the template so that you don't need to pass them in every
|
||||
time you call the prompt.
|
||||
"""
|
||||
Partial variables populate the template so that you don't need to
|
||||
pass them in every time you call the prompt."""
|
||||
metadata: typing.Dict[str, Any] | None = None # noqa: UP006
|
||||
"""Metadata to be used for tracing."""
|
||||
tags: list[str] | None = None
|
||||
@@ -111,7 +105,7 @@ class BasePromptTemplate(
|
||||
|
||||
@classmethod
|
||||
def is_lc_serializable(cls) -> bool:
|
||||
"""Return `True` as this class is serializable."""
|
||||
"""Return True as this class is serializable."""
|
||||
return True
|
||||
|
||||
model_config = ConfigDict(
|
||||
@@ -133,7 +127,7 @@ class BasePromptTemplate(
|
||||
"""Get the input schema for the prompt.
|
||||
|
||||
Args:
|
||||
config: Configuration for the prompt.
|
||||
config: configuration for the prompt.
|
||||
|
||||
Returns:
|
||||
The input schema for the prompt.
|
||||
@@ -201,8 +195,8 @@ class BasePromptTemplate(
|
||||
"""Invoke the prompt.
|
||||
|
||||
Args:
|
||||
input: Input to the prompt.
|
||||
config: Configuration for the prompt.
|
||||
input: Dict, input to the prompt.
|
||||
config: RunnableConfig, configuration for the prompt.
|
||||
|
||||
Returns:
|
||||
The output of the prompt.
|
||||
@@ -227,8 +221,8 @@ class BasePromptTemplate(
|
||||
"""Async invoke the prompt.
|
||||
|
||||
Args:
|
||||
input: Input to the prompt.
|
||||
config: Configuration for the prompt.
|
||||
input: Dict, input to the prompt.
|
||||
config: RunnableConfig, configuration for the prompt.
|
||||
|
||||
Returns:
|
||||
The output of the prompt.
|
||||
@@ -248,7 +242,7 @@ class BasePromptTemplate(
|
||||
|
||||
@abstractmethod
|
||||
def format_prompt(self, **kwargs: Any) -> PromptValue:
|
||||
"""Create `PromptValue`.
|
||||
"""Create Prompt Value.
|
||||
|
||||
Args:
|
||||
**kwargs: Any arguments to be passed to the prompt template.
|
||||
@@ -258,7 +252,7 @@ class BasePromptTemplate(
|
||||
"""
|
||||
|
||||
async def aformat_prompt(self, **kwargs: Any) -> PromptValue:
|
||||
"""Async create `PromptValue`.
|
||||
"""Async create Prompt Value.
|
||||
|
||||
Args:
|
||||
**kwargs: Any arguments to be passed to the prompt template.
|
||||
@@ -272,7 +266,7 @@ class BasePromptTemplate(
"""Return a partial of the prompt template.

Args:
**kwargs: Partial variables to set.
**kwargs: partial variables to set.

Returns:
A partial of the prompt template.
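Usage sketch for `partial`:

```python
from langchain_core.prompts import PromptTemplate

prompt = PromptTemplate.from_template("{greeting}, {name}!")
partial_prompt = prompt.partial(greeting="Hello")

partial_prompt.format(name="World")
# -> 'Hello, World!'
```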
|
||||
@@ -302,9 +296,9 @@ class BasePromptTemplate(
|
||||
A formatted string.
|
||||
|
||||
Example:
|
||||
```python
|
||||
prompt.format(variable1="foo")
|
||||
```
|
||||
```python
|
||||
prompt.format(variable1="foo")
|
||||
```
|
||||
"""
|
||||
|
||||
async def aformat(self, **kwargs: Any) -> FormatOutputType:
|
||||
@@ -317,9 +311,9 @@ class BasePromptTemplate(
|
||||
A formatted string.
|
||||
|
||||
Example:
|
||||
```python
|
||||
await prompt.aformat(variable1="foo")
|
||||
```
|
||||
```python
|
||||
await prompt.aformat(variable1="foo")
|
||||
```
|
||||
"""
|
||||
return self.format(**kwargs)
|
||||
|
||||
@@ -354,9 +348,9 @@ class BasePromptTemplate(
|
||||
NotImplementedError: If the prompt type is not implemented.
|
||||
|
||||
Example:
|
||||
```python
|
||||
prompt.save(file_path="path/prompt.yaml")
|
||||
```
|
||||
```python
|
||||
prompt.save(file_path="path/prompt.yaml")
|
||||
```
|
||||
"""
|
||||
if self.partial_variables:
|
||||
msg = "Cannot save prompt with partial variables."
|
||||
@@ -408,23 +402,23 @@ def format_document(doc: Document, prompt: BasePromptTemplate[str]) -> str:
|
||||
|
||||
First, this pulls information from the document from two sources:
|
||||
|
||||
1. `page_content`:
|
||||
This takes the information from the `document.page_content` and assigns it to a
|
||||
variable named `page_content`.
|
||||
2. `metadata`:
|
||||
This takes information from `document.metadata` and assigns it to variables of
|
||||
the same name.
|
||||
1. page_content:
|
||||
This takes the information from the `document.page_content`
|
||||
and assigns it to a variable named `page_content`.
|
||||
2. metadata:
|
||||
This takes information from `document.metadata` and assigns
|
||||
it to variables of the same name.
|
||||
|
||||
Those variables are then passed into the `prompt` to produce a formatted string.
|
||||
|
||||
Args:
|
||||
doc: `Document`, the `page_content` and `metadata` will be used to create
|
||||
doc: Document, the page_content and metadata will be used to create
|
||||
the final string.
|
||||
prompt: `BasePromptTemplate`, will be used to format the `page_content`
|
||||
and `metadata` into the final string.
|
||||
prompt: BasePromptTemplate, will be used to format the page_content
|
||||
and metadata into the final string.
|
||||
|
||||
Returns:
|
||||
String of the document formatted.
|
||||
string of the document formatted.
|
||||
|
||||
Example:
|
||||
```python
|
||||
@@ -435,6 +429,7 @@ def format_document(doc: Document, prompt: BasePromptTemplate[str]) -> str:
|
||||
prompt = PromptTemplate.from_template("Page {page}: {page_content}")
|
||||
format_document(doc, prompt)
|
||||
>>> "Page 1: This is a joke"
|
||||
|
||||
```
|
||||
"""
|
||||
return prompt.format(**_get_document_info(doc, prompt))
|
||||
@@ -445,22 +440,22 @@ async def aformat_document(doc: Document, prompt: BasePromptTemplate[str]) -> st
|
||||
|
||||
First, this pulls information from the document from two sources:
|
||||
|
||||
1. `page_content`:
|
||||
This takes the information from the `document.page_content` and assigns it to a
|
||||
variable named `page_content`.
|
||||
2. `metadata`:
|
||||
This takes information from `document.metadata` and assigns it to variables of
|
||||
the same name.
|
||||
1. page_content:
|
||||
This takes the information from the `document.page_content`
|
||||
and assigns it to a variable named `page_content`.
|
||||
2. metadata:
|
||||
This takes information from `document.metadata` and assigns
|
||||
it to variables of the same name.
|
||||
|
||||
Those variables are then passed into the `prompt` to produce a formatted string.
|
||||
|
||||
Args:
|
||||
doc: `Document`, the `page_content` and `metadata` will be used to create
|
||||
doc: Document, the page_content and metadata will be used to create
|
||||
the final string.
|
||||
prompt: `BasePromptTemplate`, will be used to format the `page_content`
|
||||
and `metadata` into the final string.
|
||||
prompt: BasePromptTemplate, will be used to format the page_content
|
||||
and metadata into the final string.
|
||||
|
||||
Returns:
|
||||
String of the document formatted.
|
||||
string of the document formatted.
|
||||
"""
|
||||
return await prompt.aformat(**_get_document_info(doc, prompt))
|
||||
|
||||
@@ -587,15 +587,14 @@ class _StringImageMessagePromptTemplate(BaseMessagePromptTemplate):
|
||||
for prompt in self.prompt:
|
||||
inputs = {var: kwargs[var] for var in prompt.input_variables}
|
||||
if isinstance(prompt, StringPromptTemplate):
|
||||
formatted_text: str = prompt.format(**inputs)
|
||||
if formatted_text != "":
|
||||
content.append({"type": "text", "text": formatted_text})
|
||||
formatted: str | ImageURL | dict[str, Any] = prompt.format(**inputs)
|
||||
content.append({"type": "text", "text": formatted})
|
||||
elif isinstance(prompt, ImagePromptTemplate):
|
||||
formatted_image: ImageURL = prompt.format(**inputs)
|
||||
content.append({"type": "image_url", "image_url": formatted_image})
|
||||
formatted = prompt.format(**inputs)
|
||||
content.append({"type": "image_url", "image_url": formatted})
|
||||
elif isinstance(prompt, DictPromptTemplate):
|
||||
formatted_dict: dict[str, Any] = prompt.format(**inputs)
|
||||
content.append(formatted_dict)
|
||||
formatted = prompt.format(**inputs)
|
||||
content.append(formatted)
|
||||
return self._msg_class(
|
||||
content=content, additional_kwargs=self.additional_kwargs
|
||||
)
|
||||
@@ -618,15 +617,16 @@ class _StringImageMessagePromptTemplate(BaseMessagePromptTemplate):
|
||||
for prompt in self.prompt:
|
||||
inputs = {var: kwargs[var] for var in prompt.input_variables}
|
||||
if isinstance(prompt, StringPromptTemplate):
|
||||
formatted_text: str = await prompt.aformat(**inputs)
|
||||
if formatted_text != "":
|
||||
content.append({"type": "text", "text": formatted_text})
|
||||
formatted: str | ImageURL | dict[str, Any] = await prompt.aformat(
|
||||
**inputs
|
||||
)
|
||||
content.append({"type": "text", "text": formatted})
|
||||
elif isinstance(prompt, ImagePromptTemplate):
|
||||
formatted_image: ImageURL = await prompt.aformat(**inputs)
|
||||
content.append({"type": "image_url", "image_url": formatted_image})
|
||||
formatted = await prompt.aformat(**inputs)
|
||||
content.append({"type": "image_url", "image_url": formatted})
|
||||
elif isinstance(prompt, DictPromptTemplate):
|
||||
formatted_dict: dict[str, Any] = prompt.format(**inputs)
|
||||
content.append(formatted_dict)
|
||||
formatted = prompt.format(**inputs)
|
||||
content.append(formatted)
|
||||
return self._msg_class(
|
||||
content=content, additional_kwargs=self.additional_kwargs
|
||||
)
|
||||
@@ -776,36 +776,42 @@ class ChatPromptTemplate(BaseChatPromptTemplate):
|
||||
|
||||
Use to create flexible templated prompts for chat models.
|
||||
|
||||
```python
|
||||
from langchain_core.prompts import ChatPromptTemplate
|
||||
Examples:
|
||||
!!! warning "Behavior changed in 0.2.24"
|
||||
You can pass any Message-like formats supported by
|
||||
`ChatPromptTemplate.from_messages()` directly to `ChatPromptTemplate()`
|
||||
init.
|
||||
|
||||
template = ChatPromptTemplate(
|
||||
[
|
||||
("system", "You are a helpful AI bot. Your name is {name}."),
|
||||
("human", "Hello, how are you doing?"),
|
||||
("ai", "I'm doing well, thanks!"),
|
||||
("human", "{user_input}"),
|
||||
]
|
||||
)
|
||||
```python
|
||||
from langchain_core.prompts import ChatPromptTemplate
|
||||
|
||||
prompt_value = template.invoke(
|
||||
{
|
||||
"name": "Bob",
|
||||
"user_input": "What is your name?",
|
||||
}
|
||||
)
|
||||
# Output:
|
||||
# ChatPromptValue(
|
||||
# messages=[
|
||||
# SystemMessage(content='You are a helpful AI bot. Your name is Bob.'),
|
||||
# HumanMessage(content='Hello, how are you doing?'),
|
||||
# AIMessage(content="I'm doing well, thanks!"),
|
||||
# HumanMessage(content='What is your name?')
|
||||
# ]
|
||||
# )
|
||||
```
|
||||
template = ChatPromptTemplate(
|
||||
[
|
||||
("system", "You are a helpful AI bot. Your name is {name}."),
|
||||
("human", "Hello, how are you doing?"),
|
||||
("ai", "I'm doing well, thanks!"),
|
||||
("human", "{user_input}"),
|
||||
]
|
||||
)
|
||||
|
||||
!!! note "Messages Placeholder"
|
||||
prompt_value = template.invoke(
|
||||
{
|
||||
"name": "Bob",
|
||||
"user_input": "What is your name?",
|
||||
}
|
||||
)
|
||||
# Output:
|
||||
# ChatPromptValue(
|
||||
# messages=[
|
||||
# SystemMessage(content='You are a helpful AI bot. Your name is Bob.'),
|
||||
# HumanMessage(content='Hello, how are you doing?'),
|
||||
# AIMessage(content="I'm doing well, thanks!"),
|
||||
# HumanMessage(content='What is your name?')
|
||||
# ]
|
||||
# )
|
||||
```
|
||||
|
||||
Messages Placeholder:
|
||||
|
||||
```python
|
||||
# In addition to Human/AI/Tool/Function messages,
|
||||
@@ -846,12 +852,13 @@ class ChatPromptTemplate(BaseChatPromptTemplate):
|
||||
# )
|
||||
```
|
||||
|
||||
!!! note "Single-variable template"
|
||||
Single-variable template:
|
||||
|
||||
If your prompt has only a single input variable (i.e., 1 instance of "{variable_nams}"),
|
||||
and you invoke the template with a non-dict object, the prompt template will
|
||||
inject the provided argument into that variable location.
|
||||
|
||||
|
||||
```python
|
||||
from langchain_core.prompts import ChatPromptTemplate
|
||||
|
||||
@@ -891,35 +898,25 @@ class ChatPromptTemplate(BaseChatPromptTemplate):
|
||||
"""Create a chat prompt template from a variety of message formats.
|
||||
|
||||
Args:
|
||||
messages: Sequence of message representations.
|
||||
|
||||
messages: sequence of message representations.
|
||||
A message can be represented using the following formats:
|
||||
|
||||
1. `BaseMessagePromptTemplate`
|
||||
2. `BaseMessage`
|
||||
3. 2-tuple of `(message type, template)`; e.g.,
|
||||
`("human", "{user_input}")`
|
||||
4. 2-tuple of `(message class, template)`
|
||||
5. A string which is shorthand for `("human", template)`; e.g.,
|
||||
`"{user_input}"`
|
||||
template_format: Format of the template.
|
||||
(1) BaseMessagePromptTemplate, (2) BaseMessage, (3) 2-tuple of
|
||||
(message type, template); e.g., ("human", "{user_input}"),
|
||||
(4) 2-tuple of (message class, template), (5) a string which is
|
||||
shorthand for ("human", template); e.g., "{user_input}".
|
||||
template_format: format of the template.
|
||||
input_variables: A list of the names of the variables whose values are
|
||||
required as inputs to the prompt.
|
||||
optional_variables: A list of the names of the variables for placeholder
|
||||
or MessagePlaceholder that are optional.
|
||||
|
||||
These variables are auto inferred from the prompt and user need not
|
||||
provide them.
|
||||
partial_variables: A dictionary of the partial variables the prompt
|
||||
template carries.
|
||||
|
||||
Partial variables populate the template so that you don't need to pass
|
||||
them in every time you call the prompt.
|
||||
template carries. Partial variables populate the template so that you
|
||||
don't need to pass them in every time you call the prompt.
|
||||
validate_template: Whether to validate the template.
|
||||
input_types: A dictionary of the types of the variables the prompt template
|
||||
expects.
|
||||
|
||||
If not provided, all variables are assumed to be strings.
|
||||
expects. If not provided, all variables are assumed to be strings.
|
||||
|
||||
Examples:
|
||||
Instantiation from a list of message templates:
|
||||
@@ -1124,17 +1121,12 @@ class ChatPromptTemplate(BaseChatPromptTemplate):
|
||||
)
|
||||
```
|
||||
Args:
|
||||
messages: Sequence of message representations.
|
||||
|
||||
messages: sequence of message representations.
|
||||
A message can be represented using the following formats:
|
||||
|
||||
1. `BaseMessagePromptTemplate`
|
||||
2. `BaseMessage`
|
||||
3. 2-tuple of `(message type, template)`; e.g.,
|
||||
`("human", "{user_input}")`
|
||||
4. 2-tuple of `(message class, template)`
|
||||
5. A string which is shorthand for `("human", template)`; e.g.,
|
||||
`"{user_input}"`
|
||||
(1) BaseMessagePromptTemplate, (2) BaseMessage, (3) 2-tuple of
|
||||
(message type, template); e.g., ("human", "{user_input}"),
|
||||
(4) 2-tuple of (message class, template), (5) a string which is
|
||||
shorthand for ("human", template); e.g., "{user_input}".
|
||||
template_format: format of the template.
|
||||
|
||||
Returns:
|
||||
@@ -1246,7 +1238,7 @@ class ChatPromptTemplate(BaseChatPromptTemplate):
|
||||
"""Extend the chat template with a sequence of messages.
|
||||
|
||||
Args:
|
||||
messages: Sequence of message representations to append.
|
||||
messages: sequence of message representations to append.
|
||||
"""
|
||||
self.messages.extend(
|
||||
[_convert_to_message_template(message) for message in messages]
|
||||
@@ -1343,25 +1335,11 @@ def _create_template_from_message_type(
|
||||
raise ValueError(msg)
|
||||
var_name = template[1:-1]
|
||||
message = MessagesPlaceholder(variable_name=var_name, optional=True)
|
||||
else:
|
||||
try:
|
||||
var_name_wrapped, is_optional = template
|
||||
except ValueError as e:
|
||||
msg = (
|
||||
"Unexpected arguments for placeholder message type."
|
||||
" Expected either a single string variable name"
|
||||
" or a list of [variable_name: str, is_optional: bool]."
|
||||
f" Got: {template}"
|
||||
)
|
||||
raise ValueError(msg) from e
|
||||
|
||||
if not isinstance(is_optional, bool):
|
||||
msg = f"Expected is_optional to be a boolean. Got: {is_optional}"
|
||||
raise ValueError(msg) # noqa: TRY004
|
||||
|
||||
elif len(template) == 2 and isinstance(template[1], bool):
|
||||
var_name_wrapped, is_optional = template
|
||||
if not isinstance(var_name_wrapped, str):
|
||||
msg = f"Expected variable name to be a string. Got: {var_name_wrapped}"
|
||||
raise ValueError(msg) # noqa: TRY004
|
||||
raise ValueError(msg) # noqa:TRY004
|
||||
if var_name_wrapped[0] != "{" or var_name_wrapped[-1] != "}":
|
||||
msg = (
|
||||
f"Invalid placeholder template: {var_name_wrapped}."
|
||||
@@ -1371,6 +1349,14 @@ def _create_template_from_message_type(
|
||||
var_name = var_name_wrapped[1:-1]
|
||||
|
||||
message = MessagesPlaceholder(variable_name=var_name, optional=is_optional)
|
||||
else:
|
||||
msg = (
|
||||
"Unexpected arguments for placeholder message type."
|
||||
" Expected either a single string variable name"
|
||||
" or a list of [variable_name: str, is_optional: bool]."
|
||||
f" Got: {template}"
|
||||
)
|
||||
raise ValueError(msg)
|
||||
else:
|
||||
msg = (
|
||||
f"Unexpected message type: {message_type}. Use one of 'human',"
|
||||
@@ -1424,11 +1410,10 @@ def _convert_to_message_template(
|
||||
)
|
||||
raise ValueError(msg)
|
||||
message = (message["role"], message["content"])
|
||||
try:
|
||||
message_type_str, template = message
|
||||
except ValueError as e:
|
||||
if len(message) != 2:
|
||||
msg = f"Expected 2-tuple of (role, template), got {message}"
|
||||
raise ValueError(msg) from e
|
||||
raise ValueError(msg)
|
||||
message_type_str, template = message
|
||||
if isinstance(message_type_str, str):
|
||||
message_ = _create_template_from_message_type(
|
||||
message_type_str, template, template_format=template_format
|
||||
|
||||
@@ -69,7 +69,7 @@ class DictPromptTemplate(RunnableSerializable[dict, dict]):
|
||||
|
||||
@classmethod
|
||||
def is_lc_serializable(cls) -> bool:
|
||||
"""Return `True` as this class is serializable."""
|
||||
"""Return True as this class is serializable."""
|
||||
return True
|
||||
|
||||
@classmethod
|
||||
|
||||
@@ -18,7 +18,7 @@ class BaseMessagePromptTemplate(Serializable, ABC):
|
||||
|
||||
@classmethod
|
||||
def is_lc_serializable(cls) -> bool:
|
||||
"""Return `True` as this class is serializable."""
|
||||
"""Return True as this class is serializable."""
|
||||
return True
|
||||
|
||||
@classmethod
|
||||
@@ -32,13 +32,13 @@ class BaseMessagePromptTemplate(Serializable, ABC):
|
||||
|
||||
@abstractmethod
|
||||
def format_messages(self, **kwargs: Any) -> list[BaseMessage]:
|
||||
"""Format messages from kwargs. Should return a list of `BaseMessage` objects.
|
||||
"""Format messages from kwargs. Should return a list of BaseMessages.
|
||||
|
||||
Args:
|
||||
**kwargs: Keyword arguments to use for formatting.
|
||||
|
||||
Returns:
|
||||
List of `BaseMessage` objects.
|
||||
List of BaseMessages.
|
||||
"""
|
||||
|
||||
async def aformat_messages(self, **kwargs: Any) -> list[BaseMessage]:
|
||||
@@ -48,7 +48,7 @@ class BaseMessagePromptTemplate(Serializable, ABC):
|
||||
**kwargs: Keyword arguments to use for formatting.
|
||||
|
||||
Returns:
|
||||
List of `BaseMessage` objects.
|
||||
List of BaseMessages.
|
||||
"""
|
||||
return self.format_messages(**kwargs)
|
||||
|
||||
|
||||
@@ -122,16 +122,13 @@ def mustache_formatter(template: str, /, **kwargs: Any) -> str:
def mustache_template_vars(
template: str,
) -> set[str]:
"""Get the top-level variables from a mustache template.

For nested variables like `{{person.name}}`, only the top-level
key (`person`) is returned.
"""Get the variables from a mustache template.

Args:
template: The template string.

Returns:
The top-level variables from the template.
The variables from the template.
"""
|
||||
variables: set[str] = set()
|
||||
section_depth = 0
|
||||
|
||||
@@ -104,23 +104,19 @@ class StructuredPrompt(ChatPromptTemplate):
)
```
Args:
messages: Sequence of message representations.

messages: sequence of message representations.
A message can be represented using the following formats:

1. `BaseMessagePromptTemplate`
2. `BaseMessage`
3. 2-tuple of `(message type, template)`; e.g.,
`("human", "{user_input}")`
4. 2-tuple of `(message class, template)`
5. A string which is shorthand for `("human", template)`; e.g.,
`"{user_input}"`
schema: A dictionary representation of function call, or a Pydantic model.
(1) BaseMessagePromptTemplate, (2) BaseMessage, (3) 2-tuple of
(message type, template); e.g., ("human", "{user_input}"),
(4) 2-tuple of (message class, template), (5) a string which is
shorthand for ("human", template); e.g., "{user_input}"
schema: a dictionary representation of function call, or a Pydantic model.
**kwargs: Any additional kwargs to pass through to
`ChatModel.with_structured_output(schema, **kwargs)`.

Returns:
A structured prompt template
a structured prompt template

"""
return cls(messages, schema, **kwargs)


@@ -105,9 +105,7 @@ class InMemoryRateLimiter(BaseRateLimiter):

from langchain_anthropic import ChatAnthropic

model = ChatAnthropic(
model_name="claude-sonnet-4-5-20250929", rate_limiter=rate_limiter
)
model = ChatAnthropic(model_name="claude-sonnet-4-5", rate_limiter=rate_limiter)

for _ in range(5):
tic = time.time()

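For context, a runnable construction sketch for the `rate_limiter` object the snippet above assumes; the parameter values are illustrative only:

```python
from langchain_core.rate_limiters import InMemoryRateLimiter

rate_limiter = InMemoryRateLimiter(
    requests_per_second=0.1,    # one request every 10 seconds
    check_every_n_seconds=0.1,  # poll the token bucket every 100 ms
    max_bucket_size=10,         # maximum burst size
)
```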
@@ -50,65 +50,65 @@ class LangSmithRetrieverParams(TypedDict, total=False):


class BaseRetriever(RunnableSerializable[RetrieverInput, RetrieverOutput], ABC):
"""Abstract base class for a document retrieval system.
"""Abstract base class for a Document retrieval system.

A retrieval system is defined as something that can take string queries and return
the most 'relevant' documents from some source.
the most 'relevant' Documents from some source.

Usage:

A retriever follows the standard `Runnable` interface, and should be used via the
standard `Runnable` methods of `invoke`, `ainvoke`, `batch`, `abatch`.
A retriever follows the standard Runnable interface, and should be used
via the standard Runnable methods of `invoke`, `ainvoke`, `batch`, `abatch`.

Implementation:

When implementing a custom retriever, the class should implement the
`_get_relevant_documents` method to define the logic for retrieving documents.
When implementing a custom retriever, the class should implement
the `_get_relevant_documents` method to define the logic for retrieving documents.

Optionally, an async native implementation can be provided by overriding the
`_aget_relevant_documents` method.

!!! example "Retriever that returns the first 5 documents from a list of documents"
Example: A retriever that returns the first 5 documents from a list of documents

```python
from langchain_core.documents import Document
from langchain_core.retrievers import BaseRetriever
```python
from langchain_core.documents import Document
from langchain_core.retrievers import BaseRetriever

class SimpleRetriever(BaseRetriever):
docs: list[Document]
k: int = 5
class SimpleRetriever(BaseRetriever):
docs: list[Document]
k: int = 5

def _get_relevant_documents(self, query: str) -> list[Document]:
\"\"\"Return the first k documents from the list of documents\"\"\"
return self.docs[:self.k]
def _get_relevant_documents(self, query: str) -> list[Document]:
\"\"\"Return the first k documents from the list of documents\"\"\"
return self.docs[:self.k]

async def _aget_relevant_documents(self, query: str) -> list[Document]:
\"\"\"(Optional) async native implementation.\"\"\"
return self.docs[:self.k]
```
async def _aget_relevant_documents(self, query: str) -> list[Document]:
\"\"\"(Optional) async native implementation.\"\"\"
return self.docs[:self.k]
```

!!! example "Simple retriever based on a scikit-learn vectorizer"
Example: A simple retriever based on a scikit-learn vectorizer

```python
from sklearn.metrics.pairwise import cosine_similarity
```python
from sklearn.metrics.pairwise import cosine_similarity


class TFIDFRetriever(BaseRetriever, BaseModel):
vectorizer: Any
docs: list[Document]
tfidf_array: Any
k: int = 4
class TFIDFRetriever(BaseRetriever, BaseModel):
vectorizer: Any
docs: list[Document]
tfidf_array: Any
k: int = 4

class Config:
arbitrary_types_allowed = True
class Config:
arbitrary_types_allowed = True

def _get_relevant_documents(self, query: str) -> list[Document]:
# Ip -- (n_docs,x), Op -- (n_docs,n_Feats)
query_vec = self.vectorizer.transform([query])
# Op -- (n_docs,1) -- Cosine Sim with each doc
results = cosine_similarity(self.tfidf_array, query_vec).reshape((-1,))
return [self.docs[i] for i in results.argsort()[-self.k :][::-1]]
```
def _get_relevant_documents(self, query: str) -> list[Document]:
# Ip -- (n_docs,x), Op -- (n_docs,n_Feats)
query_vec = self.vectorizer.transform([query])
# Op -- (n_docs,1) -- Cosine Sim with each doc
results = cosine_similarity(self.tfidf_array, query_vec).reshape((-1,))
return [self.docs[i] for i in results.argsort()[-self.k :][::-1]]
```
"""

model_config = ConfigDict(
@@ -119,19 +119,15 @@ class BaseRetriever(RunnableSerializable[RetrieverInput, RetrieverOutput], ABC):
_expects_other_args: bool = False
tags: list[str] | None = None
"""Optional list of tags associated with the retriever.

These tags will be associated with each call to this retriever,
and passed as arguments to the handlers defined in `callbacks`.

You can use these to eg identify a specific instance of a retriever with its
use case.
"""
metadata: dict[str, Any] | None = None
"""Optional metadata associated with the retriever.

This metadata will be associated with each call to this retriever,
and passed as arguments to the handlers defined in `callbacks`.

You can use these to eg identify a specific instance of a retriever with its
use case.
"""

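A self-contained usage sketch for the first retriever example above; the class body repeats the docstring's simplified signature, and the documents are placeholders:

```python
from langchain_core.documents import Document
from langchain_core.retrievers import BaseRetriever


class SimpleRetriever(BaseRetriever):
    docs: list[Document]
    k: int = 5

    def _get_relevant_documents(self, query: str) -> list[Document]:
        """Return the first k documents from the list of documents."""
        return self.docs[: self.k]


retriever = SimpleRetriever(docs=[Document(page_content=f"doc {i}") for i in range(10)])
print(len(retriever.invoke("any query")))  # 5
```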
@@ -118,8 +118,6 @@ if TYPE_CHECKING:

Other = TypeVar("Other")

_RUNNABLE_GENERIC_NUM_ARGS = 2  # Input and Output


class Runnable(ABC, Generic[Input, Output]):
"""A unit of work that can be invoked, batched, streamed, transformed and composed.
@@ -149,11 +147,11 @@ class Runnable(ABC, Generic[Input, Output]):
the `input_schema` property, the `output_schema` property and `config_schema`
method.

Composition
===========

Runnable objects can be composed together to create chains in a declarative way.
LCEL and Composition
====================

The LangChain Expression Language (LCEL) is a declarative way to compose
`Runnable` objects into chains.
Any chain constructed this way will automatically have sync, async, batch, and
streaming support.

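A minimal, self-contained composition sketch (no model calls): chaining with `|` produces a `RunnableSequence` that gets `invoke`, `batch`, and the async variants for free.

```python
from langchain_core.runnables import RunnableLambda

chain = RunnableLambda(lambda x: x + 1) | RunnableLambda(lambda x: x * 2)
print(chain.invoke(3))         # 8
print(chain.batch([1, 2, 3]))  # [4, 6, 8]
```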
@@ -237,21 +235,21 @@ class Runnable(ABC, Generic[Input, Output]):

You can set the global debug flag to True to enable debug output for all chains:

```python
from langchain_core.globals import set_debug
```python
from langchain_core.globals import set_debug

set_debug(True)
```
set_debug(True)
```

Alternatively, you can pass existing or custom callbacks to any given chain:

```python
from langchain_core.tracers import ConsoleCallbackHandler
```python
from langchain_core.tracers import ConsoleCallbackHandler

chain.invoke(..., config={"callbacks": [ConsoleCallbackHandler()]})
```
chain.invoke(..., config={"callbacks": [ConsoleCallbackHandler()]})
```

For a UI (and much more) check out [LangSmith](https://docs.langchain.com/langsmith/home).
For a UI (and much more) check out [LangSmith](https://docs.smith.langchain.com/).

"""

@@ -311,10 +309,7 @@ class Runnable(ABC, Generic[Input, Output]):
for base in self.__class__.mro():
if hasattr(base, "__pydantic_generic_metadata__"):
metadata = base.__pydantic_generic_metadata__
if (
"args" in metadata
and len(metadata["args"]) == _RUNNABLE_GENERIC_NUM_ARGS
):
if "args" in metadata and len(metadata["args"]) == 2:
return metadata["args"][0]

# If we didn't find a Pydantic model in the parent classes,
@@ -322,7 +317,7 @@ class Runnable(ABC, Generic[Input, Output]):
# Runnables that are not pydantic models.
for cls in self.__class__.__orig_bases__:  # type: ignore[attr-defined]
type_args = get_args(cls)
if type_args and len(type_args) == _RUNNABLE_GENERIC_NUM_ARGS:
if type_args and len(type_args) == 2:
return type_args[0]

msg = (
@@ -345,15 +340,12 @@ class Runnable(ABC, Generic[Input, Output]):
for base in self.__class__.mro():
if hasattr(base, "__pydantic_generic_metadata__"):
metadata = base.__pydantic_generic_metadata__
if (
"args" in metadata
and len(metadata["args"]) == _RUNNABLE_GENERIC_NUM_ARGS
):
if "args" in metadata and len(metadata["args"]) == 2:
return metadata["args"][1]

for cls in self.__class__.__orig_bases__:  # type: ignore[attr-defined]
type_args = get_args(cls)
if type_args and len(type_args) == _RUNNABLE_GENERIC_NUM_ARGS:
if type_args and len(type_args) == 2:
return type_args[1]

msg = (
@@ -432,7 +424,7 @@ class Runnable(ABC, Generic[Input, Output]):
print(runnable.get_input_jsonschema())
```

!!! version-added "Added in `langchain-core` 0.3.0"
!!! version-added "Added in version 0.3.0"

"""
return self.get_input_schema(config).model_json_schema()
@@ -510,7 +502,7 @@ class Runnable(ABC, Generic[Input, Output]):
print(runnable.get_output_jsonschema())
```

!!! version-added "Added in `langchain-core` 0.3.0"
!!! version-added "Added in version 0.3.0"

"""
return self.get_output_schema(config).model_json_schema()
@@ -574,7 +566,7 @@ class Runnable(ABC, Generic[Input, Output]):
Returns:
A JSON schema that represents the config of the `Runnable`.

!!! version-added "Added in `langchain-core` 0.3.0"
!!! version-added "Added in version 0.3.0"

"""
return self.config_schema(include=include).model_json_schema()
@@ -774,7 +766,7 @@ class Runnable(ABC, Generic[Input, Output]):
"""Assigns new fields to the `dict` output of this `Runnable`.

```python
from langchain_core.language_models.fake import FakeStreamingListLLM
from langchain_community.llms.fake import FakeStreamingListLLM
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import SystemMessagePromptTemplate
from langchain_core.runnables import Runnable
@@ -826,12 +818,10 @@ class Runnable(ABC, Generic[Input, Output]):
Args:
input: The input to the `Runnable`.
config: A config to use when invoking the `Runnable`.

The config supports standard keys like `'tags'`, `'metadata'` for
tracing purposes, `'max_concurrency'` for controlling how much work to
do in parallel, and other keys.

Please refer to `RunnableConfig` for more details.
do in parallel, and other keys. Please refer to the `RunnableConfig`
for more details.

Returns:
The output of the `Runnable`.
@@ -848,12 +838,10 @@ class Runnable(ABC, Generic[Input, Output]):
Args:
input: The input to the `Runnable`.
config: A config to use when invoking the `Runnable`.

The config supports standard keys like `'tags'`, `'metadata'` for
tracing purposes, `'max_concurrency'` for controlling how much work to
do in parallel, and other keys.

Please refer to `RunnableConfig` for more details.
do in parallel, and other keys. Please refer to the `RunnableConfig`
for more details.

Returns:
The output of the `Runnable`.
@@ -880,9 +868,8 @@ class Runnable(ABC, Generic[Input, Output]):
config: A config to use when invoking the `Runnable`. The config supports
standard keys like `'tags'`, `'metadata'` for
tracing purposes, `'max_concurrency'` for controlling how much work
to do in parallel, and other keys.

Please refer to `RunnableConfig` for more details.
to do in parallel, and other keys. Please refer to the
`RunnableConfig` for more details.
return_exceptions: Whether to return exceptions instead of raising them.
**kwargs: Additional keyword arguments to pass to the `Runnable`.

@@ -945,12 +932,10 @@ class Runnable(ABC, Generic[Input, Output]):
Args:
inputs: A list of inputs to the `Runnable`.
config: A config to use when invoking the `Runnable`.

The config supports standard keys like `'tags'`, `'metadata'` for
tracing purposes, `'max_concurrency'` for controlling how much work to
do in parallel, and other keys.

Please refer to `RunnableConfig` for more details.
do in parallel, and other keys. Please refer to the `RunnableConfig`
for more details.
return_exceptions: Whether to return exceptions instead of raising them.
**kwargs: Additional keyword arguments to pass to the `Runnable`.

@@ -1013,12 +998,10 @@ class Runnable(ABC, Generic[Input, Output]):
Args:
inputs: A list of inputs to the `Runnable`.
config: A config to use when invoking the `Runnable`.

The config supports standard keys like `'tags'`, `'metadata'` for
tracing purposes, `'max_concurrency'` for controlling how much work to
do in parallel, and other keys.

Please refer to `RunnableConfig` for more details.
do in parallel, and other keys. Please refer to the `RunnableConfig`
for more details.
return_exceptions: Whether to return exceptions instead of raising them.
**kwargs: Additional keyword arguments to pass to the `Runnable`.

@@ -1078,12 +1061,10 @@ class Runnable(ABC, Generic[Input, Output]):
Args:
inputs: A list of inputs to the `Runnable`.
config: A config to use when invoking the `Runnable`.

The config supports standard keys like `'tags'`, `'metadata'` for
tracing purposes, `'max_concurrency'` for controlling how much work to
do in parallel, and other keys.

Please refer to `RunnableConfig` for more details.
do in parallel, and other keys. Please refer to the `RunnableConfig`
for more details.
return_exceptions: Whether to return exceptions instead of raising them.
**kwargs: Additional keyword arguments to pass to the `Runnable`.

@@ -1761,52 +1742,46 @@ class Runnable(ABC, Generic[Input, Output]):
import time
import asyncio


def format_t(timestamp: float) -> str:
return datetime.fromtimestamp(timestamp, tz=timezone.utc).isoformat()


async def test_runnable(time_to_sleep: int):
print(f"Runnable[{time_to_sleep}s]: starts at {format_t(time.time())}")
await asyncio.sleep(time_to_sleep)
print(f"Runnable[{time_to_sleep}s]: ends at {format_t(time.time())}")


async def fn_start(run_obj: Runnable):
print(f"on start callback starts at {format_t(time.time())}")
await asyncio.sleep(3)
print(f"on start callback ends at {format_t(time.time())}")


async def fn_end(run_obj: Runnable):
print(f"on end callback starts at {format_t(time.time())}")
await asyncio.sleep(2)
print(f"on end callback ends at {format_t(time.time())}")


runnable = RunnableLambda(test_runnable).with_alisteners(
on_start=fn_start, on_end=fn_end
on_start=fn_start,
on_end=fn_end
)


async def concurrent_runs():
await asyncio.gather(runnable.ainvoke(2), runnable.ainvoke(3))


asyncio.run(concurrent_runs())
# Result:
# on start callback starts at 2025-03-01T07:05:22.875378+00:00
# on start callback starts at 2025-03-01T07:05:22.875495+00:00
# on start callback ends at 2025-03-01T07:05:25.878862+00:00
# on start callback ends at 2025-03-01T07:05:25.878947+00:00
# Runnable[2s]: starts at 2025-03-01T07:05:25.879392+00:00
# Runnable[3s]: starts at 2025-03-01T07:05:25.879804+00:00
# Runnable[2s]: ends at 2025-03-01T07:05:27.881998+00:00
# on end callback starts at 2025-03-01T07:05:27.882360+00:00
# Runnable[3s]: ends at 2025-03-01T07:05:28.881737+00:00
# on end callback starts at 2025-03-01T07:05:28.882428+00:00
# on end callback ends at 2025-03-01T07:05:29.883893+00:00
# on end callback ends at 2025-03-01T07:05:30.884831+00:00
Result:
on start callback starts at 2025-03-01T07:05:22.875378+00:00
on start callback starts at 2025-03-01T07:05:22.875495+00:00
on start callback ends at 2025-03-01T07:05:25.878862+00:00
on start callback ends at 2025-03-01T07:05:25.878947+00:00
Runnable[2s]: starts at 2025-03-01T07:05:25.879392+00:00
Runnable[3s]: starts at 2025-03-01T07:05:25.879804+00:00
Runnable[2s]: ends at 2025-03-01T07:05:27.881998+00:00
on end callback starts at 2025-03-01T07:05:27.882360+00:00
Runnable[3s]: ends at 2025-03-01T07:05:28.881737+00:00
on end callback starts at 2025-03-01T07:05:28.882428+00:00
on end callback ends at 2025-03-01T07:05:29.883893+00:00
on end callback ends at 2025-03-01T07:05:30.884831+00:00

```
"""
return RunnableBinding(
@@ -1868,7 +1843,7 @@ class Runnable(ABC, Generic[Input, Output]):
`exp_base`, and `jitter` (all `float` values).

Returns:
A new `Runnable` that retries the original `Runnable` on exceptions.
A new Runnable that retries the original Runnable on exceptions.

Example:
```python
@@ -1952,9 +1927,7 @@ class Runnable(ABC, Generic[Input, Output]):
exceptions_to_handle: A tuple of exception types to handle.
exception_key: If `string` is specified then handled exceptions will be
passed to fallbacks as part of the input under the specified key.

If `None`, exceptions will not be passed to fallbacks.

If used, the base `Runnable` and its fallbacks must accept a
dictionary as input.

@@ -1990,9 +1963,7 @@ class Runnable(ABC, Generic[Input, Output]):
exceptions_to_handle: A tuple of exception types to handle.
exception_key: If `string` is specified then handled exceptions will be
passed to fallbacks as part of the input under the specified key.

If `None`, exceptions will not be passed to fallbacks.

If used, the base `Runnable` and its fallbacks must accept a
dictionary as input.

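A self-contained sketch of the `exception_key` behavior just described (function names are illustrative): the handled exception is injected into the fallback's input dict under the given key.

```python
from langchain_core.runnables import RunnableLambda


def flaky(inputs: dict) -> str:
    raise ValueError("primary failed")


def recover(inputs: dict) -> str:
    return f"recovered from: {inputs['exception']}"


chain = RunnableLambda(flaky).with_fallbacks(
    [RunnableLambda(recover)], exception_key="exception"
)
print(chain.invoke({"question": "hi"}))  # recovered from: primary failed
```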
@@ -2458,14 +2429,10 @@ class Runnable(ABC, Generic[Input, Output]):

`as_tool` will instantiate a `BaseTool` with a name, description, and
`args_schema` from a `Runnable`. Where possible, schemas are inferred
from `runnable.get_input_schema`.

Alternatively (e.g., if the `Runnable` takes a dict as input and the specific
`dict` keys are not typed), the schema can be specified directly with
`args_schema`.

You can also pass `arg_types` to just specify the required arguments and their
types.
from `runnable.get_input_schema`. Alternatively (e.g., if the
`Runnable` takes a dict as input and the specific dict keys are not typed),
the schema can be specified directly with `args_schema`. You can also
pass `arg_types` to just specify the required arguments and their types.

Args:
args_schema: The schema for the tool.
@@ -2534,7 +2501,7 @@ class Runnable(ABC, Generic[Input, Output]):
as_tool.invoke({"a": 3, "b": [1, 2]})
```

`str` input:
String input:

```python
from langchain_core.runnables import RunnableLambda
@@ -2552,6 +2519,9 @@ class Runnable(ABC, Generic[Input, Output]):
as_tool = runnable.as_tool()
as_tool.invoke("b")
```

!!! version-added "Added in version 0.2.14"

"""
# Avoid circular import
from langchain_core.tools import convert_runnable_to_tool  # noqa: PLC0415
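A runnable sketch of the typed-dict variant the docstring describes; the tool name and description here are illustrative, and the `TypedDict` annotation is what lets the schema be inferred.

```python
from typing import TypedDict

from langchain_core.runnables import RunnableLambda


class Args(TypedDict):
    a: int
    b: list[int]


def f(x: Args) -> str:
    return str(x["a"] * max(x["b"]))


as_tool = RunnableLambda(f).as_tool(
    name="my_tool", description="Multiply 'a' by the largest element of 'b'."
)
print(as_tool.invoke({"a": 3, "b": [1, 2]}))  # 6
```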
@@ -2670,7 +2640,7 @@ class RunnableSerializable(Serializable, Runnable[Input, Output]):
from langchain_openai import ChatOpenAI

model = ChatAnthropic(
model_name="claude-sonnet-4-5-20250929"
model_name="claude-3-7-sonnet-20250219"
).configurable_alternatives(
ConfigurableField(id="llm"),
default_key="anthropic",
@@ -2783,9 +2753,6 @@ def _seq_output_schema(
return last.get_output_schema(config)


_RUNNABLE_SEQUENCE_MIN_STEPS = 2


class RunnableSequence(RunnableSerializable[Input, Output]):
"""Sequence of `Runnable` objects, where the output of one is the input of the next.

@@ -2895,7 +2862,7 @@ class RunnableSequence(RunnableSerializable[Input, Output]):
name: The name of the `Runnable`.
first: The first `Runnable` in the sequence.
middle: The middle `Runnable` objects in the sequence.
last: The last `Runnable` in the sequence.
last: The last Runnable in the sequence.

Raises:
ValueError: If the sequence has less than 2 steps.
@@ -2908,11 +2875,8 @@ class RunnableSequence(RunnableSerializable[Input, Output]):
steps_flat.extend(step.steps)
else:
steps_flat.append(coerce_to_runnable(step))
if len(steps_flat) < _RUNNABLE_SEQUENCE_MIN_STEPS:
msg = (
f"RunnableSequence must have at least {_RUNNABLE_SEQUENCE_MIN_STEPS} "
f"steps, got {len(steps_flat)}"
)
if len(steps_flat) < 2:
msg = f"RunnableSequence must have at least 2 steps, got {len(steps_flat)}"
raise ValueError(msg)
super().__init__(
first=steps_flat[0],
@@ -2943,7 +2907,7 @@ class RunnableSequence(RunnableSerializable[Input, Output]):
@classmethod
@override
def is_lc_serializable(cls) -> bool:
"""Return `True` as this class is serializable."""
"""Return True as this class is serializable."""
return True

model_config = ConfigDict(
@@ -3539,7 +3503,7 @@ class RunnableParallel(RunnableSerializable[Input, dict[str, Any]]):

Returns a mapping of their outputs.

`RunnableParallel` is one of the two main composition primitives,
`RunnableParallel` is one of the two main composition primitives for the LCEL,
alongside `RunnableSequence`. It invokes `Runnable`s concurrently, providing the
same input to each.

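A minimal, self-contained sketch of that behavior: each branch receives the same input, and the output is a dict keyed by branch name.

```python
from langchain_core.runnables import RunnableLambda, RunnableParallel

parallel = RunnableParallel(
    double=RunnableLambda(lambda x: x * 2),
    square=RunnableLambda(lambda x: x**2),
)
print(parallel.invoke(3))  # {'double': 6, 'square': 9}
```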
@@ -3649,7 +3613,7 @@ class RunnableParallel(RunnableSerializable[Input, dict[str, Any]]):
@classmethod
@override
def is_lc_serializable(cls) -> bool:
"""Return `True` as this class is serializable."""
"""Return True as this class is serializable."""
return True

@classmethod
@@ -3707,12 +3671,6 @@ class RunnableParallel(RunnableSerializable[Input, dict[str, Any]]):
== "object"
for s in self.steps__.values()
):
for step in self.steps__.values():
fields = step.get_input_schema(config).model_fields
root_field = fields.get("root")
if root_field is not None and root_field.annotation != Any:
return super().get_input_schema(config)

# This is correct, but pydantic typings/mypy don't think so.
return create_model_v2(
self.get_name("Input"),
@@ -4522,7 +4480,7 @@ class RunnableLambda(Runnable[Input, Output]):
# on itemgetter objects, so we have to parse the repr
items = str(func).replace("operator.itemgetter(", "")[:-1].split(", ")
if all(
item[0] == "'" and item[-1] == "'" and item != "''" for item in items
item[0] == "'" and item[-1] == "'" and len(item) > 2 for item in items
):
fields = {item[1:-1]: (Any, ...) for item in items}
# It's a dict, lol
@@ -5184,7 +5142,7 @@ class RunnableEachBase(RunnableSerializable[list[Input], list[Output]]):
@classmethod
@override
def is_lc_serializable(cls) -> bool:
"""Return `True` as this class is serializable."""
"""Return True as this class is serializable."""
return True

@classmethod
@@ -5367,7 +5325,7 @@ class RunnableEach(RunnableEachBase[Input, Output]):


class RunnableBindingBase(RunnableSerializable[Input, Output]):  # type: ignore[no-redef]
"""`Runnable` that delegates calls to another `Runnable` with a set of `**kwargs`.
"""`Runnable` that delegates calls to another `Runnable` with a set of kwargs.

Use only if creating a new `RunnableBinding` subclass with different `__init__`
args.
@@ -5507,7 +5465,7 @@ class RunnableBindingBase(RunnableSerializable[Input, Output]):  # type: ignore[
@classmethod
@override
def is_lc_serializable(cls) -> bool:
"""Return `True` as this class is serializable."""
"""Return True as this class is serializable."""
return True

@classmethod
@@ -5797,7 +5755,7 @@ class RunnableBinding(RunnableBindingBase[Input, Output]):  # type: ignore[no-re
```python
# Create a Runnable binding that invokes the chat model with the
# additional kwarg `stop=['-']` when running it.
from langchain_openai import ChatOpenAI
from langchain_community.chat_models import ChatOpenAI

model = ChatOpenAI()
model.invoke('Say "Parrot-MAGIC"', stop=["-"])  # Should return `Parrot`

@@ -36,19 +36,17 @@ from langchain_core.runnables.utils import (
get_unique_config_specs,
)

_MIN_BRANCHES = 2


class RunnableBranch(RunnableSerializable[Input, Output]):
"""`Runnable` that selects which branch to run based on a condition.
"""Runnable that selects which branch to run based on a condition.

The `Runnable` is initialized with a list of `(condition, Runnable)` pairs and
The Runnable is initialized with a list of (condition, Runnable) pairs and
a default branch.

When operating on an input, the first condition that evaluates to True is
selected, and the corresponding `Runnable` is run on the input.
selected, and the corresponding Runnable is run on the input.

If no condition evaluates to `True`, the default branch is run on the input.
If no condition evaluates to True, the default branch is run on the input.

Examples:
```python
@@ -67,9 +65,9 @@ class RunnableBranch(RunnableSerializable[Input, Output]):
"""

branches: Sequence[tuple[Runnable[Input, bool], Runnable[Input, Output]]]
"""A list of `(condition, Runnable)` pairs."""
"""A list of (condition, Runnable) pairs."""
default: Runnable[Input, Output]
"""A `Runnable` to run if no condition is met."""
"""A Runnable to run if no condition is met."""

def __init__(
self,
@@ -81,19 +79,19 @@ class RunnableBranch(RunnableSerializable[Input, Output]):
]
| RunnableLike,
) -> None:
"""A `Runnable` that runs one of two branches based on a condition.
"""A Runnable that runs one of two branches based on a condition.

Args:
*branches: A list of `(condition, Runnable)` pairs.
Defaults a `Runnable` to run if no condition is met.
*branches: A list of (condition, Runnable) pairs.
Defaults a Runnable to run if no condition is met.

Raises:
ValueError: If the number of branches is less than `2`.
TypeError: If the default branch is not `Runnable`, `Callable` or `Mapping`.
TypeError: If a branch is not a `tuple` or `list`.
ValueError: If a branch is not of length `2`.
ValueError: If the number of branches is less than 2.
TypeError: If the default branch is not Runnable, Callable or Mapping.
TypeError: If a branch is not a tuple or list.
ValueError: If a branch is not of length 2.
"""
if len(branches) < _MIN_BRANCHES:
if len(branches) < 2:
msg = "RunnableBranch requires at least two branches"
raise ValueError(msg)

|
||||
)
|
||||
raise TypeError(msg)
|
||||
|
||||
if len(branch) != _MIN_BRANCHES:
|
||||
if len(branch) != 2:
|
||||
msg = (
|
||||
f"RunnableBranch branches must be "
|
||||
f"tuples or lists of length 2, not {len(branch)}"
|
||||
@@ -142,7 +140,7 @@ class RunnableBranch(RunnableSerializable[Input, Output]):
|
||||
|
||||
@classmethod
|
||||
def is_lc_serializable(cls) -> bool:
|
||||
"""Return `True` as this class is serializable."""
|
||||
"""Return True as this class is serializable."""
|
||||
return True
|
||||
|
||||
@classmethod
|
||||
@@ -189,12 +187,12 @@ class RunnableBranch(RunnableSerializable[Input, Output]):
|
||||
def invoke(
|
||||
self, input: Input, config: RunnableConfig | None = None, **kwargs: Any
|
||||
) -> Output:
|
||||
"""First evaluates the condition, then delegate to `True` or `False` branch.
|
||||
"""First evaluates the condition, then delegate to true or false branch.
|
||||
|
||||
Args:
|
||||
input: The input to the `Runnable`.
|
||||
config: The configuration for the `Runnable`.
|
||||
**kwargs: Additional keyword arguments to pass to the `Runnable`.
|
||||
input: The input to the Runnable.
|
||||
config: The configuration for the Runnable.
|
||||
**kwargs: Additional keyword arguments to pass to the Runnable.
|
||||
|
||||
Returns:
|
||||
The output of the branch that was run.
|
||||
@@ -299,12 +297,12 @@ class RunnableBranch(RunnableSerializable[Input, Output]):
|
||||
config: RunnableConfig | None = None,
|
||||
**kwargs: Any | None,
|
||||
) -> Iterator[Output]:
|
||||
"""First evaluates the condition, then delegate to `True` or `False` branch.
|
||||
"""First evaluates the condition, then delegate to true or false branch.
|
||||
|
||||
Args:
|
||||
input: The input to the `Runnable`.
|
||||
config: The configuration for the Runna`ble.
|
||||
**kwargs: Additional keyword arguments to pass to the `Runnable`.
|
||||
input: The input to the Runnable.
|
||||
config: The configuration for the Runnable.
|
||||
**kwargs: Additional keyword arguments to pass to the Runnable.
|
||||
|
||||
Yields:
|
||||
The output of the branch that was run.
|
||||
@@ -383,12 +381,12 @@ class RunnableBranch(RunnableSerializable[Input, Output]):
|
||||
config: RunnableConfig | None = None,
|
||||
**kwargs: Any | None,
|
||||
) -> AsyncIterator[Output]:
|
||||
"""First evaluates the condition, then delegate to `True` or `False` branch.
|
||||
"""First evaluates the condition, then delegate to true or false branch.
|
||||
|
||||
Args:
|
||||
input: The input to the `Runnable`.
|
||||
config: The configuration for the `Runnable`.
|
||||
**kwargs: Additional keyword arguments to pass to the `Runnable`.
|
||||
input: The input to the Runnable.
|
||||
config: The configuration for the Runnable.
|
||||
**kwargs: Additional keyword arguments to pass to the Runnable.
|
||||
|
||||
Yields:
|
||||
The output of the branch that was run.
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
"""`Runnable` objects that can be dynamically configured."""
|
||||
"""Runnables that can be dynamically configured."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
@@ -47,14 +47,14 @@ if TYPE_CHECKING:
|
||||
|
||||
|
||||
class DynamicRunnable(RunnableSerializable[Input, Output]):
|
||||
"""Serializable `Runnable` that can be dynamically configured.
|
||||
"""Serializable Runnable that can be dynamically configured.
|
||||
|
||||
A `DynamicRunnable` should be initiated using the `configurable_fields` or
|
||||
`configurable_alternatives` method of a `Runnable`.
|
||||
A DynamicRunnable should be initiated using the `configurable_fields` or
|
||||
`configurable_alternatives` method of a Runnable.
|
||||
"""
|
||||
|
||||
default: RunnableSerializable[Input, Output]
|
||||
"""The default `Runnable` to use."""
|
||||
"""The default Runnable to use."""
|
||||
|
||||
config: RunnableConfig | None = None
|
||||
"""The configuration to use."""
|
||||
@@ -66,7 +66,7 @@ class DynamicRunnable(RunnableSerializable[Input, Output]):
|
||||
@classmethod
|
||||
@override
|
||||
def is_lc_serializable(cls) -> bool:
|
||||
"""Return `True` as this class is serializable."""
|
||||
"""Return True as this class is serializable."""
|
||||
return True
|
||||
|
||||
@classmethod
|
||||
@@ -120,13 +120,13 @@ class DynamicRunnable(RunnableSerializable[Input, Output]):
|
||||
def prepare(
|
||||
self, config: RunnableConfig | None = None
|
||||
) -> tuple[Runnable[Input, Output], RunnableConfig]:
|
||||
"""Prepare the `Runnable` for invocation.
|
||||
"""Prepare the Runnable for invocation.
|
||||
|
||||
Args:
|
||||
config: The configuration to use.
|
||||
|
||||
Returns:
|
||||
The prepared `Runnable` and configuration.
|
||||
The prepared Runnable and configuration.
|
||||
"""
|
||||
runnable: Runnable[Input, Output] = self
|
||||
while isinstance(runnable, DynamicRunnable):
|
||||
@@ -316,12 +316,12 @@ class DynamicRunnable(RunnableSerializable[Input, Output]):
|
||||
|
||||
|
||||
class RunnableConfigurableFields(DynamicRunnable[Input, Output]):
|
||||
"""`Runnable` that can be dynamically configured.
|
||||
"""Runnable that can be dynamically configured.
|
||||
|
||||
A `RunnableConfigurableFields` should be initiated using the
|
||||
`configurable_fields` method of a `Runnable`.
|
||||
A RunnableConfigurableFields should be initiated using the
|
||||
`configurable_fields` method of a Runnable.
|
||||
|
||||
Here is an example of using a `RunnableConfigurableFields` with LLMs:
|
||||
Here is an example of using a RunnableConfigurableFields with LLMs:
|
||||
|
||||
```python
|
||||
from langchain_core.prompts import PromptTemplate
|
||||
@@ -348,7 +348,7 @@ class RunnableConfigurableFields(DynamicRunnable[Input, Output]):
|
||||
chain.invoke({"x": 0}, config={"configurable": {"temperature": 0.9}})
|
||||
```
|
||||
|
||||
Here is an example of using a `RunnableConfigurableFields` with `HubRunnables`:
|
||||
Here is an example of using a RunnableConfigurableFields with HubRunnables:
|
||||
|
||||
```python
|
||||
from langchain_core.prompts import PromptTemplate
|
||||
@@ -380,7 +380,7 @@ class RunnableConfigurableFields(DynamicRunnable[Input, Output]):
|
||||
|
||||
@property
|
||||
def config_specs(self) -> list[ConfigurableFieldSpec]:
|
||||
"""Get the configuration specs for the `RunnableConfigurableFields`.
|
||||
"""Get the configuration specs for the RunnableConfigurableFields.
|
||||
|
||||
Returns:
|
||||
The configuration specs.
|
||||
@@ -473,13 +473,13 @@ _enums_for_spec_lock = threading.Lock()
|
||||
|
||||
|
||||
class RunnableConfigurableAlternatives(DynamicRunnable[Input, Output]):
|
||||
"""`Runnable` that can be dynamically configured.
|
||||
"""Runnable that can be dynamically configured.
|
||||
|
||||
A `RunnableConfigurableAlternatives` should be initiated using the
|
||||
`configurable_alternatives` method of a `Runnable` or can be
|
||||
A RunnableConfigurableAlternatives should be initiated using the
|
||||
`configurable_alternatives` method of a Runnable or can be
|
||||
initiated directly as well.
|
||||
|
||||
Here is an example of using a `RunnableConfigurableAlternatives` that uses
|
||||
Here is an example of using a RunnableConfigurableAlternatives that uses
|
||||
alternative prompts to illustrate its functionality:
|
||||
|
||||
```python
|
||||
@@ -506,7 +506,7 @@ class RunnableConfigurableAlternatives(DynamicRunnable[Input, Output]):
|
||||
chain.with_config(configurable={"prompt": "poem"}).invoke({"topic": "bears"})
|
||||
```
|
||||
|
||||
Equivalently, you can initialize `RunnableConfigurableAlternatives` directly
|
||||
Equivalently, you can initialize RunnableConfigurableAlternatives directly
|
||||
and use in LCEL in the same way:
|
||||
|
||||
```python
|
||||
@@ -531,7 +531,7 @@ class RunnableConfigurableAlternatives(DynamicRunnable[Input, Output]):
|
||||
"""
|
||||
|
||||
which: ConfigurableField
|
||||
"""The `ConfigurableField` to use to choose between alternatives."""
|
||||
"""The ConfigurableField to use to choose between alternatives."""
|
||||
|
||||
alternatives: dict[
|
||||
str,
|
||||
@@ -544,9 +544,8 @@ class RunnableConfigurableAlternatives(DynamicRunnable[Input, Output]):
|
||||
|
||||
prefix_keys: bool
|
||||
"""Whether to prefix configurable fields of each alternative with a namespace
|
||||
of the form <which.id>==<alternative_key>, e.g. a key named "temperature" used by
|
||||
the alternative named "gpt3" becomes "model==gpt3/temperature".
|
||||
"""
|
||||
of the form <which.id>==<alternative_key>, eg. a key named "temperature" used by
|
||||
the alternative named "gpt3" becomes "model==gpt3/temperature"."""
|
||||
|
||||
@property
|
||||
@override
|
||||
@@ -639,24 +638,24 @@ class RunnableConfigurableAlternatives(DynamicRunnable[Input, Output]):
|
||||
|
||||
|
||||
def _strremoveprefix(s: str, prefix: str) -> str:
|
||||
"""`str.removeprefix()` is only available in Python 3.9+."""
|
||||
"""str.removeprefix() is only available in Python 3.9+."""
|
||||
return s.replace(prefix, "", 1) if s.startswith(prefix) else s
|
||||
|
||||
|
||||
def prefix_config_spec(
|
||||
spec: ConfigurableFieldSpec, prefix: str
|
||||
) -> ConfigurableFieldSpec:
|
||||
"""Prefix the id of a `ConfigurableFieldSpec`.
|
||||
"""Prefix the id of a ConfigurableFieldSpec.
|
||||
|
||||
This is useful when a `RunnableConfigurableAlternatives` is used as a
|
||||
`ConfigurableField` of another `RunnableConfigurableAlternatives`.
|
||||
This is useful when a RunnableConfigurableAlternatives is used as a
|
||||
ConfigurableField of another RunnableConfigurableAlternatives.
|
||||
|
||||
Args:
|
||||
spec: The `ConfigurableFieldSpec` to prefix.
|
||||
spec: The ConfigurableFieldSpec to prefix.
|
||||
prefix: The prefix to add.
|
||||
|
||||
Returns:
|
||||
The prefixed `ConfigurableFieldSpec`.
|
||||
The prefixed ConfigurableFieldSpec.
|
||||
"""
|
||||
return (
|
||||
ConfigurableFieldSpec(
|
||||
@@ -678,15 +677,15 @@ def make_options_spec(
|
||||
) -> ConfigurableFieldSpec:
|
||||
"""Make options spec.
|
||||
|
||||
Make a `ConfigurableFieldSpec` for a `ConfigurableFieldSingleOption` or
|
||||
`ConfigurableFieldMultiOption`.
|
||||
Make a ConfigurableFieldSpec for a ConfigurableFieldSingleOption or
|
||||
ConfigurableFieldMultiOption.
|
||||
|
||||
Args:
|
||||
spec: The `ConfigurableFieldSingleOption` or `ConfigurableFieldMultiOption`.
|
||||
spec: The ConfigurableFieldSingleOption or ConfigurableFieldMultiOption.
|
||||
description: The description to use if the spec does not have one.
|
||||
|
||||
Returns:
|
||||
The `ConfigurableFieldSpec`.
|
||||
The ConfigurableFieldSpec.
|
||||
"""
|
||||
with _enums_for_spec_lock:
|
||||
if enum := _enums_for_spec.get(spec):
|
||||
|
||||
@@ -35,20 +35,20 @@ if TYPE_CHECKING:
|
||||
|
||||
|
||||
class RunnableWithFallbacks(RunnableSerializable[Input, Output]):
|
||||
"""`Runnable` that can fallback to other `Runnable`s if it fails.
|
||||
"""Runnable that can fallback to other Runnables if it fails.
|
||||
|
||||
External APIs (e.g., APIs for a language model) may at times experience
|
||||
degraded performance or even downtime.
|
||||
|
||||
In these cases, it can be useful to have a fallback `Runnable` that can be
|
||||
used in place of the original `Runnable` (e.g., fallback to another LLM provider).
|
||||
In these cases, it can be useful to have a fallback Runnable that can be
|
||||
used in place of the original Runnable (e.g., fallback to another LLM provider).
|
||||
|
||||
Fallbacks can be defined at the level of a single `Runnable`, or at the level
|
||||
of a chain of `Runnable`s. Fallbacks are tried in order until one succeeds or
|
||||
Fallbacks can be defined at the level of a single Runnable, or at the level
|
||||
of a chain of Runnables. Fallbacks are tried in order until one succeeds or
|
||||
all fail.
|
||||
|
||||
While you can instantiate a `RunnableWithFallbacks` directly, it is usually
|
||||
more convenient to use the `with_fallbacks` method on a `Runnable`.
|
||||
more convenient to use the `with_fallbacks` method on a Runnable.
|
||||
|
||||
Example:
|
||||
```python
|
||||
@@ -87,7 +87,7 @@ class RunnableWithFallbacks(RunnableSerializable[Input, Output]):
|
||||
"""
|
||||
|
||||
runnable: Runnable[Input, Output]
|
||||
"""The `Runnable` to run first."""
|
||||
"""The Runnable to run first."""
|
||||
fallbacks: Sequence[Runnable[Input, Output]]
|
||||
"""A sequence of fallbacks to try."""
|
||||
exceptions_to_handle: tuple[type[BaseException], ...] = (Exception,)
|
||||
@@ -97,12 +97,9 @@ class RunnableWithFallbacks(RunnableSerializable[Input, Output]):
|
||||
"""
|
||||
exception_key: str | None = None
|
||||
"""If `string` is specified then handled exceptions will be passed to fallbacks as
|
||||
part of the input under the specified key.
|
||||
|
||||
If `None`, exceptions will not be passed to fallbacks.
|
||||
|
||||
If used, the base `Runnable` and its fallbacks must accept a dictionary as input.
|
||||
"""
|
||||
part of the input under the specified key. If `None`, exceptions
|
||||
will not be passed to fallbacks. If used, the base Runnable and its fallbacks
|
||||
must accept a dictionary as input."""
|
||||
|
||||
model_config = ConfigDict(
|
||||
arbitrary_types_allowed=True,
|
||||
@@ -140,7 +137,7 @@ class RunnableWithFallbacks(RunnableSerializable[Input, Output]):
|
||||
@classmethod
|
||||
@override
|
||||
def is_lc_serializable(cls) -> bool:
|
||||
"""Return `True` as this class is serializable."""
|
||||
"""Return True as this class is serializable."""
|
||||
return True
|
||||
|
||||
@classmethod
|
||||
@@ -155,10 +152,10 @@ class RunnableWithFallbacks(RunnableSerializable[Input, Output]):
|
||||
|
||||
@property
|
||||
def runnables(self) -> Iterator[Runnable[Input, Output]]:
|
||||
"""Iterator over the `Runnable` and its fallbacks.
|
||||
"""Iterator over the Runnable and its fallbacks.
|
||||
|
||||
Yields:
|
||||
The `Runnable` then its fallbacks.
|
||||
The Runnable then its fallbacks.
|
||||
"""
|
||||
yield self.runnable
|
||||
yield from self.fallbacks
|
||||
@@ -592,14 +589,14 @@ class RunnableWithFallbacks(RunnableSerializable[Input, Output]):
|
||||
await run_manager.on_chain_end(output)
|
||||
|
||||
def __getattr__(self, name: str) -> Any:
|
||||
"""Get an attribute from the wrapped `Runnable` and its fallbacks.
|
||||
"""Get an attribute from the wrapped Runnable and its fallbacks.
|
||||
|
||||
Returns:
|
||||
If the attribute is anything other than a method that outputs a `Runnable`,
|
||||
returns `getattr(self.runnable, name)`. If the attribute is a method that
|
||||
does return a new `Runnable` (e.g. `model.bind_tools([...])` outputs a new
|
||||
`RunnableBinding`) then `self.runnable` and each of the runnables in
|
||||
`self.fallbacks` is replaced with `getattr(x, name)`.
|
||||
If the attribute is anything other than a method that outputs a Runnable,
|
||||
returns getattr(self.runnable, name). If the attribute is a method that
|
||||
does return a new Runnable (e.g. model.bind_tools([...]) outputs a new
|
||||
RunnableBinding) then self.runnable and each of the runnables in
|
||||
self.fallbacks is replaced with getattr(x, name).
|
||||
|
||||
Example:
|
||||
```python
|
||||
@@ -607,7 +604,7 @@ class RunnableWithFallbacks(RunnableSerializable[Input, Output]):
|
||||
from langchain_anthropic import ChatAnthropic
|
||||
|
||||
gpt_4o = ChatOpenAI(model="gpt-4o")
|
||||
claude_3_sonnet = ChatAnthropic(model="claude-sonnet-4-5-20250929")
|
||||
claude_3_sonnet = ChatAnthropic(model="claude-3-7-sonnet-20250219")
|
||||
model = gpt_4o.with_fallbacks([claude_3_sonnet])
|
||||
|
||||
model.model_name
|
||||
@@ -621,6 +618,7 @@ class RunnableWithFallbacks(RunnableSerializable[Input, Output]):
|
||||
runnable=RunnableBinding(bound=ChatOpenAI(...), kwargs={"tools": [...]}),
|
||||
fallbacks=[RunnableBinding(bound=ChatAnthropic(...), kwargs={"tools": [...]})],
|
||||
)
|
||||
|
||||
```
|
||||
""" # noqa: E501
|
||||
attr = getattr(self.runnable, name)
|
||||
|
||||
@@ -132,7 +132,7 @@ class Branch(NamedTuple):
|
||||
condition: Callable[..., str]
|
||||
"""A callable that returns a string representation of the condition."""
|
||||
ends: dict[str, str] | None
|
||||
"""Optional dictionary of end node IDs for the branches. """
|
||||
"""Optional dictionary of end node ids for the branches. """
|
||||
|
||||
|
||||
class CurveStyle(Enum):
|
||||
@@ -706,10 +706,8 @@ class Graph:
|
||||
def _first_node(graph: Graph, exclude: Sequence[str] = ()) -> Node | None:
|
||||
"""Find the single node that is not a target of any edge.
|
||||
|
||||
Exclude nodes/sources with IDs in the exclude list.
|
||||
|
||||
Exclude nodes/sources with ids in the exclude list.
|
||||
If there is no such node, or there are multiple, return `None`.
|
||||
|
||||
When drawing the graph, this node would be the origin.
|
||||
"""
|
||||
targets = {edge.target for edge in graph.edges if edge.source not in exclude}
|
||||
@@ -724,10 +722,8 @@ def _first_node(graph: Graph, exclude: Sequence[str] = ()) -> Node | None:
|
||||
def _last_node(graph: Graph, exclude: Sequence[str] = ()) -> Node | None:
|
||||
"""Find the single node that is not a source of any edge.
|
||||
|
||||
Exclude nodes/targets with IDs in the exclude list.
|
||||
|
||||
Exclude nodes/targets with ids in the exclude list.
|
||||
If there is no such node, or there are multiple, return `None`.
|
||||
|
||||
When drawing the graph, this node would be the destination.
|
||||
"""
|
||||
sources = {edge.source for edge in graph.edges if edge.target not in exclude}
|
||||
|
||||
@@ -454,10 +454,7 @@ def _render_mermaid_using_api(
|
||||
return img_bytes
|
||||
|
||||
# If we get a server error (5xx), retry
|
||||
if (
|
||||
requests.codes.internal_server_error <= response.status_code
|
||||
and attempt < max_retries
|
||||
):
|
||||
if 500 <= response.status_code < 600 and attempt < max_retries:
|
||||
# Exponential backoff with jitter
|
||||
sleep_time = retry_delay * (2**attempt) * (0.5 + 0.5 * random.random()) # noqa: S311 not used for crypto
|
||||
time.sleep(sleep_time)
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
"""Helper class to draw a state graph into a PNG file."""
|
||||
|
||||
from itertools import groupby
|
||||
from typing import Any
|
||||
|
||||
from langchain_core.runnables.graph import Graph, LabelsDict
|
||||
@@ -142,7 +141,6 @@ class PngDrawer:
|
||||
# Add nodes, conditional edges, and edges to the graph
|
||||
self.add_nodes(viz, graph)
|
||||
self.add_edges(viz, graph)
|
||||
self.add_subgraph(viz, [node.split(":") for node in graph.nodes])
|
||||
|
||||
# Update entrypoint and END styles
|
||||
self.update_styles(viz, graph)
|
||||
@@ -163,32 +161,6 @@ class PngDrawer:
|
||||
for node in graph.nodes:
|
||||
self.add_node(viz, node)
|
||||
|
||||
def add_subgraph(
|
||||
self,
|
||||
viz: Any,
|
||||
nodes: list[list[str]],
|
||||
parent_prefix: list[str] | None = None,
|
||||
) -> None:
|
||||
"""Add subgraphs to the graph.
|
||||
|
||||
Args:
|
||||
viz: The graphviz object.
|
||||
nodes: The nodes to add.
|
||||
parent_prefix: The prefix of the parent subgraph.
|
||||
"""
|
||||
for prefix, grouped in groupby(
|
||||
[node[:] for node in sorted(nodes)],
|
||||
key=lambda x: x.pop(0),
|
||||
):
|
||||
current_prefix = (parent_prefix or []) + [prefix]
|
||||
grouped_nodes = list(grouped)
|
||||
if len(grouped_nodes) > 1:
|
||||
subgraph = viz.add_subgraph(
|
||||
[":".join(current_prefix + node) for node in grouped_nodes],
|
||||
name="cluster_" + ":".join(current_prefix),
|
||||
)
|
||||
self.add_subgraph(subgraph, grouped_nodes, current_prefix)
|
||||
|
||||
def add_edges(self, viz: Any, graph: Graph) -> None:
|
||||
"""Add edges to the graph.
|
||||
|
||||
|
||||
@@ -36,23 +36,23 @@ GetSessionHistoryCallable = Callable[..., BaseChatMessageHistory]
|
||||
|
||||
|
||||
class RunnableWithMessageHistory(RunnableBindingBase): # type: ignore[no-redef]
|
||||
"""`Runnable` that manages chat message history for another `Runnable`.
|
||||
"""Runnable that manages chat message history for another Runnable.
|
||||
|
||||
A chat message history is a sequence of messages that represent a conversation.
|
||||
|
||||
`RunnableWithMessageHistory` wraps another `Runnable` and manages the chat message
|
||||
RunnableWithMessageHistory wraps another Runnable and manages the chat message
|
||||
history for it; it is responsible for reading and updating the chat message
|
||||
history.
|
||||
|
||||
The formats supported for the inputs and outputs of the wrapped `Runnable`
|
||||
The formats supported for the inputs and outputs of the wrapped Runnable
|
||||
are described below.
|
||||
|
||||
`RunnableWithMessageHistory` must always be called with a config that contains
|
||||
RunnableWithMessageHistory must always be called with a config that contains
|
||||
the appropriate parameters for the chat message history factory.
|
||||
|
||||
By default, the `Runnable` is expected to take a single configuration parameter
|
||||
By default, the Runnable is expected to take a single configuration parameter
|
||||
called `session_id` which is a string. This parameter is used to create a new
|
||||
or look up an existing chat message history that matches the given `session_id`.
|
||||
or look up an existing chat message history that matches the given session_id.
|
||||
|
||||
In this case, the invocation would look like this:
|
||||
|
||||
@@ -117,12 +117,12 @@ class RunnableWithMessageHistory(RunnableBindingBase): # type: ignore[no-redef]
|
||||
|
||||
```
|
||||
|
||||
Example where the wrapped `Runnable` takes a dictionary input:
|
||||
Example where the wrapped Runnable takes a dictionary input:
|
||||
|
||||
```python
|
||||
from typing import Optional
|
||||
|
||||
from langchain_anthropic import ChatAnthropic
|
||||
from langchain_community.chat_models import ChatAnthropic
|
||||
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
|
||||
from langchain_core.runnables.history import RunnableWithMessageHistory
|
||||
|
||||
@@ -166,7 +166,7 @@ class RunnableWithMessageHistory(RunnableBindingBase): # type: ignore[no-redef]
|
||||
print(store) # noqa: T201
|
||||
```
|
||||
|
||||
Example where the session factory takes two keys (`user_id` and `conversation_id`):
|
||||
Example where the session factory takes two keys, user_id and conversation id):
|
||||
|
||||
```python
|
||||
store = {}
|
||||
@@ -223,28 +223,21 @@ class RunnableWithMessageHistory(RunnableBindingBase): # type: ignore[no-redef]
|
||||
"""
|
||||
|
||||
get_session_history: GetSessionHistoryCallable
|
||||
"""Function that returns a new `BaseChatMessageHistory`.
|
||||
|
||||
"""Function that returns a new BaseChatMessageHistory.
|
||||
This function should either take a single positional argument `session_id` of type
|
||||
string and return a corresponding chat message history instance
|
||||
"""
|
||||
string and return a corresponding chat message history instance"""
|
||||
input_messages_key: str | None = None
|
||||
"""Must be specified if the base `Runnable` accepts a `dict` as input.
|
||||
The key in the input `dict` that contains the messages.
|
||||
"""
|
||||
"""Must be specified if the base runnable accepts a dict as input.
|
||||
The key in the input dict that contains the messages."""
|
||||
output_messages_key: str | None = None
|
||||
"""Must be specified if the base `Runnable` returns a `dict` as output.
|
||||
The key in the output `dict` that contains the messages.
|
||||
"""
|
||||
"""Must be specified if the base Runnable returns a dict as output.
|
||||
The key in the output dict that contains the messages."""
|
||||
history_messages_key: str | None = None
|
||||
"""Must be specified if the base `Runnable` accepts a `dict` as input and expects a
|
||||
separate key for historical messages.
|
||||
"""
|
||||
"""Must be specified if the base runnable accepts a dict as input and expects a
|
||||
separate key for historical messages."""
|
||||
history_factory_config: Sequence[ConfigurableFieldSpec]
|
||||
"""Configure fields that should be passed to the chat history factory.
|
||||
|
||||
See `ConfigurableFieldSpec` for more details.
|
||||
"""
|
||||
See `ConfigurableFieldSpec` for more details."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
@@ -261,16 +254,15 @@ class RunnableWithMessageHistory(RunnableBindingBase): # type: ignore[no-redef]
|
||||
history_factory_config: Sequence[ConfigurableFieldSpec] | None = None,
|
||||
**kwargs: Any,
|
||||
) -> None:
|
||||
"""Initialize `RunnableWithMessageHistory`.
|
||||
"""Initialize RunnableWithMessageHistory.
|
||||
|
||||
Args:
|
||||
runnable: The base `Runnable` to be wrapped.
|
||||
|
||||
runnable: The base Runnable to be wrapped.
|
||||
Must take as input one of:
|
||||
|
||||
1. A list of `BaseMessage`
|
||||
2. A `dict` with one key for all messages
|
||||
3. A `dict` with one key for the current input string/message(s) and
|
||||
2. A dict with one key for all messages
|
||||
3. A dict with one key for the current input string/message(s) and
|
||||
a separate key for historical messages. If the input key points
|
||||
to a string, it will be treated as a `HumanMessage` in history.
|
||||
|
||||
@@ -278,15 +270,13 @@ class RunnableWithMessageHistory(RunnableBindingBase): # type: ignore[no-redef]
|
||||
|
||||
1. A string which can be treated as an `AIMessage`
|
||||
2. A `BaseMessage` or sequence of `BaseMessage`
|
||||
3. A `dict` with a key for a `BaseMessage` or sequence of
|
||||
3. A dict with a key for a `BaseMessage` or sequence of
|
||||
`BaseMessage`
|
||||
|
||||
get_session_history: Function that returns a new `BaseChatMessageHistory`.
|
||||
|
||||
get_session_history: Function that returns a new BaseChatMessageHistory.
|
||||
This function should either take a single positional argument
|
||||
`session_id` of type string and return a corresponding
|
||||
chat message history instance.
|
||||
|
||||
```python
|
||||
def get_session_history(
|
||||
session_id: str, *, user_id: str | None = None
|
||||
@@ -305,17 +295,16 @@ class RunnableWithMessageHistory(RunnableBindingBase): # type: ignore[no-redef]
|
||||
) -> BaseChatMessageHistory: ...
|
||||
```
|
||||
|
||||
input_messages_key: Must be specified if the base runnable accepts a `dict`
|
||||
input_messages_key: Must be specified if the base runnable accepts a dict
|
||||
as input.
|
||||
output_messages_key: Must be specified if the base runnable returns a `dict`
|
||||
output_messages_key: Must be specified if the base runnable returns a dict
|
||||
as output.
|
||||
history_messages_key: Must be specified if the base runnable accepts a
|
||||
`dict` as input and expects a separate key for historical messages.
|
||||
history_messages_key: Must be specified if the base runnable accepts a dict
|
||||
as input and expects a separate key for historical messages.
|
||||
history_factory_config: Configure fields that should be passed to the
|
||||
chat history factory. See `ConfigurableFieldSpec` for more details.
|
||||
|
||||
Specifying these allows you to pass multiple config keys into the
|
||||
`get_session_history` factory.
|
||||
Specifying these allows you to pass multiple config keys
|
||||
into the get_session_history factory.
|
||||
**kwargs: Arbitrary additional kwargs to pass to parent class
|
||||
`RunnableBindingBase` init.
|
||||
|
||||
@@ -375,7 +364,7 @@ class RunnableWithMessageHistory(RunnableBindingBase): # type: ignore[no-redef]
|
||||
@property
|
||||
@override
|
||||
def config_specs(self) -> list[ConfigurableFieldSpec]:
|
||||
"""Get the configuration specs for the `RunnableWithMessageHistory`."""
|
||||
"""Get the configuration specs for the RunnableWithMessageHistory."""
|
||||
return get_unique_config_specs(
|
||||
super().config_specs + list(self.history_factory_config)
|
||||
)
|
||||
@@ -617,6 +606,6 @@ class RunnableWithMessageHistory(RunnableBindingBase): # type: ignore[no-redef]
|
||||
|
||||
|
||||
def _get_parameter_names(callable_: GetSessionHistoryCallable) -> list[str]:
|
||||
"""Get the parameter names of the `Callable`."""
|
||||
"""Get the parameter names of the callable."""
|
||||
sig = inspect.signature(callable_)
|
||||
return list(sig.parameters.keys())
|
||||
|
||||
@@ -51,10 +51,10 @@ def identity(x: Other) -> Other:
|
||||
"""Identity function.
|
||||
|
||||
Args:
|
||||
x: Input.
|
||||
x: input.
|
||||
|
||||
Returns:
|
||||
Output.
|
||||
output.
|
||||
"""
|
||||
return x
|
||||
|
||||
@@ -63,10 +63,10 @@ async def aidentity(x: Other) -> Other:
|
||||
"""Async identity function.
|
||||
|
||||
Args:
|
||||
x: Input.
|
||||
x: input.
|
||||
|
||||
Returns:
|
||||
Output.
|
||||
output.
|
||||
"""
|
||||
return x
|
||||
|
||||
@@ -74,11 +74,11 @@ async def aidentity(x: Other) -> Other:
|
||||
class RunnablePassthrough(RunnableSerializable[Other, Other]):
|
||||
"""Runnable to passthrough inputs unchanged or with additional keys.
|
||||
|
||||
This `Runnable` behaves almost like the identity function, except that it
|
||||
This Runnable behaves almost like the identity function, except that it
|
||||
can be configured to add additional keys to the output, if the input is a
|
||||
dict.
|
||||
|
||||
The examples below demonstrate this `Runnable` works using a few simple
|
||||
The examples below demonstrate this Runnable works using a few simple
|
||||
chains. The chains rely on simple lambdas to make the examples easy to execute
|
||||
and experiment with.
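A minimal sketch of the passthrough-plus-assign behavior described above (the lambda is an illustrative stand-in, in the same spirit as the docstring's own examples):

```python
from langchain_core.runnables import RunnableLambda, RunnablePassthrough

# Pass the input dict through unchanged, while adding a derived key.
chain = RunnablePassthrough.assign(doubled=RunnableLambda(lambda d: d["num"] * 2))
print(chain.invoke({"num": 3}))  # {'num': 3, 'doubled': 6}
```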

@@ -164,7 +164,7 @@ class RunnablePassthrough(RunnableSerializable[Other, Other]):
input_type: type[Other] | None = None,
**kwargs: Any,
) -> None:
"""Create a `RunnablePassthrough`.
"""Create a RunnablePassthrough.

Args:
func: Function to be called with the input.
@@ -180,7 +180,7 @@ class RunnablePassthrough(RunnableSerializable[Other, Other]):
@classmethod
@override
def is_lc_serializable(cls) -> bool:
"""Return `True` as this class is serializable."""
"""Return True as this class is serializable."""
return True

@classmethod
@@ -213,11 +213,11 @@ class RunnablePassthrough(RunnableSerializable[Other, Other]):
"""Merge the Dict input with the output produced by the mapping argument.

Args:
**kwargs: `Runnable`, `Callable` or a `Mapping` from keys to `Runnable`
objects or `Callable`s.
**kwargs: Runnable, Callable or a Mapping from keys to Runnables
or Callables.

Returns:
A `Runnable` that merges the `dict` input with the output produced by the
A Runnable that merges the Dict input with the output produced by the
mapping argument.
"""
return RunnableAssign(RunnableParallel[dict[str, Any]](kwargs))
@@ -350,7 +350,7 @@ _graph_passthrough: RunnablePassthrough = RunnablePassthrough()


class RunnableAssign(RunnableSerializable[dict[str, Any], dict[str, Any]]):
"""Runnable that assigns key-value pairs to `dict[str, Any]` inputs.
"""Runnable that assigns key-value pairs to dict[str, Any] inputs.

The `RunnableAssign` class takes input dictionaries and, through a
`RunnableParallel` instance, applies transformations, then combines
@@ -392,7 +392,7 @@ class RunnableAssign(RunnableSerializable[dict[str, Any], dict[str, Any]]):
mapper: RunnableParallel

def __init__(self, mapper: RunnableParallel[dict[str, Any]], **kwargs: Any) -> None:
"""Create a `RunnableAssign`.
"""Create a RunnableAssign.

Args:
mapper: A `RunnableParallel` instance that will be used to transform the
@@ -403,7 +403,7 @@ class RunnableAssign(RunnableSerializable[dict[str, Any], dict[str, Any]]):
@classmethod
@override
def is_lc_serializable(cls) -> bool:
"""Return `True` as this class is serializable."""
"""Return True as this class is serializable."""
return True

@classmethod
@@ -668,19 +668,13 @@ class RunnableAssign(RunnableSerializable[dict[str, Any], dict[str, Any]]):
yield chunk


class RunnablePick(RunnableSerializable[dict[str, Any], Any]):
"""`Runnable` that picks keys from `dict[str, Any]` inputs.
class RunnablePick(RunnableSerializable[dict[str, Any], dict[str, Any]]):
"""Runnable that picks keys from dict[str, Any] inputs.

`RunnablePick` class represents a `Runnable` that selectively picks keys from a
RunnablePick class represents a Runnable that selectively picks keys from a
dictionary input. It allows you to specify one or more keys to extract
from the input dictionary.

!!! note "Return Type Behavior"
The return type depends on the `keys` parameter:

- When `keys` is a `str`: Returns the single value associated with that key
- When `keys` is a `list`: Returns a dictionary containing only the selected
keys
from the input dictionary. It returns a new dictionary containing only
the selected keys.

Example:
```python
@@ -693,22 +687,18 @@ class RunnablePick(RunnableSerializable[dict[str, Any], Any]):
"country": "USA",
}

# Single key - returns the value directly
runnable_single = RunnablePick(keys="name")
result_single = runnable_single.invoke(input_data)
print(result_single) # Output: "John"
runnable = RunnablePick(keys=["name", "age"])

# Multiple keys - returns a dictionary
runnable_multiple = RunnablePick(keys=["name", "age"])
result_multiple = runnable_multiple.invoke(input_data)
print(result_multiple) # Output: {'name': 'John', 'age': 30}
output_data = runnable.invoke(input_data)

print(output_data) # Output: {'name': 'John', 'age': 30}
```
"""

keys: str | list[str]

def __init__(self, keys: str | list[str], **kwargs: Any) -> None:
"""Create a `RunnablePick`.
"""Create a RunnablePick.

Args:
keys: A single key or a list of keys to pick from the input dictionary.
@@ -718,7 +708,7 @@ class RunnablePick(RunnableSerializable[dict[str, Any], Any]):
@classmethod
@override
def is_lc_serializable(cls) -> bool:
"""Return `True` as this class is serializable."""
"""Return True as this class is serializable."""
return True

@classmethod

@@ -40,11 +40,11 @@ class RouterInput(TypedDict):
key: str
"""The key to route on."""
input: Any
"""The input to pass to the selected `Runnable`."""
"""The input to pass to the selected Runnable."""


class RouterRunnable(RunnableSerializable[RouterInput, Output]):
"""`Runnable` that routes to a set of `Runnable` based on `Input['key']`.
"""Runnable that routes to a set of Runnables based on Input['key'].

Returns the output of the selected Runnable.
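A minimal sketch of the routing contract described above (the two lambda branches are illustrative assumptions):

```python
from langchain_core.runnables import RouterRunnable, RunnableLambda

router = RouterRunnable(
    runnables={
        "double": RunnableLambda(lambda x: x * 2),
        "negate": RunnableLambda(lambda x: -x),
    }
)
# The input dict selects the branch via "key" and forwards "input" to it.
print(router.invoke({"key": "double", "input": 5}))  # 10
```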

@@ -74,10 +74,10 @@ class RouterRunnable(RunnableSerializable[RouterInput, Output]):
self,
runnables: Mapping[str, Runnable[Any, Output] | Callable[[Any], Output]],
) -> None:
"""Create a `RouterRunnable`.
"""Create a RouterRunnable.

Args:
runnables: A mapping of keys to `Runnable` objects.
runnables: A mapping of keys to Runnables.
"""
super().__init__(
runnables={key: coerce_to_runnable(r) for key, r in runnables.items()}
@@ -90,7 +90,7 @@ class RouterRunnable(RunnableSerializable[RouterInput, Output]):
@classmethod
@override
def is_lc_serializable(cls) -> bool:
"""Return `True` as this class is serializable."""
"""Return True as this class is serializable."""
return True

@classmethod

@@ -28,7 +28,7 @@ class EventData(TypedDict, total=False):

This field is only available if the `Runnable` raised an exception.

!!! version-added "Added in `langchain-core` 1.0.0"
!!! version-added "Added in version 1.0.0"
"""
output: Any
"""The output of the `Runnable` that generated the event.
@@ -168,7 +168,10 @@ class StandardStreamEvent(BaseStreamEvent):


class CustomStreamEvent(BaseStreamEvent):
"""Custom stream event created by the user."""
"""Custom stream event created by the user.

!!! version-added "Added in version 0.2.15"
"""

# Overwrite the event field to be more specific.
event: Literal["on_custom_event"] # type: ignore[misc]

@@ -86,7 +86,7 @@ class BaseStore(ABC, Generic[K, V]):

Returns:
A sequence of optional values associated with the keys.
If a key is not found, the corresponding value will be `None`.
If a key is not found, the corresponding value will be None.
"""

async def amget(self, keys: Sequence[K]) -> list[V | None]:
@@ -97,7 +97,7 @@ class BaseStore(ABC, Generic[K, V]):

Returns:
A sequence of optional values associated with the keys.
If a key is not found, the corresponding value will be `None`.
If a key is not found, the corresponding value will be None.
"""
return await run_in_executor(None, self.mget, keys)
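A quick sketch of the `mget` contract documented above, using the concrete `InMemoryStore`; the key names are illustrative:

```python
from langchain_core.stores import InMemoryStore

store = InMemoryStore()
store.mset([("k1", 1), ("k2", 2)])
# Missing keys come back as None rather than raising.
print(store.mget(["k1", "missing", "k2"]))  # [1, None, 2]
```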

@@ -243,7 +243,8 @@ class InMemoryStore(InMemoryBaseStore[Any]):
"""In-memory store for any type of data.

Attributes:
store: The underlying dictionary that stores the key-value pairs.
store (dict[str, Any]): The underlying dictionary that stores
the key-value pairs.

Examples:
```python
@@ -266,7 +267,8 @@ class InMemoryByteStore(InMemoryBaseStore[bytes]):
"""In-memory store for bytes.

Attributes:
store: The underlying dictionary that stores the key-value pairs.
store (dict[str, bytes]): The underlying dictionary that stores
the key-value pairs.

Examples:
```python

@@ -125,11 +125,9 @@ def print_sys_info(*, additional_pkgs: Sequence[str] = ()) -> None:
for dep in sub_dependencies:
try:
dep_version = metadata.version(dep)
except Exception:
dep_version = None

if dep_version is not None:
print(f"> {dep}: {dep_version}")
except Exception:
print(f"> {dep}: Installed. No version info available.")


if __name__ == "__main__":

@@ -391,7 +391,6 @@ class BaseTool(RunnableSerializable[str | dict | ToolCall, Any]):
"""Base class for all LangChain tools.

This abstract class defines the interface that all LangChain tools must implement.

Tools are components that can be called by agents to perform specific actions.
"""

@@ -402,7 +401,7 @@ class BaseTool(RunnableSerializable[str | dict | ToolCall, Any]):
**kwargs: Additional keyword arguments passed to the parent class.

Raises:
SchemaAnnotationError: If `args_schema` has incorrect type annotation.
SchemaAnnotationError: If args_schema has incorrect type annotation.
"""
super().__init_subclass__(**kwargs)

@@ -443,15 +442,15 @@ class ChildTool(BaseTool):

Args schema should be either:

- A subclass of `pydantic.BaseModel`.
- A subclass of `pydantic.v1.BaseModel` if accessing v1 namespace in pydantic 2
- A JSON schema dict
- A subclass of pydantic.BaseModel.
- A subclass of pydantic.v1.BaseModel if accessing v1 namespace in pydantic 2
- a JSON schema dict
"""
return_direct: bool = False
"""Whether to return the tool's output directly.

Setting this to `True` means that after the tool is called, the `AgentExecutor` will
stop looping.
Setting this to True means
that after the tool is called, the AgentExecutor will stop looping.
"""
verbose: bool = False
"""Whether to log the tool's progress."""
@@ -461,37 +460,31 @@ class ChildTool(BaseTool):

tags: list[str] | None = None
"""Optional list of tags associated with the tool.

These tags will be associated with each call to this tool,
and passed as arguments to the handlers defined in `callbacks`.

You can use these to, e.g., identify a specific instance of a tool with its use
case.
You can use these to eg identify a specific instance of a tool with its use case.
"""
metadata: dict[str, Any] | None = None
"""Optional metadata associated with the tool.

This metadata will be associated with each call to this tool,
and passed as arguments to the handlers defined in `callbacks`.

You can use these to, e.g., identify a specific instance of a tool with its use
case.
You can use these to eg identify a specific instance of a tool with its use case.
"""

handle_tool_error: bool | str | Callable[[ToolException], str] | None = False
"""Handle the content of the `ToolException` thrown."""
"""Handle the content of the ToolException thrown."""

handle_validation_error: (
bool | str | Callable[[ValidationError | ValidationErrorV1], str] | None
) = False
"""Handle the content of the `ValidationError` thrown."""
"""Handle the content of the ValidationError thrown."""

response_format: Literal["content", "content_and_artifact"] = "content"
"""The tool response format.

If `'content'` then the output of the tool is interpreted as the contents of a
`ToolMessage`. If `'content_and_artifact'` then the output is expected to be a
two-tuple corresponding to the `(content, artifact)` of a `ToolMessage`.
If `"content"` then the output of the tool is interpreted as the contents of a
`ToolMessage`. If `"content_and_artifact"` then the output is expected to be a
two-tuple corresponding to the (content, artifact) of a `ToolMessage`.
"""

def __init__(self, **kwargs: Any) -> None:
@@ -499,7 +492,7 @@ class ChildTool(BaseTool):

Raises:
TypeError: If `args_schema` is not a subclass of pydantic `BaseModel` or
`dict`.
dict.
"""
if (
"args_schema" in kwargs
@@ -533,7 +526,7 @@ class ChildTool(BaseTool):
"""Get the tool's input arguments schema.

Returns:
`dict` containing the tool's argument properties.
Dictionary containing the tool's argument properties.
"""
if isinstance(self.args_schema, dict):
json_schema = self.args_schema
@@ -623,9 +616,9 @@ class ChildTool(BaseTool):

Raises:
ValueError: If `string` input is provided with JSON schema `args_schema`.
ValueError: If `InjectedToolCallId` is required but `tool_call_id` is not
ValueError: If InjectedToolCallId is required but `tool_call_id` is not
provided.
TypeError: If `args_schema` is not a Pydantic `BaseModel` or dict.
TypeError: If args_schema is not a Pydantic `BaseModel` or dict.
"""
input_args = self.args_schema
if isinstance(tool_input, str):
@@ -714,35 +707,6 @@ class ChildTool(BaseTool):
kwargs["run_manager"] = kwargs["run_manager"].get_sync()
return await run_in_executor(None, self._run, *args, **kwargs)

def _filter_injected_args(self, tool_input: dict) -> dict:
"""Filter out injected tool arguments from the input dictionary.

Injected arguments are those annotated with `InjectedToolArg` or its
subclasses, or arguments in `FILTERED_ARGS` like `run_manager` and callbacks.

Args:
tool_input: The tool input dictionary to filter.

Returns:
A filtered dictionary with injected arguments removed.
"""
# Start with filtered args from the constant
filtered_keys = set[str](FILTERED_ARGS)

# If we have an args_schema, use it to identify injected args
if self.args_schema is not None:
try:
annotations = get_all_basemodel_annotations(self.args_schema)
for field_name, field_type in annotations.items():
if _is_injected_arg_type(field_type):
filtered_keys.add(field_name)
except Exception: # noqa: S110
# If we can't get annotations, just use FILTERED_ARGS
pass

# Filter out the injected keys from tool_input
return {k: v for k, v in tool_input.items() if k not in filtered_keys}
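The filtering above keys off `InjectedToolArg` annotations; a minimal sketch of such an argument from the caller's side (the tool and its argument names are illustrative):

```python
from typing import Annotated
from langchain_core.tools import InjectedToolArg, tool

@tool
def lookup(query: str, user_id: Annotated[str, InjectedToolArg]) -> str:
    """Look up a query on behalf of a user."""
    return f"{user_id}: {query}"

# The model-facing schema only advertises `query`; `user_id` is injected at
# call time by the application rather than generated by the model.
print(lookup.tool_call_schema.model_json_schema()["properties"].keys())
```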

def _to_args_and_kwargs(
self, tool_input: str | dict, tool_call_id: str | None
) -> tuple[tuple, dict]:
@@ -753,7 +717,7 @@ class ChildTool(BaseTool):
tool_call_id: The ID of the tool call, if available.

Returns:
A tuple of `(positional_args, keyword_args)` for the tool.
A tuple of (positional_args, keyword_args) for the tool.

Raises:
TypeError: If the tool input type is invalid.
@@ -830,29 +794,17 @@ class ChildTool(BaseTool):
self.metadata,
)

# Filter out injected arguments from callback inputs
filtered_tool_input = (
self._filter_injected_args(tool_input)
if isinstance(tool_input, dict)
else None
)

# Use filtered inputs for the input_str parameter as well
tool_input_str = (
tool_input
if isinstance(tool_input, str)
else str(
filtered_tool_input if filtered_tool_input is not None else tool_input
)
)

run_manager = callback_manager.on_tool_start(
{"name": self.name, "description": self.description},
tool_input_str,
tool_input if isinstance(tool_input, str) else str(tool_input),
color=start_color,
name=run_name,
run_id=run_id,
inputs=filtered_tool_input,
# Inputs by definition should always be dicts.
# For now, it's unclear whether this assumption is ever violated,
# but if it is we will send a `None` value to the callback instead
# TODO: will need to address issue via a patch.
inputs=tool_input if isinstance(tool_input, dict) else None,
**kwargs,
)

@@ -872,19 +824,16 @@ class ChildTool(BaseTool):
tool_kwargs |= {config_param: config}
response = context.run(self._run, *tool_args, **tool_kwargs)
if self.response_format == "content_and_artifact":
msg = (
"Since response_format='content_and_artifact' "
"a two-tuple of the message content and raw tool output is "
f"expected. Instead, generated response is of type: "
f"{type(response)}."
)
if not isinstance(response, tuple):
if not isinstance(response, tuple) or len(response) != 2:
msg = (
"Since response_format='content_and_artifact' "
"a two-tuple of the message content and raw tool output is "
f"expected. Instead generated response of type: "
f"{type(response)}."
)
error_to_raise = ValueError(msg)
else:
try:
content, artifact = response
except ValueError:
error_to_raise = ValueError(msg)
content, artifact = response
else:
content = response
except (ValidationError, ValidationErrorV1) as e:
@@ -956,30 +905,17 @@ class ChildTool(BaseTool):
metadata,
self.metadata,
)

# Filter out injected arguments from callback inputs
filtered_tool_input = (
self._filter_injected_args(tool_input)
if isinstance(tool_input, dict)
else None
)

# Use filtered inputs for the input_str parameter as well
tool_input_str = (
tool_input
if isinstance(tool_input, str)
else str(
filtered_tool_input if filtered_tool_input is not None else tool_input
)
)

run_manager = await callback_manager.on_tool_start(
{"name": self.name, "description": self.description},
tool_input_str,
tool_input if isinstance(tool_input, str) else str(tool_input),
color=start_color,
name=run_name,
run_id=run_id,
inputs=filtered_tool_input,
# Inputs by definition should always be dicts.
# For now, it's unclear whether this assumption is ever violated,
# but if it is we will send a `None` value to the callback instead
# TODO: will need to address issue via a patch.
inputs=tool_input if isinstance(tool_input, dict) else None,
**kwargs,
)
content = None
@@ -1001,19 +937,16 @@ class ChildTool(BaseTool):
coro = self._arun(*tool_args, **tool_kwargs)
response = await coro_with_context(coro, context)
if self.response_format == "content_and_artifact":
msg = (
"Since response_format='content_and_artifact' "
"a two-tuple of the message content and raw tool output is "
f"expected. Instead, generated response is of type: "
f"{type(response)}."
)
if not isinstance(response, tuple):
if not isinstance(response, tuple) or len(response) != 2:
msg = (
"Since response_format='content_and_artifact' "
"a two-tuple of the message content and raw tool output is "
f"expected. Instead generated response of type: "
f"{type(response)}."
)
error_to_raise = ValueError(msg)
else:
try:
content, artifact = response
except ValueError:
error_to_raise = ValueError(msg)
content, artifact = response
else:
content = response
except ValidationError as e:
@@ -1061,7 +994,7 @@ def _handle_validation_error(

Args:
e: The validation error that occurred.
flag: How to handle the error (`bool`, `str`, or `Callable`).
flag: How to handle the error (bool, string, or callable).

Returns:
The error message to return.
@@ -1093,7 +1026,7 @@ def _handle_tool_error(

Args:
e: The tool exception that occurred.
flag: How to handle the error (`bool`, `str`, or `Callable`).
flag: How to handle the error (bool, string, or callable).

Returns:
The error message to return.
@@ -1124,12 +1057,12 @@ def _prep_run_args(
"""Prepare arguments for tool execution.

Args:
value: The input value (`str`, `dict`, or `ToolCall`).
value: The input value (string, dict, or ToolCall).
config: The runnable configuration.
**kwargs: Additional keyword arguments.

Returns:
A tuple of `(tool_input, run_kwargs)`.
A tuple of (tool_input, run_kwargs).
"""
config = ensure_config(config)
if _is_tool_call(value):
@@ -1160,7 +1093,7 @@ def _format_output(
name: str,
status: str,
) -> ToolOutputMixin | Any:
"""Format tool output as a `ToolMessage` if appropriate.
"""Format tool output as a ToolMessage if appropriate.

Args:
content: The main content of the tool output.
@@ -1170,7 +1103,7 @@ def _format_output(
status: The execution status.

Returns:
The formatted output, either as a `ToolMessage` or the original content.
The formatted output, either as a ToolMessage or the original content.
"""
if isinstance(content, ToolOutputMixin) or tool_call_id is None:
return content
@@ -1241,7 +1174,7 @@ def _get_type_hints(func: Callable) -> dict[str, type] | None:
func: The function to get type hints from.

Returns:
`dict` of type hints, or `None` if extraction fails.
Dictionary of type hints, or None if extraction fails.
"""
if isinstance(func, functools.partial):
func = func.func
@@ -1252,13 +1185,13 @@ def _get_type_hints(func: Callable) -> dict[str, type] | None:


def _get_runnable_config_param(func: Callable) -> str | None:
"""Find the parameter name for `RunnableConfig` in a function.
"""Find the parameter name for RunnableConfig in a function.

Args:
func: The function to check.

Returns:
The parameter name for `RunnableConfig`, or `None` if not found.
The parameter name for RunnableConfig, or None if not found.
"""
type_hints = _get_type_hints(func)
if not type_hints:
@@ -1282,11 +1215,9 @@ class _DirectlyInjectedToolArg:

Injected via direct type annotation, rather than annotated metadata.

For example, `ToolRuntime` is a directly injected argument.

For example, ToolRuntime is a directly injected argument.
Note the direct annotation rather than the verbose alternative:
`Annotated[ToolRuntime, InjectedRuntime]`

Annotated[ToolRuntime, InjectedRuntime]
```python
from langchain_core.tools import tool, ToolRuntime

@@ -1329,11 +1260,11 @@ class InjectedToolCallId(InjectedToolArg):
def _is_directly_injected_arg_type(type_: Any) -> bool:
"""Check if a type annotation indicates a directly injected argument.

This is currently only used for `ToolRuntime`.
Checks if either the annotation itself is a subclass of `_DirectlyInjectedToolArg`
or the origin of the annotation is a subclass of `_DirectlyInjectedToolArg`.
This is currently only used for ToolRuntime.
Checks if either the annotation itself is a subclass of _DirectlyInjectedToolArg
or the origin of the annotation is a subclass of _DirectlyInjectedToolArg.

Ex: `ToolRuntime` or `ToolRuntime[ContextT, StateT]` would both return `True`.
Ex: ToolRuntime or ToolRuntime[ContextT, StateT] would both return True.
"""
return (
isinstance(type_, type) and issubclass(type_, _DirectlyInjectedToolArg)
@@ -1375,14 +1306,14 @@ def _is_injected_arg_type(
def get_all_basemodel_annotations(
cls: TypeBaseModel | Any, *, default_to_bound: bool = True
) -> dict[str, type | TypeVar]:
"""Get all annotations from a Pydantic `BaseModel` and its parents.
"""Get all annotations from a Pydantic BaseModel and its parents.

Args:
cls: The Pydantic `BaseModel` class.
default_to_bound: Whether to default to the bound of a `TypeVar` if it exists.
cls: The Pydantic BaseModel class.
default_to_bound: Whether to default to the bound of a TypeVar if it exists.

Returns:
`dict` of field names to their type annotations.
A dictionary of field names to their type annotations.
"""
# cls has no subscript: cls = FooBar
if isinstance(cls, type):
@@ -1448,15 +1379,15 @@ def _replace_type_vars(
*,
default_to_bound: bool = True,
) -> type | TypeVar:
"""Replace `TypeVar`s in a type annotation with concrete types.
"""Replace TypeVars in a type annotation with concrete types.

Args:
type_: The type annotation to process.
generic_map: Mapping of `TypeVar`s to concrete types.
default_to_bound: Whether to use `TypeVar` bounds as defaults.
generic_map: Mapping of TypeVars to concrete types.
default_to_bound: Whether to use TypeVar bounds as defaults.

Returns:
The type with `TypeVar`s replaced.
The type with TypeVars replaced.
"""
generic_map = generic_map or {}
if isinstance(type_, TypeVar):

@@ -81,72 +81,57 @@ def tool(
parse_docstring: bool = False,
error_on_invalid_docstring: bool = True,
) -> BaseTool | Callable[[Callable | Runnable], BaseTool]:
"""Convert Python functions and `Runnables` to LangChain tools.

Can be used as a decorator with or without arguments to create tools from functions.

Functions can have any signature - the tool will automatically infer input schemas
unless disabled.

!!! note "Requirements"
- Functions must have type hints for proper schema inference
- When `infer_schema=False`, functions must be `(str) -> str` and have
docstrings
- When using with `Runnable`, a string name must be provided
"""Make tools out of Python functions, can be used with or without arguments.

Args:
name_or_callable: Optional name of the tool or the `Callable` to be
converted to a tool. Overrides the function's name.

Must be provided as a positional argument.
runnable: Optional `Runnable` to convert to a tool.

Must be provided as a positional argument.
name_or_callable: Optional name of the tool or the callable to be
converted to a tool. Must be provided as a positional argument.
runnable: Optional runnable to convert to a tool. Must be provided as a
positional argument.
description: Optional description for the tool.

Precedence for the tool description value is as follows:

- This `description` argument
- `description` argument
(used even if docstring and/or `args_schema` are provided)
- Tool function docstring
(used even if `args_schema` is provided)
- `args_schema` description
(used only if `description` and docstring are not provided)
(used only if `description` / docstring are not provided)
*args: Extra positional arguments. Must be empty.
return_direct: Whether to return directly from the tool rather than continuing
the agent loop.
return_direct: Whether to return directly from the tool rather
than continuing the agent loop.
args_schema: Optional argument schema for user to specify.
infer_schema: Whether to infer the schema of the arguments from the function's
signature. This also makes the resultant tool accept a dictionary input to
its `run()` function.
response_format: The tool response format.

If `'content'`, then the output of the tool is interpreted as the contents
of a `ToolMessage`.

If `'content_and_artifact'`, then the output is expected to be a two-tuple
infer_schema: Whether to infer the schema of the arguments from
the function's signature. This also makes the resultant tool
accept a dictionary input to its `run()` function.
response_format: The tool response format. If `"content"` then the output of
the tool is interpreted as the contents of a `ToolMessage`. If
`"content_and_artifact"` then the output is expected to be a two-tuple
corresponding to the `(content, artifact)` of a `ToolMessage`.
parse_docstring: If `infer_schema` and `parse_docstring`, will attempt to
parse_docstring: if `infer_schema` and `parse_docstring`, will attempt to
parse parameter descriptions from Google Style function docstrings.
error_on_invalid_docstring: If `parse_docstring` is provided, configure
error_on_invalid_docstring: if `parse_docstring` is provided, configure
whether to raise `ValueError` on invalid Google Style docstrings.

Raises:
ValueError: If too many positional arguments are provided (e.g. violating the
`*args` constraint).
ValueError: If a `Runnable` is provided without a string name. When using `tool`
with a `Runnable`, a `str` name must be provided as the `name_or_callable`.
ValueError: If too many positional arguments are provided.
ValueError: If a runnable is provided without a string name.
ValueError: If the first argument is not a string or callable with
a `__name__` attribute.
ValueError: If the function does not have a docstring and description
is not provided and `infer_schema` is `False`.
ValueError: If `parse_docstring` is `True` and the function has an invalid
Google-style docstring and `error_on_invalid_docstring` is True.
ValueError: If a `Runnable` is provided that does not have an object schema.
ValueError: If a Runnable is provided that does not have an object schema.

Returns:
The tool.

Requires:
- Function must be of type `(str) -> str`
- Function must have a docstring

Examples:
```python
@tool
@@ -166,6 +151,8 @@ def tool(
return "partial json of results", {"full": "object of results"}
```

!!! version-added "Added in version 0.2.14"

Parse Google-style docstrings:

```python

@@ -83,12 +83,11 @@ def create_retriever_tool(
model, so should be descriptive.
document_prompt: The prompt to use for the document.
document_separator: The separator to use between documents.
response_format: The tool response format.

If `"content"` then the output of the tool is interpreted as the contents of
a `ToolMessage`. If `"content_and_artifact"` then the output is expected to
be a two-tuple corresponding to the `(content, artifact)` of a `ToolMessage`
(artifact being a list of documents in this case).
response_format: The tool response format. If `"content"` then the output of
the tool is interpreted as the contents of a `ToolMessage`. If
`"content_and_artifact"` then the output is expected to be a two-tuple
corresponding to the `(content, artifact)` of a `ToolMessage` (artifact
being a list of documents in this case).

Returns:
Tool class to pass to an agent.

@@ -151,13 +151,11 @@ class StructuredTool(BaseTool):
return_direct: Whether to return the result directly or as a callback.
args_schema: The schema of the tool's input arguments.
infer_schema: Whether to infer the schema from the function's signature.
response_format: The tool response format.

If `"content"` then the output of the tool is interpreted as the
contents of a `ToolMessage`. If `"content_and_artifact"` then the output
is expected to be a two-tuple corresponding to the `(content, artifact)`
of a `ToolMessage`.
parse_docstring: If `infer_schema` and `parse_docstring`, will attempt
response_format: The tool response format. If `"content"` then the output of
the tool is interpreted as the contents of a `ToolMessage`. If
`"content_and_artifact"` then the output is expected to be a two-tuple
corresponding to the `(content, artifact)` of a `ToolMessage`.
parse_docstring: if `infer_schema` and `parse_docstring`, will attempt
to parse parameter descriptions from Google Style function docstrings.
error_on_invalid_docstring: if `parse_docstring` is provided, configure
whether to raise `ValueError` on invalid Google Style docstrings.

@@ -96,10 +96,10 @@ class RunLogPatch:
"""Patch to the run log."""

ops: list[dict[str, Any]]
"""List of JSONPatch operations, which describe how to create the run state
"""List of jsonpatch operations, which describe how to create the run state
from an empty dict. This is the minimal representation of the log, designed to
be serialized as JSON and sent over the wire to reconstruct the log on the other
side. Reconstruction of the state can be done with any JSONPatch-compliant library,
side. Reconstruction of the state can be done with any jsonpatch-compliant library,
see https://jsonpatch.com for more information."""

def __init__(self, *ops: dict[str, Any]) -> None:

@@ -351,7 +351,7 @@ def convert_to_openai_function(
Raises:
ValueError: If function is not in a supported format.

!!! warning "Behavior changed in `langchain-core` 0.3.16"
!!! warning "Behavior changed in 0.3.16"
`description` and `parameters` keys are now optional. Only `name` is
required and guaranteed to be part of the output.
"""
@@ -412,7 +412,7 @@ def convert_to_openai_function(
if strict is not None:
if "strict" in oai_function and oai_function["strict"] != strict:
msg = (
f"Tool/function already has a 'strict' key with value "
f"{oai_function['strict']} which is different from the explicit "
f"`strict` arg received {strict=}."
)
@@ -425,14 +425,6 @@ def convert_to_openai_function(
oai_function["parameters"] = _recursive_set_additional_properties_false(
oai_function["parameters"]
)
# All fields must be `required`
parameters = oai_function.get("parameters")
if isinstance(parameters, dict):
fields = parameters.get("properties")
if isinstance(fields, dict) and fields:
parameters = dict(parameters)
parameters["required"] = list(fields.keys())
oai_function["parameters"] = parameters
return oai_function
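A minimal sketch of the strict-mode conversion handled above (the `add` tool is an illustrative assumption):

```python
from langchain_core.tools import tool
from langchain_core.utils.function_calling import convert_to_openai_tool

@tool
def add(a: int, b: int) -> int:
    """Add two integers."""
    return a + b

# With strict=True, additionalProperties is set to false and every
# property is listed under "required", as the code above enforces.
spec = convert_to_openai_tool(add, strict=True)
print(spec["function"]["strict"], spec["function"]["parameters"]["required"])
```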


@@ -475,16 +467,16 @@ def convert_to_openai_tool(
A dict version of the passed in tool which is compatible with the
OpenAI tool-calling API.

!!! warning "Behavior changed in `langchain-core` 0.3.16"
!!! warning "Behavior changed in 0.3.16"
`description` and `parameters` keys are now optional. Only `name` is
required and guaranteed to be part of the output.

!!! warning "Behavior changed in `langchain-core` 0.3.44"
!!! warning "Behavior changed in 0.3.44"
Return OpenAI Responses API-style tools unchanged. This includes
any dict with `"type"` in `"file_search"`, `"function"`,
`"computer_use_preview"`, `"web_search_preview"`.

!!! warning "Behavior changed in `langchain-core` 0.3.63"
!!! warning "Behavior changed in 0.3.63"
Added support for OpenAI's image generation built-in tool.
"""
# Import locally to prevent circular import
@@ -653,9 +645,6 @@ def tool_example_to_messages(
return messages


_MIN_DOCSTRING_BLOCKS = 2


def _parse_google_docstring(
docstring: str | None,
args: list[str],
@@ -674,7 +663,7 @@ def _parse_google_docstring(
arg for arg in args if arg not in {"run_manager", "callbacks", "return"}
}
if filtered_annotations and (
len(docstring_blocks) < _MIN_DOCSTRING_BLOCKS
len(docstring_blocks) < 2
or not any(block.startswith("Args:") for block in docstring_blocks[1:])
):
msg = "Found invalid Google-Style docstring."

@@ -26,9 +26,6 @@ def get_color_mapping(
colors = list(_TEXT_COLOR_MAPPING.keys())
if excluded_colors is not None:
colors = [c for c in colors if c not in excluded_colors]
if not colors:
msg = "No colors available after applying exclusions."
raise ValueError(msg)
return {item: colors[i % len(colors)] for i, item in enumerate(items)}



@@ -65,8 +65,8 @@ def get_pydantic_major_version() -> int:
PYDANTIC_MAJOR_VERSION = PYDANTIC_VERSION.major
PYDANTIC_MINOR_VERSION = PYDANTIC_VERSION.minor

IS_PYDANTIC_V1 = False
IS_PYDANTIC_V2 = True
IS_PYDANTIC_V1 = PYDANTIC_VERSION.major == 1
IS_PYDANTIC_V2 = PYDANTIC_VERSION.major == 2

PydanticBaseModel = BaseModel
TypeBaseModel = type[BaseModel]

@@ -30,7 +30,10 @@ def stringify_dict(data: dict) -> str:
Returns:
The stringified dictionary.
"""
return "".join(f"{key}: {stringify_value(value)}\n" for key, value in data.items())
text = ""
for key, value in data.items():
text += key + ": " + stringify_value(value) + "\n"
return text
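Both variants above produce the same newline-terminated `key: value` text; a small illustrative usage, assuming the helper is importable from `langchain_core.utils`:

```python
from langchain_core.utils import stringify_dict

# Prints "title: Report" and "pages: 3" on separate lines.
print(stringify_dict({"title": "Report", "pages": 3}))
```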


def comma_list(items: list[Any]) -> str:

@@ -218,7 +218,7 @@ def _build_model_kwargs(
values: dict[str, Any],
all_required_field_names: set[str],
) -> dict[str, Any]:
"""Build `model_kwargs` param from Pydantic constructor values.
"""Build "model_kwargs" param from Pydantic constructor values.

Args:
values: All init args passed in by user.
@@ -228,8 +228,8 @@ def _build_model_kwargs(
Extra kwargs.

Raises:
ValueError: If a field is specified in both `values` and `extra_kwargs`.
ValueError: If a field is specified in `model_kwargs`.
ValueError: If a field is specified in both values and extra_kwargs.
ValueError: If a field is specified in model_kwargs.
"""
extra_kwargs = values.get("model_kwargs", {})
for field_name in list(values):
@@ -267,10 +267,6 @@ def build_extra_kwargs(
) -> dict[str, Any]:
"""Build extra kwargs from values and extra_kwargs.

!!! danger "DON'T USE"
Kept for backwards-compatibility but should never have been public. Use the
internal `_build_model_kwargs` function instead.

Args:
extra_kwargs: Extra kwargs passed in by user.
values: Values passed in by user.
@@ -280,10 +276,9 @@ def build_extra_kwargs(
Extra kwargs.

Raises:
ValueError: If a field is specified in both `values` and `extra_kwargs`.
ValueError: If a field is specified in `model_kwargs`.
ValueError: If a field is specified in both values and extra_kwargs.
ValueError: If a field is specified in model_kwargs.
"""
# DON'T USE! Kept for backwards-compatibility but should never have been public.
for field_name in list(values):
if field_name in extra_kwargs:
msg = f"Found {field_name} supplied twice."
@@ -297,7 +292,6 @@ def build_extra_kwargs(
)
extra_kwargs[field_name] = values.pop(field_name)

# DON'T USE! Kept for backwards-compatibility but should never have been public.
invalid_model_kwargs = all_required_field_names.intersection(extra_kwargs.keys())
if invalid_model_kwargs:
msg = (
@@ -306,7 +300,6 @@ def build_extra_kwargs(
)
raise ValueError(msg)

# DON'T USE! Kept for backwards-compatibility but should never have been public.
return extra_kwargs


@@ -58,16 +58,16 @@ class VectorStore(ABC):
texts: Iterable of strings to add to the `VectorStore`.
metadatas: Optional list of metadatas associated with the texts.
ids: Optional list of IDs associated with the texts.
**kwargs: `VectorStore` specific parameters.
**kwargs: vectorstore specific parameters.
One of the kwargs should be `ids` which is a list of ids
associated with the texts.

Returns:
List of IDs from adding the texts into the `VectorStore`.
List of ids from adding the texts into the `VectorStore`.

Raises:
ValueError: If the number of metadatas does not match the number of texts.
ValueError: If the number of IDs does not match the number of texts.
ValueError: If the number of ids does not match the number of texts.
"""
if type(self).add_documents != VectorStore.add_documents:
# This condition is triggered if the subclass has provided
@@ -109,12 +109,11 @@ class VectorStore(ABC):
"""Delete by vector ID or other criteria.

Args:
ids: List of IDs to delete. If `None`, delete all.
ids: List of ids to delete. If `None`, delete all.
**kwargs: Other keyword arguments that subclasses might use.

Returns:
`True` if deletion is successful, `False` otherwise, `None` if not
implemented.
True if deletion is successful, False otherwise, None if not implemented.
"""
msg = "delete method must be implemented by subclass."
raise NotImplementedError(msg)
@@ -136,10 +135,12 @@ class VectorStore(ABC):
some IDs.

Args:
ids: List of IDs to retrieve.
ids: List of ids to retrieve.

Returns:
List of `Document` objects.
List of Documents.

!!! version-added "Added in version 0.2.11"
"""
msg = f"{self.__class__.__name__} does not yet support get_by_ids."
raise NotImplementedError(msg)
@@ -162,10 +163,12 @@ class VectorStore(ABC):
some IDs.

Args:
ids: List of IDs to retrieve.
ids: List of ids to retrieve.

Returns:
List of `Document` objects.
List of Documents.

!!! version-added "Added in version 0.2.11"
"""
return await run_in_executor(None, self.get_by_ids, ids)

@@ -173,12 +176,11 @@ class VectorStore(ABC):
"""Async delete by vector ID or other criteria.

Args:
ids: List of IDs to delete. If `None`, delete all.
ids: List of ids to delete. If `None`, delete all.
**kwargs: Other keyword arguments that subclasses might use.

Returns:
`True` if deletion is successful, `False` otherwise, `None` if not
implemented.
True if deletion is successful, False otherwise, None if not implemented.
"""
return await run_in_executor(None, self.delete, ids, **kwargs)

@@ -196,14 +198,14 @@ class VectorStore(ABC):
texts: Iterable of strings to add to the `VectorStore`.
metadatas: Optional list of metadatas associated with the texts.
ids: Optional list
**kwargs: `VectorStore` specific parameters.
**kwargs: vectorstore specific parameters.

Returns:
List of IDs from adding the texts into the `VectorStore`.
List of ids from adding the texts into the `VectorStore`.

Raises:
ValueError: If the number of metadatas does not match the number of texts.
ValueError: If the number of IDs does not match the number of texts.
ValueError: If the number of ids does not match the number of texts.
"""
if ids is not None:
# For backward compatibility
@@ -232,14 +234,13 @@ class VectorStore(ABC):
return await run_in_executor(None, self.add_texts, texts, metadatas, **kwargs)

def add_documents(self, documents: list[Document], **kwargs: Any) -> list[str]:
"""Add or update documents in the `VectorStore`.
"""Add or update documents in the vectorstore.

Args:
documents: Documents to add to the `VectorStore`.
**kwargs: Additional keyword arguments.

If kwargs contains IDs and documents contain ids, the IDs in the kwargs
will receive precedence.
if kwargs contains ids and documents contain ids,
the ids in the kwargs will receive precedence.

Returns:
List of IDs of the added texts.
@@ -294,17 +295,17 @@ class VectorStore(ABC):
"""Return docs most similar to query using a specified search type.

Args:
query: Input text.
search_type: Type of search to perform. Can be `'similarity'`, `'mmr'`, or
`'similarity_score_threshold'`.
query: Input text
search_type: Type of search to perform. Can be "similarity",
"mmr", or "similarity_score_threshold".
**kwargs: Arguments to pass to the search method.

Returns:
List of `Document` objects most similar to the query.

Raises:
ValueError: If `search_type` is not one of `'similarity'`,
`'mmr'`, or `'similarity_score_threshold'`.
ValueError: If search_type is not one of "similarity",
"mmr", or "similarity_score_threshold".
"""
if search_type == "similarity":
return self.similarity_search(query, **kwargs)
@@ -329,16 +330,16 @@ class VectorStore(ABC):

Args:
query: Input text.
search_type: Type of search to perform. Can be `'similarity'`, `'mmr'`, or
`'similarity_score_threshold'`.
search_type: Type of search to perform. Can be "similarity",
"mmr", or "similarity_score_threshold".
**kwargs: Arguments to pass to the search method.

Returns:
List of `Document` objects most similar to the query.

Raises:
ValueError: If `search_type` is not one of `'similarity'`,
`'mmr'`, or `'similarity_score_threshold'`.
ValueError: If search_type is not one of "similarity",
"mmr", or "similarity_score_threshold".
"""
if search_type == "similarity":
return await self.asimilarity_search(query, **kwargs)
@@ -363,7 +364,7 @@ class VectorStore(ABC):

Args:
query: Input text.
k: Number of `Document` objects to return.
k: Number of Documents to return.
**kwargs: Arguments to pass to the search method.

Returns:
@@ -422,7 +423,7 @@ class VectorStore(ABC):
**kwargs: Arguments to pass to the search method.

Returns:
List of tuples of `(doc, similarity_score)`.
List of Tuples of `(doc, similarity_score)`.
"""
raise NotImplementedError

@@ -436,7 +437,7 @@ class VectorStore(ABC):
**kwargs: Arguments to pass to the search method.

Returns:
List of tuples of `(doc, similarity_score)`.
List of Tuples of `(doc, similarity_score)`.
"""
# This is a temporary workaround to make the similarity search
# asynchronous. The proper solution is to make the similarity search
@@ -460,13 +461,13 @@ class VectorStore(ABC):

Args:
query: Input text.
k: Number of `Document` objects to return.
k: Number of Documents to return.
**kwargs: kwargs to be passed to similarity search. Should include
`score_threshold`: an optional floating point value between `0` and `1`
to filter the resulting set of retrieved docs

Returns:
List of tuples of `(doc, similarity_score)`
List of Tuples of `(doc, similarity_score)`
"""
relevance_score_fn = self._select_relevance_score_fn()
docs_and_scores = self.similarity_search_with_score(query, k, **kwargs)
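A sketch of how a caller might exercise the relevance-score path above; `InMemoryVectorStore` and the fake embedding are illustrative assumptions:

```python
from langchain_core.embeddings import DeterministicFakeEmbedding
from langchain_core.vectorstores import InMemoryVectorStore

store = InMemoryVectorStore(DeterministicFakeEmbedding(size=8))
store.add_texts(["alpha", "beta", "gamma"])
# Scores are normalized to [0, 1]; score_threshold drops weak matches.
for doc, score in store.similarity_search_with_relevance_scores(
    "alpha", k=2, score_threshold=0.0
):
    print(doc.page_content, round(score, 3))
```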
@@ -487,13 +488,13 @@ class VectorStore(ABC):

Args:
query: Input text.
k: Number of `Document` objects to return.
k: Number of Documents to return.
**kwargs: kwargs to be passed to similarity search. Should include
`score_threshold`: an optional floating point value between `0` and `1`
to filter the resulting set of retrieved docs

Returns:
List of tuples of `(doc, similarity_score)`
List of Tuples of `(doc, similarity_score)`
"""
relevance_score_fn = self._select_relevance_score_fn()
docs_and_scores = await self.asimilarity_search_with_score(query, k, **kwargs)
@@ -511,13 +512,13 @@ class VectorStore(ABC):

Args:
query: Input text.
k: Number of `Document` objects to return.
k: Number of Documents to return.
**kwargs: kwargs to be passed to similarity search. Should include
`score_threshold`: an optional floating point value between `0` and `1`
to filter the resulting set of retrieved docs

Returns:
List of tuples of `(doc, similarity_score)`.
List of Tuples of `(doc, similarity_score)`.
"""
score_threshold = kwargs.pop("score_threshold", None)

@@ -560,13 +561,13 @@ class VectorStore(ABC):

Args:
query: Input text.
k: Number of `Document` objects to return.
k: Number of Documents to return.
**kwargs: kwargs to be passed to similarity search. Should include
`score_threshold`: an optional floating point value between `0` and `1`
to filter the resulting set of retrieved docs

Returns:
List of tuples of `(doc, similarity_score)`
List of Tuples of `(doc, similarity_score)`
"""
score_threshold = kwargs.pop("score_threshold", None)

@@ -604,7 +605,7 @@ class VectorStore(ABC):

Args:
query: Input text.
k: Number of `Document` objects to return.
k: Number of Documents to return.
**kwargs: Arguments to pass to the search method.

Returns:
@@ -622,7 +623,7 @@ class VectorStore(ABC):

Args:
embedding: Embedding to look up documents similar to.
k: Number of `Document` objects to return.
k: Number of Documents to return.
**kwargs: Arguments to pass to the search method.

Returns:
@@ -637,7 +638,7 @@ class VectorStore(ABC):

Args:
embedding: Embedding to look up documents similar to.
k: Number of `Document` objects to return.
k: Number of Documents to return.
**kwargs: Arguments to pass to the search method.

Returns:
@@ -665,11 +666,11 @@ class VectorStore(ABC):

Args:
query: Text to look up documents similar to.
k: Number of `Document` objects to return.
fetch_k: Number of `Document` objects to fetch to pass to MMR algorithm.
lambda_mult: Number between `0` and `1` that determines the degree
of diversity among the results with `0` corresponding
to maximum diversity and `1` to minimum diversity.
k: Number of Documents to return.
fetch_k: Number of Documents to fetch to pass to MMR algorithm.
lambda_mult: Number between 0 and 1 that determines the degree
of diversity among the results with 0 corresponding
to maximum diversity and 1 to minimum diversity.
**kwargs: Arguments to pass to the search method.

Returns:
@@ -692,11 +693,11 @@ class VectorStore(ABC):

Args:
query: Text to look up documents similar to.
k: Number of `Document` objects to return.
fetch_k: Number of `Document` objects to fetch to pass to MMR algorithm.
lambda_mult: Number between `0` and `1` that determines the degree
of diversity among the results with `0` corresponding
to maximum diversity and `1` to minimum diversity.
k: Number of Documents to return.
fetch_k: Number of Documents to fetch to pass to MMR algorithm.
lambda_mult: Number between 0 and 1 that determines the degree
of diversity among the results with 0 corresponding
to maximum diversity and 1 to minimum diversity.
**kwargs: Arguments to pass to the search method.

Returns:
@@ -730,11 +731,11 @@ class VectorStore(ABC):

Args:
embedding: Embedding to look up documents similar to.
k: Number of `Document` objects to return.
fetch_k: Number of `Document` objects to fetch to pass to MMR algorithm.
lambda_mult: Number between `0` and `1` that determines the degree
of diversity among the results with `0` corresponding
to maximum diversity and `1` to minimum diversity.
k: Number of Documents to return.
fetch_k: Number of Documents to fetch to pass to MMR algorithm.
lambda_mult: Number between 0 and 1 that determines the degree
of diversity among the results with 0 corresponding
to maximum diversity and 1 to minimum diversity.
**kwargs: Arguments to pass to the search method.

Returns:
@@ -757,11 +758,11 @@ class VectorStore(ABC):

Args:
embedding: Embedding to look up documents similar to.
k: Number of `Document` objects to return.
fetch_k: Number of `Document` objects to fetch to pass to MMR algorithm.
lambda_mult: Number between `0` and `1` that determines the degree
of diversity among the results with `0` corresponding
to maximum diversity and `1` to minimum diversity.
k: Number of Documents to return.
fetch_k: Number of Documents to fetch to pass to MMR algorithm.
lambda_mult: Number between 0 and 1 that determines the degree
of diversity among the results with 0 corresponding
to maximum diversity and 1 to minimum diversity.
**kwargs: Arguments to pass to the search method.

Returns:
@@ -848,7 +849,7 @@ class VectorStore(ABC):
ids: list[str] | None = None,
**kwargs: Any,
) -> VST:
"""Return `VectorStore` initialized from texts and embeddings.
"""Return VectorStore initialized from texts and embeddings.

Args:
texts: Texts to add to the `VectorStore`.
@@ -858,7 +859,7 @@ class VectorStore(ABC):
**kwargs: Additional keyword arguments.

Returns:
`VectorStore` initialized from texts and embeddings.
VectorStore initialized from texts and embeddings.
"""

@classmethod
@@ -871,7 +872,7 @@ class VectorStore(ABC):
ids: list[str] | None = None,
**kwargs: Any,
) -> Self:
"""Async return `VectorStore` initialized from texts and embeddings.
"""Async return VectorStore initialized from texts and embeddings.

Args:
texts: Texts to add to the `VectorStore`.
@@ -881,7 +882,7 @@ class VectorStore(ABC):
**kwargs: Additional keyword arguments.

Returns:
`VectorStore` initialized from texts and embeddings.
VectorStore initialized from texts and embeddings.
"""
if ids is not None:
kwargs["ids"] = ids
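A usage sketch of the `from_texts` constructor documented above, again using the in-memory store and fake embeddings from langchain-core as stand-ins:

```python
from langchain_core.embeddings import DeterministicFakeEmbedding
from langchain_core.vectorstores import InMemoryVectorStore

# from_texts builds the store and indexes the texts in one call;
# ids are optional and, as the hunk shows, forwarded via kwargs when given.
store = InMemoryVectorStore.from_texts(
    ["doc one", "doc two"],
    embedding=DeterministicFakeEmbedding(size=8),
    ids=["id-1", "id-2"],
)
```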
@@ -902,21 +903,19 @@
Args:
**kwargs: Keyword arguments to pass to the search function.
Can include:

* `search_type`: Defines the type of search that the Retriever should
perform. Can be `'similarity'` (default), `'mmr'`, or
`'similarity_score_threshold'`.
* `search_kwargs`: Keyword arguments to pass to the search function. Can
search_type: Defines the type of search that the Retriever should
perform. Can be "similarity" (default), "mmr", or
"similarity_score_threshold".
search_kwargs: Keyword arguments to pass to the search function. Can
include things like:

* `k`: Amount of documents to return (Default: `4`)
* `score_threshold`: Minimum relevance threshold
for `similarity_score_threshold`
* `fetch_k`: Amount of documents to pass to MMR algorithm
(Default: `20`)
* `lambda_mult`: Diversity of results returned by MMR;
`1` for minimum diversity and 0 for maximum. (Default: `0.5`)
* `filter`: Filter by document metadata
k: Amount of documents to return (Default: 4)
score_threshold: Minimum relevance threshold
for similarity_score_threshold
fetch_k: Amount of documents to pass to MMR algorithm
(Default: 20)
lambda_mult: Diversity of results returned by MMR;
1 for minimum diversity and 0 for maximum. (Default: 0.5)
filter: Filter by document metadata

Returns:
Retriever class for `VectorStore`.
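These options map directly onto `as_retriever`; a brief usage sketch, continuing the `store` built in the earlier examples:

```python
# Build a retriever that uses MMR with the tuning knobs documented above.
retriever = store.as_retriever(
    search_type="mmr",
    search_kwargs={"k": 4, "fetch_k": 20, "lambda_mult": 0.5},
)
docs = retriever.invoke("What indexes embeddings?")
```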
@@ -959,7 +958,7 @@ class VectorStoreRetriever(BaseRetriever):
vectorstore: VectorStore
"""VectorStore to use for retrieval."""
search_type: str = "similarity"
"""Type of search to perform."""
"""Type of search to perform. Defaults to "similarity"."""
search_kwargs: dict = Field(default_factory=dict)
"""Keyword arguments to pass to the search function."""
allowed_search_types: ClassVar[Collection[str]] = (
@@ -984,8 +983,8 @@ class VectorStoreRetriever(BaseRetriever):
Validated values.

Raises:
ValueError: If `search_type` is not one of the allowed search types.
ValueError: If `score_threshold` is not specified with a float value(`0~1`)
ValueError: If search_type is not one of the allowed search types.
ValueError: If score_threshold is not specified with a float value(0~1)
"""
search_type = values.get("search_type", "similarity")
if search_type not in cls.allowed_search_types:
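For context, the validator this hunk touches enforces roughly the following shape; this is a simplified sketch under that assumption, not the exact langchain-core code:

```python
ALLOWED_SEARCH_TYPES = ("similarity", "similarity_score_threshold", "mmr")

def validate_search_type(values: dict) -> dict:
    """Reject unknown search types; require a float score_threshold when
    search_type is 'similarity_score_threshold'."""
    search_type = values.get("search_type", "similarity")
    if search_type not in ALLOWED_SEARCH_TYPES:
        raise ValueError(f"search_type of {search_type} not allowed.")
    if search_type == "similarity_score_threshold":
        threshold = values.get("search_kwargs", {}).get("score_threshold")
        if not isinstance(threshold, float):
            raise ValueError(
                "`score_threshold` is not specified with a float value(0~1) "
                "in `search_kwargs`."
            )
    return values
```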
@@ -257,10 +257,10 @@ class InMemoryVectorStore(VectorStore):
"""Get documents by their ids.

Args:
ids: The IDs of the documents to get.
ids: The ids of the documents to get.

Returns:
A list of `Document` objects.
A list of Document objects.
"""
documents = []

@@ -281,10 +281,10 @@ class InMemoryVectorStore(VectorStore):
"""Async get documents by their ids.

Args:
ids: The IDs of the documents to get.
ids: The ids of the documents to get.

Returns:
A list of `Document` objects.
A list of Document objects.
"""
return self.get_by_ids(ids)
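Usage of the accessor these two hunks document, continuing the earlier `from_texts` sketch (the ids are the ones passed there):

```python
# Fetch stored documents directly by id; results follow the order of ids.
docs = store.get_by_ids(["id-1", "id-2"])

# As the second hunk shows, the async variant simply delegates to the
# sync one for the in-memory store:
# docs = await store.aget_by_ids(["id-1", "id-2"])
```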
@@ -1,3 +1,3 @@
"""langchain-core version information and utilities."""

VERSION = "1.0.4"
VERSION = "1.0.0"

@@ -3,13 +3,8 @@ requires = ["hatchling"]
build-backend = "hatchling.build"

[project]
name = "langchain-core"
description = "Building applications with LLMs through composability"
license = {text = "MIT"}
readme = "README.md"
authors = []

version = "1.0.4"
license = {text = "MIT"}
requires-python = ">=3.10.0,<4.0.0"
dependencies = [
"langsmith>=0.3.45,<1.0.0",
@@ -20,6 +15,10 @@ dependencies = [
"packaging>=23.2.0,<26.0.0",
"pydantic>=2.7.4,<3.0.0",
]
name = "langchain-core"
version = "1.0.0"
description = "Building applications with LLMs through composability"
readme = "README.md"

[project.urls]
Homepage = "https://docs.langchain.com/"
@@ -36,7 +35,6 @@ typing = [
"mypy>=1.18.1,<1.19.0",
"types-pyyaml>=6.0.12.2,<7.0.0.0",
"types-requests>=2.28.11.5,<3.0.0.0",
"langchain-model-profiles",
"langchain-text-splitters",
]
dev = [
@@ -58,7 +56,6 @@ test = [
"blockbuster>=1.5.18,<1.6.0",
"numpy>=1.26.4; python_version<'3.13'",
"numpy>=2.1.0; python_version>='3.13'",
"langchain-model-profiles",
"langchain-tests",
"pytest-benchmark",
"pytest-codspeed",
@@ -66,7 +63,6 @@ test = [
test_integration = []

[tool.uv.sources]
langchain-model-profiles = { path = "../model-profiles" }
langchain-tests = { path = "../standard-tests" }
langchain-text-splitters = { path = "../text-splitters" }

@@ -105,6 +101,7 @@ ignore = [
"ANN401", # No Any types
"BLE", # Blind exceptions
"ERA", # No commented-out code
"PLR2004", # Comparison to magic number
]
unfixable = [
"B028", # People should intentionally tune the stacklevel
@@ -125,7 +122,7 @@ ignore-var-parameters = true # ignore missing documentation for *args and **kwa
"langchain_core/utils/mustache.py" = [ "PLW0603",]
"langchain_core/sys_info.py" = [ "T201",]
"tests/unit_tests/test_tools.py" = [ "ARG",]
"tests/**" = [ "D1", "PLR2004", "S", "SLF",]
"tests/**" = [ "D1", "S", "SLF",]
"scripts/**" = [ "INP", "S",]

[tool.coverage.run]
@@ -133,10 +130,7 @@ omit = [ "tests/*",]

[tool.pytest.ini_options]
addopts = "--snapshot-warn-unused --strict-markers --strict-config --durations=5"
markers = [
"requires: mark tests as requiring a specific library",
"compile: mark placeholder test used to compile integration tests without running them",
]
markers = [ "requires: mark tests as requiring a specific library", "compile: mark placeholder test used to compile integration tests without running them", ]
asyncio_mode = "auto"
asyncio_default_fixture_loop_scope = "function"
filterwarnings = [ "ignore::langchain_core._api.beta_decorator.LangChainBetaWarning",]
asyncio_default_fixture_loop_scope = "function"
@@ -148,65 +148,4 @@ async def test_inline_handlers_share_parent_context_multiple() -> None:
2,
3,
3,
]


async def test_shielded_callback_context_preservation() -> None:
"""Verify that shielded callbacks preserve context variables.

This test specifically addresses the issue where async callbacks decorated
with @shielded do not properly preserve context variables, breaking
instrumentation and other context-dependent functionality.

The issue manifests in callbacks that use the @shielded decorator:
* on_llm_end
* on_llm_error
* on_chain_end
* on_chain_error
* And other shielded callback methods
"""
context_var: contextvars.ContextVar[str] = contextvars.ContextVar("test_context")

class ContextTestHandler(AsyncCallbackHandler):
"""Handler that reads context variables in shielded callbacks."""

def __init__(self) -> None:
self.run_inline = False
self.context_values: list[str] = []

@override
async def on_llm_end(self, response: Any, **kwargs: Any) -> None:
"""This method is decorated with @shielded in the run manager."""
# This should preserve the context variable value
self.context_values.append(context_var.get("not_found"))

@override
async def on_chain_end(self, outputs: Any, **kwargs: Any) -> None:
"""This method is decorated with @shielded in the run manager."""
# This should preserve the context variable value
self.context_values.append(context_var.get("not_found"))

# Set up the test context
context_var.set("test_value")
handler = ContextTestHandler()
manager = AsyncCallbackManager(handlers=[handler])

# Create run managers that have the shielded methods
llm_managers = await manager.on_llm_start({}, ["test prompt"])
llm_run_manager = llm_managers[0]

chain_run_manager = await manager.on_chain_start({}, {"test": "input"})

# Test LLM end callback (which is shielded)
await llm_run_manager.on_llm_end({"response": "test"}) # type: ignore[arg-type]

# Test Chain end callback (which is shielded)
await chain_run_manager.on_chain_end({"output": "test"})

# The context should be preserved in shielded callbacks
# This was the main issue - shielded decorators were not preserving context
assert handler.context_values == ["test_value", "test_value"], (
f"Expected context values ['test_value', 'test_value'], "
f"but got {handler.context_values}. "
f"This indicates the shielded decorator is not preserving context variables."
)
], f"Expected order of states was broken due to context loss. Got {states}"
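The removed test above targets a real asyncio subtlety: creating a task copies the caller's `contextvars.Context`, so a shielding decorator that wraps the callback in a task can still see context variables set before the call. A minimal self-contained sketch of that technique (an illustration, not langchain-core's exact implementation of `@shielded`):

```python
import asyncio
import contextvars
import functools

def shielded(func):
    """Run an async callback shielded from cancellation.

    asyncio.ensure_future copies the caller's contextvars.Context into the
    new task, so values set before the call remain visible inside func.
    """
    @functools.wraps(func)
    async def wrapped(*args, **kwargs):
        return await asyncio.shield(asyncio.ensure_future(func(*args, **kwargs)))
    return wrapped

var: contextvars.ContextVar[str] = contextvars.ContextVar("var")

@shielded
async def callback() -> str:
    return var.get("not_found")

async def main() -> None:
    var.set("test_value")
    # The shielded callback still observes the value set above.
    assert await callback() == "test_value"

asyncio.run(main())
```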
@@ -33,7 +33,7 @@ def test_hashing() -> None:
# hash should be deterministic
assert hashed_document.id == "fd1dc827-051b-537d-a1fe-1fa043e8b276"

# Verify that hashing with sha1 is deterministic
# Verify that hashing with sha1 is determinstic
another_hashed_document = _get_document_with_hash(document, key_encoder="sha1")
assert another_hashed_document.id == hashed_document.id
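For context on the determinism being asserted: content hashes in the indexing API are derived from the page content plus metadata, so identical input always yields the same id. A rough sketch of that idea (a hypothetical helper, not the real `_get_document_with_hash`):

```python
import hashlib
import json
import uuid

def deterministic_doc_id(page_content: str, metadata: dict) -> str:
    """Derive a stable UUID from document content + metadata via sha1."""
    payload = json.dumps(
        {"page_content": page_content, "metadata": metadata}, sort_keys=True
    )
    digest = hashlib.sha1(payload.encode("utf-8")).hexdigest()
    # Fold the hex digest into a UUID so ids stay uniform in shape.
    return str(uuid.uuid5(uuid.NAMESPACE_DNS, digest))

assert deterministic_doc_id("hello", {"k": 1}) == deterministic_doc_id("hello", {"k": 1})
```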
@@ -604,7 +604,7 @@ def test_incremental_fails_with_bad_source_ids(

with pytest.raises(
ValueError,
match="Source IDs are required when cleanup mode is incremental or scoped_full",
match="Source ids are required when cleanup mode is incremental or scoped_full",
):
# Should raise an error because no source id function was specified
index(
@@ -654,7 +654,7 @@ async def test_aincremental_fails_with_bad_source_ids(

with pytest.raises(
ValueError,
match="Source IDs are required when cleanup mode is incremental or scoped_full",
match="Source ids are required when cleanup mode is incremental or scoped_full",
):
# Should raise an error because no source id function was specified
await aindex(
@@ -956,7 +956,7 @@ def test_scoped_full_fails_with_bad_source_ids(

with pytest.raises(
ValueError,
match="Source IDs are required when cleanup mode is incremental or scoped_full",
match="Source ids are required when cleanup mode is incremental or scoped_full",
):
# Should raise an error because no source id function was specified
index(
@@ -1006,7 +1006,7 @@ async def test_ascoped_full_fails_with_bad_source_ids(

with pytest.raises(
ValueError,
match="Source IDs are required when cleanup mode is incremental or scoped_full",
match="Source ids are required when cleanup mode is incremental or scoped_full",
):
# Should raise an error because no source id function was specified
await aindex(
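All four hunks recase the same error message; the behavior under test is that incremental or scoped_full cleanup refuses to run without a way to derive source ids. A hedged, runnable sketch of the API being exercised, using langchain-core's in-memory implementations:

```python
from langchain_core.documents import Document
from langchain_core.embeddings import DeterministicFakeEmbedding
from langchain_core.indexing import InMemoryRecordManager, index
from langchain_core.vectorstores import InMemoryVectorStore

record_manager = InMemoryRecordManager(namespace="demo")
record_manager.create_schema()
vector_store = InMemoryVectorStore(embedding=DeterministicFakeEmbedding(size=8))
docs = [Document(page_content="hello", metadata={"source": "1"})]

# With incremental cleanup, a source_id_key is mandatory; omitting it is
# what makes the tests above raise the "Source IDs are required..." error.
index(docs, record_manager, vector_store, cleanup="incremental", source_id_key="source")
```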
@@ -2801,7 +2801,7 @@ def test_index_with_upsert_kwargs(
]
assert [doc.metadata for doc in args[0]] == [{"source": "1"}, {"source": "2"}]

# Check that IDs are present
# Check that ids are present
assert "ids" in kwargs
assert isinstance(kwargs["ids"], list)
assert len(kwargs["ids"]) == 2
@@ -2932,7 +2932,7 @@ async def test_aindex_with_upsert_kwargs(
]
assert [doc.metadata for doc in args[0]] == [{"source": "1"}, {"source": "2"}]

# Check that IDs are present
# Check that ids are present
assert "ids" in kwargs
assert isinstance(kwargs["ids"], list)
assert len(kwargs["ids"]) == 2
@@ -1217,20 +1217,3 @@ def test_get_ls_params() -> None:

ls_params = llm._get_ls_params(stop=["stop"])
assert ls_params["ls_stop"] == ["stop"]


def test_model_profiles() -> None:
model = GenericFakeChatModel(messages=iter([]))
profile = model.profile
assert profile == {}

class MyModel(GenericFakeChatModel):
model: str = "gpt-5"

@property
def _llm_type(self) -> str:
return "openai-chat"

model = MyModel(messages=iter([]))
profile = model.profile
assert profile
Some files were not shown because too many files have changed in this diff.