Files
langchain/libs/text-splitters/pyproject.toml
Mason Daugherty 07fa576de1 ci: avoid unnecessary dep installs in lint targets (#36046)
CI lint jobs use `uv run --all-groups` for all tools, but ruff doesn't
need dependency resolution — only mypy does. By splitting into
`UV_RUN_LINT` (ruff) and `UV_RUN_TYPE` (mypy), the CI-facing targets run
ruff with `--group lint` only, giving fast-fail feedback before mypy
triggers the full environment sync.

For packages where source code only conditionally imports heavy deps
(text-splitters, huggingface), `lint_package` also overrides
`UV_RUN_TYPE` to `--group lint --group typing`, skipping the ~3.5GB
`test_integration` download entirely. `lint_tests` keeps `--all-groups`
since test code legitimately imports those deps.

Additionally, `lint_imports.sh` was inconsistently wired — most packages
had the script but weren't calling it.

## Changes

**Makefile optimization**
- Introduce `UV_RUN_LINT` and `UV_RUN_TYPE` Make variables, both
defaulting to `uv run --all-groups`. For `lint_package` and
`lint_tests`, `UV_RUN_LINT` is overridden to `uv run --group lint` so
ruff runs instantly without syncing heavy deps
- For `text-splitters` and `huggingface`, override `UV_RUN_TYPE` on
`lint_package` to `uv run --group lint --group typing` — mypy runs
without downloading torch, CUDA, spacy, etc.

**mypy config for lean groups**
- Add `transformers` and `transformers.*` to `ignore_missing_imports` in
`text-splitters` pyproject.toml (conditional `try/except` import, same
treatment as existing `konlpy`/`nltk` entries)
- Add `torch`, `torch.*`, `langchain_community`, `langchain_community.*`
to `ignore_missing_imports` in `huggingface` pyproject.toml
- Add dual `# type: ignore[unreachable, unused-ignore]` in
`text-splitters/base.py` to handle the `PreTrainedTokenizerBase`
isinstance check that behaves differently depending on whether
transformers is installed

**lint_imports.sh consistency**
- Add `./scripts/lint_imports.sh` to the lint recipe in every package
that wasn't calling it (standard-tests, model-profiles, all 15
partners), and create the script for the two packages missing it
entirely (`model-profiles`, `openrouter`)
- Update all `lint_imports.sh` scripts to allow `from langchain.agents`
and `from langchain.tools` imports (legitimate v1 middleware
dependencies used by `langchain-anthropic` and `langchain-openai`)
2026-03-17 21:23:29 -04:00

148 lines
4.7 KiB
TOML

# Build configuration (PEP 517): the distribution is built with hatchling.
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

# Core package metadata (PEP 621).
[project]
name = "langchain-text-splitters"
version = "1.1.1"
description = "LangChain text splitting utilities"
readme = "README.md"
license = { text = "MIT" }
requires-python = ">=3.10.0,<4.0.0"
# The only runtime requirement; tooling and test dependencies are declared
# separately under [dependency-groups].
dependencies = ["langchain-core>=1.2.13,<2.0.0"]
classifiers = [
    "Development Status :: 5 - Production/Stable",
    "Intended Audience :: Developers",
    "License :: OSI Approved :: MIT License",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "Programming Language :: Python :: 3.13",
    "Programming Language :: Python :: 3.14",
    "Topic :: Scientific/Engineering :: Artificial Intelligence",
    "Topic :: Software Development :: Libraries :: Python Modules",
    "Topic :: Text Processing",
]

# Project links published with the package metadata.
[project.urls]
Homepage = "https://docs.langchain.com/"
Documentation = "https://docs.langchain.com/"
Repository = "https://github.com/langchain-ai/langchain"
Issues = "https://github.com/langchain-ai/langchain/issues"
# The query string filters the monorepo's releases by the quoted package name
# (%22 is a URL-encoded double quote).
Changelog = "https://github.com/langchain-ai/langchain/releases?q=%22langchain-text-splitters%22"
Twitter = "https://x.com/LangChain"
Slack = "https://www.langchain.com/join-community"
Reddit = "https://www.reddit.com/r/LangChain/"

# Development-only dependency groups (PEP 735), selected with
# `uv run --group <name>` or `uv run --all-groups`.
# The unpinned "langchain-core" entries resolve to the local checkout
# declared in [tool.uv.sources].
[dependency-groups]
# Kept minimal so that ruff-only targets do not pull in heavy packages.
lint = [
    "ruff>=0.15.0,<0.16.0",
    "langchain-core",
]
# mypy plus the stubs and importable packages it needs for type checking.
typing = [
    "mypy>=1.19.1,<1.20.0",
    "lxml-stubs>=0.5.1,<1.0.0",
    "types-requests>=2.31.0.20240218,<3.0.0.0",
    "tiktoken>=0.8.0,<1.0.0",
    "beautifulsoup4>=4.13.5,<5.0.0",
]
dev = [
    "jupyter<2.0.0,>=1.0.0",
    "langchain-core",
]
test = [
    "pytest>=8.0.0,<10.0.0",
    "freezegun>=1.2.2,<2.0.0",
    "pytest-mock>=3.10.0,<4.0.0",
    "pytest-watcher>=0.3.4,<1.0.0",
    "pytest-asyncio>=0.21.1,<2.0.0",
    "pytest-socket>=0.7.0,<1.0.0",
    "pytest-xdist<4.0.0,>=3.6.1",
    "langchain-core",
]
# Heavy optional NLP/ML packages used by the integration tests.
# NOTE(review): spacy is restricted to Python < 3.14 while en-core-web-sm
# carries no marker -- confirm this asymmetry is intended.
test_integration = [
    'spacy>=3.8.7,<4.0.0; python_version < "3.14"',
    "thinc>=8.3.6,<10.0.0",
    "nltk>=3.9.1,<4.0.0",
    "transformers>=4.51.3,<6.0.0",
    "sentence-transformers>=3.0.1,<6.0.0",
    'scipy>=1.7.0,<2.0.0; python_version >= "3.12" and python_version < "3.13"',
    'scipy>=1.14.1,<2.0.0; python_version >= "3.13"',
    "tiktoken>=0.8.0,<1.0.0",
    "en-core-web-sm",
]

# Where uv fetches packages that should not come from the default index.
[tool.uv.sources]
# Sibling package in this repository, installed in editable mode.
langchain-core = { path = "../core", editable = true }
# spaCy English model, installed from a pinned release wheel URL.
en-core-web-sm = { url = "https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl" }

# Static type checking: strict mode with the pydantic plugin, plus reporting
# of unreachable code and the opt-in "deprecated" error code.
[tool.mypy]
strict = true
warn_unreachable = true
enable_error_code = "deprecated"
plugins = ["pydantic.mypy"]

# Optional packages that may not be installed in the environment mypy runs
# in; do not report their imports as missing.
[[tool.mypy.overrides]]
module = [
    "konlpy",
    "nltk",
    "transformers",
    "transformers.*",
]
ignore_missing_imports = true

# Formatter settings: also format code examples embedded in docstrings.
[tool.ruff.format]
docstring-code-format = true

# Lint policy: enable every rule, then opt out of the ones listed in `ignore`.
[tool.ruff.lint]
select = ["ALL"]
ignore = [
    "C90",      # McCabe complexity
    "COM812",   # Messes with the formatter
    "CPY",      # No copyright
    "FIX002",   # Line contains TODO
    "PERF203",  # Rarely useful
    "PLR09",    # Too many something (arg, statements, etc)
    "TD002",    # Missing author in TODO
    "TD003",    # Missing issue link in TODO
]
# Rules that are still reported but never auto-fixed.
unfixable = [
    "B028",  # People should intentionally tune the stacklevel
]

# Per-plugin options, written as dotted keys.
flake8-annotations.allow-star-arg-any = true
flake8-annotations.mypy-init-return = true
flake8-type-checking.runtime-evaluated-base-classes = [
    "pydantic.BaseModel",
    "langchain_core.load.serializable.Serializable",
    "langchain_core.runnables.base.RunnableSerializable",
]
pep8-naming.classmethod-decorators = [
    "classmethod",
    "langchain_core.utils.pydantic.pre_init",
    "pydantic.field_validator",
    "pydantic.v1.root_validator",
]

# Docstring checks follow the Google convention.
[tool.ruff.lint.pydocstyle]
convention = "google"
# Ignore missing documentation for *args and **kwargs parameters.
ignore-var-parameters = true

# Relative imports are banned everywhere; use absolute imports instead.
[tool.ruff.lint.flake8-tidy-imports]
ban-relative-imports = "all"

# Rules relaxed for specific path patterns.
[tool.ruff.lint.per-file-ignores]
"scripts/**" = [
    "D1",      # Docstrings not mandatory in scripts
    "INP001",  # Not a package
    "S311",    # Standard pseudo-random generators are not suitable for cryptographic purposes
]
"tests/**" = [
    "D1",       # Docstrings not mandatory in tests
    "PLR2004",  # Magic value comparisons
    "S101",     # Tests need assertions
    "S311",     # Standard pseudo-random generators are not suitable for cryptographic purposes
    "SLF001",   # Private member access in tests
]

# Coverage measurement: exclude the test suite itself from the report.
[tool.coverage.run]
omit = ["tests/*"]

# pytest defaults: unknown markers and config keys are errors, and the five
# slowest tests are reported after each run.
[tool.pytest.ini_options]
addopts = "--strict-markers --strict-config --durations=5"
# pytest-asyncio setting: "auto" mode.
asyncio_mode = "auto"
# Every custom marker must be registered here because of --strict-markers.
markers = [
    "requires: mark tests as requiring a specific library",
    "compile: mark placeholder test used to compile integration tests without running them",
]