infra: migrate to uv (#29566)

This commit is contained in:
ccurme
2025-02-06 13:36:26 -05:00
committed by GitHub
parent 9da06e6e94
commit d172984c91
168 changed files with 56270 additions and 62303 deletions

View File

@@ -6,23 +6,26 @@ all: help
# Define a variable for the test file path.
TEST_FILE ?= tests/unit_tests/
.EXPORT_ALL_VARIABLES:
UV_FROZEN = true
test tests:
poetry run pytest -n auto --disable-socket --allow-unix-socket $(TEST_FILE)
uv run --group test pytest -n auto --disable-socket --allow-unix-socket $(TEST_FILE)
integration_test integration_tests:
poetry run pytest tests/integration_tests/
uv run --group test --group test_integration pytest tests/integration_tests/
test_watch:
poetry run ptw --snapshot-update --now . -- -vv -x tests/unit_tests
uv run --group test ptw --snapshot-update --now . -- -vv -x tests/unit_tests
test_profile:
poetry run pytest -vv tests/unit_tests/ --profile-svg
uv run --group test pytest -vv tests/unit_tests/ --profile-svg
check_imports: $(shell find langchain_text_splitters -name '*.py')
poetry run python ./scripts/check_imports.py $^
uv run --group test python ./scripts/check_imports.py $^
extended_tests:
poetry run pytest --disable-socket --allow-unix-socket --only-extended $(TEST_FILE)
uv run --group test pytest --disable-socket --allow-unix-socket --only-extended $(TEST_FILE)
######################
@@ -40,19 +43,19 @@ lint_tests: MYPY_CACHE=.mypy_cache_test
lint lint_diff lint_package lint_tests:
./scripts/lint_imports.sh
[ "$(PYTHON_FILES)" = "" ] || poetry run ruff check $(PYTHON_FILES)
[ "$(PYTHON_FILES)" = "" ] || poetry run ruff format $(PYTHON_FILES) --diff
[ "$(PYTHON_FILES)" = "" ] || mkdir -p $(MYPY_CACHE) && poetry run mypy $(PYTHON_FILES) --cache-dir $(MYPY_CACHE)
[ "$(PYTHON_FILES)" = "" ] || uv run --group typing --group lint ruff check $(PYTHON_FILES)
[ "$(PYTHON_FILES)" = "" ] || uv run --group typing --group lint ruff format $(PYTHON_FILES) --diff
[ "$(PYTHON_FILES)" = "" ] || mkdir -p $(MYPY_CACHE) && uv run --group typing --group lint mypy $(PYTHON_FILES) --cache-dir $(MYPY_CACHE)
format format_diff:
[ "$(PYTHON_FILES)" = "" ] || poetry run ruff format $(PYTHON_FILES)
[ "$(PYTHON_FILES)" = "" ] || poetry run ruff check --select I --fix $(PYTHON_FILES)
[ "$(PYTHON_FILES)" = "" ] || uv run --all-groups ruff format $(PYTHON_FILES)
[ "$(PYTHON_FILES)" = "" ] || uv run --all-groups ruff check --select I --fix $(PYTHON_FILES)
spell_check:
poetry run codespell --toml pyproject.toml
uv run --all-groups codespell --toml pyproject.toml
spell_fix:
poetry run codespell --toml pyproject.toml -w
uv run --all-groups codespell --toml pyproject.toml -w
######################
# HELP

View File

@@ -427,10 +427,10 @@ class HTMLSectionSplitter:
headers = list(self.headers_to_split_on.keys())
sections: list[dict[str, str | None]] = []
headers = soup.find_all(["body"] + headers)
headers = soup.find_all(["body"] + headers) # type: ignore[assignment]
for i, header in enumerate(headers):
header_element: PageElement = header
header_element = cast(PageElement, header)
if i == 0:
current_header = "#TITLE#"
current_header_tag = "h1"

File diff suppressed because it is too large Load Diff

View File

@@ -1,16 +1,58 @@
[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"
requires = ["pdm-backend"]
build-backend = "pdm.backend"
[tool.poetry]
name = "langchain-text-splitters"
version = "0.3.5"
description = "LangChain text splitting utilities"
[project]
authors = []
license = "MIT"
license = {text = "MIT"}
requires-python = "<4.0,>=3.9"
dependencies = [
"langchain-core<1.0.0,>=0.3.34rc2",
]
name = "langchain-text-splitters"
version = "0.3.6rc2"
description = "LangChain text splitting utilities"
readme = "README.md"
[project.urls]
"Source Code" = "https://github.com/langchain-ai/langchain/tree/master/libs/text-splitters"
"Release Notes" = "https://github.com/langchain-ai/langchain/releases?q=tag%3A%22langchain-text-splitters%3D%3D0%22&expanded=true"
repository = "https://github.com/langchain-ai/langchain"
[dependency-groups]
lint = [
"ruff<1.0.0,>=0.9.2",
"langchain-core @ file:///${PROJECT_ROOT}/../core",
]
typing = [
"mypy<2.0,>=1.10",
"lxml-stubs<1.0.0,>=0.5.1",
"types-requests<3.0.0.0,>=2.31.0.20240218",
"tiktoken<1.0.0,>=0.8.0",
]
dev = [
"jupyter<2.0.0,>=1.0.0",
"langchain-core @ file:///${PROJECT_ROOT}/../core",
]
test = [
"pytest<9,>=8",
"freezegun<2.0.0,>=1.2.2",
"pytest-mock<4.0.0,>=3.10.0",
"pytest-watcher<1.0.0,>=0.3.4",
"pytest-asyncio<1.0.0,>=0.21.1",
"pytest-socket<1.0.0,>=0.7.0",
"pytest-xdist<4.0.0,>=3.6.1",
"langchain-core @ file:///${PROJECT_ROOT}/../core",
]
test_integration = [
"spacy<3.8.4,>=3.0.0; python_version < \"3.10.0\"",
"spacy<4.0.0,>=3.0.0; python_version < \"3.13.0\"",
"nltk<4.0.0,>=3.9.1",
"transformers<5.0.0,>=4.47.0",
"sentence-transformers>=2.6.0; python_version < \"3.13\"",
]
[tool.mypy]
disallow_untyped_defs = "True"
[[tool.mypy.overrides]]
@@ -30,14 +72,6 @@ module = [
]
ignore_missing_imports = "True"
[tool.poetry.urls]
"Source Code" = "https://github.com/langchain-ai/langchain/tree/master/libs/text-splitters"
"Release Notes" = "https://github.com/langchain-ai/langchain/releases?q=tag%3A%22langchain-text-splitters%3D%3D0%22&expanded=true"
[tool.poetry.dependencies]
python = ">=3.9,<4.0"
langchain-core = "^0.3.29"
[tool.ruff]
target-version = "py39"
@@ -56,84 +90,8 @@ markers = [
]
asyncio_mode = "auto"
[tool.poetry.group.lint]
optional = true
[tool.poetry.group.typing]
optional = true
[tool.poetry.group.dev]
optional = true
[tool.poetry.group.test]
optional = true
[tool.ruff.lint.pydocstyle]
convention = "google"
[tool.ruff.lint.per-file-ignores]
"tests/**" = ["D"]
[tool.poetry.group.lint.dependencies]
ruff = "^0.9.2"
[tool.poetry.group.typing.dependencies]
mypy = "^1.10"
lxml-stubs = "^0.5.1"
types-requests = "^2.31.0.20240218"
tiktoken = "^0.8.0"
[tool.poetry.group.dev.dependencies]
jupyter = "^1.0.0"
[tool.poetry.group.test.dependencies]
pytest = "^8"
freezegun = "^1.2.2"
pytest-mock = "^3.10.0"
pytest-watcher = "^0.3.4"
pytest-asyncio = "^0.21.1"
pytest-socket = "^0.7.0"
pytest-xdist = "^3.6.1"
[tool.poetry.group.test_integration]
optional = true
[tool.poetry.group.test_integration.dependencies]
spacy = { version = "*", python = "<3.13" }
nltk = "^3.9.1"
transformers = "^4.47.0"
sentence-transformers = { version = ">=2.6.0", python = "<3.13" }
[tool.poetry.group.lint.dependencies.langchain-core]
path = "../core"
develop = true
[tool.poetry.group.dev.dependencies.langchain-core]
path = "../core"
develop = true
[tool.poetry.group.test.dependencies.langchain-core]
path = "../core"
develop = true

View File

@@ -2519,8 +2519,8 @@ def test_split_text_on_tokens() -> None:
assert output == expected_output
@pytest.mark.requires("lxml")
@pytest.mark.requires("bs4")
@pytest.mark.requires("lxml")
def test_section_aware_happy_path_splitting_based_on_header_1_2() -> None:
# arrange
html_string = """<!DOCTYPE html>
@@ -2573,8 +2573,8 @@ def test_section_aware_happy_path_splitting_based_on_header_1_2() -> None:
assert docs[2].metadata["Header 2"] == "Baz"
@pytest.mark.requires("lxml")
@pytest.mark.requires("bs4")
@pytest.mark.requires("lxml")
def test_happy_path_splitting_based_on_header_with_font_size() -> None:
# arrange
html_string = """<!DOCTYPE html>
@@ -2624,8 +2624,8 @@ def test_happy_path_splitting_based_on_header_with_font_size() -> None:
assert docs[2].metadata["Header 2"] == "Baz"
@pytest.mark.requires("lxml")
@pytest.mark.requires("bs4")
@pytest.mark.requires("lxml")
def test_happy_path_splitting_based_on_header_with_whitespace_chars() -> None:
# arrange
html_string = """<!DOCTYPE html>
@@ -2675,8 +2675,8 @@ def test_happy_path_splitting_based_on_header_with_whitespace_chars() -> None:
assert docs[2].metadata["Header 2"] == "Baz"
@pytest.mark.requires("lxml")
@pytest.mark.requires("bs4")
@pytest.mark.requires("lxml")
def test_section_splitter_accepts_a_relative_path() -> None:
html_string = """<html><body><p>Foo</p></body></html>"""
test_file = Path("tests/test_data/test_splitter.xslt")
@@ -2690,8 +2690,8 @@ def test_section_splitter_accepts_a_relative_path() -> None:
sec_splitter.split_text(html_string)
@pytest.mark.requires("lxml")
@pytest.mark.requires("bs4")
@pytest.mark.requires("lxml")
def test_section_splitter_accepts_an_absolute_path() -> None:
html_string = """<html><body><p>Foo</p></body></html>"""
test_file = Path("tests/test_data/test_splitter.xslt").absolute()
@@ -2706,8 +2706,8 @@ def test_section_splitter_accepts_an_absolute_path() -> None:
sec_splitter.split_text(html_string)
@pytest.mark.requires("lxml")
@pytest.mark.requires("bs4")
@pytest.mark.requires("lxml")
def test_happy_path_splitting_with_duplicate_header_tag() -> None:
# arrange
html_string = """<!DOCTYPE html>

4033
libs/text-splitters/uv.lock generated Normal file

File diff suppressed because it is too large Load Diff