infra: migrate to uv (#29566)

2025-09-02 19:47:13 +00:00 · 2025-02-06 13:36:26 -05:00
parent 9da06e6e94
commit d172984c91
168 changed files with 56270 additions and 62303 deletions
--- a/libs/text-splitters/Makefile
+++ b/libs/text-splitters/Makefile
@@ -6,23 +6,26 @@ all: help
 # Define a variable for the test file path.
 TEST_FILE ?= tests/unit_tests/

+.EXPORT_ALL_VARIABLES:
+UV_FROZEN = true
+
 test tests:
-	poetry run pytest -n auto --disable-socket --allow-unix-socket $(TEST_FILE)
+	uv run --group test pytest -n auto --disable-socket --allow-unix-socket $(TEST_FILE)

 integration_test integration_tests: 
-	poetry run pytest tests/integration_tests/
+	uv run --group test --group test_integration pytest tests/integration_tests/

 test_watch:
-	poetry run ptw --snapshot-update --now . -- -vv -x tests/unit_tests
+	uv run --group test ptw --snapshot-update --now . -- -vv -x tests/unit_tests

 test_profile:
-	poetry run pytest -vv tests/unit_tests/ --profile-svg
+	uv run --group test pytest -vv tests/unit_tests/ --profile-svg

 check_imports: $(shell find langchain_text_splitters -name '*.py')
-	poetry run python ./scripts/check_imports.py $^
+	uv run --group test python ./scripts/check_imports.py $^

 extended_tests:
-	poetry run pytest --disable-socket --allow-unix-socket --only-extended $(TEST_FILE)
+	uv run --group test pytest --disable-socket --allow-unix-socket --only-extended $(TEST_FILE)


 ######################
@@ -40,19 +43,21 @@ lint_tests: MYPY_CACHE=.mypy_cache_test

+# Run import lint, ruff and mypy; each ruff/mypy step is skipped when PYTHON_FILES is empty.
+# mkdir+mypy are grouped in { } because sh parses `A || B && C` as `(A || B) && C`.
 lint lint_diff lint_package lint_tests:
 	./scripts/lint_imports.sh
-	[ "$(PYTHON_FILES)" = "" ] || poetry run ruff check $(PYTHON_FILES)
-	[ "$(PYTHON_FILES)" = "" ] || poetry run ruff format $(PYTHON_FILES) --diff
-	[ "$(PYTHON_FILES)" = "" ] || mkdir -p $(MYPY_CACHE) && poetry run mypy $(PYTHON_FILES) --cache-dir $(MYPY_CACHE)
+	[ "$(PYTHON_FILES)" = "" ] || uv run --group typing --group lint ruff check $(PYTHON_FILES)
+	[ "$(PYTHON_FILES)" = "" ] || uv run --group typing --group lint ruff format $(PYTHON_FILES) --diff
+	[ "$(PYTHON_FILES)" = "" ] || { mkdir -p $(MYPY_CACHE) && uv run --group typing --group lint mypy $(PYTHON_FILES) --cache-dir $(MYPY_CACHE); }

 format format_diff:
-	[ "$(PYTHON_FILES)" = "" ] || poetry run ruff format $(PYTHON_FILES)
-	[ "$(PYTHON_FILES)" = "" ] || poetry run ruff check --select I --fix $(PYTHON_FILES)
+	[ "$(PYTHON_FILES)" = "" ] || uv run --all-groups ruff format $(PYTHON_FILES)
+	[ "$(PYTHON_FILES)" = "" ] || uv run --all-groups ruff check --select I --fix $(PYTHON_FILES)

 spell_check:
-	poetry run codespell --toml pyproject.toml
+	uv run --all-groups codespell --toml pyproject.toml

 spell_fix:
-	poetry run codespell --toml pyproject.toml -w
+	uv run --all-groups codespell --toml pyproject.toml -w

 ######################
 # HELP
--- a/libs/text-splitters/langchain_text_splitters/html.py
+++ b/libs/text-splitters/langchain_text_splitters/html.py
@@ -427,10 +427,10 @@ class HTMLSectionSplitter:
        headers = list(self.headers_to_split_on.keys())
        sections: list[dict[str, str | None]] = []

-        headers = soup.find_all(["body"] + headers)
+        headers = soup.find_all(["body"] + headers)  # type: ignore[assignment]

        for i, header in enumerate(headers):
-            header_element: PageElement = header
+            header_element = cast(PageElement, header)
            if i == 0:
                current_header = "#TITLE#"
                current_header_tag = "h1"
--- a/libs/text-splitters/poetry.lock
+++ b/libs/text-splitters/poetry.lock
--- a/libs/text-splitters/pyproject.toml
+++ b/libs/text-splitters/pyproject.toml
@@ -1,16 +1,58 @@
 [build-system]
-requires = ["poetry-core>=1.0.0"]
-build-backend = "poetry.core.masonry.api"
+requires = ["pdm-backend"]
+build-backend = "pdm.backend"

-[tool.poetry]
-name = "langchain-text-splitters"
-version = "0.3.5"
-description = "LangChain text splitting utilities"
+[project]
 authors = []
-license = "MIT"
+license = {text = "MIT"}
+requires-python = "<4.0,>=3.9"
+dependencies = [
+    "langchain-core<1.0.0,>=0.3.34rc2",
+]
+name = "langchain-text-splitters"
+version = "0.3.6rc2"
+description = "LangChain text splitting utilities"
 readme = "README.md"
+
+[project.urls]
+"Source Code" = "https://github.com/langchain-ai/langchain/tree/master/libs/text-splitters"
+"Release Notes" = "https://github.com/langchain-ai/langchain/releases?q=tag%3A%22langchain-text-splitters%3D%3D0%22&expanded=true"
 repository = "https://github.com/langchain-ai/langchain"

+[dependency-groups]
+lint = [
+    "ruff<1.0.0,>=0.9.2",
+    "langchain-core @ file:///${PROJECT_ROOT}/../core",
+]
+typing = [
+    "mypy<2.0,>=1.10",
+    "lxml-stubs<1.0.0,>=0.5.1",
+    "types-requests<3.0.0.0,>=2.31.0.20240218",
+    "tiktoken<1.0.0,>=0.8.0",
+]
+dev = [
+    "jupyter<2.0.0,>=1.0.0",
+    "langchain-core @ file:///${PROJECT_ROOT}/../core",
+]
+test = [
+    "pytest<9,>=8",
+    "freezegun<2.0.0,>=1.2.2",
+    "pytest-mock<4.0.0,>=3.10.0",
+    "pytest-watcher<1.0.0,>=0.3.4",
+    "pytest-asyncio<1.0.0,>=0.21.1",
+    "pytest-socket<1.0.0,>=0.7.0",
+    "pytest-xdist<4.0.0,>=3.6.1",
+    "langchain-core @ file:///${PROJECT_ROOT}/../core",
+]
+test_integration = [
+    "spacy<3.8.4,>=3.0.0; python_version < \"3.10.0\"",
+    "spacy<4.0.0,>=3.0.0; python_version < \"3.13.0\"",
+    "nltk<4.0.0,>=3.9.1",
+    "transformers<5.0.0,>=4.47.0",
+    "sentence-transformers>=2.6.0; python_version < \"3.13\"",
+]
+
+
 [tool.mypy]
 disallow_untyped_defs = "True"
 [[tool.mypy.overrides]]
@@ -30,14 +72,6 @@ module = [
 ]
 ignore_missing_imports = "True"

-[tool.poetry.urls]
-"Source Code" = "https://github.com/langchain-ai/langchain/tree/master/libs/text-splitters"
-"Release Notes" = "https://github.com/langchain-ai/langchain/releases?q=tag%3A%22langchain-text-splitters%3D%3D0%22&expanded=true"
-
-[tool.poetry.dependencies]
-python = ">=3.9,<4.0"
-langchain-core = "^0.3.29"
-
 [tool.ruff]
 target-version = "py39"

@@ -56,84 +90,8 @@ markers = [
 ]
 asyncio_mode = "auto"

-[tool.poetry.group.lint]
-optional = true
-
-[tool.poetry.group.typing]
-optional = true
-
-[tool.poetry.group.dev]
-optional = true
-
-[tool.poetry.group.test]
-optional = true
-
 [tool.ruff.lint.pydocstyle]
 convention = "google"

 [tool.ruff.lint.per-file-ignores]
 "tests/**" = ["D"]
-
-[tool.poetry.group.lint.dependencies]
-ruff = "^0.9.2"
-
-
-
-
-[tool.poetry.group.typing.dependencies]
-mypy = "^1.10"
-lxml-stubs = "^0.5.1"
-types-requests = "^2.31.0.20240218"
-tiktoken = "^0.8.0"
-
-
-
-
-[tool.poetry.group.dev.dependencies]
-jupyter = "^1.0.0"
-
-
-
-
-
-[tool.poetry.group.test.dependencies]
-pytest = "^8"
-freezegun = "^1.2.2"
-pytest-mock = "^3.10.0"
-pytest-watcher = "^0.3.4"
-pytest-asyncio = "^0.21.1"
-pytest-socket = "^0.7.0"
-pytest-xdist = "^3.6.1"
-
-
-
-
-[tool.poetry.group.test_integration]
-optional = true
-
-[tool.poetry.group.test_integration.dependencies]
-spacy = { version = "*", python = "<3.13" }
-nltk = "^3.9.1"
-transformers = "^4.47.0"
-sentence-transformers = { version = ">=2.6.0", python = "<3.13" }
-
-
-
-
-[tool.poetry.group.lint.dependencies.langchain-core]
-path = "../core"
-develop = true
-
-
-
-
-[tool.poetry.group.dev.dependencies.langchain-core]
-path = "../core"
-develop = true
-
-
-
-
-[tool.poetry.group.test.dependencies.langchain-core]
-path = "../core"
-develop = true
--- a/libs/text-splitters/tests/unit_tests/test_text_splitters.py
+++ b/libs/text-splitters/tests/unit_tests/test_text_splitters.py
@@ -2519,8 +2519,8 @@ def test_split_text_on_tokens() -> None:
    assert output == expected_output


-@pytest.mark.requires("lxml")
@pytest.mark.requires("bs4")
+@pytest.mark.requires("lxml")
 def test_section_aware_happy_path_splitting_based_on_header_1_2() -> None:
    # arrange
    html_string = """<!DOCTYPE html>
@@ -2573,8 +2573,8 @@ def test_section_aware_happy_path_splitting_based_on_header_1_2() -> None:
    assert docs[2].metadata["Header 2"] == "Baz"


-@pytest.mark.requires("lxml")
@pytest.mark.requires("bs4")
+@pytest.mark.requires("lxml")
 def test_happy_path_splitting_based_on_header_with_font_size() -> None:
    # arrange
    html_string = """<!DOCTYPE html>
@@ -2624,8 +2624,8 @@ def test_happy_path_splitting_based_on_header_with_font_size() -> None:
    assert docs[2].metadata["Header 2"] == "Baz"


-@pytest.mark.requires("lxml")
@pytest.mark.requires("bs4")
+@pytest.mark.requires("lxml")
 def test_happy_path_splitting_based_on_header_with_whitespace_chars() -> None:
    # arrange
    html_string = """<!DOCTYPE html>
@@ -2675,8 +2675,8 @@ def test_happy_path_splitting_based_on_header_with_whitespace_chars() -> None:
    assert docs[2].metadata["Header 2"] == "Baz"


-@pytest.mark.requires("lxml")
@pytest.mark.requires("bs4")
+@pytest.mark.requires("lxml")
 def test_section_splitter_accepts_a_relative_path() -> None:
    html_string = """<html><body><p>Foo</p></body></html>"""
    test_file = Path("tests/test_data/test_splitter.xslt")
@@ -2690,8 +2690,8 @@ def test_section_splitter_accepts_a_relative_path() -> None:
    sec_splitter.split_text(html_string)


-@pytest.mark.requires("lxml")
@pytest.mark.requires("bs4")
+@pytest.mark.requires("lxml")
 def test_section_splitter_accepts_an_absolute_path() -> None:
    html_string = """<html><body><p>Foo</p></body></html>"""
    test_file = Path("tests/test_data/test_splitter.xslt").absolute()
@@ -2706,8 +2706,8 @@ def test_section_splitter_accepts_an_absolute_path() -> None:
    sec_splitter.split_text(html_string)


-@pytest.mark.requires("lxml")
@pytest.mark.requires("bs4")
+@pytest.mark.requires("lxml")
 def test_happy_path_splitting_with_duplicate_header_tag() -> None:
    # arrange
    html_string = """<!DOCTYPE html>
--- a/libs/text-splitters/uv.lock
+++ b/libs/text-splitters/uv.lock