infra: migrate to uv (#29566)
@@ -6,23 +6,26 @@ all: help

 # Define a variable for the test file path.
 TEST_FILE ?= tests/unit_tests/

+.EXPORT_ALL_VARIABLES:
+UV_FROZEN = true
+
 test tests:
-	poetry run pytest -n auto --disable-socket --allow-unix-socket $(TEST_FILE)
+	uv run --group test pytest -n auto --disable-socket --allow-unix-socket $(TEST_FILE)

 integration_test integration_tests:
-	poetry run pytest tests/integration_tests/
+	uv run --group test --group test_integration pytest tests/integration_tests/

 test_watch:
-	poetry run ptw --snapshot-update --now . -- -vv -x tests/unit_tests
+	uv run --group test ptw --snapshot-update --now . -- -vv -x tests/unit_tests

 test_profile:
-	poetry run pytest -vv tests/unit_tests/ --profile-svg
+	uv run --group test pytest -vv tests/unit_tests/ --profile-svg

 check_imports: $(shell find langchain_text_splitters -name '*.py')
-	poetry run python ./scripts/check_imports.py $^
+	uv run --group test python ./scripts/check_imports.py $^

 extended_tests:
-	poetry run pytest --disable-socket --allow-unix-socket --only-extended $(TEST_FILE)
+	uv run --group test pytest --disable-socket --allow-unix-socket --only-extended $(TEST_FILE)


 ######################
@@ -40,19 +43,19 @@ lint_tests: MYPY_CACHE=.mypy_cache_test

 lint lint_diff lint_package lint_tests:
 	./scripts/lint_imports.sh
-	[ "$(PYTHON_FILES)" = "" ] || poetry run ruff check $(PYTHON_FILES)
-	[ "$(PYTHON_FILES)" = "" ] || poetry run ruff format $(PYTHON_FILES) --diff
-	[ "$(PYTHON_FILES)" = "" ] || mkdir -p $(MYPY_CACHE) && poetry run mypy $(PYTHON_FILES) --cache-dir $(MYPY_CACHE)
+	[ "$(PYTHON_FILES)" = "" ] || uv run --group typing --group lint ruff check $(PYTHON_FILES)
+	[ "$(PYTHON_FILES)" = "" ] || uv run --group typing --group lint ruff format $(PYTHON_FILES) --diff
+	[ "$(PYTHON_FILES)" = "" ] || mkdir -p $(MYPY_CACHE) && uv run --group typing --group lint mypy $(PYTHON_FILES) --cache-dir $(MYPY_CACHE)

 format format_diff:
-	[ "$(PYTHON_FILES)" = "" ] || poetry run ruff format $(PYTHON_FILES)
-	[ "$(PYTHON_FILES)" = "" ] || poetry run ruff check --select I --fix $(PYTHON_FILES)
+	[ "$(PYTHON_FILES)" = "" ] || uv run --all-groups ruff format $(PYTHON_FILES)
+	[ "$(PYTHON_FILES)" = "" ] || uv run --all-groups ruff check --select I --fix $(PYTHON_FILES)

 spell_check:
-	poetry run codespell --toml pyproject.toml
+	uv run --all-groups codespell --toml pyproject.toml

 spell_fix:
-	poetry run codespell --toml pyproject.toml -w
+	uv run --all-groups codespell --toml pyproject.toml -w

 ######################
 # HELP
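For orientation, here is a minimal sketch of what the new Makefile targets run under the hood. The `--group` names and `UV_FROZEN` come from the diff above; the standalone shell usage is illustrative, not part of the commit:

    # UV_FROZEN=true is exported for every target via .EXPORT_ALL_VARIABLES,
    # so uv installs strictly from the committed lockfile instead of re-resolving.
    export UV_FROZEN=true

    # Roughly what `make test` now does: run pytest in an ephemeral uv
    # environment that includes the "test" dependency group.
    uv run --group test pytest -n auto --disable-socket --allow-unix-socket tests/unit_tests/

    # Lint and typing targets request their own groups instead of relying on a
    # single poetry dev install (the "." target here is illustrative).
    uv run --group typing --group lint ruff check .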
@@ -427,10 +427,10 @@ class HTMLSectionSplitter:
         headers = list(self.headers_to_split_on.keys())
         sections: list[dict[str, str | None]] = []

-        headers = soup.find_all(["body"] + headers)
+        headers = soup.find_all(["body"] + headers)  # type: ignore[assignment]

         for i, header in enumerate(headers):
-            header_element: PageElement = header
+            header_element = cast(PageElement, header)
             if i == 0:
                 current_header = "#TITLE#"
                 current_header_tag = "h1"
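Replacing the annotated assignment with `cast()` is a common pattern when strict mypy settings object to the types that BeautifulSoup's `find_all()` reports. A standalone sketch of the same idea (not the splitter's actual code; the HTML and tag names are made up):

    from typing import cast

    from bs4 import BeautifulSoup
    from bs4.element import PageElement

    soup = BeautifulSoup("<body><h1>Title</h1><p>Some text.</p></body>", "html.parser")

    # find_all() yields Tag/NavigableString results; cast() tells the type
    # checker to treat each one as a PageElement and has no runtime effect.
    for tag in soup.find_all(["body", "h1"]):
        element = cast(PageElement, tag)
        print(getattr(element, "name", None))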
libs/text-splitters/poetry.lock (generated, 4805 lines): file diff suppressed because it is too large.
@@ -1,16 +1,58 @@
 [build-system]
-requires = ["poetry-core>=1.0.0"]
-build-backend = "poetry.core.masonry.api"
+requires = ["pdm-backend"]
+build-backend = "pdm.backend"

-[tool.poetry]
-name = "langchain-text-splitters"
-version = "0.3.5"
-description = "LangChain text splitting utilities"
+[project]
 authors = []
-license = "MIT"
+license = {text = "MIT"}
+requires-python = "<4.0,>=3.9"
+dependencies = [
+    "langchain-core<1.0.0,>=0.3.34rc2",
+]
+name = "langchain-text-splitters"
+version = "0.3.6rc2"
+description = "LangChain text splitting utilities"
 readme = "README.md"

+[project.urls]
+"Source Code" = "https://github.com/langchain-ai/langchain/tree/master/libs/text-splitters"
+"Release Notes" = "https://github.com/langchain-ai/langchain/releases?q=tag%3A%22langchain-text-splitters%3D%3D0%22&expanded=true"
 repository = "https://github.com/langchain-ai/langchain"

+[dependency-groups]
+lint = [
+    "ruff<1.0.0,>=0.9.2",
+    "langchain-core @ file:///${PROJECT_ROOT}/../core",
+]
+typing = [
+    "mypy<2.0,>=1.10",
+    "lxml-stubs<1.0.0,>=0.5.1",
+    "types-requests<3.0.0.0,>=2.31.0.20240218",
+    "tiktoken<1.0.0,>=0.8.0",
+]
+dev = [
+    "jupyter<2.0.0,>=1.0.0",
+    "langchain-core @ file:///${PROJECT_ROOT}/../core",
+]
+test = [
+    "pytest<9,>=8",
+    "freezegun<2.0.0,>=1.2.2",
+    "pytest-mock<4.0.0,>=3.10.0",
+    "pytest-watcher<1.0.0,>=0.3.4",
+    "pytest-asyncio<1.0.0,>=0.21.1",
+    "pytest-socket<1.0.0,>=0.7.0",
+    "pytest-xdist<4.0.0,>=3.6.1",
+    "langchain-core @ file:///${PROJECT_ROOT}/../core",
+]
+test_integration = [
+    "spacy<3.8.4,>=3.0.0; python_version < \"3.10.0\"",
+    "spacy<4.0.0,>=3.0.0; python_version < \"3.13.0\"",
+    "nltk<4.0.0,>=3.9.1",
+    "transformers<5.0.0,>=4.47.0",
+    "sentence-transformers>=2.6.0; python_version < \"3.13\"",
+]

 [tool.mypy]
 disallow_untyped_defs = "True"
 [[tool.mypy.overrides]]
@@ -30,14 +72,6 @@ module = [
 ]
 ignore_missing_imports = "True"

-[tool.poetry.urls]
-"Source Code" = "https://github.com/langchain-ai/langchain/tree/master/libs/text-splitters"
-"Release Notes" = "https://github.com/langchain-ai/langchain/releases?q=tag%3A%22langchain-text-splitters%3D%3D0%22&expanded=true"
-
-[tool.poetry.dependencies]
-python = ">=3.9,<4.0"
-langchain-core = "^0.3.29"
-
 [tool.ruff]
 target-version = "py39"
@@ -56,84 +90,8 @@ markers = [
 ]
 asyncio_mode = "auto"

-[tool.poetry.group.lint]
-optional = true
-
-[tool.poetry.group.typing]
-optional = true
-
-[tool.poetry.group.dev]
-optional = true
-
-[tool.poetry.group.test]
-optional = true
-
 [tool.ruff.lint.pydocstyle]
 convention = "google"

 [tool.ruff.lint.per-file-ignores]
 "tests/**" = ["D"]
-
-[tool.poetry.group.lint.dependencies]
-ruff = "^0.9.2"
-
-[tool.poetry.group.typing.dependencies]
-mypy = "^1.10"
-lxml-stubs = "^0.5.1"
-types-requests = "^2.31.0.20240218"
-tiktoken = "^0.8.0"
-
-[tool.poetry.group.dev.dependencies]
-jupyter = "^1.0.0"
-
-[tool.poetry.group.test.dependencies]
-pytest = "^8"
-freezegun = "^1.2.2"
-pytest-mock = "^3.10.0"
-pytest-watcher = "^0.3.4"
-pytest-asyncio = "^0.21.1"
-pytest-socket = "^0.7.0"
-pytest-xdist = "^3.6.1"
-
-[tool.poetry.group.test_integration]
-optional = true
-
-[tool.poetry.group.test_integration.dependencies]
-spacy = { version = "*", python = "<3.13" }
-nltk = "^3.9.1"
-transformers = "^4.47.0"
-sentence-transformers = { version = ">=2.6.0", python = "<3.13" }
-
-[tool.poetry.group.lint.dependencies.langchain-core]
-path = "../core"
-develop = true
-
-[tool.poetry.group.dev.dependencies.langchain-core]
-path = "../core"
-develop = true
-
-[tool.poetry.group.test.dependencies.langchain-core]
-path = "../core"
-develop = true
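Net effect of the pyproject.toml changes: poetry-specific tables are replaced by standard [project] metadata, a pdm-backend build, and PEP 735 style [dependency-groups], with the `langchain-core @ file:///${PROJECT_ROOT}/../core` entries standing in for poetry's `develop = true` path dependencies on the in-repo core package. A hedged sketch of the commands this layout enables (group names come from the diff; the specific invocations are illustrative):

    uv sync --group test                             # create/refresh .venv with unit-test deps
    uv sync --group test --group test_integration    # add integration-test deps (spacy, nltk, ...)
    uv run --group lint ruff check .                 # one-off tool run without a full sync
    uv build                                         # build sdist/wheel via the pdm backend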
@@ -2519,8 +2519,8 @@ def test_split_text_on_tokens() -> None:
     assert output == expected_output


-@pytest.mark.requires("lxml")
 @pytest.mark.requires("bs4")
+@pytest.mark.requires("lxml")
 def test_section_aware_happy_path_splitting_based_on_header_1_2() -> None:
     # arrange
     html_string = """<!DOCTYPE html>
@@ -2573,8 +2573,8 @@ def test_section_aware_happy_path_splitting_based_on_header_1_2() -> None:
     assert docs[2].metadata["Header 2"] == "Baz"


-@pytest.mark.requires("lxml")
 @pytest.mark.requires("bs4")
+@pytest.mark.requires("lxml")
 def test_happy_path_splitting_based_on_header_with_font_size() -> None:
     # arrange
     html_string = """<!DOCTYPE html>
@@ -2624,8 +2624,8 @@ def test_happy_path_splitting_based_on_header_with_font_size() -> None:
     assert docs[2].metadata["Header 2"] == "Baz"


-@pytest.mark.requires("lxml")
 @pytest.mark.requires("bs4")
+@pytest.mark.requires("lxml")
 def test_happy_path_splitting_based_on_header_with_whitespace_chars() -> None:
     # arrange
     html_string = """<!DOCTYPE html>
@@ -2675,8 +2675,8 @@ def test_happy_path_splitting_based_on_header_with_whitespace_chars() -> None:
     assert docs[2].metadata["Header 2"] == "Baz"


-@pytest.mark.requires("lxml")
 @pytest.mark.requires("bs4")
+@pytest.mark.requires("lxml")
 def test_section_splitter_accepts_a_relative_path() -> None:
     html_string = """<html><body><p>Foo</p></body></html>"""
     test_file = Path("tests/test_data/test_splitter.xslt")
@@ -2690,8 +2690,8 @@ def test_section_splitter_accepts_a_relative_path() -> None:
     sec_splitter.split_text(html_string)


-@pytest.mark.requires("lxml")
 @pytest.mark.requires("bs4")
+@pytest.mark.requires("lxml")
 def test_section_splitter_accepts_an_absolute_path() -> None:
     html_string = """<html><body><p>Foo</p></body></html>"""
     test_file = Path("tests/test_data/test_splitter.xslt").absolute()
@@ -2706,8 +2706,8 @@ def test_section_splitter_accepts_an_absolute_path() -> None:
     sec_splitter.split_text(html_string)


-@pytest.mark.requires("lxml")
 @pytest.mark.requires("bs4")
+@pytest.mark.requires("lxml")
 def test_happy_path_splitting_with_duplicate_header_tag() -> None:
     # arrange
     html_string = """<!DOCTYPE html>
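The reordered decorators are the repo's custom `requires` marker, which gates tests on optional packages such as bs4 and lxml. A generic, hedged sketch of how such a marker is typically wired up in a conftest.py — not the repo's actual implementation:

    import importlib.util

    import pytest


    def pytest_collection_modifyitems(
        config: pytest.Config, items: list[pytest.Item]
    ) -> None:
        """Skip tests whose `requires` marker names packages that are not installed."""
        for item in items:
            required = [pkg for m in item.iter_markers(name="requires") for pkg in m.args]
            missing = [pkg for pkg in required if importlib.util.find_spec(pkg) is None]
            if missing:
                # Skip rather than fail when optional extras are absent.
                item.add_marker(pytest.mark.skip(reason=f"requires {', '.join(missing)}"))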
libs/text-splitters/uv.lock (generated, new file, 4033 lines): file diff suppressed because it is too large.