diff --git a/libs/text-splitters/langchain_text_splitters/html.py b/libs/text-splitters/langchain_text_splitters/html.py index 94ebbe5d411..0ef60d2a455 100644 --- a/libs/text-splitters/langchain_text_splitters/html.py +++ b/libs/text-splitters/langchain_text_splitters/html.py @@ -15,8 +15,7 @@ from typing import ( cast, ) -import requests -from langchain_core._api import beta +from langchain_core._api import beta, deprecated from langchain_core.documents import BaseDocumentTransformer, Document from typing_extensions import override @@ -186,8 +185,19 @@ class HTMLHeaderTextSplitter: """ return self.split_text_from_file(StringIO(text)) + @deprecated( + since="1.1.2", + removal="2.0.0", + message=( + "Please fetch the HTML content from the URL yourself and pass it " + "to split_text." + ), + ) def split_text_from_url( - self, url: str, timeout: int = 10, **kwargs: Any + self, + url: str, + timeout: int = 10, + **kwargs: Any, # noqa: ARG002 ) -> list[Document]: """Fetch text content from a URL and split it into documents. @@ -205,14 +215,14 @@ class HTMLHeaderTextSplitter: Raises: requests.RequestException: If the HTTP request fails. """ - from langchain_core._security._ssrf_protection import ( # noqa: PLC0415 - validate_safe_url, + from langchain_core._security._transport import ( # noqa: PLC0415 + ssrf_safe_client, ) - validate_safe_url(url, allow_private=False, allow_http=True) - response = requests.get(url, timeout=timeout, **kwargs) - response.raise_for_status() - return self.split_text(response.text) + with ssrf_safe_client() as client: + response = client.get(url, timeout=timeout) + response.raise_for_status() + return self.split_text(response.text) def split_text_from_file(self, file: str | IO[str]) -> list[Document]: """Split HTML content from a file into a list of `Document` objects. diff --git a/libs/text-splitters/pyproject.toml b/libs/text-splitters/pyproject.toml index 5608b92a5c6..439c23f7a99 100644 --- a/libs/text-splitters/pyproject.toml +++ b/libs/text-splitters/pyproject.toml @@ -25,7 +25,7 @@ classifiers = [ version = "1.1.1" requires-python = ">=3.10.0,<4.0.0" dependencies = [ - "langchain-core>=1.2.13,<2.0.0", + "langchain-core>=1.2.31,<2.0.0", ] [project.urls]