mirror of
https://github.com/hwchase17/langchain.git
synced 2026-06-09 10:17:00 +00:00
fix(text-splitters): deprecate and use SSRF-safe transport in split_text_from_url (#36821)
This commit is contained in:
@@ -15,8 +15,7 @@ from typing import (
|
||||
cast,
|
||||
)
|
||||
|
||||
import requests
|
||||
from langchain_core._api import beta
|
||||
from langchain_core._api import beta, deprecated
|
||||
from langchain_core.documents import BaseDocumentTransformer, Document
|
||||
from typing_extensions import override
|
||||
|
||||
@@ -186,8 +185,19 @@ class HTMLHeaderTextSplitter:
|
||||
"""
|
||||
return self.split_text_from_file(StringIO(text))
|
||||
|
||||
@deprecated(
|
||||
since="1.1.2",
|
||||
removal="2.0.0",
|
||||
message=(
|
||||
"Please fetch the HTML content from the URL yourself and pass it "
|
||||
"to split_text."
|
||||
),
|
||||
)
|
||||
def split_text_from_url(
|
||||
self, url: str, timeout: int = 10, **kwargs: Any
|
||||
self,
|
||||
url: str,
|
||||
timeout: int = 10,
|
||||
**kwargs: Any, # noqa: ARG002
|
||||
) -> list[Document]:
|
||||
"""Fetch text content from a URL and split it into documents.
|
||||
|
||||
@@ -205,14 +215,14 @@ class HTMLHeaderTextSplitter:
|
||||
Raises:
|
||||
requests.RequestException: If the HTTP request fails.
|
||||
"""
|
||||
from langchain_core._security._ssrf_protection import ( # noqa: PLC0415
|
||||
validate_safe_url,
|
||||
from langchain_core._security._transport import ( # noqa: PLC0415
|
||||
ssrf_safe_client,
|
||||
)
|
||||
|
||||
validate_safe_url(url, allow_private=False, allow_http=True)
|
||||
response = requests.get(url, timeout=timeout, **kwargs)
|
||||
response.raise_for_status()
|
||||
return self.split_text(response.text)
|
||||
with ssrf_safe_client() as client:
|
||||
response = client.get(url, timeout=timeout)
|
||||
response.raise_for_status()
|
||||
return self.split_text(response.text)
|
||||
|
||||
def split_text_from_file(self, file: str | IO[str]) -> list[Document]:
|
||||
"""Split HTML content from a file into a list of `Document` objects.
|
||||
|
||||
Reference in New Issue
Block a user