mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-20 22:03:52 +00:00
text_splitters: add request parameters for function HTMLHeaderTextSplitter.split_text_from_url (#24178)
**Description:** The `split_text_from_url` method of `HTMLHeaderTextSplitter` calls `requests.get()` with only the URL, so callers cannot set options such as `timeout`. This change adds a `**kwargs` parameter to the method and forwards it to `requests.get()`, giving callers control over the request. --------- Co-authored-by: Chester Curme <chester.curme@gmail.com>
This commit is contained in:
parent
9d0c1d2dc9
commit
d895614d19
@ -71,13 +71,15 @@ class HTMLHeaderTextSplitter:
|
|||||||
for chunk in aggregated_chunks
|
for chunk in aggregated_chunks
|
||||||
]
|
]
|
||||||
|
|
||||||
def split_text_from_url(self, url: str) -> List[Document]:
|
def split_text_from_url(self, url: str, **kwargs: Any) -> List[Document]:
|
||||||
"""Split HTML from web URL
|
"""Split HTML from web URL
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
url: web URL
|
url: web URL
|
||||||
|
**kwargs: Arbitrary additional keyword arguments. These are usually passed
|
||||||
|
to the fetch url content request.
|
||||||
"""
|
"""
|
||||||
r = requests.get(url)
|
r = requests.get(url, **kwargs)
|
||||||
return self.split_text_from_file(BytesIO(r.content))
|
return self.split_text_from_file(BytesIO(r.content))
|
||||||
|
|
||||||
def split_text(self, text: str) -> List[Document]:
|
def split_text(self, text: str) -> List[Document]:
|
||||||
|
Loading…
Reference in New Issue
Block a user