mirror of
https://github.com/hwchase17/langchain.git
synced 2025-08-03 18:24:10 +00:00
community[patch]: add web loader tests (#26728)
This commit is contained in:
parent
4a2745064a
commit
f2285376a5
@ -1,3 +1,7 @@
|
|||||||
|
from textwrap import dedent
|
||||||
|
from typing import Any
|
||||||
|
from unittest.mock import MagicMock, patch
|
||||||
|
|
||||||
import pytest as pytest
|
import pytest as pytest
|
||||||
|
|
||||||
from langchain_community.document_loaders.web_base import WebBaseLoader
|
from langchain_community.document_loaders.web_base import WebBaseLoader
|
||||||
@ -19,3 +23,62 @@ class TestWebBaseLoader:
|
|||||||
assert web_base_loader.web_paths == ["https://www.example.com"]
|
assert web_base_loader.web_paths == ["https://www.example.com"]
|
||||||
web_base_loader = WebBaseLoader(web_path="https://www.example.com")
|
web_base_loader = WebBaseLoader(web_path="https://www.example.com")
|
||||||
assert web_base_loader.web_paths == ["https://www.example.com"]
|
assert web_base_loader.web_paths == ["https://www.example.com"]
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.requires("bs4")
@patch("langchain_community.document_loaders.web_base.requests.Session.get")
def test_lazy_load(mock_get: Any) -> None:
    """Exercise ``WebBaseLoader.lazy_load`` with ``Session.get`` mocked out.

    Two scenarios run back to back against the same patched ``get``:

    1. A minimal page, loaded with default settings, must yield exactly one
       document whose content is the paragraph text.
    2. A page with a paragraph and a ``special-class`` div, loaded with a
       ``parse_only`` strainer in ``bs_kwargs``, must yield exactly one
       document containing only the div text.

    Args:
        mock_get: The mock injected by ``@patch`` in place of
            ``requests.Session.get``.
    """
    # Imported locally so that collecting this module does not require bs4;
    # the ``requires`` marker presumably skips the test when it is missing.
    import bs4

    url = "https://www.example.com"

    # --- Scenario 1: default parsing ---------------------------------------
    plain_page = MagicMock()
    plain_page.text = "<html><body><p>Test content</p></body></html>"
    mock_get.return_value = plain_page

    plain_docs = list(WebBaseLoader(web_paths=[url]).lazy_load())

    mock_get.assert_called_with(url)
    assert len(plain_docs) == 1
    assert plain_docs[0].page_content == "Test content"

    # --- Scenario 2: bs4 kwargs restrict what gets parsed ------------------
    # Test bs4 kwargs
    mock_html = dedent("""
        <html>
        <body>
        <p>Test content</p>
        <div class="special-class">This is a div with a special class</div>
        </body>
        </html>
        """)
    strained_page = MagicMock()
    strained_page.text = mock_html
    mock_get.return_value = strained_page

    only_special = bs4.SoupStrainer(class_="special-class")
    strained_loader = WebBaseLoader(
        web_paths=[url],
        bs_kwargs={"parse_only": only_special},
    )
    strained_docs = list(strained_loader.lazy_load())

    assert len(strained_docs) == 1
    assert strained_docs[0].page_content == "This is a div with a special class"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.requires("bs4")
@patch("aiohttp.ClientSession.get")
def test_aload(mock_get: Any) -> None:
    """Exercise ``WebBaseLoader.aload`` with ``aiohttp.ClientSession.get`` mocked.

    The patched ``get`` returns an object whose ``__aenter__`` resolves to a
    fake response; that response's ``text`` is a coroutine function returning
    a minimal HTML page. The test then checks that exactly one document comes
    back with the paragraph text, and that ``get`` was called with the URL,
    the custom ``User-Agent`` header, and empty cookies.

    Args:
        mock_get: The mock injected by ``@patch`` in place of
            ``aiohttp.ClientSession.get``.
    """

    async def fake_text() -> str:
        # Stands in for the awaitable ``response.text()`` of a real response.
        return "<html><body><p>Test content</p></body></html>"

    fake_response = MagicMock()
    fake_response.text = fake_text
    # ``get(...)`` is used as an async context manager; make entering it
    # hand back the fake response.
    mock_get.return_value.__aenter__.return_value = fake_response

    url = "https://www.example.com"
    loader = WebBaseLoader(
        web_paths=[url],
        header_template={"User-Agent": "test-user-agent"},
    )
    documents = loader.aload()

    assert len(documents) == 1
    assert documents[0].page_content == "Test content"

    # Built as a separate dict so the expectation does not alias the object
    # that was handed to the loader.
    expected_headers = {"User-Agent": "test-user-agent"}
    mock_get.assert_called_with(url, headers=expected_headers, cookies={})
|
||||||
|
Loading…
Reference in New Issue
Block a user