community: Corrected the aload func of WebBaseLoader to be asynchronous (#28337)

- **Description:** The `aload` function, contrary to its name, is not an asynchronous function, so it cannot run concurrently with other asynchronous functions. - **Issue:** #28336 - **Test:** Done - **Docs:** [here](e0a95e5646/docs/docs/integrations/document_loaders/web_base.ipynb (L201)) - **Lint:** All checks passed. If no one reviews your PR within a few days, please @-mention one of baskaryan, efriis, eyurtsev, ccurme, vbarda, hwchase17. --------- Co-authored-by: Chester Curme <chester.curme@gmail.com>
2025-09-09 23:12:38 +00:00 · 2024-12-21 04:42:52 +09:00
parent a08c76a6b2
commit f38fc89f35
4 changed files with 213 additions and 93 deletions
--- a/libs/community/tests/integration_tests/vectorstores/test_falkordb_vector_integration.py
+++ b/libs/community/tests/integration_tests/vectorstores/test_falkordb_vector_integration.py
@@ -102,8 +102,8 @@ def test_falkordbvector() -> None:
        pre_delete_collection=True,
    )
    output = docsearch.similarity_search("foo", k=1)
-    assert type(output) is list
-    assert type(output[0]) is Document
+    assert isinstance(output, list)
+    assert isinstance(output[0], Document)
    assert output[0].page_content == "foo"

    drop_vector_indexes(docsearch)
@@ -121,8 +121,8 @@ def test_falkordbvector_embeddings() -> None:
        pre_delete_collection=True,
    )
    output = docsearch.similarity_search("foo", k=1)
-    assert type(output) is list
-    assert type(output[0]) is Document
+    assert isinstance(output, list)
+    assert isinstance(output[0], Document)
    assert output[0].page_content == "foo"

    drop_vector_indexes(docsearch)
@@ -168,8 +168,8 @@ def test_falkordbvector_with_metadatas() -> None:
        pre_delete_collection=True,
    )
    output = docsearch.similarity_search("foo", k=1)
-    assert type(output) is list
-    assert type(output[0]) is Document
+    assert isinstance(output, list)
+    assert isinstance(output[0], Document)
    assert output[0].metadata.get("page") == "0"

    drop_vector_indexes(docsearch)
--- a/libs/community/tests/unit_tests/document_loaders/test_web_base.py
+++ b/libs/community/tests/unit_tests/document_loaders/test_web_base.py
@@ -62,6 +62,52 @@ def test_lazy_load(mock_get: Any) -> None:
    assert results[0].page_content == "This is a div with a special class"


+@pytest.mark.requires("bs4")
+@patch("aiohttp.ClientSession.get")
+async def test_alazy_load(mock_get: Any) -> None:
+    async def mock_text() -> str:
+        return "<html><body><p>Test content</p></body></html>"
+
+    import bs4
+
+    mock_response = MagicMock()
+    mock_response.text = mock_text
+    mock_get.return_value.__aenter__.return_value = mock_response
+
+    loader = WebBaseLoader(web_paths=["https://www.example.com"])
+    results = []
+    async for result in loader.alazy_load():
+        results.append(result)
+    # mock_get.assert_called_with("https://www.example.com")
+    assert len(results) == 1
+    assert results[0].page_content == "Test content"
+
+    # Test bs4 kwargs
+    async def mock_text_bs4() -> str:
+        return dedent("""
+            <html>
+            <body>
+                <p>Test content</p>
+                <div class="special-class">This is a div with a special class</div>
+            </body>
+            </html>
+            """)
+
+    mock_response = MagicMock()
+    mock_response.text = mock_text_bs4
+    mock_get.return_value.__aenter__.return_value = mock_response
+
+    loader = WebBaseLoader(
+        web_paths=["https://www.example.com"],
+        bs_kwargs={"parse_only": bs4.SoupStrainer(class_="special-class")},
+    )
+    results = []
+    async for result in loader.alazy_load():
+        results.append(result)
+    assert len(results) == 1
+    assert results[0].page_content == "This is a div with a special class"
+
+
@pytest.mark.requires("bs4")
@patch("aiohttp.ClientSession.get")
 def test_aload(mock_get: Any) -> None: