# langchain/libs/community/tests/integration_tests/utilities/test_pubmed.py

"""Integration test for PubMed API Wrapper."""

from typing import Any, List

import pytest
from langchain_core.documents import Document
from langchain_core.tools import BaseTool

from langchain_community.tools import PubmedQueryRun
from langchain_community.utilities import PubMedAPIWrapper

# Skip this whole module at collection time when ``xmltodict`` cannot be imported.
# NOTE(review): presumably the PubMed wrapper needs it to parse responses — confirm.
xmltodict = pytest.importorskip("xmltodict")


@pytest.fixture
def api_client() -> PubMedAPIWrapper:
    """Provide a ``PubMedAPIWrapper`` constructed with no arguments."""
    wrapper = PubMedAPIWrapper()  # type: ignore[call-arg]
    return wrapper


def test_run_success(api_client: PubMedAPIWrapper) -> None:
    """Check that ``run`` finds a known article and fills the character limit."""
    query = (
        "Examining the Validity of ChatGPT in Identifying "
        "Relevant Nephrology Literature"
    )
    expected_title = (
        "Examining the Validity of ChatGPT in Identifying "
        "Relevant Nephrology Literature: Findings and Implications"
    )

    result = api_client.run(query)

    # The full article title must appear somewhere in the returned text.
    assert expected_title in result
    # The output length is expected to equal the wrapper's configured maximum.
    assert len(result) == api_client.doc_content_chars_max


def test_run_returns_no_result(api_client: PubMedAPIWrapper) -> None:
    """Check that ``run`` reports the no-result message for an unmatched query."""
    unmatched_query = "1605.08386WWW"
    result = api_client.run(unmatched_query)
    assert result == "No good PubMed Result was found"


def test_retrieve_article_returns_book_abstract(api_client: PubMedAPIWrapper) -> None:
    """Check that book records expose their abstract text under ``"Summary"``."""
    # Fetch both records first, then verify each summary.
    osteoporosis_entry = api_client.retrieve_article("25905357", "")
    wallenberg_entry = api_client.retrieve_article("29262144", "")

    expected_osteoporosis = (
        "Osteoporosis is a multifactorial disorder associated with low bone mass and "
        "enhanced skeletal fragility. Although"
    )
    expected_wallenberg = (
        "Wallenberg syndrome was first described in 1808 by Gaspard Vieusseux. However,"
    )

    assert expected_osteoporosis in osteoporosis_entry["Summary"]
    assert expected_wallenberg in wallenberg_entry["Summary"]


def test_retrieve_article_returns_article_abstract(
    api_client: PubMedAPIWrapper,
) -> None:
    """Check article abstracts, both plain and with an ``OBJECTIVES:`` label."""
    # Fetch both records first, then verify each summary.
    plain_article = api_client.retrieve_article("37666905", "")
    labelled_article = api_client.retrieve_article("37666551", "")

    expected_plain = (
        "This work aims to: (1) Provide maximal hand force data on six different "
        "grasp types for healthy subjects; (2) detect grasp types with maximal "
        "force significantly affected by hand osteoarthritis (HOA) in women; (3) "
        "look for predictors to detect HOA from the maximal forces using discriminant "
        "analyses."
    )
    # This expectation includes the section label that prefixes the abstract text.
    expected_labelled = (
        "OBJECTIVES: To assess across seven hospitals from six different countries "
        "the extent to which the COVID-19 pandemic affected the volumes of orthopaedic "
        "hospital admissions and patient outcomes for non-COVID-19 patients admitted "
        "for orthopaedic care."
    )

    assert expected_plain in plain_article["Summary"]
    assert expected_labelled in labelled_article["Summary"]


def test_retrieve_article_no_abstract_available(api_client: PubMedAPIWrapper) -> None:
    """Check that the summary is the placeholder text for this abstract-less record."""
    article = api_client.retrieve_article("10766884", "")
    summary = article["Summary"]
    assert summary == "No abstract available"


def assert_docs(docs: List[Document]) -> None:
    """Assert that every document carries exactly the four expected metadata keys.

    Args:
        docs: Documents to check; an empty list passes without any assertion.
    """
    expected_keys = {"Copyright Information", "Published", "Title", "uid"}
    for document in docs:
        metadata = document.metadata
        assert metadata
        assert set(metadata) == expected_keys


def test_load_success(api_client: PubMedAPIWrapper) -> None:
    """Check that ``load_docs`` returns ``top_k_results`` (three) documents."""
    loaded = api_client.load_docs("chatgpt")
    limit = api_client.top_k_results

    # The document count must match the wrapper's limit, which must itself be 3.
    assert len(loaded) == limit
    assert limit == 3
    assert_docs(loaded)


def test_load_returns_no_result(api_client: PubMedAPIWrapper) -> None:
    """Check that ``load_docs`` yields no documents for an unmatched query."""
    unmatched_query = "1605.08386WWW"
    loaded = api_client.load_docs(unmatched_query)
    assert len(loaded) == 0


def test_load_returns_limited_docs() -> None:
    """Check that ``load_docs`` returns exactly ``top_k_results`` documents."""
    limit = 2
    wrapper = PubMedAPIWrapper(top_k_results=limit)  # type: ignore[call-arg]
    loaded = wrapper.load_docs("ChatGPT")
    assert len(loaded) == limit
    assert_docs(loaded)


def test_load_returns_full_set_of_metadata() -> None:
    """Check that loaded documents contain at least the four core metadata keys."""
    required_keys = {"Copyright Information", "Published", "Title", "uid"}
    wrapper = PubMedAPIWrapper(load_max_docs=1, load_all_available_meta=True)  # type: ignore[call-arg]
    loaded = wrapper.load_docs("ChatGPT")

    # NOTE(review): three documents are expected even though ``load_max_docs=1`` is
    # passed — confirm which setting actually governs the result count.
    assert len(loaded) == 3
    for document in loaded:
        assert document.metadata
        # Extra keys are allowed here; only the core ones are required.
        assert required_keys.issubset(document.metadata)


def _load_pubmed_from_universal_entry(**kwargs: Any) -> BaseTool:
    """Load the ``"pubmed"`` tool through the generic ``load_tools`` entry point.

    Args:
        **kwargs: Forwarded unchanged to ``load_tools``.

    Returns:
        The single tool that ``load_tools`` produced.

    Raises:
        AssertionError: If ``load_tools`` does not return exactly one tool.
    """
    # Imported lazily so that merely importing this module does not need ``langchain``.
    from langchain.agents.load_tools import load_tools

    tools = load_tools(["pubmed"], **kwargs)
    # Report the real count: the check also fails when no tool was loaded at all.
    assert len(tools) == 1, f"expected exactly 1 tool, loaded {len(tools)}"
    return tools[0]


def test_load_pupmed_from_universal_entry() -> None:
    """Check that the tool loaded via ``load_tools`` finds a known article title."""
    query = (
        "Examining the Validity of ChatGPT in Identifying "
        "Relevant Nephrology Literature"
    )
    expected_title = (
        "Examining the Validity of ChatGPT in Identifying "
        "Relevant Nephrology Literature: Findings and Implications"
    )

    tool = _load_pubmed_from_universal_entry()
    result = tool.invoke(query)

    assert expected_title in result


def test_load_pupmed_from_universal_entry_with_params() -> None:
    """Check that keyword arguments given at load time reach the tool's API wrapper."""
    tool = _load_pubmed_from_universal_entry(top_k_results=1)

    # The loaded tool must be the PubMed query tool before its wrapper is inspected.
    assert isinstance(tool, PubmedQueryRun)
    wrapper = tool.api_wrapper
    assert wrapper.top_k_results == 1, "failed to assert top_k_results"