multiple: langchain 0.2 in master (#21191)

0.2rc migrations - [x] Move memory - [x] Move remaining retrievers - [x] graph_qa chains - [x] some dependency from evaluation code potentially on math utils - [x] Move openapi chain from `langchain.chains.api.openapi` to `langchain_community.chains.openapi` - [x] Migrate `langchain.chains.ernie_functions` to `langchain_community.chains.ernie_functions` - [x] migrate `langchain/chains/llm_requests.py` to `langchain_community.chains.llm_requests` - [x] Moving `langchain_community.cross_encoders.base:BaseCrossEncoder` -> `langchain_community.retrievers.document_compressors.cross_encoder:BaseCrossEncoder` (namespace not ideal, but it needs to be moved to `langchain` to avoid circular deps) - [x] unit tests langchain -- add pytest.mark.community to some unit tests that will stay in langchain - [x] unit tests community -- move unit tests that depend on community to community - [x] mv integration tests that depend on community to community - [x] mypy checks Other todo - [x] Make deprecation warnings not noisy (need to use warn deprecated and check that things are implemented properly) - [x] Update deprecation messages with timeline for code removal (likely we actually won't be removing things until 0.4 release) -- will give people more time to transition their code. - [ ] Add information to deprecation warning to show users how to migrate their code base using langchain-cli - [ ] Remove any unnecessary requirements in langchain (e.g., is SQLAlchemy required?) --------- Co-authored-by: Erick Friis <erick@langchain.dev>
2025-09-04 20:46:45 +00:00 · 2024-05-08 16:46:52 -04:00
parent 6b392d6d12
commit f92006de3c
238 changed files with 7552 additions and 5899 deletions
--- a/libs/community/langchain_community/chains/llm_requests.py
+++ b/libs/community/langchain_community/chains/llm_requests.py
@@ -0,0 +1,97 @@
+"""Chain that hits a URL and then uses an LLM to parse results."""
+from __future__ import annotations
+
+from typing import Any, Dict, List, Optional
+
+from langchain.chains import LLMChain
+from langchain.chains.base import Chain
+from langchain_core.callbacks import CallbackManagerForChainRun
+from langchain_core.pydantic_v1 import Extra, Field, root_validator
+
+from langchain_community.utilities.requests import TextRequestsWrapper
+
+# Headers given to the default ``TextRequestsWrapper`` built by
+# ``LLMRequestsChain.requests_wrapper``: a desktop-Chrome User-Agent string.
+# NOTE(review): presumably chosen so that sites which reject non-browser
+# clients still respond -- confirm.
+DEFAULT_HEADERS = {
+    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36"  # noqa: E501
+}
+
+
+class LLMRequestsChain(Chain):
+    """Chain that requests a URL and then uses an LLM to parse results.
+
+    **Security Note**: This chain can make GET requests to arbitrary URLs,
+        including internal URLs.
+
+        Control access to who can run this chain and what network access
+        this chain has.
+
+        See https://python.langchain.com/docs/security for more information.
+    """
+
+    llm_chain: LLMChain  # type: ignore[valid-type]
+    requests_wrapper: TextRequestsWrapper = Field(
+        default_factory=lambda: TextRequestsWrapper(headers=DEFAULT_HEADERS),
+        exclude=True,
+    )
+    text_length: int = 8000
+    requests_key: str = "requests_result"  #: :meta private:
+    input_key: str = "url"  #: :meta private:
+    output_key: str = "output"  #: :meta private:
+
+    class Config:
+        """Configuration for this pydantic object."""
+
+        extra = Extra.forbid
+        arbitrary_types_allowed = True
+
+    @property
+    def input_keys(self) -> List[str]:
+        """Will be whatever keys the prompt expects.
+
+        :meta private:
+        """
+        return [self.input_key]
+
+    @property
+    def output_keys(self) -> List[str]:
+        """Will always return text key.
+
+        :meta private:
+        """
+        return [self.output_key]
+
+    @root_validator()
+    def validate_environment(cls, values: Dict) -> Dict:
+        """Validate that api key and python package exists in environment."""
+        try:
+            from bs4 import BeautifulSoup  # noqa: F401
+
+        except ImportError:
+            raise ImportError(
+                "Could not import bs4 python package. "
+                "Please install it with `pip install bs4`."
+            )
+        return values
+
+    def _call(
+        self,
+        inputs: Dict[str, Any],
+        run_manager: Optional[CallbackManagerForChainRun] = None,
+    ) -> Dict[str, Any]:
+        from bs4 import BeautifulSoup
+
+        _run_manager = run_manager or CallbackManagerForChainRun.get_noop_manager()
+        # Other keys are assumed to be needed for LLM prediction
+        other_keys = {k: v for k, v in inputs.items() if k != self.input_key}
+        url = inputs[self.input_key]
+        res = self.requests_wrapper.get(url)
+        # extract the text from the html
+        soup = BeautifulSoup(res, "html.parser")
+        other_keys[self.requests_key] = soup.get_text()[: self.text_length]
+        result = self.llm_chain.predict(  # type: ignore[attr-defined]
+            callbacks=_run_manager.get_child(), **other_keys
+        )
+        return {self.output_key: result}
+
+    @property
+    def _chain_type(self) -> str:
+        return "llm_requests_chain"