mirror of
				https://github.com/hwchase17/langchain.git
				synced 2025-11-04 02:03:32 +00:00 
			
		
		
		
	Moved the following modules to new package langchain-community in a backwards compatible fashion: ``` mv langchain/langchain/adapters community/langchain_community mv langchain/langchain/callbacks community/langchain_community/callbacks mv langchain/langchain/chat_loaders community/langchain_community mv langchain/langchain/chat_models community/langchain_community mv langchain/langchain/document_loaders community/langchain_community mv langchain/langchain/docstore community/langchain_community mv langchain/langchain/document_transformers community/langchain_community mv langchain/langchain/embeddings community/langchain_community mv langchain/langchain/graphs community/langchain_community mv langchain/langchain/llms community/langchain_community mv langchain/langchain/memory/chat_message_histories community/langchain_community mv langchain/langchain/retrievers community/langchain_community mv langchain/langchain/storage community/langchain_community mv langchain/langchain/tools community/langchain_community mv langchain/langchain/utilities community/langchain_community mv langchain/langchain/vectorstores community/langchain_community mv langchain/langchain/agents/agent_toolkits community/langchain_community mv langchain/langchain/cache.py community/langchain_community mv langchain/langchain/adapters community/langchain_community mv langchain/langchain/callbacks community/langchain_community/callbacks mv langchain/langchain/chat_loaders community/langchain_community mv langchain/langchain/chat_models community/langchain_community mv langchain/langchain/document_loaders community/langchain_community mv langchain/langchain/docstore community/langchain_community mv langchain/langchain/document_transformers community/langchain_community mv langchain/langchain/embeddings community/langchain_community mv langchain/langchain/graphs community/langchain_community mv langchain/langchain/llms community/langchain_community mv langchain/langchain/memory/chat_message_histories 
community/langchain_community mv langchain/langchain/retrievers community/langchain_community mv langchain/langchain/storage community/langchain_community mv langchain/langchain/tools community/langchain_community mv langchain/langchain/utilities community/langchain_community mv langchain/langchain/vectorstores community/langchain_community mv langchain/langchain/agents/agent_toolkits community/langchain_community mv langchain/langchain/cache.py community/langchain_community ``` Moved the following to core ``` mv langchain/langchain/utils/json_schema.py core/langchain_core/utils mv langchain/langchain/utils/html.py core/langchain_core/utils mv langchain/langchain/utils/strings.py core/langchain_core/utils cat langchain/langchain/utils/env.py >> core/langchain_core/utils/env.py rm langchain/langchain/utils/env.py ``` See .scripts/community_split/script_integrations.sh for all changes
		
			
				
	
	
		
			69 lines
		
	
	
		
			2.3 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			69 lines
		
	
	
		
			2.3 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
from __future__ import annotations
 | 
						|
 | 
						|
from typing import Optional, Type
 | 
						|
 | 
						|
from langchain_core.callbacks import (
 | 
						|
    AsyncCallbackManagerForToolRun,
 | 
						|
    CallbackManagerForToolRun,
 | 
						|
)
 | 
						|
from langchain_core.pydantic_v1 import BaseModel, root_validator
 | 
						|
 | 
						|
from langchain_community.tools.playwright.base import BaseBrowserTool
 | 
						|
from langchain_community.tools.playwright.utils import (
 | 
						|
    aget_current_page,
 | 
						|
    get_current_page,
 | 
						|
)
 | 
						|
 | 
						|
 | 
						|
class ExtractTextTool(BaseBrowserTool):
    """Tool for extracting all the text on the current webpage.

    Works against either a synchronous or an asynchronous Playwright
    browser (whichever the inherited ``BaseBrowserTool`` was configured
    with) and returns the page's visible text as a single
    whitespace-joined string.
    """

    # Public tool identity exposed to agents; do not rename.
    name: str = "extract_text"
    description: str = "Extract all the text on the current webpage"
    # This tool takes no arguments, so the bare BaseModel schema suffices.
    args_schema: Type[BaseModel] = BaseModel

    @root_validator
    def check_bs_import(cls, values: dict) -> dict:
        """Fail fast at construction time if 'beautifulsoup4' is missing.

        The dependency is only used inside ``_run``/``_arun``; validating
        here surfaces a missing install immediately instead of on first
        tool invocation.

        Raises:
            ImportError: If the 'beautifulsoup4' package is not installed.
        """
        try:
            from bs4 import BeautifulSoup  # noqa: F401
        except ImportError:
            raise ImportError(
                "The 'beautifulsoup4' package is required to use this tool."
                " Please install it with 'pip install beautifulsoup4'."
            )
        return values

    def _run(self, run_manager: Optional[CallbackManagerForToolRun] = None) -> str:
        """Use the tool synchronously.

        Args:
            run_manager: Optional callback manager (unused here).

        Returns:
            All visible text on the current page, joined by single spaces.

        Raises:
            ValueError: If no synchronous browser was provided.
        """
        # Use Beautiful Soup since it's faster than looping through the elements
        from bs4 import BeautifulSoup

        if self.sync_browser is None:
            raise ValueError(f"Synchronous browser not provided to {self.name}")

        page = get_current_page(self.sync_browser)
        html_content = page.content()

        # Parse the HTML content with BeautifulSoup
        soup = BeautifulSoup(html_content, "lxml")

        # stripped_strings yields every text node with surrounding
        # whitespace removed; join them into one flat string.
        return " ".join(text for text in soup.stripped_strings)

    async def _arun(
        self, run_manager: Optional[AsyncCallbackManagerForToolRun] = None
    ) -> str:
        """Use the tool asynchronously.

        Args:
            run_manager: Optional async callback manager (unused here).

        Returns:
            All visible text on the current page, joined by single spaces.

        Raises:
            ValueError: If no asynchronous browser was provided.
        """
        if self.async_browser is None:
            raise ValueError(f"Asynchronous browser not provided to {self.name}")
        # Use Beautiful Soup since it's faster than looping through the elements
        from bs4 import BeautifulSoup

        page = await aget_current_page(self.async_browser)
        html_content = await page.content()

        # Parse the HTML content with BeautifulSoup
        soup = BeautifulSoup(html_content, "lxml")

        return " ".join(text for text in soup.stripped_strings)