mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-06 13:33:37 +00:00
community[minor]: add user agent for web scraping loaders (#22480)
**Description:** This PR adds a `USER_AGENT` environment variable to be used for web scraping. It creates a utility to get that user agent and uses it in the classes used for scraping in [this piece of documentation](https://python.langchain.com/v0.1/docs/use_cases/web_scraping/). Identifying your scraper is considered good, polite practice; this PR aims to make that easier. **Issue:** `None` **Dependencies:** `None` **Twitter handle:** `None`
This commit is contained in:
16
libs/community/langchain_community/utils/user_agent.py
Normal file
16
libs/community/langchain_community/utils/user_agent.py
Normal file
@@ -0,0 +1,16 @@
|
||||
import logging
|
||||
import os
|
||||
|
||||
# Module-level logger so records emitted here carry this module's name.
log = logging.getLogger(__name__)


def get_user_agent() -> str:
    """Get user agent from environment variable.

    Reads the ``USER_AGENT`` environment variable. When it is unset or
    empty, a warning is logged and a default identifier is returned.

    Returns:
        The value of ``USER_AGENT`` if it is set to a non-empty string,
        otherwise ``"DefaultLangchainUserAgent"``.
    """
    env_user_agent = os.environ.get("USER_AGENT")
    if not env_user_agent:
        # Use the module logger rather than the root-level
        # ``logging.warning`` helper: the record is then attributed to this
        # module (so it can be filtered per logger) and the root logger is
        # not implicitly configured as a side effect.
        log.warning(
            "USER_AGENT environment variable not set, "
            "consider setting it to identify your requests."
        )
        return "DefaultLangchainUserAgent"
    return env_user_agent
|
Reference in New Issue
Block a user