langchain/libs/community/langchain_community/utilities/reddit_search.py
Erick Friis c2a3021bb0
multiple: pydantic 2 compatibility, v0.3 (#26443)
Signed-off-by: ChengZi <chen.zhang@zilliz.com>
Co-authored-by: Eugene Yurtsev <eyurtsev@gmail.com>
Co-authored-by: Bagatur <22008038+baskaryan@users.noreply.github.com>
Co-authored-by: Dan O'Donovan <dan.odonovan@gmail.com>
Co-authored-by: Tom Daniel Grande <tomdgrande@gmail.com>
Co-authored-by: Grande <Tom.Daniel.Grande@statsbygg.no>
Co-authored-by: Bagatur <baskaryan@gmail.com>
Co-authored-by: ccurme <chester.curme@gmail.com>
Co-authored-by: Harrison Chase <hw.chase.17@gmail.com>
Co-authored-by: Tomaz Bratanic <bratanic.tomaz@gmail.com>
Co-authored-by: ZhangShenao <15201440436@163.com>
Co-authored-by: Friso H. Kingma <fhkingma@gmail.com>
Co-authored-by: ChengZi <chen.zhang@zilliz.com>
Co-authored-by: Nuno Campos <nuno@langchain.dev>
Co-authored-by: Morgante Pell <morgantep@google.com>
2024-09-13 14:38:45 -07:00

123 lines
4.4 KiB
Python

"""Wrapper for the Reddit API"""
from typing import Any, Dict, List, Optional
from langchain_core.utils import get_from_dict_or_env
from pydantic import BaseModel, model_validator
class RedditSearchAPIWrapper(BaseModel):
"""Wrapper for Reddit API
To use, set the environment variables ``REDDIT_CLIENT_ID``,
``REDDIT_CLIENT_SECRET``, ``REDDIT_USER_AGENT`` to set the client ID,
client secret, and user agent, respectively, as given by Reddit's API.
Alternatively, all three can be supplied as named parameters in the
constructor: ``reddit_client_id``, ``reddit_client_secret``, and
``reddit_user_agent``, respectively.
Example:
.. code-block:: python
from langchain_community.utilities import RedditSearchAPIWrapper
reddit_search = RedditSearchAPIWrapper()
"""
reddit_client: Any
# Values required to access Reddit API via praw
reddit_client_id: Optional[str]
reddit_client_secret: Optional[str]
reddit_user_agent: Optional[str]
@model_validator(mode="before")
@classmethod
def validate_environment(cls, values: Dict) -> Any:
"""Validate that the API ID, secret and user agent exists in environment
and check that praw module is present.
"""
reddit_client_id = get_from_dict_or_env(
values, "reddit_client_id", "REDDIT_CLIENT_ID"
)
values["reddit_client_id"] = reddit_client_id
reddit_client_secret = get_from_dict_or_env(
values, "reddit_client_secret", "REDDIT_CLIENT_SECRET"
)
values["reddit_client_secret"] = reddit_client_secret
reddit_user_agent = get_from_dict_or_env(
values, "reddit_user_agent", "REDDIT_USER_AGENT"
)
values["reddit_user_agent"] = reddit_user_agent
try:
import praw
except ImportError:
raise ImportError(
"praw package not found, please install it with pip install praw"
)
reddit_client = praw.Reddit(
client_id=reddit_client_id,
client_secret=reddit_client_secret,
user_agent=reddit_user_agent,
)
values["reddit_client"] = reddit_client
return values
def run(
self, query: str, sort: str, time_filter: str, subreddit: str, limit: int
) -> str:
"""Search Reddit and return posts as a single string."""
results: List[Dict] = self.results(
query=query,
sort=sort,
time_filter=time_filter,
subreddit=subreddit,
limit=limit,
)
if len(results) > 0:
output: List[str] = [f"Searching r/{subreddit} found {len(results)} posts:"]
for r in results:
category = "N/A" if r["post_category"] is None else r["post_category"]
p = f"Post Title: '{r['post_title']}'\n\
User: {r['post_author']}\n\
Subreddit: {r['post_subreddit']}:\n\
Text body: {r['post_text']}\n\
Post URL: {r['post_url']}\n\
Post Category: {category}.\n\
Score: {r['post_score']}\n"
output.append(p)
return "\n".join(output)
else:
return f"Searching r/{subreddit} did not find any posts:"
def results(
self, query: str, sort: str, time_filter: str, subreddit: str, limit: int
) -> List[Dict]:
"""Use praw to search Reddit and return a list of dictionaries,
one for each post.
"""
subredditObject = self.reddit_client.subreddit(subreddit)
search_results = subredditObject.search(
query=query, sort=sort, time_filter=time_filter, limit=limit
)
search_results = [r for r in search_results]
results_object = []
for submission in search_results:
results_object.append(
{
"post_subreddit": submission.subreddit_name_prefixed,
"post_category": submission.category,
"post_title": submission.title,
"post_text": submission.selftext,
"post_score": submission.score,
"post_id": submission.id,
"post_url": submission.url,
"post_author": submission.author,
}
)
return results_object