Minor fixes
This commit is contained in:
117
agentstuff/sentiment_agent/clients/reddit.py
Normal file
117
agentstuff/sentiment_agent/clients/reddit.py
Normal file
@@ -0,0 +1,117 @@
|
||||
"""Reddit client using the public JSON API.
|
||||
|
||||
No authentication required for read-only search. Reddit requires a descriptive
|
||||
User-Agent header — requests with generic UAs get 429'd.
|
||||
"""
|
||||
|
||||
import httpx
|
||||
|
||||
REDDIT_BASE = "https://www.reddit.com"
|
||||
USER_AGENT = "sentiment-agent/0.1.0 (research; sentiment analysis tool)"
|
||||
|
||||
|
||||
async def search_posts(
|
||||
query: str,
|
||||
subreddit: str = "all",
|
||||
sort: str = "relevance",
|
||||
time_filter: str = "month",
|
||||
limit: int = 25,
|
||||
) -> list[dict]:
|
||||
"""Search Reddit for posts matching a query.
|
||||
|
||||
Args:
|
||||
query: Search terms.
|
||||
subreddit: Subreddit to search within, or "all" for site-wide.
|
||||
sort: One of "relevance", "hot", "top", "new", "comments".
|
||||
time_filter: One of "hour", "day", "week", "month", "year", "all".
|
||||
limit: Max results (capped at 100 by Reddit).
|
||||
|
||||
Returns:
|
||||
List of post dicts with: title, selftext, author, score,
|
||||
num_comments, subreddit, url, permalink, created_utc.
|
||||
"""
|
||||
url = f"{REDDIT_BASE}/r/{subreddit}/search.json"
|
||||
async with httpx.AsyncClient(timeout=15, follow_redirects=True) as client:
|
||||
resp = await client.get(
|
||||
url,
|
||||
params={
|
||||
"q": query,
|
||||
"sort": sort,
|
||||
"t": time_filter,
|
||||
"limit": min(limit, 100),
|
||||
"restrict_sr": "on" if subreddit != "all" else "off",
|
||||
},
|
||||
headers={"User-Agent": USER_AGENT},
|
||||
)
|
||||
resp.raise_for_status()
|
||||
data = resp.json()
|
||||
|
||||
results = []
|
||||
for child in data.get("data", {}).get("children", []):
|
||||
post = child.get("data", {})
|
||||
results.append(
|
||||
{
|
||||
"title": post.get("title", ""),
|
||||
"selftext": (post.get("selftext") or "")[:2000],
|
||||
"author": post.get("author", "[deleted]"),
|
||||
"score": post.get("score", 0),
|
||||
"upvote_ratio": post.get("upvote_ratio", 0),
|
||||
"num_comments": post.get("num_comments", 0),
|
||||
"subreddit": post.get("subreddit", ""),
|
||||
"url": post.get("url", ""),
|
||||
"permalink": f"https://reddit.com{post.get('permalink', '')}",
|
||||
"created_utc": post.get("created_utc", 0),
|
||||
"is_self": post.get("is_self", False),
|
||||
}
|
||||
)
|
||||
return results
|
||||
|
||||
|
||||
async def get_post_comments(
|
||||
permalink: str,
|
||||
sort: str = "top",
|
||||
limit: int = 25,
|
||||
) -> list[dict]:
|
||||
"""Fetch top-level comments for a Reddit post.
|
||||
|
||||
Args:
|
||||
permalink: The post's permalink path (e.g., "/r/python/comments/abc123/title/").
|
||||
sort: Comment sort order: "top", "best", "new", "controversial".
|
||||
limit: Max comments to return.
|
||||
|
||||
Returns:
|
||||
List of comment dicts with: body, author, score, created_utc.
|
||||
"""
|
||||
# Strip domain if full URL was passed
|
||||
if permalink.startswith("https://"):
|
||||
permalink = permalink.replace("https://reddit.com", "")
|
||||
permalink = permalink.replace("https://www.reddit.com", "")
|
||||
|
||||
url = f"{REDDIT_BASE}{permalink}.json"
|
||||
async with httpx.AsyncClient(timeout=15, follow_redirects=True) as client:
|
||||
resp = await client.get(
|
||||
url,
|
||||
params={"sort": sort, "limit": limit},
|
||||
headers={"User-Agent": USER_AGENT},
|
||||
)
|
||||
resp.raise_for_status()
|
||||
data = resp.json()
|
||||
|
||||
# Reddit returns [post_listing, comments_listing]
|
||||
if not isinstance(data, list) or len(data) < 2:
|
||||
return []
|
||||
|
||||
results = []
|
||||
for child in data[1].get("data", {}).get("children", []):
|
||||
if child.get("kind") != "t1":
|
||||
continue
|
||||
comment = child.get("data", {})
|
||||
results.append(
|
||||
{
|
||||
"body": (comment.get("body") or "")[:2000],
|
||||
"author": comment.get("author", "[deleted]"),
|
||||
"score": comment.get("score", 0),
|
||||
"created_utc": comment.get("created_utc", 0),
|
||||
}
|
||||
)
|
||||
return results
|
||||
Reference in New Issue
Block a user