"""Reddit client using the public JSON API. No authentication required for read-only search. Reddit requires a descriptive User-Agent header — requests with generic UAs get 429'd. """ import httpx REDDIT_BASE = "https://www.reddit.com" USER_AGENT = "sentiment-agent/0.1.0 (research; sentiment analysis tool)" async def search_posts( query: str, subreddit: str = "all", sort: str = "relevance", time_filter: str = "month", limit: int = 25, ) -> list[dict]: """Search Reddit for posts matching a query. Args: query: Search terms. subreddit: Subreddit to search within, or "all" for site-wide. sort: One of "relevance", "hot", "top", "new", "comments". time_filter: One of "hour", "day", "week", "month", "year", "all". limit: Max results (capped at 100 by Reddit). Returns: List of post dicts with: title, selftext, author, score, num_comments, subreddit, url, permalink, created_utc. """ url = f"{REDDIT_BASE}/r/{subreddit}/search.json" async with httpx.AsyncClient(timeout=15, follow_redirects=True) as client: resp = await client.get( url, params={ "q": query, "sort": sort, "t": time_filter, "limit": min(limit, 100), "restrict_sr": "on" if subreddit != "all" else "off", }, headers={"User-Agent": USER_AGENT}, ) resp.raise_for_status() data = resp.json() results = [] for child in data.get("data", {}).get("children", []): post = child.get("data", {}) results.append( { "title": post.get("title", ""), "selftext": (post.get("selftext") or "")[:2000], "author": post.get("author", "[deleted]"), "score": post.get("score", 0), "upvote_ratio": post.get("upvote_ratio", 0), "num_comments": post.get("num_comments", 0), "subreddit": post.get("subreddit", ""), "url": post.get("url", ""), "permalink": f"https://reddit.com{post.get('permalink', '')}", "created_utc": post.get("created_utc", 0), "is_self": post.get("is_self", False), } ) return results async def get_post_comments( permalink: str, sort: str = "top", limit: int = 25, ) -> list[dict]: """Fetch top-level comments for a Reddit post. Args: permalink: The post's permalink path (e.g., "/r/python/comments/abc123/title/"). sort: Comment sort order: "top", "best", "new", "controversial". limit: Max comments to return. Returns: List of comment dicts with: body, author, score, created_utc. """ # Strip domain if full URL was passed if permalink.startswith("https://"): permalink = permalink.replace("https://reddit.com", "") permalink = permalink.replace("https://www.reddit.com", "") url = f"{REDDIT_BASE}{permalink}.json" async with httpx.AsyncClient(timeout=15, follow_redirects=True) as client: resp = await client.get( url, params={"sort": sort, "limit": limit}, headers={"User-Agent": USER_AGENT}, ) resp.raise_for_status() data = resp.json() # Reddit returns [post_listing, comments_listing] if not isinstance(data, list) or len(data) < 2: return [] results = [] for child in data[1].get("data", {}).get("children", []): if child.get("kind") != "t1": continue comment = child.get("data", {}) results.append( { "body": (comment.get("body") or "")[:2000], "author": comment.get("author", "[deleted]"), "score": comment.get("score", 0), "created_utc": comment.get("created_utc", 0), } ) return results