Minor fixes
This commit is contained in:
1
agentstuff/sentiment_agent/clients/__init__.py
Normal file
1
agentstuff/sentiment_agent/clients/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
"""API clients for social media and forum data sources."""
|
||||
166
agentstuff/sentiment_agent/clients/bluesky.py
Normal file
166
agentstuff/sentiment_agent/clients/bluesky.py
Normal file
@@ -0,0 +1,166 @@
|
||||
"""Bluesky client using the AT Protocol API.
|
||||
|
||||
Search requires authentication. Set BLUESKY_HANDLE and BLUESKY_APP_PASSWORD
|
||||
env vars. Create an app password at: https://bsky.app/settings/app-passwords
|
||||
|
||||
Thread fetching works without auth via the public API.
|
||||
"""
|
||||
|
||||
import os
|
||||
import httpx
|
||||
|
||||
# Base URL used by get_thread and _resolve_url_to_uri for thread and handle lookups.
BSKY_PUBLIC_API = "https://public.api.bsky.app"
|
||||
# Base URL used by _get_session (createSession) and by the authenticated search_posts call.
BSKY_AUTH_API = "https://bsky.social"
|
||||
|
||||
|
||||
async def _get_session() -> dict | None:
|
||||
"""Authenticate with Bluesky and return session tokens, or None if no creds."""
|
||||
handle = os.environ.get("BLUESKY_HANDLE")
|
||||
app_password = os.environ.get("BLUESKY_APP_PASSWORD")
|
||||
if not handle or not app_password:
|
||||
return None
|
||||
|
||||
async with httpx.AsyncClient(timeout=15) as client:
|
||||
resp = await client.post(
|
||||
f"{BSKY_AUTH_API}/xrpc/com.atproto.server.createSession",
|
||||
json={"identifier": handle, "password": app_password},
|
||||
)
|
||||
resp.raise_for_status()
|
||||
return resp.json()
|
||||
|
||||
|
||||
def _format_post(post_view: dict) -> dict:
    """Flatten an AT Protocol post view into the fields this client exposes.

    Args:
        post_view: Either a wrapper dict holding the post under "post"
            (as thread nodes do) or the post object itself.

    Returns:
        Dict with text, author_handle, author_display_name, created_at,
        like_count, repost_count, reply_count, uri, cid and url. Absent
        string fields become "" and absent counters become 0.
    """
    # Unwrap {"post": {...}} when present; otherwise treat the input as the post.
    inner = post_view.get("post", post_view)
    body = inner.get("record", {})
    poster = inner.get("author", {})
    at_uri = inner.get("uri", "")
    handle = poster.get("handle", "")

    formatted = {
        "text": body.get("text", ""),
        "author_handle": handle,
        "author_display_name": poster.get("displayName", ""),
        "created_at": body.get("createdAt", ""),
        "like_count": inner.get("likeCount", 0),
        "repost_count": inner.get("repostCount", 0),
        "reply_count": inner.get("replyCount", 0),
        "uri": at_uri,
        "cid": inner.get("cid", ""),
        "url": _uri_to_url(at_uri, handle),
    }
    return formatted
|
||||
|
||||
|
||||
def _uri_to_url(uri: str, handle: str) -> str:
    """Convert an at:// post URI to a bsky.app web URL.

    Example:
        at://did:plc:xxx/app.bsky.feed.post/rkey
        -> https://bsky.app/profile/<handle>/post/rkey

    Args:
        uri: The post's at:// URI.
        handle: The author's handle. When empty, the authority segment of
            the URI (the DID) is used in the profile path instead, so the
            result is never ".../profile//post/...".

    Returns:
        The web URL, or "" when the URI is not an at:// URI, has too few
        segments, or ends without a record key.
    """
    if not uri.startswith("at://"):
        return ""

    # "at://<authority>/<collection>/<rkey>".split("/")
    #   -> ["at:", "", authority, collection, rkey]
    parts = uri.split("/")
    if len(parts) < 5:
        return ""

    authority, rkey = parts[2], parts[-1]
    actor = handle or authority
    if not actor or not rkey:
        return ""
    return f"https://bsky.app/profile/{actor}/post/{rkey}"
|
||||
|
||||
|
||||
async def search_posts(query: str, limit: int = 25, sort: str = "top") -> list[dict]:
    """Run an authenticated Bluesky post search.

    Requires BLUESKY_HANDLE and BLUESKY_APP_PASSWORD env vars.

    Args:
        query: Search terms.
        limit: Max results (capped at 100).
        sort: "top" (most liked) or "latest" (chronological).

    Returns:
        List of post dicts with: text, author_handle, author_display_name,
        created_at, like_count, repost_count, reply_count, uri, cid, url.

    Raises:
        RuntimeError: If Bluesky credentials are not configured.
    """
    session = await _get_session()
    if not session:
        raise RuntimeError(
            "Bluesky search requires authentication. "
            "Set BLUESKY_HANDLE and BLUESKY_APP_PASSWORD environment variables. "
            "Create an app password at: https://bsky.app/settings/app-passwords"
        )

    endpoint = f"{BSKY_AUTH_API}/xrpc/app.bsky.feed.searchPosts"
    search_params = {"q": query, "limit": min(limit, 100), "sort": sort}
    auth_headers = {"Authorization": f"Bearer {session['accessJwt']}"}

    async with httpx.AsyncClient(timeout=15) as http:
        response = await http.get(endpoint, params=search_params, headers=auth_headers)
        response.raise_for_status()
        payload = response.json()

    formatted = []
    for raw_post in payload.get("posts", []):
        formatted.append(_format_post(raw_post))
    return formatted
|
||||
|
||||
|
||||
async def get_thread(uri: str, depth: int = 6) -> dict:
    """Fetch a Bluesky thread and flatten it into a root post plus replies.

    Args:
        uri: An at:// URI, or a https://bsky.app/profile/.../post/... URL
            (converted to an at:// URI before the request).
        depth: Reply depth requested from the API (capped at 1000).

    Returns:
        Dict with "post" (the formatted root post, or {} when the response
        carries none) and "replies" (a flat list: each direct reply followed
        by that reply's own direct replies). Replies nested deeper than that
        are not included, whatever depth was requested.
    """
    if uri.startswith("https://bsky.app/"):
        uri = await _resolve_url_to_uri(uri)

    # A bearer token is attached only when credentials are configured.
    session = await _get_session()
    headers = {"Authorization": f"Bearer {session['accessJwt']}"} if session else {}

    endpoint = f"{BSKY_PUBLIC_API}/xrpc/app.bsky.feed.getPostThread"
    thread_params = {"uri": uri, "depth": min(depth, 1000)}
    async with httpx.AsyncClient(timeout=15) as http:
        response = await http.get(endpoint, params=thread_params, headers=headers)
        response.raise_for_status()
        payload = response.json()

    thread = payload.get("thread", {})
    root = {} if "post" not in thread else _format_post(thread)

    flattened = []
    for child in thread.get("replies", []):
        # Nodes without a "post" key are skipped together with their children.
        if "post" not in child:
            continue
        flattened.append(_format_post(child))
        flattened.extend(
            _format_post(grandchild)
            for grandchild in child.get("replies", [])
            if "post" in grandchild
        )

    return {"post": root, "replies": flattened}
|
||||
|
||||
|
||||
async def _resolve_url_to_uri(url: str) -> str:
    """Convert a bsky.app post URL to an at:// URI.

    Expected shape: https://bsky.app/profile/<handle-or-did>/post/<rkey>

    Args:
        url: The bsky.app post URL. A trailing slash, query string or
            fragment is ignored.

    Returns:
        "at://<did>/app.bsky.feed.post/<rkey>".

    Raises:
        ValueError: If the URL does not have the expected
            profile/<actor>/post/<rkey> shape.
        httpx.HTTPStatusError: If handle resolution returns an error status.
    """
    # Drop any query string / fragment so it cannot end up inside the rkey.
    path_only = url.split("?", 1)[0].split("#", 1)[0]

    # ["https:", "", "bsky.app", "profile", actor, "post", rkey]
    parts = path_only.rstrip("/").split("/")
    # Seven segments are needed because parts[6] is read below.
    if (
        len(parts) < 7
        or parts[3] != "profile"
        or parts[5] != "post"
        or not parts[4]
        or not parts[6]
    ):
        raise ValueError(f"Invalid Bluesky URL: {url}")

    actor, rkey = parts[4], parts[6]

    if actor.startswith("did:"):
        # The profile segment is already a DID; nothing to resolve.
        did = actor
    else:
        # Resolve handle to DID
        async with httpx.AsyncClient(timeout=10) as client:
            resp = await client.get(
                f"{BSKY_PUBLIC_API}/xrpc/com.atproto.identity.resolveHandle",
                params={"handle": actor},
            )
            resp.raise_for_status()
            did = resp.json()["did"]

    return f"at://{did}/app.bsky.feed.post/{rkey}"
|
||||
78
agentstuff/sentiment_agent/clients/hackernews.py
Normal file
78
agentstuff/sentiment_agent/clients/hackernews.py
Normal file
@@ -0,0 +1,78 @@
|
||||
"""Hacker News client using the Algolia HN Search API.
|
||||
|
||||
No authentication required. Docs: https://hn.algolia.com/api
|
||||
"""
|
||||
|
||||
import httpx
|
||||
|
||||
# Base URL for the search requests made by search_stories and search_comments.
HN_API_BASE = "https://hn.algolia.com/api/v1"
|
||||
|
||||
|
||||
async def search_stories(query: str, limit: int = 25) -> list[dict]:
    """Search HN for stories matching a query.

    Args:
        query: Search terms.
        limit: Max results (capped at 50).

    Returns:
        A list of story dicts with: title, url, author, points,
        num_comments, created_at, object_id, story_text, hn_url.
        Text fields are always strings and counters are always ints, even
        when the API omits a field or returns it as null.

    Raises:
        httpx.HTTPStatusError: If the API answers with an error status.
    """
    async with httpx.AsyncClient(timeout=15) as client:
        resp = await client.get(
            f"{HN_API_BASE}/search",
            params={
                "query": query,
                "tags": "story",
                "hitsPerPage": min(limit, 50),
            },
        )
        resp.raise_for_status()
        data = resp.json()

    results = []
    for hit in data.get("hits") or []:
        # dict.get's default only applies to a *missing* key; `or` also
        # covers a key that is present with a null value.
        object_id = hit.get("objectID") or ""
        results.append(
            {
                "title": hit.get("title") or "",
                "url": hit.get("url") or "",
                "author": hit.get("author") or "",
                "points": hit.get("points") or 0,
                "num_comments": hit.get("num_comments") or 0,
                "created_at": hit.get("created_at") or "",
                "object_id": object_id,
                "story_text": hit.get("story_text") or "",
                "hn_url": f"https://news.ycombinator.com/item?id={object_id}",
            }
        )
    return results
|
||||
|
||||
|
||||
async def search_comments(query: str, limit: int = 25) -> list[dict]:
    """Search HN for comments matching a query.

    Args:
        query: Search terms.
        limit: Max results (capped at 50).

    Returns:
        A list of comment dicts with: comment_text, author, points,
        created_at, story_title, story_url, hn_url. Text fields are always
        strings and points is always an int, even when the API omits a
        field or returns it as null.

    Raises:
        httpx.HTTPStatusError: If the API answers with an error status.
    """
    async with httpx.AsyncClient(timeout=15) as client:
        resp = await client.get(
            f"{HN_API_BASE}/search",
            params={
                "query": query,
                "tags": "comment",
                "hitsPerPage": min(limit, 50),
            },
        )
        resp.raise_for_status()
        data = resp.json()

    results = []
    for hit in data.get("hits") or []:
        # dict.get's default only applies to a *missing* key; `or` also
        # covers a key that is present with a null value.
        object_id = hit.get("objectID") or ""
        results.append(
            {
                "comment_text": hit.get("comment_text") or "",
                "author": hit.get("author") or "",
                "points": hit.get("points") or 0,
                "created_at": hit.get("created_at") or "",
                "story_title": hit.get("story_title") or "",
                "story_url": hit.get("story_url") or "",
                "hn_url": f"https://news.ycombinator.com/item?id={object_id}",
            }
        )
    return results
|
||||
117
agentstuff/sentiment_agent/clients/reddit.py
Normal file
117
agentstuff/sentiment_agent/clients/reddit.py
Normal file
@@ -0,0 +1,117 @@
|
||||
"""Reddit client using the public JSON API.
|
||||
|
||||
No authentication required for read-only search. Reddit requires a descriptive
|
||||
User-Agent header — requests with generic UAs get 429'd.
|
||||
"""
|
||||
|
||||
import httpx
|
||||
|
||||
# Base URL that search_posts and get_post_comments prepend to their request paths.
REDDIT_BASE = "https://www.reddit.com"
|
||||
# Sent as the User-Agent header on every request this module makes.
USER_AGENT = "sentiment-agent/0.1.0 (research; sentiment analysis tool)"
|
||||
|
||||
|
||||
async def search_posts(
    query: str,
    subreddit: str = "all",
    sort: str = "relevance",
    time_filter: str = "month",
    limit: int = 25,
) -> list[dict]:
    """Search Reddit for posts matching a query.

    Args:
        query: Search terms.
        subreddit: Subreddit to search within, or "all" for site-wide.
        sort: One of "relevance", "hot", "top", "new", "comments".
        time_filter: One of "hour", "day", "week", "month", "year", "all".
        limit: Max results (capped at 100).

    Returns:
        List of post dicts with: title, selftext (truncated to 2000
        characters), author, score, upvote_ratio, num_comments, subreddit,
        url, permalink, created_utc, is_self.
    """
    # Results are restricted to the subreddit unless searching site-wide.
    restrict = "off" if subreddit == "all" else "on"
    search_params = {
        "q": query,
        "sort": sort,
        "t": time_filter,
        "limit": min(limit, 100),
        "restrict_sr": restrict,
    }
    endpoint = f"{REDDIT_BASE}/r/{subreddit}/search.json"

    async with httpx.AsyncClient(timeout=15, follow_redirects=True) as http:
        response = await http.get(
            endpoint,
            params=search_params,
            headers={"User-Agent": USER_AGENT},
        )
        response.raise_for_status()
        payload = response.json()

    listing = payload.get("data", {}).get("children", [])
    entries = (item.get("data", {}) for item in listing)
    return [
        {
            "title": entry.get("title", ""),
            "selftext": (entry.get("selftext") or "")[:2000],
            "author": entry.get("author", "[deleted]"),
            "score": entry.get("score", 0),
            "upvote_ratio": entry.get("upvote_ratio", 0),
            "num_comments": entry.get("num_comments", 0),
            "subreddit": entry.get("subreddit", ""),
            "url": entry.get("url", ""),
            "permalink": f"https://reddit.com{entry.get('permalink', '')}",
            "created_utc": entry.get("created_utc", 0),
            "is_self": entry.get("is_self", False),
        }
        for entry in entries
    ]
|
||||
|
||||
|
||||
async def get_post_comments(
    permalink: str,
    sort: str = "top",
    limit: int = 25,
) -> list[dict]:
    """Fetch top-level comments for a Reddit post.

    Args:
        permalink: The post's permalink path (e.g.,
            "/r/python/comments/abc123/title/") or a full post URL. For a
            full URL only the path is used, so any scheme, Reddit host
            (www, old, np, ...), query string or fragment is accepted.
        sort: Comment sort order: "top", "best", "new", "controversial".
        limit: Max comments to return.

    Returns:
        List of comment dicts with: body (truncated to 2000 characters),
        author, score, created_utc. Empty when the response is not the
        expected two-element listing.

    Raises:
        httpx.HTTPStatusError: If Reddit answers with an error status.
    """
    # Local import keeps this block self-contained; urlsplit is stdlib.
    from urllib.parse import urlsplit

    # Keep only the path: this drops scheme, host, query and fragment in one
    # step instead of string-replacing a fixed list of hosts.
    path = urlsplit(permalink.strip()).path
    if not path.startswith("/"):
        path = f"/{path}"
    if not path.endswith(".json"):
        path = f"{path}.json"

    url = f"{REDDIT_BASE}{path}"
    async with httpx.AsyncClient(timeout=15, follow_redirects=True) as client:
        resp = await client.get(
            url,
            params={"sort": sort, "limit": limit},
            headers={"User-Agent": USER_AGENT},
        )
        resp.raise_for_status()
        data = resp.json()

    # Reddit returns [post_listing, comments_listing]
    if not isinstance(data, list) or len(data) < 2:
        return []

    results = []
    for child in data[1].get("data", {}).get("children", []):
        # Only entries of kind "t1" are kept; other kinds are skipped.
        if child.get("kind") != "t1":
            continue
        comment = child.get("data", {})
        results.append(
            {
                "body": (comment.get("body") or "")[:2000],
                "author": comment.get("author", "[deleted]"),
                "score": comment.get("score", 0),
                "created_utc": comment.get("created_utc", 0),
            }
        )
    return results
|
||||
Reference in New Issue
Block a user