# Select Git revision
# async_reader.py 2.40 KiB
# Define the global posts list
posts = []
# Nicking the code to read from the bsky firehose here:
# https://gist.github.com/stuartlangridge/20ffe860fee0ecc315d3878c1ea77c35


def append_post(json_data):
    """Parse a post and append it to the module-level ``posts`` list.

    Args:
        json_data: Either a JSON document (``str``, ``bytes`` or
            ``bytearray``), which is decoded first, or an already-decoded
            object such as a ``dict``, which is appended unchanged.

    Raises:
        json.JSONDecodeError: If ``json_data`` is text that is not valid JSON.
    """
    if isinstance(json_data, (str, bytes, bytearray)):
        post = json.loads(json_data)
    else:
        # Already a Python object; store it as-is rather than round-tripping.
        post = json_data
    posts.append(post)
# Basic idea:
# - Get a feed.
# - Collect basic data on the author
# - Collect four weeks' posts
# - Analyse each post:
# - Check text with Hive and Detectora
# - Check images with Hive and AIorNot
# - Check video by isolating audio to AIorNot
import json
from atproto_client.models import get_or_create
from atproto import CAR, models
from atproto_firehose import FirehoseSubscribeReposClient, parse_subscribe_repos_message
class JSONExtra(json.JSONEncoder):
    """JSON encoder that falls back to ``repr()`` for unsupported objects.

    Raw records sometimes contain ``CID()`` objects, which seem to be
    references to something elsewhere in Bluesky. They are not JSON
    serialisable, so they are written out as their string representation.
    """

    def default(self, obj):
        """Return a JSON-serialisable stand-in for *obj*.

        Args:
            obj: A value the standard encoder could not serialise.

        Returns:
            Whatever the base encoder returns, or ``repr(obj)`` when the base
            encoder rejects the object with ``TypeError``.
        """
        try:
            return super().default(obj)
        except TypeError:
            # HACK: stringify anything the stock encoder refuses. Only
            # TypeError is caught so Ctrl-C and SystemExit still propagate.
            return repr(obj)
# Module-level firehose client; main() starts it with on_message_handler
# as the message callback.
client = FirehoseSubscribeReposClient()
# all of this undocumented horseshit is based on cargo-culting the bollocks out of
# https://github.com/MarshalX/atproto/blob/main/examples/firehose/sub_repos.py
# and
# https://github.com/MarshalX/bluesky-feed-generator/blob/main/server/data_stream.py
def on_message_handler(message):
    """Print every newly created ``app.bsky.feed.post`` record in a message.

    Messages that do not parse to a repo commit, commits without block data,
    and operations whose record cannot be found or decoded are skipped.

    Args:
        message: A raw firehose message, as handed to the callback that is
            registered via ``client.start``.
    """
    commit = parse_subscribe_repos_message(message)
    if not isinstance(commit, models.ComAtprotoSyncSubscribeRepos.Commit):
        return
    # Without block data there is nothing for CAR.from_bytes to decode.
    if not commit.blocks:
        return

    car = CAR.from_bytes(commit.blocks)
    for op in commit.ops:
        if op.action != "create" or not op.cid:
            continue

        raw = car.blocks.get(op.cid)
        if raw is None:
            # The referenced block is not in this CAR; skip the operation
            # instead of failing further down.
            continue

        cooked = get_or_create(raw, strict=False)
        # getattr with a default: with strict=False the result is not
        # guaranteed to be a typed model carrying py_type.
        if getattr(cooked, "py_type", None) != "app.bsky.feed.post":
            # other types include "app.bsky.feed.like" etc which we ignore
            continue

        # Note that the record itself does not say who posted it.
        # NOTE(review): presumably the author's DID is available on the
        # commit (e.g. commit.repo) -- confirm against the atproto docs.
        print(json.dumps(raw, cls=JSONExtra, indent=2))
# Also look at this:
# https://social-media-ethics-automation.github.io/book/bsky/ch04_data/05_data_python_platform/03_demo_data_from_platform.html
def main():
    """Start the module-level client with on_message_handler as its callback."""
    client.start(on_message_handler)


if __name__ == "__main__":
    main()