Skip to content
Snippets Groups Projects
Select Git revision
  • 5c660b605fa7fdadc2197c8cd9da53bf50a655f8
  • main default protected
2 results

index.ts

Blame
  • async_reader.py 2.40 KiB
    # Module-level accumulator: append_post() adds one entry per call.
    posts = []

    # Nicking the code to read from the bsky firehose here: 
    # https://gist.github.com/stuartlangridge/20ffe860fee0ecc315d3878c1ea77c35
    def append_post(json_data):
        """Parse ``json_data`` and append the result to the global ``posts`` list.

        The original definition had no body (only a truncated comment), which
        made the whole module fail to parse. This implements what that comment
        describes.

        Args:
            json_data: A JSON document as ``str``, ``bytes`` or ``bytearray``.
                Anything else is treated as already-decoded data and is
                appended unchanged.
                NOTE(review): the intended caller is not visible in this file;
                confirm which of the two forms it passes.

        Raises:
            json.JSONDecodeError: If ``json_data`` is text that is not valid JSON.
        """
        # ``json`` is imported at module level further down the file; that
        # import has run by the time this function is called.
        if isinstance(json_data, (str, bytes, bytearray)):
            record = json.loads(json_data)
        else:
            record = json_data
        posts.append(record)
    
    
    #
    # Basic idea: 
    # - Get a feed.
    # - Collect basic data on the author
    # - Collect four weeks' posts
    # - Analyse each post: 
    #   - Check text with Hive and Detectora
    #   - Check images with Hive and AIorNot
    #   - Check video by isolating its audio track and sending that to AIorNot
    
    import json
    from atproto_client.models import get_or_create
    from atproto import CAR, models
    from atproto_firehose import FirehoseSubscribeReposClient, parse_subscribe_repos_message
    
    
    
    class JSONExtra(json.JSONEncoder):
        """JSON encoder that falls back to ``repr()`` for unsupported objects.

        Raw records sometimes contain CID() objects, which seem to be
        references to something elsewhere in Bluesky. The standard encoder
        cannot serialise them, so they are emitted as their string
        representation instead. This is a deliberate hack: the output is for
        human inspection, not for round-tripping.
        """

        def default(self, obj):
            """Return a JSON-serialisable stand-in for ``obj``.

            Called by the encoder only for objects it cannot serialise itself.

            Args:
                obj: The object the encoder could not handle.

            Returns:
                Whatever the base class returns if it can handle ``obj``,
                otherwise ``repr(obj)``.
            """
            try:
                return super().default(obj)
            except TypeError:
                # The base implementation signals "unsupported type" with
                # TypeError. Catch only that, so KeyboardInterrupt, SystemExit
                # and genuine bugs are no longer silently swallowed.
                return repr(obj)
    
    # Shared firehose client, created when the module is loaded; main() starts
    # it with on_message_handler as the callback.
    client = FirehoseSubscribeReposClient()
    
    # The message handler below is adapted from these two (largely undocumented) examples:
    # https://github.com/MarshalX/atproto/blob/main/examples/firehose/sub_repos.py
    # and
    # https://github.com/MarshalX/bluesky-feed-generator/blob/main/server/data_stream.py
    
    def on_message_handler(message):
        """Print every newly created post record in ``message`` as indented JSON.

        The message is parsed with ``parse_subscribe_repos_message``; anything
        that is not a ``ComAtprotoSyncSubscribeRepos.Commit`` is ignored. For
        each "create" operation that carries a CID, the matching block is
        looked up in the commit's CAR data and printed if its record type is
        "app.bsky.feed.post".

        Args:
            message: The firehose frame passed in by ``client.start``.

        Returns:
            None. Output goes to stdout via ``print``.
        """
        commit = parse_subscribe_repos_message(message)
        if not isinstance(commit, models.ComAtprotoSyncSubscribeRepos.Commit):
            return

        car = CAR.from_bytes(commit.blocks)
        for op in commit.ops:
            # Only newly created records that reference a block are of interest.
            if op.action != "create" or not op.cid:
                continue

            raw = car.blocks.get(op.cid)
            if raw is None:
                # The referenced block is not in this commit's CAR data. Skip
                # it rather than fail on the attribute access below.
                continue

            cooked = get_or_create(raw, strict=False)
            # Other record types include "app.bsky.feed.like" etc., which are
            # ignored. getattr() also covers the case where no model object
            # could be built from the raw data.
            if getattr(cooked, "py_type", None) != "app.bsky.feed.post":
                continue

            # Note: the record itself does not appear to say who posted it.
            # NOTE(review): the author may be referenced indirectly (e.g. via
            # a CID or on the enclosing commit) -- confirm against the atproto
            # documentation.
            print(json.dumps(raw, cls=JSONExtra, indent=2))
    
    
    # Also look at this: 
    # https://social-media-ethics-automation.github.io/book/bsky/ch04_data/05_data_python_platform/03_demo_data_from_platform.html
    
    def main():
        """Script entry point: start the firehose client.

        Passes ``on_message_handler`` to ``client.start`` as the message
        callback. Returns ``None``.
        """
        client.start(on_message_handler)
    
    
    
    
    
    
    # Run main() only when this file is executed as a script, not when it is
    # imported as a module.
    if __name__ == "__main__":
        main()