diff --git a/README.md b/README.md
index c27a7904a87bf18e524da846114fbc02ce15e406..2f9c5bd3b14f69ff7a7e4c1eabc57b759d7e9f2e 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,22 @@
 # aichecker
 Checks Bluesky accounts (and, later, similar media) for AI suspicion.
+
+## Der Plan
+
+### Phase 1: Bluesky
+
+- Bluesky-API anschauen
+- Beliebiges Account vier Wochen scannen
+- AIorNot-API einbauen https://docs.aiornot.com/
+  - Bilder checken
+  - Videos checken:
+    - Audiospur extrahieren
+    - Audiospur checken
+- hive-API einbauen
+- Detectora-API einbauen
+- Ausgabe: Vermuteter KI-Anteil
+
+### Phase 2: 4CAT
+- 4CAT-Server aufsetzen
+- "processor" für KI-Check einbauen
+-
diff --git a/main.py b/main.py
new file mode 100644
index 0000000000000000000000000000000000000000..dfec1ecbcfbc813ce1a9911a2112c6e1e585beb6
--- /dev/null
+++ b/main.py
@@ -0,0 +1,85 @@
+import json
+
+# Define the global posts list
+posts = []
+
+# Nicking the code to read from the bsky firehose here:
+# https://gist.github.com/stuartlangridge/20ffe860fee0ecc315d3878c1ea77c35
+def append_post(json_data):
+    """Parse ``json_data`` as JSON text and append the result to ``posts``."""
+    posts.append(json.loads(json_data))
+
+
+#
+# Basic idea:
+# - Get a feed.
+# - Collect basic data on the author
+# - Collect four weeks' posts
+# - Analyse each post:
+#   - Check text with Hive and Detectora
+#   - Check images with Hive and AIorNot
+#   - Check video by isolating audio to AIorNot
+
+import json
+from atproto_client.models import get_or_create
+from atproto import CAR, models
+from atproto_firehose import FirehoseSubscribeReposClient, parse_subscribe_repos_message
+
+
+
+class JSONExtra(json.JSONEncoder):
+    """JSON encoder that falls back to ``repr()`` for unsupported objects.
+
+    Raw objects sometimes contain CID() objects, which seem to be
+    references to something elsewhere in bluesky. So, we 'serialise'
+    these as a string representation, which is a hack but good enough
+    for dumping records to the console.
+    """
+
+    def default(self, obj):
+        """Return a JSON-serialisable stand-in for ``obj``."""
+        try:
+            return super().default(obj)
+        except TypeError:
+            # The stock encoder reports "cannot serialise this" as TypeError.
+            return repr(obj)
+
+client = FirehoseSubscribeReposClient()
+
+# All of this is undocumented and adapted (without full understanding) from
+# https://github.com/MarshalX/atproto/blob/main/examples/firehose/sub_repos.py
+# and
+# https://github.com/MarshalX/bluesky-feed-generator/blob/main/server/data_stream.py
+
+def on_message_handler(message):
+    """Print each newly created post contained in one firehose message."""
+    commit = parse_subscribe_repos_message(message)
+    if not isinstance(commit, models.ComAtprotoSyncSubscribeRepos.Commit):
+        return
+    car = CAR.from_bytes(commit.blocks)
+    for op in commit.ops:
+        if op.action != "create" or not op.cid:
+            continue
+        raw = car.blocks.get(op.cid)
+        if not raw:
+            # No block stored under this CID in the CAR payload; nothing to decode.
+            continue
+        cooked = get_or_create(raw, strict=False)
+        if getattr(cooked, "py_type", None) == "app.bsky.feed.post":
+            # other types include "app.bsky.feed.like" etc which we ignore
+            # note that this data does not include who posted this skeet
+            # or possibly it does as a "CID" which you have to look up somehow
+            print(json.dumps(raw, cls=JSONExtra, indent=2))
+
+
+# Also look at this:
+# https://social-media-ethics-automation.github.io/book/bsky/ch04_data/05_data_python_platform/03_demo_data_from_platform.html
+
+def main():
+    """Start the firehose client with ``on_message_handler`` as its callback."""
+    client.start(on_message_handler)
+
+
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file