From b2342c48685f40b48493b066f465b6908260adf5 Mon Sep 17 00:00:00 2001
From: Jan Eggers <janeggers@untergeekPro.local>
Date: Wed, 18 Dec 2024 22:55:04 +0100
Subject: [PATCH] Init Commit

---
 README.md | 20 ++++++++++++++
 main.py   | 83 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 103 insertions(+)
 create mode 100644 main.py

diff --git a/README.md b/README.md
index c27a790..2f9c5bd 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,22 @@
 # aichecker
 Checks Bluesky accounts (and, later, similar media) for AI suspicion.
+
+## Der Plan
+
+### Phase 1: Bluesky
+
+- Bluesky-API anschauen
+- Beliebiges Account vier Wochen scannen
+- AIorNot-API einbauen https://docs.aiornot.com/
+  - Bilder checken
+  - Videos checken:
+    - Audiospur extrahieren
+    - Audiospur checken
+- hive-API einbauen
+- Detectora-API einbauen
+- Ausgabe: Vermuteter KI-Anteil
+
+### Phase 2: 4CAT
+- 4CAT-Server aufsetzen
+- "processor" für KI-Check einbauen
+- 
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..dfec1ec
--- /dev/null
+++ b/main.py
@@ -0,0 +1,83 @@
+"""Print newly created Bluesky posts from the firehose as JSON.
+
+Basic idea:
+- Get a feed.
+- Collect basic data on the author
+- Collect four weeks' posts
+- Analyse each post:
+  - Check text with Hive and Detectora
+  - Check images with Hive and AIorNot
+  - Check video by isolating audio to AIorNot
+"""
+
+import json
+
+from atproto import CAR, models
+from atproto_client.models import get_or_create
+from atproto_firehose import FirehoseSubscribeReposClient, parse_subscribe_repos_message
+
+# Global list of collected posts.
+posts = []
+
+
+# Firehose-reading code adapted from:
+# https://gist.github.com/stuartlangridge/20ffe860fee0ecc315d3878c1ea77c35
+def append_post(json_data):
+    """Parse the JSON document *json_data* and append it to ``posts``."""
+    posts.append(json.loads(json_data))
+
+
+class JSONExtra(json.JSONEncoder):
+    """JSON encoder that falls back to ``repr()`` for unsupported objects.
+
+    Raw records sometimes contain CID() objects, which seem to be references
+    to something elsewhere in Bluesky; they are emitted as their string
+    representation.
+    """
+
+    def default(self, obj):
+        """Return the base encoder's result, or ``repr(obj)`` on TypeError."""
+        try:
+            return super().default(obj)
+        except TypeError:
+            # The base encoder raises TypeError for types it cannot serialise.
+            return repr(obj)
+
+
+client = FirehoseSubscribeReposClient()
+
+
+# The handler below follows these upstream examples:
+# https://github.com/MarshalX/atproto/blob/main/examples/firehose/sub_repos.py
+# and
+# https://github.com/MarshalX/bluesky-feed-generator/blob/main/server/data_stream.py
+def on_message_handler(message):
+    """Print every newly created ``app.bsky.feed.post`` record in *message*."""
+    commit = parse_subscribe_repos_message(message)
+    if not isinstance(commit, models.ComAtprotoSyncSubscribeRepos.Commit):
+        return
+    car = CAR.from_bytes(commit.blocks)
+    for op in commit.ops:
+        if op.action != "create" or not op.cid:
+            continue
+        raw = car.blocks.get(op.cid)
+        if not raw:
+            # No block found for this CID; skip it instead of failing below.
+            continue
+        cooked = get_or_create(raw, strict=False)
+        # Other record types ("app.bsky.feed.like" etc.) are ignored.
+        if cooked.py_type == "app.bsky.feed.post":
+            # NOTE: the raw record does not obviously include the author; it
+            # may only be referenced through a CID that has to be resolved.
+            print(json.dumps(raw, cls=JSONExtra, indent=2))
+
+
+# Also look at this:
+# https://social-media-ethics-automation.github.io/book/bsky/ch04_data/05_data_python_platform/03_demo_data_from_platform.html
+def main():
+    """Start the firehose client with ``on_message_handler`` as callback."""
+    client.start(on_message_handler)
+
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
-- 
GitLab