Commit a7c2234d authored by Jan Eggers
Fixed the worst Telegram bugs, aiornot library

parent fe039aff
@@ -63,15 +63,13 @@ if __name__ == "__main__":
         if 'aiornot_ai_score' not in post:
             if post['video'] is not None:
                 # Analyze the video's audio
-                post['aiornot_ai_score'] = aiornot_wrapper(post['video'].get('url'), is_image = False)
+                post['aiornot_ai_score'] = aiornot_wrapper(post['video'].get('file'), is_image = False)
                 print("Video: AIORNOT-Score")
             # Analyze the image
             # This is for the gallery: AIORNOT currently cannot
             # check base64 strings.
-            # The problem with the photo URLs: they are not guaranteed.
-            base64_image = post['photo'].get('image',None)
-            image = f"data:image/jpeg;base64, {base64_image}"
-            post['aiornot_ai_score'] = aiornot_wrapper(post['photo'].get('url'))
+            elif post['photo'] is not None:
+                post['aiornot_ai_score'] = aiornot_wrapper(post['photo'].get('file'))
             print("AIORNOT-AI-Score: {post['aiornot_ai_score']}")
             # Videos can only be checked for AI via their audio.
             # Still have to implement that.
...
aiornot==0.0.5
beautifulsoup4==4.12.3
numpy==2.0.2
ollama==0.4.4
openai==1.58.1
openai-whisper==20240930
openpyxl==3.1.5
pandas==2.2.3
pathlib==1.0.1
pydub==0.25.1
replicate==1.0.4
soundfile==0.13.0
toml==0.10.2
\ No newline at end of file
@@ -62,7 +62,7 @@ def query_aiornot(content, is_image = False):
         return None
     # Filename? Then use a multipart header
     if not (content.startswith("http://") or content.startswith("https://")):
-        fname =
+        fname = content
         headers = {
             'Authorization': f"Bearer {api_key}",
             'Accept': 'application/json',
...
@@ -108,7 +108,7 @@ def tgc_clean(cname):
 def save_url(fname, name, mdir="./media"):
     # The media URLs often come with a parameter attached; therefore handle not just
     # "something.ogg" but also "something.mp4?somethingelse"
-    content_ext = re.search("\.[a-zA-Z0-9]+(?=\?|$)",fname).group(0)
+    content_ext = re.search(r"\.[a-zA-Z0-9]+(?=\?|$)",fname).group(0)
     content_file = f"{mdir}/{name}{content_ext}"
     try:
         os.makedirs(os.path.dirname(content_file), exist_ok=True)
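To illustrate the fixed pattern (now a raw string, which avoids the invalid-escape warning for "\." in recent Python versions), here is a minimal sketch with made-up URLs showing what save_url() extracts as the extension:

import re

# Hypothetical media URLs of the kind save_url() receives
for fname in ["https://cdn.example.org/voice.ogg",
              "https://cdn.example.org/clip.mp4?token=abc123"]:
    # Same pattern as in save_url: extension up to an optional "?" or the end of the string
    content_ext = re.search(r"\.[a-zA-Z0-9]+(?=\?|$)", fname).group(0)
    print(content_ext)  # -> ".ogg", then ".mp4"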
@@ -186,8 +186,12 @@ def tg_post_parse(b, save = True, describe = True):
     # Voice message: tgme_widget_message_voice https://t.me/fragunsdochDasOriginal/27176
     if b.select_one('audio.tgme_widget_message_voice') is not None:
         # Link to the OGG file
-        voice_url = b.select_one('audio.tgme_widget_message_voice')[url]
+        voice_url = b.select_one('audio.tgme_widget_message_voice')['src']
         voice_duration = b.select_one('time.tgme_widget_message_voice_duration').get_text()
+        voice = {
+            'url': voice_url,
+            'duration': voice_duration,
+        }
         # Always keep a local copy for transcription
         if save or describe:
             voice['file'] = save_url(voice_url, f"{channel}_{b_nr}_voice")
@@ -321,7 +325,7 @@ def tgc_read_range(cname, n1=1, n2=None, save=True, describe = True):
     # First: get the number of the latest post
     profile = tgc_profile(cname)
     # Sanity check: has the first post number even been posted yet?
-    max_nr = profile['n_post']
+    max_nr = profile['n_posts']
     if n1 > max_nr:
         return None
     loop = True
@@ -388,12 +392,7 @@ def check_tg_list(posts, check_images = True):
             post['aiornot_ai_score'] = aiornot_wrapper(post['video'].get('file'), is_image = False)
         elif post['photo'] is not None:
             # Analyze the image
-            # This is for the gallery: AIORNOT currently cannot
-            # check base64 strings.
-            # The problem with the photo URLs: they are not guaranteed.
-            base64_image = post['photo'].get('image',None)
-            image = f"data:image/jpeg;base64, {base64_image}"
-            post['aiornot_ai_score'] = aiornot_wrapper(post['photo'].get('url'))
+            post['aiornot_ai_score'] = aiornot_wrapper(post['photo'].get('file'), is_image = True)
     return posts
 # Wrapper for the check_tg_list routine.
 # Returns results as a df, but behind the scenes works with
...
 from .detectora import query_detectora
-from .aiornot import query_aiornot
+# from .aiornot import query_aiornot
 from .transcribe import gpt4_description
+# Alternative to my hand-written aiornot routines:
+# https://github.com/aiornotinc/aiornot-python
+# Install with
+# pip install aiornot
+from aiornot import Client

 # Constants
 d_thresh = .8 # 80 percent
 limit = 25 # posts for the check
@@ -20,20 +27,25 @@ def detectora_wrapper(text: str):
     return score

 def aiornot_wrapper(content, is_image = True):
+    aiornot_client = Client()
     # Wrapper. Progress bar.
     if content is None:
         print(" ", end="")
         return
     # Progress bar
     print("?", end="")
-    report = query_aiornot(content, is_image)
+    is_url = (content.startswith("http://") or content.startswith("https://"))
+    if is_image:
+        response = aiornot_client.image_report_by_url(content) if is_url else aiornot_client.image_report_by_file(content)
+    else:
+        response = aiornot_client.audio_report_by_url(content) if is_url else aiornot_client.audio_report_by_file(content)
     # Description: https://docs.aiornot.com/#5b3de85d-d3eb-4ad1-a191-54988f56d978
-    if report is not None:
+    if response is not None:
         aiornot_dict = ({
-            'aiornot_score': report['verdict'],
+            'aiornot_score': response.report.verdict,
             # Distinction: images carry the confidence score in the sub-key 'ai'
-            'aiornot_confidence': report['ai']['confidence'] if 'ai' in report else report['confidence'],
-            'aiornot_generator': report['generator'] if 'generator' in report else 'Audio',
+            'aiornot_confidence': response.report.ai.confidence if hasattr(response.report, 'ai') else response.report.confidence,
+            'aiornot_generator': response.report.generator if hasattr(response.report, 'generator') else None,
         })
         print(f"\b{'X' if aiornot_dict['aiornot_score'] != 'human' else '.'}",end="")
         return aiornot_dict
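For context, a rough usage sketch of the aiornot Client calls that the rewritten aiornot_wrapper() relies on. This is not part of the commit; it assumes the library picks up the AIORNOT API key from its usual configuration (e.g. an environment variable) and uses only the methods and attributes that appear in the diff above, with hypothetical URLs and file names:

from aiornot import Client

client = Client()  # assumption: API key is read from the environment/config

# Image check by URL, as aiornot_wrapper() does for photos
resp = client.image_report_by_url("https://example.org/some_image.jpg")  # hypothetical URL
print(resp.report.verdict)        # e.g. 'human' vs. an AI verdict
print(resp.report.ai.confidence)  # images: confidence sits in the 'ai' sub-report

# Audio check from a local file, as used for Telegram voice/video audio
resp = client.audio_report_by_file("./media/some_voice.m4a")  # hypothetical file
print(resp.report.verdict, resp.report.confidence)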
@@ -50,9 +62,9 @@ def bsky_aiornot_wrapper(did,embed):
     if 'images' in embed:
         images = embed['images']
         desc = []
-        for i in images:
+        for image in images:
             # Construct an URL for the image thumbnail (normalised size)
-            link = i['image']['ref']['$link']
+            link = image['image']['ref']['$link']
             i_url = f"https://cdn.bsky.app/img/feed_thumbnail/plain/{did}/{link}"
             aiornot_report = aiornot_wrapper(i_url)
             aiornot_report['gpt4_description'] = gpt4_description(image)
...
@@ -66,7 +66,7 @@ def ai_description(image):
     # Return ai-generated description
     return desc2

-def transcribe(fname):
+def transcribe(fname, use_api = False):
     # Wrapper; calls one of the three Whisper transcribe variants.
     # Favorite: the accelerated whisper-s2t
     # (which first needs CTranslate2 with METAL support on the Mac
@@ -76,8 +76,10 @@ def transcribe(fname):
     if ".ogg" in fname.lower():
         fname = convert_ogg_to_m4a(fname)
     try:
-        text = transcribe_whisper(fname)
-        #text = transcribe_api(fname)
+        if use_api:
+            text = transcribe_api(fname)
+        else:
+            text = transcribe_whisper(fname)
         # return transcribe_jax(audio)
         # return transcribe_ws2t(audio)
         return text
...
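A quick usage sketch for the new use_api switch (the file name is hypothetical): by default the local Whisper model is used; with use_api=True the call goes through the API-based variant (transcribe_api) instead.

# Local Whisper model (default)
text = transcribe("./media/somechannel_27176_voice.ogg")

# API-based transcription instead of the local model
text = transcribe("./media/somechannel_27176_voice.ogg", use_api=True)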