From 14ad06ec6bb79cf83ad7cf04364d6b01433679ff Mon Sep 17 00:00:00 2001 From: Jan Eggers <janeggers@untergeekPro.local> Date: Sat, 18 Jan 2025 19:13:38 +0100 Subject: [PATCH] Fix bei read_range --- pyproject.toml | 2 +- src/aichecker/check_tg.py | 2 +- tg_hr_check.py | 30 ++++++++++++++++-------------- 3 files changed, 18 insertions(+), 16 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 2be555e..6a61e5c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,7 +10,7 @@ authors = [ maintainers = [ {name = "Jan Eggers", email = "jan.eggers@hr.de"}, ] -version = "0.2.4.2" # Neue Versionsnummern für pip-Update +version = "0.2.4.3" # Neue Versionsnummern für pip-Update description = "Bluesky- und Telegram-Konten auf KI-Inhalte checken" requires-python = ">=3.8" dependencies = [ diff --git a/src/aichecker/check_tg.py b/src/aichecker/check_tg.py index 8c9e72e..562d30e 100644 --- a/src/aichecker/check_tg.py +++ b/src/aichecker/check_tg.py @@ -381,7 +381,7 @@ def tgc_read_range(cname, n1=1, n2=None, save=True, describe = True): posts.append(p) if p['nr'] == n2: return posts - n = max + n = p['nr'] return posts def tgc_read_number(cname, n = 20, cutoff = None, save=True, describe = True): diff --git a/tg_hr_check.py b/tg_hr_check.py index b3f84b9..0ee16b1 100644 --- a/tg_hr_check.py +++ b/tg_hr_check.py @@ -61,6 +61,7 @@ if __name__ == "__main__": channels=['fragunsdochdasoriginal','freiheitffm'] hr_links = [] for c in channels: + existing_df = pd.DataFrame() profile = tgc_profile(c) if profile is None: print(f"Kein Konto mit dem Namen {c} gefunden.") @@ -84,21 +85,22 @@ if __name__ == "__main__": start_post = max(existing_df['nr']) print(f"Dieser Kanal wurde schon einmal ausgelesen, zuletzt Post Nr.: {start_post} - seitdem {last_post-start_post} neue Posts") else: - start_post = last_post-N+1 + start_post = last_post-N print(f"Noch nicht gespeichert. Importiere {N} Posts bis zum letzten: {last_post}.") # Lies die aktuellsten Posts, sichere und analysiere sie # - print("Einlesen {start_post} bis {last_post}...") - posts = tgc_read_range(c, start_post, last_post, save=False, describe= False) - # Nach hr-Links suchen - for post in posts: - interessant = find_hr_links(post['text']) - if post['links']: - interessant.extend(post['links']) - hr_links.extend(interessant) - print(f"Potenziell interessant: {interessant}") - # Posts anhängen an das csv dieses Kanals - df = pd.DataFrame(posts) - if ('existing_df' in globals()): + if start_post < last_post: + print(f"Einlesen {start_post+1} bis {last_post}...") + posts = tgc_read_range(c, start_post+1, last_post, save=False, describe= False) + # Nach hr-Links suchen + for post in posts: + interessant = find_hr_links(post['text']) + if post['links']: + interessant.extend(post['links']) + hr_links.extend(interessant) + print(f"Potenziell interessant: {interessant}") + # Posts anhängen an das csv dieses Kanals + df = pd.DataFrame(posts) df = pd.concat([existing_df, df]).drop_duplicates(subset=['nr']).reset_index(drop=True) - df.to_csv(f'tg-checks/{c}.csv', index=False) # Save to CSV for example + df.to_csv(f'tg-checks/{c}.csv', index=False) # Save to CSV for example + print("Ende Gelände.") -- GitLab