Skip to main content
Sign in
Snippets Groups Projects
Commit 14ad06ec authored by Jan Eggers
Browse files

Fix bei read_range

parent dd572123
No related branches found
No related tags found
No related merge requests found
...@@ -10,7 +10,7 @@ authors = [ ...@@ -10,7 +10,7 @@ authors = [
maintainers = [ maintainers = [
{name = "Jan Eggers", email = "jan.eggers@hr.de"}, {name = "Jan Eggers", email = "jan.eggers@hr.de"},
] ]
version = "0.2.4.2" # Neue Versionsnummern für pip-Update version = "0.2.4.3" # Neue Versionsnummern für pip-Update
description = "Bluesky- und Telegram-Konten auf KI-Inhalte checken" description = "Bluesky- und Telegram-Konten auf KI-Inhalte checken"
requires-python = ">=3.8" requires-python = ">=3.8"
dependencies = [ dependencies = [
... ...
......
...@@ -381,7 +381,7 @@ def tgc_read_range(cname, n1=1, n2=None, save=True, describe = True): ...@@ -381,7 +381,7 @@ def tgc_read_range(cname, n1=1, n2=None, save=True, describe = True):
posts.append(p) posts.append(p)
if p['nr'] == n2: if p['nr'] == n2:
return posts return posts
n = max n = p['nr']
return posts return posts
def tgc_read_number(cname, n = 20, cutoff = None, save=True, describe = True): def tgc_read_number(cname, n = 20, cutoff = None, save=True, describe = True):
... ...
......
...@@ -61,6 +61,7 @@ if __name__ == "__main__": ...@@ -61,6 +61,7 @@ if __name__ == "__main__":
channels=['fragunsdochdasoriginal','freiheitffm'] channels=['fragunsdochdasoriginal','freiheitffm']
hr_links = [] hr_links = []
for c in channels: for c in channels:
existing_df = pd.DataFrame()
profile = tgc_profile(c) profile = tgc_profile(c)
if profile is None: if profile is None:
print(f"Kein Konto mit dem Namen {c} gefunden.") print(f"Kein Konto mit dem Namen {c} gefunden.")
...@@ -84,12 +85,13 @@ if __name__ == "__main__": ...@@ -84,12 +85,13 @@ if __name__ == "__main__":
start_post = max(existing_df['nr']) start_post = max(existing_df['nr'])
print(f"Dieser Kanal wurde schon einmal ausgelesen, zuletzt Post Nr.: {start_post} - seitdem {last_post-start_post} neue Posts") print(f"Dieser Kanal wurde schon einmal ausgelesen, zuletzt Post Nr.: {start_post} - seitdem {last_post-start_post} neue Posts")
else: else:
start_post = last_post-N+1 start_post = last_post-N
print(f"Noch nicht gespeichert. Importiere {N} Posts bis zum letzten: {last_post}.") print(f"Noch nicht gespeichert. Importiere {N} Posts bis zum letzten: {last_post}.")
# Lies die aktuellsten Posts, sichere und analysiere sie # Lies die aktuellsten Posts, sichere und analysiere sie
# #
print("Einlesen {start_post} bis {last_post}...") if start_post < last_post:
posts = tgc_read_range(c, start_post, last_post, save=False, describe= False) print(f"Einlesen {start_post+1} bis {last_post}...")
posts = tgc_read_range(c, start_post+1, last_post, save=False, describe= False)
# Nach hr-Links suchen # Nach hr-Links suchen
for post in posts: for post in posts:
interessant = find_hr_links(post['text']) interessant = find_hr_links(post['text'])
...@@ -99,6 +101,6 @@ if __name__ == "__main__": ...@@ -99,6 +101,6 @@ if __name__ == "__main__":
print(f"Potenziell interessant: {interessant}") print(f"Potenziell interessant: {interessant}")
# Posts anhängen an das csv dieses Kanals # Posts anhängen an das csv dieses Kanals
df = pd.DataFrame(posts) df = pd.DataFrame(posts)
if ('existing_df' in globals()):
df = pd.concat([existing_df, df]).drop_duplicates(subset=['nr']).reset_index(drop=True) df = pd.concat([existing_df, df]).drop_duplicates(subset=['nr']).reset_index(drop=True)
df.to_csv(f'tg-checks/{c}.csv', index=False) # Save to CSV for example df.to_csv(f'tg-checks/{c}.csv', index=False) # Save to CSV for example
print("Ende Gelände.")
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please sign in to comment