diff --git a/bot.py b/bot.py index 58cee24..62a8596 100644 --- a/bot.py +++ b/bot.py @@ -4,28 +4,26 @@ # RSS Bot # Python 3 required +import logging import re -import redis -import feedparser -from json import loads - from configparser import ConfigParser +from json import loads +from urllib.parse import urlparse + +import feedparser +import redis +from bs4 import BeautifulSoup from telegram import ChatAction, ParseMode +from telegram.error import TelegramError, Unauthorized, BadRequest, TimedOut, NetworkError, ChatMigrated from telegram.ext import Updater, Job, CommandHandler, MessageHandler, Filters from telegram.ext.dispatcher import run_async -from telegram.error import (TelegramError, Unauthorized, BadRequest, - TimedOut, NetworkError, ChatMigrated) - -import logging -from bs4 import BeautifulSoup -from urllib.parse import urlparse # Bot Configuration config = ConfigParser() config.read_file(open('config.ini')) redis_conf = config['REDIS'] -redis_db = redis_conf.get('db' , 0) +redis_db = redis_conf.get('db', 0) redis_host = redis_conf.get('host') redis_port = redis_conf.get('port', 6379) redis_socket = redis_conf.get('socket_path') @@ -51,6 +49,7 @@ if not r.ping(): print('Konnte nicht mit Redis verbinden, prüfe deine Einstellungen') quit() + def is_number(s): try: float(s) @@ -58,113 +57,125 @@ def is_number(s): except ValueError: return False + def remove_tags(html): - return ''.join(BeautifulSoup(html, "html.parser").findAll(text=True)) - + return ''.join(BeautifulSoup(html, "html.parser").findAll(text=True)) + + def can_use(update): if update.message.from_user.id in admins: - return True + return True else: - return False + return False + def cleanRSS(str): - str = str.replace('[…]', '') - str = str.replace('[bilder]', '') - str = str.replace('[mehr]', '') - str = str.replace('[video]', '') - str = str.replace('...[more]', '') - str = str.replace('[more]', '') - str = str.replace('[liveticker]', '') - str = str.replace('[livestream]', '') - str = str.replace('[multimedia]', '') - str = str.replace('[phoenix]', '') - str = str.replace('[swr]', '') - str = str.replace('[ndr]', '') - str = str.replace('[mdr]', '') - str = str.replace('[rbb]', '') - str = str.replace('[wdr]', '') - str = str.replace('[hr]', '') - str = str.replace('[br]', '') - str = str.replace('Click for full.', '') - str = str.replace('Read more »', '') - str = str.replace('Read more', '') - str = str.replace('(more…)', '') - str = str.replace('View On WordPress', '') - str = str.replace('Continue reading →', '') - str = str.replace('(RSS generated with FetchRss)', '') - str = str.replace('-- Delivered by Feed43 service', '') - str = str.replace('Meldung bei www.tagesschau.de lesen', '') - str = str.replace('The post.*appeared first on Sugoi! Anime Blog.', '') - str = str.replace('Der Beitrag.*erschien zuerst auf MAnime.de.', '') - str = re.sub('http://www\.serienjunkies.de/.*\.html', '', str) - return str - + str = str.replace('[…]', '') + str = str.replace('[bilder]', '') + str = str.replace('[mehr]', '') + str = str.replace('[video]', '') + str = str.replace('...[more]', '') + str = str.replace('[more]', '') + str = str.replace('[liveticker]', '') + str = str.replace('[livestream]', '') + str = str.replace('[multimedia]', '') + str = str.replace('[phoenix]', '') + str = str.replace('[swr]', '') + str = str.replace('[ndr]', '') + str = str.replace('[mdr]', '') + str = str.replace('[rbb]', '') + str = str.replace('[wdr]', '') + str = str.replace('[hr]', '') + str = str.replace('[br]', '') + str = str.replace('Click for full.', '') + str = str.replace('Read more »', '') + str = str.replace('Read more', '') + str = str.replace('(more…)', '') + str = str.replace('View On WordPress', '') + str = str.replace('Continue reading →', '') + str = str.replace('(RSS generated with FetchRss)', '') + str = str.replace('-- Delivered by Feed43 service', '') + str = str.replace('Meldung bei www.tagesschau.de lesen', '') + str = str.replace('The post.*appeared first on Sugoi! Anime Blog.', '') + str = str.replace('Der Beitrag.*erschien zuerst auf MAnime.de.', '') + str = re.sub('http://www\.serienjunkies.de/.*\.html', '', str) + return str + + def check_chat(bot, username): try: return bot.getChat(username) except: return - + + # Commands @run_async def start(bot, update): if not can_use(update): - return + return bot.sendMessage( - chat_id = update.message.chat_id, - text = 'Willkommen beim RSS-Bot!\nLass uns anfangen! Sende /hilfe, um zu starten.', - reply_to_message_id = update.message.message_id, - parse_mode = ParseMode.HTML - ) + chat_id=update.message.chat_id, + text='Willkommen beim RSS-Bot!\nLass uns anfangen! Sende /hilfe, um zu starten.', + reply_to_message_id=update.message.message_id, + parse_mode=ParseMode.HTML + ) + @run_async def help(bot, update): if not can_use(update): - return + return bot.sendMessage( - chat_id = update.message.chat_id, - text = '/rss: Abonnierte Feeds anzeigen\n/sub Feed-URL: Feed abonnieren\n/del n: Feed löschen', - reply_to_message_id = update.message.message_id, - parse_mode = ParseMode.HTML - ) + chat_id=update.message.chat_id, + text='/rss: Abonnierte Feeds anzeigen\n/sub Feed-URL: Feed abonnieren\n/del n: Feed löschen', + reply_to_message_id=update.message.message_id, + parse_mode=ParseMode.HTML + ) + def subscribe_to_rss(bot, update, args): if not can_use(update): - return + return if len(args) < 1: - bot.sendMessage(chat_id=update.message.chat_id, text='Bitte gebe eine Feed-URL ein.', reply_to_message_id=update.message.message_id) - return + bot.sendMessage(chat_id=update.message.chat_id, text='Bitte gebe eine Feed-URL ein.', + reply_to_message_id=update.message.message_id) + return feed_url = args[0] is_url = re.search("http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+", feed_url) if not is_url: - bot.sendMessage(chat_id=update.message.chat_id, text='Dies ist keine URL.', reply_to_message_id=update.message.message_id) - return + bot.sendMessage(chat_id=update.message.chat_id, text='Dies ist keine URL.', + reply_to_message_id=update.message.message_id) + return if len(args) > 1: - username = args[1] - chat_info = check_chat(bot, username) - if not chat_info: - bot.sendMessage(chat_id=update.message.chat_id, text='Dieser Kanal existiert nicht!', reply_to_message_id=update.message.message_id) - return - chat_id = str(chat_info.id) + username = args[1] + chat_info = check_chat(bot, username) + if not chat_info: + bot.sendMessage(chat_id=update.message.chat_id, text='Dieser Kanal existiert nicht!', + reply_to_message_id=update.message.message_id) + return + chat_id = str(chat_info.id) else: chat_id = str(update.message.chat_id) - + if r.sismember('pythonbot:rss:' + chat_id, feed_url): - bot.sendMessage(chat_id=update.message.chat_id, text='Dieser Feed wurde bereits abonniert.', reply_to_message_id=update.message.message_id) - return + bot.sendMessage(chat_id=update.message.chat_id, text='Dieser Feed wurde bereits abonniert.', + reply_to_message_id=update.message.message_id) + return bot.sendChatAction(update.message.chat_id, action=ChatAction.TYPING) feed_data = feedparser.parse(feed_url) if not 'link' in feed_data.feed: - bot.sendMessage(chat_id=update.message.chat_id, text='Kein gültiger Feed.',reply_to_message_id=update.message.message_id) - return - + bot.sendMessage(chat_id=update.message.chat_id, text='Kein gültiger Feed.', + reply_to_message_id=update.message.message_id) + return + if not 'title' in feed_data.feed: - feed_title = 'Unbekannten Feed' + feed_title = 'Unbekannten Feed' else: - feed_title = feed_data.feed.title - + feed_title = feed_data.feed.title + if len(feed_data.entries) > 0: if not 'id' in feed_data.entries[0]: last_entry = feed_data.entries[0].link @@ -172,191 +183,202 @@ def subscribe_to_rss(bot, update, args): last_entry = feed_data.entries[0].id lhash = 'pythonbot:rss:' + feed_url + ':last_entry' if not r.exists(lhash): - r.set(lhash, last_entry) + r.set(lhash, last_entry) r.sadd('pythonbot:rss:' + feed_url + ':subs', int(chat_id)) r.sadd('pythonbot:rss:' + chat_id, feed_url) bot.sendMessage( - chat_id = update.message.chat_id, - text = '' + feed_title + ' hinzugefügt!', - reply_to_message_id = update.message.message_id, - parse_mode = ParseMode.HTML - ) + chat_id=update.message.chat_id, + text='' + feed_title + ' hinzugefügt!', + reply_to_message_id=update.message.message_id, + parse_mode=ParseMode.HTML + ) + def unsubscribe_rss(bot, update, args): if not can_use(update): - return - - if len(args) < 1: - bot.sendMessage(chat_id=update.message.chat_id, text='Bitte gebe eine Nummer ein', reply_to_message_id=update.message.message_id) - return - - if len(args) > 1: - username = args[1] - chat_info = check_chat(bot, username) - if not chat_info: - bot.sendMessage(chat_id=update.message.chat_id, text='Dieser Kanal existiert nicht!', reply_to_message_id=update.message.message_id) return - chat_id = str(chat_info.id) + + if len(args) < 1: + bot.sendMessage(chat_id=update.message.chat_id, text='Bitte gebe eine Nummer ein', + reply_to_message_id=update.message.message_id) + return + + if len(args) > 1: + username = args[1] + chat_info = check_chat(bot, username) + if not chat_info: + bot.sendMessage(chat_id=update.message.chat_id, text='Dieser Kanal existiert nicht!', + reply_to_message_id=update.message.message_id) + return + chat_id = str(chat_info.id) else: chat_id = str(update.message.chat_id) - + if not is_number(args[0]): - bot.sendMessage(chat_id=update.message.chat_id, text='Bitte gebe eine Nummer ein.', reply_to_message_id=update.message.message_id) - return + bot.sendMessage(chat_id=update.message.chat_id, text='Bitte gebe eine Nummer ein.', + reply_to_message_id=update.message.message_id) + return uhash = 'pythonbot:rss:' + chat_id n = int(args[0]) subs = list(r.smembers(uhash)) if n < 1 or n > len(subs): - bot.sendMessage(chat_id=update.message.chat_id, text='Abonnement-ID ist zu hoch.', reply_to_message_id=update.message.message_id) + bot.sendMessage(chat_id=update.message.chat_id, text='Abonnement-ID ist zu hoch.', + reply_to_message_id=update.message.message_id) return - sub = subs[n-1] + sub = subs[n - 1] lhash = 'pythonbot:rss:' + sub + ':subs' r.srem(uhash, sub) r.srem(lhash, int(chat_id)) bot.sendMessage( - chat_id = update.message.chat_id, - text = '' + sub + ' entfernt.', - reply_to_message_id = update.message.message_id, - parse_mode = ParseMode.HTML - ) + chat_id=update.message.chat_id, + text='' + sub + ' entfernt.', + reply_to_message_id=update.message.message_id, + parse_mode=ParseMode.HTML + ) left = r.smembers(lhash) - if len(left) < 1: # no one subscribed, remove it + if len(left) < 1: # no one subscribed, remove it r.delete('pythonbot:rss:' + sub + ':last_entry') - + def get_rss_list(chat_id, chat_name): uhash = 'pythonbot:rss:' + chat_id subs = list(r.smembers(uhash)) if len(subs) < 1: - return 'Keine Feeds abonniert!' + return 'Keine Feeds abonniert!' text = '' + chat_name + ' hat abonniert:\n' for n, feed in enumerate(subs): - text = text + str(n+1) + ') ' + feed + '\n' + text = text + str(n + 1) + ') ' + feed + '\n' return text -@run_async + +@run_async def list_rss(bot, update, args): if not can_use(update): - return - if len(args) == 1: - username = args[0] - chat_info = check_chat(bot, username) - if not chat_info: - bot.sendMessage(chat_id=update.message.chat_id, text='Dieser Kanal existiert nicht!', reply_to_message_id=update.message.message_id) return - rss_list = get_rss_list(str(chat_info.id), chat_info.title) + if len(args) == 1: + username = args[0] + chat_info = check_chat(bot, username) + if not chat_info: + bot.sendMessage(chat_id=update.message.chat_id, text='Dieser Kanal existiert nicht!', + reply_to_message_id=update.message.message_id) + return + rss_list = get_rss_list(str(chat_info.id), chat_info.title) else: - if update.message.chat.first_name: - chat_name = update.message.chat.first_name - else: - chat_name = update.message.chat.title - rss_list = get_rss_list(str(update.message.chat_id), chat_name) + if update.message.chat.first_name: + chat_name = update.message.chat.first_name + else: + chat_name = update.message.chat.title + rss_list = get_rss_list(str(update.message.chat_id), chat_name) bot.sendMessage( - chat_id=update.message.chat_id, - text = rss_list, - reply_to_message_id=update.message.message_id, - parse_mode=ParseMode.HTML - ) + chat_id=update.message.chat_id, + text=rss_list, + reply_to_message_id=update.message.message_id, + parse_mode=ParseMode.HTML + ) + def get_new_entries(last, new_entries): entries = [] - for k,v in enumerate(new_entries): - if 'id' in v: - if v.id == last: - return entries + for k, v in enumerate(new_entries): + if 'id' in v: + if v.id == last: + return entries + else: + entries.append(v) else: - entries.append(v) - else: - if v.link == last: - return entries - else: - entries.append(v) + if v.link == last: + return entries + else: + entries.append(v) return entries + def manually_check_rss(bot, update): if not can_use(update): - return + return check_rss(bot, '') bot.sendMessage( - chat_id=update.message.chat_id, - text = 'Ausgeführt.', - reply_to_message_id=update.message.message_id - ) + chat_id=update.message.chat_id, + text='Ausgeführt.', + reply_to_message_id=update.message.message_id + ) + @run_async def check_rss(bot, job): keys = list(r.keys('pythonbot:rss:*:subs')) for k, v in enumerate(keys): - p = re.compile('pythonbot:rss:(.+):subs') - match_func = p.search(v) - url = match_func.group(1) - print('RSS: ' + url) - last = r.get('pythonbot:rss:' + url + ':last_entry') + p = re.compile('pythonbot:rss:(.+):subs') + match_func = p.search(v) + url = match_func.group(1) + print('RSS: ' + url) + last = r.get('pythonbot:rss:' + url + ':last_entry') - feed_data = feedparser.parse(url) - if feed_data.status < 400: - if not 'title' in feed_data.feed: - feed_title = feed_data.feed.link - else: - feed_title = feed_data.feed.title - newentr = get_new_entries(last, feed_data.entries) - text = '' - for k2, v2 in enumerate(newentr): - if not 'title' in v2: - title = 'Kein Titel' + feed_data = feedparser.parse(url) + if feed_data.status < 400: + if not 'title' in feed_data.feed: + feed_title = feed_data.feed.link else: - title = remove_tags(v2.title).lstrip() - if not 'link' in v2: - link = feed_data.feed.link - link_name = link - else: - link = v2.link - f = re.search('^https?://feedproxy\.google\.com/~r/(.+?)/.*', link) # feedproxy.google.com - if f: - link_name = f.group(1) - else: - link_name = urlparse(link).netloc - link_name = re.sub('^www\d?\.', '', link_name) # www. - if 'content' in v2: - content = remove_tags(v2.content[0].value).lstrip() - content = cleanRSS(content) - if len(content) > 250: - content = content[0:250] + '...' - elif 'summary' in v2: - content = remove_tags(v2.summary).lstrip() - content = cleanRSS(content) - if len(content) > 250: - content = content[0:250] + '...' - else: - content = '' - # Für 1 Nachricht pro Beitrag, tue dies: - # Entferne hier das "text + "... - text = text + '\n' + title + '\n' + feed_title + '\n' + content + '\nAuf ' + link_name + ' weiterlesen\n' - # ...und setze hier vor jeder Zeile 2 zusätzliche Leerzeichen - if text != '': - if not 'id' in newentr[0]: - newlast = newentr[0].link - else: - newlast = newentr[0].id - r.set('pythonbot:rss:' + url + ':last_entry', newlast) - for k2, receiver in enumerate(list(r.smembers(v))): - try: - bot.sendMessage(receiver, text, parse_mode=ParseMode.HTML, disable_web_page_preview=True) - except Unauthorized: - print('Chat ' + receiver + ' existiert nicht mehr, lösche aus Abonnenten-Liste') - r.srem(v, receiver) - r.delete('pythonbot:rss:' + receiver) - except ChatMigrated as e: - print('Chat migriert: ' + receiver + ' -> ' + str(e.new_chat_id)) - r.srem(v, receiver) - r.sadd(v, e.new_chat_id) - r.rename('pythonbot:rss:' + receiver, 'pythonbot:rss:' + str(e.new_chat_id)) - bot.sendMessage(e.new_chat_id, text, parse_mode=ParseMode.HTML, disable_web_page_preview=True) - else: - print('HTTP-Fehler: ' + str(feed_data.status)) + feed_title = feed_data.feed.title + newentr = get_new_entries(last, feed_data.entries) + text = '' + for k2, v2 in enumerate(newentr): + if not 'title' in v2: + title = 'Kein Titel' + else: + title = remove_tags(v2.title).lstrip() + if not 'link' in v2: + link = feed_data.feed.link + link_name = link + else: + link = v2.link + f = re.search('^https?://feedproxy\.google\.com/~r/(.+?)/.*', link) # feedproxy.google.com + if f: + link_name = f.group(1) + else: + link_name = urlparse(link).netloc + link_name = re.sub('^www\d?\.', '', link_name) # www. + if 'content' in v2: + content = remove_tags(v2.content[0].value).lstrip() + content = cleanRSS(content) + if len(content) > 250: + content = content[0:250] + '...' + elif 'summary' in v2: + content = remove_tags(v2.summary).lstrip() + content = cleanRSS(content) + if len(content) > 250: + content = content[0:250] + '...' + else: + content = '' + # Für 1 Nachricht pro Beitrag, tue dies: + # Entferne hier das "text + "... + text = text + '\n' + title + '\n' + feed_title + '\n' + content + '\nAuf ' + link_name + ' weiterlesen\n' + # ...und setze hier vor jeder Zeile 2 zusätzliche Leerzeichen + if text != '': + if not 'id' in newentr[0]: + newlast = newentr[0].link + else: + newlast = newentr[0].id + r.set('pythonbot:rss:' + url + ':last_entry', newlast) + for k2, receiver in enumerate(list(r.smembers(v))): + try: + bot.sendMessage(receiver, text, parse_mode=ParseMode.HTML, disable_web_page_preview=True) + except Unauthorized: + print('Chat ' + receiver + ' existiert nicht mehr, lösche aus Abonnenten-Liste') + r.srem(v, receiver) + r.delete('pythonbot:rss:' + receiver) + except ChatMigrated as e: + print('Chat migriert: ' + receiver + ' -> ' + str(e.new_chat_id)) + r.srem(v, receiver) + r.sadd(v, e.new_chat_id) + r.rename('pythonbot:rss:' + receiver, 'pythonbot:rss:' + str(e.new_chat_id)) + bot.sendMessage(e.new_chat_id, text, parse_mode=ParseMode.HTML, disable_web_page_preview=True) + else: + print('HTTP-Fehler: ' + str(feed_data.status)) print('----------') + def error(bot, update, error): logger.warn('Update "%s" verursachte Fehler "%s"' % (update, error)) @@ -365,7 +387,7 @@ def main(): # Create the EventHandler and pass it your bot's token. updater = Updater(token=config['DEFAULT']['token']) j = updater.job_queue - + # Bot-Infos prüfen bot_info = updater.bot.getMe() print('Starte ' + bot_info.first_name + ', AKA @' + bot_info.username + ' (' + str(bot_info.id) + ')') @@ -377,7 +399,7 @@ def main(): dp.add_handler(CommandHandler("start", start)) dp.add_handler(CommandHandler("help", help)) dp.add_handler(CommandHandler("hilfe", help)) - + dp.add_handler(CommandHandler("rss", list_rss, pass_args=True)) dp.add_handler(CommandHandler("sub", subscribe_to_rss, pass_args=True)) dp.add_handler(CommandHandler("del", unsubscribe_rss, pass_args=True)) @@ -385,7 +407,7 @@ def main(): # log all errors dp.add_error_handler(error) - + # cron job_minute = Job(check_rss, 60.0) j.put(job_minute, next_t=10.0)