diff --git a/.gitignore b/.gitignore index 19a6bf7..7d06023 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,11 @@ -# Byte-compiled / optimized / DLL files +# Byte-compiled / optimized / DLL files / pip __pycache__/ *.py[cod] *$py.class +src/ -# Config +# IDE +.idea/ + +# Bot-specific files config.ini \ No newline at end of file diff --git a/README.md b/README.md index 4aeaa21..a9587da 100644 --- a/README.md +++ b/README.md @@ -1,13 +1,12 @@ RSS-Bot für Telegram ===================== - -1. `git clone https://gitlab.com/iCON/rssbot` -2. `sudo apt-get install python3 python3-pip` -3. `sudo pip3 install -r requirements.txt` -4. `cp config.ini.example config.ini` -5. Bot-Token in `config.ini` einfügen +1. **Klonen:** `git clone https://git.centzilius.de/iCON/rssbot` +2. **Python 3 installieren:** `sudo apt-get install python3 python3-pip` +3. **Module installieren:** `sudo pip3 install -r requirements.txt` +4. **Config kopieren:** `cp config.ini.example config.ini` +5. **Bot-Token** in `config.ini` einfügen 1. Weitere Einstellungen für Redis vornehmen, falls vom Standard abweicht -6. `bot.py` öffnen und unter `def can_use(update):` die ID zur eigenen abändern +6. **Eigene Admin-ID in die Config eintragen** 7. 
`python3 bot.py` (c) 2016-2017 Andreas Bielawski diff --git a/bot.py b/bot.py index 6946e3d..1e04d42 100644 --- a/bot.py +++ b/bot.py @@ -1,428 +1,378 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # -*- coding: utf-8 -*- -# -# RSS Bot -# Python 3 required - import html import logging import re +import sys from configparser import ConfigParser from json import loads from urllib.parse import urlparse import feedparser import redis -from bs4 import BeautifulSoup -from telegram import ChatAction, ParseMode -from telegram.error import TelegramError, Unauthorized, BadRequest, TimedOut, NetworkError, ChatMigrated -from telegram.ext import Updater, Job, CommandHandler, MessageHandler, Filters +import telegram +from telegram.ext import CommandHandler, Updater from telegram.ext.dispatcher import run_async -# Bot Configuration -config = ConfigParser() -config.read_file(open('config.ini')) +import utils -redis_conf = config['REDIS'] -redis_db = redis_conf.get('db', 0) -redis_host = redis_conf.get('host') -redis_port = redis_conf.get('port', 6379) -redis_socket = redis_conf.get('socket_path') - -# Enable logging -logging.basicConfig(format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', - level=logging.ERROR) +# Logging +logging.basicConfig( + format="%(asctime)s - %(levelname)s: %(message)s", + datefmt="%d.%m.%Y %H:%M:%S", + level=logging.INFO +) logger = logging.getLogger(__name__) -# Admins -admins = loads(config["ADMIN"]["id"]) -if not admins: - print('Keine Admin-IDs gesetzt, bitte Konfigurationsdatei bearbeiten.') - quit() +# Bot configuration +config = ConfigParser() +try: + config.read_file(open('config.ini')) +except FileNotFoundError: + logger.critical('Config.ini nicht gefunden') + sys.exit(1) -# Utils +# Bot token +try: + bot_token = config['DEFAULT']['token'] +except KeyError: + logger.error('Kein Bot-Token gesetzt, bitte config.ini prüfen') + sys.exit(1) +if not bot_token: + logger.error('Kein Bot-Token gesetzt, bitte config.ini prüfen') + sys.exit(1) 
+ +# Admins +try: + admins = loads(config["ADMIN"]["id"]) +except KeyError: + logger.error('Keine Admin-IDs gesetzt, bitte config.ini prüfen.') + sys.exit(1) +if not admins: + logger.error('Keine Admin-IDs gesetzt, bitte config.ini prüfen.') + sys.exit(1) + +for admin in admins: + if not isinstance(admin, int): + logger.error('Admin-IDs müssen Integer sein.') + sys.exit(1) + +# Redis +redis_conf = config['REDIS'] +redis_db = redis_conf.get('db', 0) +redis_host = redis_conf.get('host', '127.0.0.1') +redis_port = redis_conf.get('port', 6379) +redis_socket = redis_conf.get('socket_path') if redis_socket: r = redis.Redis(unix_socket_path=redis_socket, db=int(redis_db), decode_responses=True) else: r = redis.Redis(host=redis_host, port=int(redis_port), db=int(redis_db), decode_responses=True) if not r.ping(): - print('Konnte nicht mit Redis verbinden, prüfe deine Einstellungen') - quit() + logging.getLogger("Redis").critical("Redis-Verbindungsfehler, config.ini prüfen") + sys.exit(1) + +feed_hash = 'pythonbot:rss:{0}' -def is_number(s): - try: - float(s) - return True - except ValueError: - return False - - -def remove_tags(html): - return ''.join(BeautifulSoup(html, "html.parser").findAll(text=True)) - - -def can_use(update): - if update.message.from_user.id in admins: - return True - else: - return False - - -def cleanRSS(str): - str = str.replace('[…]', '') - str = str.replace('[bilder]', '') - str = str.replace('[mehr]', '') - str = str.replace('[video]', '') - str = str.replace('...[more]', '') - str = str.replace('[more]', '') - str = str.replace('[liveticker]', '') - str = str.replace('[livestream]', '') - str = str.replace('[multimedia]', '') - str = str.replace('[phoenix]', '') - str = str.replace('[swr]', '') - str = str.replace('[ndr]', '') - str = str.replace('[mdr]', '') - str = str.replace('[rbb]', '') - str = str.replace('[wdr]', '') - str = str.replace('[hr]', '') - str = str.replace('[br]', '') - str = str.replace('Click for full.', '') - str = 
str.replace('Read more »', '') - str = str.replace('Read more', '') - str = str.replace('(more…)', '') - str = str.replace('View On WordPress', '') - str = str.replace('Continue reading →', '') - str = str.replace('(RSS generated with FetchRss)', '') - str = str.replace('-- Delivered by Feed43 service', '') - str = str.replace('Meldung bei www.tagesschau.de lesen', '') - str = str.replace('<', '<') - str = str.replace('>', '>') - str = re.sub('Der Beitrag.*erschien zuerst auf MAnime.de.', '', str) - str = re.sub('The post.*appeared first on Sugoi! Anime Blog.', '', str) - str = re.sub('http://www\.serienjunkies.de/.*\.html', '', str) - return str - - -def check_chat(bot, username): - try: - return bot.getChat(username) - except: - return - - -# Commands @run_async def start(bot, update): - if not can_use(update): + if not utils.can_use_bot(update): return - bot.sendMessage( - chat_id=update.message.chat_id, - text='Willkommen beim RSS-Bot!\nLass uns anfangen! Sende /hilfe, um zu starten.', - reply_to_message_id=update.message.message_id, - parse_mode=ParseMode.HTML + update.message.reply_text( + text='Willkommen beim RSS-Bot!\nSende /help, um zu starten.', + parse_mode=telegram.ParseMode.HTML ) @run_async -def help(bot, update): - if not can_use(update): +def help_text(bot, update): + if not utils.can_use_bot(update): return - bot.sendMessage( - chat_id=update.message.chat_id, - text='/rss: Abonnierte Feeds anzeigen\n/sub Feed-URL: Feed abonnieren\n/del n: Feed löschen', - reply_to_message_id=update.message.message_id, - parse_mode=ParseMode.HTML + update.message.reply_text( + text='/rss [Chat]: Abonnierte Feeds anzeigen\n' + '/sub Feed-URL [Chat]: Feed abonnieren\n' + '/del n [Chat]: Feed löschen', + parse_mode=telegram.ParseMode.HTML ) -def subscribe_to_rss(bot, update, args): - if not can_use(update): +@run_async +def list_feeds(bot, update, args): + if not utils.can_use_bot(update): return - if len(args) < 1: - bot.sendMessage(chat_id=update.message.chat_id, 
text='Bitte gebe eine Feed-URL ein.', - reply_to_message_id=update.message.message_id) + if args: + chat_name = args[0] + try: + resp = bot.getChat(chat_name) + except telegram.error.BadRequest: + update.message.reply_text('❌ Dieser Kanal existiert nicht.') + return + chat_id = str(resp.id) + chat_title = resp.title + else: + chat_id = str(update.message.chat.id) + if update.message.chat.type == 'private': + chat_title = update.message.chat.first_name + else: + chat_title = update.message.chat.title + + subs = r.smembers(feed_hash.format(chat_id)) + if not subs: + text = '❌ Keine Feeds abonniert.' + else: + text = '' + html.escape(chat_title) + ' hat abonniert:\n' + for n, feed in enumerate(subs): + text += '' + str(n + 1) + ') ' + feed + '\n' + + update.message.reply_text( + text=text, + parse_mode=telegram.ParseMode.HTML + ) + + +@run_async +def subscribe(bot, update, args): + if not utils.can_use_bot(update): + return + if not args: + update.message.reply_text('❌ Keine Feed-URL angegeben.') return feed_url = args[0] - is_url = re.search("http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+", feed_url) - if not is_url: - bot.sendMessage(chat_id=update.message.chat_id, text='Dies ist keine URL.', - reply_to_message_id=update.message.message_id) + if not re.match("^http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*(),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+$", feed_url): + update.message.reply_text('❌ Das ist keine URL.') return + # Get Chat ID from name if given if len(args) > 1: - username = args[1] - chat_info = check_chat(bot, username) - if not chat_info: - bot.sendMessage(chat_id=update.message.chat_id, text='Dieser Kanal existiert nicht!', - reply_to_message_id=update.message.message_id) - return - chat_id = str(chat_info.id) - else: - chat_id = str(update.message.chat_id) - - if r.sismember('pythonbot:rss:' + chat_id, feed_url): - bot.sendMessage(chat_id=update.message.chat_id, text='Dieser Feed wurde bereits abonniert.', - 
reply_to_message_id=update.message.message_id) - return - - bot.sendChatAction(update.message.chat_id, action=ChatAction.TYPING) - feed_data = feedparser.parse(feed_url) - if not 'link' in feed_data.feed: - bot.sendMessage(chat_id=update.message.chat_id, text='Kein gültiger Feed.', - reply_to_message_id=update.message.message_id) - return - - if not 'title' in feed_data.feed: - feed_title = 'Unbekannten Feed' - else: - feed_title = feed_data.feed.title - - if len(feed_data.entries) > 0: - if not 'id' in feed_data.entries[0]: - last_entry = feed_data.entries[0].link - else: - last_entry = feed_data.entries[0].id - lhash = 'pythonbot:rss:' + feed_url + ':last_entry' - if not r.exists(lhash): - r.set(lhash, last_entry) - - r.sadd('pythonbot:rss:' + feed_url + ':subs', int(chat_id)) - r.sadd('pythonbot:rss:' + chat_id, feed_url) - bot.sendMessage( - chat_id=update.message.chat_id, - text='' + feed_title + ' hinzugefügt!', - reply_to_message_id=update.message.message_id, - parse_mode=ParseMode.HTML - ) - - -def unsubscribe_rss(bot, update, args): - if not can_use(update): - return - - if len(args) < 1: - bot.sendMessage(chat_id=update.message.chat_id, text='Bitte gebe eine Nummer ein', - reply_to_message_id=update.message.message_id) - return - - if len(args) > 1: - username = args[1] - chat_info = check_chat(bot, username) - if not chat_info: - bot.sendMessage(chat_id=update.message.chat_id, text='Dieser Kanal existiert nicht!', - reply_to_message_id=update.message.message_id) - return - chat_id = str(chat_info.id) - else: - chat_id = str(update.message.chat_id) - - if not is_number(args[0]): - bot.sendMessage(chat_id=update.message.chat_id, text='Bitte gebe eine Nummer ein.', - reply_to_message_id=update.message.message_id) - return - uhash = 'pythonbot:rss:' + chat_id - n = int(args[0]) - subs = list(r.smembers(uhash)) - if n < 1 or n > len(subs): - bot.sendMessage(chat_id=update.message.chat_id, text='Abonnement-ID ist zu hoch.', - 
reply_to_message_id=update.message.message_id) - return - sub = subs[n - 1] - lhash = 'pythonbot:rss:' + sub + ':subs' - r.srem(uhash, sub) - r.srem(lhash, int(chat_id)) - bot.sendMessage( - chat_id=update.message.chat_id, - text='' + sub + ' entfernt.', - reply_to_message_id=update.message.message_id, - parse_mode=ParseMode.HTML - ) - left = r.smembers(lhash) - if len(left) < 1: # no one subscribed, remove it - r.delete('pythonbot:rss:' + sub + ':last_entry') - - -def get_rss_list(chat_id, chat_name): - uhash = 'pythonbot:rss:' + chat_id - subs = list(r.smembers(uhash)) - if len(subs) < 1: - return 'Keine Feeds abonniert!' - text = '' + chat_name + ' hat abonniert:\n' - for n, feed in enumerate(subs): - text = text + str(n + 1) + ') ' + feed + '\n' - return text - - -@run_async -def list_rss(bot, update, args): - if not can_use(update): - return - if len(args) == 1: - username = args[0] - chat_info = check_chat(bot, username) - if not chat_info: - bot.sendMessage(chat_id=update.message.chat_id, text='Dieser Kanal existiert nicht!', - reply_to_message_id=update.message.message_id) - return - rss_list = get_rss_list(str(chat_info.id), chat_info.title) - else: - if update.message.chat.first_name: - chat_name = update.message.chat.first_name - else: - chat_name = update.message.chat.title - rss_list = get_rss_list(str(update.message.chat_id), chat_name) - bot.sendMessage( - chat_id=update.message.chat_id, - text=rss_list, - reply_to_message_id=update.message.message_id, - parse_mode=ParseMode.HTML - ) - - -def get_new_entries(last, new_entries): - entries = [] - for k, v in enumerate(new_entries): - if 'id' in v: - if v.id == last: - return entries - else: - entries.append(v) - else: - if v.link == last: - return entries - else: - entries.append(v) - return entries - - -def manually_check_rss(bot, update): - if not can_use(update): - return - check_rss(bot, '') - bot.sendMessage( - chat_id=update.message.chat_id, - text='Ausgeführt.', - 
reply_to_message_id=update.message.message_id - ) - - -@run_async -def check_rss(bot, job): - keys = list(r.keys('pythonbot:rss:*:subs')) - for k, v in enumerate(keys): - p = re.compile('pythonbot:rss:(.+):subs') - match_func = p.search(v) - url = match_func.group(1) - print('RSS: ' + url) - last = r.get('pythonbot:rss:' + url + ':last_entry') - - feed_data = feedparser.parse(url) + chat_name = args[1] try: - if feed_data.status < 400: - if not 'title' in feed_data.feed: - feed_title = feed_data.feed.link - else: - feed_title = feed_data.feed.title - newentr = get_new_entries(last, feed_data.entries) - text = '' - for k2, v2 in enumerate(newentr): - if not 'title' in v2: - title = 'Kein Titel' - else: - title = html.escape(remove_tags(v2.title).lstrip()) - if not 'link' in v2: - link = feed_data.feed.link - link_name = link - else: - link = v2.link - f = re.search('^https?://feedproxy\.google\.com/~r/(.+?)/.*', link) # feedproxy.google.com - if f: - link_name = f.group(1) - else: - link_name = urlparse(link).netloc - link_name = re.sub('^www\d?\.', '', link_name) # www. - if 'content' in v2: - content = remove_tags(v2.content[0].value).lstrip() - content = cleanRSS(content) - if len(content) > 250: - content = content[0:250] + '...' - elif 'summary' in v2: - content = remove_tags(v2.summary).lstrip() - content = cleanRSS(content) - if len(content) > 250: - content = content[0:250] + '...' - else: - content = '' - # Für 1 Nachricht pro Beitrag, tue dies: - # Entferne hier das "text + "... 
- text = text + '\n' + title + '\n' + feed_title + '\n' + content + '\nAuf ' + link_name + ' weiterlesen\n' - # ...und setze hier vor jeder Zeile 4 zusätzliche Leerzeichen (bis "else", Zeile 379) - if text != '': - if not 'id' in newentr[0]: - newlast = newentr[0].link - else: - newlast = newentr[0].id - r.set('pythonbot:rss:' + url + ':last_entry', newlast) - for k2, receiver in enumerate(list(r.smembers(v))): - try: - bot.sendMessage(receiver, text, parse_mode=ParseMode.HTML, disable_web_page_preview=True) - except Unauthorized: - print('Chat ' + receiver + ' existiert nicht mehr, lösche aus Abonnenten-Liste') - r.srem(v, receiver) - r.delete('pythonbot:rss:' + receiver) - except ChatMigrated as e: - print('Chat migriert: ' + receiver + ' -> ' + str(e.new_chat_id)) - r.srem(v, receiver) - r.sadd(v, e.new_chat_id) - r.rename('pythonbot:rss:' + receiver, 'pythonbot:rss:' + str(e.new_chat_id)) - bot.sendMessage(e.new_chat_id, text, parse_mode=ParseMode.HTML, disable_web_page_preview=True) + resp = bot.getChat(chat_name) + except telegram.error.BadRequest: + update.message.reply_text('❌ Dieser Kanal existiert nicht.') + return + chat_id = str(resp.id) + resp = bot.getChatMember(chat_id, bot.id) + if resp.status != 'administrator': + update.message.reply_text('❌ Bot ist kein Administrator in diesem Kanal.') + return + else: + chat_id = str(update.message.chat.id) + + bot.sendChatAction(update.message.chat.id, action=telegram.ChatAction.TYPING) + data = feedparser.parse(feed_url) + if 'link' not in data.feed: + update.message.reply_text('❌ Kein gültiger Feed.') + return + feed_url = data.href # Follow all redirects + if r.sismember(feed_hash.format(chat_id), feed_url): + update.message.reply_text('✅ Dieser Feed wurde bereits abonniert.') + return + + if 'title' not in data.feed: + feed_title = feed_url + else: + feed_title = html.escape(data.feed['title']) + + # Save the last entry in Redis, if it doesn't exist + if data.entries: + last_entry_hash = 
feed_hash.format(feed_url + ':last_entry') + if not r.exists(last_entry_hash): + if 'id' not in data.entries[0]: + last_entry = data.entries[0]['link'] else: - print('HTTP-Fehler: ' + str(feed_data.status)) - except: - print('Fehler: Seite nicht erreichbar') - print('-----------------------') + last_entry = data.entries[0]['id'] + r.set(last_entry_hash, last_entry) + + r.sadd(feed_hash.format(feed_url + ':subs'), chat_id) + r.sadd(feed_hash.format(chat_id), feed_url) + update.message.reply_text( + text='✅ ' + feed_title + ' hinzugefügt!', + parse_mode=telegram.ParseMode.HTML + ) -def error(bot, update, error): - logger.warn('Update "%s" verursachte Fehler "%s"' % (update, error)) +@run_async +def unsubscribe(bot, update, args): + if not utils.can_use_bot(update): + return + if not args: + update.message.reply_text('❌ Keine Nummer angegeben.') + return + + # Get Chat ID from name if given + if len(args) > 1: + chat_name = args[1] + try: + resp = bot.getChat(chat_name) + except telegram.error.BadRequest: + update.message.reply_text('❌ Dieser Kanal existiert nicht.') + return + chat_id = str(resp.id) + else: + chat_id = str(update.message.chat.id) + + try: + n = int(args[0]) + except ValueError: + update.message.reply_text('❌ Keine Nummer angegeben.') + return + + chat_hash = feed_hash.format(chat_id) + subs = r.smembers(chat_hash) + if n < 1: + update.message.reply_text('❌ Nummer muss größer als 0 sein!') + return + elif n > len(subs): + update.message.reply_text('❌ Feed-ID zu hoch.') + return + + feed_url = list(subs)[n - 1] + sub_hash = feed_hash.format(feed_url + ':subs') + r.srem(chat_hash, feed_url) + r.srem(sub_hash, chat_id) + if not r.smembers(sub_hash): # no one subscribed, remove it + r.delete(feed_hash.format(feed_url + ':last_entry')) + + update.message.reply_text( + text='✅ ' + feed_url + ' entfernt.', + parse_mode=telegram.ParseMode.HTML + ) +@run_async +def check_feed(bot, key): + feed_url = re.match('^pythonbot:rss:(.+):subs$', key).group(1) + 
logger.info(feed_url) + data = feedparser.parse(feed_url) + if 'link' not in data.feed: + logger.warning('Kein gültiger Feed, Status-Code ' + str(data.status)) + return None + if 'title' not in data.feed: + feed_title = data.feed['link'] + else: + feed_title = data.feed['title'] + last_entry_hash = feed_hash.format(feed_url + ':last_entry') + last_entry = r.get(last_entry_hash) + new_entries = utils.get_new_entries(data.entries, last_entry) + for entry in reversed(new_entries): + if 'title' not in entry: + post_title = 'Kein Titel' + else: + post_title = html.escape(utils.remove_html_tags(entry['title']).strip()) + if 'link' not in entry: + post_link = entry.link + link_name = post_link + else: + post_link = entry.link + feedproxy = re.search('^https?://feedproxy\.google\.com/~r/(.+?)/.*', post_link) # feedproxy.google.com + if feedproxy: + link_name = feedproxy.group(1) + else: + link_name = urlparse(post_link).netloc + link_name = re.sub('^www\d?\.', '', link_name) # remove www. + if 'content' in entry: + content = utils.get_content(entry.content[0]['value']) + elif 'summary' in entry: + content = utils.get_content(entry.summary) + else: + content = '' + text = '{post_title}\n{feed_title}\n{content}'.format( + post_title=post_title, + feed_title=feed_title, + content=content + ) + text += '\nAuf {link_name} weiterlesen\n'.format( + post_link=post_link, + link_name=link_name + ) + for member in r.smembers(key): + try: + bot.sendMessage( + chat_id=member, + text=text, + parse_mode=telegram.ParseMode.HTML, + disable_web_page_preview=True + ) + except telegram.error.Unauthorized: + logging.warning('Chat ' + member + ' existiert nicht mehr, wird gelöscht.') + r.srem(key, member) + r.delete(feed_hash.format(member)) + except telegram.error.ChatMigrated as new_chat: + new_chat_id = new_chat.new_chat_id + logging.info('Chat migriert: ' + member + ' -> ' + str(new_chat_id)) + r.srem(key, member) + r.sadd(key, new_chat_id) + r.rename(feed_hash.format(member), 
feed_hash.format(new_chat_id)) + bot.sendMessage( + chat_id=member, + text=text, + parse_mode=telegram.ParseMode.HTML, + disable_web_page_preview=True + ) + except telegram.error.TimedOut: + pass + + if not r.exists(key): + r.delete(last_entry_hash) + return + + # Set the new last entry if there are any + if new_entries: + if 'id' not in new_entries[0]: + new_last_entry = new_entries[0].link + else: + new_last_entry = new_entries[0].id + r.set(last_entry_hash, new_last_entry) + + +@run_async +def run_job(bot, job=None): + logger.info('================================') + keys = r.keys('pythonbot:rss:*:subs') + for key in keys: + check_feed(bot, key) + + +# Main function def main(): - # Create the EventHandler and pass it your bot's token. - updater = Updater(token=config['DEFAULT']['token']) - j = updater.job_queue + # Setup the updater and show bot info + updater = Updater(token=bot_token) + try: + bot_info = updater.bot.getMe() + except telegram.error.Unauthorized: + logger.error('Anmeldung nicht möglich, Bot-Token falsch?') + sys.exit(1) - # Bot-Infos prüfen - bot_info = updater.bot.getMe() - print('Starte ' + bot_info.first_name + ', AKA @' + bot_info.username + ' (' + str(bot_info.id) + ')') + logger.info('Starte ' + bot_info.first_name + ', AKA @' + bot_info.username + ' (' + str(bot_info.id) + ')') - # Get the dispatcher to register handlers - dp = updater.dispatcher + # Register Handlers + handlers = [ + CommandHandler('start', start), + CommandHandler('help', help_text), + CommandHandler('rss', list_feeds, pass_args=True), + CommandHandler('sub', subscribe, pass_args=True), + CommandHandler('del', unsubscribe, pass_args=True), + CommandHandler('sync', run_job) + ] + for handler in handlers: + updater.dispatcher.add_handler(handler) - # on different commands - answer in Telegram - dp.add_handler(CommandHandler("start", start)) - dp.add_handler(CommandHandler("help", help)) - dp.add_handler(CommandHandler("hilfe", help)) + updater.job_queue.run_repeating( + 
run_job, + interval=60.0, + first=2.0 + ) - dp.add_handler(CommandHandler("rss", list_rss, pass_args=True)) - dp.add_handler(CommandHandler("sub", subscribe_to_rss, pass_args=True)) - dp.add_handler(CommandHandler("del", unsubscribe_rss, pass_args=True)) - dp.add_handler(CommandHandler("sync", manually_check_rss)) + # Start this thing! + updater.start_polling( + clean=True, + bootstrap_retries=-1, + allowed_updates=["message"] + ) - # log all errors - dp.add_error_handler(error) - - # cron - j.run_repeating(check_rss, interval=60.0, first=15.0) - - # Start the Bot - updater.start_polling(timeout=20, clean=True, bootstrap_retries=-1, allowed_updates=["message"]) - - # Run the bot until the you presses Ctrl-C or the process receives SIGINT, - # SIGTERM or SIGABRT. This should be used most of the time, since - # start_polling() is non-blocking and will stop the bot gracefully. + # Run Bot until CTRL+C is pressed or a SIGINT, + # SIGTERM or SIGABRT is sent. updater.idle() diff --git a/config.ini.example b/config.ini.example index 245ce0f..1bac842 100644 --- a/config.ini.example +++ b/config.ini.example @@ -8,4 +8,5 @@ token = 1337:1234567890abcdefgh #socket_path = /home/user/.redis/sock [ADMIN] +# Must be integers! 
id = [1337] \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index d5ba097..d17363a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,3 @@ -beautifulsoup4 feedparser python-telegram-bot redis \ No newline at end of file diff --git a/utils.py b/utils.py new file mode 100644 index 0000000..340dca8 --- /dev/null +++ b/utils.py @@ -0,0 +1,81 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +import re + +from bot import admins + + +def can_use_bot(update): + """Returns True if user is an admin""" + if update.message.from_user.id in admins: + return True + else: + return False + + +def get_new_entries(entries, last_entry): + """Returns the entries preceding the one whose id (or link, if it has no id) equals last_entry; all entries if none matches""" + new_entries = [] + for entry in entries: + if 'id' in entry: + if entry['id'] == last_entry: + return new_entries + else: + new_entries.append(entry) + else: + if entry['link'] == last_entry: + return new_entries + else: + new_entries.append(entry) + return new_entries + + +def remove_html_tags(html): + """Removes HTML tags""" + cleanr = re.compile('<.*?>') + cleantext = re.sub(cleanr, '', html) + return cleantext + + +def clean_rss(content): + """Cleans content""" + content = content.replace('[…]', '') + content = content.replace('[bilder]', '') + content = content.replace('[mehr]', '') + content = content.replace('[video]', '') + content = content.replace('...[more]', '') + content = content.replace('[more]', '') + content = content.replace('[liveticker]', '') + content = content.replace('[livestream]', '') + content = content.replace('[multimedia]', '') + content = content.replace('[phoenix]', '') + content = content.replace('[swr]', '') + content = content.replace('[ndr]', '') + content = content.replace('[mdr]', '') + content = content.replace('[rbb]', '') + content = content.replace('[wdr]', '') + content = content.replace('[hr]', '') + content = content.replace('[br]', '') + content = content.replace('Click for full.', 
'') + content = content.replace('Read more »', '') + content = content.replace('Read more', '') + content = content.replace('(more…)', '') + content = content.replace('View On WordPress', '') + content = content.replace('Continue reading →', '') + content = content.replace('(RSS generated with FetchRss)', '') + content = content.replace('-- Delivered by Feed43 service', '') + content = content.replace('Meldung bei www.tagesschau.de lesen', '') + content = content.replace('<', '<') + content = content.replace('>', '>') + content = re.sub('Der Beitrag.*erschien zuerst auf MAnime.de.', '', content) + content = re.sub('The post.*appeared first on Sugoi! Anime Blog.', '', content) + content = re.sub('http://www\.serienjunkies.de/.*\.html', '', content) + return content + + +def get_content(content): + """Sanitizes content and cuts it to 250 chars""" + content = clean_rss(remove_html_tags(content).strip()) + if len(content) > 250: + content = content[0:250] + '...' + return content