diff --git a/bot.py b/bot.py
index 58cee24..62a8596 100644
--- a/bot.py
+++ b/bot.py
@@ -4,28 +4,26 @@
# RSS Bot
# Python 3 required
+import logging
import re
-import redis
-import feedparser
-from json import loads
-
from configparser import ConfigParser
+from json import loads
+from urllib.parse import urlparse
+
+import feedparser
+import redis
+from bs4 import BeautifulSoup
from telegram import ChatAction, ParseMode
+from telegram.error import TelegramError, Unauthorized, BadRequest, TimedOut, NetworkError, ChatMigrated
from telegram.ext import Updater, Job, CommandHandler, MessageHandler, Filters
from telegram.ext.dispatcher import run_async
-from telegram.error import (TelegramError, Unauthorized, BadRequest,
- TimedOut, NetworkError, ChatMigrated)
-
-import logging
-from bs4 import BeautifulSoup
-from urllib.parse import urlparse
# Bot Configuration
config = ConfigParser()
# Read the INI file through a context manager so the handle is closed again
# as soon as parsing is done instead of staying open for the process lifetime.
with open('config.ini') as config_file:
    config.read_file(config_file)

# Redis connection settings from the [REDIS] section; 'db' and 'port' fall
# back to 0 and 6379 when absent, 'host' and 'socket_path' to None.
redis_conf = config['REDIS']
redis_db = redis_conf.get('db', 0)
redis_host = redis_conf.get('host')
redis_port = redis_conf.get('port', 6379)
redis_socket = redis_conf.get('socket_path')
@@ -51,6 +49,7 @@ if not r.ping():
print('Konnte nicht mit Redis verbinden, prüfe deine Einstellungen')
quit()
+
def is_number(s):
try:
float(s)
@@ -58,113 +57,125 @@ def is_number(s):
except ValueError:
return False
+
def remove_tags(html):
    """Return the concatenation of every text node found in *html*.

    The markup is parsed with Python's built-in ``html.parser`` backend.
    """
    soup = BeautifulSoup(html, "html.parser")
    text_nodes = soup.find_all(text=True)
    return ''.join(text_nodes)
+
+
def can_use(update):
    """Return True when the sender of the update's message is in ``admins``."""
    sender_id = update.message.from_user.id
    return sender_id in admins
+
def cleanRSS(str):
    """Remove known feed boilerplate ("read more" markers, footers) from a text.

    Literal markers are removed first, in the listed order (longer variants
    such as '...[more]' come before their shorter forms), followed by the
    regular-expression based footers.

    :param str: entry text to clean (the name is kept for compatibility
        with existing callers even though it shadows the builtin).
    :return: the text with every known marker removed.
    """
    literal_markers = (
        '[…]',
        '[bilder]',
        '[mehr]',
        '[video]',
        '...[more]',
        '[more]',
        '[liveticker]',
        '[livestream]',
        '[multimedia]',
        '[phoenix]',
        '[swr]',
        '[ndr]',
        '[mdr]',
        '[rbb]',
        '[wdr]',
        '[hr]',
        '[br]',
        'Click for full.',
        'Read more »',
        'Read more',
        '(more…)',
        'View On WordPress',
        'Continue reading →',
        '(RSS generated with FetchRss)',
        '-- Delivered by Feed43 service',
        'Meldung bei www.tagesschau.de lesen',
    )
    # These footers contain a variable part ('.*'), so they have to go through
    # re.sub; passing them to a plain string replace would look for a literal
    # '.*' and never match anything.
    footer_patterns = (
        r'The post.*appeared first on Sugoi! Anime Blog\.',
        r'Der Beitrag.*erschien zuerst auf MAnime\.de\.',
        r'http://www\.serienjunkies.de/.*\.html',
    )

    text = str
    for marker in literal_markers:
        text = text.replace(marker, '')
    for pattern in footer_patterns:
        text = re.sub(pattern, '', text)
    return text
+
+
def check_chat(bot, username):
    """Look up a chat via ``bot.getChat``.

    :param bot: bot object providing ``getChat``.
    :param username: chat identifier handed straight to ``bot.getChat``.
    :return: whatever ``bot.getChat`` returns, or ``None`` when the lookup
        fails with a ``TelegramError``.
    """
    try:
        return bot.getChat(username)
    except TelegramError:
        # Only API-level failures mean "no such chat"; anything else
        # (KeyboardInterrupt, programming errors) must not be hidden.
        return None
-
+
+
# Commands
@run_async
def start(bot, update):
    """Reply to the triggering message with the welcome text.

    Nothing is sent when ``can_use(update)`` is false.
    """
    if can_use(update):
        incoming = update.message
        greeting = 'Willkommen beim RSS-Bot!\nLass uns anfangen! Sende /hilfe, um zu starten.'
        bot.sendMessage(
            chat_id=incoming.chat_id,
            text=greeting,
            reply_to_message_id=incoming.message_id,
            parse_mode=ParseMode.HTML,
        )
+
@run_async
def help(bot, update):
    """Reply to the triggering message with the list of commands.

    Nothing is sent when ``can_use(update)`` is false.
    """
    if can_use(update):
        incoming = update.message
        usage = '/rss: Abonnierte Feeds anzeigen\n/sub Feed-URL: Feed abonnieren\n/del n: Feed löschen'
        bot.sendMessage(
            chat_id=incoming.chat_id,
            text=usage,
            reply_to_message_id=incoming.message_id,
            parse_mode=ParseMode.HTML,
        )
+
def subscribe_to_rss(bot, update, args):
if not can_use(update):
- return
+ return
if len(args) < 1:
- bot.sendMessage(chat_id=update.message.chat_id, text='Bitte gebe eine Feed-URL ein.', reply_to_message_id=update.message.message_id)
- return
+ bot.sendMessage(chat_id=update.message.chat_id, text='Bitte gebe eine Feed-URL ein.',
+ reply_to_message_id=update.message.message_id)
+ return
feed_url = args[0]
is_url = re.search("http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+", feed_url)
if not is_url:
- bot.sendMessage(chat_id=update.message.chat_id, text='Dies ist keine URL.', reply_to_message_id=update.message.message_id)
- return
+ bot.sendMessage(chat_id=update.message.chat_id, text='Dies ist keine URL.',
+ reply_to_message_id=update.message.message_id)
+ return
if len(args) > 1:
- username = args[1]
- chat_info = check_chat(bot, username)
- if not chat_info:
- bot.sendMessage(chat_id=update.message.chat_id, text='Dieser Kanal existiert nicht!', reply_to_message_id=update.message.message_id)
- return
- chat_id = str(chat_info.id)
+ username = args[1]
+ chat_info = check_chat(bot, username)
+ if not chat_info:
+ bot.sendMessage(chat_id=update.message.chat_id, text='Dieser Kanal existiert nicht!',
+ reply_to_message_id=update.message.message_id)
+ return
+ chat_id = str(chat_info.id)
else:
chat_id = str(update.message.chat_id)
-
+
if r.sismember('pythonbot:rss:' + chat_id, feed_url):
- bot.sendMessage(chat_id=update.message.chat_id, text='Dieser Feed wurde bereits abonniert.', reply_to_message_id=update.message.message_id)
- return
+ bot.sendMessage(chat_id=update.message.chat_id, text='Dieser Feed wurde bereits abonniert.',
+ reply_to_message_id=update.message.message_id)
+ return
bot.sendChatAction(update.message.chat_id, action=ChatAction.TYPING)
feed_data = feedparser.parse(feed_url)
if not 'link' in feed_data.feed:
- bot.sendMessage(chat_id=update.message.chat_id, text='Kein gültiger Feed.',reply_to_message_id=update.message.message_id)
- return
-
+ bot.sendMessage(chat_id=update.message.chat_id, text='Kein gültiger Feed.',
+ reply_to_message_id=update.message.message_id)
+ return
+
if not 'title' in feed_data.feed:
- feed_title = 'Unbekannten Feed'
+ feed_title = 'Unbekannten Feed'
else:
- feed_title = feed_data.feed.title
-
+ feed_title = feed_data.feed.title
+
if len(feed_data.entries) > 0:
if not 'id' in feed_data.entries[0]:
last_entry = feed_data.entries[0].link
@@ -172,191 +183,202 @@ def subscribe_to_rss(bot, update, args):
last_entry = feed_data.entries[0].id
lhash = 'pythonbot:rss:' + feed_url + ':last_entry'
if not r.exists(lhash):
- r.set(lhash, last_entry)
+ r.set(lhash, last_entry)
r.sadd('pythonbot:rss:' + feed_url + ':subs', int(chat_id))
r.sadd('pythonbot:rss:' + chat_id, feed_url)
bot.sendMessage(
- chat_id = update.message.chat_id,
- text = '' + feed_title + ' hinzugefügt!',
- reply_to_message_id = update.message.message_id,
- parse_mode = ParseMode.HTML
- )
+ chat_id=update.message.chat_id,
+ text='' + feed_title + ' hinzugefügt!',
+ reply_to_message_id=update.message.message_id,
+ parse_mode=ParseMode.HTML
+ )
+
def unsubscribe_rss(bot, update, args):
    """Remove the n-th subscribed feed of a chat.

    :param bot: bot object used to send the replies.
    :param update: incoming update; replies go to its message.
    :param args: ``args[0]`` is the 1-based position of the feed in
        ``list(r.smembers('pythonbot:rss:<chat_id>'))``; the optional
        ``args[1]`` names another chat (resolved through ``check_chat``)
        whose subscription should be removed instead of the current chat's.

    Does nothing when ``can_use(update)`` is false.
    """
    if not can_use(update):
        return

    message = update.message

    def reply(text, **extra):
        # Every answer of this command is a reply to the triggering message.
        bot.sendMessage(chat_id=message.chat_id, text=text,
                        reply_to_message_id=message.message_id, **extra)

    if len(args) < 1:
        reply('Bitte gebe eine Nummer ein')
        return

    if len(args) > 1:
        chat_info = check_chat(bot, args[1])
        if not chat_info:
            reply('Dieser Kanal existiert nicht!')
            return
        chat_id = str(chat_info.id)
    else:
        chat_id = str(message.chat_id)

    # Parse with int() directly: a float-based check would let inputs such as
    # '1.5' or '1e3' through, and the later int() conversion would then raise.
    try:
        n = int(args[0])
    except ValueError:
        reply('Bitte gebe eine Nummer ein.')
        return

    uhash = 'pythonbot:rss:' + chat_id
    subs = list(r.smembers(uhash))
    if n < 1 or n > len(subs):
        reply('Abonnement-ID ist zu hoch.')
        return

    sub = subs[n - 1]
    lhash = 'pythonbot:rss:' + sub + ':subs'
    r.srem(uhash, sub)
    r.srem(lhash, int(chat_id))
    reply('' + sub + ' entfernt.', parse_mode=ParseMode.HTML)

    if not r.smembers(lhash):
        # Nobody is subscribed to this feed any more, drop its bookmark too.
        r.delete('pythonbot:rss:' + sub + ':last_entry')
-
+
def get_rss_list(chat_id, chat_name):
    """Build the numbered list of feeds stored under ``pythonbot:rss:<chat_id>``.

    Returns 'Keine Feeds abonniert!' when the set is empty; otherwise a
    header line naming *chat_name* followed by one ``n) feed`` line per
    member, numbered from 1.
    """
    feeds = list(r.smembers('pythonbot:rss:' + chat_id))
    if not feeds:
        return 'Keine Feeds abonniert!'
    header = '' + chat_name + ' hat abonniert:\n'
    rows = (str(position) + ') ' + feed + '\n'
            for position, feed in enumerate(feeds, start=1))
    return header + ''.join(rows)
@run_async
def list_rss(bot, update, args):
    """Reply with the feed list of the current chat or of a named chat.

    With exactly one argument the chat is resolved through ``check_chat``
    and listed under its title; otherwise the chat of the triggering message
    is listed under its first name or, failing that, its title.
    Does nothing when ``can_use(update)`` is false.
    """
    if not can_use(update):
        return

    message = update.message
    if len(args) == 1:
        chat_info = check_chat(bot, args[0])
        if not chat_info:
            bot.sendMessage(chat_id=message.chat_id, text='Dieser Kanal existiert nicht!',
                            reply_to_message_id=message.message_id)
            return
        rss_list = get_rss_list(str(chat_info.id), chat_info.title)
    else:
        chat = message.chat
        chat_name = chat.first_name or chat.title
        rss_list = get_rss_list(str(message.chat_id), chat_name)

    bot.sendMessage(
        chat_id=message.chat_id,
        text=rss_list,
        reply_to_message_id=message.message_id,
        parse_mode=ParseMode.HTML,
    )
+
def get_new_entries(last, new_entries):
    """Return the entries that precede the one identified by *last*.

    Each entry is identified by its ``id`` when it has one and by its
    ``link`` otherwise. Entries are collected in order until that identifier
    equals *last*; when no entry matches, all of them are returned.
    """
    fresh = []
    for entry in new_entries:
        marker = entry.id if 'id' in entry else entry.link
        if marker == last:
            break
        fresh.append(entry)
    return fresh
+
def manually_check_rss(bot, update):
    """Call ``check_rss`` and confirm with a reply to the triggering message.

    Does nothing when ``can_use(update)`` is false.
    """
    if can_use(update):
        check_rss(bot, '')
        incoming = update.message
        bot.sendMessage(
            chat_id=incoming.chat_id,
            text='Ausgeführt.',
            reply_to_message_id=incoming.message_id,
        )
+
# Patterns used by check_rss and its helpers, compiled once at import time.
_SUBS_KEY_RE = re.compile('pythonbot:rss:(.+):subs')  # captures the feed URL
_FEEDPROXY_RE = re.compile(r'^https?://feedproxy\.google\.com/~r/(.+?)/.*')
_WWW_PREFIX_RE = re.compile(r'^www\d?\.')
_MAX_EXCERPT = 250  # characters of entry text kept before '...' is appended


def _entry_excerpt(entry):
    """Return the tag-free, cleaned and truncated body text of *entry* ('' if none)."""
    if 'content' in entry:
        raw = entry.content[0].value
    elif 'summary' in entry:
        raw = entry.summary
    else:
        return ''
    excerpt = cleanRSS(remove_tags(raw).lstrip())
    if len(excerpt) > _MAX_EXCERPT:
        excerpt = excerpt[0:_MAX_EXCERPT] + '...'
    return excerpt


def _entry_source_name(entry, feed_link):
    """Return the name shown as the entry's source: feedproxy feed name, host, or *feed_link*."""
    if 'link' not in entry:
        return feed_link
    proxied = _FEEDPROXY_RE.search(entry.link)
    if proxied:
        name = proxied.group(1)
    else:
        name = urlparse(entry.link).netloc
    # NOTE(review): assumed to apply to both branches above - confirm against
    # the original indentation.
    return _WWW_PREFIX_RE.sub('', name)


def _format_entry(entry, feed_title, feed_link):
    """Render one feed entry as the text block that is appended to the message."""
    if 'title' in entry:
        title = remove_tags(entry.title).lstrip()
    else:
        title = 'Kein Titel'
    content = _entry_excerpt(entry)
    link_name = _entry_source_name(entry, feed_link)
    return '\n' + title + '\n' + feed_title + '\n' + content + '\nAuf ' + link_name + ' weiterlesen\n'


@run_async
def check_rss(bot, job):
    """Fetch every subscribed feed and send its new entries to all subscribers.

    For each ``pythonbot:rss:<url>:subs`` key the feed at ``<url>`` is parsed,
    the entries in front of the stored ``last_entry`` marker are formatted
    into one message, the marker is moved to the newest entry and the message
    is sent to every member of the subscriber set.

    :param bot: bot object used for sending.
    :param job: unused; present because the function is scheduled as a job.
    """
    for subs_key in r.keys('pythonbot:rss:*:subs'):
        url = _SUBS_KEY_RE.search(subs_key).group(1)
        print('RSS: ' + url)
        last_key = 'pythonbot:rss:' + url + ':last_entry'
        last = r.get(last_key)

        feed_data = feedparser.parse(url)
        # 'status' is missing when no HTTP response was obtained at all;
        # treat that like an HTTP error instead of failing the whole run.
        status = feed_data.get('status')
        if status is None or status >= 400:
            print('HTTP-Fehler: ' + str(status))
            continue

        feed_link = feed_data.feed.get('link', url)
        feed_title = feed_data.feed.title if 'title' in feed_data.feed else feed_link

        new_entries = get_new_entries(last, feed_data.entries)
        if not new_entries:
            continue
        text = ''.join(_format_entry(entry, feed_title, feed_link) for entry in new_entries)

        # The first entry returned by the feed becomes the new bookmark.
        newest = new_entries[0]
        r.set(last_key, newest.id if 'id' in newest else newest.link)

        for receiver in list(r.smembers(subs_key)):
            try:
                bot.sendMessage(receiver, text, parse_mode=ParseMode.HTML, disable_web_page_preview=True)
            except Unauthorized:
                print('Chat ' + receiver + ' existiert nicht mehr, lösche aus Abonnenten-Liste')
                r.srem(subs_key, receiver)
                r.delete('pythonbot:rss:' + receiver)
            except ChatMigrated as e:
                print('Chat migriert: ' + receiver + ' -> ' + str(e.new_chat_id))
                r.srem(subs_key, receiver)
                r.sadd(subs_key, e.new_chat_id)
                r.rename('pythonbot:rss:' + receiver, 'pythonbot:rss:' + str(e.new_chat_id))
                bot.sendMessage(e.new_chat_id, text, parse_mode=ParseMode.HTML, disable_web_page_preview=True)
            except TelegramError as e:
                # One failing chat must not stop delivery to the remaining
                # subscribers and feeds.
                print('Senden an ' + str(receiver) + ' fehlgeschlagen: ' + str(e))
    print('----------')
+
def error(bot, update, error):
    """Log a warning naming the update and the error it caused.

    :param bot: unused.
    :param update: the update that was being processed.
    :param error: the error that was raised.
    """
    # Logger.warn is a deprecated alias of Logger.warning; passing the values
    # as arguments defers formatting until the record is actually emitted.
    logger.warning('Update "%s" verursachte Fehler "%s"', update, error)
@@ -365,7 +387,7 @@ def main():
# Create the EventHandler and pass it your bot's token.
updater = Updater(token=config['DEFAULT']['token'])
j = updater.job_queue
-
+
# Bot-Infos prüfen
bot_info = updater.bot.getMe()
print('Starte ' + bot_info.first_name + ', AKA @' + bot_info.username + ' (' + str(bot_info.id) + ')')
@@ -377,7 +399,7 @@ def main():
dp.add_handler(CommandHandler("start", start))
dp.add_handler(CommandHandler("help", help))
dp.add_handler(CommandHandler("hilfe", help))
-
+
dp.add_handler(CommandHandler("rss", list_rss, pass_args=True))
dp.add_handler(CommandHandler("sub", subscribe_to_rss, pass_args=True))
dp.add_handler(CommandHandler("del", unsubscribe_rss, pass_args=True))
@@ -385,7 +407,7 @@ def main():
# log all errors
dp.add_error_handler(error)
-
+
# cron
job_minute = Job(check_rss, 60.0)
j.put(job_minute, next_t=10.0)