This repository has been archived on 2021-04-24. You can view files and clone it, but cannot push or open issues or pull requests.
RikoBot/bot.py

431 lines
15 KiB
Python
Raw Normal View History

2016-11-21 23:04:28 +01:00
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# RSS Bot
# Python 3 required
2016-12-13 21:20:02 +01:00
import logging
2016-11-21 23:04:28 +01:00
import re
2016-12-13 21:20:02 +01:00
from configparser import ConfigParser
from json import loads
2016-12-13 21:20:02 +01:00
from urllib.parse import urlparse
2016-11-21 23:04:28 +01:00
2016-12-13 21:20:02 +01:00
import feedparser
import redis
from bs4 import BeautifulSoup
2016-11-21 23:04:28 +01:00
from telegram import ChatAction, ParseMode
2016-12-13 21:20:02 +01:00
from telegram.error import TelegramError, Unauthorized, BadRequest, TimedOut, NetworkError, ChatMigrated
2016-11-21 23:04:28 +01:00
from telegram.ext import Updater, Job, CommandHandler, MessageHandler, Filters
from telegram.ext.dispatcher import run_async
# Bot Configuration
config = ConfigParser()
config.read_file(open('config.ini'))
redis_conf = config['REDIS']
2016-12-13 21:20:02 +01:00
redis_db = redis_conf.get('db', 0)
2016-11-21 23:04:28 +01:00
redis_host = redis_conf.get('host')
redis_port = redis_conf.get('port', 6379)
redis_socket = redis_conf.get('socket_path')
# Enable logging
logging.basicConfig(format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
level=logging.ERROR)
logger = logging.getLogger(__name__)
# Admins
admins = loads(config["ADMIN"]["id"])
if not admins:
print('Keine Admin-IDs gesetzt, bitte Konfigurationsdatei bearbeiten.')
quit()
2016-11-21 23:04:28 +01:00
# Utils
if redis_socket:
r = redis.Redis(unix_socket_path=redis_socket, db=int(redis_db), decode_responses=True)
else:
r = redis.Redis(host=redis_host, port=int(redis_port), db=int(redis_db), decode_responses=True)
if not r.ping():
print('Konnte nicht mit Redis verbinden, prüfe deine Einstellungen')
quit()
2016-12-13 21:20:02 +01:00
2016-11-21 23:04:28 +01:00
def is_number(s):
try:
float(s)
return True
except ValueError:
return False
2016-12-13 21:20:02 +01:00
2016-11-21 23:04:28 +01:00
def remove_tags(html):
2016-12-13 21:20:02 +01:00
return ''.join(BeautifulSoup(html, "html.parser").findAll(text=True))
2016-11-21 23:04:28 +01:00
def can_use(update):
if update.message.from_user.id in admins:
2016-12-13 21:20:02 +01:00
return True
2016-11-21 23:04:28 +01:00
else:
2016-12-13 21:20:02 +01:00
return False
2016-11-21 23:18:48 +01:00
def cleanRSS(str):
2016-12-13 21:20:02 +01:00
str = str.replace('[…]', '')
str = str.replace('[bilder]', '')
str = str.replace('[mehr]', '')
str = str.replace('[video]', '')
str = str.replace('...[more]', '')
str = str.replace('[more]', '')
str = str.replace('[liveticker]', '')
str = str.replace('[livestream]', '')
str = str.replace('[multimedia]', '')
str = str.replace('[phoenix]', '')
str = str.replace('[swr]', '')
str = str.replace('[ndr]', '')
str = str.replace('[mdr]', '')
str = str.replace('[rbb]', '')
str = str.replace('[wdr]', '')
str = str.replace('[hr]', '')
str = str.replace('[br]', '')
str = str.replace('Click for full.', '')
str = str.replace('Read more »', '')
str = str.replace('Read more', '')
str = str.replace('(more…)', '')
str = str.replace('View On WordPress', '')
str = str.replace('Continue reading →', '')
str = str.replace('(RSS generated with FetchRss)', '')
str = str.replace('-- Delivered by Feed43 service', '')
str = str.replace('Meldung bei www.tagesschau.de lesen', '')
2017-03-17 19:21:38 +01:00
str = str.replace('<', '&lt;')
str = str.replace('>', '&gt;')
2017-05-15 16:52:08 +02:00
str = re.sub('Der Beitrag.*erschien zuerst auf MAnime.de.', '', str)
str = re.sub('The post.*appeared first on Sugoi! Anime Blog.', '', str)
2016-12-13 21:20:02 +01:00
str = re.sub('http://www\.serienjunkies.de/.*\.html', '', str)
return str
2016-11-21 23:04:28 +01:00
def check_chat(bot, username):
try:
return bot.getChat(username)
except:
return
2016-12-13 21:20:02 +01:00
2016-11-21 23:04:28 +01:00
# Commands
@run_async
def start(bot, update):
if not can_use(update):
2016-12-13 21:20:02 +01:00
return
2016-11-21 23:04:28 +01:00
bot.sendMessage(
2016-12-13 21:20:02 +01:00
chat_id=update.message.chat_id,
text='<b>Willkommen beim RSS-Bot!</b>\nLass uns anfangen! Sende /hilfe, um zu starten.',
reply_to_message_id=update.message.message_id,
parse_mode=ParseMode.HTML
)
2016-11-21 23:04:28 +01:00
@run_async
def help(bot, update):
if not can_use(update):
2016-12-13 21:20:02 +01:00
return
2016-11-21 23:04:28 +01:00
bot.sendMessage(
2016-12-13 21:20:02 +01:00
chat_id=update.message.chat_id,
text='<b>/rss</b>: Abonnierte Feeds anzeigen\n<b>/sub</b> <i>Feed-URL</i>: Feed abonnieren\n<b>/del</b> <i>n</i>: Feed löschen',
reply_to_message_id=update.message.message_id,
parse_mode=ParseMode.HTML
)
2016-11-21 23:04:28 +01:00
def subscribe_to_rss(bot, update, args):
if not can_use(update):
2016-12-13 21:20:02 +01:00
return
2016-11-21 23:04:28 +01:00
if len(args) < 1:
2016-12-13 21:20:02 +01:00
bot.sendMessage(chat_id=update.message.chat_id, text='Bitte gebe eine Feed-URL ein.',
reply_to_message_id=update.message.message_id)
return
2016-11-21 23:04:28 +01:00
feed_url = args[0]
is_url = re.search("http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+", feed_url)
if not is_url:
2016-12-13 21:20:02 +01:00
bot.sendMessage(chat_id=update.message.chat_id, text='Dies ist keine URL.',
reply_to_message_id=update.message.message_id)
return
2016-11-21 23:04:28 +01:00
if len(args) > 1:
2016-12-13 21:20:02 +01:00
username = args[1]
chat_info = check_chat(bot, username)
if not chat_info:
bot.sendMessage(chat_id=update.message.chat_id, text='Dieser Kanal existiert nicht!',
reply_to_message_id=update.message.message_id)
return
chat_id = str(chat_info.id)
2016-11-21 23:04:28 +01:00
else:
chat_id = str(update.message.chat_id)
2016-12-13 21:20:02 +01:00
2016-11-21 23:04:28 +01:00
if r.sismember('pythonbot:rss:' + chat_id, feed_url):
2016-12-13 21:20:02 +01:00
bot.sendMessage(chat_id=update.message.chat_id, text='Dieser Feed wurde bereits abonniert.',
reply_to_message_id=update.message.message_id)
return
2016-11-21 23:04:28 +01:00
bot.sendChatAction(update.message.chat_id, action=ChatAction.TYPING)
feed_data = feedparser.parse(feed_url)
if not 'link' in feed_data.feed:
2016-12-13 21:20:02 +01:00
bot.sendMessage(chat_id=update.message.chat_id, text='Kein gültiger Feed.',
reply_to_message_id=update.message.message_id)
return
2016-11-21 23:04:28 +01:00
if not 'title' in feed_data.feed:
2016-12-13 21:20:02 +01:00
feed_title = 'Unbekannten Feed'
2016-11-21 23:04:28 +01:00
else:
2016-12-13 21:20:02 +01:00
feed_title = feed_data.feed.title
if len(feed_data.entries) > 0:
if not 'id' in feed_data.entries[0]:
last_entry = feed_data.entries[0].link
else:
last_entry = feed_data.entries[0].id
lhash = 'pythonbot:rss:' + feed_url + ':last_entry'
if not r.exists(lhash):
2016-12-13 21:20:02 +01:00
r.set(lhash, last_entry)
2016-11-21 23:04:28 +01:00
r.sadd('pythonbot:rss:' + feed_url + ':subs', int(chat_id))
r.sadd('pythonbot:rss:' + chat_id, feed_url)
bot.sendMessage(
2016-12-13 21:20:02 +01:00
chat_id=update.message.chat_id,
text='<b>' + feed_title + '</b> hinzugefügt!',
reply_to_message_id=update.message.message_id,
parse_mode=ParseMode.HTML
)
2016-11-21 23:04:28 +01:00
def unsubscribe_rss(bot, update, args):
if not can_use(update):
2016-12-13 21:20:02 +01:00
return
2016-11-21 23:04:28 +01:00
if len(args) < 1:
2016-12-13 21:20:02 +01:00
bot.sendMessage(chat_id=update.message.chat_id, text='Bitte gebe eine Nummer ein',
reply_to_message_id=update.message.message_id)
2016-11-21 23:04:28 +01:00
return
2016-12-13 21:20:02 +01:00
if len(args) > 1:
username = args[1]
chat_info = check_chat(bot, username)
if not chat_info:
bot.sendMessage(chat_id=update.message.chat_id, text='Dieser Kanal existiert nicht!',
reply_to_message_id=update.message.message_id)
return
chat_id = str(chat_info.id)
2016-11-21 23:04:28 +01:00
else:
chat_id = str(update.message.chat_id)
2016-12-13 21:20:02 +01:00
2016-11-21 23:04:28 +01:00
if not is_number(args[0]):
2016-12-13 21:20:02 +01:00
bot.sendMessage(chat_id=update.message.chat_id, text='Bitte gebe eine Nummer ein.',
reply_to_message_id=update.message.message_id)
return
2016-11-21 23:04:28 +01:00
uhash = 'pythonbot:rss:' + chat_id
n = int(args[0])
subs = list(r.smembers(uhash))
if n < 1 or n > len(subs):
2016-12-13 21:20:02 +01:00
bot.sendMessage(chat_id=update.message.chat_id, text='Abonnement-ID ist zu hoch.',
reply_to_message_id=update.message.message_id)
2016-11-21 23:04:28 +01:00
return
2016-12-13 21:20:02 +01:00
sub = subs[n - 1]
2016-11-21 23:04:28 +01:00
lhash = 'pythonbot:rss:' + sub + ':subs'
r.srem(uhash, sub)
r.srem(lhash, int(chat_id))
bot.sendMessage(
2016-12-13 21:20:02 +01:00
chat_id=update.message.chat_id,
text='<b>' + sub + '</b> entfernt.',
reply_to_message_id=update.message.message_id,
parse_mode=ParseMode.HTML
)
2016-11-21 23:04:28 +01:00
left = r.smembers(lhash)
2016-12-13 21:20:02 +01:00
if len(left) < 1: # no one subscribed, remove it
2016-11-21 23:04:28 +01:00
r.delete('pythonbot:rss:' + sub + ':last_entry')
2016-12-13 21:20:02 +01:00
2016-11-21 23:04:28 +01:00
def get_rss_list(chat_id, chat_name):
uhash = 'pythonbot:rss:' + chat_id
subs = list(r.smembers(uhash))
if len(subs) < 1:
2016-12-13 21:20:02 +01:00
return '<b>Keine Feeds abonniert!</b>'
2016-11-21 23:04:28 +01:00
text = '<b>' + chat_name + '</b> hat abonniert:\n'
for n, feed in enumerate(subs):
2016-12-13 21:20:02 +01:00
text = text + str(n + 1) + ') ' + feed + '\n'
2016-11-21 23:04:28 +01:00
return text
2016-12-13 21:20:02 +01:00
@run_async
2016-11-21 23:04:28 +01:00
def list_rss(bot, update, args):
if not can_use(update):
2016-11-21 23:04:28 +01:00
return
2016-12-13 21:20:02 +01:00
if len(args) == 1:
username = args[0]
chat_info = check_chat(bot, username)
if not chat_info:
bot.sendMessage(chat_id=update.message.chat_id, text='Dieser Kanal existiert nicht!',
reply_to_message_id=update.message.message_id)
return
rss_list = get_rss_list(str(chat_info.id), chat_info.title)
2016-11-21 23:04:28 +01:00
else:
2016-12-13 21:20:02 +01:00
if update.message.chat.first_name:
chat_name = update.message.chat.first_name
else:
chat_name = update.message.chat.title
rss_list = get_rss_list(str(update.message.chat_id), chat_name)
2016-11-21 23:04:28 +01:00
bot.sendMessage(
2016-12-13 21:20:02 +01:00
chat_id=update.message.chat_id,
text=rss_list,
reply_to_message_id=update.message.message_id,
parse_mode=ParseMode.HTML
)
2016-11-21 23:04:28 +01:00
def get_new_entries(last, new_entries):
2016-11-21 23:04:28 +01:00
entries = []
2016-12-13 21:20:02 +01:00
for k, v in enumerate(new_entries):
if 'id' in v:
if v.id == last:
return entries
else:
entries.append(v)
else:
2016-12-13 21:20:02 +01:00
if v.link == last:
return entries
else:
entries.append(v)
2016-11-21 23:04:28 +01:00
return entries
2016-12-13 21:20:02 +01:00
def manually_check_rss(bot, update):
if not can_use(update):
2016-12-13 21:20:02 +01:00
return
check_rss(bot, '')
bot.sendMessage(
2016-12-13 21:20:02 +01:00
chat_id=update.message.chat_id,
text='Ausgeführt.',
reply_to_message_id=update.message.message_id
)
2016-11-21 23:04:28 +01:00
@run_async
def check_rss(bot, job):
keys = list(r.keys('pythonbot:rss:*:subs'))
for k, v in enumerate(keys):
2016-12-13 21:20:02 +01:00
p = re.compile('pythonbot:rss:(.+):subs')
match_func = p.search(v)
url = match_func.group(1)
print('RSS: ' + url)
last = r.get('pythonbot:rss:' + url + ':last_entry')
feed_data = feedparser.parse(url)
2017-05-08 12:39:04 +02:00
try:
if feed_data.status < 400:
if not 'title' in feed_data.feed:
feed_title = feed_data.feed.link
2016-12-13 21:20:02 +01:00
else:
2017-05-08 12:39:04 +02:00
feed_title = feed_data.feed.title
newentr = get_new_entries(last, feed_data.entries)
text = ''
for k2, v2 in enumerate(newentr):
if not 'title' in v2:
title = 'Kein Titel'
2016-12-13 21:20:02 +01:00
else:
2017-05-08 12:39:04 +02:00
title = remove_tags(v2.title).lstrip()
if not 'link' in v2:
link = feed_data.feed.link
link_name = link
else:
link = v2.link
f = re.search('^https?://feedproxy\.google\.com/~r/(.+?)/.*', link) # feedproxy.google.com
if f:
link_name = f.group(1)
else:
link_name = urlparse(link).netloc
link_name = re.sub('^www\d?\.', '', link_name) # www.
if 'content' in v2:
content = remove_tags(v2.content[0].value).lstrip()
content = cleanRSS(content)
if len(content) > 250:
content = content[0:250] + '...'
elif 'summary' in v2:
content = remove_tags(v2.summary).lstrip()
content = cleanRSS(content)
if len(content) > 250:
content = content[0:250] + '...'
else:
content = ''
# Für 1 Nachricht pro Beitrag, tue dies:
# Entferne hier das "text + "...
text = text + '\n<b>' + title + '</b>\n<i>' + feed_title + '</i>\n' + content + '\n<a href="' + link + '">Auf ' + link_name + ' weiterlesen</a>\n'
2017-05-15 16:52:08 +02:00
# ...und setze hier vor jeder Zeile 4 zusätzliche Leerzeichen (bis "else", Zeile 379)
2017-05-08 12:39:04 +02:00
if text != '':
if not 'id' in newentr[0]:
newlast = newentr[0].link
else:
newlast = newentr[0].id
r.set('pythonbot:rss:' + url + ':last_entry', newlast)
for k2, receiver in enumerate(list(r.smembers(v))):
try:
bot.sendMessage(receiver, text, parse_mode=ParseMode.HTML, disable_web_page_preview=True)
except Unauthorized:
print('Chat ' + receiver + ' existiert nicht mehr, lösche aus Abonnenten-Liste')
r.srem(v, receiver)
r.delete('pythonbot:rss:' + receiver)
except ChatMigrated as e:
print('Chat migriert: ' + receiver + ' -> ' + str(e.new_chat_id))
r.srem(v, receiver)
r.sadd(v, e.new_chat_id)
r.rename('pythonbot:rss:' + receiver, 'pythonbot:rss:' + str(e.new_chat_id))
bot.sendMessage(e.new_chat_id, text, parse_mode=ParseMode.HTML, disable_web_page_preview=True)
else:
print('HTTP-Fehler: ' + str(feed_data.status))
except:
print('Fehler: Seite nicht erreichbar')
print('-----------------------')
2016-11-21 23:04:28 +01:00
2016-12-13 21:20:02 +01:00
2016-11-21 23:04:28 +01:00
def error(bot, update, error):
logger.warn('Update "%s" verursachte Fehler "%s"' % (update, error))
def main():
# Create the EventHandler and pass it your bot's token.
updater = Updater(token=config['DEFAULT']['token'])
j = updater.job_queue
2016-12-13 21:20:02 +01:00
2016-11-21 23:04:28 +01:00
# Bot-Infos prüfen
bot_info = updater.bot.getMe()
print('Starte ' + bot_info.first_name + ', AKA @' + bot_info.username + ' (' + str(bot_info.id) + ')')
# Get the dispatcher to register handlers
dp = updater.dispatcher
# on different commands - answer in Telegram
dp.add_handler(CommandHandler("start", start))
dp.add_handler(CommandHandler("help", help))
dp.add_handler(CommandHandler("hilfe", help))
2016-12-13 21:20:02 +01:00
2016-11-21 23:04:28 +01:00
dp.add_handler(CommandHandler("rss", list_rss, pass_args=True))
dp.add_handler(CommandHandler("sub", subscribe_to_rss, pass_args=True))
dp.add_handler(CommandHandler("del", unsubscribe_rss, pass_args=True))
dp.add_handler(CommandHandler("sync", manually_check_rss))
2016-11-21 23:04:28 +01:00
# log all errors
dp.add_error_handler(error)
2016-12-13 21:20:02 +01:00
2016-11-21 23:04:28 +01:00
# cron
job_minute = Job(check_rss, 60.0)
j.put(job_minute, next_t=10.0)
# Start the Bot
updater.start_polling(timeout=20, clean=True)
# Run the bot until the you presses Ctrl-C or the process receives SIGINT,
# SIGTERM or SIGABRT. This should be used most of the time, since
# start_polling() is non-blocking and will stop the bot gracefully.
updater.idle()
if __name__ == '__main__':
2016-11-22 19:42:38 +01:00
main()