Kompletter Rewrite

This commit is contained in:
Andreas Bielawski 2017-09-20 23:25:57 +02:00
parent e22b9c11af
commit b7fe806c8f
6 changed files with 416 additions and 382 deletions

8
.gitignore vendored
View File

@ -1,7 +1,11 @@
# Byte-compiled / optimized / DLL files
# Byte-compiled / optimized / DLL files / pip
__pycache__/
*.py[cod]
*$py.class
src/
# Config
# IDE
.idea/
# Bot-specific files
config.ini

View File

@ -1,13 +1,12 @@
RSS-Bot für Telegram
=====================
1. `git clone https://gitlab.com/iCON/rssbot`
2. `sudo apt-get install python3 python3-pip`
3. `sudo pip3 install -r requirements.txt`
4. `cp config.ini.example config.ini`
5. Bot-Token in `config.ini` einfügen
1. **Klonen:** `git clone https://git.centzilius.de/iCON/rssbot`
2. **Python 3 installieren:** `sudo apt-get install python3 python3-pip`
3. **Module installieren:** `sudo pip3 install -r requirements.txt`
4. **Config kopieren:** `cp config.ini.example config.ini`
5. **Bot-Token** in `config.ini` einfügen
1. Weitere Einstellungen für Redis vornehmen, falls sie vom Standard abweichen
6. `bot.py` öffnen und unter `def can_use(update):` die ID zur eigenen abändern
6. **Eigene Admin-ID in die Config eintragen**
7. `python3 bot.py`
(c) 2016-2017 Andreas Bielawski

678
bot.py
View File

@ -1,428 +1,378 @@
#!/usr/bin/env python
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# RSS Bot
# Python 3 required
import html
import logging
import re
import sys
from configparser import ConfigParser
from json import loads
from urllib.parse import urlparse
import feedparser
import redis
from bs4 import BeautifulSoup
from telegram import ChatAction, ParseMode
from telegram.error import TelegramError, Unauthorized, BadRequest, TimedOut, NetworkError, ChatMigrated
from telegram.ext import Updater, Job, CommandHandler, MessageHandler, Filters
import telegram
from telegram.ext import CommandHandler, Updater
from telegram.ext.dispatcher import run_async
# Bot Configuration
config = ConfigParser()
config.read_file(open('config.ini'))
import utils
redis_conf = config['REDIS']
redis_db = redis_conf.get('db', 0)
redis_host = redis_conf.get('host')
redis_port = redis_conf.get('port', 6379)
redis_socket = redis_conf.get('socket_path')
# Enable logging
logging.basicConfig(format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
level=logging.ERROR)
# Logging
logging.basicConfig(
format="%(asctime)s - %(levelname)s: %(message)s",
datefmt="%d.%m.%Y %H:%M:%S",
level=logging.INFO
)
logger = logging.getLogger(__name__)
# Admins
admins = loads(config["ADMIN"]["id"])
if not admins:
print('Keine Admin-IDs gesetzt, bitte Konfigurationsdatei bearbeiten.')
quit()
# Bot configuration
config = ConfigParser()
try:
config.read_file(open('config.ini'))
except FileNotFoundError:
logger.critical('Config.ini nicht gefunden')
sys.exit(1)
# Utils
# Bot token
try:
bot_token = config['DEFAULT']['token']
except KeyError:
logger.error('Kein Bot-Token gesetzt, bitte config.ini prüfen')
sys.exit(1)
if not bot_token:
logger.error('Kein Bot-Token gesetzt, bitte config.ini prüfen')
sys.exit(1)
# Admins
try:
admins = loads(config["ADMIN"]["id"])
except KeyError:
logger.error('Keine Admin-IDs gesetzt, bitte config.ini prüfen.')
sys.exit(1)
if not admins:
logger.error('Keine Admin-IDs gesetzt, bitte config.ini prüfen.')
sys.exit(1)
for admin in admins:
if not isinstance(admin, int):
logger.error('Admin-IDs müssen Integer sein.')
sys.exit(1)
# Redis
redis_conf = config['REDIS']
redis_db = redis_conf.get('db', 0)
redis_host = redis_conf.get('host', '127.0.0.1')
redis_port = redis_conf.get('port', 6379)
redis_socket = redis_conf.get('socket_path')
if redis_socket:
r = redis.Redis(unix_socket_path=redis_socket, db=int(redis_db), decode_responses=True)
else:
r = redis.Redis(host=redis_host, port=int(redis_port), db=int(redis_db), decode_responses=True)
if not r.ping():
print('Konnte nicht mit Redis verbinden, prüfe deine Einstellungen')
quit()
logging.getLogger("Redis").critical("Redis-Verbindungsfehler, config.ini prüfen")
sys.exit(1)
feed_hash = 'pythonbot:rss:{0}'
def is_number(s):
try:
float(s)
return True
except ValueError:
return False
def remove_tags(html):
return ''.join(BeautifulSoup(html, "html.parser").findAll(text=True))
def can_use(update):
if update.message.from_user.id in admins:
return True
else:
return False
def cleanRSS(str):
str = str.replace('[…]', '')
str = str.replace('[bilder]', '')
str = str.replace('[mehr]', '')
str = str.replace('[video]', '')
str = str.replace('...[more]', '')
str = str.replace('[more]', '')
str = str.replace('[liveticker]', '')
str = str.replace('[livestream]', '')
str = str.replace('[multimedia]', '')
str = str.replace('[phoenix]', '')
str = str.replace('[swr]', '')
str = str.replace('[ndr]', '')
str = str.replace('[mdr]', '')
str = str.replace('[rbb]', '')
str = str.replace('[wdr]', '')
str = str.replace('[hr]', '')
str = str.replace('[br]', '')
str = str.replace('Click for full.', '')
str = str.replace('Read more »', '')
str = str.replace('Read more', '')
str = str.replace('(more…)', '')
str = str.replace('View On WordPress', '')
str = str.replace('Continue reading →', '')
str = str.replace('(RSS generated with FetchRss)', '')
str = str.replace('-- Delivered by Feed43 service', '')
str = str.replace('Meldung bei www.tagesschau.de lesen', '')
str = str.replace('<', '&lt;')
str = str.replace('>', '&gt;')
str = re.sub('Der Beitrag.*erschien zuerst auf MAnime.de.', '', str)
str = re.sub('The post.*appeared first on Sugoi! Anime Blog.', '', str)
str = re.sub('http://www\.serienjunkies.de/.*\.html', '', str)
return str
def check_chat(bot, username):
try:
return bot.getChat(username)
except:
return
# Commands
@run_async
def start(bot, update):
if not can_use(update):
if not utils.can_use_bot(update):
return
bot.sendMessage(
chat_id=update.message.chat_id,
text='<b>Willkommen beim RSS-Bot!</b>\nLass uns anfangen! Sende /hilfe, um zu starten.',
reply_to_message_id=update.message.message_id,
parse_mode=ParseMode.HTML
update.message.reply_text(
text='<b>Willkommen beim RSS-Bot!</b>\nSende /help, um zu starten.',
parse_mode=telegram.ParseMode.HTML
)
@run_async
def help(bot, update):
if not can_use(update):
def help_text(bot, update):
if not utils.can_use_bot(update):
return
bot.sendMessage(
chat_id=update.message.chat_id,
text='<b>/rss</b>: Abonnierte Feeds anzeigen\n<b>/sub</b> <i>Feed-URL</i>: Feed abonnieren\n<b>/del</b> <i>n</i>: Feed löschen',
reply_to_message_id=update.message.message_id,
parse_mode=ParseMode.HTML
update.message.reply_text(
text='<b>/rss</b> <i>[Chat]</i>: Abonnierte Feeds anzeigen\n'
'<b>/sub</b> <i>Feed-URL</i> <i>[Chat]</i>: Feed abonnieren\n'
'<b>/del</b> <i>n</i> <i>[Chat]</i>: Feed löschen',
parse_mode=telegram.ParseMode.HTML
)
def subscribe_to_rss(bot, update, args):
if not can_use(update):
@run_async
def list_feeds(bot, update, args):
if not utils.can_use_bot(update):
return
if len(args) < 1:
bot.sendMessage(chat_id=update.message.chat_id, text='Bitte gebe eine Feed-URL ein.',
reply_to_message_id=update.message.message_id)
if args:
chat_name = args[0]
try:
resp = bot.getChat(chat_name)
except telegram.error.BadRequest:
update.message.reply_text('❌ Dieser Kanal existiert nicht.')
return
chat_id = str(resp.id)
chat_title = resp.title
else:
chat_id = str(update.message.chat.id)
if update.message.chat.type == 'private':
chat_title = update.message.chat.first_name
else:
chat_title = update.message.chat.title
subs = r.smembers(feed_hash.format(chat_id))
if not subs:
text = '❌ Keine Feeds abonniert.'
else:
text = '<b>' + html.escape(chat_title) + '</b> hat abonniert:\n'
for n, feed in enumerate(subs):
text += '<b>' + str(n + 1) + ')</b> ' + feed + '\n'
update.message.reply_text(
text=text,
parse_mode=telegram.ParseMode.HTML
)
@run_async
def subscribe(bot, update, args):
if not utils.can_use_bot(update):
return
if not args:
update.message.reply_text('❌ Keine Feed-URL angegeben.')
return
feed_url = args[0]
is_url = re.search("http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+", feed_url)
if not is_url:
bot.sendMessage(chat_id=update.message.chat_id, text='Dies ist keine URL.',
reply_to_message_id=update.message.message_id)
if not re.match("^http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*(),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+$", feed_url):
update.message.reply_text('❌ Das ist keine URL.')
return
# Get Chat ID from name if given
if len(args) > 1:
username = args[1]
chat_info = check_chat(bot, username)
if not chat_info:
bot.sendMessage(chat_id=update.message.chat_id, text='Dieser Kanal existiert nicht!',
reply_to_message_id=update.message.message_id)
return
chat_id = str(chat_info.id)
else:
chat_id = str(update.message.chat_id)
if r.sismember('pythonbot:rss:' + chat_id, feed_url):
bot.sendMessage(chat_id=update.message.chat_id, text='Dieser Feed wurde bereits abonniert.',
reply_to_message_id=update.message.message_id)
return
bot.sendChatAction(update.message.chat_id, action=ChatAction.TYPING)
feed_data = feedparser.parse(feed_url)
if not 'link' in feed_data.feed:
bot.sendMessage(chat_id=update.message.chat_id, text='Kein gültiger Feed.',
reply_to_message_id=update.message.message_id)
return
if not 'title' in feed_data.feed:
feed_title = 'Unbekannten Feed'
else:
feed_title = feed_data.feed.title
if len(feed_data.entries) > 0:
if not 'id' in feed_data.entries[0]:
last_entry = feed_data.entries[0].link
else:
last_entry = feed_data.entries[0].id
lhash = 'pythonbot:rss:' + feed_url + ':last_entry'
if not r.exists(lhash):
r.set(lhash, last_entry)
r.sadd('pythonbot:rss:' + feed_url + ':subs', int(chat_id))
r.sadd('pythonbot:rss:' + chat_id, feed_url)
bot.sendMessage(
chat_id=update.message.chat_id,
text='<b>' + feed_title + '</b> hinzugefügt!',
reply_to_message_id=update.message.message_id,
parse_mode=ParseMode.HTML
)
def unsubscribe_rss(bot, update, args):
if not can_use(update):
return
if len(args) < 1:
bot.sendMessage(chat_id=update.message.chat_id, text='Bitte gebe eine Nummer ein',
reply_to_message_id=update.message.message_id)
return
if len(args) > 1:
username = args[1]
chat_info = check_chat(bot, username)
if not chat_info:
bot.sendMessage(chat_id=update.message.chat_id, text='Dieser Kanal existiert nicht!',
reply_to_message_id=update.message.message_id)
return
chat_id = str(chat_info.id)
else:
chat_id = str(update.message.chat_id)
if not is_number(args[0]):
bot.sendMessage(chat_id=update.message.chat_id, text='Bitte gebe eine Nummer ein.',
reply_to_message_id=update.message.message_id)
return
uhash = 'pythonbot:rss:' + chat_id
n = int(args[0])
subs = list(r.smembers(uhash))
if n < 1 or n > len(subs):
bot.sendMessage(chat_id=update.message.chat_id, text='Abonnement-ID ist zu hoch.',
reply_to_message_id=update.message.message_id)
return
sub = subs[n - 1]
lhash = 'pythonbot:rss:' + sub + ':subs'
r.srem(uhash, sub)
r.srem(lhash, int(chat_id))
bot.sendMessage(
chat_id=update.message.chat_id,
text='<b>' + sub + '</b> entfernt.',
reply_to_message_id=update.message.message_id,
parse_mode=ParseMode.HTML
)
left = r.smembers(lhash)
if len(left) < 1: # no one subscribed, remove it
r.delete('pythonbot:rss:' + sub + ':last_entry')
def get_rss_list(chat_id, chat_name):
uhash = 'pythonbot:rss:' + chat_id
subs = list(r.smembers(uhash))
if len(subs) < 1:
return '<b>Keine Feeds abonniert!</b>'
text = '<b>' + chat_name + '</b> hat abonniert:\n'
for n, feed in enumerate(subs):
text = text + str(n + 1) + ') ' + feed + '\n'
return text
@run_async
def list_rss(bot, update, args):
if not can_use(update):
return
if len(args) == 1:
username = args[0]
chat_info = check_chat(bot, username)
if not chat_info:
bot.sendMessage(chat_id=update.message.chat_id, text='Dieser Kanal existiert nicht!',
reply_to_message_id=update.message.message_id)
return
rss_list = get_rss_list(str(chat_info.id), chat_info.title)
else:
if update.message.chat.first_name:
chat_name = update.message.chat.first_name
else:
chat_name = update.message.chat.title
rss_list = get_rss_list(str(update.message.chat_id), chat_name)
bot.sendMessage(
chat_id=update.message.chat_id,
text=rss_list,
reply_to_message_id=update.message.message_id,
parse_mode=ParseMode.HTML
)
def get_new_entries(last, new_entries):
entries = []
for k, v in enumerate(new_entries):
if 'id' in v:
if v.id == last:
return entries
else:
entries.append(v)
else:
if v.link == last:
return entries
else:
entries.append(v)
return entries
def manually_check_rss(bot, update):
if not can_use(update):
return
check_rss(bot, '')
bot.sendMessage(
chat_id=update.message.chat_id,
text='Ausgeführt.',
reply_to_message_id=update.message.message_id
)
@run_async
def check_rss(bot, job):
keys = list(r.keys('pythonbot:rss:*:subs'))
for k, v in enumerate(keys):
p = re.compile('pythonbot:rss:(.+):subs')
match_func = p.search(v)
url = match_func.group(1)
print('RSS: ' + url)
last = r.get('pythonbot:rss:' + url + ':last_entry')
feed_data = feedparser.parse(url)
chat_name = args[1]
try:
if feed_data.status < 400:
if not 'title' in feed_data.feed:
feed_title = feed_data.feed.link
resp = bot.getChat(chat_name)
except telegram.error.BadRequest:
update.message.reply_text('❌ Dieser Kanal existiert nicht.')
return
chat_id = str(resp.id)
resp = bot.getChatMember(chat_id, bot.id)
if resp.status != 'administrator':
update.message.reply_text('❌ Bot ist kein Administrator in diesem Kanal.')
return
else:
feed_title = feed_data.feed.title
newentr = get_new_entries(last, feed_data.entries)
text = ''
for k2, v2 in enumerate(newentr):
if not 'title' in v2:
title = 'Kein Titel'
chat_id = str(update.message.chat.id)
bot.sendChatAction(update.message.chat.id, action=telegram.ChatAction.TYPING)
data = feedparser.parse(feed_url)
if 'link' not in data.feed:
update.message.reply_text('❌ Kein gültiger Feed.')
return
feed_url = data.href # Follow all redirects
if r.sismember(feed_hash.format(chat_id), feed_url):
update.message.reply_text('✅ Dieser Feed wurde bereits abonniert.')
return
if 'title' not in data.feed:
feed_title = feed_url
else:
title = html.escape(remove_tags(v2.title).lstrip())
if not 'link' in v2:
link = feed_data.feed.link
link_name = link
feed_title = html.escape(data.feed['title'])
# Save the last entry in Redis, if it doesn't exist
if data.entries:
last_entry_hash = feed_hash.format(feed_url + ':last_entry')
if not r.exists(last_entry_hash):
if 'id' not in data.entries[0]:
last_entry = data.entries[0]['link']
else:
link = v2.link
f = re.search('^https?://feedproxy\.google\.com/~r/(.+?)/.*', link) # feedproxy.google.com
if f:
link_name = f.group(1)
last_entry = data.entries[0]['id']
r.set(last_entry_hash, last_entry)
r.sadd(feed_hash.format(feed_url + ':subs'), chat_id)
r.sadd(feed_hash.format(chat_id), feed_url)
update.message.reply_text(
text='✅ <b>' + feed_title + '</b> hinzugefügt!',
parse_mode=telegram.ParseMode.HTML
)
@run_async
def unsubscribe(bot, update, args):
if not utils.can_use_bot(update):
return
if not args:
update.message.reply_text('❌ Keine Nummer angegeben.')
return
# Get Chat ID from name if given
if len(args) > 1:
chat_name = args[1]
try:
resp = bot.getChat(chat_name)
except telegram.error.BadRequest:
update.message.reply_text('❌ Dieser Kanal existiert nicht.')
return
chat_id = str(resp.id)
else:
link_name = urlparse(link).netloc
link_name = re.sub('^www\d?\.', '', link_name) # www.
if 'content' in v2:
content = remove_tags(v2.content[0].value).lstrip()
content = cleanRSS(content)
if len(content) > 250:
content = content[0:250] + '...'
elif 'summary' in v2:
content = remove_tags(v2.summary).lstrip()
content = cleanRSS(content)
if len(content) > 250:
content = content[0:250] + '...'
chat_id = str(update.message.chat.id)
try:
n = int(args[0])
except ValueError:
update.message.reply_text('❌ Keine Nummer angegeben.')
return
chat_hash = feed_hash.format(chat_id)
subs = r.smembers(chat_hash)
if n < 1:
update.message.reply_text('❌ Nummer muss größer als 0 sein!')
return
elif n > len(subs):
update.message.reply_text('❌ Feed-ID zu hoch.')
return
feed_url = list(subs)[n - 1]
sub_hash = feed_hash.format(feed_url + ':subs')
r.srem(chat_hash, feed_url)
r.srem(sub_hash, chat_id)
if not r.smembers(sub_hash): # no one subscribed, remove it
r.delete(feed_hash.format(feed_url + ':last_entry'))
update.message.reply_text(
text='✅ <b>' + feed_url + '</b> entfernt.',
parse_mode=telegram.ParseMode.HTML
)
@run_async
def check_feed(bot, key):
feed_url = re.match('^pythonbot:rss:(.+):subs$', key).group(1)
logger.info(feed_url)
data = feedparser.parse(feed_url)
if 'link' not in data.feed:
logger.warning('Kein gültiger Feed, Status-Code ' + str(data.status))
return None
if 'title' not in data.feed:
feed_title = data.feed['link']
else:
feed_title = data.feed['title']
last_entry_hash = feed_hash.format(feed_url + ':last_entry')
last_entry = r.get(last_entry_hash)
new_entries = utils.get_new_entries(data.entries, last_entry)
for entry in reversed(new_entries):
if 'title' not in entry:
post_title = 'Kein Titel'
else:
post_title = html.escape(utils.remove_html_tags(entry['title']).strip())
if 'link' not in entry:
post_link = entry.link
link_name = post_link
else:
post_link = entry.link
feedproxy = re.search('^https?://feedproxy\.google\.com/~r/(.+?)/.*', post_link) # feedproxy.google.com
if feedproxy:
link_name = feedproxy.group(1)
else:
link_name = urlparse(post_link).netloc
link_name = re.sub('^www\d?\.', '', link_name) # remove www.
if 'content' in entry:
content = utils.get_content(entry.content[0]['value'])
elif 'summary' in entry:
content = utils.get_content(entry.summary)
else:
content = ''
# Für 1 Nachricht pro Beitrag, tue dies:
# Entferne hier das "text + "...
text = text + '\n<b>' + title + '</b>\n<i>' + feed_title + '</i>\n' + content + '\n<a href="' + link + '">Auf ' + link_name + ' weiterlesen</a>\n'
# ...und setze hier vor jeder Zeile 4 zusätzliche Leerzeichen (bis "else", Zeile 379)
if text != '':
if not 'id' in newentr[0]:
newlast = newentr[0].link
else:
newlast = newentr[0].id
r.set('pythonbot:rss:' + url + ':last_entry', newlast)
for k2, receiver in enumerate(list(r.smembers(v))):
text = '<b>{post_title}</b>\n<i>{feed_title}</i>\n{content}'.format(
post_title=post_title,
feed_title=feed_title,
content=content
)
text += '\n<a href="{post_link}">Auf {link_name} weiterlesen</a>\n'.format(
post_link=post_link,
link_name=link_name
)
for member in r.smembers(key):
try:
bot.sendMessage(receiver, text, parse_mode=ParseMode.HTML, disable_web_page_preview=True)
except Unauthorized:
print('Chat ' + receiver + ' existiert nicht mehr, lösche aus Abonnenten-Liste')
r.srem(v, receiver)
r.delete('pythonbot:rss:' + receiver)
except ChatMigrated as e:
print('Chat migriert: ' + receiver + ' -> ' + str(e.new_chat_id))
r.srem(v, receiver)
r.sadd(v, e.new_chat_id)
r.rename('pythonbot:rss:' + receiver, 'pythonbot:rss:' + str(e.new_chat_id))
bot.sendMessage(e.new_chat_id, text, parse_mode=ParseMode.HTML, disable_web_page_preview=True)
bot.sendMessage(
chat_id=member,
text=text,
parse_mode=telegram.ParseMode.HTML,
disable_web_page_preview=True
)
except telegram.error.Unauthorized:
logging.warning('Chat ' + member + ' existiert nicht mehr, wird gelöscht.')
r.srem(key, member)
r.delete(feed_hash.format(member))
except telegram.error.ChatMigrated as new_chat:
new_chat_id = new_chat.new_chat_id
logging.info('Chat migriert: ' + member + ' -> ' + str(new_chat_id))
r.srem(key, member)
r.sadd(key, new_chat_id)
r.rename(feed_hash.format(member), feed_hash.format(new_chat_id))
bot.sendMessage(
chat_id=member,
text=text,
parse_mode=telegram.ParseMode.HTML,
disable_web_page_preview=True
)
except telegram.error.TimedOut:
pass
if not r.exists(key):
r.delete(last_entry_hash)
return
# Set the new last entry if there are any
if new_entries:
if 'id' not in new_entries[0]:
new_last_entry = new_entries[0].link
else:
print('HTTP-Fehler: ' + str(feed_data.status))
except:
print('Fehler: Seite nicht erreichbar')
print('-----------------------')
new_last_entry = new_entries[0].id
r.set(last_entry_hash, new_last_entry)
def error(bot, update, error):
logger.warn('Update "%s" verursachte Fehler "%s"' % (update, error))
@run_async
def run_job(bot, job=None):
    """Check every subscribed feed once.

    This callback is registered twice: as the repeating job-queue task and
    as the handler for the /sync command. In the second case the second
    positional argument is the incoming Update rather than a Job, so the
    admin check that every other command performs is applied here as well.

    :param bot: Bot instance handed in by the dispatcher / job queue.
    :param job: Job (scheduled run), Update (/sync command) or None.
    """
    # Manual trigger via /sync: only configured admins may force a sync.
    if isinstance(job, telegram.Update) and not utils.can_use_bot(job):
        return

    logger.info('================================')
    # Each feed URL has one "<prefix>:<url>:subs" set holding its subscribers.
    for key in r.keys('pythonbot:rss:*:subs'):
        check_feed(bot, key)
# Main function
def main():
# Create the EventHandler and pass it your bot's token.
updater = Updater(token=config['DEFAULT']['token'])
j = updater.job_queue
# Bot-Infos prüfen
# Setup the updater and show bot info
updater = Updater(token=bot_token)
try:
bot_info = updater.bot.getMe()
print('Starte ' + bot_info.first_name + ', AKA @' + bot_info.username + ' (' + str(bot_info.id) + ')')
except telegram.error.Unauthorized:
logger.error('Anmeldung nicht möglich, Bot-Token falsch?')
sys.exit(1)
# Get the dispatcher to register handlers
dp = updater.dispatcher
logger.info('Starte ' + bot_info.first_name + ', AKA @' + bot_info.username + ' (' + str(bot_info.id) + ')')
# on different commands - answer in Telegram
dp.add_handler(CommandHandler("start", start))
dp.add_handler(CommandHandler("help", help))
dp.add_handler(CommandHandler("hilfe", help))
# Register Handlers
handlers = [
CommandHandler('start', start),
CommandHandler('help', help_text),
CommandHandler('rss', list_feeds, pass_args=True),
CommandHandler('sub', subscribe, pass_args=True),
CommandHandler('del', unsubscribe, pass_args=True),
CommandHandler('sync', run_job)
]
for handler in handlers:
updater.dispatcher.add_handler(handler)
dp.add_handler(CommandHandler("rss", list_rss, pass_args=True))
dp.add_handler(CommandHandler("sub", subscribe_to_rss, pass_args=True))
dp.add_handler(CommandHandler("del", unsubscribe_rss, pass_args=True))
dp.add_handler(CommandHandler("sync", manually_check_rss))
updater.job_queue.run_repeating(
run_job,
interval=60.0,
first=2.0
)
# log all errors
dp.add_error_handler(error)
# Start this thing!
updater.start_polling(
clean=True,
bootstrap_retries=-1,
allowed_updates=["message"]
)
# cron
j.run_repeating(check_rss, interval=60.0, first=15.0)
# Start the Bot
updater.start_polling(timeout=20, clean=True, bootstrap_retries=-1, allowed_updates=["message"])
# Run the bot until the you presses Ctrl-C or the process receives SIGINT,
# SIGTERM or SIGABRT. This should be used most of the time, since
# start_polling() is non-blocking and will stop the bot gracefully.
# Run Bot until CTRL+C is pressed or a SIGINT,
# SIGTERM or SIGABRT is sent.
updater.idle()

View File

@ -8,4 +8,5 @@ token = 1337:1234567890abcdefgh
#socket_path = /home/user/.redis/sock
[ADMIN]
# Must be integers!
id = [1337]

View File

@ -1,4 +1,3 @@
beautifulsoup4
feedparser
python-telegram-bot
redis

81
utils.py Normal file
View File

@ -0,0 +1,81 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import re
from bot import admins
def can_use_bot(update):
    """Tell whether the sender of ``update`` is one of the configured admins.

    The sender's user id is read from ``update.message.from_user.id`` and
    looked up in ``admins`` (imported from ``bot``).
    """
    sender_id = update.message.from_user.id
    return sender_id in admins
def get_new_entries(entries, last_entry):
    """Return the leading entries that come before the last seen one.

    ``entries`` is iterated in order; iteration stops at the first entry
    whose identifier equals ``last_entry``. The identifier is the entry's
    ``id`` when that key is present, otherwise its ``link``.

    :param entries: Iterable of mapping-like feed entries.
    :param last_entry: Identifier of the most recently processed entry.
    :return: List of entries preceding the match (all entries if no match).
    """
    new_entries = []
    for entry in entries:
        if 'id' in entry:
            identifier = entry['id']
        elif 'link' in entry:
            identifier = entry['link']
        else:
            # No identifier at all: it cannot be the last seen entry, so
            # treat it as new instead of failing with a KeyError.
            new_entries.append(entry)
            continue

        if identifier == last_entry:
            break
        new_entries.append(entry)
    return new_entries
def remove_html_tags(html):
    """Delete every ``<...>`` span from ``html`` and return the rest.

    The match is non-greedy and ``.`` does not match newlines, so a tag
    that is split across several lines is left in place.
    """
    tag_pattern = re.compile('<.*?>')
    return tag_pattern.sub('', html)
# Literal boilerplate fragments stripped from feed content. They are applied
# in this order, so longer variants come before the shorter ones they
# contain (e.g. '...[more]' before '[more]').
_RSS_BOILERPLATE = (
    '[…]',
    '[bilder]',
    '[mehr]',
    '[video]',
    '...[more]',
    '[more]',
    '[liveticker]',
    '[livestream]',
    '[multimedia]',
    '[phoenix]',
    '[swr]',
    '[ndr]',
    '[mdr]',
    '[rbb]',
    '[wdr]',
    '[hr]',
    '[br]',
    'Click for full.',
    'Read more »',
    'Read more',
    '(more…)',
    'View On WordPress',
    'Continue reading →',
    '(RSS generated with FetchRss)',
    '-- Delivered by Feed43 service',
    'Meldung bei www.tagesschau.de lesen',
)

# Site-specific footers removed by pattern. Raw strings keep the regex text
# unchanged while avoiding invalid-escape warnings for the '\.' sequences.
_RSS_BOILERPLATE_PATTERNS = tuple(re.compile(pattern) for pattern in (
    r'Der Beitrag.*erschien zuerst auf MAnime.de.',
    r'The post.*appeared first on Sugoi! Anime Blog.',
    r'http://www\.serienjunkies.de/.*\.html',
))


def clean_rss(content):
    """Strip known feed boilerplate from ``content`` and escape ``<``/``>``.

    Steps, in order: remove the literal fragments, replace ``<`` and ``>``
    with ``&lt;`` / ``&gt;``, then remove the pattern-based footers.

    :param content: Plain-text feed content.
    :return: The cleaned string.
    """
    for fragment in _RSS_BOILERPLATE:
        content = content.replace(fragment, '')

    content = content.replace('<', '&lt;').replace('>', '&gt;')

    for pattern in _RSS_BOILERPLATE_PATTERNS:
        content = pattern.sub('', content)
    return content
def get_content(content):
    """Turn raw entry markup into a short, cleaned preview text.

    HTML tags are removed, surrounding whitespace is stripped and the
    result is passed through ``clean_rss``. Anything longer than 250
    characters is cut to the first 250 and suffixed with ``...``.
    """
    max_length = 250
    stripped = remove_html_tags(content).strip()
    cleaned = clean_rss(stripped)
    if len(cleaned) <= max_length:
        return cleaned
    return cleaned[:max_length] + '...'