Kompletter Rewrite

This commit is contained in:
Andreas Bielawski 2017-09-20 23:25:57 +02:00
parent e22b9c11af
commit b7fe806c8f
6 changed files with 416 additions and 382 deletions

8
.gitignore vendored
View File

@ -1,7 +1,11 @@
# Byte-compiled / optimized / DLL files # Byte-compiled / optimized / DLL files / pip
__pycache__/ __pycache__/
*.py[cod] *.py[cod]
*$py.class *$py.class
src/
# Config # IDE
.idea/
# Bot-specific files
config.ini config.ini

View File

@ -1,13 +1,12 @@
RSS-Bot für Telegram RSS-Bot für Telegram
===================== =====================
1. **Klonen:** `git clone https://git.centzilius.de/iCON/rssbot`
1. `git clone https://gitlab.com/iCON/rssbot` 2. **Python 3 installieren:** `sudo apt-get install python3 python3-pip`
2. `sudo apt-get install python3 python3-pip` 3. **Module installieren:** `sudo pip3 install -r requirements.txt`
3. `sudo pip3 install -r requirements.txt` 4. **Config kopieren:** `cp config.ini.example config.ini`
4. `cp config.ini.example config.ini` 5. **Bot-Token** in `config.ini` einfügen
5. Bot-Token in `config.ini` einfügen
1. Weitere Einstellungen für Redis vornehmen, falls vom Standard abweicht 1. Weitere Einstellungen für Redis vornehmen, falls vom Standard abweicht
6. `bot.py` öffnen und unter `def can_use(update):` die ID zur eigenen abändern 6. **Eigene Admin-ID in die Config eintragen**
7. `python3 bot.py` 7. `python3 bot.py`
(c) 2016-2017 Andreas Bielawski (c) 2016-2017 Andreas Bielawski

678
bot.py
View File

@ -1,428 +1,378 @@
#!/usr/bin/env python #!/usr/bin/env python3
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
#
# RSS Bot
# Python 3 required
import html import html
import logging import logging
import re import re
import sys
from configparser import ConfigParser from configparser import ConfigParser
from json import loads from json import loads
from urllib.parse import urlparse from urllib.parse import urlparse
import feedparser import feedparser
import redis import redis
from bs4 import BeautifulSoup import telegram
from telegram import ChatAction, ParseMode from telegram.ext import CommandHandler, Updater
from telegram.error import TelegramError, Unauthorized, BadRequest, TimedOut, NetworkError, ChatMigrated
from telegram.ext import Updater, Job, CommandHandler, MessageHandler, Filters
from telegram.ext.dispatcher import run_async from telegram.ext.dispatcher import run_async
# Bot Configuration import utils
config = ConfigParser()
config.read_file(open('config.ini'))
redis_conf = config['REDIS'] # Logging
redis_db = redis_conf.get('db', 0) logging.basicConfig(
redis_host = redis_conf.get('host') format="%(asctime)s - %(levelname)s: %(message)s",
redis_port = redis_conf.get('port', 6379) datefmt="%d.%m.%Y %H:%M:%S",
redis_socket = redis_conf.get('socket_path') level=logging.INFO
)
# Enable logging
logging.basicConfig(format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
level=logging.ERROR)
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
# Admins # Bot configuration
admins = loads(config["ADMIN"]["id"]) config = ConfigParser()
if not admins: try:
print('Keine Admin-IDs gesetzt, bitte Konfigurationsdatei bearbeiten.') config.read_file(open('config.ini'))
quit() except FileNotFoundError:
logger.critical('Config.ini nicht gefunden')
sys.exit(1)
# Utils # Bot token
try:
bot_token = config['DEFAULT']['token']
except KeyError:
logger.error('Kein Bot-Token gesetzt, bitte config.ini prüfen')
sys.exit(1)
if not bot_token:
logger.error('Kein Bot-Token gesetzt, bitte config.ini prüfen')
sys.exit(1)
# Admins
try:
admins = loads(config["ADMIN"]["id"])
except KeyError:
logger.error('Keine Admin-IDs gesetzt, bitte config.ini prüfen.')
sys.exit(1)
if not admins:
logger.error('Keine Admin-IDs gesetzt, bitte config.ini prüfen.')
sys.exit(1)
for admin in admins:
if not isinstance(admin, int):
logger.error('Admin-IDs müssen Integer sein.')
sys.exit(1)
# Redis
redis_conf = config['REDIS']
redis_db = redis_conf.get('db', 0)
redis_host = redis_conf.get('host', '127.0.0.1')
redis_port = redis_conf.get('port', 6379)
redis_socket = redis_conf.get('socket_path')
if redis_socket: if redis_socket:
r = redis.Redis(unix_socket_path=redis_socket, db=int(redis_db), decode_responses=True) r = redis.Redis(unix_socket_path=redis_socket, db=int(redis_db), decode_responses=True)
else: else:
r = redis.Redis(host=redis_host, port=int(redis_port), db=int(redis_db), decode_responses=True) r = redis.Redis(host=redis_host, port=int(redis_port), db=int(redis_db), decode_responses=True)
if not r.ping(): if not r.ping():
print('Konnte nicht mit Redis verbinden, prüfe deine Einstellungen') logging.getLogger("Redis").critical("Redis-Verbindungsfehler, config.ini prüfen")
quit() sys.exit(1)
feed_hash = 'pythonbot:rss:{0}'
def is_number(s):
try:
float(s)
return True
except ValueError:
return False
def remove_tags(html):
return ''.join(BeautifulSoup(html, "html.parser").findAll(text=True))
def can_use(update):
if update.message.from_user.id in admins:
return True
else:
return False
def cleanRSS(str):
str = str.replace('[…]', '')
str = str.replace('[bilder]', '')
str = str.replace('[mehr]', '')
str = str.replace('[video]', '')
str = str.replace('...[more]', '')
str = str.replace('[more]', '')
str = str.replace('[liveticker]', '')
str = str.replace('[livestream]', '')
str = str.replace('[multimedia]', '')
str = str.replace('[phoenix]', '')
str = str.replace('[swr]', '')
str = str.replace('[ndr]', '')
str = str.replace('[mdr]', '')
str = str.replace('[rbb]', '')
str = str.replace('[wdr]', '')
str = str.replace('[hr]', '')
str = str.replace('[br]', '')
str = str.replace('Click for full.', '')
str = str.replace('Read more »', '')
str = str.replace('Read more', '')
str = str.replace('(more…)', '')
str = str.replace('View On WordPress', '')
str = str.replace('Continue reading →', '')
str = str.replace('(RSS generated with FetchRss)', '')
str = str.replace('-- Delivered by Feed43 service', '')
str = str.replace('Meldung bei www.tagesschau.de lesen', '')
str = str.replace('<', '&lt;')
str = str.replace('>', '&gt;')
str = re.sub('Der Beitrag.*erschien zuerst auf MAnime.de.', '', str)
str = re.sub('The post.*appeared first on Sugoi! Anime Blog.', '', str)
str = re.sub('http://www\.serienjunkies.de/.*\.html', '', str)
return str
def check_chat(bot, username):
try:
return bot.getChat(username)
except:
return
# Commands
@run_async @run_async
def start(bot, update): def start(bot, update):
if not can_use(update): if not utils.can_use_bot(update):
return return
bot.sendMessage( update.message.reply_text(
chat_id=update.message.chat_id, text='<b>Willkommen beim RSS-Bot!</b>\nSende /help, um zu starten.',
text='<b>Willkommen beim RSS-Bot!</b>\nLass uns anfangen! Sende /hilfe, um zu starten.', parse_mode=telegram.ParseMode.HTML
reply_to_message_id=update.message.message_id,
parse_mode=ParseMode.HTML
) )
@run_async @run_async
def help(bot, update): def help_text(bot, update):
if not can_use(update): if not utils.can_use_bot(update):
return return
bot.sendMessage( update.message.reply_text(
chat_id=update.message.chat_id, text='<b>/rss</b> <i>[Chat]</i>: Abonnierte Feeds anzeigen\n'
text='<b>/rss</b>: Abonnierte Feeds anzeigen\n<b>/sub</b> <i>Feed-URL</i>: Feed abonnieren\n<b>/del</b> <i>n</i>: Feed löschen', '<b>/sub</b> <i>Feed-URL</i> <i>[Chat]</i>: Feed abonnieren\n'
reply_to_message_id=update.message.message_id, '<b>/del</b> <i>n</i> <i>[Chat]</i>: Feed löschen',
parse_mode=ParseMode.HTML parse_mode=telegram.ParseMode.HTML
) )
def subscribe_to_rss(bot, update, args): @run_async
if not can_use(update): def list_feeds(bot, update, args):
if not utils.can_use_bot(update):
return return
if len(args) < 1: if args:
bot.sendMessage(chat_id=update.message.chat_id, text='Bitte gebe eine Feed-URL ein.', chat_name = args[0]
reply_to_message_id=update.message.message_id) try:
resp = bot.getChat(chat_name)
except telegram.error.BadRequest:
update.message.reply_text('❌ Dieser Kanal existiert nicht.')
return
chat_id = str(resp.id)
chat_title = resp.title
else:
chat_id = str(update.message.chat.id)
if update.message.chat.type == 'private':
chat_title = update.message.chat.first_name
else:
chat_title = update.message.chat.title
subs = r.smembers(feed_hash.format(chat_id))
if not subs:
text = '❌ Keine Feeds abonniert.'
else:
text = '<b>' + html.escape(chat_title) + '</b> hat abonniert:\n'
for n, feed in enumerate(subs):
text += '<b>' + str(n + 1) + ')</b> ' + feed + '\n'
update.message.reply_text(
text=text,
parse_mode=telegram.ParseMode.HTML
)
@run_async
def subscribe(bot, update, args):
if not utils.can_use_bot(update):
return
if not args:
update.message.reply_text('❌ Keine Feed-URL angegeben.')
return return
feed_url = args[0] feed_url = args[0]
is_url = re.search("http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+", feed_url) if not re.match("^http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*(),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+$", feed_url):
if not is_url: update.message.reply_text('❌ Das ist keine URL.')
bot.sendMessage(chat_id=update.message.chat_id, text='Dies ist keine URL.',
reply_to_message_id=update.message.message_id)
return return
# Get Chat ID from name if given
if len(args) > 1: if len(args) > 1:
username = args[1] chat_name = args[1]
chat_info = check_chat(bot, username)
if not chat_info:
bot.sendMessage(chat_id=update.message.chat_id, text='Dieser Kanal existiert nicht!',
reply_to_message_id=update.message.message_id)
return
chat_id = str(chat_info.id)
else:
chat_id = str(update.message.chat_id)
if r.sismember('pythonbot:rss:' + chat_id, feed_url):
bot.sendMessage(chat_id=update.message.chat_id, text='Dieser Feed wurde bereits abonniert.',
reply_to_message_id=update.message.message_id)
return
bot.sendChatAction(update.message.chat_id, action=ChatAction.TYPING)
feed_data = feedparser.parse(feed_url)
if not 'link' in feed_data.feed:
bot.sendMessage(chat_id=update.message.chat_id, text='Kein gültiger Feed.',
reply_to_message_id=update.message.message_id)
return
if not 'title' in feed_data.feed:
feed_title = 'Unbekannten Feed'
else:
feed_title = feed_data.feed.title
if len(feed_data.entries) > 0:
if not 'id' in feed_data.entries[0]:
last_entry = feed_data.entries[0].link
else:
last_entry = feed_data.entries[0].id
lhash = 'pythonbot:rss:' + feed_url + ':last_entry'
if not r.exists(lhash):
r.set(lhash, last_entry)
r.sadd('pythonbot:rss:' + feed_url + ':subs', int(chat_id))
r.sadd('pythonbot:rss:' + chat_id, feed_url)
bot.sendMessage(
chat_id=update.message.chat_id,
text='<b>' + feed_title + '</b> hinzugefügt!',
reply_to_message_id=update.message.message_id,
parse_mode=ParseMode.HTML
)
def unsubscribe_rss(bot, update, args):
if not can_use(update):
return
if len(args) < 1:
bot.sendMessage(chat_id=update.message.chat_id, text='Bitte gebe eine Nummer ein',
reply_to_message_id=update.message.message_id)
return
if len(args) > 1:
username = args[1]
chat_info = check_chat(bot, username)
if not chat_info:
bot.sendMessage(chat_id=update.message.chat_id, text='Dieser Kanal existiert nicht!',
reply_to_message_id=update.message.message_id)
return
chat_id = str(chat_info.id)
else:
chat_id = str(update.message.chat_id)
if not is_number(args[0]):
bot.sendMessage(chat_id=update.message.chat_id, text='Bitte gebe eine Nummer ein.',
reply_to_message_id=update.message.message_id)
return
uhash = 'pythonbot:rss:' + chat_id
n = int(args[0])
subs = list(r.smembers(uhash))
if n < 1 or n > len(subs):
bot.sendMessage(chat_id=update.message.chat_id, text='Abonnement-ID ist zu hoch.',
reply_to_message_id=update.message.message_id)
return
sub = subs[n - 1]
lhash = 'pythonbot:rss:' + sub + ':subs'
r.srem(uhash, sub)
r.srem(lhash, int(chat_id))
bot.sendMessage(
chat_id=update.message.chat_id,
text='<b>' + sub + '</b> entfernt.',
reply_to_message_id=update.message.message_id,
parse_mode=ParseMode.HTML
)
left = r.smembers(lhash)
if len(left) < 1: # no one subscribed, remove it
r.delete('pythonbot:rss:' + sub + ':last_entry')
def get_rss_list(chat_id, chat_name):
uhash = 'pythonbot:rss:' + chat_id
subs = list(r.smembers(uhash))
if len(subs) < 1:
return '<b>Keine Feeds abonniert!</b>'
text = '<b>' + chat_name + '</b> hat abonniert:\n'
for n, feed in enumerate(subs):
text = text + str(n + 1) + ') ' + feed + '\n'
return text
@run_async
def list_rss(bot, update, args):
if not can_use(update):
return
if len(args) == 1:
username = args[0]
chat_info = check_chat(bot, username)
if not chat_info:
bot.sendMessage(chat_id=update.message.chat_id, text='Dieser Kanal existiert nicht!',
reply_to_message_id=update.message.message_id)
return
rss_list = get_rss_list(str(chat_info.id), chat_info.title)
else:
if update.message.chat.first_name:
chat_name = update.message.chat.first_name
else:
chat_name = update.message.chat.title
rss_list = get_rss_list(str(update.message.chat_id), chat_name)
bot.sendMessage(
chat_id=update.message.chat_id,
text=rss_list,
reply_to_message_id=update.message.message_id,
parse_mode=ParseMode.HTML
)
def get_new_entries(last, new_entries):
entries = []
for k, v in enumerate(new_entries):
if 'id' in v:
if v.id == last:
return entries
else:
entries.append(v)
else:
if v.link == last:
return entries
else:
entries.append(v)
return entries
def manually_check_rss(bot, update):
if not can_use(update):
return
check_rss(bot, '')
bot.sendMessage(
chat_id=update.message.chat_id,
text='Ausgeführt.',
reply_to_message_id=update.message.message_id
)
@run_async
def check_rss(bot, job):
keys = list(r.keys('pythonbot:rss:*:subs'))
for k, v in enumerate(keys):
p = re.compile('pythonbot:rss:(.+):subs')
match_func = p.search(v)
url = match_func.group(1)
print('RSS: ' + url)
last = r.get('pythonbot:rss:' + url + ':last_entry')
feed_data = feedparser.parse(url)
try: try:
if feed_data.status < 400: resp = bot.getChat(chat_name)
if not 'title' in feed_data.feed: except telegram.error.BadRequest:
feed_title = feed_data.feed.link update.message.reply_text('❌ Dieser Kanal existiert nicht.')
return
chat_id = str(resp.id)
resp = bot.getChatMember(chat_id, bot.id)
if resp.status != 'administrator':
update.message.reply_text('❌ Bot ist kein Administrator in diesem Kanal.')
return
else: else:
feed_title = feed_data.feed.title chat_id = str(update.message.chat.id)
newentr = get_new_entries(last, feed_data.entries)
text = '' bot.sendChatAction(update.message.chat.id, action=telegram.ChatAction.TYPING)
for k2, v2 in enumerate(newentr): data = feedparser.parse(feed_url)
if not 'title' in v2: if 'link' not in data.feed:
title = 'Kein Titel' update.message.reply_text('❌ Kein gültiger Feed.')
return
feed_url = data.href # Follow all redirects
if r.sismember(feed_hash.format(chat_id), feed_url):
update.message.reply_text('✅ Dieser Feed wurde bereits abonniert.')
return
if 'title' not in data.feed:
feed_title = feed_url
else: else:
title = html.escape(remove_tags(v2.title).lstrip()) feed_title = html.escape(data.feed['title'])
if not 'link' in v2:
link = feed_data.feed.link # Save the last entry in Redis, if it doesn't exist
link_name = link if data.entries:
last_entry_hash = feed_hash.format(feed_url + ':last_entry')
if not r.exists(last_entry_hash):
if 'id' not in data.entries[0]:
last_entry = data.entries[0]['link']
else: else:
link = v2.link last_entry = data.entries[0]['id']
f = re.search('^https?://feedproxy\.google\.com/~r/(.+?)/.*', link) # feedproxy.google.com r.set(last_entry_hash, last_entry)
if f:
link_name = f.group(1) r.sadd(feed_hash.format(feed_url + ':subs'), chat_id)
r.sadd(feed_hash.format(chat_id), feed_url)
update.message.reply_text(
text='✅ <b>' + feed_title + '</b> hinzugefügt!',
parse_mode=telegram.ParseMode.HTML
)
@run_async
def unsubscribe(bot, update, args):
if not utils.can_use_bot(update):
return
if not args:
update.message.reply_text('❌ Keine Nummer angegeben.')
return
# Get Chat ID from name if given
if len(args) > 1:
chat_name = args[1]
try:
resp = bot.getChat(chat_name)
except telegram.error.BadRequest:
update.message.reply_text('❌ Dieser Kanal existiert nicht.')
return
chat_id = str(resp.id)
else: else:
link_name = urlparse(link).netloc chat_id = str(update.message.chat.id)
link_name = re.sub('^www\d?\.', '', link_name) # www.
if 'content' in v2: try:
content = remove_tags(v2.content[0].value).lstrip() n = int(args[0])
content = cleanRSS(content) except ValueError:
if len(content) > 250: update.message.reply_text('❌ Keine Nummer angegeben.')
content = content[0:250] + '...' return
elif 'summary' in v2:
content = remove_tags(v2.summary).lstrip() chat_hash = feed_hash.format(chat_id)
content = cleanRSS(content) subs = r.smembers(chat_hash)
if len(content) > 250: if n < 1:
content = content[0:250] + '...' update.message.reply_text('❌ Nummer muss größer als 0 sein!')
return
elif n > len(subs):
update.message.reply_text('❌ Feed-ID zu hoch.')
return
feed_url = list(subs)[n - 1]
sub_hash = feed_hash.format(feed_url + ':subs')
r.srem(chat_hash, feed_url)
r.srem(sub_hash, chat_id)
if not r.smembers(sub_hash): # no one subscribed, remove it
r.delete(feed_hash.format(feed_url + ':last_entry'))
update.message.reply_text(
text='✅ <b>' + feed_url + '</b> entfernt.',
parse_mode=telegram.ParseMode.HTML
)
@run_async
def check_feed(bot, key):
feed_url = re.match('^pythonbot:rss:(.+):subs$', key).group(1)
logger.info(feed_url)
data = feedparser.parse(feed_url)
if 'link' not in data.feed:
logger.warning('Kein gültiger Feed, Status-Code ' + str(data.status))
return None
if 'title' not in data.feed:
feed_title = data.feed['link']
else:
feed_title = data.feed['title']
last_entry_hash = feed_hash.format(feed_url + ':last_entry')
last_entry = r.get(last_entry_hash)
new_entries = utils.get_new_entries(data.entries, last_entry)
for entry in reversed(new_entries):
if 'title' not in entry:
post_title = 'Kein Titel'
else:
post_title = html.escape(utils.remove_html_tags(entry['title']).strip())
if 'link' not in entry:
post_link = entry.link
link_name = post_link
else:
post_link = entry.link
feedproxy = re.search('^https?://feedproxy\.google\.com/~r/(.+?)/.*', post_link) # feedproxy.google.com
if feedproxy:
link_name = feedproxy.group(1)
else:
link_name = urlparse(post_link).netloc
link_name = re.sub('^www\d?\.', '', link_name) # remove www.
if 'content' in entry:
content = utils.get_content(entry.content[0]['value'])
elif 'summary' in entry:
content = utils.get_content(entry.summary)
else: else:
content = '' content = ''
# Für 1 Nachricht pro Beitrag, tue dies: text = '<b>{post_title}</b>\n<i>{feed_title}</i>\n{content}'.format(
# Entferne hier das "text + "... post_title=post_title,
text = text + '\n<b>' + title + '</b>\n<i>' + feed_title + '</i>\n' + content + '\n<a href="' + link + '">Auf ' + link_name + ' weiterlesen</a>\n' feed_title=feed_title,
# ...und setze hier vor jeder Zeile 4 zusätzliche Leerzeichen (bis "else", Zeile 379) content=content
if text != '': )
if not 'id' in newentr[0]: text += '\n<a href="{post_link}">Auf {link_name} weiterlesen</a>\n'.format(
newlast = newentr[0].link post_link=post_link,
else: link_name=link_name
newlast = newentr[0].id )
r.set('pythonbot:rss:' + url + ':last_entry', newlast) for member in r.smembers(key):
for k2, receiver in enumerate(list(r.smembers(v))):
try: try:
bot.sendMessage(receiver, text, parse_mode=ParseMode.HTML, disable_web_page_preview=True) bot.sendMessage(
except Unauthorized: chat_id=member,
print('Chat ' + receiver + ' existiert nicht mehr, lösche aus Abonnenten-Liste') text=text,
r.srem(v, receiver) parse_mode=telegram.ParseMode.HTML,
r.delete('pythonbot:rss:' + receiver) disable_web_page_preview=True
except ChatMigrated as e: )
print('Chat migriert: ' + receiver + ' -> ' + str(e.new_chat_id)) except telegram.error.Unauthorized:
r.srem(v, receiver) logging.warning('Chat ' + member + ' existiert nicht mehr, wird gelöscht.')
r.sadd(v, e.new_chat_id) r.srem(key, member)
r.rename('pythonbot:rss:' + receiver, 'pythonbot:rss:' + str(e.new_chat_id)) r.delete(feed_hash.format(member))
bot.sendMessage(e.new_chat_id, text, parse_mode=ParseMode.HTML, disable_web_page_preview=True) except telegram.error.ChatMigrated as new_chat:
new_chat_id = new_chat.new_chat_id
logging.info('Chat migriert: ' + member + ' -> ' + str(new_chat_id))
r.srem(key, member)
r.sadd(key, new_chat_id)
r.rename(feed_hash.format(member), feed_hash.format(new_chat_id))
bot.sendMessage(
chat_id=member,
text=text,
parse_mode=telegram.ParseMode.HTML,
disable_web_page_preview=True
)
except telegram.error.TimedOut:
pass
if not r.exists(key):
r.delete(last_entry_hash)
return
# Set the new last entry if there are any
if new_entries:
if 'id' not in new_entries[0]:
new_last_entry = new_entries[0].link
else: else:
print('HTTP-Fehler: ' + str(feed_data.status)) new_last_entry = new_entries[0].id
except: r.set(last_entry_hash, new_last_entry)
print('Fehler: Seite nicht erreichbar')
print('-----------------------')
def error(bot, update, error): @run_async
logger.warn('Update "%s" verursachte Fehler "%s"' % (update, error)) def run_job(bot, job=None):
logger.info('================================')
keys = r.keys('pythonbot:rss:*:subs')
for key in keys:
check_feed(bot, key)
# Main function
def main(): def main():
# Create the EventHandler and pass it your bot's token. # Setup the updater and show bot info
updater = Updater(token=config['DEFAULT']['token']) updater = Updater(token=bot_token)
j = updater.job_queue try:
# Bot-Infos prüfen
bot_info = updater.bot.getMe() bot_info = updater.bot.getMe()
print('Starte ' + bot_info.first_name + ', AKA @' + bot_info.username + ' (' + str(bot_info.id) + ')') except telegram.error.Unauthorized:
logger.error('Anmeldung nicht möglich, Bot-Token falsch?')
sys.exit(1)
# Get the dispatcher to register handlers logger.info('Starte ' + bot_info.first_name + ', AKA @' + bot_info.username + ' (' + str(bot_info.id) + ')')
dp = updater.dispatcher
# on different commands - answer in Telegram # Register Handlers
dp.add_handler(CommandHandler("start", start)) handlers = [
dp.add_handler(CommandHandler("help", help)) CommandHandler('start', start),
dp.add_handler(CommandHandler("hilfe", help)) CommandHandler('help', help_text),
CommandHandler('rss', list_feeds, pass_args=True),
CommandHandler('sub', subscribe, pass_args=True),
CommandHandler('del', unsubscribe, pass_args=True),
CommandHandler('sync', run_job)
]
for handler in handlers:
updater.dispatcher.add_handler(handler)
dp.add_handler(CommandHandler("rss", list_rss, pass_args=True)) updater.job_queue.run_repeating(
dp.add_handler(CommandHandler("sub", subscribe_to_rss, pass_args=True)) run_job,
dp.add_handler(CommandHandler("del", unsubscribe_rss, pass_args=True)) interval=60.0,
dp.add_handler(CommandHandler("sync", manually_check_rss)) first=2.0
)
# log all errors # Start this thing!
dp.add_error_handler(error) updater.start_polling(
clean=True,
bootstrap_retries=-1,
allowed_updates=["message"]
)
# cron # Run Bot until CTRL+C is pressed or a SIGINT,
j.run_repeating(check_rss, interval=60.0, first=15.0) # SIGTERM or SIGABRT is sent.
# Start the Bot
updater.start_polling(timeout=20, clean=True, bootstrap_retries=-1, allowed_updates=["message"])
# Run the bot until the you presses Ctrl-C or the process receives SIGINT,
# SIGTERM or SIGABRT. This should be used most of the time, since
# start_polling() is non-blocking and will stop the bot gracefully.
updater.idle() updater.idle()

View File

@ -8,4 +8,5 @@ token = 1337:1234567890abcdefgh
#socket_path = /home/user/.redis/sock #socket_path = /home/user/.redis/sock
[ADMIN] [ADMIN]
# Must be integers!
id = [1337] id = [1337]

View File

@ -1,4 +1,3 @@
beautifulsoup4
feedparser feedparser
python-telegram-bot python-telegram-bot
redis redis

81
utils.py Normal file
View File

@ -0,0 +1,81 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import re
from bot import admins
def can_use_bot(update):
    """Tell whether the sender of an update is allowed to control the bot.

    The sender's Telegram user ID is looked up in the ``admins`` collection
    imported from ``bot``; the result of that membership test is returned.
    """
    sender_id = update.message.from_user.id
    return sender_id in admins
def get_new_entries(entries, last_entry):
    """Collect the entries that come before the last known one.

    ``entries`` is walked in order. Each entry is identified by its ``'id'``
    value or, when it has no ``'id'`` key, by its ``'link'`` value. The walk
    stops at the first entry whose identifier equals ``last_entry``.

    Returns the list of entries seen before that point, or every entry when
    no identifier matches.
    """
    fresh = []
    for item in entries:
        identifier = item['id'] if 'id' in item else item['link']
        if identifier == last_entry:
            break
        fresh.append(item)
    return fresh
# Matches one tag: '<', anything up to the next '>', then '>'. A negated
# character class is used instead of '.*?' so that tags whose attributes are
# wrapped over several lines are matched as well ('.' stops at a newline).
_HTML_TAG_RE = re.compile(r'<[^>]*>')


def remove_html_tags(html):
    """Return ``html`` with every ``<...>`` tag removed.

    Only the tags themselves are dropped; the text between them and any
    character entities (``&amp;`` etc.) are left untouched.
    """
    return _HTML_TAG_RE.sub('', html)
# Boilerplate snippets removed verbatim. They are applied in this order, which
# matters where one snippet contains another: '...[more]' before '[more]',
# 'Read more »' before 'Read more'.
_RSS_BOILERPLATE = (
    '[…]',
    '[bilder]',
    '[mehr]',
    '[video]',
    '...[more]',
    '[more]',
    '[liveticker]',
    '[livestream]',
    '[multimedia]',
    '[phoenix]',
    '[swr]',
    '[ndr]',
    '[mdr]',
    '[rbb]',
    '[wdr]',
    '[hr]',
    '[br]',
    'Click for full.',
    'Read more »',
    'Read more',
    '(more…)',
    'View On WordPress',
    'Continue reading →',
    '(RSS generated with FetchRss)',
    '-- Delivered by Feed43 service',
    'Meldung bei www.tagesschau.de lesen',
)

# Site-specific footers removed by pattern. Raw strings keep '\.' a regex
# escape instead of an invalid string escape sequence.
_RSS_BOILERPLATE_PATTERNS = tuple(re.compile(pattern) for pattern in (
    r'Der Beitrag.*erschien zuerst auf MAnime.de.',
    r'The post.*appeared first on Sugoi! Anime Blog.',
    r'http://www\.serienjunkies.de/.*\.html',
))


def clean_rss(content):
    """Strip feed boilerplate from ``content`` and escape angle brackets.

    Three passes run in a fixed order:

    1. every snippet in ``_RSS_BOILERPLATE`` is removed;
    2. ``<`` and ``>`` are replaced by ``&lt;`` and ``&gt;``;
    3. every pattern in ``_RSS_BOILERPLATE_PATTERNS`` is removed.

    Returns the cleaned string.
    """
    for snippet in _RSS_BOILERPLATE:
        content = content.replace(snippet, '')
    content = content.replace('<', '&lt;').replace('>', '&gt;')
    for pattern in _RSS_BOILERPLATE_PATTERNS:
        content = pattern.sub('', content)
    return content
def get_content(content):
    """Sanitize feed content and limit it to 250 characters.

    HTML tags are removed with ``remove_html_tags``, surrounding whitespace
    is stripped, and the result is passed through ``clean_rss``. If the
    cleaned text is longer than 250 characters it is cut to the first 250
    and ``'...'`` is appended.

    Returns the sanitized, possibly shortened string.
    """
    content = clean_rss(remove_html_tags(content).strip())
    if len(content) > 250:
        # clean_rss() has already turned '<' and '>' into '&lt;' / '&gt;', so
        # a cut at a fixed offset can land inside such an entity. Drop the
        # dangling fragment ('&', '&l', '&g', '&lt', '&gt') instead of
        # emitting something like '&l...'.
        content = re.sub(r'&(?:[lg]t?)?\Z', '', content[:250]) + '...'
    return content