Kompletter Rewrite
This commit is contained in:
parent
e22b9c11af
commit
b7fe806c8f
8
.gitignore
vendored
8
.gitignore
vendored
@ -1,7 +1,11 @@
|
||||
# Byte-compiled / optimized / DLL files
|
||||
# Byte-compiled / optimized / DLL files / pip
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
src/
|
||||
|
||||
# Config
|
||||
# IDE
|
||||
.idea/
|
||||
|
||||
# Bot-specific files
|
||||
config.ini
|
13
README.md
13
README.md
@ -1,13 +1,12 @@
|
||||
RSS-Bot für Telegram
|
||||
=====================
|
||||
|
||||
1. `git clone https://gitlab.com/iCON/rssbot`
|
||||
2. `sudo apt-get install python3 python3-pip`
|
||||
3. `sudo pip3 install -r requirements.txt`
|
||||
4. `cp config.ini.example config.ini`
|
||||
5. Bot-Token in `config.ini` einfügen
|
||||
1. **Klonen:** `git clone https://git.centzilius.de/iCON/rssbot`
|
||||
2. **Python 3 installieren:** `sudo apt-get install python3 python3-pip`
|
||||
3. **Module installieren:** `sudo pip3 install -r requirements.txt`
|
||||
4. **Config kopieren:** `cp config.ini.example config.ini`
|
||||
5. **Bot-Token** in `config.ini` einfügen
|
||||
1. Weitere Einstellungen für Redis vornehmen, falls diese vom Standard abweichen
|
||||
6. `bot.py` öffnen und unter `def can_use(update):` die ID zur eigenen abändern
|
||||
6. **Eigene Admin-ID in die Config eintragen**
|
||||
7. `python3 bot.py`
|
||||
|
||||
(c) 2016-2017 Andreas Bielawski
|
||||
|
694
bot.py
694
bot.py
@ -1,428 +1,378 @@
|
||||
#!/usr/bin/env python
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# RSS Bot
|
||||
# Python 3 required
|
||||
|
||||
import html
|
||||
import logging
|
||||
import re
|
||||
import sys
|
||||
from configparser import ConfigParser
|
||||
from json import loads
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import feedparser
|
||||
import redis
|
||||
from bs4 import BeautifulSoup
|
||||
from telegram import ChatAction, ParseMode
|
||||
from telegram.error import TelegramError, Unauthorized, BadRequest, TimedOut, NetworkError, ChatMigrated
|
||||
from telegram.ext import Updater, Job, CommandHandler, MessageHandler, Filters
|
||||
import telegram
|
||||
from telegram.ext import CommandHandler, Updater
|
||||
from telegram.ext.dispatcher import run_async
|
||||
|
||||
# Bot Configuration
|
||||
config = ConfigParser()
|
||||
config.read_file(open('config.ini'))
|
||||
import utils
|
||||
|
||||
redis_conf = config['REDIS']
|
||||
redis_db = redis_conf.get('db', 0)
|
||||
redis_host = redis_conf.get('host')
|
||||
redis_port = redis_conf.get('port', 6379)
|
||||
redis_socket = redis_conf.get('socket_path')
|
||||
|
||||
# Enable logging
|
||||
logging.basicConfig(format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
|
||||
level=logging.ERROR)
|
||||
# Logging
|
||||
logging.basicConfig(
|
||||
format="%(asctime)s - %(levelname)s: %(message)s",
|
||||
datefmt="%d.%m.%Y %H:%M:%S",
|
||||
level=logging.INFO
|
||||
)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Admins
|
||||
admins = loads(config["ADMIN"]["id"])
|
||||
if not admins:
|
||||
print('Keine Admin-IDs gesetzt, bitte Konfigurationsdatei bearbeiten.')
|
||||
quit()
|
||||
# Bot configuration
|
||||
config = ConfigParser()
|
||||
try:
|
||||
config.read_file(open('config.ini'))
|
||||
except FileNotFoundError:
|
||||
logger.critical('Config.ini nicht gefunden')
|
||||
sys.exit(1)
|
||||
|
||||
# Utils
|
||||
# Bot token
|
||||
try:
|
||||
bot_token = config['DEFAULT']['token']
|
||||
except KeyError:
|
||||
logger.error('Kein Bot-Token gesetzt, bitte config.ini prüfen')
|
||||
sys.exit(1)
|
||||
if not bot_token:
|
||||
logger.error('Kein Bot-Token gesetzt, bitte config.ini prüfen')
|
||||
sys.exit(1)
|
||||
|
||||
# Admins
|
||||
try:
|
||||
admins = loads(config["ADMIN"]["id"])
|
||||
except KeyError:
|
||||
logger.error('Keine Admin-IDs gesetzt, bitte config.ini prüfen.')
|
||||
sys.exit(1)
|
||||
if not admins:
|
||||
logger.error('Keine Admin-IDs gesetzt, bitte config.ini prüfen.')
|
||||
sys.exit(1)
|
||||
|
||||
for admin in admins:
|
||||
if not isinstance(admin, int):
|
||||
logger.error('Admin-IDs müssen Integer sein.')
|
||||
sys.exit(1)
|
||||
|
||||
# Redis
|
||||
redis_conf = config['REDIS']
|
||||
redis_db = redis_conf.get('db', 0)
|
||||
redis_host = redis_conf.get('host', '127.0.0.1')
|
||||
redis_port = redis_conf.get('port', 6379)
|
||||
redis_socket = redis_conf.get('socket_path')
|
||||
if redis_socket:
|
||||
r = redis.Redis(unix_socket_path=redis_socket, db=int(redis_db), decode_responses=True)
|
||||
else:
|
||||
r = redis.Redis(host=redis_host, port=int(redis_port), db=int(redis_db), decode_responses=True)
|
||||
|
||||
if not r.ping():
|
||||
print('Konnte nicht mit Redis verbinden, prüfe deine Einstellungen')
|
||||
quit()
|
||||
logging.getLogger("Redis").critical("Redis-Verbindungsfehler, config.ini prüfen")
|
||||
sys.exit(1)
|
||||
|
||||
feed_hash = 'pythonbot:rss:{0}'
|
||||
|
||||
|
||||
def is_number(s):
    """Return True if ``s`` can be converted to a float, otherwise False.

    Values that ``float()`` rejects by type (for example ``None``) are
    reported as "not a number" instead of raising ``TypeError``.
    """
    try:
        float(s)
    except (TypeError, ValueError):
        return False
    return True
|
||||
|
||||
|
||||
def remove_tags(html):
    """Return the concatenated text nodes of ``html``, dropping all markup."""
    soup = BeautifulSoup(html, "html.parser")
    text_nodes = soup.findAll(text=True)
    return ''.join(text_nodes)
|
||||
|
||||
|
||||
def can_use(update):
    """Return True if the sender of ``update`` is listed in ``admins``."""
    sender_id = update.message.from_user.id
    return sender_id in admins
|
||||
|
||||
|
||||
def cleanRSS(str):
    """Strip known feed boilerplate from ``str`` and escape angle brackets.

    The literal phrases are removed in the listed order (longer variants such
    as ``'...[more]'`` must come before their shorter forms). Remaining ``<``
    and ``>`` characters are converted to HTML entities so the result can be
    embedded in a message sent with HTML parse mode. Finally a few
    site-specific footers are removed by regular expression.

    The parameter name shadows the builtin ``str``; it is kept for
    backward compatibility with existing callers.
    """
    boilerplate = (
        '[…]',
        '[bilder]',
        '[mehr]',
        '[video]',
        '...[more]',
        '[more]',
        '[liveticker]',
        '[livestream]',
        '[multimedia]',
        '[phoenix]',
        '[swr]',
        '[ndr]',
        '[mdr]',
        '[rbb]',
        '[wdr]',
        '[hr]',
        '[br]',
        'Click for full.',
        'Read more »',
        'Read more',
        '(more…)',
        'View On WordPress',
        'Continue reading →',
        '(RSS generated with FetchRss)',
        '-- Delivered by Feed43 service',
        'Meldung bei www.tagesschau.de lesen',
    )
    text = str
    for phrase in boilerplate:
        text = text.replace(phrase, '')

    # Escape stray angle brackets; the previous replacements mapped each
    # character onto itself and therefore did nothing.
    text = text.replace('<', '&lt;')
    text = text.replace('>', '&gt;')

    # Raw strings avoid invalid-escape warnings; the patterns are unchanged.
    text = re.sub(r'Der Beitrag.*erschien zuerst auf MAnime.de.', '', text)
    text = re.sub(r'The post.*appeared first on Sugoi! Anime Blog.', '', text)
    text = re.sub(r'http://www\.serienjunkies.de/.*\.html', '', text)
    return text
|
||||
|
||||
|
||||
def check_chat(bot, username):
    """Look up a chat via ``bot.getChat``.

    Returns whatever ``bot.getChat(username)`` returns, or ``None`` when the
    Telegram API call fails (for example because the chat does not exist).
    """
    try:
        return bot.getChat(username)
    except TelegramError:
        # Only API-level failures mean "chat not available"; unrelated
        # programming errors are no longer silently swallowed.
        return None
|
||||
|
||||
|
||||
# Commands
|
||||
@run_async
|
||||
def start(bot, update):
|
||||
if not can_use(update):
|
||||
if not utils.can_use_bot(update):
|
||||
return
|
||||
bot.sendMessage(
|
||||
chat_id=update.message.chat_id,
|
||||
text='<b>Willkommen beim RSS-Bot!</b>\nLass uns anfangen! Sende /hilfe, um zu starten.',
|
||||
reply_to_message_id=update.message.message_id,
|
||||
parse_mode=ParseMode.HTML
|
||||
update.message.reply_text(
|
||||
text='<b>Willkommen beim RSS-Bot!</b>\nSende /help, um zu starten.',
|
||||
parse_mode=telegram.ParseMode.HTML
|
||||
)
|
||||
|
||||
|
||||
@run_async
|
||||
def help(bot, update):
|
||||
if not can_use(update):
|
||||
def help_text(bot, update):
|
||||
if not utils.can_use_bot(update):
|
||||
return
|
||||
bot.sendMessage(
|
||||
chat_id=update.message.chat_id,
|
||||
text='<b>/rss</b>: Abonnierte Feeds anzeigen\n<b>/sub</b> <i>Feed-URL</i>: Feed abonnieren\n<b>/del</b> <i>n</i>: Feed löschen',
|
||||
reply_to_message_id=update.message.message_id,
|
||||
parse_mode=ParseMode.HTML
|
||||
update.message.reply_text(
|
||||
text='<b>/rss</b> <i>[Chat]</i>: Abonnierte Feeds anzeigen\n'
|
||||
'<b>/sub</b> <i>Feed-URL</i> <i>[Chat]</i>: Feed abonnieren\n'
|
||||
'<b>/del</b> <i>n</i> <i>[Chat]</i>: Feed löschen',
|
||||
parse_mode=telegram.ParseMode.HTML
|
||||
)
|
||||
|
||||
|
||||
def subscribe_to_rss(bot, update, args):
|
||||
if not can_use(update):
|
||||
@run_async
|
||||
def list_feeds(bot, update, args):
|
||||
if not utils.can_use_bot(update):
|
||||
return
|
||||
if len(args) < 1:
|
||||
bot.sendMessage(chat_id=update.message.chat_id, text='Bitte gebe eine Feed-URL ein.',
|
||||
reply_to_message_id=update.message.message_id)
|
||||
if args:
|
||||
chat_name = args[0]
|
||||
try:
|
||||
resp = bot.getChat(chat_name)
|
||||
except telegram.error.BadRequest:
|
||||
update.message.reply_text('❌ Dieser Kanal existiert nicht.')
|
||||
return
|
||||
chat_id = str(resp.id)
|
||||
chat_title = resp.title
|
||||
else:
|
||||
chat_id = str(update.message.chat.id)
|
||||
if update.message.chat.type == 'private':
|
||||
chat_title = update.message.chat.first_name
|
||||
else:
|
||||
chat_title = update.message.chat.title
|
||||
|
||||
subs = r.smembers(feed_hash.format(chat_id))
|
||||
if not subs:
|
||||
text = '❌ Keine Feeds abonniert.'
|
||||
else:
|
||||
text = '<b>' + html.escape(chat_title) + '</b> hat abonniert:\n'
|
||||
for n, feed in enumerate(subs):
|
||||
text += '<b>' + str(n + 1) + ')</b> ' + feed + '\n'
|
||||
|
||||
update.message.reply_text(
|
||||
text=text,
|
||||
parse_mode=telegram.ParseMode.HTML
|
||||
)
|
||||
|
||||
|
||||
@run_async
|
||||
def subscribe(bot, update, args):
|
||||
if not utils.can_use_bot(update):
|
||||
return
|
||||
if not args:
|
||||
update.message.reply_text('❌ Keine Feed-URL angegeben.')
|
||||
return
|
||||
feed_url = args[0]
|
||||
is_url = re.search("http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+", feed_url)
|
||||
if not is_url:
|
||||
bot.sendMessage(chat_id=update.message.chat_id, text='Dies ist keine URL.',
|
||||
reply_to_message_id=update.message.message_id)
|
||||
if not re.match("^http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*(),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+$", feed_url):
|
||||
update.message.reply_text('❌ Das ist keine URL.')
|
||||
return
|
||||
|
||||
# Get Chat ID from name if given
|
||||
if len(args) > 1:
|
||||
username = args[1]
|
||||
chat_info = check_chat(bot, username)
|
||||
if not chat_info:
|
||||
bot.sendMessage(chat_id=update.message.chat_id, text='Dieser Kanal existiert nicht!',
|
||||
reply_to_message_id=update.message.message_id)
|
||||
return
|
||||
chat_id = str(chat_info.id)
|
||||
else:
|
||||
chat_id = str(update.message.chat_id)
|
||||
|
||||
if r.sismember('pythonbot:rss:' + chat_id, feed_url):
|
||||
bot.sendMessage(chat_id=update.message.chat_id, text='Dieser Feed wurde bereits abonniert.',
|
||||
reply_to_message_id=update.message.message_id)
|
||||
return
|
||||
|
||||
bot.sendChatAction(update.message.chat_id, action=ChatAction.TYPING)
|
||||
feed_data = feedparser.parse(feed_url)
|
||||
if not 'link' in feed_data.feed:
|
||||
bot.sendMessage(chat_id=update.message.chat_id, text='Kein gültiger Feed.',
|
||||
reply_to_message_id=update.message.message_id)
|
||||
return
|
||||
|
||||
if not 'title' in feed_data.feed:
|
||||
feed_title = 'Unbekannten Feed'
|
||||
else:
|
||||
feed_title = feed_data.feed.title
|
||||
|
||||
if len(feed_data.entries) > 0:
|
||||
if not 'id' in feed_data.entries[0]:
|
||||
last_entry = feed_data.entries[0].link
|
||||
else:
|
||||
last_entry = feed_data.entries[0].id
|
||||
lhash = 'pythonbot:rss:' + feed_url + ':last_entry'
|
||||
if not r.exists(lhash):
|
||||
r.set(lhash, last_entry)
|
||||
|
||||
r.sadd('pythonbot:rss:' + feed_url + ':subs', int(chat_id))
|
||||
r.sadd('pythonbot:rss:' + chat_id, feed_url)
|
||||
bot.sendMessage(
|
||||
chat_id=update.message.chat_id,
|
||||
text='<b>' + feed_title + '</b> hinzugefügt!',
|
||||
reply_to_message_id=update.message.message_id,
|
||||
parse_mode=ParseMode.HTML
|
||||
)
|
||||
|
||||
|
||||
def unsubscribe_rss(bot, update, args):
    """Handle the unsubscribe command: remove the n-th feed of a chat.

    ``args[0]`` is the 1-based feed number, ``args[1]`` (optional) names
    another chat whose subscription should be removed instead of the chat
    the command was sent in. Only users accepted by ``can_use`` may call it.
    """
    if not can_use(update):
        return

    message = update.message

    def reply(text, **kwargs):
        """Answer the triggering message in its own chat."""
        bot.sendMessage(
            chat_id=message.chat_id,
            text=text,
            reply_to_message_id=message.message_id,
            **kwargs
        )

    if len(args) < 1:
        reply('Bitte gebe eine Nummer ein')
        return

    if len(args) > 1:
        chat_info = check_chat(bot, args[1])
        if not chat_info:
            reply('Dieser Kanal existiert nicht!')
            return
        chat_id = str(chat_info.id)
    else:
        chat_id = str(message.chat_id)

    # Parse with int() directly: a float-based check accepts inputs such as
    # '1.5' or 'nan', which then crash in int().
    try:
        n = int(args[0])
    except ValueError:
        reply('Bitte gebe eine Nummer ein.')
        return

    uhash = 'pythonbot:rss:' + chat_id
    subs = list(r.smembers(uhash))
    if n < 1 or n > len(subs):
        reply('Abonnement-ID ist zu hoch.')
        return

    sub = subs[n - 1]
    lhash = 'pythonbot:rss:' + sub + ':subs'
    r.srem(uhash, sub)
    r.srem(lhash, int(chat_id))

    # The URL is embedded in an HTML message, so '&', '<' and '>' must be
    # escaped.
    reply('<b>' + html.escape(sub) + '</b> entfernt.', parse_mode=ParseMode.HTML)

    if not r.smembers(lhash):  # no one subscribed, remove it
        r.delete('pythonbot:rss:' + sub + ':last_entry')
|
||||
|
||||
|
||||
def get_rss_list(chat_id, chat_name):
    """Build the HTML text listing all feeds subscribed by a chat.

    ``chat_id`` is the chat id as a string (it is appended to the Redis key
    prefix), ``chat_name`` the display name used in the header. Returns a
    fixed notice when the chat has no subscriptions.
    """
    uhash = 'pythonbot:rss:' + chat_id
    subs = list(r.smembers(uhash))
    if not subs:
        return '<b>Keine Feeds abonniert!</b>'

    # Name and URLs end up in an HTML message, so they must be escaped
    # ('&' is common in feed URLs).
    lines = ['<b>' + html.escape(chat_name) + '</b> hat abonniert:']
    lines.extend(
        str(number) + ') ' + html.escape(feed)
        for number, feed in enumerate(subs, 1)
    )
    return '\n'.join(lines) + '\n'
|
||||
|
||||
|
||||
@run_async
def list_rss(bot, update, args):
    """Reply with the feed list of the current chat or of the chat in args.

    With exactly one argument that argument is resolved through
    ``check_chat``; otherwise the chat the command came from is used.
    """
    if not can_use(update):
        return

    message = update.message
    if len(args) == 1:
        chat_info = check_chat(bot, args[0])
        if not chat_info:
            bot.sendMessage(
                chat_id=message.chat_id,
                text='Dieser Kanal existiert nicht!',
                reply_to_message_id=message.message_id
            )
            return
        rss_list = get_rss_list(str(chat_info.id), chat_info.title)
    else:
        # Use first_name when it is set, otherwise fall back to the title.
        chat_name = message.chat.first_name or message.chat.title
        rss_list = get_rss_list(str(message.chat_id), chat_name)

    bot.sendMessage(
        chat_id=message.chat_id,
        text=rss_list,
        reply_to_message_id=message.message_id,
        parse_mode=ParseMode.HTML
    )
|
||||
|
||||
|
||||
def get_new_entries(last, new_entries):
    """Return the entries that precede the already-seen entry ``last``.

    ``new_entries`` is walked in order; iteration stops at the first entry
    whose ``id`` (or ``link`` when it has no ``id``) equals ``last``.
    Entries carrying neither key cannot be matched and are always treated
    as new instead of raising.
    """
    entries = []
    for entry in new_entries:
        if 'id' in entry:
            identifier = entry['id']
        elif 'link' in entry:
            identifier = entry['link']
        else:
            entries.append(entry)
            continue
        if identifier == last:
            break
        entries.append(entry)
    return entries
|
||||
|
||||
|
||||
def manually_check_rss(bot, update):
    """Run ``check_rss`` on demand and confirm to the caller.

    Does nothing when ``can_use`` rejects the sender.
    """
    if can_use(update):
        check_rss(bot, '')
        message = update.message
        bot.sendMessage(
            chat_id=message.chat_id,
            text='Ausgeführt.',
            reply_to_message_id=message.message_id
        )
|
||||
|
||||
|
||||
@run_async
|
||||
def check_rss(bot, job):
|
||||
keys = list(r.keys('pythonbot:rss:*:subs'))
|
||||
for k, v in enumerate(keys):
|
||||
p = re.compile('pythonbot:rss:(.+):subs')
|
||||
match_func = p.search(v)
|
||||
url = match_func.group(1)
|
||||
print('RSS: ' + url)
|
||||
last = r.get('pythonbot:rss:' + url + ':last_entry')
|
||||
|
||||
feed_data = feedparser.parse(url)
|
||||
chat_name = args[1]
|
||||
try:
|
||||
if feed_data.status < 400:
|
||||
if not 'title' in feed_data.feed:
|
||||
feed_title = feed_data.feed.link
|
||||
else:
|
||||
feed_title = feed_data.feed.title
|
||||
newentr = get_new_entries(last, feed_data.entries)
|
||||
text = ''
|
||||
for k2, v2 in enumerate(newentr):
|
||||
if not 'title' in v2:
|
||||
title = 'Kein Titel'
|
||||
else:
|
||||
title = html.escape(remove_tags(v2.title).lstrip())
|
||||
if not 'link' in v2:
|
||||
link = feed_data.feed.link
|
||||
link_name = link
|
||||
else:
|
||||
link = v2.link
|
||||
f = re.search('^https?://feedproxy\.google\.com/~r/(.+?)/.*', link) # feedproxy.google.com
|
||||
if f:
|
||||
link_name = f.group(1)
|
||||
else:
|
||||
link_name = urlparse(link).netloc
|
||||
link_name = re.sub('^www\d?\.', '', link_name) # www.
|
||||
if 'content' in v2:
|
||||
content = remove_tags(v2.content[0].value).lstrip()
|
||||
content = cleanRSS(content)
|
||||
if len(content) > 250:
|
||||
content = content[0:250] + '...'
|
||||
elif 'summary' in v2:
|
||||
content = remove_tags(v2.summary).lstrip()
|
||||
content = cleanRSS(content)
|
||||
if len(content) > 250:
|
||||
content = content[0:250] + '...'
|
||||
else:
|
||||
content = ''
|
||||
# Für 1 Nachricht pro Beitrag, tue dies:
|
||||
# Entferne hier das "text + "...
|
||||
text = text + '\n<b>' + title + '</b>\n<i>' + feed_title + '</i>\n' + content + '\n<a href="' + link + '">Auf ' + link_name + ' weiterlesen</a>\n'
|
||||
# ...und setze hier vor jeder Zeile 4 zusätzliche Leerzeichen (bis "else", Zeile 379)
|
||||
if text != '':
|
||||
if not 'id' in newentr[0]:
|
||||
newlast = newentr[0].link
|
||||
else:
|
||||
newlast = newentr[0].id
|
||||
r.set('pythonbot:rss:' + url + ':last_entry', newlast)
|
||||
for k2, receiver in enumerate(list(r.smembers(v))):
|
||||
try:
|
||||
bot.sendMessage(receiver, text, parse_mode=ParseMode.HTML, disable_web_page_preview=True)
|
||||
except Unauthorized:
|
||||
print('Chat ' + receiver + ' existiert nicht mehr, lösche aus Abonnenten-Liste')
|
||||
r.srem(v, receiver)
|
||||
r.delete('pythonbot:rss:' + receiver)
|
||||
except ChatMigrated as e:
|
||||
print('Chat migriert: ' + receiver + ' -> ' + str(e.new_chat_id))
|
||||
r.srem(v, receiver)
|
||||
r.sadd(v, e.new_chat_id)
|
||||
r.rename('pythonbot:rss:' + receiver, 'pythonbot:rss:' + str(e.new_chat_id))
|
||||
bot.sendMessage(e.new_chat_id, text, parse_mode=ParseMode.HTML, disable_web_page_preview=True)
|
||||
resp = bot.getChat(chat_name)
|
||||
except telegram.error.BadRequest:
|
||||
update.message.reply_text('❌ Dieser Kanal existiert nicht.')
|
||||
return
|
||||
chat_id = str(resp.id)
|
||||
resp = bot.getChatMember(chat_id, bot.id)
|
||||
if resp.status != 'administrator':
|
||||
update.message.reply_text('❌ Bot ist kein Administrator in diesem Kanal.')
|
||||
return
|
||||
else:
|
||||
chat_id = str(update.message.chat.id)
|
||||
|
||||
bot.sendChatAction(update.message.chat.id, action=telegram.ChatAction.TYPING)
|
||||
data = feedparser.parse(feed_url)
|
||||
if 'link' not in data.feed:
|
||||
update.message.reply_text('❌ Kein gültiger Feed.')
|
||||
return
|
||||
feed_url = data.href # Follow all redirects
|
||||
if r.sismember(feed_hash.format(chat_id), feed_url):
|
||||
update.message.reply_text('✅ Dieser Feed wurde bereits abonniert.')
|
||||
return
|
||||
|
||||
if 'title' not in data.feed:
|
||||
feed_title = feed_url
|
||||
else:
|
||||
feed_title = html.escape(data.feed['title'])
|
||||
|
||||
# Save the last entry in Redis, if it doesn't exist
|
||||
if data.entries:
|
||||
last_entry_hash = feed_hash.format(feed_url + ':last_entry')
|
||||
if not r.exists(last_entry_hash):
|
||||
if 'id' not in data.entries[0]:
|
||||
last_entry = data.entries[0]['link']
|
||||
else:
|
||||
print('HTTP-Fehler: ' + str(feed_data.status))
|
||||
except:
|
||||
print('Fehler: Seite nicht erreichbar')
|
||||
print('-----------------------')
|
||||
last_entry = data.entries[0]['id']
|
||||
r.set(last_entry_hash, last_entry)
|
||||
|
||||
r.sadd(feed_hash.format(feed_url + ':subs'), chat_id)
|
||||
r.sadd(feed_hash.format(chat_id), feed_url)
|
||||
update.message.reply_text(
|
||||
text='✅ <b>' + feed_title + '</b> hinzugefügt!',
|
||||
parse_mode=telegram.ParseMode.HTML
|
||||
)
|
||||
|
||||
|
||||
def error(bot, update, error):
    """Log an error raised while handling ``update``.

    Uses ``logger.warning`` (``logger.warn`` is deprecated) and passes the
    values as logging arguments so formatting only happens when the record
    is actually emitted.
    """
    logger.warning('Update "%s" verursachte Fehler "%s"', update, error)
|
||||
@run_async
def unsubscribe(bot, update, args):
    """Handle /del: remove the n-th subscribed feed of a chat.

    ``args[0]`` is the 1-based feed number, ``args[1]`` (optional) names
    another chat to operate on. Only users accepted by
    ``utils.can_use_bot`` may use the command.
    """
    if not utils.can_use_bot(update):
        return

    message = update.message
    if not args:
        message.reply_text('❌ Keine Nummer angegeben.')
        return

    # Get Chat ID from name if given
    if len(args) > 1:
        chat_name = args[1]
        try:
            resp = bot.getChat(chat_name)
        except telegram.error.BadRequest:
            message.reply_text('❌ Dieser Kanal existiert nicht.')
            return
        chat_id = str(resp.id)
    else:
        chat_id = str(message.chat.id)

    try:
        n = int(args[0])
    except ValueError:
        message.reply_text('❌ Keine Nummer angegeben.')
        return

    chat_hash = feed_hash.format(chat_id)
    subs = r.smembers(chat_hash)
    if n < 1:
        message.reply_text('❌ Nummer muss größer als 0 sein!')
        return
    elif n > len(subs):
        message.reply_text('❌ Feed-ID zu hoch.')
        return

    feed_url = list(subs)[n - 1]
    sub_hash = feed_hash.format(feed_url + ':subs')
    r.srem(chat_hash, feed_url)
    r.srem(sub_hash, chat_id)
    if not r.smembers(sub_hash):  # no one subscribed, remove it
        r.delete(feed_hash.format(feed_url + ':last_entry'))

    # The URL is embedded in an HTML message; escape it so characters such
    # as '&' or '<' cannot break parsing.
    message.reply_text(
        text='✅ <b>' + html.escape(feed_url) + '</b> entfernt.',
        parse_mode=telegram.ParseMode.HTML
    )
|
||||
|
||||
|
||||
@run_async
def check_feed(bot, key):
    """Fetch one feed and forward its new entries to all subscribed chats.

    ``key`` is the Redis key of the feed's subscriber set
    (``pythonbot:rss:<feed url>:subs``). New entries are sent oldest first;
    afterwards the newest entry's id (or link) is stored as the last seen
    entry. Chats that blocked the bot are unsubscribed, migrated chats are
    moved to their new id.
    """
    feed_url = re.match(r'^pythonbot:rss:(.+):subs$', key).group(1)
    logger.info(feed_url)
    data = feedparser.parse(feed_url)
    if 'link' not in data.feed:
        # The parse result may carry no 'status' at all (presumably when no
        # HTTP response was received), so do not access it as an attribute.
        logger.warning('Kein gültiger Feed, Status-Code ' + str(data.get('status')))
        return None

    if 'title' in data.feed:
        feed_title = data.feed['title']
    else:
        feed_title = data.feed['link']
    # The title is embedded in an HTML message and must be escaped.
    feed_title = html.escape(feed_title)

    last_entry_hash = feed_hash.format(feed_url + ':last_entry')
    last_entry = r.get(last_entry_hash)
    new_entries = utils.get_new_entries(data.entries, last_entry)

    for entry in reversed(new_entries):
        if 'title' in entry:
            post_title = html.escape(utils.remove_html_tags(entry['title']).strip())
        else:
            post_title = 'Kein Titel'

        if 'link' in entry:
            post_link = entry.link
            feedproxy = re.search(r'^https?://feedproxy\.google\.com/~r/(.+?)/.*', post_link)  # feedproxy.google.com
            if feedproxy:
                link_name = feedproxy.group(1)
            else:
                link_name = urlparse(post_link).netloc
        else:
            # The entry has no link of its own: fall back to the feed's link
            # (reading entry.link here would raise).
            post_link = data.feed['link']
            link_name = post_link
        link_name = re.sub(r'^www\d?\.', '', link_name)  # remove www.

        if 'content' in entry:
            content = utils.get_content(entry.content[0]['value'])
        elif 'summary' in entry:
            content = utils.get_content(entry.summary)
        else:
            content = ''

        text = '<b>{post_title}</b>\n<i>{feed_title}</i>\n{content}'.format(
            post_title=post_title,
            feed_title=feed_title,
            content=content
        )
        text += '\n<a href="{post_link}">Auf {link_name} weiterlesen</a>\n'.format(
            post_link=html.escape(post_link, quote=True),
            link_name=html.escape(link_name)
        )

        for member in r.smembers(key):
            try:
                bot.sendMessage(
                    chat_id=member,
                    text=text,
                    parse_mode=telegram.ParseMode.HTML,
                    disable_web_page_preview=True
                )
            except telegram.error.Unauthorized:
                logger.warning('Chat ' + member + ' existiert nicht mehr, wird gelöscht.')
                r.srem(key, member)
                r.delete(feed_hash.format(member))
            except telegram.error.ChatMigrated as new_chat:
                new_chat_id = new_chat.new_chat_id
                logger.info('Chat migriert: ' + member + ' -> ' + str(new_chat_id))
                r.srem(key, member)
                r.sadd(key, new_chat_id)
                r.rename(feed_hash.format(member), feed_hash.format(new_chat_id))
                # Retry with the NEW id; the old id is what just failed.
                bot.sendMessage(
                    chat_id=new_chat_id,
                    text=text,
                    parse_mode=telegram.ParseMode.HTML,
                    disable_web_page_preview=True
                )
            except telegram.error.TimedOut:
                # Best effort: skip this chat for this entry, but leave a trace.
                logger.warning('Zeitüberschreitung beim Senden an Chat ' + member)

        # All subscribers were removed while sending: forget the feed.
        if not r.exists(key):
            r.delete(last_entry_hash)
            return

    # Set the new last entry if there are any
    if new_entries:
        if 'id' not in new_entries[0]:
            new_last_entry = new_entries[0].link
        else:
            new_last_entry = new_entries[0].id
        r.set(last_entry_hash, new_last_entry)
|
||||
|
||||
|
||||
@run_async
def run_job(bot, job=None):
    """Check every subscribed feed once.

    The function is registered both as the repeating job-queue callback and
    as the handler of the ``sync`` command. In the latter case the second
    positional argument is the incoming update, so the same admin check as
    for all other commands is applied before doing any work.
    """
    if isinstance(job, telegram.Update) and not utils.can_use_bot(job):
        return

    logger.info('================================')
    for key in r.keys('pythonbot:rss:*:subs'):
        check_feed(bot, key)
|
||||
|
||||
|
||||
# Main function
|
||||
def main():
|
||||
# Create the EventHandler and pass it your bot's token.
|
||||
updater = Updater(token=config['DEFAULT']['token'])
|
||||
j = updater.job_queue
|
||||
# Setup the updater and show bot info
|
||||
updater = Updater(token=bot_token)
|
||||
try:
|
||||
bot_info = updater.bot.getMe()
|
||||
except telegram.error.Unauthorized:
|
||||
logger.error('Anmeldung nicht möglich, Bot-Token falsch?')
|
||||
sys.exit(1)
|
||||
|
||||
# Bot-Infos prüfen
|
||||
bot_info = updater.bot.getMe()
|
||||
print('Starte ' + bot_info.first_name + ', AKA @' + bot_info.username + ' (' + str(bot_info.id) + ')')
|
||||
logger.info('Starte ' + bot_info.first_name + ', AKA @' + bot_info.username + ' (' + str(bot_info.id) + ')')
|
||||
|
||||
# Get the dispatcher to register handlers
|
||||
dp = updater.dispatcher
|
||||
# Register Handlers
|
||||
handlers = [
|
||||
CommandHandler('start', start),
|
||||
CommandHandler('help', help_text),
|
||||
CommandHandler('rss', list_feeds, pass_args=True),
|
||||
CommandHandler('sub', subscribe, pass_args=True),
|
||||
CommandHandler('del', unsubscribe, pass_args=True),
|
||||
CommandHandler('sync', run_job)
|
||||
]
|
||||
for handler in handlers:
|
||||
updater.dispatcher.add_handler(handler)
|
||||
|
||||
# on different commands - answer in Telegram
|
||||
dp.add_handler(CommandHandler("start", start))
|
||||
dp.add_handler(CommandHandler("help", help))
|
||||
dp.add_handler(CommandHandler("hilfe", help))
|
||||
updater.job_queue.run_repeating(
|
||||
run_job,
|
||||
interval=60.0,
|
||||
first=2.0
|
||||
)
|
||||
|
||||
dp.add_handler(CommandHandler("rss", list_rss, pass_args=True))
|
||||
dp.add_handler(CommandHandler("sub", subscribe_to_rss, pass_args=True))
|
||||
dp.add_handler(CommandHandler("del", unsubscribe_rss, pass_args=True))
|
||||
dp.add_handler(CommandHandler("sync", manually_check_rss))
|
||||
# Start this thing!
|
||||
updater.start_polling(
|
||||
clean=True,
|
||||
bootstrap_retries=-1,
|
||||
allowed_updates=["message"]
|
||||
)
|
||||
|
||||
# log all errors
|
||||
dp.add_error_handler(error)
|
||||
|
||||
# cron
|
||||
j.run_repeating(check_rss, interval=60.0, first=15.0)
|
||||
|
||||
# Start the Bot
|
||||
updater.start_polling(timeout=20, clean=True, bootstrap_retries=-1, allowed_updates=["message"])
|
||||
|
||||
# Run the bot until you press Ctrl-C or the process receives SIGINT,
|
||||
# SIGTERM or SIGABRT. This should be used most of the time, since
|
||||
# start_polling() is non-blocking and will stop the bot gracefully.
|
||||
# Run Bot until CTRL+C is pressed or a SIGINT,
|
||||
# SIGTERM or SIGABRT is sent.
|
||||
updater.idle()
|
||||
|
||||
|
||||
|
@ -8,4 +8,5 @@ token = 1337:1234567890abcdefgh
|
||||
#socket_path = /home/user/.redis/sock
|
||||
|
||||
[ADMIN]
|
||||
# Must be integers!
|
||||
id = [1337]
|
@ -1,4 +1,3 @@
|
||||
beautifulsoup4
|
||||
feedparser
|
||||
python-telegram-bot
|
||||
redis
|
81
utils.py
Normal file
81
utils.py
Normal file
@ -0,0 +1,81 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
import re
|
||||
|
||||
from bot import admins
|
||||
|
||||
|
||||
def can_use_bot(update):
    """Returns True if the sender of the update's message is in ``admins``"""
    sender = update.message.from_user
    return sender.id in admins
|
||||
|
||||
|
||||
def get_new_entries(entries, last_entry):
    """Returns all entries that come before the already-seen ``last_entry``.

    ``entries`` is walked in order; iteration stops at the first entry whose
    ``id`` (or ``link`` when it has no ``id``) equals ``last_entry``.
    Entries that carry neither key cannot be matched and are always treated
    as new instead of raising ``KeyError``.
    """
    new_entries = []
    for entry in entries:
        if 'id' in entry:
            identifier = entry['id']
        elif 'link' in entry:
            identifier = entry['link']
        else:
            new_entries.append(entry)
            continue
        if identifier == last_entry:
            break
        new_entries.append(entry)
    return new_entries
|
||||
|
||||
|
||||
# Compiled once at import time. '[^>]*' also matches line breaks, so tags
# that span several lines are removed as well.
_HTML_TAG_RE = re.compile(r'<[^>]*>')


def remove_html_tags(html):
    """Removes HTML tags, including tags that contain line breaks"""
    return _HTML_TAG_RE.sub('', html)
|
||||
|
||||
|
||||
def clean_rss(content):
    """Cleans content

    Removes known feed boilerplate phrases in the listed order (longer
    variants such as ``'...[more]'`` must come before their shorter forms),
    converts remaining ``<`` and ``>`` to HTML entities so the text can be
    embedded in an HTML-formatted message, and finally strips a few
    site-specific footers by regular expression.
    """
    boilerplate = (
        '[…]',
        '[bilder]',
        '[mehr]',
        '[video]',
        '...[more]',
        '[more]',
        '[liveticker]',
        '[livestream]',
        '[multimedia]',
        '[phoenix]',
        '[swr]',
        '[ndr]',
        '[mdr]',
        '[rbb]',
        '[wdr]',
        '[hr]',
        '[br]',
        'Click for full.',
        'Read more »',
        'Read more',
        '(more…)',
        'View On WordPress',
        'Continue reading →',
        '(RSS generated with FetchRss)',
        '-- Delivered by Feed43 service',
        'Meldung bei www.tagesschau.de lesen',
    )
    for phrase in boilerplate:
        content = content.replace(phrase, '')

    # Escape stray angle brackets; the previous replacements mapped each
    # character onto itself and therefore did nothing.
    content = content.replace('<', '&lt;')
    content = content.replace('>', '&gt;')

    # Raw strings avoid invalid-escape warnings; the patterns are unchanged.
    content = re.sub(r'Der Beitrag.*erschien zuerst auf MAnime.de.', '', content)
    content = re.sub(r'The post.*appeared first on Sugoi! Anime Blog.', '', content)
    content = re.sub(r'http://www\.serienjunkies.de/.*\.html', '', content)
    return content
|
||||
|
||||
|
||||
def get_content(content):
    """Strips tags and boilerplate, then limits the text to 250 chars

    Longer results are cut after 250 characters and get '...' appended.
    """
    stripped = remove_html_tags(content).strip()
    cleaned = clean_rss(stripped)
    if len(cleaned) <= 250:
        return cleaned
    return cleaned[:250] + '...'
|
Reference in New Issue
Block a user