diff --git a/.gitignore b/.gitignore
index 19a6bf7..7d06023 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,7 +1,11 @@
-# Byte-compiled / optimized / DLL files
+# Byte-compiled / optimized / DLL files / pip
__pycache__/
*.py[cod]
*$py.class
+src/
-# Config
+# IDE
+.idea/
+
+# Bot-specific files
config.ini
\ No newline at end of file
diff --git a/README.md b/README.md
index 4aeaa21..a9587da 100644
--- a/README.md
+++ b/README.md
@@ -1,13 +1,12 @@
RSS-Bot für Telegram
=====================
-
-1. `git clone https://gitlab.com/iCON/rssbot`
-2. `sudo apt-get install python3 python3-pip`
-3. `sudo pip3 install -r requirements.txt`
-4. `cp config.ini.example config.ini`
-5. Bot-Token in `config.ini` einfügen
+1. **Klonen:** `git clone https://git.centzilius.de/iCON/rssbot`
+2. **Python 3 installieren:** `sudo apt-get install python3 python3-pip`
+3. **Module installieren:** `sudo pip3 install -r requirements.txt`
+4. **Config kopieren:** `cp config.ini.example config.ini`
+5. **Bot-Token** in `config.ini` einfügen
1. Weitere Einstellungen für Redis vornehmen, falls vom Standard abweicht
-6. `bot.py` öffnen und unter `def can_use(update):` die ID zur eigenen abändern
+6. **Eigene Admin-ID in die Config eintragen**
7. `python3 bot.py`
(c) 2016-2017 Andreas Bielawski
diff --git a/bot.py b/bot.py
index 6946e3d..1e04d42 100644
--- a/bot.py
+++ b/bot.py
@@ -1,428 +1,378 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
# -*- coding: utf-8 -*-
-#
-# RSS Bot
-# Python 3 required
-
import html
import logging
import re
+import sys
from configparser import ConfigParser
from json import loads
from urllib.parse import urlparse
import feedparser
import redis
-from bs4 import BeautifulSoup
-from telegram import ChatAction, ParseMode
-from telegram.error import TelegramError, Unauthorized, BadRequest, TimedOut, NetworkError, ChatMigrated
-from telegram.ext import Updater, Job, CommandHandler, MessageHandler, Filters
+import telegram
+from telegram.ext import CommandHandler, Updater
from telegram.ext.dispatcher import run_async
-# Bot Configuration
-config = ConfigParser()
-config.read_file(open('config.ini'))
+import utils
-redis_conf = config['REDIS']
-redis_db = redis_conf.get('db', 0)
-redis_host = redis_conf.get('host')
-redis_port = redis_conf.get('port', 6379)
-redis_socket = redis_conf.get('socket_path')
-
-# Enable logging
-logging.basicConfig(format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
- level=logging.ERROR)
+# Logging
+logging.basicConfig(
+ format="%(asctime)s - %(levelname)s: %(message)s",
+ datefmt="%d.%m.%Y %H:%M:%S",
+ level=logging.INFO
+)
logger = logging.getLogger(__name__)
-# Admins
-admins = loads(config["ADMIN"]["id"])
-if not admins:
- print('Keine Admin-IDs gesetzt, bitte Konfigurationsdatei bearbeiten.')
- quit()
+# Bot configuration
+config = ConfigParser()
+try:
+ config.read_file(open('config.ini'))
+except FileNotFoundError:
+ logger.critical('Config.ini nicht gefunden')
+ sys.exit(1)
-# Utils
+# Bot token
+try:
+ bot_token = config['DEFAULT']['token']
+except KeyError:
+ logger.error('Kein Bot-Token gesetzt, bitte config.ini prüfen')
+ sys.exit(1)
+if not bot_token:
+ logger.error('Kein Bot-Token gesetzt, bitte config.ini prüfen')
+ sys.exit(1)
+
+# Admins
+try:
+ admins = loads(config["ADMIN"]["id"])
+except KeyError:
+ logger.error('Keine Admin-IDs gesetzt, bitte config.ini prüfen.')
+ sys.exit(1)
+if not admins:
+ logger.error('Keine Admin-IDs gesetzt, bitte config.ini prüfen.')
+ sys.exit(1)
+
+for admin in admins:
+ if not isinstance(admin, int):
+ logger.error('Admin-IDs müssen Integer sein.')
+ sys.exit(1)
+
+# Redis
+redis_conf = config['REDIS']
+redis_db = redis_conf.get('db', 0)
+redis_host = redis_conf.get('host', '127.0.0.1')
+redis_port = redis_conf.get('port', 6379)
+redis_socket = redis_conf.get('socket_path')
if redis_socket:
r = redis.Redis(unix_socket_path=redis_socket, db=int(redis_db), decode_responses=True)
else:
r = redis.Redis(host=redis_host, port=int(redis_port), db=int(redis_db), decode_responses=True)
if not r.ping():
- print('Konnte nicht mit Redis verbinden, prüfe deine Einstellungen')
- quit()
+ logging.getLogger("Redis").critical("Redis-Verbindungsfehler, config.ini prüfen")
+ sys.exit(1)
+
+feed_hash = 'pythonbot:rss:{0}'
-def is_number(s):
- try:
- float(s)
- return True
- except ValueError:
- return False
-
-
-def remove_tags(html):
- return ''.join(BeautifulSoup(html, "html.parser").findAll(text=True))
-
-
-def can_use(update):
- if update.message.from_user.id in admins:
- return True
- else:
- return False
-
-
-def cleanRSS(str):
- str = str.replace('[…]', '')
- str = str.replace('[bilder]', '')
- str = str.replace('[mehr]', '')
- str = str.replace('[video]', '')
- str = str.replace('...[more]', '')
- str = str.replace('[more]', '')
- str = str.replace('[liveticker]', '')
- str = str.replace('[livestream]', '')
- str = str.replace('[multimedia]', '')
- str = str.replace('[phoenix]', '')
- str = str.replace('[swr]', '')
- str = str.replace('[ndr]', '')
- str = str.replace('[mdr]', '')
- str = str.replace('[rbb]', '')
- str = str.replace('[wdr]', '')
- str = str.replace('[hr]', '')
- str = str.replace('[br]', '')
- str = str.replace('Click for full.', '')
- str = str.replace('Read more »', '')
- str = str.replace('Read more', '')
- str = str.replace('(more…)', '')
- str = str.replace('View On WordPress', '')
- str = str.replace('Continue reading →', '')
- str = str.replace('(RSS generated with FetchRss)', '')
- str = str.replace('-- Delivered by Feed43 service', '')
- str = str.replace('Meldung bei www.tagesschau.de lesen', '')
- str = str.replace('<', '<')
- str = str.replace('>', '>')
- str = re.sub('Der Beitrag.*erschien zuerst auf MAnime.de.', '', str)
- str = re.sub('The post.*appeared first on Sugoi! Anime Blog.', '', str)
- str = re.sub('http://www\.serienjunkies.de/.*\.html', '', str)
- return str
-
-
-def check_chat(bot, username):
- try:
- return bot.getChat(username)
- except:
- return
-
-
-# Commands
@run_async
def start(bot, update):
- if not can_use(update):
+ if not utils.can_use_bot(update):
return
- bot.sendMessage(
- chat_id=update.message.chat_id,
- text='Willkommen beim RSS-Bot!\nLass uns anfangen! Sende /hilfe, um zu starten.',
- reply_to_message_id=update.message.message_id,
- parse_mode=ParseMode.HTML
+ update.message.reply_text(
+ text='Willkommen beim RSS-Bot!\nSende /help, um zu starten.',
+ parse_mode=telegram.ParseMode.HTML
)
@run_async
-def help(bot, update):
- if not can_use(update):
+def help_text(bot, update):
+ if not utils.can_use_bot(update):
return
- bot.sendMessage(
- chat_id=update.message.chat_id,
- text='/rss: Abonnierte Feeds anzeigen\n/sub Feed-URL: Feed abonnieren\n/del n: Feed löschen',
- reply_to_message_id=update.message.message_id,
- parse_mode=ParseMode.HTML
+ update.message.reply_text(
+ text='/rss [Chat]: Abonnierte Feeds anzeigen\n'
+ '/sub Feed-URL [Chat]: Feed abonnieren\n'
+ '/del n [Chat]: Feed löschen',
+ parse_mode=telegram.ParseMode.HTML
)
-def subscribe_to_rss(bot, update, args):
- if not can_use(update):
+@run_async
+def list_feeds(bot, update, args):
+ if not utils.can_use_bot(update):
return
- if len(args) < 1:
- bot.sendMessage(chat_id=update.message.chat_id, text='Bitte gebe eine Feed-URL ein.',
- reply_to_message_id=update.message.message_id)
+ if args:
+ chat_name = args[0]
+ try:
+ resp = bot.getChat(chat_name)
+ except telegram.error.BadRequest:
+ update.message.reply_text('❌ Dieser Kanal existiert nicht.')
+ return
+ chat_id = str(resp.id)
+ chat_title = resp.title
+ else:
+ chat_id = str(update.message.chat.id)
+ if update.message.chat.type == 'private':
+ chat_title = update.message.chat.first_name
+ else:
+ chat_title = update.message.chat.title
+
+ subs = r.smembers(feed_hash.format(chat_id))
+ if not subs:
+ text = '❌ Keine Feeds abonniert.'
+ else:
+ text = '' + html.escape(chat_title) + ' hat abonniert:\n'
+ for n, feed in enumerate(subs):
+ text += '' + str(n + 1) + ') ' + feed + '\n'
+
+ update.message.reply_text(
+ text=text,
+ parse_mode=telegram.ParseMode.HTML
+ )
+
+
+@run_async
+def subscribe(bot, update, args):
+ if not utils.can_use_bot(update):
+ return
+ if not args:
+ update.message.reply_text('❌ Keine Feed-URL angegeben.')
return
feed_url = args[0]
- is_url = re.search("http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+", feed_url)
- if not is_url:
- bot.sendMessage(chat_id=update.message.chat_id, text='Dies ist keine URL.',
- reply_to_message_id=update.message.message_id)
+ if not re.match("^http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*(),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+$", feed_url):
+ update.message.reply_text('❌ Das ist keine URL.')
return
+ # Get Chat ID from name if given
if len(args) > 1:
- username = args[1]
- chat_info = check_chat(bot, username)
- if not chat_info:
- bot.sendMessage(chat_id=update.message.chat_id, text='Dieser Kanal existiert nicht!',
- reply_to_message_id=update.message.message_id)
- return
- chat_id = str(chat_info.id)
- else:
- chat_id = str(update.message.chat_id)
-
- if r.sismember('pythonbot:rss:' + chat_id, feed_url):
- bot.sendMessage(chat_id=update.message.chat_id, text='Dieser Feed wurde bereits abonniert.',
- reply_to_message_id=update.message.message_id)
- return
-
- bot.sendChatAction(update.message.chat_id, action=ChatAction.TYPING)
- feed_data = feedparser.parse(feed_url)
- if not 'link' in feed_data.feed:
- bot.sendMessage(chat_id=update.message.chat_id, text='Kein gültiger Feed.',
- reply_to_message_id=update.message.message_id)
- return
-
- if not 'title' in feed_data.feed:
- feed_title = 'Unbekannten Feed'
- else:
- feed_title = feed_data.feed.title
-
- if len(feed_data.entries) > 0:
- if not 'id' in feed_data.entries[0]:
- last_entry = feed_data.entries[0].link
- else:
- last_entry = feed_data.entries[0].id
- lhash = 'pythonbot:rss:' + feed_url + ':last_entry'
- if not r.exists(lhash):
- r.set(lhash, last_entry)
-
- r.sadd('pythonbot:rss:' + feed_url + ':subs', int(chat_id))
- r.sadd('pythonbot:rss:' + chat_id, feed_url)
- bot.sendMessage(
- chat_id=update.message.chat_id,
- text='' + feed_title + ' hinzugefügt!',
- reply_to_message_id=update.message.message_id,
- parse_mode=ParseMode.HTML
- )
-
-
-def unsubscribe_rss(bot, update, args):
- if not can_use(update):
- return
-
- if len(args) < 1:
- bot.sendMessage(chat_id=update.message.chat_id, text='Bitte gebe eine Nummer ein',
- reply_to_message_id=update.message.message_id)
- return
-
- if len(args) > 1:
- username = args[1]
- chat_info = check_chat(bot, username)
- if not chat_info:
- bot.sendMessage(chat_id=update.message.chat_id, text='Dieser Kanal existiert nicht!',
- reply_to_message_id=update.message.message_id)
- return
- chat_id = str(chat_info.id)
- else:
- chat_id = str(update.message.chat_id)
-
- if not is_number(args[0]):
- bot.sendMessage(chat_id=update.message.chat_id, text='Bitte gebe eine Nummer ein.',
- reply_to_message_id=update.message.message_id)
- return
- uhash = 'pythonbot:rss:' + chat_id
- n = int(args[0])
- subs = list(r.smembers(uhash))
- if n < 1 or n > len(subs):
- bot.sendMessage(chat_id=update.message.chat_id, text='Abonnement-ID ist zu hoch.',
- reply_to_message_id=update.message.message_id)
- return
- sub = subs[n - 1]
- lhash = 'pythonbot:rss:' + sub + ':subs'
- r.srem(uhash, sub)
- r.srem(lhash, int(chat_id))
- bot.sendMessage(
- chat_id=update.message.chat_id,
- text='' + sub + ' entfernt.',
- reply_to_message_id=update.message.message_id,
- parse_mode=ParseMode.HTML
- )
- left = r.smembers(lhash)
- if len(left) < 1: # no one subscribed, remove it
- r.delete('pythonbot:rss:' + sub + ':last_entry')
-
-
-def get_rss_list(chat_id, chat_name):
- uhash = 'pythonbot:rss:' + chat_id
- subs = list(r.smembers(uhash))
- if len(subs) < 1:
- return 'Keine Feeds abonniert!'
- text = '' + chat_name + ' hat abonniert:\n'
- for n, feed in enumerate(subs):
- text = text + str(n + 1) + ') ' + feed + '\n'
- return text
-
-
-@run_async
-def list_rss(bot, update, args):
- if not can_use(update):
- return
- if len(args) == 1:
- username = args[0]
- chat_info = check_chat(bot, username)
- if not chat_info:
- bot.sendMessage(chat_id=update.message.chat_id, text='Dieser Kanal existiert nicht!',
- reply_to_message_id=update.message.message_id)
- return
- rss_list = get_rss_list(str(chat_info.id), chat_info.title)
- else:
- if update.message.chat.first_name:
- chat_name = update.message.chat.first_name
- else:
- chat_name = update.message.chat.title
- rss_list = get_rss_list(str(update.message.chat_id), chat_name)
- bot.sendMessage(
- chat_id=update.message.chat_id,
- text=rss_list,
- reply_to_message_id=update.message.message_id,
- parse_mode=ParseMode.HTML
- )
-
-
-def get_new_entries(last, new_entries):
- entries = []
- for k, v in enumerate(new_entries):
- if 'id' in v:
- if v.id == last:
- return entries
- else:
- entries.append(v)
- else:
- if v.link == last:
- return entries
- else:
- entries.append(v)
- return entries
-
-
-def manually_check_rss(bot, update):
- if not can_use(update):
- return
- check_rss(bot, '')
- bot.sendMessage(
- chat_id=update.message.chat_id,
- text='Ausgeführt.',
- reply_to_message_id=update.message.message_id
- )
-
-
-@run_async
-def check_rss(bot, job):
- keys = list(r.keys('pythonbot:rss:*:subs'))
- for k, v in enumerate(keys):
- p = re.compile('pythonbot:rss:(.+):subs')
- match_func = p.search(v)
- url = match_func.group(1)
- print('RSS: ' + url)
- last = r.get('pythonbot:rss:' + url + ':last_entry')
-
- feed_data = feedparser.parse(url)
+ chat_name = args[1]
try:
- if feed_data.status < 400:
- if not 'title' in feed_data.feed:
- feed_title = feed_data.feed.link
- else:
- feed_title = feed_data.feed.title
- newentr = get_new_entries(last, feed_data.entries)
- text = ''
- for k2, v2 in enumerate(newentr):
- if not 'title' in v2:
- title = 'Kein Titel'
- else:
- title = html.escape(remove_tags(v2.title).lstrip())
- if not 'link' in v2:
- link = feed_data.feed.link
- link_name = link
- else:
- link = v2.link
- f = re.search('^https?://feedproxy\.google\.com/~r/(.+?)/.*', link) # feedproxy.google.com
- if f:
- link_name = f.group(1)
- else:
- link_name = urlparse(link).netloc
- link_name = re.sub('^www\d?\.', '', link_name) # www.
- if 'content' in v2:
- content = remove_tags(v2.content[0].value).lstrip()
- content = cleanRSS(content)
- if len(content) > 250:
- content = content[0:250] + '...'
- elif 'summary' in v2:
- content = remove_tags(v2.summary).lstrip()
- content = cleanRSS(content)
- if len(content) > 250:
- content = content[0:250] + '...'
- else:
- content = ''
- # Für 1 Nachricht pro Beitrag, tue dies:
- # Entferne hier das "text + "...
- text = text + '\n' + title + '\n' + feed_title + '\n' + content + '\nAuf ' + link_name + ' weiterlesen\n'
- # ...und setze hier vor jeder Zeile 4 zusätzliche Leerzeichen (bis "else", Zeile 379)
- if text != '':
- if not 'id' in newentr[0]:
- newlast = newentr[0].link
- else:
- newlast = newentr[0].id
- r.set('pythonbot:rss:' + url + ':last_entry', newlast)
- for k2, receiver in enumerate(list(r.smembers(v))):
- try:
- bot.sendMessage(receiver, text, parse_mode=ParseMode.HTML, disable_web_page_preview=True)
- except Unauthorized:
- print('Chat ' + receiver + ' existiert nicht mehr, lösche aus Abonnenten-Liste')
- r.srem(v, receiver)
- r.delete('pythonbot:rss:' + receiver)
- except ChatMigrated as e:
- print('Chat migriert: ' + receiver + ' -> ' + str(e.new_chat_id))
- r.srem(v, receiver)
- r.sadd(v, e.new_chat_id)
- r.rename('pythonbot:rss:' + receiver, 'pythonbot:rss:' + str(e.new_chat_id))
- bot.sendMessage(e.new_chat_id, text, parse_mode=ParseMode.HTML, disable_web_page_preview=True)
+ resp = bot.getChat(chat_name)
+ except telegram.error.BadRequest:
+ update.message.reply_text('❌ Dieser Kanal existiert nicht.')
+ return
+ chat_id = str(resp.id)
+ resp = bot.getChatMember(chat_id, bot.id)
+ if resp.status != 'administrator':
+ update.message.reply_text('❌ Bot ist kein Administrator in diesem Kanal.')
+ return
+ else:
+ chat_id = str(update.message.chat.id)
+
+ bot.sendChatAction(update.message.chat.id, action=telegram.ChatAction.TYPING)
+ data = feedparser.parse(feed_url)
+ if 'link' not in data.feed:
+ update.message.reply_text('❌ Kein gültiger Feed.')
+ return
+ feed_url = data.href # Follow all redirects
+ if r.sismember(feed_hash.format(chat_id), feed_url):
+ update.message.reply_text('✅ Dieser Feed wurde bereits abonniert.')
+ return
+
+ if 'title' not in data.feed:
+ feed_title = feed_url
+ else:
+ feed_title = html.escape(data.feed['title'])
+
+ # Save the last entry in Redis, if it doesn't exist
+ if data.entries:
+ last_entry_hash = feed_hash.format(feed_url + ':last_entry')
+ if not r.exists(last_entry_hash):
+ if 'id' not in data.entries[0]:
+ last_entry = data.entries[0]['link']
else:
- print('HTTP-Fehler: ' + str(feed_data.status))
- except:
- print('Fehler: Seite nicht erreichbar')
- print('-----------------------')
+ last_entry = data.entries[0]['id']
+ r.set(last_entry_hash, last_entry)
+
+ r.sadd(feed_hash.format(feed_url + ':subs'), chat_id)
+ r.sadd(feed_hash.format(chat_id), feed_url)
+ update.message.reply_text(
+ text='✅ ' + feed_title + ' hinzugefügt!',
+ parse_mode=telegram.ParseMode.HTML
+ )
-def error(bot, update, error):
- logger.warn('Update "%s" verursachte Fehler "%s"' % (update, error))
+@run_async
+def unsubscribe(bot, update, args):
+ if not utils.can_use_bot(update):
+ return
+ if not args:
+ update.message.reply_text('❌ Keine Nummer angegeben.')
+ return
+
+ # Get Chat ID from name if given
+ if len(args) > 1:
+ chat_name = args[1]
+ try:
+ resp = bot.getChat(chat_name)
+ except telegram.error.BadRequest:
+ update.message.reply_text('❌ Dieser Kanal existiert nicht.')
+ return
+ chat_id = str(resp.id)
+ else:
+ chat_id = str(update.message.chat.id)
+
+ try:
+ n = int(args[0])
+ except ValueError:
+ update.message.reply_text('❌ Keine Nummer angegeben.')
+ return
+
+ chat_hash = feed_hash.format(chat_id)
+ subs = r.smembers(chat_hash)
+ if n < 1:
+ update.message.reply_text('❌ Nummer muss größer als 0 sein!')
+ return
+ elif n > len(subs):
+ update.message.reply_text('❌ Feed-ID zu hoch.')
+ return
+
+ feed_url = list(subs)[n - 1]
+ sub_hash = feed_hash.format(feed_url + ':subs')
+ r.srem(chat_hash, feed_url)
+ r.srem(sub_hash, chat_id)
+ if not r.smembers(sub_hash): # no one subscribed, remove it
+ r.delete(feed_hash.format(feed_url + ':last_entry'))
+
+ update.message.reply_text(
+ text='✅ ' + feed_url + ' entfernt.',
+ parse_mode=telegram.ParseMode.HTML
+ )
+@run_async
+def check_feed(bot, key):
+ feed_url = re.match('^pythonbot:rss:(.+):subs$', key).group(1)
+ logger.info(feed_url)
+ data = feedparser.parse(feed_url)
+ if 'link' not in data.feed:
+ logger.warning('Kein gültiger Feed, Status-Code ' + str(data.status))
+ return None
+ if 'title' not in data.feed:
+ feed_title = data.feed['link']
+ else:
+ feed_title = data.feed['title']
+ last_entry_hash = feed_hash.format(feed_url + ':last_entry')
+ last_entry = r.get(last_entry_hash)
+ new_entries = utils.get_new_entries(data.entries, last_entry)
+ for entry in reversed(new_entries):
+ if 'title' not in entry:
+ post_title = 'Kein Titel'
+ else:
+ post_title = html.escape(utils.remove_html_tags(entry['title']).strip())
+ if 'link' not in entry:
+ post_link = entry.link
+ link_name = post_link
+ else:
+ post_link = entry.link
+ feedproxy = re.search('^https?://feedproxy\.google\.com/~r/(.+?)/.*', post_link) # feedproxy.google.com
+ if feedproxy:
+ link_name = feedproxy.group(1)
+ else:
+ link_name = urlparse(post_link).netloc
+ link_name = re.sub('^www\d?\.', '', link_name) # remove www.
+ if 'content' in entry:
+ content = utils.get_content(entry.content[0]['value'])
+ elif 'summary' in entry:
+ content = utils.get_content(entry.summary)
+ else:
+ content = ''
+ text = '{post_title}\n{feed_title}\n{content}'.format(
+ post_title=post_title,
+ feed_title=feed_title,
+ content=content
+ )
+ text += '\nAuf {link_name} weiterlesen\n'.format(
+ post_link=post_link,
+ link_name=link_name
+ )
+ for member in r.smembers(key):
+ try:
+ bot.sendMessage(
+ chat_id=member,
+ text=text,
+ parse_mode=telegram.ParseMode.HTML,
+ disable_web_page_preview=True
+ )
+ except telegram.error.Unauthorized:
+ logging.warning('Chat ' + member + ' existiert nicht mehr, wird gelöscht.')
+ r.srem(key, member)
+ r.delete(feed_hash.format(member))
+ except telegram.error.ChatMigrated as new_chat:
+ new_chat_id = new_chat.new_chat_id
+ logging.info('Chat migriert: ' + member + ' -> ' + str(new_chat_id))
+ r.srem(key, member)
+ r.sadd(key, new_chat_id)
+ r.rename(feed_hash.format(member), feed_hash.format(new_chat_id))
+ bot.sendMessage(
+ chat_id=member,
+ text=text,
+ parse_mode=telegram.ParseMode.HTML,
+ disable_web_page_preview=True
+ )
+ except telegram.error.TimedOut:
+ pass
+
+ if not r.exists(key):
+ r.delete(last_entry_hash)
+ return
+
+ # Remember the newest entry as the last seen one, if there are any new entries
+ if new_entries:
+ if 'id' not in new_entries[0]:
+ new_last_entry = new_entries[0].link
+ else:
+ new_last_entry = new_entries[0].id
+ r.set(last_entry_hash, new_last_entry)
+
+
+@run_async
+def run_job(bot, job=None):
+ logger.info('================================')
+ keys = r.keys('pythonbot:rss:*:subs')
+ for key in keys:
+ check_feed(bot, key)
+
+
+# Main function
def main():
- # Create the EventHandler and pass it your bot's token.
- updater = Updater(token=config['DEFAULT']['token'])
- j = updater.job_queue
+ # Set up the updater and log the bot info
+ updater = Updater(token=bot_token)
+ try:
+ bot_info = updater.bot.getMe()
+ except telegram.error.Unauthorized:
+ logger.error('Anmeldung nicht möglich, Bot-Token falsch?')
+ sys.exit(1)
- # Bot-Infos prüfen
- bot_info = updater.bot.getMe()
- print('Starte ' + bot_info.first_name + ', AKA @' + bot_info.username + ' (' + str(bot_info.id) + ')')
+ logger.info('Starte ' + bot_info.first_name + ', AKA @' + bot_info.username + ' (' + str(bot_info.id) + ')')
- # Get the dispatcher to register handlers
- dp = updater.dispatcher
+ # Register Handlers
+ handlers = [
+ CommandHandler('start', start),
+ CommandHandler('help', help_text),
+ CommandHandler('rss', list_feeds, pass_args=True),
+ CommandHandler('sub', subscribe, pass_args=True),
+ CommandHandler('del', unsubscribe, pass_args=True),
+ CommandHandler('sync', run_job)
+ ]
+ for handler in handlers:
+ updater.dispatcher.add_handler(handler)
- # on different commands - answer in Telegram
- dp.add_handler(CommandHandler("start", start))
- dp.add_handler(CommandHandler("help", help))
- dp.add_handler(CommandHandler("hilfe", help))
+ updater.job_queue.run_repeating(
+ run_job,
+ interval=60.0,
+ first=2.0
+ )
- dp.add_handler(CommandHandler("rss", list_rss, pass_args=True))
- dp.add_handler(CommandHandler("sub", subscribe_to_rss, pass_args=True))
- dp.add_handler(CommandHandler("del", unsubscribe_rss, pass_args=True))
- dp.add_handler(CommandHandler("sync", manually_check_rss))
+ # Start this thing!
+ updater.start_polling(
+ clean=True,
+ bootstrap_retries=-1,
+ allowed_updates=["message"]
+ )
- # log all errors
- dp.add_error_handler(error)
-
- # cron
- j.run_repeating(check_rss, interval=60.0, first=15.0)
-
- # Start the Bot
- updater.start_polling(timeout=20, clean=True, bootstrap_retries=-1, allowed_updates=["message"])
-
- # Run the bot until the you presses Ctrl-C or the process receives SIGINT,
- # SIGTERM or SIGABRT. This should be used most of the time, since
- # start_polling() is non-blocking and will stop the bot gracefully.
+ # Run Bot until CTRL+C is pressed or a SIGINT,
+ # SIGTERM or SIGABRT is sent.
updater.idle()
diff --git a/config.ini.example b/config.ini.example
index 245ce0f..1bac842 100644
--- a/config.ini.example
+++ b/config.ini.example
@@ -8,4 +8,5 @@ token = 1337:1234567890abcdefgh
#socket_path = /home/user/.redis/sock
[ADMIN]
+# Must be integers!
id = [1337]
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index d5ba097..d17363a 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,3 @@
-beautifulsoup4
feedparser
python-telegram-bot
redis
\ No newline at end of file
diff --git a/utils.py b/utils.py
new file mode 100644
index 0000000..340dca8
--- /dev/null
+++ b/utils.py
@@ -0,0 +1,81 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+import re
+
+from bot import admins
+
+
+def can_use_bot(update):
+ """Returns True if user is an admin"""
+ if update.message.from_user.id in admins:
+ return True
+ else:
+ return False
+
+
+def get_new_entries(entries, last_entry):
+ """Returns the entries preceding the one whose id/link equals last_entry (all entries if none matches)"""
+ new_entries = []
+ for entry in entries:
+ if 'id' in entry:
+ if entry['id'] == last_entry:
+ return new_entries
+ else:
+ new_entries.append(entry)
+ else:
+ if entry['link'] == last_entry:
+ return new_entries
+ else:
+ new_entries.append(entry)
+ return new_entries
+
+
+def remove_html_tags(html):
+ """Removes HTML tags"""
+ cleanr = re.compile('<.*?>')
+ cleantext = re.sub(cleanr, '', html)
+ return cleantext
+
+
+def clean_rss(content):
+ """Removes known feed boilerplate phrases (e.g. '[more]', 'Read more') from content"""
+ content = content.replace('[…]', '')
+ content = content.replace('[bilder]', '')
+ content = content.replace('[mehr]', '')
+ content = content.replace('[video]', '')
+ content = content.replace('...[more]', '')
+ content = content.replace('[more]', '')
+ content = content.replace('[liveticker]', '')
+ content = content.replace('[livestream]', '')
+ content = content.replace('[multimedia]', '')
+ content = content.replace('[phoenix]', '')
+ content = content.replace('[swr]', '')
+ content = content.replace('[ndr]', '')
+ content = content.replace('[mdr]', '')
+ content = content.replace('[rbb]', '')
+ content = content.replace('[wdr]', '')
+ content = content.replace('[hr]', '')
+ content = content.replace('[br]', '')
+ content = content.replace('Click for full.', '')
+ content = content.replace('Read more »', '')
+ content = content.replace('Read more', '')
+ content = content.replace('(more…)', '')
+ content = content.replace('View On WordPress', '')
+ content = content.replace('Continue reading →', '')
+ content = content.replace('(RSS generated with FetchRss)', '')
+ content = content.replace('-- Delivered by Feed43 service', '')
+ content = content.replace('Meldung bei www.tagesschau.de lesen', '')
+ content = content.replace('<', '<')
+ content = content.replace('>', '>')
+ content = re.sub('Der Beitrag.*erschien zuerst auf MAnime.de.', '', content)
+ content = re.sub('The post.*appeared first on Sugoi! Anime Blog.', '', content)
+ content = re.sub('http://www\.serienjunkies.de/.*\.html', '', content)
+ return content
+
+
+def get_content(content):
+ """Strips HTML tags and boilerplate from content and truncates it to 250 chars plus '...'"""
+ content = clean_rss(remove_html_tags(content).strip())
+ if len(content) > 250:
+ content = content[0:250] + '...'
+ return content