# Patch summary (descriptive header placed before the first `diff --git` header).
#
# bot.py, subscribe():
#   * The feed-URL validation regex gains `~` in its third character class
#     (`[$-_@.&+]` becomes `[$-_@.&~+]`), so URLs containing a tilde are no
#     longer rejected with the "not a URL" reply.
#   * NOTE(review): inside that character class `$-_` is a range
#     (U+0024..U+005F), not three literal characters, so it also admits
#     `/`, `:`, `?`, `=`, `<`, `>` and others. Presumably this is what lets
#     path separators match at all -- confirm before tightening the pattern.
#
# utils.py:
#   * Adds `import html` at the top of the module.
#   * remove_html_tags(): the parameter is renamed from `html` to `rawhtml`,
#     so it no longer shadows the newly imported `html` module. The body is
#     otherwise unchanged: it still strips everything matching `<.*?>`.
#   * get_content(): the input is now passed through html.unescape() before
#     remove_html_tags(), clean_rss() and the 250-character cut.
#   * NOTE(review): because unescaping happens before tag stripping, escaped
#     markup such as `&lt;b&gt;` becomes `<b>` and is then removed by the tag
#     regex; literal text like `a &lt; b &gt; c` would lose ` b ` too.
#     Presumably acceptable for feed summaries -- confirm.
#
# NOTE(review): the patch below is stored on a single physical line, so the
# original line breaks and context indentation are not recoverable here. It
# probably will not apply as-is -- confirm against the original commit.
diff --git a/bot.py b/bot.py index b6a6b15..0165c1e 100644 --- a/bot.py +++ b/bot.py @@ -139,7 +139,7 @@ def subscribe(bot, update, args): update.message.reply_text('❌ Keine Feed-URL angegeben.') return feed_url = args[0] - if not re.match("^http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*(),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+$", feed_url): + if not re.match("^http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&~+]|[!*(),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+$", feed_url): update.message.reply_text('❌ Das ist keine URL.') return diff --git a/utils.py b/utils.py index 8b4b99d..977a96d 100644 --- a/utils.py +++ b/utils.py @@ -1,5 +1,6 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- +import html import re from bot import admins @@ -30,10 +31,10 @@ def get_new_entries(entries, last_entry): return new_entries -def remove_html_tags(html): +def remove_html_tags(rawhtml): """Removes HTML tags""" cleanr = re.compile('<.*?>') - cleantext = re.sub(cleanr, '', html) + cleantext = re.sub(cleanr, '', rawhtml) return cleantext @@ -79,7 +80,7 @@ def clean_rss(content): def get_content(content): """Sanitizes content and cuts it to 250 chars""" - content = clean_rss(remove_html_tags(content).strip()) + content = clean_rss(remove_html_tags(html.unescape(content)).strip()) if len(content) > 250: content = content[0:250] + '...' return content