From 6361b7f374c856d5d46f6b3076cac66c4a6128bf Mon Sep 17 00:00:00 2001 From: Andreas Bielawski Date: Thu, 21 Sep 2017 00:50:53 +0200 Subject: [PATCH] =?UTF-8?q?Fixes=20f=C3=BCr=20schon-escapete=20Feeds=20+?= =?UTF-8?q?=20erlaube=20Tilden=20in=20der=20URL?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- bot.py | 2 +- utils.py | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/bot.py b/bot.py index 6ccaf54..c8bb81a 100644 --- a/bot.py +++ b/bot.py @@ -139,7 +139,7 @@ def subscribe(bot, update, args): update.message.reply_text('❌ Keine Feed-URL angegeben.') return feed_url = args[0] - if not re.match("^http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*(),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+$", feed_url): + if not re.match("^http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&~+]|[!*(),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+$", feed_url): update.message.reply_text('❌ Das ist keine URL.') return diff --git a/utils.py b/utils.py index 3b7359c..7179d6d 100644 --- a/utils.py +++ b/utils.py @@ -1,5 +1,6 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- +import html import re from bot import admins @@ -30,10 +31,10 @@ def get_new_entries(entries, last_entry): return new_entries -def remove_html_tags(html): +def remove_html_tags(rawhtml): """Removes HTML tags""" cleanr = re.compile('<.*?>') - cleantext = re.sub(cleanr, '', html) + cleantext = re.sub(cleanr, '', rawhtml) return cleantext @@ -78,7 +79,7 @@ def clean_rss(content): def get_content(content): """Sanitizes content and cuts it to 250 chars""" - content = clean_rss(remove_html_tags(content).strip()) + content = clean_rss(remove_html_tags(html.unescape(content)).strip()) if len(content) > 250: content = content[0:250] + '...' return content