Fixes für schon-escapete Feeds + erlaube Tilden in der URL

This commit is contained in:
Andreas Bielawski 2017-09-21 00:50:53 +02:00
parent f95fc5df52
commit 6361b7f374
2 changed files with 5 additions and 4 deletions

2
bot.py
View File

@ -139,7 +139,7 @@ def subscribe(bot, update, args):
update.message.reply_text('❌ Keine Feed-URL angegeben.')
return
feed_url = args[0]
if not re.match("^http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*(),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+$", feed_url):
if not re.match("^http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&~+]|[!*(),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+$", feed_url):
update.message.reply_text('❌ Das ist keine URL.')
return

View File

@ -1,5 +1,6 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import html
import re
from bot import admins
@ -30,10 +31,10 @@ def get_new_entries(entries, last_entry):
return new_entries
def remove_html_tags(rawhtml):
    """Return *rawhtml* with every ``<...>`` tag removed.

    The parameter is deliberately not called ``html`` so that it does not
    shadow the ``html`` standard-library module imported by this file.

    :param rawhtml: Text that may contain HTML/XML tags.
    :return: The text with all tags stripped; text between tags is kept.
    """
    # ``[^>]*`` (unlike ``.*?``) also matches newlines, so tags that are
    # wrapped over several lines are removed as well.
    return re.sub(r'<[^>]*>', '', rawhtml)
@ -78,7 +79,7 @@ def clean_rss(content):
def get_content(content):
    """Sanitize feed content and cut it to 250 characters.

    HTML entities are unescaped first so that feeds which deliver
    already-escaped markup (``&lt;p&gt;``) have their tags stripped too.
    The tags are then removed, surrounding whitespace is trimmed, and the
    result is passed through ``clean_rss``.

    :param content: Raw content string of a feed entry.
    :return: The cleaned text; if it is longer than 250 characters it is
        truncated to 250 characters followed by ``'...'``.
    """
    unescaped = html.unescape(content)
    content = clean_rss(remove_html_tags(unescaped).strip())
    if len(content) > 250:
        content = content[:250] + '...'
    return content