From 460ccc6d324dcf8182165bce2d08d93d95e606e7 Mon Sep 17 00:00:00 2001 From: Akamaru Date: Sun, 25 Feb 2018 23:19:12 +0100 Subject: [PATCH] Mehr cleanRSS --- utils.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/utils.py b/utils.py index 977a96d..66e1117 100644 --- a/utils.py +++ b/utils.py @@ -41,7 +41,9 @@ def remove_html_tags(rawhtml): def clean_rss(content): """Cleans content""" content = content.replace('\n', ' ') + content = content.replace('←', '') content = content.replace('[…]', '') + content = content.replace('[...]', '') content = content.replace('[bilder]', '') content = content.replace('[boerse]', '') content = content.replace('[mehr]', '') @@ -67,11 +69,14 @@ def clean_rss(content): content = content.replace('(more…)', '') content = content.replace('View On WordPress', '') content = content.replace('Continue reading →', '') + content = content.replace('» weiterlesen', '') + content = content.replace('Ein Kommentar.', '') content = content.replace('(RSS generated with FetchRss)', '') content = content.replace('-- Delivered by Feed43 service', '') content = content.replace('Meldung bei www.tagesschau.de lesen', '') content = content.replace('<', '<') content = content.replace('>', '>') + content = re.sub('Nächstältere Version.*', '', content) content = re.sub('Der Beitrag.*erschien zuerst auf .+\.', '', content) content = re.sub('The post.*appeared first on .+\.', '', content) content = re.sub('http://www\.serienjunkies.de/.*\.html', '', content)