Mehr cleanRSS

This commit is contained in:
Akamaru 2018-02-25 23:19:12 +01:00
parent 77e673a54d
commit 460ccc6d32

View File

@@ -41,7 +41,9 @@ def remove_html_tags(rawhtml):
def clean_rss(content):
"""Cleans content"""
content = content.replace('\n', ' ')
content = content.replace('', '')
content = content.replace('[…]', '')
content = content.replace('[...]', '')
content = content.replace('[bilder]', '')
content = content.replace('[boerse]', '')
content = content.replace('[mehr]', '')
@@ -67,11 +69,14 @@ def clean_rss(content):
content = content.replace('(more…)', '')
content = content.replace('View On WordPress', '')
content = content.replace('Continue reading →', '')
content = content.replace('» weiterlesen', '')
content = content.replace('Ein Kommentar.', '')
content = content.replace('(RSS generated with FetchRss)', '')
content = content.replace('-- Delivered by Feed43 service', '')
content = content.replace('Meldung bei www.tagesschau.de lesen', '')
content = content.replace('<', '&lt;')
content = content.replace('>', '&gt;')
content = re.sub('Nächstältere Version.*', '', content)
content = re.sub('Der Beitrag.*erschien zuerst auf .+\.', '', content)
content = re.sub('The post.*appeared first on .+\.', '', content)
content = re.sub('http://www\.serienjunkies.de/.*\.html', '', content)