This repository has been archived on 2021-04-24. You can view files and clone it, but cannot push or open issues or pull requests.
RikoBot/utils.py

95 lines
3.4 KiB
Python
Raw Normal View History

2017-09-20 23:25:57 +02:00
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import html
2017-09-20 23:25:57 +02:00
import re
from bot import admins
def can_use_bot(update):
    """Tell whether the sender of the update's message is listed in ``admins``."""
    sender_id = update.message.from_user.id
    return sender_id in admins
def get_new_entries(entries, last_entry):
    """Return the entries that come before ``last_entry`` in ``entries``.

    Entries are scanned in order. Each entry is identified by its ``'id'``
    value, or by its ``'link'`` value when it has no ``'id'`` key. Scanning
    stops at the first entry whose identifier equals ``last_entry``; that
    entry and everything after it are left out. If nothing matches, every
    entry is returned.

    An entry that has neither key can never match ``last_entry`` and is
    therefore treated as new instead of raising ``KeyError``.
    """
    new_entries = []
    for entry in entries:
        # Prefer 'id'; fall back to 'link' only when 'id' is absent.
        key = 'id' if 'id' in entry else 'link'
        if key in entry and entry[key] == last_entry:
            return new_entries
        new_entries.append(entry)
    return new_entries
def remove_html_tags(rawhtml):
    """Return ``rawhtml`` with every ``<...>`` span removed.

    A span runs from a ``<`` to the next ``>``. The character class
    ``[^>]`` also matches line breaks, so a tag whose attributes are
    wrapped over several lines is removed as well. A ``<`` that is never
    followed by a ``>`` is left untouched.
    """
    return re.sub(r'<[^>]*>', '', rawhtml)
# Literal boilerplate fragments stripped from feed text. They are applied in
# exactly this order, which matters where one fragment contains another
# (e.g. '...[more]' before '[more]', 'Read more »' before 'Read more').
_RSS_NOISE = (
    # NOTE(review): this pattern is empty as seen here, which makes the
    # replacement a no-op. Possibly an invisible character was lost -- confirm.
    '',
    '[…]',
    '[...]',
    '[bilder]',
    '[boerse]',
    '[mehr]',
    '[video]',
    '...[more]',
    '[more]',
    '[liveticker]',
    '[livestream]',
    '[multimedia]',
    '[sportschau]',
    '[phoenix]',
    '[swr]',
    '[ndr]',
    '[mdr]',
    '[rbb]',
    '[wdr]',
    '[hr]',
    '[br]',
    'Click for full.',
    'Read more »',
    'Read more',
    '...Read More',
    '(more…)',
    'View On WordPress',
    'Continue reading →',
    '» weiterlesen',
    'Ein Kommentar.',
    'Änderungen zeigen',
    '(RSS generated with FetchRss)',
    '-- Delivered by Feed43 service',
    'Meldung bei www.tagesschau.de lesen',
)

# Regex boilerplate removed after the literal fragments, in this order.
# Raw strings keep '\.' a regex escape rather than an invalid string escape.
_RSS_NOISE_PATTERNS = tuple(re.compile(pattern) for pattern in (
    r'Nächstältere Version.*',
    r'Die Seite wurde neu angelegt.*',
    r'Weiterleitung nach.*erstellt.*',
    r'Der Beitrag.*erschien zuerst auf .+\.',
    r'The post.*appeared first on .+\.',
    # The dot before 'de' is escaped so it only matches a literal '.'.
    r'http://www\.serienjunkies\.de/.*\.html',
))


def clean_rss(content):
    """Strip known feed boilerplate from ``content`` and escape ``<``/``>``.

    Steps, in order:

    1. Line breaks are replaced by single spaces.
    2. Every fragment in ``_RSS_NOISE`` is removed.
    3. ``<`` and ``>`` are replaced by ``&lt;`` and ``&gt;``. ``&`` is
       deliberately left as it is, matching the previous behaviour.
    4. Every match of the patterns in ``_RSS_NOISE_PATTERNS`` is removed.

    Returns the cleaned string.
    """
    content = content.replace('\n', ' ')
    for fragment in _RSS_NOISE:
        content = content.replace(fragment, '')
    content = content.replace('<', '&lt;').replace('>', '&gt;')
    for pattern in _RSS_NOISE_PATTERNS:
        content = pattern.sub('', content)
    return content
def get_content(content, max_length=250):
    """Sanitize ``content`` and shorten it to at most ``max_length`` characters.

    The text is HTML-unescaped, passed through ``remove_html_tags``,
    stripped of surrounding whitespace and cleaned with ``clean_rss``.
    If the result is longer than ``max_length`` it is cut and ``'...'`` is
    appended.

    ``clean_rss`` turns ``<``/``>`` into ``&lt;``/``&gt;``, so a plain slice
    could end in the middle of such an entity (for example ``&l``). When the
    cut would split one, the text is cut just before that entity instead.

    :param content: raw text to sanitize.
    :param max_length: maximum number of characters kept before the
        ``'...'`` suffix; defaults to 250.
    :returns: the sanitized, possibly shortened text.
    """
    text = clean_rss(remove_html_tags(html.unescape(content)).strip())
    if len(text) <= max_length:
        return text

    cut = max_length
    # An entity is 4 characters long, so one that straddles the cut must
    # start within the last 3 characters before it.
    amp = text.rfind('&', max(0, cut - 3), cut)
    if amp != -1 and text[amp:amp + 4] in ('&lt;', '&gt;'):
        cut = amp
    return text[:cut] + '...'