[
'series_slug' => [
'name' => 'Serien-Slug (z.B. "faketrain")',
'type' => 'text',
'required' => true,
'exampleValue' => 'faketrain',
'title' => 'Den Serien-Slug findest du in der URL: bpb.de/mediathek/reihen/SERIEN-SLUG/'
],
'limit' => [
'name' => 'Maximale Anzahl an Episoden',
'type' => 'number',
'required' => false,
'defaultValue' => 20
],
'fetch_details' => [
'name' => 'Detaillierte Metadaten laden',
'type' => 'checkbox',
'required' => false,
'defaultValue' => 'checked',
'title' => 'Lädt JSON-LD Metadaten von Episodenseiten (Thumbnail, Beschreibung, Datum)'
]
]
];
/** @var string Series title scraped from the listing page's first h1; used by getName(). */
private $seriesTitle = '';

/**
 * Scrapes the series listing page and appends one feed item per episode to $this->items.
 *
 * Steps: load the listing page, read the series title from the first h1, collect every
 * "h3 a" link pointing into /mediathek/, optionally enrich each episode from the JSON-LD
 * on its own page, sort newest first, apply the limit and build the items.
 *
 * Failures are reported through returnClientError() / returnServerError().
 */
public function collectData()
{
    $seriesSlug = $this->sanitizeSlug($this->getInput('series_slug'));
    if ($seriesSlug === '') {
        returnClientError('Serien-Slug ist erforderlich.');
    }

    // A limit of zero would yield an empty feed and a negative one would make
    // array_slice() trim from the end, so anything below 1 falls back to the default.
    $limit = (int) ($this->getInput('limit') ?? 20);
    if ($limit < 1) {
        $limit = 20;
    }

    $fetchDetails = (bool) ($this->getInput('fetch_details') ?? false);

    $seriesUrl = $this->buildSeriesUrl($seriesSlug);
    $html = getSimpleHTMLDOM($seriesUrl);
    if (!$html) {
        returnServerError('Konnte die Serien-Seite nicht laden: ' . $seriesUrl);
    }

    $titleTag = $html->find('h1', 0);
    $this->seriesTitle = $titleTag ? trim($titleTag->plaintext) : 'Unbekannte Serie';

    // Episode entries are h3 headings wrapping a link into the Mediathek.
    $episodeLinks = $html->find('h3 a[href*="/mediathek/"]');
    if (empty($episodeLinks)) {
        returnServerError('Keine Episoden gefunden.');
    }

    // Every episode is collected before the limit is applied so that sorting sees all of them.
    $episodes = [];
    $seenUrls = [];
    foreach ($episodeLinks as $episodeLink) {
        $url = $this->normalizeUrl($episodeLink->href);

        // The item uid is derived from the URL, so a repeated link would only
        // produce a duplicate item (and a duplicate detail request).
        if (isset($seenUrls[$url])) {
            continue;
        }
        $seenUrls[$url] = true;

        $episode = [
            'title' => trim($episodeLink->plaintext),
            'url' => $url,
            'timestamp' => 0, // 0 sorts to the bottom
            'thumbnail' => null,
            'description' => '',
            'position' => count($episodes), // listing order, used as sort tiebreaker
        ];

        if ($fetchDetails) {
            $this->enrichEpisodeWithJsonLD($episode);
        }

        $episodes[] = $episode;
    }

    // Newest first. Ties (including all-zero timestamps when details are disabled or
    // unavailable) keep the listing order explicitly, because usort() is only
    // guaranteed to be stable from PHP 8.0 on.
    usort($episodes, static function ($a, $b) {
        $byDate = ($b['timestamp'] ?? 0) <=> ($a['timestamp'] ?? 0);
        return $byDate !== 0 ? $byDate : ($a['position'] <=> $b['position']);
    });

    foreach (array_slice($episodes, 0, $limit) as $episode) {
        $item = [
            'title' => $episode['title'],
            'uri' => $episode['url'],
            'uid' => md5($episode['url']),
            'author' => 'bpb.de',
        ];

        if (!empty($episode['timestamp'])) {
            $item['timestamp'] = $episode['timestamp'];
        }

        if (!empty($episode['thumbnail'])) {
            $item['enclosures'] = [$episode['thumbnail']];
        }

        $item['content'] = $this->buildContentHtml($episode);

        $this->items[] = $item;
    }
}

/**
 * Builds the item content: the thumbnail image (if any) followed by the description (if any).
 *
 * Both values originate from remote pages, so they are escaped before being placed in HTML.
 *
 * @param array $episode Episode record with optional 'thumbnail' and 'description' entries.
 * @return string HTML fragment; empty when neither value is present.
 */
private function buildContentHtml($episode)
{
    $content = '';

    if (!empty($episode['thumbnail'])) {
        $src = htmlspecialchars((string) $episode['thumbnail'], ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
        $content .= '<p><img src="' . $src . '" alt="" /></p>';
    }

    if (!empty($episode['description'])) {
        // double_encode is off so entities already present in the source text are not escaped twice.
        $text = htmlspecialchars((string) $episode['description'], ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8', false);
        $content .= '<p>' . $text . '</p>';
    }

    return $content;
}

/**
 * Enriches episode data by fetching the episode page and parsing its JSON-LD.
 *
 * 'timestamp', 'thumbnail' and 'description' are always set on return: to 0, null and ''
 * when nothing usable is found, otherwise to the values of the first VideoObject
 * discovered in any application/ld+json script of the page.
 *
 * @param array $episode Episode record, modified in place; must contain 'url'.
 */
private function enrichEpisodeWithJsonLD(&$episode)
{
    // Defaults first, so every early exit below leaves a complete record.
    $episode['timestamp'] = 0;
    $episode['thumbnail'] = null;
    $episode['description'] = '';

    try {
        $episodeHtml = getSimpleHTMLDOM($episode['url']);
        if (!$episodeHtml) {
            return;
        }

        // A page may carry several JSON-LD scripts; use the first one that holds a VideoObject.
        $video = null;
        foreach ($episodeHtml->find('script[type="application/ld+json"]') as $script) {
            $decoded = json_decode($script->innertext, true);
            if (!is_array($decoded)) {
                continue;
            }
            $video = $this->findVideoObject($decoded);
            if ($video !== null) {
                break;
            }
        }
        if ($video === null) {
            return;
        }

        if (isset($video['thumbnailUrl'])) {
            $episode['thumbnail'] = $this->extractThumbnailUrl($video['thumbnailUrl']);
        }

        if (isset($video['description']) && is_string($video['description'])) {
            $episode['description'] = $video['description'];
        }

        // Prefer uploadDate; fall back to datePublished when it is absent or unparsable.
        foreach (['uploadDate', 'datePublished'] as $dateKey) {
            if (!isset($video[$dateKey]) || !is_string($video[$dateKey])) {
                continue;
            }
            $timestamp = strtotime($video[$dateKey]);
            if ($timestamp !== false) {
                $episode['timestamp'] = $timestamp;
                break;
            }
        }
    } catch (Exception $e) {
        // Graceful degradation: a failing episode page must not break the whole feed,
        // so the episode simply keeps whatever has been set so far.
    }
}

/**
 * Locates a VideoObject node in decoded JSON-LD.
 *
 * Accepts the node itself, a member of its '@graph' array, or an element of a
 * top-level list of nodes.
 *
 * @param array $node Decoded JSON-LD document.
 * @return array|null The VideoObject node, or null when there is none.
 */
private function findVideoObject($node)
{
    if ($this->isVideoObject($node)) {
        return $node;
    }

    $candidates = [];
    if (isset($node['@graph']) && is_array($node['@graph'])) {
        $candidates = $node['@graph'];
    } elseif (isset($node[0])) {
        $candidates = $node;
    }

    foreach ($candidates as $candidate) {
        if (is_array($candidate) && $this->isVideoObject($candidate)) {
            return $candidate;
        }
    }

    return null;
}

/**
 * Tells whether a JSON-LD node declares the type VideoObject ('@type' may be a string or a list).
 *
 * @param array $node Decoded JSON-LD node.
 * @return bool
 */
private function isVideoObject($node)
{
    $type = $node['@type'] ?? null;

    return $type === 'VideoObject'
        || (is_array($type) && in_array('VideoObject', $type, true));
}

/**
 * Reduces a JSON-LD thumbnailUrl value to a single absolute URL.
 *
 * Handles a plain string, a list of strings (first entry wins) and an
 * object-like array exposing a 'url' entry.
 *
 * @param mixed $value Raw 'thumbnailUrl' value.
 * @return string|null Absolute URL, or null when no usable URL is present.
 */
private function extractThumbnailUrl($value)
{
    if (is_array($value)) {
        // reset() yields false for an empty list instead of raising an undefined-offset notice.
        $value = isset($value['url']) ? $value['url'] : reset($value);
    }

    if (!is_string($value) || trim($value) === '') {
        return null;
    }

    return $this->normalizeUrl($value);
}

/**
 * Normalizes relative URLs to absolute URLs.
 *
 * - http:// and https:// URLs are returned unchanged.
 * - Protocol-relative URLs ("//host/path") receive the scheme of self::URI.
 * - Everything else is treated as a path below self::URI.
 *
 * @param string $url URL or path taken from a page.
 * @return string Absolute URL.
 */
private function normalizeUrl($url)
{
    $url = trim((string) $url);

    if (preg_match('#^https?://#i', $url) === 1) {
        return $url;
    }

    if (strpos($url, '//') === 0) {
        $scheme = parse_url(self::URI, PHP_URL_SCHEME);
        return ($scheme ?: 'https') . ':' . $url;
    }

    return self::URI . '/' . ltrim($url, '/');
}

/**
 * Cleans the user-supplied slug: surrounding whitespace and slashes are dropped so that
 * input such as " faketrain/ " still addresses the series.
 *
 * @param mixed $slug Raw input value (may be null).
 * @return string Cleaned slug; '' when nothing remains.
 */
private function sanitizeSlug($slug)
{
    return trim((string) $slug, " \t\n\r\0\x0B/");
}

/**
 * Builds the listing URL of a series.
 *
 * @param string $slug Cleaned series slug.
 * @return string Absolute URL of the series page.
 */
private function buildSeriesUrl($slug)
{
    // The slug is user input: encode it so it cannot add path segments or a query string.
    return self::URI . '/mediathek/reihen/' . rawurlencode($slug) . '/';
}

/**
 * Returns the scraped series title followed by the site suffix, or the parent's
 * name while no title has been collected.
 */
public function getName()
{
    if (!empty($this->seriesTitle)) {
        return $this->seriesTitle . ' - bpb.de Mediathek';
    }

    return parent::getName();
}

/**
 * Returns the series page URL when a slug is configured, otherwise self::URI.
 */
public function getURI()
{
    $seriesSlug = $this->sanitizeSlug($this->getInput('series_slug'));
    if ($seriesSlug !== '') {
        return $this->buildSeriesUrl($seriesSlug);
    }

    return self::URI;
}

/**
 * Returns the URL of the icon: the 32px favicon of www.bpb.de from Google's favicon service.
 */
public function getIcon()
{
    return 'https://www.google.com/s2/favicons?domain=www.bpb.de&sz=32';
}

/**
 * Derives the bridge parameters from a series URL.
 *
 * @param string $url Candidate URL, e.g. https://www.bpb.de/mediathek/reihen/SLUG/
 * @return array|null ['series_slug' => SLUG] on a match, null otherwise.
 */
public function detectParameters($url)
{
    // Pattern: https://www.bpb.de/mediathek/reihen/SLUG/
    if (preg_match('#bpb\.de/mediathek/reihen/([a-z0-9-]+)/?#i', $url, $matches)) {
        return [
            'series_slug' => $matches[1]
        ];
    }

    return null;
}
}