[
            'series_slug' => [
                'name' => 'Serien-Slug (z.B. "faketrain")',
                'type' => 'text',
                'required' => true,
                'exampleValue' => 'faketrain',
                'title' => 'Den Serien-Slug findest du in der URL: bpb.de/mediathek/reihen/SERIEN-SLUG/'
            ],
            'limit' => [
                'name' => 'Maximale Anzahl an Episoden',
                'type' => 'number',
                'required' => false,
                'defaultValue' => 20
            ],
            'fetch_details' => [
                'name' => 'Detaillierte Metadaten laden',
                'type' => 'checkbox',
                'required' => false,
                'defaultValue' => 'checked',
                'title' => 'Lädt JSON-LD Metadaten von Episodenseiten (Thumbnail, Beschreibung, Datum)'
            ]
        ]
    ];

    /** Fallback for the "limit" input when it is missing or not a positive number. */
    private const DEFAULT_LIMIT = 20;

    /** Text of the first <h1> on the series page; stays empty until collectData() has run. */
    private $seriesTitle = '';

    /**
     * Scrapes the series listing page and fills $this->items with one entry per episode.
     *
     * Steps: load the listing page, collect every "h3 a" link pointing into /mediathek/,
     * optionally enrich each episode from the JSON-LD on its own page, sort newest first
     * and finally cut the list down to the requested limit.
     *
     * The limit is applied only after sorting, so when "fetch_details" is enabled every
     * listed episode page is requested, not just the first $limit ones.
     */
    public function collectData()
    {
        $seriesSlug = $this->getSeriesSlug();
        $fetchDetails = $this->getInput('fetch_details') ?? false;

        // A zero or negative limit would yield an empty (or tail-truncated) slice.
        $limit = (int) ($this->getInput('limit') ?? self::DEFAULT_LIMIT);
        if ($limit <= 0) {
            $limit = self::DEFAULT_LIMIT;
        }

        if ($seriesSlug === '') {
            returnClientError('Serien-Slug ist erforderlich.');
        }

        $seriesUrl = $this->buildSeriesUrl($seriesSlug);

        $html = getSimpleHTMLDOM($seriesUrl);
        if (!$html) {
            returnServerError('Konnte die Serien-Seite nicht laden: ' . $seriesUrl);
        }

        $titleTag = $html->find('h1', 0);
        $this->seriesTitle = $titleTag ? trim($titleTag->plaintext) : 'Unbekannte Serie';

        // Episodes are the links inside <h3> headings that point into the Mediathek.
        $episodeElements = $html->find('h3 a[href*="/mediathek/"]');
        if (empty($episodeElements)) {
            returnServerError('Keine Episoden gefunden.');
        }

        $episodes = [];
        $seenUrls = [];
        foreach ($episodeElements as $episodeLink) {
            $url = $this->normalizeUrl($episodeLink->href);

            // The uid is derived from the URL, so a repeated link would create a duplicate item.
            if (isset($seenUrls[$url])) {
                continue;
            }
            $seenUrls[$url] = true;

            $episode = [
                'title' => trim($episodeLink->plaintext),
                'url' => $url,
                // Basic metadata; a zero timestamp sorts the episode to the bottom.
                'timestamp' => 0,
                'thumbnail' => null,
                'description' => '',
                // Position on the listing page, used as a sort tiebreaker below.
                'position' => count($episodes),
            ];

            if ($fetchDetails) {
                $this->enrichEpisodeWithJsonLD($episode);
            }

            $episodes[] = $episode;
        }

        // Newest first. usort() is only stable as of PHP 8.0, so ties (e.g. all timestamps
        // zero when details are disabled) are broken explicitly by page position.
        usort($episodes, static function ($a, $b) {
            $byDate = ($b['timestamp'] ?? 0) <=> ($a['timestamp'] ?? 0);

            return $byDate !== 0 ? $byDate : $a['position'] <=> $b['position'];
        });

        foreach (array_slice($episodes, 0, $limit) as $episode) {
            $item = [
                'title' => $episode['title'],
                'uri' => $episode['url'],
                'uid' => md5($episode['url']),
                'author' => 'bpb.de',
            ];

            if (!empty($episode['timestamp'])) {
                $item['timestamp'] = $episode['timestamp'];
            }

            // Content consists of the thumbnail image followed by the description.
            $content = '';

            if (!empty($episode['thumbnail'])) {
                $content .= '<img src="' . $this->escapeHtml($episode['thumbnail']) . '" alt="'
                    . $this->escapeHtml($episode['title']) . '" /><br>';
                $item['enclosures'] = [$episode['thumbnail']];
            }

            if (!empty($episode['description'])) {
                // The description comes from a remote page, so it is escaped before embedding.
                $content .= '<p>' . $this->escapeHtml($episode['description']) . '</p>';
            }

            $item['content'] = $content;
            $this->items[] = $item;
        }
    }

    /**
     * Fills 'timestamp', 'thumbnail' and 'description' of an episode from the JSON-LD
     * VideoObject found on the episode page.
     *
     * All three keys are always set: on any failure (page not loadable, no JSON-LD,
     * invalid JSON, no VideoObject) they keep the defaults 0 / null / ''.
     *
     * @param array $episode Episode record with at least a 'url' key; modified in place.
     */
    private function enrichEpisodeWithJsonLD(&$episode)
    {
        $episode['timestamp'] = 0;
        $episode['thumbnail'] = null;
        $episode['description'] = '';

        try {
            $episodeHtml = getSimpleHTMLDOM($episode['url']);
            if (!$episodeHtml) {
                return;
            }

            // A page may carry several JSON-LD scripts; use the first one describing a video.
            foreach ($episodeHtml->find('script[type="application/ld+json"]') as $jsonLdScript) {
                $jsonData = json_decode($jsonLdScript->innertext, true);
                if (!is_array($jsonData)) {
                    continue;
                }

                $video = $this->findVideoObject($jsonData);
                if ($video === null) {
                    continue;
                }

                // thumbnailUrl may be a single URL or a list of URLs; take the first.
                $thumbnail = $video['thumbnailUrl'] ?? null;
                if (is_array($thumbnail)) {
                    $thumbnail = reset($thumbnail);
                }
                if (is_string($thumbnail) && $thumbnail !== '') {
                    $episode['thumbnail'] = $thumbnail;
                }

                if (isset($video['description']) && is_string($video['description'])) {
                    $episode['description'] = $video['description'];
                }

                // Prefer uploadDate, fall back to datePublished.
                foreach (['uploadDate', 'datePublished'] as $dateKey) {
                    if (!isset($video[$dateKey]) || !is_string($video[$dateKey])) {
                        continue;
                    }
                    $timestamp = strtotime($video[$dateKey]);
                    if ($timestamp !== false) {
                        $episode['timestamp'] = $timestamp;
                        break;
                    }
                }

                return;
            }
        } catch (Exception $e) {
            // Graceful degradation: keep whatever has been set so far (defaults otherwise).
        }
    }

    /**
     * Searches decoded JSON-LD for the first node whose @type is (or includes) VideoObject.
     *
     * Looks at the node itself, then at the entries of its '@graph' array, or at the
     * elements if the node is a plain list of nodes.
     *
     * @param array $node Decoded JSON-LD document or sub-node.
     * @return array|null The VideoObject node, or null when none is present.
     */
    private function findVideoObject(array $node): ?array
    {
        if (isset($node['@type']) && in_array('VideoObject', (array) $node['@type'], true)) {
            return $node;
        }

        $children = [];
        if (isset($node['@graph']) && is_array($node['@graph'])) {
            $children = $node['@graph'];
        } elseif (isset($node[0])) {
            $children = $node;
        }

        foreach ($children as $child) {
            if (!is_array($child)) {
                continue;
            }
            $found = $this->findVideoObject($child);
            if ($found !== null) {
                return $found;
            }
        }

        return null;
    }

    /**
     * Escapes a string for use in HTML text or a quoted attribute value.
     *
     * @param string $text Raw text.
     * @return string Escaped text.
     */
    private function escapeHtml($text)
    {
        return htmlspecialchars((string) $text, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
    }

    /**
     * Returns the "series_slug" input without surrounding whitespace or slashes.
     *
     * @return string The cleaned slug, or '' when the input is missing.
     */
    private function getSeriesSlug()
    {
        $slug = $this->getInput('series_slug');
        if ($slug === null) {
            return '';
        }

        return trim((string) $slug, " \t\n\r\0\x0B/");
    }

    /**
     * Builds the listing URL of a series.
     *
     * @param string $seriesSlug Cleaned series slug.
     * @return string self::URI followed by /mediathek/reihen/<slug>/ with the slug URL-encoded.
     */
    private function buildSeriesUrl($seriesSlug)
    {
        return self::URI . '/mediathek/reihen/' . rawurlencode($seriesSlug) . '/';
    }

    /**
     * Turns a link target into an absolute URL.
     *
     * - http:// and https:// URLs are returned unchanged,
     * - protocol-relative URLs (//host/path) get the https scheme,
     * - everything else is appended to self::URI with exactly one separating slash.
     *
     * @param string $url Link target as found in the page.
     * @return string Absolute URL.
     */
    private function normalizeUrl($url)
    {
        $url = trim((string) $url);

        // Require the full scheme separator so that paths like "httpfoo/bar" stay relative.
        if (preg_match('~^https?://~i', $url) === 1) {
            return $url;
        }

        if (strncmp($url, '//', 2) === 0) {
            return 'https:' . $url;
        }

        return self::URI . '/' . ltrim($url, '/');
    }

    /**
     * Feed name: the scraped series title plus a site suffix, or the parent's name
     * while no title has been collected.
     */
    public function getName()
    {
        if (empty($this->seriesTitle)) {
            return parent::getName();
        }

        return $this->seriesTitle . ' - bpb.de Mediathek';
    }

    /**
     * Feed URI: the series listing page when a slug is configured, otherwise self::URI.
     */
    public function getURI()
    {
        $seriesSlug = $this->getSeriesSlug();
        if ($seriesSlug === '') {
            return self::URI;
        }

        return $this->buildSeriesUrl($seriesSlug);
    }

    /**
     * Returns the URL of a 32px favicon for www.bpb.de served by Google's favicon service.
     */
    public function getIcon()
    {
        return 'https://www.google.com/s2/favicons?domain=www.bpb.de&sz=32';
    }

    /**
     * Derives the bridge parameters from a series URL.
     *
     * Accepts URLs of the form [http(s)://][www.]bpb.de/mediathek/reihen/SLUG[/...].
     * The slug must be followed by '/', '?', '#' or the end of the string so that a slug
     * containing unsupported characters is rejected instead of being captured partially.
     *
     * @param string $url URL entered by the user.
     * @return array|null ['series_slug' => SLUG] on a match, null otherwise.
     */
    public function detectParameters($url)
    {
        $pattern = '~^(?:https?://)?(?:www\.)?bpb\.de/mediathek/reihen/([a-z0-9_-]+)(?:[/?#]|$)~i';

        if (preg_match($pattern, trim((string) $url), $matches) === 1) {
            return [
                'series_slug' => $matches[1]
            ];
        }

        return null;
    }
}