282 lines
8.9 KiB
PHP
282 lines
8.9 KiB
PHP
<?php
|
|
|
|
declare(strict_types=1);
|
|
|
|
/**
 * RSS-Bridge bridge that lists the episodes of one series ("Reihe") from the
 * bpb.de Mediathek.
 *
 * The series listing page is scraped for episode links. When the
 * "fetch_details" option is enabled, every episode page is fetched as well
 * and its JSON-LD VideoObject supplies thumbnail, description and date.
 */
class BpbMediathekSeriesBridge extends BridgeAbstract
{
    const NAME = 'bpb.de Mediathek Serien Bridge';
    const URI = 'https://www.bpb.de';
    const DESCRIPTION = 'Gibt die neuesten Episoden einer Serie aus der bpb.de Mediathek zurück.';
    const MAINTAINER = 'Akamaru';
    const CACHE_TIMEOUT = 3600; // 1 hour

    const PARAMETERS = [
        'Serie' => [
            'series_slug' => [
                'name' => 'Serien-Slug (z.B. "faketrain")',
                'type' => 'text',
                'required' => true,
                'exampleValue' => 'faketrain',
                'title' => 'Den Serien-Slug findest du in der URL: bpb.de/mediathek/reihen/SERIEN-SLUG/'
            ],
            'limit' => [
                'name' => 'Maximale Anzahl an Episoden',
                'type' => 'number',
                'required' => false,
                'defaultValue' => 20
            ],
            'fetch_details' => [
                'name' => 'Detaillierte Metadaten laden',
                'type' => 'checkbox',
                'required' => false,
                'defaultValue' => 'checked',
                'title' => 'Lädt JSON-LD Metadaten von Episodenseiten (Thumbnail, Beschreibung, Datum)'
            ]
        ]
    ];

    /**
     * Heading of the series page; filled by collectData() and used by getName().
     *
     * @var string
     */
    private $seriesTitle = '';

    /**
     * Scrapes the series listing page and fills $this->items with one feed
     * item per episode, newest first, capped at the configured limit.
     *
     * Fails through returnClientError() when no slug is given and through
     * returnServerError() when the page cannot be loaded or lists no episodes.
     */
    public function collectData()
    {
        $seriesSlug = $this->sanitizeSlug($this->getInput('series_slug'));
        if ($seriesSlug === '') {
            returnClientError('Serien-Slug ist erforderlich.');
        }

        $limit = $this->resolveLimit($this->getInput('limit'));
        $fetchDetails = (bool) ($this->getInput('fetch_details') ?? false);

        $seriesUrl = $this->buildSeriesUrl($seriesSlug);
        $html = getSimpleHTMLDOM($seriesUrl);
        if (!$html) {
            returnServerError('Konnte die Serien-Seite nicht laden: ' . $seriesUrl);
        }

        // The first <h1> is used as the series title; an absent or empty
        // heading falls back to a placeholder.
        $heading = $html->find('h1', 0);
        $headingText = $heading ? trim($heading->plaintext) : '';
        $this->seriesTitle = $headingText !== '' ? $headingText : 'Unbekannte Serie';

        // Episodes are the links inside <h3> headings that point into the Mediathek.
        $episodeLinks = $html->find('h3 a[href*="/mediathek/"]');
        if (empty($episodeLinks)) {
            returnServerError('Keine Episoden gefunden.');
        }

        // Collect every episode first: the limit is applied only after
        // sorting, so the newest episodes win regardless of page order.
        $episodes = [];
        foreach ($episodeLinks as $episodeLink) {
            $episodeUrl = $this->normalizeUrl($episodeLink->href);
            if (isset($episodes[$episodeUrl])) {
                // Same episode linked twice: avoid a duplicate item and a second fetch.
                continue;
            }

            $episode = [
                'title' => trim($episodeLink->plaintext),
                'url' => $episodeUrl,
                // Listing order, used as a tie-breaker when sorting.
                'position' => count($episodes),
                'timestamp' => 0,
                'thumbnail' => null,
                'description' => '',
            ];

            if ($fetchDetails) {
                $this->enrichEpisodeWithJsonLD($episode);
            }

            $episodes[$episodeUrl] = $episode;
        }

        // Newest first. Episodes without a date carry timestamp 0 and sink to
        // the bottom. Ties keep the listing order explicitly, because usort()
        // is not stable before PHP 8.0.
        usort($episodes, static function (array $a, array $b) {
            return ($b['timestamp'] <=> $a['timestamp'])
                ?: ($a['position'] <=> $b['position']);
        });

        foreach (array_slice($episodes, 0, $limit) as $episode) {
            $this->items[] = $this->buildItem($episode);
        }
    }

    /**
     * Converts one collected episode into a feed item array.
     *
     * @param array $episode Episode with title, url, timestamp, thumbnail, description.
     * @return array Feed item (title, uri, uid, author, content, optional timestamp/enclosures).
     */
    private function buildItem(array $episode)
    {
        $item = [
            'title' => $episode['title'],
            'uri' => $episode['url'],
            'uid' => md5($episode['url']),
            'author' => 'bpb.de',
        ];

        if (!empty($episode['timestamp'])) {
            $item['timestamp'] = $episode['timestamp'];
        }

        // Content consists of the thumbnail followed by the description.
        $content = '';
        if (!empty($episode['thumbnail'])) {
            $content .= '<img src="' . $this->escapeHtml($episode['thumbnail'])
                . '" alt="' . $this->escapeHtml($episode['title']) . '" /><br>';
            $item['enclosures'] = [$episode['thumbnail']];
        }
        if (!empty($episode['description'])) {
            // The description comes from a remote page, so it is escaped like
            // the other interpolated values.
            $content .= '<p>' . $this->escapeHtml($episode['description']) . '</p>';
        }
        $item['content'] = $content;

        return $item;
    }

    /**
     * Escapes text for use in HTML content and attribute values.
     *
     * Entities that are already present are left alone so text that arrives
     * pre-encoded is not encoded twice.
     *
     * @param mixed $text Value to escape; cast to string.
     * @return string
     */
    private function escapeHtml($text)
    {
        return htmlspecialchars((string) $text, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8', false);
    }

    /**
     * Turns the raw "limit" input into a usable positive item count.
     *
     * Missing, non-numeric, zero and negative values fall back to the default
     * declared in PARAMETERS, so array_slice() always receives a positive int.
     *
     * @param mixed $rawLimit Value returned by getInput('limit').
     * @return int
     */
    private function resolveLimit($rawLimit)
    {
        $default = self::PARAMETERS['Serie']['limit']['defaultValue'];
        if (!is_numeric($rawLimit)) {
            return $default;
        }

        $limit = (int) $rawLimit;

        return $limit > 0 ? $limit : $default;
    }

    /**
     * Strips surrounding whitespace and slashes from the slug input.
     *
     * @param mixed $slug Value returned by getInput('series_slug'); may be null.
     * @return string Cleaned slug, or '' when nothing usable was given.
     */
    private function sanitizeSlug($slug)
    {
        return trim((string) $slug, " \t\n\r\0\x0B/");
    }

    /**
     * Builds the listing URL of a series.
     *
     * @param string $slug Cleaned slug; it is URL-encoded as a single path segment.
     * @return string
     */
    private function buildSeriesUrl($slug)
    {
        return self::URI . '/mediathek/reihen/' . rawurlencode($slug) . '/';
    }

    /**
     * Enriches episode data by fetching the episode page and reading its
     * JSON-LD VideoObject.
     *
     * The keys 'timestamp', 'thumbnail' and 'description' are always set on
     * return. They hold the neutral values 0, null and '' when the page cannot
     * be loaded or contains no usable VideoObject; this is a deliberate
     * best-effort so one broken episode page does not fail the whole feed.
     *
     * @param array $episode Episode array with at least a 'url' key; modified in place.
     */
    private function enrichEpisodeWithJsonLD(&$episode)
    {
        // Start from the neutral values so every exit path leaves the keys defined.
        $episode['timestamp'] = 0;
        $episode['thumbnail'] = null;
        $episode['description'] = '';

        try {
            $episodeHtml = getSimpleHTMLDOM($episode['url']);
            if (!$episodeHtml) {
                return;
            }

            // A page may carry several JSON-LD scripts; use the first one
            // that contains a VideoObject.
            $video = null;
            foreach ($episodeHtml->find('script[type="application/ld+json"]') as $script) {
                $decoded = json_decode((string) $script->innertext, true);
                $video = $this->findVideoObject($decoded);
                if ($video !== null) {
                    break;
                }
            }
            if ($video === null) {
                return;
            }

            $episode['thumbnail'] = $this->extractThumbnail($video['thumbnailUrl'] ?? null);
            if (isset($video['description']) && is_string($video['description'])) {
                $episode['description'] = trim($video['description']);
            }
            $episode['timestamp'] = $this->extractTimestamp($video);
        } catch (Exception $e) {
            // Graceful degradation: keep the neutral values assigned above.
        }
    }

    /**
     * Searches decoded JSON-LD for a node whose @type is (or includes) VideoObject.
     *
     * Handles a single entity, a top-level list of entities and an "@graph"
     * container.
     *
     * @param mixed $node Result of json_decode(..., true) or a nested value of it.
     * @return array|null The VideoObject node, or null when none is found.
     */
    private function findVideoObject($node)
    {
        if (!is_array($node)) {
            return null;
        }

        $type = $node['@type'] ?? null;
        if ($type === 'VideoObject' || (is_array($type) && in_array('VideoObject', $type, true))) {
            return $node;
        }

        $children = [];
        if (isset($node['@graph']) && is_array($node['@graph'])) {
            $children = $node['@graph'];
        } elseif ($type === null) {
            // Untyped container, e.g. a top-level list of entities.
            $children = $node;
        }

        foreach ($children as $child) {
            $found = $this->findVideoObject($child);
            if ($found !== null) {
                return $found;
            }
        }

        return null;
    }

    /**
     * Reduces a JSON-LD thumbnailUrl value to a single URL.
     *
     * Accepts a plain string, a list of values (the first one is used) or an
     * object with a 'url' key.
     *
     * @param mixed $value Raw thumbnailUrl value.
     * @return string|null
     */
    private function extractThumbnail($value)
    {
        if (is_string($value)) {
            return $value !== '' ? $value : null;
        }
        if (!is_array($value) || $value === []) {
            return null;
        }
        if (isset($value['url'])) {
            return $this->extractThumbnail($value['url']);
        }

        return $this->extractThumbnail(reset($value));
    }

    /**
     * Reads the publication time of a VideoObject.
     *
     * 'uploadDate' is preferred and 'datePublished' is the fallback. Only
     * string values are handed to strtotime(), which would otherwise raise a
     * TypeError under strict_types.
     *
     * @param array $video Decoded VideoObject node.
     * @return int Unix timestamp, or 0 when no parseable date exists.
     */
    private function extractTimestamp(array $video)
    {
        foreach (['uploadDate', 'datePublished'] as $key) {
            if (!isset($video[$key]) || !is_string($video[$key])) {
                continue;
            }
            $timestamp = strtotime($video[$key]);
            if ($timestamp !== false) {
                return $timestamp;
            }
        }

        return 0;
    }

    /**
     * Normalizes relative URLs to absolute URLs on the bridge's site.
     *
     * Absolute http(s) URLs are returned unchanged, protocol-relative URLs
     * ("//host/path") get an https scheme, and everything else is resolved
     * against self::URI.
     *
     * @param mixed $url Link target as found in the page; cast to string.
     * @return string
     */
    private function normalizeUrl($url)
    {
        $url = trim((string) $url);

        if (preg_match('#^https?://#i', $url) === 1) {
            return $url;
        }
        if (strncmp($url, '//', 2) === 0) {
            return 'https:' . $url;
        }

        return self::URI . '/' . ltrim($url, '/');
    }

    /**
     * Returns the feed name: the series title once collectData() has found
     * one, otherwise the parent's name.
     */
    public function getName()
    {
        if ($this->seriesTitle === '') {
            return parent::getName();
        }

        return $this->seriesTitle . ' - bpb.de Mediathek';
    }

    /**
     * Returns the series listing URL when a slug is configured, otherwise the
     * site root.
     */
    public function getURI()
    {
        $seriesSlug = $this->sanitizeSlug($this->getInput('series_slug'));

        return $seriesSlug !== '' ? $this->buildSeriesUrl($seriesSlug) : self::URI;
    }

    /**
     * Returns the URL of the icon shown for this bridge (served by Google's
     * favicon service for www.bpb.de).
     */
    public function getIcon()
    {
        return 'https://www.google.com/s2/favicons?domain=www.bpb.de&sz=32';
    }

    /**
     * Derives the bridge parameters from a series URL.
     *
     * Recognized form: https://www.bpb.de/mediathek/reihen/SLUG/ (scheme and
     * "www." optional). The host is anchored so look-alike domains do not
     * match, and the slug must be followed by "/", "?", "#" or the end of the
     * URL so a partial slug is never returned.
     *
     * @param string $url URL to inspect.
     * @return array|null ['series_slug' => SLUG], or null when the URL does not match.
     */
    public function detectParameters($url)
    {
        $pattern = '~^(?:https?://)?(?:www\.)?bpb\.de/mediathek/reihen/([a-z0-9-]+)(?:[/?#]|$)~i';
        if (preg_match($pattern, $url, $matches) !== 1) {
            return null;
        }

        return [
            'series_slug' => $matches[1]
        ];
    }
}
|