Füge MainPostBridge hinzu
This commit is contained in:
659
MainPostBridge.php
Normal file
659
MainPostBridge.php
Normal file
@ -0,0 +1,659 @@
|
||||
<?php
|
||||
class MainPostBridge extends BridgeAbstract {
|
||||
const NAME = 'Main-Post Nachrichten';
|
||||
const URI = 'https://www.mainpost.de/';
|
||||
const DESCRIPTION = 'Nachrichten und Artikel von der Main-Post';
|
||||
const MAINTAINER = 'Akamaru';
|
||||
const PARAMETERS = [
|
||||
'Regionen' => [
|
||||
'ort' => [
|
||||
'name' => 'Ort',
|
||||
'type' => 'list',
|
||||
'title' => 'Wähle den Ort',
|
||||
'required' => true,
|
||||
'values' => [
|
||||
'Bad Kissingen' => [
|
||||
'Aura' => 'bad-kissingen/aura',
|
||||
'Bad Bocklet' => 'bad-kissingen/bad-bocklet',
|
||||
'Bad Brückenau' => 'bad-kissingen/bad-brueckenau',
|
||||
'Bad Kissingen' => 'bad-kissingen/bad-kissingen',
|
||||
'Burkardroth' => 'bad-kissingen/burkardroth',
|
||||
'Elfershausen' => 'bad-kissingen/elfershausen',
|
||||
'Euerdorf' => 'bad-kissingen/euerdorf',
|
||||
'Fuchsstadt' => 'bad-kissingen/fuchsstadt',
|
||||
'Geroda' => 'bad-kissingen/geroda',
|
||||
'Hammelburg' => 'bad-kissingen/hammelburg',
|
||||
'Maßbach' => 'bad-kissingen/massbach',
|
||||
'Motten' => 'bad-kissingen/motten',
|
||||
'Münnerstadt' => 'bad-kissingen/muennerstadt',
|
||||
'Nüdlingen' => 'bad-kissingen/nuedlingen',
|
||||
'Oberleichtersbach' => 'bad-kissingen/oberleichtersbach',
|
||||
'Oberthulba' => 'bad-kissingen/oberthulba',
|
||||
'Oerlenbach' => 'bad-kissingen/oerlenbach',
|
||||
'Ramsthal' => 'bad-kissingen/ramsthal',
|
||||
'Rannungen' => 'bad-kissingen/rannungen',
|
||||
'Riedenberg' => 'bad-kissingen/riedenberg',
|
||||
'Schondra' => 'bad-kissingen/schondra',
|
||||
'Sulzthal' => 'bad-kissingen/sulzthal',
|
||||
'Thundorf' => 'bad-kissingen/thundorf',
|
||||
'Wartmannsroth' => 'bad-kissingen/wartmannsroth',
|
||||
'Wildflecken' => 'bad-kissingen/wildflecken',
|
||||
'Zeitlofs' => 'bad-kissingen/zeitlofs',
|
||||
],
|
||||
'Hassberge' => [
|
||||
'Aidhausen' => 'hassberge/aidhausen',
|
||||
'Breitbrunn' => 'hassberge/breitbrunn',
|
||||
'Bundorf' => 'hassberge/bundorf',
|
||||
'Burgpreppach' => 'hassberge/burgpreppach',
|
||||
'Ebelsbach' => 'hassberge/ebelsbach',
|
||||
'Ebern' => 'hassberge/ebern',
|
||||
'Eltmann' => 'hassberge/eltmann',
|
||||
'Ermershausen' => 'hassberge/ermershausen',
|
||||
'Gädheim' => 'hassberge/gaedheim',
|
||||
'Haßfurt' => 'hassberge/hassfurt',
|
||||
'Hofheim' => 'hassberge/hofheim',
|
||||
'Kirchlauter' => 'hassberge/kirchlauter',
|
||||
'Knetzgau' => 'hassberge/knetzgau',
|
||||
'Königsberg' => 'hassberge/koenigsberg',
|
||||
'Maroldsweisach' => 'hassberge/maroldsweisach',
|
||||
'Oberaurach' => 'hassberge/oberaurach',
|
||||
'Pfarrweisach' => 'hassberge/pfarrweisach',
|
||||
'Rauhenebrach' => 'hassberge/rauhenebrach',
|
||||
'Rentweinsdorf' => 'hassberge/rentweinsdorf',
|
||||
'Riedbach' => 'hassberge/riedbach',
|
||||
'Sand am Main' => 'hassberge/sand-am-main',
|
||||
'Stettfeld' => 'hassberge/stettfeld',
|
||||
'Theres' => 'hassberge/theres',
|
||||
'Untermerzbach' => 'hassberge/untermerzbach',
|
||||
'Wonfurt' => 'hassberge/wonfurt',
|
||||
'Zeil' => 'hassberge/zeil',
|
||||
],
|
||||
'Kitzingen' => [
|
||||
'Abtswind' => 'kitzingen/abtswind',
|
||||
'Albertshofen' => 'kitzingen/albertshofen',
|
||||
'Biebelried' => 'kitzingen/biebelried',
|
||||
'Buchbrunn' => 'kitzingen/buchbrunn',
|
||||
'Castell' => 'kitzingen/castell',
|
||||
'Dettelbach' => 'kitzingen/dettelbach',
|
||||
'Geiselwind' => 'kitzingen/geiselwind',
|
||||
'Großlangheim' => 'kitzingen/grosslangheim',
|
||||
'Iphofen' => 'kitzingen/iphofen',
|
||||
'Kleinlangheim' => 'kitzingen/kleinlangheim',
|
||||
'Mainbernheim' => 'kitzingen/mainbernheim',
|
||||
'Mainstockheim' => 'kitzingen/mainstockheim',
|
||||
'Markt Einersheim' => 'kitzingen/markteinersheim',
|
||||
'Marktbreit' => 'kitzingen/marktbreit',
|
||||
'Marktsteft' => 'kitzingen/marktsteft',
|
||||
'Martinsheim' => 'kitzingen/martinsheim',
|
||||
'Nordheim' => 'kitzingen/nordheim',
|
||||
'Obernbreit' => 'kitzingen/obernbreit',
|
||||
'Prichsenstadt' => 'kitzingen/prichsenstadt',
|
||||
'Rödelsee' => 'kitzingen/roedelsee',
|
||||
'Rüdenhausen' => 'kitzingen/ruedenhausen',
|
||||
'Schwarzach' => 'kitzingen/schwarzach',
|
||||
'Segnitz' => 'kitzingen/segnitz',
|
||||
'Seinsheim' => 'kitzingen/seinsheim',
|
||||
'Sommerach' => 'kitzingen/sommerach',
|
||||
'Sulzfeld' => 'kitzingen/sulzfeld',
|
||||
'Volkach' => 'kitzingen/volkach',
|
||||
'Wiesenbronn' => 'kitzingen/wiesenbronn',
|
||||
'Wiesentheid' => 'kitzingen/wiesentheid',
|
||||
'Willanzheim' => 'kitzingen/willanzheim',
|
||||
],
|
||||
'Main-Spessart' => [
|
||||
'Arnstein' => 'main-spessart/arnstein',
|
||||
'Aura im Sinngrund' => 'main-spessart/aura-im-sinngrund',
|
||||
'Birkenfeld' => 'main-spessart/birkenfeld',
|
||||
'Bischbrunn' => 'main-spessart/bischbrunn',
|
||||
'Burgsinn' => 'main-spessart/burgsinn',
|
||||
'Erlenbach' => 'main-spessart/erlenbach',
|
||||
'Esselbach' => 'main-spessart/esselbach',
|
||||
'Eußenheim' => 'main-spessart/eussenheim',
|
||||
'Fellen' => 'main-spessart/fellen',
|
||||
'Frammersbach' => 'main-spessart/frammersbach',
|
||||
'Gemünden' => 'main-spessart/gemuenden',
|
||||
'Gössenheim' => 'main-spessart/goessenheim',
|
||||
'Gräfendorf' => 'main-spessart/graefendorf',
|
||||
'Hafenlohr' => 'main-spessart/hafenlohr',
|
||||
'Hasloch' => 'main-spessart/hasloch',
|
||||
'Himmelstadt' => 'main-spessart/himmelstadt',
|
||||
'Karbach' => 'main-spessart/karbach',
|
||||
'Karlstadt' => 'main-spessart/karlstadt',
|
||||
'Karsbach' => 'main-spessart/karsbach',
|
||||
'Kreuzwertheim' => 'main-spessart/kreuzwertheim',
|
||||
'Lohr' => 'main-spessart/lohr',
|
||||
'Marktheidenfeld' => 'main-spessart/marktheidenfeld',
|
||||
'Mittelsinn' => 'main-spessart/mittelsinn',
|
||||
'Neuendorf' => 'main-spessart/neuendorf',
|
||||
'Neuhütten' => 'main-spessart/neuhuetten',
|
||||
'Neustadt am Main' => 'main-spessart/neustadt-am-main',
|
||||
'Obersinn' => 'main-spessart/obersinn',
|
||||
'Partenstein' => 'main-spessart/partenstein',
|
||||
'Rechtenbach' => 'main-spessart/rechtenbach',
|
||||
'Retzstadt' => 'main-spessart/retzstadt',
|
||||
'Rieneck' => 'main-spessart/rieneck',
|
||||
'Roden' => 'main-spessart/roden',
|
||||
'Rothenfels' => 'main-spessart/rothenfels',
|
||||
'Schollbrunn' => 'main-spessart/schollbrunn',
|
||||
'Steinfeld' => 'main-spessart/steinfeld',
|
||||
'Thüngen' => 'main-spessart/thuengen',
|
||||
'Triefenstein' => 'main-spessart/triefenstein',
|
||||
'Urspringen' => 'main-spessart/urspringen',
|
||||
'Wiesthal' => 'main-spessart/wiesthal',
|
||||
'Zellingen' => 'main-spessart/zellingen',
|
||||
],
|
||||
'Main-Tauber' => [
|
||||
'Berolzheim' => 'main-tauber/berolzheim',
|
||||
'Buch am Ahorn' => 'main-tauber/buch-am-ahorn',
|
||||
'Eubigheim' => 'main-tauber/eubigheim',
|
||||
'Hohenstadt' => 'main-tauber/hohenstadt',
|
||||
'Schillingstadt' => 'main-tauber/schillingstadt',
|
||||
'Assamstadt' => 'main-tauber/assamstadt',
|
||||
'Bad Mergentheim' => 'main-tauber/bad-mergentheim',
|
||||
'Boxberg' => 'main-tauber/boxberg',
|
||||
'Creglingen' => 'main-tauber/creglingen',
|
||||
'Königheim' => 'main-tauber/koenigheim',
|
||||
'Niederstetten' => 'main-tauber/niederstetten',
|
||||
'Weikersheim' => 'main-tauber/weikersheim',
|
||||
'Freudenberg' => 'main-tauber/freudenberg',
|
||||
'Großrinderfeld' => 'main-tauber/grossrinderfeld',
|
||||
'Grünsfeld' => 'main-tauber/gruensfeld',
|
||||
'Igersheim' => 'main-tauber/igersheim',
|
||||
'Külsheim' => 'main-tauber/kuelsheim',
|
||||
'Lauda-Königshofen' => 'main-tauber/lauda-koenigshofen',
|
||||
'Tauberbischofsheim' => 'main-tauber/tauberbischofsheim',
|
||||
'Werbach' => 'main-tauber/werbach',
|
||||
'Wertheim' => 'main-tauber/wertheim',
|
||||
'Wittighausen' => 'main-tauber/wittighausen',
|
||||
],
|
||||
'Rhön-Grabfeld' => [
|
||||
'Aubstadt' => 'rhoengrabfeld/aubstadt',
|
||||
'Bad Königshofen' => 'rhoengrabfeld/bad-koenigshofen',
|
||||
'Bad Neustadt' => 'rhoengrabfeld/bad-neustadt',
|
||||
'Bastheim' => 'rhoengrabfeld/bastheim',
|
||||
'Bischofsheim' => 'rhoengrabfeld/bischofsheim',
|
||||
'Burglauer' => 'rhoengrabfeld/burglauer',
|
||||
'Fladungen' => 'rhoengrabfeld/fladungen',
|
||||
'Großbardorf' => 'rhoengrabfeld/grossbardorf',
|
||||
'Großeibstadt' => 'rhoengrabfeld/grosseibstadt',
|
||||
'Hausen' => 'rhoengrabfeld/hausen',
|
||||
'Hendungen' => 'rhoengrabfeld/hendungen',
|
||||
'Herbstadt' => 'rhoengrabfeld/herbstadt',
|
||||
'Heustreu' => 'rhoengrabfeld/heustreu',
|
||||
'Höchheim' => 'rhoengrabfeld/hoechheim',
|
||||
'Hohenroth' => 'rhoengrabfeld/hohenroth',
|
||||
'Hollstadt' => 'rhoengrabfeld/hollstadt',
|
||||
'Mellrichstadt' => 'rhoengrabfeld/mellrichstadt',
|
||||
'Niederlauer' => 'rhoengrabfeld/niederlauer',
|
||||
'Nordheim' => 'rhoengrabfeld/nordheim',
|
||||
'Oberelsbach' => 'rhoengrabfeld/oberelsbach',
|
||||
'Oberstreu' => 'rhoengrabfeld/oberstreu',
|
||||
'Ostheim' => 'rhoengrabfeld/ostheim',
|
||||
'Rödelmaier' => 'rhoengrabfeld/roedelmaier',
|
||||
'Saal' => 'rhoengrabfeld/saal',
|
||||
'Salz' => 'rhoengrabfeld/salz',
|
||||
'Sandberg' => 'rhoengrabfeld/sandberg',
|
||||
'Schönau' => 'rhoengrabfeld/schoenau',
|
||||
'Sondheim' => 'rhoengrabfeld/sondheim',
|
||||
'Stockheim' => 'rhoengrabfeld/stockheim',
|
||||
'Strahlungen' => 'rhoengrabfeld/strahlungen',
|
||||
'Sulzdorf' => 'rhoengrabfeld/sulzdorf',
|
||||
'Sulzfeld' => 'rhoengrabfeld/sulzfeld',
|
||||
'Trappstadt' => 'rhoengrabfeld/trappstadt',
|
||||
'Unsleben' => 'rhoengrabfeld/unsleben',
|
||||
'Willmars' => 'rhoengrabfeld/willmars',
|
||||
'Wollbach' => 'rhoengrabfeld/wollbach',
|
||||
'Wülfershausen' => 'rhoengrabfeld/wuelfershausen',
|
||||
],
|
||||
'Schweinfurt' => [
|
||||
'Bergrheinfeld' => 'schweinfurt/bergrheinfeld',
|
||||
'Dingolshausen' => 'schweinfurt/dingolshausen',
|
||||
'Dittelbrunn' => 'schweinfurt/dittelbrunn',
|
||||
'Donnersdorf' => 'schweinfurt/donnersdorf',
|
||||
'Ebrach' => 'schweinfurt/ebrach',
|
||||
'Euerbach' => 'schweinfurt/euerbach',
|
||||
'Frankenwinheim' => 'schweinfurt/frankenwinheim',
|
||||
'Geldersheim' => 'schweinfurt/geldersheim',
|
||||
'Gerolzhofen' => 'schweinfurt/gerolzhofen',
|
||||
'Gochsheim' => 'schweinfurt/gochsheim',
|
||||
'Grafenrheinfeld' => 'schweinfurt/grafenrheinfeld',
|
||||
'Grettstadt' => 'schweinfurt/grettstadt',
|
||||
'Kolitzheim' => 'schweinfurt/kolitzheim',
|
||||
'Lülsfeld' => 'schweinfurt/luelsfeld',
|
||||
'Michelau' => 'schweinfurt/michelau',
|
||||
'Niederwerrn' => 'schweinfurt/niederwerrn',
|
||||
'Oberschwarzach' => 'schweinfurt/oberschwarzach',
|
||||
'Poppenhausen' => 'schweinfurt/poppenhausen',
|
||||
'Röthlein' => 'schweinfurt/roethlein',
|
||||
'Stadtlauringen' => 'schweinfurt/stadtlauringen',
|
||||
'Schonungen' => 'schweinfurt/schonungen',
|
||||
'Schwanfeld' => 'schweinfurt/schwanfeld',
|
||||
'Schwebheim' => 'schweinfurt/schwebheim',
|
||||
'Schweinfurt' => 'schweinfurt/schweinfurt',
|
||||
'Sennfeld' => 'schweinfurt/sennfeld',
|
||||
'Sulzheim' => 'schweinfurt/sulzheim',
|
||||
'Üchtelhausen' => 'schweinfurt/uechtelhausen',
|
||||
'Waigolshausen' => 'schweinfurt/waigolshausen',
|
||||
'Wasserlosen' => 'schweinfurt/wasserlosen',
|
||||
'Werneck' => 'schweinfurt/werneck',
|
||||
'Wipfeld' => 'schweinfurt/wipfeld',
|
||||
],
|
||||
'Würzburg' => [
|
||||
'Altertheim' => 'wuerzburg/altertheim',
|
||||
'Aub' => 'wuerzburg/aub',
|
||||
'Bergtheim' => 'wuerzburg/bergtheim',
|
||||
'Bieberehren' => 'wuerzburg/bieberehren',
|
||||
'Bütthard' => 'wuerzburg/buetthard',
|
||||
'Eibelstadt' => 'wuerzburg/eibelstadt',
|
||||
'Eisenheim' => 'wuerzburg/eisenheim',
|
||||
'Eisingen' => 'wuerzburg/eisingen',
|
||||
'Erlabrunn' => 'wuerzburg/erlabrunn',
|
||||
'Estenfeld' => 'wuerzburg/estenfeld',
|
||||
'Frickenhausen' => 'wuerzburg/frickenhausen',
|
||||
'Gaukönigshofen' => 'wuerzburg/gaukoenigshofen',
|
||||
'Gelchsheim' => 'wuerzburg/gelchsheim',
|
||||
'Gerbrunn' => 'wuerzburg/gerbrunn',
|
||||
'Geroldshausen' => 'wuerzburg/geroldshausen',
|
||||
'Giebelstadt' => 'wuerzburg/giebelstadt',
|
||||
'Greußenheim' => 'wuerzburg/greussenheim',
|
||||
'Güntersleben' => 'wuerzburg/guentersleben',
|
||||
'Hausen' => 'wuerzburg/hausen',
|
||||
'Helmstadt' => 'wuerzburg/helmstadt',
|
||||
'Hettstadt' => 'wuerzburg/hettstadt',
|
||||
'Höchberg' => 'wuerzburg/hoechberg',
|
||||
'Holzkirchen' => 'wuerzburg/holzkirchen',
|
||||
'Kirchheim' => 'wuerzburg/kirchheim',
|
||||
'Kist' => 'wuerzburg/kist',
|
||||
'Kleinrinderfeld' => 'wuerzburg/kleinrinderfeld',
|
||||
'Kürnach' => 'wuerzburg/kuernach',
|
||||
'Leinach' => 'wuerzburg/leinach',
|
||||
'Margetshöchheim' => 'wuerzburg/margetshoechheim',
|
||||
'Neubrunn' => 'wuerzburg/neubrunn',
|
||||
'Oberpleichfeld' => 'wuerzburg/oberpleichfeld',
|
||||
'Ochsenfurt' => 'wuerzburg/ochsenfurt',
|
||||
'Prosselsheim' => 'wuerzburg/prosselsheim',
|
||||
'Randersacker' => 'wuerzburg/randersacker',
|
||||
'Reichenberg' => 'wuerzburg/reichenberg',
|
||||
'Remlingen' => 'wuerzburg/remlingen',
|
||||
'Riedenheim' => 'wuerzburg/riedenheim',
|
||||
'Rimpar' => 'wuerzburg/rimpar',
|
||||
'Rottendorf' => 'wuerzburg/rottendorf',
|
||||
'Sonderhofen' => 'wuerzburg/sonderhofen',
|
||||
'Sommerhausen' => 'wuerzburg/sommerhausen',
|
||||
'Tauberrettersheim' => 'wuerzburg/tauberrettersheim',
|
||||
'Theilheim' => 'wuerzburg/theilheim',
|
||||
'Thüngersheim' => 'wuerzburg/thuengersheim',
|
||||
'Uettingen' => 'wuerzburg/uettingen',
|
||||
'Unterpleichfeld' => 'wuerzburg/unterpleichfeld',
|
||||
'Veitshöchheim' => 'wuerzburg/veitshoechheim',
|
||||
'Waldbrunn' => 'wuerzburg/waldbrunn',
|
||||
'Waldbüttelbrunn' => 'wuerzburg/waldbuettelbrunn',
|
||||
'Winterhausen' => 'wuerzburg/winterhausen',
|
||||
'Würzburg' => 'wuerzburg/wuerzburg',
|
||||
'Zell' => 'wuerzburg/zell',
|
||||
'Röttingen' => 'wuerzburg/roettingen',
|
||||
],
|
||||
]
|
||||
]
|
||||
]
|
||||
];
|
||||
|
||||
/**
 * Return the URL of the icon displayed for this bridge.
 *
 * The icon is the favicon served from the site root, so the address is
 * derived from the bridge's base URI.
 *
 * @return string Absolute URL of the favicon.
 */
public function getIcon() {
    return self::URI . 'favicon.ico';
}
|
||||
|
||||
/**
 * Debug trace of the most recent JSON-LD extraction; read by collectData()
 * when no article could be found.
 *
 * Declared explicitly because creating it on the fly is deprecated since PHP 8.2.
 *
 * @var string
 */
private $debugJsonInfo = '';

/**
 * Extract schema.org "NewsArticle" entries from the JSON-LD script blocks of a page.
 *
 * Every <script type="application/ld+json"> block is decoded. A decoded block may be
 * a single node or a list of nodes; a node contributes either the NewsArticle entries
 * of its "hasPart" list or, if it is a NewsArticle itself, the node as a whole.
 * A scalar "image" value is wrapped into an ImageObject-shaped array so that callers
 * can always read $article['image']['url'].
 *
 * Side effect: $this->debugJsonInfo is overwritten with a textual trace of the run.
 *
 * @param string $html Raw HTML of the page.
 * @return array List of decoded NewsArticle arrays, in document order.
 */
private function extractArticlesFromJSON($html) {
    $articles = [];
    $this->debugJsonInfo = "HTML sample (first 1000 chars): " . substr($html, 0, 1000) . "\n\n";

    // The opening tag appears in several spellings; the three patterns are mutually
    // exclusive, so a script block is never matched twice.
    $patterns = [
        '/<script type="application\/ld\+json">\s*(.*?)\s*<\/script>/s',
        '/<script type="application\/ld\+json" id="[^"]*">\s*(.*?)\s*<\/script>/s',
        '/<script type=\'application\/ld\+json\'>\s*(.*?)\s*<\/script>/s'
    ];

    foreach ($patterns as $pattern) {
        if (!preg_match_all($pattern, $html, $matches)) {
            continue;
        }
        $this->debugJsonInfo .= "JSON Pattern matched: " . $pattern . "\n";
        $this->debugJsonInfo .= "Found " . count($matches[1]) . " JSON blocks\n";

        foreach ($matches[1] as $index => $jsonStr) {
            $this->debugJsonInfo .= "JSON block $index (first 300 chars): " . substr($jsonStr, 0, 300) . "...\n";

            $data = json_decode($jsonStr, true);
            if (json_last_error() !== JSON_ERROR_NONE) {
                $this->debugJsonInfo .= "JSON Error in block $index: " . json_last_error_msg() . "\n";
                continue;
            }
            // Valid JSON may still be a bare scalar or null; array_keys() would throw on those.
            if (!is_array($data)) {
                $this->debugJsonInfo .= "JSON block $index is not an object or array, skipping\n";
                continue;
            }
            $this->debugJsonInfo .= "JSON structure: " . print_r(array_keys($data), true) . "\n";

            $isList = array_keys($data) === range(0, count($data) - 1);
            if ($isList) {
                // Top-level JSON array: inspect every element as its own node.
                foreach ($data as $subIndex => $node) {
                    if (is_array($node)) {
                        $articles = array_merge(
                            $articles,
                            $this->collectJsonLdNewsArticles($node, "$index/$subIndex")
                        );
                    }
                }
            } elseif (
                (isset($data['hasPart']) && is_array($data['hasPart']))
                || $this->isJsonLdNewsArticle($data)
            ) {
                $articles = array_merge($articles, $this->collectJsonLdNewsArticles($data, (string) $index));
            } elseif (isset($data[0]) && $this->isJsonLdNewsArticle($data[0])) {
                // Object with numeric keys that is not a plain list: treat its values as articles.
                $this->debugJsonInfo .= "Found array of NewsArticles in block $index\n";
                foreach ($data as $entry) {
                    if ($this->isJsonLdNewsArticle($entry)) {
                        $articles[] = $this->wrapJsonLdImage($entry);
                    }
                }
            }
        }
    }

    return $articles;
}

/**
 * Collect the NewsArticle entries contributed by a single JSON-LD node.
 *
 * @param array  $node  Decoded JSON-LD node.
 * @param string $label Block position used in the debug trace.
 * @return array List of NewsArticle arrays (possibly empty).
 */
private function collectJsonLdNewsArticles(array $node, $label) {
    $found = [];

    if (isset($node['hasPart']) && is_array($node['hasPart'])) {
        $this->debugJsonInfo .= "Found hasPart with " . count($node['hasPart']) . " items in block $label\n";
        foreach ($node['hasPart'] as $part) {
            if (!$this->isJsonLdNewsArticle($part)) {
                continue;
            }
            $part = $this->wrapJsonLdImage($part);
            $found[] = $part;
            // The headline is optional in the data, so do not index it blindly.
            $this->debugJsonInfo .= "Added article from hasPart: " . ($part['headline'] ?? '(no headline)') . "\n";
            $this->debugJsonInfo .= "Article structure: " . print_r($part, true) . "\n";
        }
    } elseif ($this->isJsonLdNewsArticle($node)) {
        $this->debugJsonInfo .= "Found direct NewsArticle in block $label\n";
        $found[] = $this->wrapJsonLdImage($node);
    }

    return $found;
}

/**
 * Tell whether a decoded JSON-LD value is a node typed exactly "NewsArticle".
 *
 * @param mixed $candidate Decoded JSON value.
 * @return bool
 */
private function isJsonLdNewsArticle($candidate) {
    return is_array($candidate)
        && isset($candidate['@type'])
        && $candidate['@type'] === 'NewsArticle';
}

/**
 * Wrap a scalar "image" value into an ImageObject-shaped array.
 *
 * @param array $article Decoded NewsArticle node.
 * @return array The node with 'image' being an array whenever it is set.
 */
private function wrapJsonLdImage(array $article) {
    if (isset($article['image']) && !is_array($article['image'])) {
        $article['image'] = [
            '@type' => 'ImageObject',
            'url' => $article['image']
        ];
    }
    return $article;
}
|
||||
|
||||
/**
 * Debug trace of the most recent HTML teaser extraction.
 *
 * Declared explicitly because creating it on the fly is deprecated since PHP 8.2.
 *
 * @var string
 */
private $debugHtmlInfo = '';

/**
 * Alternative extraction: read articles directly from teaser markup.
 *
 * Several container selectors are tried in order; the first one that yields at
 * least one article wins. For each teaser a headline link is required; image and
 * publication date are optional, and a missing date falls back to the current
 * time in UTC.
 *
 * Side effect: $this->debugHtmlInfo is overwritten with a textual trace of the run.
 *
 * NOTE(review): no caller is visible in this file section. Unlike
 * extractArticlesFromJSON(), this method calls ->find() on its argument, so it
 * presumably expects the parsed DOM object rather than an HTML string - confirm
 * before wiring it up.
 *
 * @param object $html Parsed document offering find($selector[, $index]).
 * @return array List of arrays with 'headline', 'url', 'datePublished' and optionally 'image'.
 */
private function extractArticlesFromHTML($html) {
    $articles = [];
    $debugInfo = '';

    // Candidate selectors, hoisted out of the loops because they never change.
    $selectors = [
        'div.teaser, article',
        '.teaser-box',
        '.article-teaser',
        '.article-list-item',
        '.article-card',
        '.content-teaser'
    ];
    $headlineSelectors = ['a.headline-link', 'h2 a', 'h3 a', '.headline a', '.title a', 'a.title', 'h4 a', '.teaser-headline a'];
    $timeSelectors = ['time', '.date', '.pubdate', '.time', '.timestamp'];

    foreach ($selectors as $selector) {
        $teasers = $html->find($selector);
        $debugInfo .= "Selector '$selector' found " . count($teasers) . " elements\n";

        foreach ($teasers as $teaser) {
            // Try the headline selectors in order of preference.
            $headlineLink = null;
            foreach ($headlineSelectors as $headlineSelector) {
                $headlineLink = $teaser->find($headlineSelector, 0);
                if ($headlineLink) {
                    $debugInfo .= "Found headline with selector: $headlineSelector\n";
                    break;
                }
            }
            // A teaser without a usable link cannot become a feed item.
            if (!$headlineLink || empty($headlineLink->href)) {
                continue;
            }

            $article = [
                'headline' => trim($headlineLink->plaintext),
                'url' => $this->absoluteTeaserUrl((string) $headlineLink->href)
            ];

            $imageUrl = $this->findTeaserImageUrl($teaser);
            if ($imageUrl !== null) {
                $article['image'] = [
                    '@type' => 'ImageObject',
                    'url' => $imageUrl
                ];
            }

            // Prefer the machine-readable datetime attribute over the visible text.
            foreach ($timeSelectors as $timeSelector) {
                $date = $teaser->find($timeSelector, 0);
                if ($date) {
                    $article['datePublished'] = isset($date->datetime)
                        ? $date->datetime
                        : trim($date->plaintext);
                    break;
                }
            }
            if (!isset($article['datePublished'])) {
                // The trailing "Z" denotes UTC, so the value must be produced with gmdate().
                $article['datePublished'] = gmdate('Y-m-d\TH:i:s\Z');
            }

            $articles[] = $article;
        }

        // Stop at the first selector that produced articles.
        if (count($articles) > 0) {
            $debugInfo .= "Found " . count($articles) . " articles with selector '$selector'. Stopping search.\n";
            break;
        }
    }

    $this->debugHtmlInfo = $debugInfo;

    return $articles;
}

/**
 * Turn a teaser link into an absolute URL on the Main-Post site.
 *
 * @param string $href Link target as found in the markup.
 * @return string Absolute URL.
 */
private function absoluteTeaserUrl($href) {
    if (strpos($href, '//') === 0) {
        // Protocol-relative link: keep the host, add a scheme.
        return 'https:' . $href;
    }
    if (preg_match('#^https?://#i', $href)) {
        return $href;
    }
    return self::URI . ltrim($href, '/');
}

/**
 * Find an image for a teaser.
 *
 * Looks at the first <img> in the teaser, then the first <img> inside the first
 * <figure>, then a CSS background-image on a <div>.
 *
 * @param object $teaser Teaser element offering find($selector, $index).
 * @return string|null Image URL, or null when none was found.
 */
private function findTeaserImageUrl($teaser) {
    $img = $teaser->find('img', 0);
    if ($img && !empty($img->src)) {
        return $img->src;
    }

    $figure = $teaser->find('figure', 0);
    if ($figure) {
        $img = $figure->find('img', 0);
        if ($img && !empty($img->src)) {
            return $img->src;
        }
    }

    $divWithStyle = $teaser->find('div[style*="background-image"]', 0);
    if ($divWithStyle
        && preg_match('/background-image:\s*url\([\'"]?(.*?)[\'"]?\)/i', $divWithStyle->style, $matches)
    ) {
        return $matches[1];
    }

    return null;
}
|
||||
|
||||
/**
 * Build the feed items for the selected town.
 *
 * Loads the town page, extracts the NewsArticle entries from its JSON-LD data and
 * converts each one into a feed item (title, uri, timestamp, content, enclosures,
 * author, categories). Articles lacking a headline or URL are skipped, and only
 * the first article per URL is kept.
 *
 * @throws \Exception When the page cannot be loaded or contains no articles.
 */
public function collectData() {
    // 'ort' is a "<region>/<town>" path segment from PARAMETERS; an empty value
    // simply yields the site root.
    $url = self::URI . $this->getInput('ort');

    $html = getSimpleHTMLDOM($url);
    if (!$html) {
        throw new \Exception('Die Mainpost-Seite konnte nicht geladen werden: ' . $url);
    }

    $htmlContent = $html->save();
    $articles = $this->extractArticlesFromJSON($htmlContent);

    if (empty($articles)) {
        // The diagnostic text is only needed on failure, so it is assembled here.
        $htmlSnippet = substr($htmlContent, 0, 500) . '... [truncated] ...' . substr($htmlContent, -500);
        $debugInfo = "URL: " . $url . "\n"
            . "JSON Extraction Info:\n" . $this->debugJsonInfo . "\n"
            . "Number of articles extracted from JSON: 0\n"
            . "HTML Sample:\n" . $htmlSnippet . "\n";
        throw new \Exception('Keine Artikel im JSON-LD gefunden. Debug-Info: ' . $debugInfo);
    }

    $seenUris = [];
    foreach ($articles as $article) {
        // Skip incomplete or malformed entries.
        if (
            !isset($article['headline'], $article['url'])
            || !is_string($article['headline'])
            || !is_string($article['url'])
        ) {
            continue;
        }

        // De-duplicate by URL: the same article can be listed in several JSON-LD blocks.
        if (isset($seenUris[$article['url']])) {
            continue;
        }
        $seenUris[$article['url']] = true;

        $item = [
            'title' => $article['headline'],
            'uri' => $article['url']
        ];

        // datePublished is expected in ISO 8601 (e.g. 2025-04-25T15:00:00Z);
        // anything unparsable falls back to the current time.
        $timestamp = false;
        if (isset($article['datePublished']) && is_string($article['datePublished'])) {
            $timestamp = strtotime($article['datePublished']);
        }
        $item['timestamp'] = $timestamp !== false ? $timestamp : time();

        // Remote text is escaped before it is embedded in markup.
        $item['content'] = '';
        if (isset($article['description']) && is_string($article['description'])) {
            $item['content'] = '<p>' . $this->escapeFeedHtml($article['description']) . '</p>';
        }

        $imageUrl = isset($article['image']) ? $this->feedImageUrl($article['image']) : null;
        if ($imageUrl !== null) {
            $item['content'] .= '<p><img src="' . $this->escapeFeedHtml($imageUrl)
                . '" alt="' . $this->escapeFeedHtml($item['title']) . '"></p>';
            $item['enclosures'] = [$imageUrl];
        }

        $item['author'] = $this->feedAuthorName($article);

        // Category = third-to-last path segment of the article URL.
        $categories = [];
        if (preg_match('/\/([^\/]+)\/[^\/]+\/[^\/]+$/', $article['url'], $matches)) {
            $categories[] = $matches[1];
        }
        $item['categories'] = $categories;

        $this->items[] = $item;
    }
}

/**
 * Escape text for use in HTML content or attribute values.
 *
 * Existing entities are left alone so that pre-encoded source text is not
 * encoded a second time.
 *
 * @param string $text Raw text from the remote site.
 * @return string Escaped text.
 */
private function escapeFeedHtml($text) {
    return htmlspecialchars($text, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8', false);
}

/**
 * Resolve the image URL of an article.
 *
 * Accepts a plain URL string, an array with a 'url' key, or a list of either
 * (the first entry is used).
 *
 * @param mixed $image Value of the article's 'image' field.
 * @return string|null Image URL, or null when none can be determined.
 */
private function feedImageUrl($image) {
    if (is_array($image) && !isset($image['url']) && isset($image[0])) {
        $image = $image[0];
    }
    if (is_array($image)) {
        $image = $image['url'] ?? null;
    }
    if (!is_string($image) || $image === '') {
        return null;
    }
    return str_replace('\\/', '/', $image);
}

/**
 * Resolve the author name of an article.
 *
 * Accepts a plain string, an array with a 'name' key, or a list of either
 * (names are joined with ", "). Falls back to "Main-Post".
 *
 * @param array $article Decoded NewsArticle node.
 * @return string Author name.
 */
private function feedAuthorName(array $article) {
    $fallback = 'Main-Post';
    if (!isset($article['author'])) {
        return $fallback;
    }

    $author = $article['author'];
    if (is_string($author)) {
        return $author;
    }
    if (!is_array($author)) {
        return $fallback;
    }
    if (isset($author['name']) && is_string($author['name'])) {
        return $author['name'];
    }

    // List of authors: only taken when the array is index-based, so that the
    // fields of a single nameless author are not mistaken for names.
    $names = [];
    if (isset($author[0])) {
        foreach ($author as $entry) {
            if (is_string($entry)) {
                $names[] = $entry;
            } elseif (is_array($entry) && isset($entry['name']) && is_string($entry['name'])) {
                $names[] = $entry['name'];
            }
        }
    }

    return $names ? implode(', ', $names) : $fallback;
}
|
||||
}
|
@ -18,7 +18,6 @@ Diese Sammlung enthält verschiedene Bridge-Implementierungen für RSS-Bridge, u
|
||||
|
||||
### EverSD News Bridge
|
||||
- **Beschreibung**: Generiert einen RSS-Feed für EverSD News und Changelogs
|
||||
- **Besonderheiten**: Extrahiert vollständige Changelog-Inhalte, Videos und Bilder
|
||||
|
||||
### Kemono Friends Music News Bridge
|
||||
- **Beschreibung**: Generiert einen RSS-Feed für Kemono Friends Musik Neuigkeiten
|
||||
@ -28,11 +27,14 @@ Diese Sammlung enthält verschiedene Bridge-Implementierungen für RSS-Bridge, u
|
||||
- **Parameter**:
|
||||
- **Limit**: Maximale Anzahl der angezeigten Einträge (Standard: 15)
|
||||
- **Vollständige Detailseiten**: Wenn aktiviert, werden zusätzliche Informationen von den Detailseiten abgerufen (langsamer)
|
||||
- **Besonderheiten**: Verwendet den FreshRSS User-Agent zum Umgehen von CloudFlare-Schutz
|
||||
|
||||
### Ananta Game News Bridge
|
||||
- **Beschreibung**: Generiert einen RSS-Feed für Ananta Game Neuigkeiten
|
||||
- **Besonderheiten**: Extrahiert Titel, Inhalte, Bilder, Kategorien und Datum der News-Einträge
|
||||
|
||||
### MainPostBridge
|
||||
- **Beschreibung**: Generiert einen RSS-Feed für Nachrichten und Artikel der Main-Post (mainpost.de)
|
||||
- **Parameter**:
|
||||
- **Ort**: Wähle einen Ort aus einer Region (z. B. Bad Kissingen, Schweinfurt oder Würzburg)
|
||||
|
||||
## Installation
|
||||
|
||||
|
Reference in New Issue
Block a user