diff --git a/AnnasArchiveBlogBridge.php b/AnnasArchiveBlogBridge.php
new file mode 100644
index 0000000..a9dfcd1
--- /dev/null
+++ b/AnnasArchiveBlogBridge.php
@@ -0,0 +1,121 @@
+            array(
+                'name' => 'Limit',
+                'type' => 'number',
+                'required' => false,
+                'defaultValue' => 20,
+                'title' => 'Maximum number of posts to return (max 20)'
+            )
+        )
+    );
+
+    /**
+     * Builds the feed items.
+     *
+     * Scrapes the blog index for post links, then loads each post page to
+     * obtain its TLDR summary and publication date. The optional "limit"
+     * input is capped at 20; a missing, blank or non-positive value falls
+     * back to 20. Calls returnServerError() when the index page or its
+     * div.main container cannot be obtained.
+     */
+    public function collectData()
+    {
+        $maxPosts = 20;
+
+        // Cast first: a blank or non-numeric input must not reach min() or
+        // array_slice() as a string, and a value below 1 would otherwise
+        // yield an empty feed (0) or drop posts from the end (negative).
+        $limit = (int) ($this->getInput('limit') ?? $maxPosts);
+        $limit = ($limit < 1) ? $maxPosts : min($limit, $maxPosts);
+
+        // Fetch the main blog page
+        $html = getSimpleHTMLDOM(self::URI)
+            or returnServerError('Could not fetch blog listing.');
+
+        // All post links live inside the main div
+        $mainDiv = $html->find('div.main', 0);
+        if (!$mainDiv) {
+            returnServerError('Could not find blog post listing.');
+        }
+
+        // Map of resolved post URL => title. Keying by URL keeps a post that
+        // is linked more than once from producing duplicate feed items; the
+        // first title encountered wins.
+        $posts = [];
+        foreach ($mainDiv->find('a') as $link) {
+            $href = trim((string) $link->href);
+            $title = trim((string) $link->plaintext);
+
+            // Keep only blog post pages; a "[" in the link text marks a
+            // language variant such as [zh].
+            if (strpos($href, '.html') === false || strpos($title, '[') !== false) {
+                continue;
+            }
+
+            $postUrl = $this->resolvePostUrl($href);
+            if (!isset($posts[$postUrl])) {
+                $posts[$postUrl] = $title;
+            }
+        }
+
+        // Limit the number of posts before issuing one request per post
+        $posts = array_slice($posts, 0, $limit, true);
+
+        // Fetch full content for each post
+        foreach ($posts as $postUrl => $title) {
+            $postData = $this->fetchPostContent($postUrl);
+
+            $this->items[] = [
+                'uri' => $postUrl,
+                'title' => $title,
+                'timestamp' => $postData['timestamp'],
+                'author' => 'Anna',
+                'content' => $postData['tldr']
+            ];
+        }
+    }
+
+    /**
+     * Turns a link taken from the blog index into a full post URL.
+     *
+     * @param string $href Raw href attribute of the link.
+     * @return string The href itself when it already starts with http(s)://,
+     *                otherwise the href attached to the blog URI.
+     */
+    private function resolvePostUrl($href)
+    {
+        // Already absolute: use as-is
+        if (preg_match('#^https?://#i', $href)) {
+            return $href;
+        }
+
+        // Root-relative: attach to the scheme and host of the blog URI
+        if (strpos($href, '/') === 0) {
+            $base = parse_url(self::URI);
+            if (isset($base['scheme'], $base['host'])) {
+                $port = isset($base['port']) ? ':' . $base['port'] : '';
+                return $base['scheme'] . '://' . $base['host'] . $port . $href;
+            }
+        }
+
+        // Plain relative link: join with exactly one slash
+        return rtrim(self::URI, '/') . '/' . ltrim($href, '/');
+    }
+
+    /**
+     * Loads a single post page and extracts its summary and date.
+     *
+     * @param string $url Full URL of the post page.
+     * @return array Map with two keys: 'tldr' (string, the summary markup, a
+     *               placeholder message when the page could not be loaded, or
+     *               '' when the page has no TLDR element) and 'timestamp'
+     *               (int Unix time, or null when no valid date was found).
+     */
+    private function fetchPostContent($url)
+    {
+        $html = getSimpleHTMLDOM($url);
+
+        if (!$html) {
+            return [
+                'tldr' => 'Could not fetch post content.',
+                'timestamp' => null
+            ];
+        }
+
+        // Extract TLDR - taken from the first element carrying the "tldr"
+        // class, whatever its tag
+        $tldr = '';
+        $tldrElement = $html->find('.tldr', 0);
+        if ($tldrElement) {
+            // Try innertext first, fallback to plaintext if empty
+            $tldr = trim((string) $tldrElement->innertext);
+            if ($tldr === '') {
+                $tldr = trim((string) $tldrElement->plaintext);
+            }
+        }
+
+        // Extract date from the main content
+        // Date appears as "annas-archive.li/blog, 2025-12-20"
+        $timestamp = null;
+        $mainDiv = $html->find('div.main', 0);
+        if ($mainDiv && preg_match('/(\d{4}-\d{2}-\d{2})/', $mainDiv->plaintext, $matches)) {
+            // strtotime() returns false for an impossible date such as
+            // 2025-13-45; keep null in that case rather than leaking false.
+            $parsed = strtotime($matches[1]);
+            if ($parsed !== false) {
+                $timestamp = $parsed;
+            }
+        }
+
+        return [
+            'tldr' => $tldr,
+            'timestamp' => $timestamp
+        ];
+    }
+}
diff --git a/README.md b/README.md
index fe1fd01..0ce4ecb 100644
--- a/README.md
+++ b/README.md
@@ -18,6 +18,15 @@ Diese Sammlung enthält verschiedene Bridge-Implementierungen für RSS-Bridge, u
 ### [Ananta Game News Bridge](https://bridge.ponywave.de/#bridge-AnantaBridge) (Von Akamaru)
 - **Beschreibung**: Zeigt die neuesten Nachrichten von Ananta Game
 
+### [Anna's Archive Blog Bridge](https://bridge.ponywave.de/#bridge-AnnasArchiveBlogBridge) (Von Akamaru)
+- **Beschreibung**: Zeigt die neuesten Blog-Posts von Anna's Archive
+- **Parameter**:
+  - **Limit** (optional): Maximale Anzahl an Posts (Standard: 20, max: 20)
+- **Hinweise**:
+  - Extrahiert TLDR-Zusammenfassungen der Posts
+  - Datum wird aus jedem Post extrahiert
+  - Autor ist auf "Anna" gesetzt
+
 ### [Antenne Bayern Nachrichten Bridge](https://bridge.ponywave.de/#bridge-AntenneBayernBridge) (Von Akamaru)
 - **Beschreibung**: Aktuelle Nachrichten von Antenne Bayern
 - **Parameter**: