122 lines
3.5 KiB
PHP
122 lines
3.5 KiB
PHP
<?php
|
|
|
|
declare(strict_types=1);
|
|
|
|
/**
 * Bridge that turns the Anna's Archive blog into a feed.
 *
 * The blog index at self::URI is scraped for links to post pages; each post
 * page is then loaded to extract its ".tldr" element (used as the item
 * content) and the first YYYY-MM-DD date found in its "div.main" element
 * (used as the item timestamp).
 */
class AnnasArchiveBlogBridge extends BridgeAbstract
{
    const MAINTAINER = 'Akamaru';
    const NAME = "Anna's Archive Blog";
    const URI = 'https://annas-archive.li/blog';
    const CACHE_TIMEOUT = 3600; // 1h
    const DESCRIPTION = "Get the latest posts from Anna's Archive Blog";

    const PARAMETERS = [
        [
            'limit' => [
                'name' => 'Limit',
                'type' => 'number',
                'required' => false,
                'defaultValue' => 20,
                'title' => 'Maximum number of posts to return (max 20)',
            ],
        ],
    ];

    /**
     * Return the URL of the icon shown for this bridge.
     *
     * @return string URL of a 32px favicon for annas-archive.li, served by
     *                Google's favicon endpoint.
     */
    public function getIcon()
    {
        return 'https://www.google.com/s2/favicons?domain=annas-archive.li&sz=32';
    }

    /**
     * Collect the newest blog posts into $this->items.
     *
     * Reports a server error through returnServerError() when the index page
     * cannot be loaded or does not contain a "div.main" element.
     */
    public function collectData()
    {
        $maxPosts = 20;

        // Normalise to an int inside [0, $maxPosts]. Without the lower bound a
        // negative value would make array_slice() drop posts from the END of
        // the list instead of limiting it, silently bypassing the cap.
        $limit = (int) ($this->getInput('limit') ?? $maxPosts);
        $limit = max(0, min($limit, $maxPosts));

        // Fetch the main blog page
        $html = getSimpleHTMLDOM(self::URI);
        if (!$html) {
            returnServerError('Could not fetch blog listing.');
        }

        // All post links live inside the main div
        $mainDiv = $html->find('div.main', 0);
        if (!$mainDiv) {
            returnServerError('Could not find blog post listing.');
        }

        $posts = [];
        foreach ($mainDiv->find('a') as $link) {
            $href = $link->href;

            // An anchor without an href does not yield a string here, and with
            // strict_types enabled passing that to strpos() raises a TypeError,
            // so such anchors are skipped explicitly.
            if (!is_string($href) || strpos($href, '.html') === false) {
                continue;
            }

            // Language variants are listed with a bracketed tag such as "[zh]"
            $title = trim($link->plaintext);
            if (strpos($title, '[') !== false) {
                continue;
            }

            $posts[] = [
                'title' => $title,
                'url' => $href,
            ];
        }

        // Limit the number of posts before doing one request per post
        $posts = array_slice($posts, 0, $limit);

        foreach ($posts as $post) {
            $postUrl = $this->resolvePostUrl($post['url']);
            $postData = $this->fetchPostContent($postUrl);

            $this->items[] = [
                'uri' => $postUrl,
                'title' => $post['title'],
                'timestamp' => $postData['timestamp'],
                'author' => 'Anna',
                'content' => $postData['tldr'],
            ];
        }
    }

    /**
     * Turn an href taken from the blog index into an absolute URL.
     *
     * Plain relative hrefs ("some-post.html") are appended to self::URI.
     * Absolute, protocol-relative and root-relative hrefs are handled
     * separately so they are not glued onto self::URI as a broken path.
     *
     * @param string $href Non-empty href value of a post link.
     * @return string Absolute URL of the post.
     */
    private function resolvePostUrl(string $href): string
    {
        if (preg_match('#^https?://#i', $href) === 1) {
            return $href;
        }

        $base = parse_url(self::URI);

        // Must be tested before the root-relative case: "//host/x" also starts with "/"
        if (strpos($href, '//') === 0) {
            return $base['scheme'] . ':' . $href;
        }

        if (strpos($href, '/') === 0) {
            return $base['scheme'] . '://' . $base['host'] . $href;
        }

        return self::URI . '/' . $href;
    }

    /**
     * Load a single post page and extract its summary and publication date.
     *
     * A post that cannot be loaded yields a placeholder summary and a null
     * timestamp rather than failing the whole feed.
     *
     * @param string $url Absolute URL of the post page.
     * @return array{tldr: string, timestamp: int|null} "tldr" is the inner HTML
     *         of the first ".tldr" element (its plain text if the HTML is empty,
     *         '' if there is no such element); "timestamp" is a Unix timestamp
     *         or null when no parseable date was found.
     */
    private function fetchPostContent(string $url): array
    {
        $unavailable = [
            'tldr' => 'Could not fetch post content.',
            'timestamp' => null,
        ];

        // NOTE(review): getSimpleHTMLDOM() is defined outside this file and may
        // signal failure by throwing instead of returning a falsy value - confirm
        // against the framework version in use. Both cases map to the placeholder
        // so that one broken post cannot abort the whole feed.
        try {
            $html = getSimpleHTMLDOM($url);
        } catch (\Exception $e) {
            return $unavailable;
        }

        if (!$html) {
            return $unavailable;
        }

        // Extract TLDR - can be either <div class="tldr"> or <p class="tldr">
        $tldr = '';
        $tldrElement = $html->find('.tldr', 0);
        if ($tldrElement) {
            // Prefer the inner HTML, fall back to plain text if it is empty
            $tldr = trim($tldrElement->innertext);
            if ($tldr === '') {
                $tldr = trim($tldrElement->plaintext);
            }
        }

        // Extract date from the main content
        // Date appears as "annas-archive.li/blog, 2025-12-20"
        $timestamp = null;
        $mainDiv = $html->find('div.main', 0);
        if ($mainDiv && preg_match('/(\d{4}-\d{2}-\d{2})/', $mainDiv->plaintext, $matches)) {
            // The pattern also accepts impossible dates (e.g. month 13), for
            // which strtotime() returns false; keep null in that case.
            $parsed = strtotime($matches[1]);
            if ($parsed !== false) {
                $timestamp = $parsed;
            }
        }

        return [
            'tldr' => $tldr,
            'timestamp' => $timestamp,
        ];
    }
}
|