imgur-rescue-project/yatwiki-scrape/scrape-wikidb.php

33 lines
695 B
PHP

#!/usr/bin/php
<?php
// Scrape every imgur link referenced by the wiki articles stored in wiki.db
// and print them to stdout, one URL per line. Duplicates are not removed.
//
// Two kinds of reference are collected from each article body:
//   1. [imgur]...[/imgur] tags, holding either a bare image name or a full URL;
//   2. plain http(s) URLs whose text contains "imgur.com".

// The database path is relative to the current working directory.
$db = new \PDO("sqlite:wiki.db");
// PHP versions before 8.0 default to silent error mode, where a failed query()
// returns false instead of throwing; make SQL failures explicit everywhere.
$db->setAttribute(\PDO::ATTR_ERRMODE, \PDO::ERRMODE_EXCEPTION);

$links = [];
foreach ($db->query('SELECT id, body FROM articles') as $article) {
    // Bodies are inflated with gzinflate(), i.e. they are expected to be raw
    // DEFLATE data. Skip rows that are missing or corrupt rather than running
    // the regexes against `false`.
    $compressed = $article['body'];
    $body = is_string($compressed) ? gzinflate($compressed) : false;
    if ($body === false) {
        file_put_contents(
            'php://stderr',
            "Skipping article {$article['id']}: body could not be inflated\n"
        );
        continue;
    }

    // [imgur]...[ tags. preg_match_all() returns the number of matches
    // (0 or false means there is nothing to iterate over).
    if (preg_match_all('~\[imgur\](.+?)\[~', $body, $matches)) {
        foreach ($matches[1] as $short) {
            if (preg_match('~^https?://~', $short) === 1) {
                // The tag already holds an absolute URL: keep it, upgraded to
                // https, instead of gluing the image host in front of it.
                $links[] = preg_replace('~^http://~', 'https://', $short);
            } else {
                $links[] = 'https://i.imgur.com/'.$short;
            }
        }
    }

    // Inline links. The part before "imgur.com" is optional (`*`) so that bare
    // https://imgur.com/... URLs are matched as well as subdomain ones; the dot
    // is escaped so only a literal "imgur.com" qualifies; \s also stops the
    // match at carriage returns, so CRLF text does not leak "\r" into a link.
    if (preg_match_all('~https?://[^\s"><\]\[]*imgur\.com[^\s"><\]\[]*~', $body, $matches)) {
        foreach ($matches[0] as $link) {
            $links[] = $link;
        }
    }
}

// Output
foreach ($links as $link) {
    // Collapse a doubled host prefix if one is still present in a link.
    echo str_replace("https://i.imgur.com/http://i.imgur.com", "https://i.imgur.com", $link)."\n";
}