2023-06-15 07:49:23 +00:00
|
|
|
#!/usr/bin/php
|
2022-08-08 07:36:19 +00:00
|
|
|
<?php
|
|
|
|
|
|
|
|
// Report every category of PHP error, warning and notice while the script runs.
error_reporting(E_ALL);
|
|
|
|
|
|
|
|
/**
 * Extract episode metadata from the HTML of a single episode page.
 *
 * Each field is located with a regular expression. When a pattern does not
 * match, the field is set to an empty string so the returned array always
 * has the same keys with string values; a warning is sent to error_log()
 * for every missing field except location and description, which are
 * treated as optional.
 *
 * @param string $html Raw HTML of one episode page.
 * @return array Associative array with the keys 'title', 'stream', 'tags'
 *               (tag text => name taken from the "Added by ..." title
 *               attribute), 'broadcast_date', 'filming_date', 'location',
 *               'description' and 'torrent'.
 */
function parse_episode($html) {
    $matches = [];

    // Title and episode number come from the same <title> element. Guard the
    // match: on failure preg_match() leaves $matches empty, and indexing it
    // would raise "undefined array key" warnings and feed null to
    // html_entity_decode().
    $title = '';
    $episode_num = '';
    if (preg_match('~<title>Episode #([0-9]{3}) - (.+)</title>~', $html, $matches)) {
        $episode_num = $matches[1];
        $title = html_entity_decode($matches[2]);
        // Drop the site-name suffix that the page appends to the title.
        $title = str_replace(' - My Running Man (MyRM)', '', $title);
    } else {
        error_log("WARNING: no title found, episode number unknown");
    }

    $stream_link = '';
    if (preg_match('~data-url="([^"]+)"~', $html, $matches)) {
        $stream_link = html_entity_decode($matches[1]);
    } else {
        error_log("WARNING: no stream link for episode $episode_num");
    }

    preg_match_all(
        // <a href="/tag/photo" title="Added by andrew">photo</a>
        '~<a href="/tag/[^"]+" title="Added by ([^"]+)">([^<]+)</a>~ms',
        $html,
        $matches,
        PREG_SET_ORDER
    );
    $tags = [];
    foreach ($matches as $match) {
        // Keyed by the link text, so a tag that appears twice keeps the last
        // "Added by" value.
        $tags[html_entity_decode($match[2])] = html_entity_decode($match[1]);
    }

    $broadcast_date = '';
    if (preg_match('~Broadcast Date: ([0-9-]+)~', $html, $matches)) {
        $broadcast_date = $matches[1];
    } else {
        error_log("WARNING: no broadcast date for episode $episode_num");
    }

    $filming_date = '';
    if (preg_match('~filmed on ([0-9-]+)~', $html, $matches)) {
        $filming_date = $matches[1];
    } else {
        error_log("WARNING: no filming date for episode $episode_num");
    }

    // Location and description are optional: no warning when absent.
    $location = '';
    if (preg_match('~Location: ([^<]+)<~', $html, $matches)) {
        $location = html_entity_decode($matches[1]);
    }

    $torrent = '';
    if (preg_match('~href="(magnet:[^"]+)"~', $html, $matches)) {
        $torrent = html_entity_decode($matches[1]);
    } else {
        error_log("WARNING: no torrent link for episode $episode_num");
    }

    $description = preg_match('~Description: ([^<]+)<~', $html, $matches)
        ? html_entity_decode($matches[1])
        : "";

    return [
        'title' => $title,
        'stream' => $stream_link,
        'tags' => $tags,
        'broadcast_date' => $broadcast_date,
        'filming_date' => $filming_date,
        'location' => $location,
        'description' => $description,
        'torrent' => $torrent,
    ];
}
|
|
|
|
|
|
|
|
/**
 * Parse every saved episode page and print the combined metadata as JSON.
 *
 * Reads the highest episode number from the file `max_episode`, then for each
 * number from 1 up to and including it loads `original-html/<number>`, runs
 * parse_episode() on the contents, and finally echoes the results (keyed by
 * episode number) as pretty-printed JSON. Both paths are relative to the
 * current working directory.
 *
 * Failures are reported through error_log(): an unreadable `max_episode` file
 * or a JSON encoding error ends the script with exit status 1, while an
 * unreadable episode page is skipped so the remaining episodes are still
 * exported.
 */
function main() {
    $raw_max = file_get_contents('max_episode');
    if ($raw_max === false) {
        // Without the upper bound there is nothing meaningful to export.
        error_log("ERROR: cannot read max_episode");
        exit(1);
    }
    $max_episode = intval($raw_max);

    $ret = [];
    for ($i = 1; $i <= $max_episode; ++$i) {
        $html = file_get_contents('original-html/'.$i);
        if ($html === false) {
            // Do not hand `false` to the parser: it would be treated as an
            // empty page and produce a record made of empty fields.
            error_log("WARNING: cannot read original-html/$i, skipping episode");
            continue;
        }
        $ret[$i] = parse_episode($html);
    }

    $json = json_encode($ret, JSON_PRETTY_PRINT);
    if ($json === false) {
        // json_encode() returns false on failure (e.g. malformed UTF-8);
        // echoing that would silently print nothing.
        error_log('ERROR: cannot encode episodes as JSON: ' . json_last_error_msg());
        exit(1);
    }
    echo $json;
}
|
|
|
|
|
|
|
|
// Script entry point. main() is declared without parameters, so the argv
// array passed here is not used by it.
main($_SERVER['argv']);
|