<?php

// Report every level of PHP error, warning and notice while the script runs.
error_reporting(E_ALL);

/**
 * Extract episode metadata from the HTML of a single episode page.
 *
 * Each field is scraped with its own regular expression. When a pattern does
 * not match, the field falls back to an empty value instead of reading a
 * missing capture group; every missing field except location and description
 * is additionally reported through error_log().
 *
 * @param string $html Raw HTML of one episode page. Non-string input (for
 *                     example false from a failed read) is treated as ''.
 *
 * @return array Map with the keys, in this order:
 *               - 'title'          string, entity-decoded, with the
 *                                  ' - My Running Man (MyRM)' suffix removed
 *                                  ('' if missing)
 *               - 'stream'         string, decoded data-url attribute ('' if missing)
 *               - 'tags'           array mapping tag text => name taken from
 *                                  the "Added by ..." title attribute
 *               - 'broadcast_date' string of digits and dashes, or null if missing
 *               - 'filming_date'   string of digits and dashes ('' if missing)
 *               - 'location'       string ('' if missing)
 *               - 'description'    string ('' if missing)
 *               - 'torrent'        string, decoded magnet link ('' if missing)
 */
function parse_episode($html) {

    // A failed read upstream yields false/null; normalise it so that every
    // pattern below simply fails to match.
    $html = (string) $html;

    $matches = [];

    // The <title> element carries both the three-digit episode number and
    // the episode name.
    $episode_num = '';
    $title = '';
    if (preg_match('~<title>Episode #([0-9]{3}) - (.+)</title>~', $html, $matches)) {
        $episode_num = $matches[1];
        $title = html_entity_decode($matches[2]);
        $title = str_replace(' - My Running Man (MyRM)', '', $title);
    } else {
        error_log("WARNING: no episode title found");
    }

    $stream_link = '';
    if (preg_match('~data-url="([^"]+)"~', $html, $matches)) {
        $stream_link = html_entity_decode($matches[1]);
    } else {
        error_log("WARNING: no stream link for episode $episode_num");
    }

    preg_match_all(
        // <a href="/tag/photo" title="Added by andrew">photo</a>
        '~<a href="/tag/[^"]+" title="Added by ([^"]+)">([^<]+)</a>~ms',
        $html,
        $matches,
        PREG_SET_ORDER
    );
    // Keyed by tag text, so a tag that appears twice keeps the last "Added by".
    $tags = [];
    foreach($matches as $match) {
        $tags[html_entity_decode($match[2])] = html_entity_decode($match[1]);
    }

    // Stays null (not '') when absent, matching what earlier output contained.
    $broadcast_date = null;
    if (preg_match('~Broadcast Date: ([0-9-]+)~', $html, $matches)) {
        $broadcast_date = $matches[1];
    } else {
        error_log("WARNING: no broadcast date for episode $episode_num");
    }

    $filming_date = '';
    if (preg_match('~filmed on ([0-9-]+)~', $html, $matches)) {
        $filming_date = $matches[1];
    } else {
        error_log("WARNING: no filming date for episode $episode_num");
    }

    // Location and description are optional: no warning when they are absent.
    $location = '';
    if (preg_match('~Location: ([^<]+)<~', $html, $matches)) {
        $location = html_entity_decode($matches[1]);
    }

    $torrent = '';
    if (preg_match('~href="(magnet:[^"]+)"~', $html, $matches)) {
        $torrent = html_entity_decode($matches[1]);
    } else {
        error_log("WARNING: no torrent link for episode $episode_num");
    }

    $description = preg_match('~Description: ([^<]+)<~', $html, $matches)
        ? html_entity_decode($matches[1])
        : "";

    $ret = [
        'title' => $title,
        'stream' => $stream_link,
        'tags' => $tags,
        'broadcast_date' => $broadcast_date,
        'filming_date' => $filming_date,
        'location' => $location,
        'description' => $description,
        'torrent' => $torrent,
    ];

    return $ret;
}

/**
 * Parse every saved episode page and print the combined result as JSON.
 *
 * Reads the files 'running-pages/1' through 'running-pages/600' (relative to
 * the current working directory), passes each one to parse_episode() and
 * echoes a pretty-printed JSON document keyed by episode number.
 *
 * Pages that cannot be read are reported through error_log() and left out of
 * the output. If the collected data cannot be encoded as JSON, the reason is
 * logged and nothing is printed.
 *
 * @return void
 */
function main() {

    $ret = [];
    for($i = 1; $i < 601; ++$i) {
        $path = 'running-pages/'.$i;

        // file_get_contents() raises a warning and returns false on failure;
        // test readability first so a missing page is skipped cleanly.
        if (!is_file($path) || !is_readable($path)) {
            error_log("WARNING: cannot read $path, skipping episode $i");
            continue;
        }

        $html = file_get_contents($path);
        if ($html === false) {
            error_log("WARNING: failed to read $path, skipping episode $i");
            continue;
        }

        $ret[$i] = parse_episode($html);
    }

    // json_encode() returns false on failure (e.g. malformed UTF-8 in a page);
    // echoing that would silently print nothing.
    $json = json_encode($ret, JSON_PRETTY_PRINT);
    if ($json === false) {
        error_log('ERROR: could not encode episodes as JSON: '.json_last_error_msg());
        return;
    }

    echo $json;

}

// Run the export. main() declares no parameters, so no arguments are passed.
main();