diff --git a/download-pages.sh b/download-pages.sh
old mode 100644
new mode 100755
index fca677c..ceab551
--- a/download-pages.sh
+++ b/download-pages.sh
@@ -1,3 +1,77 @@
 #!/bin/bash
 
-for i in {1..600} ; do wget 'https://www.myrunningman.com/ep/'$i ; done
+# Mirror every episode page and thumbnail from myrunningman.com.
+#
+# Writes rss.xml, max_episode, original-html/N and thumb/NNN.jpg into the
+# current directory. Existing page and thumbnail files are skipped, so the
+# script can be re-run to fetch only what is missing.
+
+set -euo pipefail
+
+readonly BASEURL=https://www.myrunningman.com
+
+# Print a message on stderr and abort.
+die() {
+  printf '%s\n' "$*" >&2
+  exit 1
+}
+
+# Download URL $1 into file $2.
+# wget writes to "$2.part", which is renamed only on success, so a failed
+# transfer never leaves an empty or truncated file that the "already
+# downloaded" checks below would skip on the next run.
+fetch() {
+  local url=$1 dest=$2
+  if wget "${url}" -O "${dest}.part" ; then
+    mv -- "${dest}.part" "${dest}"
+  else
+    rm -f -- "${dest}.part"
+    die "download failed: ${url}"
+  fi
+}
+
+# Update magnet rss.xml
+
+fetch "${BASEURL}/rss.xml" rss.xml
+
+# Find latest episode
+
+# NOTE(review): the empty fixed-string pattern matches every line of rss.xml,
+# so this stage filters nothing; the intended search string may have been
+# lost. Confirm what it should select.
+MAX_EPISODE=$(grep -F '' rss.xml | sort | tail -n 1 | grep -Eo '[0-9]{3,4}') \
+  || die "no episode number found in rss.xml"
+# grep -o prints one match per line; accept only a single plain number.
+[[ "${MAX_EPISODE}" =~ ^[0-9]+$ ]] \
+  || die "unexpected episode number: ${MAX_EPISODE}"
+# Force base 10 so a zero-padded match is not read as octal below.
+MAX_EPISODE=$(( 10#${MAX_EPISODE} ))
+printf '%s\n' "${MAX_EPISODE}" > max_episode
+
+# Scrape pages and thumbnails
+
+mkdir -p original-html thumb
+for (( i = 1; i <= MAX_EPISODE; i++ )) ; do
+
+  # Raw HTML
+  if [[ ! -f "original-html/${i}" ]] ; then
+    fetch "${BASEURL}/ep/${i}" "original-html/${i}"
+  fi
+
+  # thumbnails
+  printf -v PADNAME '%03d' "${i}"
+  if [[ ! -f "thumb/${PADNAME}.jpg" ]] ; then
+    if (( i == 310 )) ; then
+      echo "no image for 310"
+    elif (( i < 397 )) ; then
+      # Numeric test: inside [[ ]] "<" compares strings, which would send
+      # e.g. episode 40 to the _temp branch.
+      fetch "${BASEURL}/assets/epimg/${PADNAME}.jpg" "thumb/${PADNAME}.jpg"
+    else
+      # _temp suffix for 397++
+      fetch "${BASEURL}/assets/epimg/${PADNAME}_temp.jpg" "thumb/${PADNAME}.jpg"
+    fi
+  fi
+done
+
+echo "Finished (up to episode ${MAX_EPISODE})"
diff --git a/download-thumbs.sh b/download-thumbs.sh
deleted file mode 100644
index 9904b7a..0000000
--- a/download-thumbs.sh
+++ /dev/null
@@ -1,6 +0,0 @@
-#!/bin/bash
-
-for i in {1..600} ; do wget 'https://www.myrunningman.com/assets/epimg/'$(printf "%03d" $i)'.jpg' ; done
-
-for i in {397..600} ; do wget 'https://www.myrunningman.com/assets/epimg/'$(printf "%03d" $i)_temp'.jpg' ; done
-