combine download-pages and download-thumbs; stash max_episode file

mappu04 2023-06-15 19:49:04 +12:00
parent af0a0e5aba
commit edf934c6a6
2 changed files with 37 additions and 7 deletions

download-pages.sh Normal file → Executable file

@@ -1,3 +1,39 @@
 #!/bin/bash
-for i in {1..600} ; do wget 'https://www.myrunningman.com/ep/'$i ; done
+set -eu
+BASEURL=https://www.myrunningman.com
+
+# Update magnet rss.xml
+wget "${BASEURL}/rss.xml" -O rss.xml
+
+# Find latest episode
+MAX_EPISODE=$(grep -F '<title>' rss.xml | sort | tail -n 1 | grep -Eo '[0-9]{3,4}')
+echo "$MAX_EPISODE" > max_episode
+
+# Scrape pages and thumbnails
+mkdir -p {original-html,thumb}
+for i in $(seq 1 "${MAX_EPISODE}") ; do
+    # Raw HTML
+    if [[ ! -f original-html/$i ]] ; then
+        wget "${BASEURL}/ep/$i" -O "original-html/$i"
+    fi
+
+    # Thumbnails
+    PADNAME=$(printf "%03d" "$i")
+    if [[ ! -f thumb/${PADNAME}.jpg ]] ; then
+        if [[ $i -eq 310 ]] ; then
+            echo "no image for 310"
+        elif [[ $i -lt 397 ]] ; then
+            wget "${BASEURL}/assets/epimg/${PADNAME}.jpg" -O "thumb/${PADNAME}.jpg"
+        else
+            # _temp suffix for 397++
+            wget "${BASEURL}/assets/epimg/${PADNAME}_temp.jpg" -O "thumb/${PADNAME}.jpg"
+        fi
+    fi
+done
+
+echo "Finished (up to episode ${MAX_EPISODE})"

download-thumbs.sh Deleted file

@@ -1,6 +0,0 @@
-#!/bin/bash
-for i in {1..600} ; do wget 'https://www.myrunningman.com/assets/epimg/'$(printf "%03d" $i)'.jpg' ; done
-for i in {397..600} ; do wget 'https://www.myrunningman.com/assets/epimg/'$(printf "%03d" $i)'_temp.jpg' ; done
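For reference, a small sketch of the thumbnail URL scheme that both the old and new scripts rely on; the episode-310 gap and the _temp suffix cutoff at 397 are taken from the scripts above, not re-verified against the live site:

    #!/bin/bash
    # Mirror of the scripts' URL logic: zero-pad the episode number to
    # three digits, and append _temp for episodes 397 and later.
    thumb_url() {
        local pad
        pad=$(printf "%03d" "$1")
        if [[ $1 -lt 397 ]] ; then
            echo "https://www.myrunningman.com/assets/epimg/${pad}.jpg"
        else
            echo "https://www.myrunningman.com/assets/epimg/${pad}_temp.jpg"
        fi
    }
    thumb_url 7     # -> .../assets/epimg/007.jpg
    thumb_url 400   # -> .../assets/epimg/400_temp.jpg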