combine download-pages and download-thumbs; stash max_episode file
This commit is contained in:
parent
af0a0e5aba
commit
edf934c6a6
38
download-pages.sh
Normal file → Executable file
38
download-pages.sh
Normal file → Executable file
@ -1,3 +1,39 @@
|
|||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
|
|
||||||
# Fetch the page of every episode from 1 through 600, one wget call each.
# wget picks the output file name itself (no -O is given).
for (( ep = 1; ep <= 600; ep++ )); do
  wget "https://www.myrunningman.com/ep/${ep}"
done
|
# Strict mode: abort on any failing command (-e), on use of an unset
# variable (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail

BASEURL=https://www.myrunningman.com

# Update magnet rss.xml
wget "${BASEURL}/rss.xml" -O rss.xml

# Find latest episode.
# Takes the <title> line that sorts last and pulls a 3-4 digit number out of
# it. The sort is lexicographic, so this relies on the newest episode's title
# sorting last.
# NOTE(review): confirm against the feed's title format (it will break once
# titles of different digit widths are mixed, e.g. 999 vs 1000).
MAX_EPISODE=$(grep -F '<title>' rss.xml | sort | tail -n 1 \
  | grep -Eo '[0-9]{3,4}') || MAX_EPISODE=

# Refuse to continue with an empty or multi-number result: the value is used
# as a loop bound later, and anything but a single integer would silently
# scrape the wrong range.
if [[ ! "${MAX_EPISODE}" =~ ^[0-9]+$ ]]; then
  printf 'could not determine latest episode from rss.xml (got: %s)\n' \
    "${MAX_EPISODE}" >&2
  exit 1
fi

# Stash the result in the max_episode file.
printf '%s\n' "${MAX_EPISODE}" > max_episode
|
||||||
|
|
||||||
|
# Scrape pages and thumbnails

#######################################
# Download a URL to a file without ever leaving a partial file at the
# destination. wget -O creates the output file before the transfer, so a
# failed download would otherwise leave an empty file that the "already
# downloaded" checks below would skip on every later run.
# Arguments: $1 - URL to fetch
#            $2 - destination path
# Returns:   0 on success, wget's exit status on failure
#######################################
fetch_to() {
  local url=$1
  local dest=$2
  local tmp="${dest}.part"
  local rc

  if wget "${url}" -O "${tmp}"; then
    mv -- "${tmp}" "${dest}"
  else
    rc=$?
    rm -f -- "${tmp}"
    return "${rc}"
  fi
}

#######################################
# Print the thumbnail URL for an episode.
# Globals:   BASEURL (read)
# Arguments: $1 - episode number (decimal integer)
# Outputs:   the URL on stdout
# Returns:   0 when a URL was printed, 1 for episode 310 (no image)
#######################################
thumb_url() {
  local ep=$1
  local padded
  printf -v padded '%03d' "${ep}"

  if (( ep == 310 )); then
    return 1
  fi

  # Arithmetic comparison on purpose: inside [[ ]], "<" compares strings, so
  # e.g. 5 or 45 would sort after 397 and pick the wrong URL form.
  if (( ep < 397 )); then
    printf '%s\n' "${BASEURL}/assets/epimg/${padded}.jpg"
  else
    # _temp suffix for 397++
    printf '%s\n' "${BASEURL}/assets/epimg/${padded}_temp.jpg"
  fi
}

mkdir -p original-html thumb

for (( i = 1; i <= MAX_EPISODE; i++ )); do

  # Raw HTML (skipped when already present from an earlier run)
  if [[ ! -f "original-html/${i}" ]]; then
    fetch_to "${BASEURL}/ep/${i}" "original-html/${i}"
  fi

  # thumbnails, stored under the zero-padded episode number
  printf -v PADNAME '%03d' "${i}"
  if [[ ! -f "thumb/${PADNAME}.jpg" ]]; then
    if url=$(thumb_url "${i}"); then
      fetch_to "${url}" "thumb/${PADNAME}.jpg"
    else
      echo "no image for ${i}"
    fi
  fi
done

echo "Finished (up to episode ${MAX_EPISODE})"
|
||||||
|
@ -1,6 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
# Fetch the thumbnail of every episode from 1 through 600; the file name is
# the episode number zero-padded to three digits.
for (( ep = 1; ep <= 600; ep++ )); do
  printf -v padded '%03d' "${ep}"
  wget "https://www.myrunningman.com/assets/epimg/${padded}.jpg"
done

# Fetch the "_temp"-suffixed thumbnail of every episode from 397 through 600.
for (( ep = 397; ep <= 600; ep++ )); do
  printf -v padded '%03d' "${ep}"
  wget "https://www.myrunningman.com/assets/epimg/${padded}_temp.jpg"
done
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user