#!/bin/bash

# Ask a yes/no question on stdout and read the answer from stdin.
# Arguments: $@ - prompt text, printed on a single line
# Outputs:   the prompt, plus "Enter yes or no" after each unrecognised answer
# Returns:   0 if the answer starts with Y/y;
#            1 if it starts with N/n, or if stdin ends before a valid answer
ask_yn () {
    local yn
    printf '%s\n' "$*"
    # Loop only while read succeeds, so EOF ends the prompt instead of
    # spinning forever.  The `|| [[ -n $yn ]]` keeps a final answer that
    # lacks a trailing newline.
    while read -r yn || [[ -n "$yn" ]]; do
        case "$yn" in
            [Yy]*) return 0;;
            [Nn]*) return 1;;
            *) echo "Enter yes or no";;
        esac
    done
    # No usable answer before EOF: treat as "no", the safe default.
    return 1
}

# Build a wget mirroring recipe for the site given as $1 in a temporary file,
# open it in $EDITOR (default vi) for adjustment, then source it if the user
# confirms.
site=${1:?Need site to archive}

initial=$(mktemp) || exit 1
# Single quotes defer expansion to trap time and keep the path quoted.  Only
# EXIT is trapped: an ERR trap would delete the recipe as soon as any command
# (for example the editor) returned non-zero, while the script carried on to
# prompt and source the now-missing file.
trap 'rm -f -- "$initial"' EXIT
cat >>"$initial" <<"__EOF__"
# This will archive an entire website, including potentially other hosts
# mentioned in that website.  This generates a lot of load on the remote
# site; be careful before running and adjust arguments as necessary.

# Recipe taken from <https://lwn.net/Articles/766374/>.  See that page
# for more robust methods.

nice wget --mirror \
 `#--execute robots=off`  `# ignore robots.txt` \
 --convert-links \
 --backup-converted  `# make .orig files` \
 --page-requisites  `# download stylesheets, images, etc.` \
 --adjust-extension  `# add .html suffix for example` \
 --base=./ \
 --directory-prefix=./  `# change if you don't want to save to the current dir` \
 `#--span-hosts --domains=hostname1,hostname2`  `# follow links to specific other hosts` \
 `#--reject-regex 'regex'`  `# ignore patterns matching this regex` \
__EOF__
# The recipe ends in a line continuation, so this line becomes wget's final
# argument.  %q shell-escapes the URL so characters such as & ? ; in a query
# string reach wget literally when the recipe is sourced.
printf '  %q\n' "$site" >>"$initial"
"${EDITOR:-vi}" "$initial"
if ask_yn "run it?"; then
    # Sourced rather than executed: the recipe runs in this shell.
    . "$initial"
fi

