#!/bin/sh
#
# Download a mailing list archive with wget, pack it into LIST.tgz and
# rsync the tarball to $ARCHIVE.
#
# usage: <script> ARCHIVE-URL [COOKIE]

# Treat the expansion of an unset variable as an error.
set -u

# rsync destination that receives the finished tarball
ARCHIVE=obarchive@archive.occupyboston.org:archive.occupyboston.org/web/mailing-lists/

# URL of the mailing list archive (filled in by prep_args).  For example
#  https://lists.mayfirst.org/mailman/private/everyone-submit/ or
#  https://lists.mayfirst.org/pipermail/obit/
URL=

# For non-public archives we need a cookie (filled in by prep_args).
COOKIE=

# Name of the list (derived from URL by prep_args).
LIST=

# Name of this script, used as the prefix of every message.  Parameter
# expansion is used instead of an unquoted `basename $0` so that a script
# path containing whitespace is handled correctly.
me=${0##*/}


# Report a fatal error and terminate the script.
#   $*  - the words of the message; they are joined with spaces
# Writes "*** <script name>: <message>" to stderr and exits with status 1.
bad_news() {
    # printf rather than echo: what echo does with backslashes in its
    # arguments differs between sh implementations, and the message may
    # contain user-supplied text such as the URL.
    printf '%s\n' "*** $me: $*" >&2
    exit 1
}


# Parse and validate the command-line arguments.
#   $1 - archive URL; must start with https://
#   $2 - (optional) cookie; required when the URL contains /private/
# Sets the globals URL, COOKIE and LIST.  LIST is the last path component
# of the URL, ignoring one trailing slash.  COOKIE is cleared when the URL
# is public (does not contain /private/).
# Calls bad_news, which exits the script, when the input is not usable.
prep_args() {
    URL=$1
    if test $# -gt 1; then
        COOKIE=$2
    fi

    case "$URL" in
        https://*) ;;
        *) bad_news "ARCHIVE-URL should start with https://, goodbye" ;;
    esac

    # figure out list name: drop one trailing slash, keep what follows the
    # last remaining slash.  Done with parameter expansion so the URL is
    # never word-split or glob-expanded on the way.
    xurl=${URL%/}
    LIST=${xurl##*/}

    # LIST is later used as a directory and file name; refuse to go on
    # without one.
    if test -z "$LIST"; then
        bad_news "cannot determine list name from $URL"
    fi

    # if the url is public, then we should be good to go
    case "$URL" in
        */private/*) ;;
        *)
            COOKIE=
            return
            ;;
    esac

    if test -z "$COOKIE"; then
        bad_news "$URL appears to be a non-public archive, cookie required"
    fi
}


# Recursively download the archive at $URL into the current directory.
# Reads the globals URL and COOKIE.  When COOKIE is non-empty it is sent
# as a "Cookie:" request header and two leading directory components are
# cut from the saved paths instead of one.
# Calls bad_news, which exits the script, when wget reports failure.
download_stuff() {
    echo "$me: downloading $URL"

    # Collect the optional arguments in this function's own positional
    # parameters.  This replaces building a string and running it through
    # eval, which re-parsed the user-supplied URL and cookie as shell code
    # (a '&', ';' or quote in either one broke or hijacked the command).
    set --
    cut_dirs=1
    if test -n "$COOKIE"; then
        set -- --header "Cookie: $COOKIE"
        cut_dirs=2
    fi

    # ${1+"$@"} expands to nothing when no optional arguments were set;
    # the long-standing idiom for shells that mishandle an empty "$@"
    # while set -u is active.
    wget --convert-links \
        --no-verbose \
        --execute "robots=off" \
        ${1+"$@"} \
        --no-host-directories \
        --cut-dirs="$cut_dirs" \
        --level=3 \
        --recursive \
        --no-parent \
        "$URL"

    if test $? -ne 0; then
        bad_news "wget $URL failed, goodbye"
    fi
}


# Sanity-check the result of the download.
# Reads the globals LIST and URL.  Expects a directory named $LIST in the
# current directory and counts the regular files below it.  With fewer
# than 5 files the user is asked to confirm (RETURN) or abort (Ctrl-C).
# Calls bad_news, which exits the script, when the directory is missing.
check_download() {
    if test ! -d "$LIST"; then
        bad_news "problem with download.  $(pwd)/$LIST not a directory"
    fi

    # Some wc implementations pad the count with blanks; passing it
    # through arithmetic expansion leaves a bare number.
    files=$(( $(find "$LIST/" -type f | wc -l) ))

    if test "$files" -lt 5; then
        echo "$me: download $files file(s) from $URL"
        echo "$me: seems like too few files"
        echo "$me: check contents of $(pwd)/$LIST"
        echo "$me: press RETURN if looks okay, or Ctrl-C to stop"
        # Only used to pause; the text that was typed is ignored.
        read -r line
    fi

    echo "$me: downloaded $files file(s) from $URL"
}


# Pack the downloaded directory $LIST into the gzip-compressed tarball
# $LIST.tgz in the current directory, replacing any existing tarball of
# that name.  Reads the global LIST.
# Calls bad_news, which exits the script, when tar reports failure.
package_download() {
    echo "$me: creating $LIST.tgz"

    # Quoted so that a list name containing whitespace or glob characters
    # stays a single argument.
    rm -f -- "$LIST.tgz"
    tar -zcpf "$LIST.tgz" "$LIST"

    if test $? -ne 0; then
        bad_news "tar failed, goodbye"
    fi
}


# Copy the tarball $LIST.tgz to the rsync destination $ARCHIVE.
# Reads the globals LIST and ARCHIVE.
# Calls bad_news, which exits the script, when rsync reports failure.
upload_archive() {
    echo "$me: uploading $LIST.tgz => $ARCHIVE"

    # Quoted so that neither the file name nor the destination is
    # word-split or glob-expanded.
    rsync --progress "$LIST.tgz" "$ARCHIVE"

    if test $? -ne 0; then
        bad_news "remote file transfer failed"
    fi
}


# Entry point: validate the argument count, then run each stage in order.
#   $1 - archive URL
#   $2 - (optional) cookie
# Stages: prep_args, download_stuff, check_download, package_download,
# upload_archive.  Prints "<script name>: DONE" after the last stage.
# Calls bad_news, which exits the script, unless one or two arguments are
# given.
main() {
    # Two separate tests joined with ||; the -o operator of test is
    # marked obsolescent by POSIX.
    if test $# -lt 1 || test $# -gt 2; then
        # The usage line names the script via $me (it used to print the
        # literal word "me").
        bad_news "usage: $me ARCHIVE-URL [COOKIE]"
    fi

    prep_args "$@"

    download_stuff

    check_download

    package_download

    upload_archive

    echo "$me: DONE"
}

# Run the whole pipeline with the script's command-line arguments.
main "$@"
# Error paths that go through bad_news have already exited with status 1,
# so getting here means main returned; report success.
exit 0
