#!/bin/bash

# Safely back up a git repository via rsync. (Only local paths are supported).
# Runs git-fsck on the source repo first to verify nothing's wrong, then locks
# the source repo (via an update hook) and copies the entire repo via rsync.
# Then runs git-fsck on the backup repo. If either the rsync or the git-fsck
# fails, both are retried (up to 20 attempts in total). Assuming all is
# successful, unlocks the source and backup repos.
#
# Works on both bare and non-bare repositories.


# rsync+git-fsck idea from this git mailing list post:
# http://marc.info/?l=git&m=136422341014631&w=2

# Marker text for the backup lock. lock_repo writes an update hook that echoes
# this message, and lock_repo/unlock_repo grep the hook for it to decide
# whether a repository is currently locked.
LOCK_MSG="Locked for backups"

die () {
    # Print all arguments (joined by single spaces) to stderr and exit the
    # script with status 1.
    #
    # printf is used instead of echo so that a message beginning with an
    # echo option such as -n or -e is printed literally rather than being
    # swallowed as an option.
    local IFS=' '
    printf '%s\n' "$*" >&2
    exit 1
}

unlock_and_die () {
    # Usage: unlock_and_die <gitdir> <message...>
    # Tries to unlock the repository at <gitdir> (reporting, but not aborting
    # on, an unlock failure), then hands the remaining arguments to die.
    local repo=$1
    shift

    if ! unlock_repo "$repo"; then
        echo "Unlocking repo at $repo failed" >&2
    fi

    die "$@"
}

realpath () {
    # Portable way of resolving symlinks, relative paths, etc.
    # Prints the resolved form of $1 on stdout.
    #
    # Many systems no longer ship a plain `python` executable, so python3 is
    # tried first and `python` is used as a fallback. Returns the
    # interpreter's exit status, or 1 if no interpreter is available.
    local py
    for py in python3 python; do
        if command -v "$py" >/dev/null 2>&1; then
            "$py" -c 'import os,sys; sys.stdout.write(os.path.realpath(sys.argv[1])+"\n")' "$1"
            return
        fi
    done
    echo "realpath: neither python3 nor python found in PATH" >&2
    return 1
}

load_sleep () {
    # Sleeps for multiplier * (1 + 1-minute load average) seconds.
    #   $1 - multiplier (default 5)
    #
    # uptime prints "load averages: a b c" on BSD/macOS but
    # "load average: a, b, c" on Linux, so both spellings are accepted.
    # If no load figure can be parsed, the load is treated as 0 and the
    # sleep lasts exactly <multiplier> seconds.
    local multiplier delay
    multiplier=${1:-5}
    delay=$(LC_ALL=C uptime | awk -v m="$multiplier" '
        match($0, /load averages?:[ \t]*[0-9.]+/) {
            load = substr($0, RSTART, RLENGTH)
            sub(/.*:[ \t]*/, "", load)
            print m * (1 + load)
            found = 1
            exit
        }
        END { if (!found) print m }')
    sleep "${delay:-$multiplier}"
}

lock_repo () {
    # Lock the repository whose git directory is $1 by installing a
    # hooks/update script that prints $LOCK_MSG and exits 1. An existing
    # update hook is first moved aside to hooks/update.backupsave.
    #
    # Returns 0 on success, 1 if the repository is already locked or if any
    # step fails.
    #
    # Each step is checked explicitly instead of relying on `set -e`: this
    # function is called as `lock_repo ... || die ...`, and bash ignores
    # `set -e` for everything executed in such a context, so a failed mv
    # would otherwise fall through and overwrite the original hook.
    local gitdir hook hooksave saved

    gitdir=$1
    hook=$gitdir/hooks/update
    hooksave=$hook.backupsave
    saved=0

    if [[ -e $hook ]]; then
        if grep -qF "$LOCK_MSG" "$hook"; then
            echo "$gitdir already locked" >&2
            echo "Remove $hook to break lock" >&2
            return 1
        fi
        mv -f "$hook" "$hooksave" || return 1
        saved=1
    fi

    if ! { printf '#!/bin/sh\necho %s\nexit 1\n' "$LOCK_MSG" > "$hook" &&
           chmod +x "$hook"; }; then
        # Roll back so a half-installed lock never replaces the user's hook.
        rm -f "$hook"
        if (( saved )); then
            mv -f "$hooksave" "$hook"
        fi
        return 1
    fi
    return 0
}

unlock_repo () {
    # Remove the backup lock from the repository whose git directory is $1.
    # If hooks/update does not exist or does not contain $LOCK_MSG, the
    # repository is reported as not locked and 0 is returned. Otherwise the
    # lock hook is removed and, if hooks/update.backupsave exists, it is
    # moved back into place as the update hook.
    #
    # Returns 0 on success, 1 if removing or restoring the hook fails.
    #
    # Failures are checked explicitly instead of via `set -e`: every caller
    # uses `unlock_repo ... || ...`, a context in which bash ignores
    # `set -e`, so a failed rm would otherwise be reported as success.
    local gitdir hook hooksave

    gitdir=$1
    hook=$gitdir/hooks/update
    hooksave=$hook.backupsave

    if [[ ! -e $hook ]] || ! grep -qF "$LOCK_MSG" "$hook"; then
        echo "$gitdir not locked"
        return 0
    fi

    if [[ -e $hooksave ]]; then
        # Replace the lock hook with the saved original in a single step.
        mv -f "$hooksave" "$hook" || return 1
    else
        rm -f "$hook" || return 1
    fi
    return 0
}

fsck_repo () {
    # Run `git fsck --no-dangling --full --strict` on the git directory $1.
    # Returns 0 if fsck exits 0 and its combined stdout/stderr contains none
    # of the words error, fatal, mismatch, unreachable, missing or corrupt.
    # Otherwise prints the captured fsck output to stdout and returns 1.
    local gitdir fsckout fsckret

    gitdir=$1
    fsckout=$(git --git-dir="$gitdir" fsck --no-dangling --full --strict 2>&1)
    fsckret=$?

    # -E is required: in a basic regular expression `|` is a literal
    # character, so without it the alternation would never match.
    if (( fsckret != 0 )) ||
        grep -qE 'error|fatal|mismatch|unreachable|missing|corrupt' <<<"$fsckout"
    then
        echo "fsck of $gitdir failed. Output:"
        printf '%s\n' "$fsckout"
        return 1
    fi
    return 0
}

# Usage: main <from> <to>
# Backs up the git repository at <from> (bare or non-bare) into <to>:
#   1. resolves both paths and creates <to> if needed,
#   2. checks that <from> is a git repository and passes fsck,
#   3. locks the source repo and waits for running git processes to finish,
#   4. rsyncs the repo and fscks the copy, retrying up to 20 times,
#   5. unlocks the source repo and the backup copy.
# Exits 2 on bad usage, with git's status if <from> is not a repository, and
# 1 (via die) on any other failure.
main() {
    if (( $# != 2 )) || [[ ${1-} == -h ]]; then
        echo "Usage: $0 <from> <to>"
        exit 2
    fi

    local from to
    from=$(realpath "$1") || die "Cannot resolve source path $1"
    to=$(realpath "$2") || die "Cannot resolve backup path $2"
    if [[ -z $from || -z $to ]]; then
        die "Cannot resolve source or backup path"
    fi

    # The right-hand side is quoted so it is compared literally; unquoted it
    # would be treated as a glob pattern (e.g. a directory named "[ab]").
    if [[ $from == "$to" ]]; then
        die "Backup path is the same as source path!"
    fi
    mkdir -p "$to"
    if [[ ! ( -w $to && -x $to ) ]]; then
        die "Cannot write to $to !"
    fi

    local from_gitdir to_gitdir
    if [[ -d $from/.git ]]; then
        # not a bare repo
        from_gitdir=$from/.git
        to_gitdir=$to/.git
    else
        # bare repo
        from_gitdir=$from
        to_gitdir=$to
    fi

    # See if $from_gitdir does actually point to a git dir
    # Git will give a suitable error message if it isn't
    git --git-dir="$from_gitdir" show-ref --quiet --verify HEAD || exit $?

    # Verify the source before touching the backup: rsync runs with
    # --delete-after, so copying a damaged source would overwrite a
    # previously good backup.
    fsck_repo "$from_gitdir" || die "fsck of source repo failed. Aborting backup."

    lock_repo "$from_gitdir" || die "Locking source repo failed. Aborting backup."

    # Do not leave the source repo locked if the backup is interrupted.
    trap 'unlock_and_die "$from_gitdir" "Interrupted. Aborting backup."' HUP INT TERM

    # Wait for any running git processes to finish
    local tries
    for (( tries = 20; tries > 0; tries-- )); do
        lsof +D "$from" | awk '{print $1}' | grep -q git || break
        load_sleep 10
    done
    if (( tries == 0 )); then
        unlock_and_die "$from_gitdir" "git operation still running on source repo. Aborting backup."
    fi

    # Copy and verify; tries reaches 0 only if every attempt failed.
    local output
    for (( tries = 20; tries > 0; tries-- )); do
        if output=$(rsync --archive --quiet --delete-after --partial "$from/" "$to" 2>&1 && \
                    fsck_repo "$to_gitdir" 2>&1)
        then
            break
        fi
        load_sleep 10
    done

    if (( tries == 0 )); then
        unlock_and_die "$from_gitdir" "Backups failed! Output from last try: $output"
    fi

    # The lock hook was copied along with everything else, so the backup
    # has to be unlocked as well.
    unlock_repo "$from_gitdir" || echo "Unlocking source repo failed." >&2
    unlock_repo "$to_gitdir" || echo "Unlocking backup repo failed." >&2

    trap - HUP INT TERM
}

# Entry point: run the backup with the arguments given on the command line.
main "$@"

