#! /bin/bash

# Copyright 2012-2017 Francesco Riosa
# Distributed under the terms of the GNU General Public License v2

# @AUTHOR:
# Francesco Riosa <fr@f1r.eu>
# @BLURB: fetch incremental zfs snapshots from a remote host
# @DESCRIPTION:
#   This program fetches snapshots produced by the ZFS on Linux
#   "zfs-auto-snapshot" script.
#   Data is moved incrementally, starting from the latest common snapshot
#   present on both machines up to and including the latest snapshot of the
#   remote host.
#   Dependencies are ZOL zfs, zfs-auto-snapshot, util-linux, openssh, sqlite3,
#   a stream compressor (lz4 by default) and optionally sudo (remote machine).
#   Config files are located in /etc/st-zfs-send-recv.d/*.pool
#   Every *.pool file is required to set some variables, see conf1.pool.example
#   for which ones.
# TODO: unify (remote == src) && (local == dst)

# Optional global settings: /etc/<script name>.conf is sourced when it exists.
# The path is quoted so that a script name containing blanks or glob
# characters is not split or expanded by the shell.
if [[ -e "/etc/${0##*/}.conf" ]] ; then
    source "/etc/${0##*/}.conf"
fi

# TODO: move into library
print_log () # level, message, ...
{
    # Send a message to syslog (facility "daemon") and, for the severe
    # levels, to stderr as well.
    #   level   - matched on its first three letters:
    #             eme*, ale*, cri*, err* -> syslog + "<Label>: message" on stderr
    #             war*, not*             -> syslog only
    #             inf*, deb*             -> dropped
    #             anything else          -> syslog (tag = script name, default
    #                                       priority) + bare message on stderr
    #   message - remaining arguments, joined with a single blank
    local prefix=st-zfs-send-recv
    local level=${1}
    shift 1
    # join once and keep quoted afterwards: the text must not be re-split
    # nor glob-expanded (a "*" in a message would list the current directory)
    local message="${*}"
    # fall back to a PATH lookup when cmd[logger] has not been resolved yet
    local logger_bin=${cmd[logger]:-logger}
    local priority=""
    local label=""

    case ${level} in
        eme*)
            # the syslog priority is spelled "emerg"
            priority=daemon.emerg
            label=Emergency
            ;;
        ale*)
            priority=daemon.alert
            label=Alert
            ;;
        cri*)
            priority=daemon.crit
            label=Critical
            ;;
        err*)
            priority=daemon.err
            label=Error
            ;;
        war*)
            priority=daemon.warning
            ;;
        not*)
            priority=daemon.notice
            ;;
        inf*|deb*)
            # informational and debug messages are currently discarded
            return 0
            ;;
        *)
            "${logger_bin}" -t "${0##*/}" -- "${message}"
            printf '%s\n' "${message}" 1>&2
            return 0
            ;;
    esac

    "${logger_bin}" -t "${prefix}" -p "${priority}" -- "${message}"
    if [[ -n ${label} ]] ; then
        printf '%s: %s\n' "${label}" "${message}" 1>&2
    fi
}


# @FUNCTION: filter_array
# @USAGE: <array[@]>
# @DESCRIPTION:
# use global variable ${FS_SKIP} to remove matching strings
# from given array.
# ${FS_SKIP} is also an array of regular expressions these
# are used by bash =~ comparison operator
# @EXAMPLE:
# filter_array $( get_remote_fs_list my-sqlite.db tank-thez )
# TODO: move into library
function filter_array() {
    # Print, one per line, every argument that matches none of the regular
    # expressions listed in the global ${FS_SKIP}.
    # Always returns 0, also when the last argument is filtered out.
    local candidate
    local pattern
    local skip
    local -a patterns=()

    # Split FS_SKIP on blanks exactly like an unquoted expansion would (so a
    # plain space separated string keeps working) but without pathname
    # expansion: a regex such as "sw*p" must not be replaced by file names
    # from the current directory.
    read -r -a patterns <<< "${FS_SKIP[*]}"

    for candidate in "${@}" ; do
        skip=0
        for pattern in "${patterns[@]}" ; do
            # right-hand side left unquoted on purpose: it is a regex
            if [[ ${candidate} =~ ${pattern} ]] ; then
                skip=1
                break
            fi
        done
        if [[ ${skip} == 0 ]] ; then
            printf '%s\n' "${candidate}"
        fi
    done
    return 0
}

###############################################################################
####  initialization and checks
###############################################################################

# we want boring English messages; LC_ALL is exported so that every child
# process (zfs, sqlite3, ssh, ...) inherits it as well
export LC_ALL=C

# compression related variables
COMPRESS="${COMPRESS:=lz4}"
#COMPRESS_FLAGS=${COMPRESS_FLAGS:=}
UNCOMPRESS_FLAGS="${UNCOMPRESS_FLAGS:=-dcfm}"

# associative array for commands (local machine)
declare -A cmd

# print_log needs ${cmd[logger]}: resolve it before the first message can be
# emitted, otherwise the root check below could not log its failure
cmd[logger]=$( type -p logger )

# check that we are root
if [[ ${UID} -ne 0 ]] ; then
    print_log critical "need root privileges"
    exit 1
fi

# initialize the cmd array it will be possible to call the command
# as `${cmd[zfs]}` if command is missing exit with error
for c in flock logger zfs zfs-auto-snapshot sqlite3 "${COMPRESS}" ; do
    cmd[${c}]=$( type -p "${c}" )
    if [[ -z ${cmd[${c}]} ]] ; then
        print_log critical "command \"${c}\" not found! Exiting now."
        exit 1
    fi
done


###############################################################################
####  functions
###############################################################################

# @FUNCTION: initialize_db
# @USAGE: <dbpath>
# @DESCRIPTION:
# Initialize the database
function initialize_db() {
    # Create the bookkeeping tables (fs_src, fs_dst, snap_src, snap_dst) when
    # they do not exist yet and delete all their rows.
    #   $1 - path of the sqlite database file
    # The SQL below is fed to sqlite3 on standard input.
    local db_file=${1}

    ${cmd[sqlite3]} "${db_file}" << 'EOSQL'
    CREATE TABLE IF NOT EXISTS fs_src (
      "pool" TEXT NOT NULL,
      "guid" TEXT NOT NULL PRIMARY KEY,
      "createtxg" TEXT NOT NULL,
      "creation" TEXT NOT NULL,
      "type" TEXT NOT NULL,
      "used" TEXT NOT NULL,
      "name" TEXT NOT NULL
    );
    CREATE TABLE IF NOT EXISTS fs_dst (
      "pool" TEXT NOT NULL,
      "guid" TEXT NOT NULL PRIMARY KEY,
      "createtxg" TEXT NOT NULL,
      "creation" TEXT NOT NULL,
      "type" TEXT NOT NULL,
      "used" TEXT NOT NULL,
      "name" TEXT NOT NULL
    );
    CREATE TABLE IF NOT EXISTS snap_src (
      "parent_guid" TEXT NOT NULL,
      "guid" TEXT NOT NULL PRIMARY KEY,
      "createtxg" TEXT NOT NULL,
      "creation" TEXT NOT NULL,
      "type" TEXT NOT NULL,
      "used" TEXT NOT NULL,
      "name" TEXT NOT NULL,
      "snapshot" TEXT NOT NULL
    );
    CREATE TABLE IF NOT EXISTS snap_dst (
      "parent_guid" TEXT NOT NULL,
      "guid" TEXT NOT NULL PRIMARY KEY,
      "createtxg" TEXT NOT NULL,
      "creation" TEXT NOT NULL,
      "type" TEXT NOT NULL,
      "used" TEXT NOT NULL,
      "name" TEXT NOT NULL,
      "snapshot" TEXT NOT NULL
    );
    DELETE FROM fs_src;
    DELETE FROM fs_dst;
    DELETE FROM snap_src;
    DELETE FROM snap_dst;
EOSQL
}


# @FUNCTION: discover_fs_list
# @USAGE: <zcmd> <dbpath> <table_suffix> <pool>
# @DESCRIPTION:
# Get the list of filesystems from zfs pool
# @GLOBAL: cmd
# @EXAMPLE:
# discover_fs_list zfs my-sqlite.db src my_pool
# discover_fs_list "${cmd[ssh]} ${rcmd[zfs]}" my-sqlite.db src my_pool
function discover_fs_list() {
    # List every filesystem and volume of a pool and import the result into
    # table fs_<table_suffix>, one row per dataset, prefixed by the pool name.
    #   $1 - zfs command line; may contain several words
    #        (e.g. "ssh host -- sudo /sbin/zfs")
    #   $2 - path of the sqlite database
    #   $3 - table suffix (the script uses "src" and "dst")
    #   $4 - pool (or pool/filesystem) to list recursively
    local zcmd=${1}
    local dbpath=${2}
    local table_suffix=${3}
    local pool=${4}

    # ${zcmd} is left unquoted on purpose so that it splits into words.
    # sed uses "," as delimiter because ":" may legally appear in a ZFS name
    # and would terminate the expression early; the tab is passed literally
    # through $'...' rather than relying on sed understanding "\t".
    ${zcmd} list \
        -Hpr \
        -t filesystem,volume \
        -o guid,createtxg,creation,type,used,name \
        -s name \
        "${pool}" \
    | sed \
        -e "s,^,${pool}|," \
        -e $'s,\t,|,g' \
    | ${cmd[sqlite3]} \
        -csv \
        -separator '|' \
        "${dbpath}" \
        ".import /dev/stdin fs_${table_suffix}"
}


# @FUNCTION: discover_snapshots
# @USAGE: <zcmd> <dbpath> <table_suffix> <filesystem> <pool>
# @DESCRIPTION:
# Fill the snapshot table with 1000 most recent snapshots
# @EXAMPLE:
# discover_snapshots zfs my-sqlite.db dst my_pool my_pool
# discover_snapshots zfs my-sqlite.db dst my_pool/my_fs my_pool
# echo "select * from fs_dst join  snap_dst on fs_dst.guid = snap_dst.parent_guid" | sqlite3 my_sqlite.db
function discover_snapshots() {
    # Import the snapshots of one filesystem into table snap_<table_suffix>.
    # Snapshots are listed newest first and the list is cut after 1000 lines.
    #   $1 - zfs command line; may contain several words
    #   $2 - path of the sqlite database
    #   $3 - table suffix (the script uses "src" and "dst")
    #   $4 - filesystem whose direct snapshots are listed
    #   $5 - pool value used to look the filesystem up in fs_<table_suffix>
    local zcmd=${1}
    local dbpath=${2}
    local table_suffix=${3}
    local fs=${4}
    local pool=${5}
    # declared separately so the assignment does not hide sqlite3's status
    local parent_guid

    # guid of the owning filesystem, stored as first column of every row
    parent_guid=$(
        ${cmd[sqlite3]} "${dbpath}" <<< "SELECT guid \
        FROM fs_${table_suffix} \
        WHERE pool='${pool}' AND name = '${fs}' \
        ORDER BY creation DESC \
        LIMIT 1"
    )

    # ${zcmd} is left unquoted on purpose so that it splits into words.
    # sed: prepend the parent guid, turn tabs into "|", split "name@snapshot"
    # at the first "@" into two columns, stop after 1000 lines.
    ${zcmd} list \
        -Hpr \
        -d 1 \
        -t snapshot \
        -S creation \
        -o guid,createtxg,creation,type,used,name \
        "${fs}" \
    | sed \
        -e "s,^,${parent_guid}|," \
        -e $'s,\t,|,g' \
        -e 's,@,|,' \
        -e 1000q \
    | ${cmd[sqlite3]} -csv -separator '|' "${dbpath}" ".import /dev/stdin snap_${table_suffix}"
}


# @FUNCTION: fill_snapshots_table
# @USAGE: <zcmd> <dbpath> <table_suffix> <pool>
# @DESCRIPTION:
# Fill the snapshot table from known filesystems
function fill_snapshots_table() {
    # Call discover_snapshots for every filesystem recorded in
    # fs_<table_suffix> for the given pool.
    #   $1 - zfs command line; may contain several words
    #   $2 - path of the sqlite database
    #   $3 - table suffix (the script uses "src" and "dst")
    #   $4 - pool value to select filesystems by
    local zcmd=${1}
    local dbpath=${2}
    local table_suffix=${3}
    local pool=${4}
    # loop variable kept local: callers use a variable named "fs" too
    local fs
    local -a fs_names=()

    # Read the names into an array first (one per line, no word splitting or
    # globbing). The loop body may run ssh, which would otherwise consume the
    # rest of the list if it were read from standard input.
    mapfile -t fs_names < <(
        ${cmd[sqlite3]} "${dbpath}" \
            <<< "SELECT name FROM fs_${table_suffix} WHERE pool='${pool}' ORDER BY name"
    )

    for fs in "${fs_names[@]}" ; do
        discover_snapshots "${zcmd}" "${dbpath}" "${table_suffix}" "${fs}" "${pool}"
    done
}


# @FUNCTION: get_remote_fs_list
# @USAGE: <dbpath> <pool>
# @DESCRIPTION:
# Get the list of filesystems from a remote pool
# The pool is excluded, both from list and from the name of the filesystem
# @GLOBAL: cmd
# @EXAMPLE:
# declare -a filesystems=( $( get_remote_fs_list my-sqlite.db my_pool ) )
function get_remote_fs_list() {
    # Print, one per line, the names stored in fs_src for the given pool,
    # excluding the pool itself and with the leading "<pool>/" removed.
    #   $1 - path of the sqlite database
    #   $2 - pool name
    local dbpath=${1}
    local pool=${2}
    local name

    # The prefix is stripped with a literal parameter expansion instead of a
    # sed program built from the pool name: ":" or "." in the name would be
    # interpreted by sed as delimiter / regex.
    while IFS= read -r name ; do
        printf '%s\n' "${name#"${pool}/"}"
    done < <(
        ${cmd[sqlite3]} "${dbpath}" \
            <<< "SELECT name FROM fs_src WHERE pool='${pool}' AND name != pool ORDER BY name"
    )
}


# @FUNCTION: get_fs_prop
# @USAGE: <dbpath> <table_suffix> <field> <fs>
# @DESCRIPTION:
# query the fs table for any field
function get_fs_prop() {
    # Print one column of the most recently created row of fs_<table_suffix>
    # whose name equals the given filesystem.
    #   $1 - path of the sqlite database
    #   $2 - table suffix (the script uses "src" and "dst")
    #   $3 - column to select (e.g. type)
    #   $4 - full filesystem name
    # The value is printed exactly as stored. No post-processing is applied:
    # the function has no pool parameter, so filtering the result through an
    # expression built from a variable named "pool" would make it depend on
    # whatever the caller's scope happens to contain.
    local dbpath=${1}
    local table_suffix=${2}
    local field=${3}
    local fs=${4}

    ${cmd[sqlite3]} "${dbpath}" <<< "SELECT ${field} \
    FROM fs_${table_suffix} \
    WHERE name ='${fs}' \
    ORDER BY creation DESC \
    LIMIT 1"
}


# @FUNCTION: get_newest_snap
# @USAGE: <dbpath> <table_suffix> <filesystem> <pool>
# @DESCRIPTION:
# Get name of most recent snapshot from filesystem.
# without the filesystem part
# @EXAMPLE:
# get_newest_snap my-sqlite.db src my_pool/my_fs my_pool
# return: @zfs-auto-snap_daily-2018-01-02-1230
function get_newest_snap() {
    # Print "@<snapshot>" for the most recently created snapshot recorded for
    # a filesystem, or nothing when none is recorded.
    #   $1 - path of the sqlite database
    #   $2 - table suffix (the script uses "src" and "dst")
    #   $3 - full filesystem name
    #   $4 - pool value the filesystem was recorded under
    # The query result always starts with "@", so there is no "<pool>/"
    # prefix to strip; the value is printed as returned by sqlite3.
    local dbpath=${1}
    local table_suffix=${2}
    local fs=${3}
    local pool=${4}

    ${cmd[sqlite3]} "${dbpath}" <<< "
    SELECT '@' || s.snapshot \
    FROM fs_${table_suffix} AS f \
    JOIN snap_${table_suffix} AS s \
      ON f.guid = s.parent_guid \
    WHERE f.pool = '${pool}' \
      AND f.name = '${fs}' \
    ORDER BY s.creation DESC \
    LIMIT 1 \
    "
}


# @FUNCTION: get_remote_newest_snap
# @USAGE: <dbpath> <filesystem> <pool>
# @DESCRIPTION:
# see get_newest_snap()
function get_remote_newest_snap() {
    # Convenience wrapper: get_newest_snap on the "src" tables.
    #   $1 - path of the sqlite database
    #   $2 - full filesystem name
    #   $3 - pool value the filesystem was recorded under
    local dbpath=${1}
    local fs=${2}
    local pool=${3}
    # every argument is quoted so that each one is forwarded as a single word
    get_newest_snap "${dbpath}" src "${fs}" "${pool}"
}


# @FUNCTION: get_common_snap
# @USAGE: <dbpath> <filesystem> <src_pool>
# @DESCRIPTION:
# Get name of most recent _common_ snapshot.
# @EXAMPLE:
# get_common_snap my-sqlite.db my_pool/my_fs my_pool
# return: @zfs-auto-snap_daily-2018-01-02-1230
function get_common_snap() {
    # Print "@<snapshot>" for the most recently created source snapshot of a
    # filesystem whose guid is also present in snap_dst, or nothing when
    # there is no such snapshot.
    #   $1 - path of the sqlite database
    #   $2 - full source filesystem name
    #   $3 - source pool value the filesystem was recorded under
    # The result always starts with "@" and is printed as returned by
    # sqlite3; it is not filtered through any variable of the caller's scope.
    local dbpath=${1}
    local fs=${2}
    local src_pool=${3}

    ${cmd[sqlite3]} "${dbpath}" <<< " \
    SELECT '@' || s1.snapshot \
    FROM fs_src AS f \
    JOIN snap_src AS s1 \
      ON f.guid = s1.parent_guid \
    JOIN snap_dst AS s2 \
      ON s1.guid = s2.guid \
    WHERE f.pool = '${src_pool}' \
      AND f.name = '${fs}' \
    ORDER BY s1.creation DESC \
    LIMIT 1 \
    "
}


# @FUNCTION: send_receive_fs
# @USAGE: <dbpath> <src_filesystem> <dst_filesystem>
# @DESCRIPTION:
# Send and Receive zfs stream
# @EXAMPLE:
# send_receive_fs my-sqlite.db my_pool backups_pool/my_pool
function send_receive_fs() {
    # Incrementally replicate one remote filesystem into a local one, then
    # prune old local snapshots according to ${KEEP_SNAP}.
    #   $1 - path of the sqlite database
    #   $2 - remote (source) filesystem or pool
    #   $3 - local (destination) filesystem
    # Globals read: REMOTE_POOL, COMPRESS, COMPRESS_FLAGS, UNCOMPRESS_FLAGS,
    #               KEEP_SNAP, cmd[], rcmd[]
    # Returns 0 when nothing had to be sent or the transfer succeeded,
    # 1 when no common snapshot exists, otherwise the status of the failing
    # pipeline stage.
    local dbpath=${1}
    # remote filesystem/pool
    local fs=${2}
    # destination (local) filesystem
    local dst=${3}
    # last snap on current pool, will be the base point for the incremental send
    local current_snap
    # last snap on the remote filesystem
    local remote_snap
    # return code
    local rc=0
    local stage_rc
    local -a pipe_rc=()

    # variables for snapshot cleanup
    local k
    local keep_num
    local keep_label
    # extra options for "zfs recv"
    local -a recv_opts=( -x mountpoint )

    current_snap=$( get_common_snap "${dbpath}" "${fs}" "${REMOTE_POOL}" )
    remote_snap=$( get_remote_newest_snap "${dbpath}" "${fs}" "${REMOTE_POOL}" )

    # Without any remote snapshot both names are empty and would compare
    # equal; report what really happened instead of "up to date".
    if [[ -z ${remote_snap} ]] ; then
        print_log notice "${fs} has no snapshot on the remote host, nothing to send"
        return 0
    fi

    # right-hand side quoted: compare as strings, not as a glob pattern
    if [[ ${current_snap} == "${remote_snap}" ]] ; then
        print_log notice "${fs}${current_snap} is alreay up to date"
        return 0
    fi

    if [[ -z ${current_snap} ]] ; then
        print_log critical "${fs}${current_snap} Cannot find common snapshot"
        return 1
    fi

    print_log notice "${fs}${current_snap} => ${remote_snap}"

    # volumes have no mountpoint property
    if [[ $( get_fs_prop "${dbpath}" src type "${fs}" ) == volume ]] ; then
        recv_opts=( -o volmode=none )
    fi

    # ${cmd[ssh]} holds a whole command line and is split on purpose.
    # Locally the resolved compressor path is preferred, falling back to the
    # bare command name.
    ${cmd[ssh]} "( \
    ${rcmd[zfs]} send -I \
            ${fs}${current_snap} \
            ${fs}${remote_snap} \
        | ${rcmd[${COMPRESS}]} ${COMPRESS_FLAGS} \
    )" \
    | ${cmd[${COMPRESS}]:-${COMPRESS}} ${UNCOMPRESS_FLAGS} \
    | ${cmd[zfs]} recv -F -eu "${recv_opts[@]}" "${dst}"
    # must be read immediately: any other command resets PIPESTATUS
    pipe_rc=( "${PIPESTATUS[@]}" )

    # A failure in any stage (sender, decompressor, receiver) counts; the
    # right-most failing stage provides the code, so a failing "zfs recv"
    # is still reported with its own status.
    for stage_rc in "${pipe_rc[@]}" ; do
        if [[ ${stage_rc} != 0 ]] ; then
            rc=${stage_rc}
        fi
    done

    if [[ ${rc} != 0 ]] ; then
        print_log error "failed ${fs}${current_snap} => ${remote_snap} (${rc})"
        print_log critical "${fs}${current_snap} => ${remote_snap} (${rc}) failed"
        # recv -F -du -x mountpoint ${dst}
        return ${rc}
    fi

    # destroy old snapshots, the transfer was successful
    # (KEEP_SNAP entries are "<number>,<label>"; the expansion is left
    # unquoted so a blank separated string works as well as an array)
    for k in ${KEEP_SNAP[@]} ; do
        keep_num=${k%,*}
        keep_label="${k#*,}"
        ${cmd[zfs-auto-snapshot]} --quiet --syslog --destroy-only \
            --label="${keep_label}" --keep="${keep_num}" \
            "${dst}"
    done

    return ${rc}
}


# @FUNCTION: send_receive_pool
# @USAGE: <dbpath> <src_pool> <dst_pool/dst_filesystem>
# @DESCRIPTION:
# Recursively Send and Receive a pool
# filesystems are looped, send is _not_ recursive (i.e. no -r)
# @EXAMPLE:
# send_receive_pool my-sqlite.db tank-thez P000204/pnpitalia
function send_receive_pool() {
    # Replicate a remote pool: first the pool dataset itself, then every
    # filesystem returned by get_remote_fs_list that passes filter_array.
    #   $1 - path of the sqlite database
    #   $2 - remote (source) pool
    #   $3 - local destination (pool or pool/filesystem)
    # Returns 0 when every transfer succeeded, otherwise the sum of the
    # individual return codes, capped at 255.
    local dbpath=${1}
    local remote_pool=${2}
    local local_pool=${3}
    local ret=0
    # loop variable kept local so the caller's "fs" is not overwritten
    local fs

    # pool
    send_receive_fs "${dbpath}" "${remote_pool}" "${local_pool}"
    ret=$(( ret + ${?} ))

    # loop for all known filesystems
    for fs in $( filter_array $( get_remote_fs_list "${dbpath}" "${remote_pool}" ) ) ; do
        send_receive_fs "${dbpath}" "${remote_pool}/${fs}" "${local_pool}/${fs}"
        ret=$(( ret + ${?} ))
    done

    if [[ ${ret} != 0 ]] ; then
        print_log error "error: at least one send/receive has failed"
    fi

    # An exit status only keeps its low 8 bits: without the cap a sum of
    # exactly 256 would be reported as success.
    if (( ret > 255 )) ; then
        ret=255
    fi
    return ${ret}
}

###############################################################################
####  main
###############################################################################

(
    # Wait for lock on /run/${0##*/}.lock (fd 200) for 10 seconds
    ${cmd[flock]} -x -w 10 200 || exit 1

    for conf in "/etc/${0##*/}.d/"*.pool ; do
        # an unmatched glob stays literal, hence the existence test;
        # every pool is handled in its own subshell so that the variables
        # set by one config file cannot leak into the next one
        [[ -e "${conf}" ]] && (

                source "${conf}"

                #### initialization of variables requiring remote #############
                # associative array for commands (remote machine)
                declare -A rcmd

                # initialize our ssh cmd
                ssh_bin=$( type -p ssh )
                if [[ -z ${ssh_bin} ]] ; then
                    print_log critical "command \"ssh\" not found! Exiting now."
                    exit 1
                fi
                cmd[ssh]="${ssh_bin} ${REMOTE_HOST} -- "

                # check that we are root remotely; "id -u" is used because
                # not every remote shell defines ${UID}
                if ${cmd[ssh]} 'test "$(id -u)" -ne 0' ; then
                    cmd[ssh]="${cmd[ssh]} sudo "
                    if ! ${cmd[ssh]} ls / > /dev/null ; then
                        print_log critical "critical: cannot execute sudo remotely (on ${REMOTE_HOST}) and I'm not root"
                        exit 1
                    fi
                fi
                # initialize the rcmd array it will be possible to call the command
                # as `${rcmd[zfs]}` if command is missing exit with error
                # The lookup runs inside "sh -c" because it is a shell builtin
                # and could not be executed directly by sudo.
                for c in zfs zfs-auto-snapshot "${COMPRESS}" ; do
                    rcmd[${c}]=$( ${cmd[ssh]} sh -c "'command -v ${c}'" )
                    if [[ -z ${rcmd[${c}]} ]] ; then
                        print_log critical "critical: remote command \"${c}\" not found (on ${REMOTE_HOST})! Exiting now."
                        exit 1
                    fi
                done

                #### database initialization #############
                # created only after the remote checks passed, so that an
                # early exit does not leave an empty file behind
                dbpath=$( mktemp "/tmp/${LOCAL_POOL//\//--}-XXXXXXXX.db" )
                if [[ -z ${dbpath} ]] ; then
                    print_log critical "critical: cannot create temporary database for ${LOCAL_POOL}"
                    exit 1
                fi

                initialize_db "${dbpath}"
                discover_fs_list "${cmd[ssh]} ${rcmd[zfs]}" "${dbpath}" src "${REMOTE_POOL}"
                discover_fs_list "${cmd[zfs]}" "${dbpath}" dst "${LOCAL_POOL}"
                fill_snapshots_table "${cmd[ssh]} ${rcmd[zfs]}" "${dbpath}" src "${REMOTE_POOL}"
                fill_snapshots_table "${cmd[zfs]}" "${dbpath}" dst "${LOCAL_POOL}"

                #### send and receive #############
                # the database is removed only after a fully successful run
                send_receive_pool "${dbpath}" "${REMOTE_POOL}" "${LOCAL_POOL}" \
                && rm -f "${dbpath}"
        )
    done

) 200>"/run/${0##*/}.lock"

# kate: encoding utf-8; eol unix; syntax Bash;
# kate: indent-width 4; mixedindent off; replace-tabs on;
# kate: remove-trailing-space on; space-indent on;
# kate: word-wrap-column 500; word-wrap off;
