#!/bin/sh

# Support for checkpointing with DMTCP
# <http://sourceforge.net/projects/dmtcp/> under SGE.

# Copyright (C) 2012, 2015  Dave Love, University of Liverpool

# This file is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3, or (at your option)
# any later version.

# See file GPL-3 in the SGE LICENCES directory.

# Borrows from the shim_dmtcp script distributed in the Debian version
# of Condor, but not sufficiently to be a copyright derived work.
# Originally intended for use as a starter_method, but this version
# won't directly work that way.

# Requires DMTCP v2 commands.

# Fixme:  Consider SGE_STARTER_SHELL_PATH,
# SGE_STARTER_SHELL_START_MODE for possible use as a starter.

# Version number printed by --version.
prog_version=1

# An SGE_TASK_ID of "undefined" (presumably what SGE supplies for
# non-array jobs -- confirm against submit(1)) is treated as task 1, so
# that it can be used in the default checkpoint directory name.
[ "$SGE_TASK_ID" = undefined ] && SGE_TASK_ID=1

# Name this script was invoked as, for messages.  Parameter expansion
# copes with white space in $0, which an unquoted "basename $0" does not.
self=${0##*/}

# Print the help text and exit.
# Arguments: $1 - optional exit status.  With no (or an empty) argument
#                 the help goes to stdout and the exit status is 0;
#                 otherwise it goes to stderr and the script exits with $1.
# Globals:   self (read), help (written)
usage () {
    # Only $self is meant to be expanded below.  Every other "$" is
    # escaped so that the SGE pseudo-variables and the variable names are
    # printed literally instead of being expanded (usually to nothing).
    help="\
Usage: $self [options] [[--] <program> <args>]

DMTCP checkpointing support for SGE, covering running, checkpoint,
migrate and clean.

Actions (only one specified, default -r):
  -c <command>
      Behave like \"-r sh -c '<command>'\", e.g. for use with qsub -S.
      Quoting may be problematic due to an extra shell expansion.
  -k  Clean up (stop processes and delete <dir>)
  -m  Migrate (checkpoint and exit with code 99)
  -p  Make a checkpoint
  -r  Run <program> <args> under checkpointing (default action).
      Tries to restart an existing checkpoint if RESTARTED is not 0.
      Due to an apparent bug in DMTCP 2.4, at least, <program> is not found
      on PATH, and so must be a file name.

Options:
  -d <dir>     Directory in which to write checkpoints (default
               \$SGE_CKPT_DIR/\$JOB_ID.\$SGE_TASK_ID)
  -s <signal>  Signal to cause a checkpoint (e.g. \"10\", \"USR1\"),
               probably as in checkpoint(5).
  -h, --help   Print this help and exit
  --version    Print the version number and exit

Signals caught:  USR1, and <signal>, as above, causes a checkpoint;
                 USR2 initiates the migrate action, and returns 99
                 to cause rescheduling.

Files ~/.dmtcpckpt and .dmtcpckpt are sourced in that order, if they
exist, to supply hook variables.  Of these, variables
{pre,post}_{migrate,checkpoint}_hook and pre_launch_hook are evaluated
as commands before/after the relevant actions, and \$launch_opts is
expanded as extra options for dmtcp_launch when <program> is run
initially, e.g. to use plugins, or add --ckpt-open-files or
--interval.  The migrate and checkpoint actions call dmtcp_command
with the appropriate coordinator commands as arguments.

The RESTARTED environment variable must have a valid value per submit(1),
and is used to decide whether or not to start from scratch.

Example checkpoint(5):

  ckpt_name          dmtcp
  interface          application-level
  ckpt_command       \$sge_root/site/dmtcpckpt -p
  migr_command       \$sge_root/site/dmtcpckpt -m
  restart_command    NONE
  clean_command      \$sge_root/site/dmtcpckpt -k
  ckpt_dir           /scratch/\$job_owner/checkpoints
  signal             NONE
  when               xs
"

    if [ -z "$1" ]; then
        # Help was asked for: normal output, success.
        printf '%s\n' "$help"
        exit 0
    else
        # Usage error: diagnostics stream, caller's status.
        printf '%s\n' "$help" >&2
        exit "$1"
    fi
}

# Print "<script name>: <message>" on stderr and exit with status 1.
# Arguments: the message; several arguments are joined with spaces.
# Note that when this runs inside a subshell (e.g. a command
# substitution or a background list) only that subshell exits.
error () {
    # printf with "$*" prints the message verbatim; echo may interpret
    # backslashes in it under some /bin/sh implementations.
    printf '%s\n' "$self: $*" >&2
    exit 1
}

# Parse the command line.  This relies on the enhanced (util-linux)
# getopt for long options and shell-quoted output.  The short option
# list must name every option handled in the loop below, including
# "s:" and "v".  The status tested is getopt's own, so a bad option
# prints the usage and exits non-zero.
# NOTE: getopt permutes arguments by default, so put "--" before a
# <program> whose own arguments start with "-".
OPTS=$(getopt -o hc:rkmd:ps:v -l help,version -n "$self" -- "$@") ||
  usage $?

# getopt's output is quoted for the shell, hence the eval.
eval set -- "$OPTS"

# Action flags, cleared so that stray environment variables of the same
# names cannot select an action.
# NOTE(review): cpdir is deliberately left alone here, as before, so a
# value inherited from the environment is still honoured -- confirm
# whether that is intended.
do_ckpt= do_run= do_clean= do_migrate= do_c=
cpsignal=                       # arg of -s
cmd=                            # arg of -c

while [ $# -gt 0 ]; do
    case $1 in
        -h|--help) usage;;
        -p) do_ckpt=1;;
        -c) do_c=1; cmd=$2; shift;;
        -r) do_run=1;;
        -k) do_clean=1;;
        -m) do_migrate=1;;
        -d) cpdir=$2; shift;;
        -s) cpsignal=$2; shift;;
        -v|--version) echo "$prog_version"; exit;;
        --) shift; break;;      # the rest is <program> <args>
        *) break;;
    esac
    shift
done

# At most one action may be given.  Each flag is either empty or "1", so
# the concatenation of the flags is empty when no action was chosen and
# starts with "11" when more than one was.
if [ -z "$do_ckpt$do_run$do_clean$do_migrate$do_c" ]; then
    # No explicit action: default to -r, which needs a command to run.
    if [ $# -eq 0 ]; then
        usage 1
    fi
    do_run=1
else
    case "$do_ckpt$do_run$do_clean$do_migrate$do_c" in
        11*) usage 1;;
    esac
fi

# Refuse to go on unless dmtcp_command identifies itself as DMTCP
# version 2.  stderr is captured too, so a missing command simply fails
# to match.
dmtcp_version=$(dmtcp_command --version 2>&1)
case $dmtcp_version in
    "dmtcp_command (DMTCP) 2"*)
        ;;
    *)
        error "DMTCP version 2 required"
        ;;
esac

# With -c, the command string becomes the program: run it through sh.
[ -n "$cmd" ] && set -- /bin/sh -c "$cmd"

# cpdir is a useful abbreviation
[ -n "$cpdir" ] || cpdir="$SGE_CKPT_DIR/$JOB_ID.$SGE_TASK_ID"
DMTCP_CHECKPOINT_DIR=$cpdir; export DMTCP_CHECKPOINT_DIR

DMTCP_QUIET=1; export DMTCP_QUIET

# possible config of hooks
# Use the POSIX "." rather than "source", which some /bin/sh
# implementations (e.g. dash) don't provide.  The file in the current
# directory is given an explicit "./" because "." looks up names
# without a slash on PATH.
[ -f ~/.dmtcpckpt ] && . ~/.dmtcpckpt
[ -f .dmtcpckpt ] && . ./.dmtcpckpt

# Deal with -notify or checkpointing signals
launch_pid=              # pid of (re-)started command being waited on
# For pending STOP (USR1) or a defined checkpointing one.
# A trapped signal interrupts the wait on the job, so the handler waits
# for it again once the checkpoint has been taken.  $cpsignal is left
# unquoted on purpose: when empty it must vanish from the argument list.
trap 'checkpoint; wait $launch_pid' USR1 $cpsignal
# Pending KILL -- reschedule; perhaps reschedule should be optional
trap 'migrate; exit 99' USR2

# Print the coordinator port stored in $cpdir/coord-port on stdout.
# The file is copied with cat instead of being parsed with read, because
# read reports failure when the file has no trailing newline.
# If the file can't be read, error is called.  Callers use this as
# $(get_port), so the exit in error ends only that substitution's
# subshell, not the whole script.
get_port () {
    if ! cat "$cpdir/coord-port"; then
        error "Can't read port from $cpdir/coord-port"
    fi
}

# Clean up: send the coordinator the -k command (stop processes), delete
# the checkpoint directory, and exit with status 0.
# Globals: cpdir (read), clean_port (written)
clean () {
    # Contacting the coordinator is best effort: it may be dead, or no
    # port file may ever have been written.  Only call dmtcp_command
    # when a port could actually be read.  get_port is silenced
    # separately, since a redirection on dmtcp_command does not apply to
    # the command substitution in its arguments.
    if clean_port=$(get_port 2>/dev/null) && [ -n "$clean_port" ]; then
        dmtcp_command -p "$clean_port" -k 2>/dev/null
    fi
    rm -rf "$cpdir"
    exit 0
}

# Migration action: take a checkpoint, then send the coordinator the -q
# command.  If either step fails, error is called with "migration
# failed".  $pre_migrate_hook and $post_migrate_hook are expanded
# (word-split) and run as commands before and after; the function's
# status is that of the post hook.
migrate () {
    $pre_migrate_hook
    # The coordinator is only contacted after a successful checkpoint.
    if ! checkpoint || ! dmtcp_command -p $(get_port) -q; then
        error "migration failed"
    fi
    $post_migrate_hook
}

# Checkpoint action: run $pre_checkpoint_hook, send the coordinator the
# -bc command (presumably a blocking checkpoint -- see
# dmtcp_command(1)), then run $post_checkpoint_hook.  The hooks are
# expanded (word-split) and run as commands.
# Returns: non-zero if the coordinator port can't be read or
#          dmtcp_command fails (e.g. file system full); otherwise the
#          status of the post hook.  Previously the status was always
#          that of the post hook -- zero when no hook is set -- so a
#          failed checkpoint looked like a success to migrate.
# Globals: ckpt_port, ckpt_status, ckpt_hook_status (written)
checkpoint () {
    $pre_checkpoint_hook
    ckpt_status=0
    if ckpt_port=$(get_port); then
        dmtcp_command -p "$ckpt_port" -bc || ckpt_status=$?
    else
        ckpt_status=1
    fi
    # The post hook still runs after a failure, as it always has.
    $post_checkpoint_hook
    ckpt_hook_status=$?
    [ "$ckpt_status" -eq 0 ] || return "$ckpt_status"
    return "$ckpt_hook_status"
}

# Launch from scratch: discard any existing checkpoint directory,
# recreate it, and start the program under dmtcp_launch with a new
# coordinator.
# Arguments: the program to run and its arguments.
# Globals:   cpdir, pre_launch_hook, launch_opts (read), launch_pid (written)
# Returns:   the status of waiting for the background launch, which is 1
#            (via error) whenever dmtcp_launch returns non-zero.
from_scratch () {
    rm -rf "$cpdir"
    # The umask change is confined to the subshell; it creates the
    # directory without any access for "other".
    if ! (umask o-rwx; mkdir -p "$cpdir"); then
        error "Can't make checkpoint directory \"$cpdir\""
    fi
    $pre_launch_hook
    # Launch in background and wait, so that signals can be caught.
    # Fixme: -p 0 doesn't retry when the random port is in use --
    # either grab one initially or fix the dmtcp code.
    {
        dmtcp_launch --new-coordinator --port-file "$cpdir/coord-port" \
            -p 0 $launch_opts "$@" ||
          error 'launch failed'
    } &
    launch_pid=$!
    wait $launch_pid
}

# Re-start from an existing checkpoint if $cpdir holds a DMTCP restart
# script; otherwise launch from scratch.
# Arguments: the program and arguments, used only for a fresh start.
# Globals:   cpdir, self (read), launch_pid (written)
# Returns:   the status of from_scratch, or of waiting for the restart
#            script, which is 1 (via error) whenever the script fails.
restarter () {
    if [ ! -f "$cpdir/dmtcp_restart_script.sh" ]; then
        echo "$self: no checkpoint -- doing fresh start"
        from_scratch "$@"
        return
    fi

    echo "$self: restarting from checkpoint"
    # Fixme:  Deal properly with port; restart script doesn't have
    # --port-file to use with -p 0.
    # Run in the background and wait, as for a fresh launch.
    {
        "$cpdir/dmtcp_restart_script.sh" -p $(get_port) -h $(hostname) ||
          error 'restart failed'
    } &
    launch_pid=$!
    wait $launch_pid
}

# Run action: choose between a fresh launch and a restart according to
# the RESTARTED environment variable.  1 or 2 means the job was
# restarted somehow, so try the checkpoint; 0 means a fresh start; any
# other value is reported and treated as a fresh start.
# Arguments: the program to run and its arguments.
run () {
    case "$RESTARTED" in
        1|2)
            restarter "$@"
            return;;
        0)
            ;;
        *)
            echo "$self: Bad value ($RESTARTED) of RESTARTED -- doing fresh start";;
    esac
    from_scratch "$@"
}

# Carry out the selected action.  -r without a program can only be a
# restart attempt; -r with a program, and -c (whose command has already
# been put into the positional parameters), go through run.
if [ -n "$do_run" ] && [ $# -eq 0 ]; then
    restarter "$@"
elif [ -n "$do_run$do_c" ]; then
    run "$@"
elif [ -n "$do_ckpt" ]; then
    checkpoint
elif [ -n "$do_migrate" ]; then
    migrate
elif [ -n "$do_clean" ]; then
    clean
else
    error "internal error: no action"
fi
