#!/bin/bash
#
# __copy1__
# __copy2__
#
# Identity and filesystem layout of the tool. Every path is derived from
# the name the script was invoked as, and everything is exported to the
# environment of the commands started later.
#
# $0 is quoted (and protected by --) so that an invocation path holding
# whitespace or a leading dash still yields one correct command name; the
# assignment is kept apart from 'export' so a failing basename is not
# hidden behind export's own exit status.
CMD=$(basename -- "$0")
export CMD
export CMDVER="__TOOLKIT_VERSION__"
export CMDREL="__TOOLKIT_RELEASE__"
export CMDVERSTRING="__TOOLKIT_SIGNATURE__"
export CONFDIR="/etc/$CMD"
export STATEDIR="/var/run/$CMD"
export TEMPDIR="/tmp/$CMD"
export KU_LOCKFILE="/var/lock/$CMD"
export LOGTAG="$CMD"
export LOGSYSLOG=false
export LOGFILE="/var/log/$CMD.log"

# helper and check programs are searched after the regular PATH entries
export HELPERSDIR="/usr/lib/$CMD/helpers"
export CHECKSDIR="/usr/lib/$CMD/checks"
export PATH="$PATH:$HELPERSDIR:$CHECKSDIR"


# (FUNCTIONS)

. /lib/ku-base/log.sh
. /lib/ku-base/lock.sh

# Print the command synopsis on stderr, then terminate the script with
# exit status 1.
# Reads: CMD, CMDVERSTRING, VERBOSE, KUWATCHER_INTERVAL (the last two are
# shown as the current defaults).
usage()
{
	# the blank first and last lines of the here-document are part of
	# the message
	cat >&2 <<EOF

$CMD $CMDVERSTRING

usage:	$CMD [options]
	$CMD [options] {--list|--dump}

options:
 -v|--verbose	be verbose (default: $VERBOSE)
 -q|--quiet	be quiet
 -D|--debug	activate debug
 -n|--dry-run	don't execute actions (note: tests are always executed)
 -r|--reset	reset all state infos
 -d|--daemon	runs forever (default: run once)
 -t|--time N	interval of checks, in secs, when running as daemon
 		(default: $KUWATCHER_INTERVAL)

EOF
	exit 1
}

cleanup()
{
	trap '' 1 2 3 ERR TERM
	ku_log "removing lock and closing"
	ku_lock_remove
	trap 1 2 3 ERR TERM
}


# Print the jtconf dump of the "check." entries followed by the dump of
# the "test" entries; the first two lines of each dump are dropped.
dump_all()
{
	local prefix=

	for prefix in "check." "test"
	do
		jtconf --dump "$prefix" | sed -n -e '3,$p'
	done
}

# Print what jtconf lists for the "check." prefix (the configured checks).
list_checks()
{
	local prefix='check.'

	jtconf --list "$prefix"
}


# Rebuild the per-check cache under $TEMPDIR when it is missing or stale.
#
# The cache is fresh when $TEMPDIR/checklist exists and no regular file
# matched by $CONFDIR/conf.d/* is newer than it; nothing is done then.
# Otherwise each check printed by list_checks is handed to
# rebuild_check_cache, and the names of the checks it accepted are written,
# one per line, to $TEMPDIR/checklist. Rejected checks are only counted.
#
# Globals:  TEMPDIR, CONFDIR (read)
# Returns:  always 0
refresh_checks_cache()
{
	local checklist="$TEMPDIR/checklist"
	local check=
	local ignored=0
	local changed=
	local total=

	if [ -f "$checklist" ]
	then
		changed=$(find "$CONFDIR"/conf.d/* -type f -newer "$checklist")
		[ -z "$changed" ] && return 0
		# unquoted on purpose: the one-file-per-line list is folded
		# into a single log line
		mylog "config changed: " $changed
	fi

	mylog "rebuilding checks cache ..."

	: >"$checklist"

	# iteration relies on word splitting, i.e. on check names that
	# contain no whitespace
	for check in $(list_checks)
	do
		if rebuild_check_cache "$check"
		then
			echo "$check" >>"$checklist"
		else
			ignored=$((ignored + 1))
		fi
	done

	# number of cached checks = lines in the list (0 if unreadable)
	total=$(wc -l <"$checklist") || total=0
	mylog "rebuilt cache for $((total)) checks ($ignored ignored)"
	return 0
}


# Expand the leading word of a command line when jtconf knows it as a macro.
#   $1  check name (only used in the debug message)
#   $2  jtconf namespace the macro is looked up in ("test" or "helper")
#   $3  NAME of the caller's variable holding the command line; when
#       "$2.<first word>" resolves, that variable is rewritten in place as
#       "<expansion> <remaining arguments>", otherwise it is left untouched
# The first word is cut off with parameter expansion rather than being
# pasted into a sed expression, so a macro name holding '/', '.', '*' and
# the like is taken literally.
# Returns: always 0
_expand_macro()
{
	local _check="$1"
	local _ns="$2"
	local _var="$3"
	local _line="${!_var}"
	local _macro="${_line%%[[:blank:]]*}"
	local _expanded=
	local _rest=

	_expanded=$(jtconf "$_ns.$_macro" 2>/dev/null) || return 0

	pdebug "$_check: '$_macro' expanded in '$_expanded'"
	_rest="${_line#"$_macro"}"
	# drop the blanks that separated the macro from its arguments
	_rest="${_rest#"${_rest%%[![:blank:]]*}"}"
	printf -v "$_var" '%s' "$_expanded $_rest"
	return 0
}

# (Re)create the cache files of one check from its jtconf definition.
#
#   $1  check name as printed by list_checks (e.g. "check.foo")
#
# Files written, with <name> = check name without the leading "check."
# and with spaces turned into underscores:
#   $TEMPDIR/<name>.interval      seconds between two runs of the test
#   $TEMPDIR/<name>.cmd           test command, macro-expanded via "test.*"
#   $TEMPDIR/<name>.action_ok     action for state 0     (only if defined)
#   $TEMPDIR/<name>.action_fail   action for other states (only if defined)
#   $TEMPDIR/<name>.do_savestate  "true" or "false"
#   $TEMPDIR/<name>.debug         empty flag file, present if <check>.debug
#                                 is defined
#   $STATEDIR/<name>.state        seeded from <check>.startstate, only when
#                                 the file does not exist yet
# All other $TEMPDIR/<name>.* files are removed first.
#
# Globals:  TEMPDIR, STATEDIR (read)
# Returns:  0 if the check was cached; 1 if it was ignored because it has
#           no interval, no test, or neither an ok nor a fail action
rebuild_check_cache()
{
	local check="$1"
	local checkname=$(echo "$check" | sed -e 's/^check\.//' -e 's/ /_/g')
	local tbase="$TEMPDIR/$checkname"
	local statefile="$STATEDIR/$checkname.state"
	local interval=
	local cmd=
	local action_ok=
	local action_fail=
	local do_savestate=
	local startstate=

	rm -f "$tbase".*

	interval=$(jtconf "$check.interval") || {
		mylog "warn: ignore check '$check', no interval defined"
		return 1
	}

	cmd=$(jtconf "$check.test") || {
		mylog "warn: ignore check '$check', no test defined"
		return 1
	}

	# 2026-01-20
	# - fix: don't touch the statefile if it is already in place
	if [ ! -f "$statefile" ]
	then
		if startstate=$(jtconf "$check.startstate" 2>/dev/null)
		then
			echo "$startstate" >"$statefile"
		fi
	fi

	action_ok=$(jtconf "$check.action_ok" 2>/dev/null)
	action_fail=$(jtconf "$check.action_fail" 2>/dev/null)
	if [ -z "$action_ok" ] && [ -z "$action_fail" ]
	then
		mylog "warn: ignore check '$check', no actions (ok/fail) defined"
		return 1
	fi

	# anything but an explicit yes/true/1 means "don't save the state"
	do_savestate=$(jtconf "$check.savestate" 2>/dev/null)
	case $do_savestate in
	  yes|true|1)	do_savestate=true ;;
	  *)		do_savestate=false ;;
	esac

	_expand_macro "$check" test cmd
	if [ -n "$action_ok" ]
	then
		_expand_macro "$check" helper action_ok
		echo "$action_ok"	>"$tbase.action_ok"
	fi
	if [ -n "$action_fail" ]
	then
		_expand_macro "$check" helper action_fail
		echo "$action_fail"	>"$tbase.action_fail"
	fi

	if jtconf "$check.debug" >/dev/null 2>&1
	then
		: >"$tbase.debug"
	else
		rm -f "$tbase.debug"
	fi

	mylog "  every $interval secs: $check"

	echo "$interval"	>"$tbase.interval"
	echo "$cmd"		>"$tbase.cmd"
	echo "$do_savestate"	>"$tbase.do_savestate"

	return 0
}

run_check()
{
	local check="$1"
	local checkname=$(echo "$check" | sed -e 's/^check\.//' -e 's/ /_/g')
	local tbase=$TEMPDIR/$checkname

	[ -f $tbase.interval ] || return 0

	local cmd=$(cat $tbase.cmd)
	local interval=$(cat $tbase.interval)
	local do_savestate=$(cat $tbase.do_savestate)
	local action_ok=$([ -f $tbase.action_ok ] && cat $tbase.action_ok)
	local action_fail=$([ -f $tbase.action_fail ] && cat $tbase.action_fail)

	local statefile="$STATEDIR/${checkname}.state"
	local prevstate=$([ -f $statefile ] && cat $statefile)
	local errfile="$tbase.err"
	local outfile="$tbase.out"
	local testfile="$tbase.test"

	local action=
	local state=
	local actionstate=
	local lastrun=
	local now=
	local delta=
	local olddebug=$DEBUG

	# activate/deactivate debug on-the-fly?
	#
	[ -f $TEMPDIR/debug ] &&	{ DEBUG=true; olddebug=true; rm -f $TEMPDIR/debug; }
	[ -f $TEMPDIR/nodebug ] &&	{ DEBUG=false; olddebug=false; rm -f $TEMPDIR/nodebug; }
	[ -f $tbase.debug ] &&		{ DEBUG=true; }

	# it's time to run this check?
	#
	[ -f $testfile ] && {
		:> $TEMPDIR/now
		lastrun=$(stat --format='%Y' $testfile)
		now=$(stat --format='%Y' $TEMPDIR/now)
		delta=$(expr $now - $lastrun)
		pdebug "$check: interval=$interval, lastrun=$lastrun, now=$now delta=$delta"
		[ $delta -lt $interval ] && {
			DEBUG=$olddebug
			return 0
		}
	}

	pdebug "executing test cmd: $cmd"
	sh $tbase.cmd >$testfile 2>&1
	state=$?
	pdebug "test cmd returned: $state"

	# ignore this errors, but log a warning
	#
	case $state in
		127|255)
			mylog "($checkname) WARN: check returned $state (usually syntax error or command not found)"
			mylog "($checkname) check command is: $cmd"
			mylog "($checkname) see $tbase.test for errors"
			rm -f $tbase.interval	# disables since next cache rebuild
			DEBUG=$olddebug
			return 0
			;;
	esac

	$do_savestate && {
		[ "$state" == "$prevstate" ] && {
			pdebug "state unchanged: $state (no action)"
			DEBUG=$olddebug
			return 0
		}
		mylog "($checkname) state changed: actual=$state previous=$prevstate"
	}

	if [ $state == 0 ]
	then
		action="$action_ok"
		actionfile=$tbase.action_ok
		errfile="$tbase.ok-err"
		outfile="$tbase.ok-out"
	else
		action="$action_fail"
		actionfile=$tbase.action_fail
		errfile="$tbase.fail-err"
		outfile="$tbase.fail-out"
	fi

	if [ "$action" == "" ]
	then
		pdebug "(no action)"
	else
		mylog "($checkname) run $action"
		if $dry_run
		then
			pdebug "(DUMMY) run $action"
		else
			pdebug "run $action"
			sh $actionfile >$outfile 2>$errfile
			actionstate=$?
			
			if [ $actionstate == 0 ]
			then
				# action succeded, records current state if needed
				$do_savestate && {
					mylog "($checkname) savestate: $state"
					echo "$state" >$statefile
					pdebug "state saved in $statefile"
				}
			else
				mylog "($checkname) run error $? (no savestate)"
			fi
		fi
	fi

	DEBUG=$olddebug
	return 0
}



# Make sure the working directories exist, creating the missing ones.
#
# $TEMPDIR is created private to its owner with mode 700: a directory
# needs the search (x) bit to be usable, so the former mode 600 produced a
# directory in which a non-root owner could neither create nor open files.
#
# Globals:  STATEDIR, TEMPDIR (read)
# Returns:  0; the whole script exits with mkdir's status if a directory
#           cannot be created
sanity_checks()
{
	if [ ! -d "$STATEDIR" ]
	then
		mkdir -p -- "$STATEDIR" || exit $?
		mylog "  created statedir '$STATEDIR'"
	fi

	if [ ! -d "$TEMPDIR" ]
	then
		mkdir -m 700 -p -- "$TEMPDIR" || exit $?
		mylog "  created tempdir '$TEMPDIR'"
	fi
	return 0
}

# Emit a message: echoed (with backslash escapes interpreted) on stdout
# when VERBOSE is true, handed to ku_log otherwise - and also when the
# echo itself fails.
# Returns: always 0
mylog()
{
	if ! { $VERBOSE && echo -e "$@"; }
	then
		ku_log "$@"
	fi
	return 0
}
# Emit a debug message, prefixed with "D> ", only when DEBUG is true:
# echoed on stderr when VERBOSE is true, handed to ku_log otherwise - and
# also when the echo itself fails.
# Returns: always 0
pdebug()
{
	if $DEBUG
	then
		if ! { $VERBOSE && echo -e "D> $@" >&2; }
		then
			ku_log "D> $@"
		fi
	fi
	return 0
}

# (MAIN)

# system defaults, when the file exists, are read before anything else
if [ -f /etc/default/$CMD ]
then
	. /etc/default/$CMD
fi

# run-time switches: all off until the command line says otherwise
DEBUG=false
VERBOSE=false
dry_run=false
daemon_mode=false
do_reset=false
do_list=false
do_dump=false

# seconds between two rounds in daemon mode; a value that is already set
# at this point is kept, otherwise 5 is used
: "${KUWATCHER_INTERVAL:=5}"

# command line parsing; an unknown option or an empty argument ends in
# usage, "--" stops the parsing, any other word matches no arm and is
# skipped
while (( $# > 0 ))
do
	case $1 in
	-v | --verbose)
		VERBOSE=true
		;;
	-q | --quiet)
		VERBOSE=false
		;;
	-D | --debug)
		DEBUG=true
		;;
	-n | --dry-run)
		dry_run=true
		;;
	-d | --daemon)
		daemon_mode=true
		;;
	-r | --reset)
		do_reset=true
		;;
	--list)
		do_list=true
		;;
	--dump)
		do_dump=true
		;;
	-t | --time)
		# the interval is the next argument and must be there
		shift
		if [ $# -eq 0 ]
		then
			usage
		fi
		KUWATCHER_INTERVAL=$1
		;;
	--)
		shift
		break
		;;
	-* | "")
		usage
		;;
	esac
	shift
done

# nothing can be done without the configuration directory
if [ ! -d "$CONFDIR" ]
then
	echo "error: confdir '$CONFDIR' not exists" >&2
	exit 1
fi

# query functions are served right here, before any lock is taken or any
# directory is created; the exit status is the one of the query
#
if $do_list
then
	list_checks
	exit $?
fi
if $do_dump
then
	dump_all
	exit $?
fi

# acquire lock
#
ku_lock || exit 0	# already running

# signals: HUP/INT/QUIT log '*INTR*' and exit 255, TERM logs and exits 0,
# a failing command is only logged (ERR), and cleanup runs on every exit
trap "ku_log '*INTR*' ; exit 255" 1 2 3
trap "ku_log 'exiting on signal TERM' ; exit 0" TERM
trap 'ku_log "unexpected error $? at $LINENO"' ERR
trap 'cleanup' EXIT

ku_cap_logfile	|| exit $?

# when not verbose, everything printed from here on is appended to the
# logfile (path quoted so it stays one redirection target)
$VERBOSE || exec >>"$LOGFILE" 2>&1

$daemon_mode && mylog "$CMD $CMDVERSTRING daemon started, interval=$KUWATCHER_INTERVAL"

$do_reset && {
	mylog "resetting all state infos"
	# quoted and terminated by --: each directory is removed as exactly
	# one path, whatever characters it contains
	rm -rf -- "$STATEDIR" "$TEMPDIR"
}


# one round = make sure the directories exist, refresh the cache when the
# configuration changed, run every cached check.
# In daemon mode the rounds repeat forever, KUWATCHER_INTERVAL seconds
# apart; otherwise a single round is run and the loop is left.
#
while :
do
	sanity_checks || exit $?
	refresh_checks_cache || exit $?

	for check in $(cat $TEMPDIR/checklist)
	do
		run_check $check
	done

	$daemon_mode || break
	sleep $KUWATCHER_INTERVAL
done

# single round done (only reached when not in daemon mode)
#
ku_lock_remove
exit 0
