#!/bin/bash
#
##
# Process Resource Monitor (PRM) v1.0.6
#             (C) 2002-2010, R-fx Networks <proj@r-fx.org>
#             (C) 2010, Ryan MacDonald <ryan@r-fx.org>
# This program may be freely redistributed under the terms of the GNU GPL v2
##
#

# Installation root that the paths below are built from.
inspath=/usr/local/prm
# Main configuration file, sourced by this script.
cnf=$inspath/conf.prm
# Internal settings file, sourced right after the main configuration.
intcnf=$inspath/internals.conf
# Program name used as the tag in log lines.
appn=prm
# Release version printed in the banner.
ver=1.0.6

# Print the program banner (name, version, copyright, license) followed by
# one blank line on stdout.
# Globals: ver (read)
header() {
        printf '%s\n' \
                "Process Resource Monitor (PRM) v$ver" \
                "            (C) 2002-2010, R-fx Networks <proj@r-fx.org>" \
                "            (C) 2010, Ryan MacDonald <ryan@r-fx.org>" \
                "This program may be freely redistributed under the terms of the GNU GPL v2" \
                ""
}

# Load the main configuration, then the internal settings. Both are
# mandatory: if either file is missing, print the banner plus an error and
# abort with status 1.
# The two files are loaded strictly one after the other so that the main
# configuration is fully applied before $intcnf is looked at.
# (-f on an empty string is already false, so no separate emptiness test is
# needed; the paths are quoted so an install path with spaces still works.)
if [ -f "$cnf" ]; then
   source "$cnf"
else
   header
   echo "prm[$$]: {glob} $cnf not found, aborting."
   exit 1
fi

if [ -f "$intcnf" ]; then
   source "$intcnf"
else
   header
   echo "prm[$$]: {glob} $intcnf not found, aborting."
   exit 1
fi



# Print the one-line usage summary on stdout.
# The option list mirrors what the dispatcher at the bottom of this script
# actually accepts (-s/--standard and -q/--quiet); the previous text
# advertised -h, -v and -m, none of which are implemented.
usage_short() {
printf '%s\n' "usage prm [-s|--standard] [-q|--quiet]"
}

# Trim a log file once it has grown past a line limit.
# Arguments: $1 - path of the log file
#            $2 - line limit; when the file has more lines than this it is
#                 cut down to its NEWEST (limit / 10) lines
# Returns:   0 in every no-op case (missing file, non-numeric limit,
#            limit below 10, file within the limit).
#
# The earlier implementation deleted lines "limit/10 .. end" with ed, which
# kept the oldest entries and threw the recent ones away on every trim.
# This version keeps the tail instead and needs only coreutils.
trim_log() {
  local log=$1
  local logtrim=$2
  local log_size keep tmp

  [ -f "$log" ] || return 0

  # A missing or non-numeric limit means "never trim".
  case "$logtrim" in
    '' | *[!0-9]*) return 0 ;;
  esac

  log_size=$(wc -l < "$log") || return 0
  log_size=${log_size//[[:space:]]/}
  case "$log_size" in
    '' | *[!0-9]*) return 0 ;;
  esac

  if [ "$log_size" -le "$logtrim" ]; then
    return 0
  fi

  # 10# forces base 10 so a limit written with leading zeros is not octal.
  keep=$((10#$logtrim / 10))
  if [ "$keep" -lt 1 ]; then
    return 0
  fi

  tmp=$(mktemp) || return 0
  if tail -n "$keep" -- "$log" > "$tmp"; then
    # Write back through the existing file so inode, owner and mode survive.
    cat -- "$tmp" > "$log"
  fi
  rm -f -- "$tmp"
}

# Write one timestamped log entry.
# Arguments: $1 - message text; an empty message is ignored
#            $2 - when "1" the entry goes to the log file only, otherwise it
#                 is also echoed on stdout
# Globals:   LOG_FILE, LOG_SYSLOG, LOG_LEVEL, appn (read)
#
# Every call first gives trim_log a chance to cut LOG_FILE back (limit 5000
# lines). The entry is formatted once, so the stdout copy and the log copy
# always carry the same timestamp.
eout() {
  local string=$1
  local outstate=$2
  local stamp line

  trim_log "$LOG_FILE" 5000
  if [ -z "$string" ]; then
    return 0
  fi

  stamp="$(date +"%b %d %H:%M:%S") $(hostname -s)"
  line="$stamp ${appn}[$$]: $string"

  if [ "$outstate" != "1" ]; then
    printf '%s\n' "$line"
  fi
  printf '%s\n' "$line" >> "$LOG_FILE"

  if [ "$LOG_SYSLOG" == "1" ]; then
    if [ -f "/bin/logger" ]; then
      /bin/logger -t "prm[$$]" -p "$LOG_LEVEL" -- "$string"
    else
      # No logger binary: append a syslog-style line by hand.
      printf '%s\n' "$stamp prm[$$]: $string" >> /var/log/messages
    fi
  fi
}

# Take the run lock, or abort if another run holds it.
# Globals: LOCK_FILE, LOCK_TIMEOUT, inspath (read);
#          UTIME, OVAL, DIFF, opid (written)
#
# - No lock file: record our pid and the current epoch time, continue.
# - Lock older than LOCK_TIMEOUT seconds: treat it as stale, refresh it,
#   SIGKILL the pid recorded by the previous run (if it is a usable pid) and
#   record OUR pid so that later runs report and kill the right process.
# - Otherwise: print the banner, log who holds the lock and exit 1.
#
# NOTE(review): the recorded pid is killed without checking that it still
# belongs to a prm process; after pid reuse this could hit an unrelated
# process.
get_state() {
  local pidfile="$inspath/tmp/prm.pid"
  UTIME=$(date +"%s")

  if [ ! -f "$LOCK_FILE" ]; then
    echo "$$" > "$pidfile"
    echo "$UTIME" > "$LOCK_FILE"
    return 0
  fi

  OVAL=$(cat "$LOCK_FILE")
  # An empty or corrupt lock file counts as infinitely old instead of
  # breaking the arithmetic below.
  case "$OVAL" in
    '' | *[!0-9]*) OVAL=0 ;;
  esac
  DIFF=$((UTIME - 10#$OVAL))

  if [ "$DIFF" -gt "$LOCK_TIMEOUT" ]; then
    echo "$UTIME" > "$LOCK_FILE"
    opid=""
    if [ -f "$pidfile" ]; then
      opid=$(cat "$pidfile")
    fi
    case "$opid" in
      '' | *[!0-9]*) opid="" ;;
    esac
    # Never signal pid 0/1 (process group / init) or ourselves.
    if [ -n "$opid" ] && [ "$opid" -gt 1 ] && [ "$opid" -ne "$$" ]; then
      kill -9 "$opid" >> /dev/null 2>&1
      eout "cleared stale lock file ($DIFF > $LOCK_TIMEOUT) and killed pid $opid."
    else
      eout "cleared stale lock file ($DIFF > $LOCK_TIMEOUT)."
    fi
    echo "$$" > "$pidfile"
  else
    header
    if [ -f "$pidfile" ]; then
      opid=$(cat "$pidfile")
      eout "locked subsystem by pid $opid ($LOCK_FILE is $DIFF seconds old), aborting."
    else
      eout "locked subsystem, already running ? ($LOCK_FILE is $DIFF seconds old), aborting."
    fi
    exit 1
  fi
}


# Pre-flight checks before a scan.
# Globals: IGNORE_USER, IGNORE_CMD (read)
# Aborts with exit status 1 (after logging which binary is missing) when
# /usr/bin/expr or /usr/bin/bc does not exist, then makes sure both ignore
# files exist, creating missing ones with mode 640.
pre() {
  local bin file

  for bin in /usr/bin/expr /usr/bin/bc; do
    if [ ! -f "$bin" ]; then
      eout "could not find needed binary $bin, aborting."
      exit 1
    fi
  done

  for file in "$IGNORE_USER" "$IGNORE_CMD"; do
    if [ ! -f "$file" ]; then
      touch "$file"
      chmod 640 "$file"
    fi
  done
}

# Snapshot the process table into PSLIST_CACHE.
# Globals: PSLIST_CACHE, IGNORE_PSLIST, IGNORE_ROOT (read)
# Each cache line holds: ppid pid user pcpu pmem etime comm cmd, sorted by
# comm, minus every line that word-matches a pattern in IGNORE_PSLIST.
# With IGNORE_ROOT=1 processes owned by root are left out.
# Missing cache / ignore files are created with mode 640; existing files
# keep their mode (the old test looked for a file literally named
# "PSLIST_CACHE", so the cache was re-chmodded on every run).
get_pslist() {
  local file
  local -a ps_select

  for file in "$PSLIST_CACHE" "$IGNORE_PSLIST"; do
    if [ ! -f "$file" ]; then
      touch "$file"
      chmod 640 "$file"
    fi
  done

  if [ "$IGNORE_ROOT" == "1" ]; then
    # "--user root" selects root's processes, -N negates the selection.
    ps_select=(--user root -N)
  else
    ps_select=(-A)
  fi

  /bin/nice -n 19 /bin/ps --no-headers "${ps_select[@]}" \
    -o "ppid pid user pcpu pmem etime comm cmd" --sort comm \
    | grep -vwf "$IGNORE_PSLIST" > "$PSLIST_CACHE"
}


# Re-read one metric of a process and leave it in the global rval.
# Arguments: $1 - pid to inspect
#            $2 - ps column to read (e.g. pcpu, pmem, etime), or the special
#                 value "proc" to count running processes named $cmd
# Globals:   cmd (read); rpid, col, rval (written)
# For ps columns the value is cut at the first "." and stripped of ":" and
# "-", so "12.5" becomes 12 and "01:02:03" becomes 010203. rval is 0 when
# the process is gone or the value is not a plain number.
recheck_proc() {
  rpid=$1
  col=$2

  if [ "$col" == "proc" ]; then
    rval=$(/bin/nice -n 19 /bin/ps --no-headers -C "$cmd" -o "pid comm" | wc -l | awk '{print $1}')
  else
    rval=$(/bin/nice -n 19 /bin/ps --no-headers -p "$rpid" -o "pid $col" | awk '{print $2}' | cut -d'.' -f1 | tr -d '\:-')
  fi

  # Callers feed rval to integer tests; anything else counts as "gone".
  case "$rval" in
    '' | *[!0-9]*) rval=0 ;;
  esac
}

# Walk the cached process list and run check_proc on every process that is
# not ignored.
# Globals: PSLIST_CACHE, IGNORE_USER, IGNORE_CMD, RULES, RULES_ONLY, cnf,
#          intcnf (read); ppid, pid, user, cpu, mem, etime, cmd, cmd_full,
#          include_cmd, proc, skip, rules_run (written per process)
#
# Per cache line:
#   1. split it into ppid pid user pcpu pmem etime comm + command line;
#      pcpu/pmem are truncated to integers, etime loses ":" and "-"
#   2. blank out user / cmd when they match a pattern in the ignore files,
#      which makes the completeness test below drop the line
#   3. re-source the configuration so limits overridden by a previous
#      process' rule file are reset, then load "<user>.user" or, failing
#      that, the command rule file from $RULES
#   4. honour the rule's IGNORE regex and the RULES_ONLY switch
#
# Fixes over the previous version:
#   - the command rule was tested as "$cmd.cmd" but sourced as
#     "$include_cmd.cmd" (punctuation stripped); the file that is found is
#     now the file that is sourced, preferring the stripped name
#   - lines are read verbatim on a private descriptor instead of via a
#     " " <-> "^" round trip with unquoted echo, which altered arguments
#     containing "^" or glob characters
#   - names are matched as literal words (grep -F), not as regexes
#
# NOTE(review): a comm value that itself contains blanks still shifts the
# columns; ps offers no unambiguous separator for this field list.
get_procinfo() {
  local ps_rest rule_file
  local -a ps_words

  # fd 9 keeps the list away from stdin, so nothing started from the loop
  # body (restart commands, mail) can swallow the remaining lines.
  while read -r -u 9 ppid pid user cpu mem etime cmd ps_rest || [ -n "$ppid" ]; do
    read -r -a ps_words <<< "$ps_rest"
    cmd_full="${ps_words[*]:0:5}"

    user=$(printf '%s\n' "$user" | grep -vf "$IGNORE_USER")
    cpu=${cpu%%.*}
    mem=${mem%%.*}
    etime=${etime//[-:]/}
    cmd=$(printf '%s\n' "$cmd" | grep -vf "$IGNORE_CMD")
    include_cmd=${cmd//[[:punct:][:cntrl:]]/}

    if [ -n "$ppid" ] && [ -n "$pid" ] && [ -n "$cpu" ] && [ -n "$mem" ] \
      && [ -n "$etime" ] && [ -n "$cmd" ] && [ -n "$user" ]; then
      # Number of cache lines mentioning this command as a whole word.
      proc=$(grep -cwF -- "$cmd" "$PSLIST_CACHE")

      if grep -qwF -- "$user" "$IGNORE_USER"; then
        skip=1
      fi
      if grep -qwF -- "$cmd" "$IGNORE_CMD"; then
        skip=1
      fi

      if [ "$skip" != "1" ]; then
        . "$cnf"
        . "$intcnf"

        rule_file=""
        if [ -f "$RULES/$user.user" ]; then
          rule_file="$RULES/$user.user"
        elif [ -n "$include_cmd" ] && [ -f "$RULES/$include_cmd.cmd" ]; then
          rule_file="$RULES/$include_cmd.cmd"
        elif [ -f "$RULES/$cmd.cmd" ]; then
          rule_file="$RULES/$cmd.cmd"
        fi
        if [ -n "$rule_file" ]; then
          eout "checking proc pid:$pid {user:$user cmd:$cmd} with rule $rule_file"
          . "$rule_file"
          rules_run=1
        fi

        if [ -n "$IGNORE" ]; then
          if printf '%s\n' "$user" | grep -qE "($IGNORE)" \
            || printf '%s\n' "$cmd" | grep -qE "($IGNORE)"; then
            skip=1
          fi
        fi

        if [ "$RULES_ONLY" == "1" ] && [ "$rules_run" == "1" ] && [ "$skip" != "1" ]; then
          check_proc
        elif [ "$RULES_ONLY" == "0" ] && [ "$skip" != "1" ]; then
          check_proc
        fi
      fi
    fi
    unset ppid pid user cpu mem etime cmd rules_run used_cpu used_mem used_proc used_etime exprlist skip rval fcnt cnt child_pidlist pidlist IGNORE
  done 9< "$PSLIST_CACHE"
}

# Mail an alert about the process currently being handled.
# Globals: EMAIL_ALERT, EMAIL_TMPL, EMAIL_ADDR, pid (read); EMAIL_SUBJ and
#          tmpemail are read after sourcing the template, which is presumably
#          where they are defined — confirm against the shipped template.
# Does nothing unless EMAIL_ALERT is "1". A missing template is logged only
# in that case; previously the "could not be found" line was written on
# every call even with alerts switched off.
alert() {
  if [ "$EMAIL_ALERT" != "1" ]; then
    return 0
  fi

  if [ ! -f "$EMAIL_TMPL" ]; then
    eout "email template $EMAIL_TMPL could not be found, alert not sent!"
    return 0
  fi

  eout "email alert sent to $EMAIL_ADDR for proc $pid"
  . "$EMAIL_TMPL"
  # cat | mail (rather than a redirect) so mail still runs, with an empty
  # body, when the template did not produce the body file.
  cat "$tmpemail" | mail -s "$EMAIL_SUBJ" "$EMAIL_ADDR"
}

# Run KILL_RESTART_CMD in the background after a kill, when it is set.
_kill_check_restart() {
  if [ -n "$KILL_RESTART_CMD" ]; then
    eout "proc ppid:$ppid pid:$pid {user:$user cmd:$cmd} KILL_RESTART_CMD SET, running: '$KILL_RESTART_CMD'"
    # Deliberately unquoted: the setting holds a command plus its arguments.
    $KILL_RESTART_CMD >> /dev/null 2>&1 &
    sleep 2
  fi
}

# Alert on, or kill, only the offending pid itself, then send the alert.
_kill_check_single() {
  pidlist="$pid"
  if [ "$ALERT_ONLY" == "1" ]; then
    eout "proc pid:$pid {user:$user cmd:$cmd} HARD FAIL $MAX_NAME use:${rval}/max:${MAX_VAL} ALERT ONLY pid:$pid"
  else
    # Signal twice, as before, in case the first delivery is missed.
    kill -"${KILL_SIG}" "$pid" >> /dev/null 2>&1
    kill -"${KILL_SIG}" "$pid" >> /dev/null 2>&1
    eout "proc pid:$pid {user:$user cmd:$cmd} HARD FAIL $MAX_NAME use:${rval}/max:${MAX_VAL} KILLED pid:$pid"
    _kill_check_restart
  fi
  alert
}

# Enforce one resource limit on the process described by the globals
# pid/ppid/user/cmd.
# Arguments: $1 - limit name used in log messages (e.g. MAX_CPU)
#            $2 - limit value; 0, empty or non-numeric disables the check
#            $3 - metric handed to recheck_proc (pcpu, pmem, proc, etime)
#            $4 - value of that metric taken from the process snapshot
# Globals:   KILL_TRIG, KILL_WAIT, KILL_SIG, KILL_PARENT, KILL_MINPID,
#            ALERT_ONLY, KILL_RESTART_CMD (read); MAX_NAME, MAX_VAL, PS_COL,
#            PS_VAL, cnt, fcnt, child_pidlist, pidlist (written)
#
# When the snapshot value reaches the limit, the metric is re-read up to
# KILL_TRIG times, KILL_WAIT seconds apart ("soft fails"). Only if every
# re-read is still at or over the limit does the process hard-fail; it is
# then killed (or, with ALERT_ONLY=1, just reported) and an alert is sent.
# With KILL_PARENT=1 and a parent pid above KILL_MINPID the parent and all
# of its children are handled together.
#
# Fixes over the previous version: the "lower than KILL_MINPID" message
# printed the literal text "ppid"; ALERT_ONLY logged the parent pid twice;
# pidlist carried over between the checks of one process; an unset limit
# produced "integer expression expected" noise.
kill_check() {
  MAX_NAME=$1
  MAX_VAL=$2
  PS_COL=$3
  PS_VAL=$4
  local kc_child

  # Values may carry leading zeros (etime such as 010203), so they are only
  # ever compared with [ ], which reads them as decimal.
  case "$MAX_VAL" in
    '' | *[!0-9]*) return 0 ;;
  esac
  case "$PS_VAL" in
    '' | *[!0-9]*) return 0 ;;
  esac
  if [ "$MAX_VAL" == "0" ] || [ "$PS_VAL" -lt "$MAX_VAL" ]; then
    return 0
  fi

  cnt=0
  fcnt=0
  while [ "$cnt" -lt "$KILL_TRIG" ]; do
    cnt=$((cnt + 1))
    sleep "$KILL_WAIT"
    recheck_proc "$pid" "$PS_COL"
    if [ "$rval" -ge "$MAX_VAL" ]; then
      eout "proc pid:$pid {user:$user cmd:$cmd} soft fail #$cnt $MAX_NAME use:${rval}/max:${MAX_VAL}"
      fcnt=$((fcnt + 1))
    else
      eout "proc pid:$pid {user:$user cmd:$cmd} has gone away or come out of soft fail"
      break
    fi
  done

  if ! [ "$fcnt" -ge "$KILL_TRIG" ]; then
    return 0
  fi

  if [ "$KILL_PARENT" == "1" ] && [ "$ppid" -gt "$KILL_MINPID" ]; then
    # The offending pid plus every child of its parent.
    child_pidlist=$(/bin/nice /bin/ps --pid "$pid" -o "pid" --ppid "$ppid" --no-headers | tr '\n' ' ')
    pidlist=""
    for kc_child in $child_pidlist; do
      pidlist="${pidlist:+$pidlist }$kc_child"
    done

    if [ "$ALERT_ONLY" == "1" ]; then
      eout "proc ppid:$ppid pid:$pid {user:$user cmd:$cmd} HARD FAIL $MAX_NAME use:${rval}/max:${MAX_VAL} ALERT ONLY PARENT/CHILDREN pidlist:$ppid $pidlist"
    else
      eout "proc ppid:$ppid pid:$pid {user:$user cmd:$cmd} HARD FAIL $MAX_NAME use:${rval}/max:${MAX_VAL} KILLING PARENT/CHILDREN pidlist:$ppid $pidlist"

      # Parent first so it cannot spawn new children, then the children,
      # then everything once more for good measure.
      kill -"${KILL_SIG}" "$ppid" >> /dev/null 2>&1
      kill -"${KILL_SIG}" $pidlist >> /dev/null 2>&1
      kill -"${KILL_SIG}" "$ppid" $pidlist >> /dev/null 2>&1

      _kill_check_restart
    fi
    pidlist="$ppid${pidlist:+ $pidlist}"
    alert
  else
    if [ "$KILL_PARENT" == "1" ] && [ "$ALERT_ONLY" != "1" ] \
      && [ "$ppid" -le "$KILL_MINPID" ]; then
      eout "proc ppid:$ppid lower than KILL_MINPID $KILL_MINPID, ignoring KILL_PARENT"
    fi
    _kill_check_single
  fi
}

# Apply the four limits (cpu, memory, process count, elapsed time), in that
# order, to the process held in the globals cpu/mem/proc/etime.
# Globals: MAX_CPU, MAX_MEM, MAX_PROC (read); MAX_ETIME (normalised in place)
check_proc() {
local spec

# Each entry: <limit variable> <recheck metric> <snapshot variable>.
for spec in "MAX_CPU pcpu cpu" "MAX_MEM pmem mem" "MAX_PROC proc proc"; do
	set -- $spec
	kill_check "$1" "${!1}" "$2" "${!3}"
done

# The elapsed-time limit is compared as a bare digit string, so drop the
# ":" and "-" separators first.
MAX_ETIME=$(echo $MAX_ETIME | tr -d '\:-')
kill_check MAX_ETIME "$MAX_ETIME" "etime" "$etime"
}

# Command-line dispatch.
#   -s | --standard   run one scan with normal output
#   -q | --quiet      run the same scan with all output discarded
#   anything else     print the banner and a usage line
case "$1" in
s|-s|--standard)
	header
	get_state
	# get_state returned, so this run owns the lock: release it on every
	# exit path, including the aborts inside pre(), so that a failed run
	# does not block later runs until LOCK_TIMEOUT expires.
	trap 'rm -f "$LOCK_FILE"' EXIT
	pre
	get_pslist
	get_procinfo
	ps_size=$(wc -l < "$PSLIST_CACHE" | awk '{print $1}')
	eout "checked $ps_size processes"
	rm -f "$LOCK_FILE"
	;;
q|-q|--quiet)
	"$0" -s >> /dev/null 2>&1
	# Hand the scan's status back instead of always reporting success.
	exit $?
	;;
*)
	header
	echo "usage $0 [-s|--standard] [-q|--quiet]"
	;;
esac
exit 0
