#!/usr/bin/python
# -*- encoding: utf-8; py-indent-offset: 4 -*-
# +------------------------------------------------------------------+
# |             ____ _               _        __  __ _  __           |
# |            / ___| |__   ___  ___| | __   |  \/  | |/ /           |
# |           | |   | '_ \ / _ \/ __| |/ /   | |\/| | ' /            |
# |           | |___| | | |  __/ (__|   <    | |  | | . \            |
# |            \____|_| |_|\___|\___|_|\_\___|_|  |_|_|\_\           |
# |                                                                  |
# | Copyright Mathias Kettner 2013             mk@mathias-kettner.de |
# +------------------------------------------------------------------+
#
# This file is part of Check_MK.
# The official homepage is at http://mathias-kettner.de/check_mk.
#
# check_mk is free software;  you can redistribute it and/or modify it
# under the  terms of the  GNU General Public License  as published by
# the Free Software Foundation in version 2.  check_mk is  distributed
# in the hope that it will be useful, but WITHOUT ANY WARRANTY;  with-
# out even the implied warranty of  MERCHANTABILITY  or  FITNESS FOR A
# PARTICULAR PURPOSE. See the  GNU General Public License for more de-
# ails.  You should have  received  a copy of the  GNU  General Public
# License along with GNU Make; see the file  COPYING.  If  not,  write
# to the Free Software Foundation, Inc., 51 Franklin St,  Fifth Floor,
# Boston, MA 02110-1301 USA.

# Inventory creates three checks per default:
inventory_kernel_counters = [ "pgmajfault", "ctxt", "processes" ]
kernel_default_levels = None

kernel_counter_names = {
  "ctxt"       : "Context Switches",
  "processes"  : "Process Creations",
  "pgmajfault" : "Major Page Faults",
}

def inventory_kernel(info):
    inventory = []
    for counter in inventory_kernel_counters:
        hits = [ line[0] for line in info[1:] if line[0] == counter ]
        if len(hits) == 1:
            countername = kernel_counter_names.get(counter, counter)
            inventory.append( (countername, "kernel_default_levels") )
    return inventory


# item is one of the keys in /proc/stat or /proc/vmstat
def check_kernel(item, params, info):
    this_time = int(info[0][0])

    hits = [ (line[0], line[1])
             for line in info[1:]
             if line[0] == item or kernel_counter_names.get(line[0], line[0]) == item ]
    if len(hits) == 0:
        return (3, "UNKNOWN - item '%s' not found in agent output" % item)
    elif len(hits) > 1:
        return (3, "UNKNOWN - item '%s' not unique (found %d times)" % (item, len(hits)))

    counter = hits[0][0]
    this_val = int(hits[0][1])
    timedif, per_sec = get_counter("kernel." + item, this_time, this_val)
    infotext = " - %.0f/s in last %d secs" % (per_sec, timedif)

    if params == None:
        return (0, "OK" + infotext, [ (counter, per_sec) ])

    warn, crit = params
    perfdata = [ (counter, per_sec, warn, crit) ]
    if warn == None and crit != None:
        infotext += " (critical at %.0f/s)" % crit
    elif warn != None and crit == None:
        infotext += " (warning at %.0f/s)" % warn
    elif warn != None:
        infotext += " (warn/crit at %.0f/%.0f per sec)" % (warn, crit)

    if crit != None and per_sec >= crit:
        return (2, "CRIT" + infotext, perfdata)
    elif warn != None and per_sec >= warn:
        return (1, "WARN" + infotext, perfdata)
    else:
        return (0, "OK" + infotext, perfdata)

kernel_util_default_levels = None

def inventory_cpu_utilization(info):
    for x in info:
        if len(x) > 0 and x[0] == 'cpu':
            return [(None, "kernel_util_default_levels")]

# Columns of cpu usage /proc/stat:
# - cpuX: number of CPU or only 'cpu' for aggregation
# - user: normal processes executing in user mode
# - nice: niced processes executing in user mode
# - system: processes executing in kernel mode
# - idle: twiddling thumbs
# - iowait: waiting for I/O to complete
# - irq: servicing interrupts
# - softirq: servicing softirqs
# - steal: involuntary wait
def check_cpu_utilization(item, params, info):
    global g_counters
    # Look for entry beginning with "cpu"
    f = [ l for l in info if l[0] == "cpu" ]
    if len(f) != 1:
        return (3, "UNKNOWN - invalid output from plugin")
    line = f[0]
    if len(line) < 8:
        line = line + ['0', '0', '0', '0'] # needed for Linux 2.4

    # line contains now the following columns:
    # 'cpu' user nice system idle wait hw-int sw-int (steal ...)
    # convert number to int
    values = [ int(x) for x in line[1:8] ]
    this_time = int(time.time())
    diff_values = []
    n = 0
    for v in values:
        n += 1
        countername = "cpu.util.%d" % n
        last_time, last_val = g_counters.get(countername, (0, 0))
        diff_values.append(v - last_val)
        g_counters[countername] = (this_time, v)

    sum_jiffies = sum(diff_values[0:7]) # do not account for steal!
    if sum_jiffies == 0:
        return (0, "OK - too short interval")
    user        = diff_values[0] + diff_values[1] # add user + nice
    system      = diff_values[2]
    wait        = diff_values[4]
    user_perc   = 100.0 * float(user)   / float(sum_jiffies)
    system_perc = 100.0 * float(system) / float(sum_jiffies)
    wait_perc   = 100.0 * float(wait)   / float(sum_jiffies)
    perfdata = [
          ( "user",   "%.3f" % user_perc ),
          ( "system", "%.3f" % system_perc ),
          ( "wait",   "%.3f" % wait_perc ) ]

    infotext = " - user: %2.1f%%, system: %2.1f%%, wait: %2.1f%%" % (user_perc, system_perc, wait_perc)

    # You may set a warning/critical level on the io wait
    # percentage. This can be done by setting params to
    # a pair of (warn, crit)
    result = 0
    try:
        warn, crit = params
        if wait_perc >= crit:
            result = 2
            infotext += "(!!)"
        elif wait_perc >= warn:
            result = 1
            infotext += "(!)"
    except:
        pass

    return (result, nagios_state_names[result] + infotext, perfdata)

check_info['kernel.util'] = (check_cpu_utilization, "CPU utilization", 1,  inventory_cpu_utilization)
checkgroup_of['kernel.util'] = "cpu_iowait"

check_info['kernel'] = (check_kernel, "Kernel %s", 1,  inventory_kernel)
checkgroup_of['kernel'] = "vm_counter"
