kernel/watchdog.c: allow hardlockup to panic by default
author Don Zickus <dzickus@redhat.com>
Tue, 22 Mar 2011 23:34:16 +0000 (16:34 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Wed, 23 Mar 2011 00:44:12 +0000 (17:44 -0700)
When a cpu is considered stuck, instead of limping along and just printing
a warning, it is sometimes preferred to just panic, let kdump capture the
vmcore and reboot.  This gets the machine back into a stable state quickly
while saving the info that got it into a stuck state to begin with.

Add a Kconfig option to allow users to set the hardlockup to panic
by default.  Also add in a 'nmi_watchdog=nopanic' to override this.

[akpm@linux-foundation.org: fix strncmp length]
Signed-off-by: Don Zickus <dzickus@redhat.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Reviewed-by: WANG Cong <xiyou.wangcong@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Documentation/kernel-parameters.txt
kernel/watchdog.c
lib/Kconfig.debug

index a3b688287a8d1b4be61bfc67466756942a2255bc..e9261e938f6ab8657586f6c068be1766bd725667 100644 (file)
@@ -1597,11 +1597,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
                        Format: [state][,regs][,debounce][,die]
 
        nmi_watchdog=   [KNL,BUGS=X86] Debugging features for SMP kernels
-                       Format: [panic,][num]
+                       Format: [panic,][nopanic,][num]
                        Valid num: 0
                        0 - turn nmi_watchdog off
                        When panic is specified, panic when an NMI watchdog
-                       timeout occurs.
+                       timeout occurs (or 'nopanic' to override the opposite
+                       default).
                        This is useful when you use a panic=... timeout and
                        need the box quickly up again.
 
index 18bb15776c57162b60675adf69e7afd765b34fb0..054a67cca9da37b07ebd1fad5f415d8bb7b711db 100644 (file)
@@ -48,12 +48,15 @@ static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
  * Should we panic when a soft-lockup or hard-lockup occurs:
  */
 #ifdef CONFIG_HARDLOCKUP_DETECTOR
-static int hardlockup_panic;
+static int hardlockup_panic =
+                       CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE;
 
 static int __init hardlockup_panic_setup(char *str)
 {
        if (!strncmp(str, "panic", 5))
                hardlockup_panic = 1;
+       else if (!strncmp(str, "nopanic", 7))
+               hardlockup_panic = 0;
        else if (!strncmp(str, "0", 1))
                watchdog_enabled = 0;
        return 1;
index 191c5c4c89fcf92fc18e3728cd382f4f39f8e1ce..fb0afeff9436c55d2f10325798d08df74dafbae1 100644 (file)
@@ -171,6 +171,23 @@ config HARDLOCKUP_DETECTOR
        def_bool LOCKUP_DETECTOR && PERF_EVENTS && HAVE_PERF_EVENTS_NMI && \
                 !ARCH_HAS_NMI_WATCHDOG
 
+config BOOTPARAM_HARDLOCKUP_PANIC
+       bool "Panic (Reboot) On Hard Lockups"
+       depends on LOCKUP_DETECTOR
+       help
+         Say Y here to enable the kernel to panic on "hard lockups",
+         which are bugs that cause the kernel to loop in kernel
+         mode with interrupts disabled for more than 60 seconds.
+
+         Say N if unsure.
+
+config BOOTPARAM_HARDLOCKUP_PANIC_VALUE
+       int
+       depends on LOCKUP_DETECTOR
+       range 0 1
+       default 0 if !BOOTPARAM_HARDLOCKUP_PANIC
+       default 1 if BOOTPARAM_HARDLOCKUP_PANIC
+
 config BOOTPARAM_SOFTLOCKUP_PANIC
        bool "Panic (Reboot) On Soft Lockups"
        depends on LOCKUP_DETECTOR