x86: Add sysctl to allow panic on IOCK NMI error
author Kurt Garloff <garloff@suse.de>
Wed, 24 Jun 2009 21:32:11 +0000 (14:32 -0700)
committer Ingo Molnar <mingo@elte.hu>
Thu, 25 Jun 2009 20:06:11 +0000 (22:06 +0200)
This patch introduces a new sysctl:

    /proc/sys/kernel/panic_on_io_nmi

which defaults to 0 (off).

When enabled, the kernel panics when the kernel receives an NMI
caused by an IO error.

The IO error triggered NMI indicates a serious system
condition, which could result in IO data corruption. Rather
than continuing, panicking and dumping might be a better choice,
so one can figure out what's causing the IO error.

This could be especially important to companies running IO
intensive applications where corruption must be avoided, e.g. a
bank's databases.

[ SuSE has been shipping it for a while, it was done at the
  request of a large database vendor, for their users. ]

Signed-off-by: Kurt Garloff <garloff@suse.de>
Signed-off-by: Roberto Angelino <robertangelino@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
LKML-Reference: <20090624213211.GA11291@kroah.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
arch/x86/kernel/dumpstack.c
arch/x86/kernel/traps.c
include/linux/kernel.h
kernel/sysctl.c

index 95ea5fa7d4445540c67bdb28ad13102df4248143..c8405718a4c3be91cab8948e95937c7c0bc568b9 100644 (file)
@@ -22,6 +22,7 @@
 #include "dumpstack.h"
 
 int panic_on_unrecovered_nmi;
+int panic_on_io_nmi;
 unsigned int code_bytes = 64;
 int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE;
 static int die_counter;
index a0f48f5671c076fdde9bbccf10300c3724c840a9..5204332f475d86e1caa1ede6bb39fcc28b78d13f 100644 (file)
@@ -346,6 +346,9 @@ io_check_error(unsigned char reason, struct pt_regs *regs)
        printk(KERN_EMERG "NMI: IOCK error (debug interrupt?)\n");
        show_registers(regs);
 
+       if (panic_on_io_nmi)
+               panic("NMI IOCK error: Not continuing");
+
        /* Re-enable the IOCK line, wait for a few seconds */
        reason = (reason & 0xf) | 8;
        outb(reason, 0x61);
index fac104e7186ae0bb13ef19ceaaf87022446af966..d6320a3e8def0fc22a9595d8ff03f52c08983404 100644 (file)
@@ -303,6 +303,7 @@ extern int oops_in_progress;                /* If set, an oops, panic(), BUG() or die() is in
 extern int panic_timeout;
 extern int panic_on_oops;
 extern int panic_on_unrecovered_nmi;
+extern int panic_on_io_nmi;
 extern const char *print_tainted(void);
 extern void add_taint(unsigned flag);
 extern int test_taint(unsigned flag);
index 62e4ff9968b5fe64a06c9f8735af10d3b6a304b9..fba42eda8de298f9d8cbff4c8a05c59d561c623e 100644 (file)
@@ -743,6 +743,14 @@ static struct ctl_table kern_table[] = {
                .mode           = 0644,
                .proc_handler   = &proc_dointvec,
        },
+       {
+               .ctl_name       = CTL_UNNUMBERED,
+               .procname       = "panic_on_io_nmi",
+               .data           = &panic_on_io_nmi,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec,
+       },
        {
                .ctl_name       = KERN_BOOTLOADER_TYPE,
                .procname       = "bootloader_type",