x86/exceptions: Split debug IST stack

author Thomas Gleixner <tglx@linutronix.de>
Sun, 14 Apr 2019 15:59:57 +0000 (17:59 +0200)
committer Borislav Petkov <bp@suse.de>
Wed, 17 Apr 2019 13:14:28 +0000 (15:14 +0200)
diff --git a/Documentation/x86/kernel-stacks b/Documentation/x86/kernel-stacks

index 1b04596caea988f5a45637b1bc7d22e26528f70c..d1bfb0b95ee0bcb3428598547742ff6460223f05 100644 (file)
--- a/Documentation/x86/kernel-stacks
+++ b/Documentation/x86/kernel-stacks
@@ -76,7 +76,7 @@ The currently assigned IST stacks are :-
    middle of switching stacks.  Using IST for NMI events avoids making
    assumptions about the previous state of the kernel stack.
  
-* ESTACK_DB.  DEBUG_STKSZ
+* ESTACK_DB.  EXCEPTION_STKSZ (PAGE_SIZE).
  
    Used for hardware debug interrupts (interrupt 1) and for software
    debug interrupts (INT3).
@@ -86,6 +86,11 @@ The currently assigned IST stacks are :-
    avoids making assumptions about the previous state of the kernel
    stack.
  
+  To handle nested #DB correctly there exist two instances of DB stacks. On
+  #DB entry the IST stack pointer for #DB is switched to the second instance
+  so a nested #DB starts from a clean stack. The nested #DB switches
+  the IST stack pointer to a guard hole to catch triple nesting.
+
  * ESTACK_MCE.  EXCEPTION_STKSZ (PAGE_SIZE).
  
    Used for interrupt 18 - Machine Check Exception (#MC).
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S

index 5c0348504a4bc0d10610124a960f2f7ede60aa78..ee649f1f279ee9b046976c6d9ab040bbd4022e73 100644 (file)
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -879,7 +879,7 @@ apicinterrupt IRQ_WORK_VECTOR                       irq_work_interrupt              smp_irq_work_interrupt
   * @paranoid == 2 is special: the stub will never switch stacks.  This is for
   * #DF: if the thread stack is somehow unusable, we'll still get a useful OOPS.
   */
-.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1
+.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 ist_offset=0
  ENTRY(\sym)
         UNWIND_HINT_IRET_REGS offset=\has_error_code*8
  
@@ -925,13 +925,13 @@ ENTRY(\sym)
         .endif
  
         .if \shift_ist != -1
-       subq    $EXCEPTION_STKSZ, CPU_TSS_IST(\shift_ist)
+       subq    $\ist_offset, CPU_TSS_IST(\shift_ist)
         .endif
  
         call    \do_sym
  
         .if \shift_ist != -1
-       addq    $EXCEPTION_STKSZ, CPU_TSS_IST(\shift_ist)
+       addq    $\ist_offset, CPU_TSS_IST(\shift_ist)
         .endif
  
         /* these procedures expect "no swapgs" flag in ebx */
@@ -1129,7 +1129,7 @@ apicinterrupt3 HYPERV_STIMER0_VECTOR \
         hv_stimer0_callback_vector hv_stimer0_vector_handler
  #endif /* CONFIG_HYPERV */
  
-idtentry debug                 do_debug                has_error_code=0        paranoid=1 shift_ist=IST_INDEX_DB
+idtentry debug                 do_debug                has_error_code=0        paranoid=1 shift_ist=IST_INDEX_DB ist_offset=DB_STACK_OFFSET
  idtentry int3                  do_int3                 has_error_code=0
  idtentry stack_segment         do_stack_segment        has_error_code=1
  
diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h

index 9c96406e6d2b877845c6607b4d93c9113640cad2..cff3f3f3bfe0895a4e6c78b515b86c29b54cb2ca 100644 (file)
--- a/arch/x86/include/asm/cpu_entry_area.h
+++ b/arch/x86/include/asm/cpu_entry_area.h
@@ -10,25 +10,29 @@
  #ifdef CONFIG_X86_64
  
  /* Macro to enforce the same ordering and stack sizes */
-#define ESTACKS_MEMBERS(guardsize)             \
+#define ESTACKS_MEMBERS(guardsize, db2_holesize)\
         char    DF_stack_guard[guardsize];      \
         char    DF_stack[EXCEPTION_STKSZ];      \
         char    NMI_stack_guard[guardsize];     \
         char    NMI_stack[EXCEPTION_STKSZ];     \
+       char    DB2_stack_guard[guardsize];     \
+       char    DB2_stack[db2_holesize];        \
+       char    DB1_stack_guard[guardsize];     \
+       char    DB1_stack[EXCEPTION_STKSZ];     \
         char    DB_stack_guard[guardsize];      \
-       char    DB_stack[DEBUG_STKSZ];          \
+       char    DB_stack[EXCEPTION_STKSZ];      \
         char    MCE_stack_guard[guardsize];     \
         char    MCE_stack[EXCEPTION_STKSZ];     \
         char    IST_top_guard[guardsize];       \
  
  /* The exception stacks' physical storage. No guard pages required */
  struct exception_stacks {
-       ESTACKS_MEMBERS(0)
+       ESTACKS_MEMBERS(0, 0)
  };
  
  /* The effective cpu entry area mapping with guard pages. */
  struct cea_exception_stacks {
-       ESTACKS_MEMBERS(PAGE_SIZE)
+       ESTACKS_MEMBERS(PAGE_SIZE, EXCEPTION_STKSZ)
  };
  
  /*
@@ -37,6 +41,8 @@ struct cea_exception_stacks {
  enum exception_stack_ordering {
         ESTACK_DF,
         ESTACK_NMI,
+       ESTACK_DB2,
+       ESTACK_DB1,
         ESTACK_DB,
         ESTACK_MCE,
         N_EXCEPTION_STACKS
diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h

index 9e5ca30738e5824ac5d9d049030e3c87ae5a72b4..1a8609a15856f6e02076f04324d06a803e3133c0 100644 (file)
--- a/arch/x86/include/asm/debugreg.h
+++ b/arch/x86/include/asm/debugreg.h
@@ -104,11 +104,9 @@ static inline void debug_stack_usage_dec(void)
  {
         __this_cpu_dec(debug_stack_usage);
  }
-int is_debug_stack(unsigned long addr);
  void debug_stack_set_zero(void);
  void debug_stack_reset(void);
  #else /* !X86_64 */
-static inline int is_debug_stack(unsigned long addr) { return 0; }
  static inline void debug_stack_set_zero(void) { }
  static inline void debug_stack_reset(void) { }
  static inline void debug_stack_usage_inc(void) { }
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h

index 056de887b2208ae4d448f010d7034adc7af224dd..793c14c372cba2d29a05f3f93a883fb25307888a 100644 (file)
--- a/arch/x86/include/asm/page_64_types.h
+++ b/arch/x86/include/asm/page_64_types.h
@@ -18,9 +18,6 @@
  #define EXCEPTION_STACK_ORDER (0 + KASAN_STACK_ORDER)
  #define EXCEPTION_STKSZ (PAGE_SIZE << EXCEPTION_STACK_ORDER)
  
-#define DEBUG_STACK_ORDER (EXCEPTION_STACK_ORDER + 1)
-#define DEBUG_STKSZ (PAGE_SIZE << DEBUG_STACK_ORDER)
-
  #define IRQ_STACK_ORDER (2 + KASAN_STACK_ORDER)
  #define IRQ_STACK_SIZE (PAGE_SIZE << IRQ_STACK_ORDER)
  
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c

index ddced33184b552ba0ad3a17267d7a99291591d1c..f5281567e28ef70a4a00d87963ee4166b1bbb04c 100644 (file)
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -68,6 +68,8 @@ int main(void)
  #undef ENTRY
  
         OFFSET(TSS_ist, tss_struct, x86_tss.ist);
+       DEFINE(DB_STACK_OFFSET, offsetof(struct cea_exception_stacks, DB_stack) -
+              offsetof(struct cea_exception_stacks, DB1_stack));
         BLANK();
  
  #ifdef CONFIG_STACKPROTECTOR
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c

index 143aceaf9a9a597b094950994372359c904e5cbd..88cab45707a9a53c11461794f78bf684b884e2ea 100644 (file)
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1549,17 +1549,7 @@ void syscall_init(void)
                X86_EFLAGS_IOPL|X86_EFLAGS_AC|X86_EFLAGS_NT);
  }
  
-static DEFINE_PER_CPU(unsigned long, debug_stack_addr);
  DEFINE_PER_CPU(int, debug_stack_usage);
-
-int is_debug_stack(unsigned long addr)
-{
-       return __this_cpu_read(debug_stack_usage) ||
-               (addr <= __this_cpu_read(debug_stack_addr) &&
-                addr > (__this_cpu_read(debug_stack_addr) - DEBUG_STKSZ));
-}
-NOKPROBE_SYMBOL(is_debug_stack);
-
  DEFINE_PER_CPU(u32, debug_idt_ctr);
  
  void debug_stack_set_zero(void)
@@ -1735,7 +1725,6 @@ void cpu_init(void)
                 t->x86_tss.ist[IST_INDEX_NMI] = __this_cpu_ist_top_va(NMI);
                 t->x86_tss.ist[IST_INDEX_DB] = __this_cpu_ist_top_va(DB);
                 t->x86_tss.ist[IST_INDEX_MCE] = __this_cpu_ist_top_va(MCE);
-               per_cpu(debug_stack_addr, cpu) = t->x86_tss.ist[IST_INDEX_DB];
         }
  
         t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c

index f6fbd0438f9e071acb4ffe4eae9aaa395b885b68..fca97bd3d8ae73fea832257898d58cb9230cb520 100644 (file)
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -19,16 +19,18 @@
  #include <asm/cpu_entry_area.h>
  #include <asm/stacktrace.h>
  
-static const char *exception_stack_names[N_EXCEPTION_STACKS] = {
+static const char * const exception_stack_names[] = {
                 [ ESTACK_DF     ]       = "#DF",
                 [ ESTACK_NMI    ]       = "NMI",
+               [ ESTACK_DB2    ]       = "#DB2",
+               [ ESTACK_DB1    ]       = "#DB1",
                 [ ESTACK_DB     ]       = "#DB",
                 [ ESTACK_MCE    ]       = "#MC",
  };
  
  const char *stack_type_name(enum stack_type type)
  {
-       BUILD_BUG_ON(N_EXCEPTION_STACKS != 4);
+       BUILD_BUG_ON(N_EXCEPTION_STACKS != 6);
  
         if (type == STACK_TYPE_IRQ)
                 return "IRQ";
@@ -58,9 +60,11 @@ struct estack_layout {
         .end    = offsetof(struct cea_exception_stacks, x## _stack_guard) \
         }
  
-static const struct estack_layout layout[N_EXCEPTION_STACKS] = {
+static const struct estack_layout layout[] = {
         [ ESTACK_DF     ]       = ESTACK_ENTRY(DF),
         [ ESTACK_NMI    ]       = ESTACK_ENTRY(NMI),
+       [ ESTACK_DB2    ]       = { .begin = 0, .end = 0},
+       [ ESTACK_DB1    ]       = ESTACK_ENTRY(DB1),
         [ ESTACK_DB     ]       = ESTACK_ENTRY(DB),
         [ ESTACK_MCE    ]       = ESTACK_ENTRY(MCE),
  };
@@ -71,7 +75,7 @@ static bool in_exception_stack(unsigned long *stack, struct stack_info *info)
         struct pt_regs *regs;
         unsigned int k;
  
-       BUILD_BUG_ON(N_EXCEPTION_STACKS != 4);
+       BUILD_BUG_ON(N_EXCEPTION_STACKS != 6);
  
         estacks = (unsigned long)__this_cpu_read(cea_exception_stacks);
  
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c

index 18bc9b51ac9b99ffaf51e85daf490b0ba108bcc9..3755d0310026aab7d8496afe2efb57b7e9f747bb 100644 (file)
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -21,13 +21,14 @@
  #include <linux/ratelimit.h>
  #include <linux/slab.h>
  #include <linux/export.h>
+#include <linux/atomic.h>
  #include <linux/sched/clock.h>
  
  #if defined(CONFIG_EDAC)
  #include <linux/edac.h>
  #endif
  
-#include <linux/atomic.h>
+#include <asm/cpu_entry_area.h>
  #include <asm/traps.h>
  #include <asm/mach_traps.h>
  #include <asm/nmi.h>
@@ -487,6 +488,23 @@ static DEFINE_PER_CPU(unsigned long, nmi_cr2);
   * switch back to the original IDT.
   */
  static DEFINE_PER_CPU(int, update_debug_stack);
+
+static bool notrace is_debug_stack(unsigned long addr)
+{
+       struct cea_exception_stacks *cs = __this_cpu_read(cea_exception_stacks);
+       unsigned long top = CEA_ESTACK_TOP(cs, DB);
+       unsigned long bot = CEA_ESTACK_BOT(cs, DB1);
+
+       if (__this_cpu_read(debug_stack_usage))
+               return true;
+       /*
+        * Note, this covers the guard page between DB and DB1 as well to
+        * avoid two checks. But by all means @addr can never point into
+        * the guard page.
+        */
+       return addr >= bot && addr < top;
+}
+NOKPROBE_SYMBOL(is_debug_stack);
  #endif
  
  dotraplinkage notrace void
diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c

index a00d0d059c8addda579e0877ca09aaa446eb83e6..752ad11d6868269449ef925fca0af44df24db94d 100644 (file)
--- a/arch/x86/mm/cpu_entry_area.c
+++ b/arch/x86/mm/cpu_entry_area.c
@@ -98,10 +98,12 @@ static void __init percpu_setup_exception_stacks(unsigned int cpu)
  
         /*
          * The exceptions stack mappings in the per cpu area are protected
-        * by guard pages so each stack must be mapped separately.
+        * by guard pages so each stack must be mapped separately. DB2 is
+        * not mapped; it just exists to catch triple nesting of #DB.
          */
         cea_map_stack(DF);
         cea_map_stack(NMI);
+       cea_map_stack(DB1);
         cea_map_stack(DB);
         cea_map_stack(MCE);
  }
author	Thomas Gleixner <tglx@linutronix.de>
	Sun, 14 Apr 2019 15:59:57 +0000 (17:59 +0200)
committer	Borislav Petkov <bp@suse.de>
	Wed, 17 Apr 2019 13:14:28 +0000 (15:14 +0200)
Documentation/x86/kernel-stacks		patch \| blob \| history
arch/x86/entry/entry_64.S		patch \| blob \| history
arch/x86/include/asm/cpu_entry_area.h		patch \| blob \| history
arch/x86/include/asm/debugreg.h		patch \| blob \| history
arch/x86/include/asm/page_64_types.h		patch \| blob \| history
arch/x86/kernel/asm-offsets_64.c		patch \| blob \| history
arch/x86/kernel/cpu/common.c		patch \| blob \| history
arch/x86/kernel/dumpstack_64.c		patch \| blob \| history
arch/x86/kernel/nmi.c		patch \| blob \| history
arch/x86/mm/cpu_entry_area.c		patch \| blob \| history