x86, trace: Further robustify CR2 handling vs tracing
authorPeter Zijlstra <peterz@infradead.org>
Wed, 5 Mar 2014 13:07:49 +0000 (14:07 +0100)
committerH. Peter Anvin <hpa@linux.intel.com>
Thu, 6 Mar 2014 18:58:18 +0000 (10:58 -0800)
Building on commit 0ac09f9f8cd1 ("x86, trace: Fix CR2 corruption when
tracing page faults") this patch addresses another few issues:

 - Now that read_cr2() is lifted into trace_do_page_fault(), we should
   pass the address to trace_page_fault_entries() to avoid it
   re-reading a potentially changed cr2.

 - Put both trace_do_page_fault() and trace_page_fault_entries() under
   CONFIG_TRACING.

 - Mark both fault entry functions {,trace_}do_page_fault() as notrace
   to avoid getting __mcount or other function entry trace callbacks
   before we've observed CR2.

 - Mark __do_page_fault() as noinline to guarantee the function tracer
   does get to see the fault.

Cc: <jolsa@redhat.com>
Cc: <vincent.weaver@maine.edu>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20140306145300.GO9987@twins.programming.kicks-ass.net
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
arch/x86/mm/fault.c

index e7fa28bf3262d34671adf2c05a396e405f06f632..a10c8c79216187d2faa5add449762710d51c759b 100644 (file)
@@ -1020,8 +1020,12 @@ static inline bool smap_violation(int error_code, struct pt_regs *regs)
  * This routine handles page faults.  It determines the address,
  * and the problem, and then passes it off to one of the appropriate
  * routines.
+ *
+ * This function must have noinline because both callers
+ * {,trace_}do_page_fault() have notrace on. Having this an actual function
+ * guarantees there's a function trace entry.
  */
-static void __kprobes
+static void __kprobes noinline
 __do_page_fault(struct pt_regs *regs, unsigned long error_code,
                unsigned long address)
 {
@@ -1245,31 +1249,38 @@ good_area:
        up_read(&mm->mmap_sem);
 }
 
-dotraplinkage void __kprobes
+dotraplinkage void __kprobes notrace
 do_page_fault(struct pt_regs *regs, unsigned long error_code)
 {
+       unsigned long address = read_cr2(); /* Get the faulting address */
        enum ctx_state prev_state;
-       /* Get the faulting address: */
-       unsigned long address = read_cr2();
+
+       /*
+        * We must have this function tagged with __kprobes, notrace and call
+        * read_cr2() before calling anything else. To avoid calling any kind
+        * of tracing machinery before we've observed the CR2 value.
+        *
+        * exception_{enter,exit}() contain all sorts of tracepoints.
+        */
 
        prev_state = exception_enter();
        __do_page_fault(regs, error_code, address);
        exception_exit(prev_state);
 }
 
-static void trace_page_fault_entries(struct pt_regs *regs,
+#ifdef CONFIG_TRACING
+static void trace_page_fault_entries(unsigned long address, struct pt_regs *regs,
                                     unsigned long error_code)
 {
        if (user_mode(regs))
-               trace_page_fault_user(read_cr2(), regs, error_code);
+               trace_page_fault_user(address, regs, error_code);
        else
-               trace_page_fault_kernel(read_cr2(), regs, error_code);
+               trace_page_fault_kernel(address, regs, error_code);
 }
 
-dotraplinkage void __kprobes
+dotraplinkage void __kprobes notrace
 trace_do_page_fault(struct pt_regs *regs, unsigned long error_code)
 {
-       enum ctx_state prev_state;
        /*
         * The exception_enter and tracepoint processing could
         * trigger another page faults (user space callchain
@@ -1277,9 +1288,11 @@ trace_do_page_fault(struct pt_regs *regs, unsigned long error_code)
         * the faulting address now.
         */
        unsigned long address = read_cr2();
+       enum ctx_state prev_state;
 
        prev_state = exception_enter();
-       trace_page_fault_entries(regs, error_code);
+       trace_page_fault_entries(address, regs, error_code);
        __do_page_fault(regs, error_code, address);
        exception_exit(prev_state);
 }
+#endif /* CONFIG_TRACING */