x86/mm, kexec: Allow kexec to be used with SME
author Tom Lendacky <thomas.lendacky@amd.com>
Mon, 17 Jul 2017 21:10:28 +0000 (16:10 -0500)
committer Ingo Molnar <mingo@kernel.org>
Tue, 18 Jul 2017 09:38:04 +0000 (11:38 +0200)
Provide support so that kexec can be used to boot a kernel when SME is
enabled.

Support is needed to allocate pages for kexec without encryption.  This
is needed in order to be able to reboot into the new kernel in the same
manner as it was originally booted.

Additionally, when shutting down all of the CPUs we need to be sure to
flush the caches and then halt. This is needed when booting from a state
where SME was not active into a state where SME is active (or vice-versa).
Without these steps, it is possible for cache lines to exist for the same
physical location but tagged both with and without the encryption bit. This
can cause random memory corruption when caches are flushed depending on
which cacheline is written last.

Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Borislav Petkov <bp@suse.de>
Cc: <kexec@lists.infradead.org>
Cc: Alexander Potapenko <glider@google.com>
Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brijesh Singh <brijesh.singh@amd.com>
Cc: Dave Young <dyoung@redhat.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Larry Woodman <lwoodman@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Matt Fleming <matt@codeblueprint.co.uk>
Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Toshimitsu Kani <toshi.kani@hpe.com>
Cc: kasan-dev@googlegroups.com
Cc: kvm@vger.kernel.org
Cc: linux-arch@vger.kernel.org
Cc: linux-doc@vger.kernel.org
Cc: linux-efi@vger.kernel.org
Cc: linux-mm@kvack.org
Link: http://lkml.kernel.org/r/b95ff075db3e7cd545313f2fb609a49619a09625.1500319216.git.thomas.lendacky@amd.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
arch/x86/include/asm/init.h
arch/x86/include/asm/kexec.h
arch/x86/include/asm/pgtable_types.h
arch/x86/kernel/machine_kexec_64.c
arch/x86/kernel/process.c
arch/x86/mm/ident_map.c
include/linux/kexec.h
kernel/kexec_core.c

index 474eb8c66feeb2c98de2f5d6fe1db84de752c806..05c4aa00cc862e3b1dad1b344b0eddb9d6f44db4 100644 (file)
@@ -7,6 +7,7 @@ struct x86_mapping_info {
        unsigned long page_flag;         /* page flag for PMD or PUD entry */
        unsigned long offset;            /* ident mapping offset */
        bool direct_gbpages;             /* PUD level 1GB page support */
+       unsigned long kernpg_flag;       /* kernel pagetable flag override */
 };
 
 int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
index 70ef205489f00e53ff568180c4dcbf6fb9e6ded1..e8183acf931faf7ec4cc444de386f9db826ec952 100644 (file)
@@ -207,6 +207,14 @@ struct kexec_entry64_regs {
        uint64_t r15;
        uint64_t rip;
 };
+
+extern int arch_kexec_post_alloc_pages(void *vaddr, unsigned int pages,
+                                      gfp_t gfp);
+#define arch_kexec_post_alloc_pages arch_kexec_post_alloc_pages
+
+extern void arch_kexec_pre_free_pages(void *vaddr, unsigned int pages);
+#define arch_kexec_pre_free_pages arch_kexec_pre_free_pages
+
 #endif
 
 typedef void crash_vmclear_fn(void);
index 32095af0fefb904cc87737e1d6ea4217531937cb..830992fc5a069937e6cc6f4c83d59e3afd599df4 100644 (file)
@@ -213,6 +213,7 @@ enum page_cache_mode {
 #define PAGE_KERNEL            __pgprot(__PAGE_KERNEL | _PAGE_ENC)
 #define PAGE_KERNEL_RO         __pgprot(__PAGE_KERNEL_RO | _PAGE_ENC)
 #define PAGE_KERNEL_EXEC       __pgprot(__PAGE_KERNEL_EXEC | _PAGE_ENC)
+#define PAGE_KERNEL_EXEC_NOENC __pgprot(__PAGE_KERNEL_EXEC)
 #define PAGE_KERNEL_RX         __pgprot(__PAGE_KERNEL_RX | _PAGE_ENC)
 #define PAGE_KERNEL_NOCACHE    __pgprot(__PAGE_KERNEL_NOCACHE | _PAGE_ENC)
 #define PAGE_KERNEL_LARGE      __pgprot(__PAGE_KERNEL_LARGE | _PAGE_ENC)
index cb0a30473c2310b76695c73ec6fad3cd1e7b051f..9cf8daacc0469ca1644eb7da98f762b7c58f88de 100644 (file)
@@ -87,7 +87,7 @@ static int init_transition_pgtable(struct kimage *image, pgd_t *pgd)
                set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE));
        }
        pte = pte_offset_kernel(pmd, vaddr);
-       set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL_EXEC));
+       set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL_EXEC_NOENC));
        return 0;
 err:
        free_transition_pgtable(image);
@@ -115,6 +115,7 @@ static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
                .alloc_pgt_page = alloc_pgt_page,
                .context        = image,
                .page_flag      = __PAGE_KERNEL_LARGE_EXEC,
+               .kernpg_flag    = _KERNPG_TABLE_NOENC,
        };
        unsigned long mstart, mend;
        pgd_t *level4p;
@@ -602,3 +603,22 @@ void arch_kexec_unprotect_crashkres(void)
 {
        kexec_mark_crashkres(false);
 }
+
+int arch_kexec_post_alloc_pages(void *vaddr, unsigned int pages, gfp_t gfp)
+{
+       /*
+        * If SME is active we need to be sure that kexec pages are
+        * not encrypted because when we boot to the new kernel the
+        * pages won't be accessed encrypted (initially).
+        */
+       return set_memory_decrypted((unsigned long)vaddr, pages);
+}
+
+void arch_kexec_pre_free_pages(void *vaddr, unsigned int pages)
+{
+       /*
+        * If SME is active we need to reset the pages back to being
+        * an encrypted mapping before freeing them.
+        */
+       set_memory_encrypted((unsigned long)vaddr, pages);
+}
index 3ca198080ea9294486ae9a1121e7815dfba7cb19..bd6b85fac66696da70e316656ad6f0d51291f8aa 100644 (file)
@@ -355,6 +355,7 @@ bool xen_set_default_idle(void)
        return ret;
 }
 #endif
+
 void stop_this_cpu(void *dummy)
 {
        local_irq_disable();
@@ -365,8 +366,20 @@ void stop_this_cpu(void *dummy)
        disable_local_APIC();
        mcheck_cpu_clear(this_cpu_ptr(&cpu_info));
 
-       for (;;)
-               halt();
+       for (;;) {
+               /*
+                * Use wbinvd followed by hlt to stop the processor. This
+                * provides support for kexec on a processor that supports
+                * SME. With kexec, going from SME inactive to SME active
+                * requires clearing cache entries so that addresses without
+                * the encryption bit set don't corrupt the same physical
+                * address that has the encryption bit set when caches are
+                * flushed. To achieve this a wbinvd is performed followed by
+                * a hlt. Even if the processor is not in the kexec/SME
+                * scenario this only adds a wbinvd to a halting processor.
+                */
+               asm volatile("wbinvd; hlt" : : : "memory");
+       }
 }
 
 /*
index adab1595f4bd89ba0729db70dd21d619d93e829d..31cea988fa36c5571687d1e18e2d5c914271b0f8 100644 (file)
@@ -51,7 +51,7 @@ static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page,
                if (!pmd)
                        return -ENOMEM;
                ident_pmd_init(info, pmd, addr, next);
-               set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
+               set_pud(pud, __pud(__pa(pmd) | info->kernpg_flag));
        }
 
        return 0;
@@ -79,7 +79,7 @@ static int ident_p4d_init(struct x86_mapping_info *info, p4d_t *p4d_page,
                if (!pud)
                        return -ENOMEM;
                ident_pud_init(info, pud, addr, next);
-               set_p4d(p4d, __p4d(__pa(pud) | _KERNPG_TABLE));
+               set_p4d(p4d, __p4d(__pa(pud) | info->kernpg_flag));
        }
 
        return 0;
@@ -93,6 +93,10 @@ int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
        unsigned long next;
        int result;
 
+       /* Set the default pagetable flags if not supplied */
+       if (!info->kernpg_flag)
+               info->kernpg_flag = _KERNPG_TABLE;
+
        for (; addr < end; addr = next) {
                pgd_t *pgd = pgd_page + pgd_index(addr);
                p4d_t *p4d;
@@ -116,14 +120,14 @@ int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
                if (result)
                        return result;
                if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
-                       set_pgd(pgd, __pgd(__pa(p4d) | _KERNPG_TABLE));
+                       set_pgd(pgd, __pgd(__pa(p4d) | info->kernpg_flag));
                } else {
                        /*
                         * With p4d folded, pgd is equal to p4d.
                         * The pgd entry has to point to the pud page table in this case.
                         */
                        pud_t *pud = pud_offset(p4d, 0);
-                       set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE));
+                       set_pgd(pgd, __pgd(__pa(pud) | info->kernpg_flag));
                }
        }
 
index dd056fab9e35c958c7aee14156e1df5640fb2556..2b7590f5483a1fc4474fbecddd099977222531cb 100644 (file)
@@ -327,6 +327,14 @@ static inline void *boot_phys_to_virt(unsigned long entry)
        return phys_to_virt(boot_phys_to_phys(entry));
 }
 
+#ifndef arch_kexec_post_alloc_pages
+static inline int arch_kexec_post_alloc_pages(void *vaddr, unsigned int pages, gfp_t gfp) { return 0; }
+#endif
+
+#ifndef arch_kexec_pre_free_pages
+static inline void arch_kexec_pre_free_pages(void *vaddr, unsigned int pages) { }
+#endif
+
 #else /* !CONFIG_KEXEC_CORE */
 struct pt_regs;
 struct task_struct;
index 1ae7c41c33c19c54e4b08d33c0c59da78244efba..20fef1a38602d9d0ed6fdb5d359d5604fbafc3dd 100644 (file)
@@ -301,7 +301,7 @@ static struct page *kimage_alloc_pages(gfp_t gfp_mask, unsigned int order)
 {
        struct page *pages;
 
-       pages = alloc_pages(gfp_mask, order);
+       pages = alloc_pages(gfp_mask & ~__GFP_ZERO, order);
        if (pages) {
                unsigned int count, i;
 
@@ -310,6 +310,13 @@ static struct page *kimage_alloc_pages(gfp_t gfp_mask, unsigned int order)
                count = 1 << order;
                for (i = 0; i < count; i++)
                        SetPageReserved(pages + i);
+
+               arch_kexec_post_alloc_pages(page_address(pages), count,
+                                           gfp_mask);
+
+               if (gfp_mask & __GFP_ZERO)
+                       for (i = 0; i < count; i++)
+                               clear_highpage(pages + i);
        }
 
        return pages;
@@ -321,6 +328,9 @@ static void kimage_free_pages(struct page *page)
 
        order = page_private(page);
        count = 1 << order;
+
+       arch_kexec_pre_free_pages(page_address(page), count);
+
        for (i = 0; i < count; i++)
                ClearPageReserved(page + i);
        __free_pages(page, order);