From d832aee900a92d14a08a6a2a552894188404b6a4 Mon Sep 17 00:00:00 2001 From: dp-arm Date: Tue, 23 May 2017 09:32:49 +0100 Subject: [PATCH] aarch64: Enable Statistical Profiling Extensions for lower ELs SPE is only supported in non-secure state. Accesses to SPE specific registers from SEL1 will trap to EL3. During a world switch, before `TTBR` is modified the SPE profiling buffers are drained. This is to avoid a potential invalid memory access in SEL1. SPE is architecturally specified only for AArch64. Change-Id: I04a96427d9f9d586c331913d815fdc726855f6b0 Signed-off-by: dp-arm --- Makefile | 2 ++ docs/user-guide.md | 5 ++++ include/common/aarch64/el3_common_macros.S | 17 +++++++++++ include/lib/aarch64/arch.h | 10 +++++++ include/lib/aarch64/arch_helpers.h | 1 + include/lib/el3_runtime/aarch64/context.h | 1 + include/plat/arm/common/plat_arm.h | 3 ++ lib/el3_runtime/aarch64/context.S | 35 ++++++++++++++++++++-- lib/el3_runtime/aarch64/context_mgmt.c | 31 +++++++++++++++++-- make_helpers/defaults.mk | 17 +++++++++++ plat/arm/board/fvp/fvp_pm.c | 8 +++++ plat/arm/common/aarch64/arm_helpers.S | 29 ++++++++++++++++++ 12 files changed, 155 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index aec10c95..31964def 100644 --- a/Makefile +++ b/Makefile @@ -454,6 +454,7 @@ $(eval $(call assert_boolean,TRUSTED_BOARD_BOOT)) $(eval $(call assert_boolean,USE_COHERENT_MEM)) $(eval $(call assert_boolean,USE_TBBR_DEFS)) $(eval $(call assert_boolean,WARMBOOT_ENABLE_DCACHE_EARLY)) +$(eval $(call assert_boolean,ENABLE_SPE_FOR_LOWER_ELS)) $(eval $(call assert_numeric,ARM_ARCH_MAJOR)) $(eval $(call assert_numeric,ARM_ARCH_MINOR)) @@ -493,6 +494,7 @@ $(eval $(call add_define,TRUSTED_BOARD_BOOT)) $(eval $(call add_define,USE_COHERENT_MEM)) $(eval $(call add_define,USE_TBBR_DEFS)) $(eval $(call add_define,WARMBOOT_ENABLE_DCACHE_EARLY)) +$(eval $(call add_define,ENABLE_SPE_FOR_LOWER_ELS)) # Define the EL3_PAYLOAD_BASE flag only if it is provided. 
ifdef EL3_PAYLOAD_BASE diff --git a/docs/user-guide.md b/docs/user-guide.md index 0065ac01..d5423ca3 100644 --- a/docs/user-guide.md +++ b/docs/user-guide.md @@ -542,6 +542,11 @@ performed. cluster platforms). If this option is enabled, then warm boot path enables D-caches immediately after enabling MMU. This option defaults to 0. +* `ENABLE_SPE_FOR_LOWER_ELS` : Boolean option to enable Statistical Profiling + Extensions (SPE) for lower ELs. SPE is an optional architectural feature + available only for AArch64 8.2 onwards. This option defaults to 1 but is + automatically disabled when the target is AArch32 or AArch64 8.0/8.1. + #### ARM development platform specific build options * `ARM_BL31_IN_DRAM`: Boolean option to select loading of BL31 in TZC secured diff --git a/include/common/aarch64/el3_common_macros.S b/include/common/aarch64/el3_common_macros.S index ed35df82..34fdaee9 100644 --- a/include/common/aarch64/el3_common_macros.S +++ b/include/common/aarch64/el3_common_macros.S @@ -95,6 +95,10 @@ * MDCR_EL3.SPD32: Set to 0b10 to disable AArch32 Secure self-hosted * privileged debug from S-EL1. * + * MDCR_EL3.NSPB (ARM v8.2): SPE enabled in non-secure state and + * disabled in secure state. Accesses to SPE registers at SEL1 generate + * trap exceptions to EL3. + * * MDCR_EL3.TDOSA: Set to zero so that EL2 and EL2 System register * access to the powerdown debug registers do not trap to EL3. 
* @@ -108,6 +112,19 @@ */ mov_imm x0, ((MDCR_EL3_RESET_VAL | MDCR_SDD_BIT | MDCR_SPD32(MDCR_SPD32_DISABLE)) \ & ~(MDCR_TDOSA_BIT | MDCR_TDA_BIT | MDCR_TPM_BIT)) + +#if ENABLE_SPE_FOR_LOWER_ELS + /* Detect if SPE is implemented */ + mrs x1, id_aa64dfr0_el1 + ubfx x1, x1, #ID_AA64DFR0_PMS_SHIFT, #ID_AA64DFR0_PMS_LENGTH + cmp x1, #0x1 + b.ne 1f + + /* Enable SPE for use by normal world */ + orr x0, x0, #MDCR_NSPB(MDCR_NSPB_EL1) +1: +#endif + msr mdcr_el3, x0 /* --------------------------------------------------------------------- diff --git a/include/lib/aarch64/arch.h b/include/lib/aarch64/arch.h index 990c1692..7bceea77 100644 --- a/include/lib/aarch64/arch.h +++ b/include/lib/aarch64/arch.h @@ -110,6 +110,11 @@ #define ID_AA64PFR0_EL3_SHIFT U(12) #define ID_AA64PFR0_ELX_MASK U(0xf) +/* ID_AA64DFR0_EL1.PMS definitions (for ARMv8.2+) */ +#define ID_AA64DFR0_PMS_SHIFT U(32) +#define ID_AA64DFR0_PMS_LENGTH U(4) +#define ID_AA64DFR0_PMS_MASK U(0xf) + #define EL_IMPL_NONE U(0) #define EL_IMPL_A64ONLY U(1) #define EL_IMPL_A64_A32 U(2) @@ -189,6 +194,8 @@ #define MDCR_SPD32_DISABLE U(0x2) #define MDCR_SPD32_ENABLE U(0x3) #define MDCR_SDD_BIT (U(1) << 16) +#define MDCR_NSPB(x) ((x) << 12) +#define MDCR_NSPB_EL1 U(0x3) #define MDCR_TDOSA_BIT (U(1) << 10) #define MDCR_TDA_BIT (U(1) << 9) #define MDCR_TPM_BIT (U(1) << 6) @@ -199,6 +206,9 @@ #endif /* MDCR_EL2 definitions */ +#define MDCR_EL2_TPMS (U(1) << 14) +#define MDCR_EL2_E2PB(x) ((x) << 12) +#define MDCR_EL2_E2PB_EL1 U(0x3) #define MDCR_EL2_TDRA_BIT (U(1) << 11) #define MDCR_EL2_TDOSA_BIT (U(1) << 10) #define MDCR_EL2_TDA_BIT (U(1) << 9) diff --git a/include/lib/aarch64/arch_helpers.h b/include/lib/aarch64/arch_helpers.h index 32290e26..0d0d7d33 100644 --- a/include/lib/aarch64/arch_helpers.h +++ b/include/lib/aarch64/arch_helpers.h @@ -184,6 +184,7 @@ DEFINE_SYSREG_WRITE_CONST_FUNC(daifclr) DEFINE_SYSREG_READ_FUNC(par_el1) DEFINE_SYSREG_READ_FUNC(id_pfr1_el1) DEFINE_SYSREG_READ_FUNC(id_aa64pfr0_el1) 
+DEFINE_SYSREG_READ_FUNC(id_aa64dfr0_el1) DEFINE_SYSREG_READ_FUNC(CurrentEl) DEFINE_SYSREG_RW_FUNCS(daif) DEFINE_SYSREG_RW_FUNCS(spsr_el1) diff --git a/include/lib/el3_runtime/aarch64/context.h b/include/lib/el3_runtime/aarch64/context.h index dead971c..dcbf1c9d 100644 --- a/include/lib/el3_runtime/aarch64/context.h +++ b/include/lib/el3_runtime/aarch64/context.h @@ -308,6 +308,7 @@ CASSERT(CTX_EL3STATE_OFFSET == __builtin_offsetof(cpu_context_t, el3state_ctx), * Function prototypes ******************************************************************************/ void el1_sysregs_context_save(el1_sys_regs_t *regs); +void el1_sysregs_context_save_post_ops(void); void el1_sysregs_context_restore(el1_sys_regs_t *regs); #if CTX_INCLUDE_FPREGS void fpregs_context_save(fp_regs_t *regs); diff --git a/include/plat/arm/common/plat_arm.h b/include/plat/arm/common/plat_arm.h index 62c0ce7e..3335b320 100644 --- a/include/plat/arm/common/plat_arm.h +++ b/include/plat/arm/common/plat_arm.h @@ -218,4 +218,7 @@ int arm_execution_state_switch(unsigned int smc_fid, uint32_t cookie_lo, void *handle); +/* Disable Statistical Profiling Extensions helper */ +void arm_disable_spe(void); + #endif /* __PLAT_ARM_H__ */ diff --git a/lib/el3_runtime/aarch64/context.S b/lib/el3_runtime/aarch64/context.S index afe912ab..8a6c11b7 100644 --- a/lib/el3_runtime/aarch64/context.S +++ b/lib/el3_runtime/aarch64/context.S @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013-2016, ARM Limited and Contributors. All rights reserved. + * Copyright (c) 2013-2017, ARM Limited and Contributors. All rights reserved. 
* * SPDX-License-Identifier: BSD-3-Clause */ @@ -9,6 +9,7 @@ #include .global el1_sysregs_context_save + .global el1_sysregs_context_save_post_ops .global el1_sysregs_context_restore #if CTX_INCLUDE_FPREGS .global fpregs_context_save @@ -108,6 +109,36 @@ func el1_sysregs_context_save ret endfunc el1_sysregs_context_save +/* ----------------------------------------------------- + * The following function strictly follows the AArch64 + * PCS to use x9-x17 (temporary caller-saved registers) + * to do post operations after saving the EL1 system + * register context. + * ----------------------------------------------------- + */ +func el1_sysregs_context_save_post_ops +#if ENABLE_SPE_FOR_LOWER_ELS + /* Detect if SPE is implemented */ + mrs x9, id_aa64dfr0_el1 + ubfx x9, x9, #ID_AA64DFR0_PMS_SHIFT, #ID_AA64DFR0_PMS_LENGTH + cmp x9, #0x1 + b.ne 1f + + /* + * Before switching from normal world to secure world + * the profiling buffers need to be drained out to memory. This is + * required to avoid an invalid memory access when TTBR is switched + * for entry to SEL1. 
+ */ + .arch armv8.2-a+profile + psb csync + dsb nsh + .arch armv8-a +1: +#endif + ret +endfunc el1_sysregs_context_save_post_ops + /* ----------------------------------------------------- * The following function strictly follows the AArch64 * PCS to use x9-x17 (temporary caller-saved registers) @@ -343,7 +374,7 @@ func restore_gp_registers_callee_eret ldp x24, x25, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X24] ldp x26, x27, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X26] ldp x28, x29, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X28] - ldp x30, x17, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_LR] + ldp x30, x17, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_LR] msr sp_el0, x17 ldp x16, x17, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X16] eret diff --git a/lib/el3_runtime/aarch64/context_mgmt.c b/lib/el3_runtime/aarch64/context_mgmt.c index 11ff1632..5257bf1c 100644 --- a/lib/el3_runtime/aarch64/context_mgmt.c +++ b/lib/el3_runtime/aarch64/context_mgmt.c @@ -218,7 +218,7 @@ void cm_init_my_context(const entry_point_info_t *ep) ******************************************************************************/ void cm_prepare_el3_exit(uint32_t security_state) { - uint32_t sctlr_elx, scr_el3; + uint32_t sctlr_elx, scr_el3, mdcr_el2; cpu_context_t *ctx = cm_get_context(security_state); assert(ctx); @@ -315,6 +315,13 @@ void cm_prepare_el3_exit(uint32_t security_state) * relying on hw. Some fields are architecturally * UNKNOWN on reset. * + * MDCR_EL2.TPMS (ARM v8.2): Do not trap statistical + * profiling controls to EL2. + * + * MDCR_EL2.E2PB (ARM v8.2): SPE enabled in non-secure + * state. Accesses to profiling buffer controls at + * non-secure EL1 are not trapped to EL2. + * * MDCR_EL2.TDRA: Set to zero so that Non-secure EL0 and * EL1 System register accesses to the Debug ROM * registers are not trapped to EL2. @@ -343,13 +350,32 @@ void cm_prepare_el3_exit(uint32_t security_state) * MDCR_EL2.HPMN: Set to value of PMCR_EL0.N which is the * architecturally-defined reset value. 
*/ - write_mdcr_el2((MDCR_EL2_RESET_VAL | + mdcr_el2 = ((MDCR_EL2_RESET_VAL | ((read_pmcr_el0() & PMCR_EL0_N_BITS) >> PMCR_EL0_N_SHIFT)) & ~(MDCR_EL2_TDRA_BIT | MDCR_EL2_TDOSA_BIT | MDCR_EL2_TDA_BIT | MDCR_EL2_TDE_BIT | MDCR_EL2_HPME_BIT | MDCR_EL2_TPM_BIT | MDCR_EL2_TPMCR_BIT)); + +#if ENABLE_SPE_FOR_LOWER_ELS + uint64_t id_aa64dfr0_el1; + + /* Detect if SPE is implemented */ + id_aa64dfr0_el1 = read_id_aa64dfr0_el1() >> + ID_AA64DFR0_PMS_SHIFT; + if ((id_aa64dfr0_el1 & ID_AA64DFR0_PMS_MASK) == 1) { + /* + * Make sure traps to EL2 are not generated if + * EL2 is implemented but not used. + */ + mdcr_el2 &= ~MDCR_EL2_TPMS; + mdcr_el2 |= MDCR_EL2_E2PB(MDCR_EL2_E2PB_EL1); + } +#endif + + write_mdcr_el2(mdcr_el2); + /* * Initialise HSTR_EL2. All fields are architecturally * UNKNOWN on reset. @@ -389,6 +415,7 @@ void cm_el1_sysregs_context_save(uint32_t security_state) assert(ctx); el1_sysregs_context_save(get_sysregs_ctx(ctx)); + el1_sysregs_context_save_post_ops(); } void cm_el1_sysregs_context_restore(uint32_t security_state) diff --git a/make_helpers/defaults.mk b/make_helpers/defaults.mk index 2c8f82a4..9946fea1 100644 --- a/make_helpers/defaults.mk +++ b/make_helpers/defaults.mk @@ -136,3 +136,20 @@ V := 0 # required to enable cache coherency after warm reset (eg: single cluster # platforms). WARMBOOT_ENABLE_DCACHE_EARLY := 0 + +# By default, enable Statistical Profiling Extensions. +# The conditional further below disables this feature depending on +# the target architecture and version number. +ENABLE_SPE_FOR_LOWER_ELS := 1 + +# SPE is enabled by default but only supported on AArch64 8.2 onwards. +# Disable it in all other cases. 
+ifeq (${ARCH},aarch32) + override ENABLE_SPE_FOR_LOWER_ELS := 0 +else + ifeq (${ARM_ARCH_MAJOR},8) + ifeq ($(ARM_ARCH_MINOR),$(filter $(ARM_ARCH_MINOR),0 1)) + ENABLE_SPE_FOR_LOWER_ELS := 0 + endif + endif +endif diff --git a/plat/arm/board/fvp/fvp_pm.c b/plat/arm/board/fvp/fvp_pm.c index f4df658a..e39a4d50 100644 --- a/plat/arm/board/fvp/fvp_pm.c +++ b/plat/arm/board/fvp/fvp_pm.c @@ -48,6 +48,14 @@ static void fvp_cluster_pwrdwn_common(void) { uint64_t mpidr = read_mpidr_el1(); +#if ENABLE_SPE_FOR_LOWER_ELS + /* + * On power down we need to disable statistical profiling extensions + * before exiting coherency. + */ + arm_disable_spe(); +#endif + /* Disable coherency if this cluster is to be turned off */ fvp_interconnect_disable(); diff --git a/plat/arm/common/aarch64/arm_helpers.S b/plat/arm/common/aarch64/arm_helpers.S index 1f20cb50..86565f57 100644 --- a/plat/arm/common/aarch64/arm_helpers.S +++ b/plat/arm/common/aarch64/arm_helpers.S @@ -12,6 +12,7 @@ .globl plat_crash_console_putc .globl plat_crash_console_flush .globl platform_mem_init + .globl arm_disable_spe /* ----------------------------------------------------- @@ -86,3 +87,31 @@ endfunc plat_crash_console_flush func platform_mem_init ret endfunc platform_mem_init + + /* ----------------------------------------------------- + * void arm_disable_spe (void); + * ----------------------------------------------------- + */ +#if ENABLE_SPE_FOR_LOWER_ELS +func arm_disable_spe + /* Detect if SPE is implemented */ + mrs x0, id_aa64dfr0_el1 + ubfx x0, x0, #ID_AA64DFR0_PMS_SHIFT, #ID_AA64DFR0_PMS_LENGTH + cmp x0, #0x1 + b.ne 1f + + /* Drain buffered data */ + .arch armv8.2-a+profile + psb csync + dsb nsh + + /* Disable Profiling Buffer */ + mrs x0, pmblimitr_el1 + bic x0, x0, #1 + msr pmblimitr_el1, x0 + isb + .arch armv8-a +1: + ret +endfunc arm_disable_spe +#endif -- 2.30.2