arm64: Add support for new control bits CTR_EL0.DIC and CTR_EL0.IDC
author Shanker Donthineni <shankerd@codeaurora.org>
Wed, 7 Mar 2018 15:00:08 +0000 (09:00 -0600)
committer Will Deacon <will.deacon@arm.com>
Fri, 9 Mar 2018 13:57:57 +0000 (13:57 +0000)
The DCache clean & ICache invalidation requirements for instruction
to data coherence are discoverable through new fields in CTR_EL0.
The two control bits DIC and IDC were defined for this purpose.
Software does not need to perform point of unification cache
maintenance operations on systems where CPU caches are transparent.

This patch optimizes the three functions __flush_cache_user_range(),
clean_dcache_area_pou() and invalidate_icache_range() if the hardware
reports CTR_EL0.IDC and/or CTR_EL0.DIC. Basically it skips the two
instructions 'DC CVAU' and 'IC IVAU', and the associated loop logic
in order to avoid the unnecessary overhead.

CTR_EL0.DIC: Instruction cache invalidation requirements for
 instruction to data coherence. The meaning of this bit[29].
  0: Instruction cache invalidation to the point of unification
     is required for instruction to data coherence.
  1: Instruction cache invalidation to the point of unification is
     not required for instruction to data coherence.

CTR_EL0.IDC: Data cache clean requirements for instruction to data
 coherence. The meaning of this bit[28].
  0: Data cache clean to the point of unification is required for
     instruction to data coherence, unless CLIDR_EL1.LoC == 0b000
     or (CLIDR_EL1.LoUIS == 0b000 && CLIDR_EL1.LoUU == 0b000).
  1: Data cache clean to the point of unification is not required
     for instruction to data coherence.

Co-authored-by: Philip Elcan <pelcan@codeaurora.org>
Reviewed-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Shanker Donthineni <shankerd@codeaurora.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
arch/arm64/include/asm/cache.h
arch/arm64/include/asm/cacheflush.h
arch/arm64/include/asm/cpucaps.h
arch/arm64/kernel/cpufeature.c
arch/arm64/mm/cache.S

index b2e6ece237130b50e8a0554cdeea8fb1f61dd73b..5df5cfe1c1431a763657a19339150b0f5b788159 100644 (file)
 
 #define CTR_L1IP_SHIFT         14
 #define CTR_L1IP_MASK          3
+#define CTR_DMINLINE_SHIFT     16
+#define CTR_ERG_SHIFT          20
 #define CTR_CWG_SHIFT          24
 #define CTR_CWG_MASK           15
+#define CTR_IDC_SHIFT          28
+#define CTR_DIC_SHIFT          29
 
 #define CTR_L1IP(ctr)          (((ctr) >> CTR_L1IP_SHIFT) & CTR_L1IP_MASK)
 
index bef9f418f08986830e68f6bc2c41c88eb5333a29..7dfcec4700fef0355372cbf7d4c2f11176c914d6 100644 (file)
@@ -133,6 +133,9 @@ extern void flush_dcache_page(struct page *);
 
 static inline void __flush_icache_all(void)
 {
+       if (cpus_have_const_cap(ARM64_HAS_CACHE_DIC))
+               return;
+
        asm("ic ialluis");
        dsb(ish);
 }
index 39134c46bb136d23a15bdedc4672838b62cef053..ff9fb3aba17bab52ddb0e0b92eb1cab282354ded 100644 (file)
@@ -46,7 +46,9 @@
 #define ARM64_HARDEN_BP_POST_GUEST_EXIT                25
 #define ARM64_HAS_RAS_EXTN                     26
 #define ARM64_WORKAROUND_843419                        27
+#define ARM64_HAS_CACHE_IDC                    28
+#define ARM64_HAS_CACHE_DIC                    29
 
-#define ARM64_NCAPS                            28
+#define ARM64_NCAPS                            30
 
 #endif /* __ASM_CPUCAPS_H */
index f96b3449034beb713620081789cc95ed283d7a81..bdab5522386613b8365d21cd52ebb761b4f24f7d 100644 (file)
@@ -199,12 +199,12 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr2[] = {
 };
 
 static const struct arm64_ftr_bits ftr_ctr[] = {
-       ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, 31, 1, 1),           /* RES1 */
-       ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, 29, 1, 1),      /* DIC */
-       ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, 28, 1, 1),      /* IDC */
-       ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_HIGHER_SAFE, 24, 4, 0),     /* CWG */
-       ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_HIGHER_SAFE, 20, 4, 0),     /* ERG */
-       ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 1),      /* DminLine */
+       ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, 31, 1, 1), /* RES1 */
+       ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_DIC_SHIFT, 1, 1),
+       ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_IDC_SHIFT, 1, 1),
+       ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_HIGHER_SAFE, CTR_CWG_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_HIGHER_SAFE, CTR_ERG_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_DMINLINE_SHIFT, 4, 1),
        /*
         * Linux can handle differing I-cache policies. Userspace JITs will
         * make use of *minLine.
@@ -852,6 +852,18 @@ static bool has_no_fpsimd(const struct arm64_cpu_capabilities *entry, int __unus
                                        ID_AA64PFR0_FP_SHIFT) < 0;
 }
 
+static bool has_cache_idc(const struct arm64_cpu_capabilities *entry,
+                         int __unused)
+{
+       return read_sanitised_ftr_reg(SYS_CTR_EL0) & BIT(CTR_IDC_SHIFT);
+}
+
+static bool has_cache_dic(const struct arm64_cpu_capabilities *entry,
+                         int __unused)
+{
+       return read_sanitised_ftr_reg(SYS_CTR_EL0) & BIT(CTR_DIC_SHIFT);
+}
+
 #ifdef CONFIG_UNMAP_KERNEL_AT_EL0
 static int __kpti_forced; /* 0: not forced, >0: forced on, <0: forced off */
 
@@ -1088,6 +1100,18 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
                .enable = cpu_clear_disr,
        },
 #endif /* CONFIG_ARM64_RAS_EXTN */
+       {
+               .desc = "Data cache clean to the PoU not required for I/D coherence",
+               .capability = ARM64_HAS_CACHE_IDC,
+               .def_scope = SCOPE_SYSTEM,
+               .matches = has_cache_idc,
+       },
+       {
+               .desc = "Instruction cache invalidation not required for I/D coherence",
+               .capability = ARM64_HAS_CACHE_DIC,
+               .def_scope = SCOPE_SYSTEM,
+               .matches = has_cache_dic,
+       },
        {},
 };
 
index 758bde7e2fa68a9d1a241857ad593b39a4d6e705..30334d81b0215347129898913d89fb5f26721513 100644 (file)
@@ -50,6 +50,10 @@ ENTRY(flush_icache_range)
  */
 ENTRY(__flush_cache_user_range)
        uaccess_ttbr0_enable x2, x3, x4
+alternative_if ARM64_HAS_CACHE_IDC
+       dsb     ishst
+       b       7f
+alternative_else_nop_endif
        dcache_line_size x2, x3
        sub     x3, x2, #1
        bic     x4, x0, x3
@@ -60,8 +64,13 @@ user_alt 9f, "dc cvau, x4",  "dc civac, x4",  ARM64_WORKAROUND_CLEAN_CACHE
        b.lo    1b
        dsb     ish
 
+7:
+alternative_if ARM64_HAS_CACHE_DIC
+       isb
+       b       8f
+alternative_else_nop_endif
        invalidate_icache_by_line x0, x1, x2, x3, 9f
-       mov     x0, #0
+8:     mov     x0, #0
 1:
        uaccess_ttbr0_disable x1, x2
        ret
@@ -80,6 +89,12 @@ ENDPROC(__flush_cache_user_range)
  *     - end     - virtual end address of region
  */
 ENTRY(invalidate_icache_range)
+alternative_if ARM64_HAS_CACHE_DIC
+       mov     x0, xzr
+       isb
+       ret
+alternative_else_nop_endif
+
        uaccess_ttbr0_enable x2, x3, x4
 
        invalidate_icache_by_line x0, x1, x2, x3, 2f
@@ -116,6 +131,10 @@ ENDPIPROC(__flush_dcache_area)
  *     - size    - size in question
  */
 ENTRY(__clean_dcache_area_pou)
+alternative_if ARM64_HAS_CACHE_IDC
+       dsb     ishst
+       ret
+alternative_else_nop_endif
        dcache_by_line_op cvau, ish, x0, x1, x2, x3
        ret
 ENDPROC(__clean_dcache_area_pou)