From cd1e9c88f339b582a77c5d08b11d3483e90f4d69 Mon Sep 17 00:00:00 2001 From: Oskari Lemmela Date: Sun, 27 Jan 2019 20:20:18 +0200 Subject: [PATCH] sunxi: fix: A64 unstable timer Backport A64 unstable timer patches from linux 5.1 Signed-off-by: Oskari Lemmela [Split the single patch into the two original patches] Signed-off-by: Hauke Mehrtens --- target/linux/sunxi/cortexa53/config-4.14 | 1 + target/linux/sunxi/cortexa53/config-4.19 | 1 + ...arch_counter_get_cntpct-to-read-the-.patch | 118 +++++++++ ...ers-arch_timer-Workaround-for-Allwin.patch | 244 ++++++++++++++++++ ...nner-a64-Enable-A64-timer-workaround.patch | 26 ++ ...ers-arch_timer-Workaround-for-Allwin.patch | 244 ++++++++++++++++++ ...nner-a64-Enable-A64-timer-workaround.patch | 26 ++ 7 files changed, 660 insertions(+) create mode 100644 target/linux/sunxi/patches-4.14/031-arm64-Implement-arch_counter_get_cntpct-to-read-the-.patch create mode 100644 target/linux/sunxi/patches-4.14/100-clocksource-drivers-arch_timer-Workaround-for-Allwin.patch create mode 100644 target/linux/sunxi/patches-4.14/101-arm64-dts-allwinner-a64-Enable-A64-timer-workaround.patch create mode 100644 target/linux/sunxi/patches-4.19/100-clocksource-drivers-arch_timer-Workaround-for-Allwin.patch create mode 100644 target/linux/sunxi/patches-4.19/101-arm64-dts-allwinner-a64-Enable-A64-timer-workaround.patch diff --git a/target/linux/sunxi/cortexa53/config-4.14 b/target/linux/sunxi/cortexa53/config-4.14 index 154f5e9a48..b46c4aa414 100644 --- a/target/linux/sunxi/cortexa53/config-4.14 +++ b/target/linux/sunxi/cortexa53/config-4.14 @@ -97,6 +97,7 @@ CONFIG_SPARSEMEM_MANUAL=y CONFIG_SPARSEMEM_VMEMMAP=y CONFIG_SPARSEMEM_VMEMMAP_ENABLE=y CONFIG_SUN50I_A64_CCU=y +CONFIG_SUN50I_ERRATUM_UNKNOWN1=y CONFIG_SYSCTL_EXCEPTION_TRACE=y CONFIG_THREAD_INFO_IN_TASK=y CONFIG_UNMAP_KERNEL_AT_EL0=y diff --git a/target/linux/sunxi/cortexa53/config-4.19 b/target/linux/sunxi/cortexa53/config-4.19 index 819a51e243..a35c84b905 100644 --- 
a/target/linux/sunxi/cortexa53/config-4.19 +++ b/target/linux/sunxi/cortexa53/config-4.19 @@ -93,6 +93,7 @@ CONFIG_SPARSEMEM_VMEMMAP=y CONFIG_SPARSEMEM_VMEMMAP_ENABLE=y CONFIG_SUN50I_A64_CCU=y CONFIG_SUN50I_DE2_BUS=y +CONFIG_SUN50I_ERRATUM_UNKNOWN1=y CONFIG_SUN50I_H6_CCU=y CONFIG_SUN50I_H6_R_CCU=y CONFIG_SWIOTLB=y diff --git a/target/linux/sunxi/patches-4.14/031-arm64-Implement-arch_counter_get_cntpct-to-read-the-.patch b/target/linux/sunxi/patches-4.14/031-arm64-Implement-arch_counter_get_cntpct-to-read-the-.patch new file mode 100644 index 0000000000..60f0cb6c9b --- /dev/null +++ b/target/linux/sunxi/patches-4.14/031-arm64-Implement-arch_counter_get_cntpct-to-read-the-.patch @@ -0,0 +1,118 @@ +From f2e600c149fda3453344f89c7e9353fe278ebd32 Mon Sep 17 00:00:00 2001 +From: Christoffer Dall +Date: Wed, 18 Oct 2017 13:06:25 +0200 +Subject: [PATCH] arm64: Implement arch_counter_get_cntpct to read the physical + counter + +As we are about to use the physical counter on arm64 systems that have +KVM support, implement arch_counter_get_cntpct() and the associated +errata workaround functionality for stable timer reads. 
+ +Cc: Will Deacon +Cc: Mark Rutland +Acked-by: Catalin Marinas +Acked-by: Marc Zyngier +Signed-off-by: Christoffer Dall +--- + arch/arm64/include/asm/arch_timer.h | 8 +++----- + drivers/clocksource/arm_arch_timer.c | 23 +++++++++++++++++++++++ + 2 files changed, 26 insertions(+), 5 deletions(-) + +--- a/arch/arm64/include/asm/arch_timer.h ++++ b/arch/arm64/include/asm/arch_timer.h +@@ -52,6 +52,7 @@ struct arch_timer_erratum_workaround { + const char *desc; + u32 (*read_cntp_tval_el0)(void); + u32 (*read_cntv_tval_el0)(void); ++ u64 (*read_cntpct_el0)(void); + u64 (*read_cntvct_el0)(void); + int (*set_next_event_phys)(unsigned long, struct clock_event_device *); + int (*set_next_event_virt)(unsigned long, struct clock_event_device *); +@@ -148,11 +149,8 @@ static inline void arch_timer_set_cntkct + + static inline u64 arch_counter_get_cntpct(void) + { +- /* +- * AArch64 kernel and user space mandate the use of CNTVCT. +- */ +- BUG(); +- return 0; ++ isb(); ++ return arch_timer_reg_read_stable(cntpct_el0); + } + + static inline u64 arch_counter_get_cntvct(void) +--- a/drivers/clocksource/arm_arch_timer.c ++++ b/drivers/clocksource/arm_arch_timer.c +@@ -217,6 +217,11 @@ static u32 notrace fsl_a008585_read_cntv + return __fsl_a008585_read_reg(cntv_tval_el0); + } + ++static u64 notrace fsl_a008585_read_cntpct_el0(void) ++{ ++ return __fsl_a008585_read_reg(cntpct_el0); ++} ++ + static u64 notrace fsl_a008585_read_cntvct_el0(void) + { + return __fsl_a008585_read_reg(cntvct_el0); +@@ -258,6 +263,11 @@ static u32 notrace hisi_161010101_read_c + return __hisi_161010101_read_reg(cntv_tval_el0); + } + ++static u64 notrace hisi_161010101_read_cntpct_el0(void) ++{ ++ return __hisi_161010101_read_reg(cntpct_el0); ++} ++ + static u64 notrace hisi_161010101_read_cntvct_el0(void) + { + return __hisi_161010101_read_reg(cntvct_el0); +@@ -288,6 +298,15 @@ static struct ate_acpi_oem_info hisi_161 + #endif + + #ifdef CONFIG_ARM64_ERRATUM_858921 ++static u64 notrace 
arm64_858921_read_cntpct_el0(void) ++{ ++ u64 old, new; ++ ++ old = read_sysreg(cntpct_el0); ++ new = read_sysreg(cntpct_el0); ++ return (((old ^ new) >> 32) & 1) ? old : new; ++} ++ + static u64 notrace arm64_858921_read_cntvct_el0(void) + { + u64 old, new; +@@ -346,6 +365,7 @@ static const struct arch_timer_erratum_w + .desc = "Freescale erratum a005858", + .read_cntp_tval_el0 = fsl_a008585_read_cntp_tval_el0, + .read_cntv_tval_el0 = fsl_a008585_read_cntv_tval_el0, ++ .read_cntpct_el0 = fsl_a008585_read_cntpct_el0, + .read_cntvct_el0 = fsl_a008585_read_cntvct_el0, + .set_next_event_phys = erratum_set_next_event_tval_phys, + .set_next_event_virt = erratum_set_next_event_tval_virt, +@@ -358,6 +378,7 @@ static const struct arch_timer_erratum_w + .desc = "HiSilicon erratum 161010101", + .read_cntp_tval_el0 = hisi_161010101_read_cntp_tval_el0, + .read_cntv_tval_el0 = hisi_161010101_read_cntv_tval_el0, ++ .read_cntpct_el0 = hisi_161010101_read_cntpct_el0, + .read_cntvct_el0 = hisi_161010101_read_cntvct_el0, + .set_next_event_phys = erratum_set_next_event_tval_phys, + .set_next_event_virt = erratum_set_next_event_tval_virt, +@@ -368,6 +389,7 @@ static const struct arch_timer_erratum_w + .desc = "HiSilicon erratum 161010101", + .read_cntp_tval_el0 = hisi_161010101_read_cntp_tval_el0, + .read_cntv_tval_el0 = hisi_161010101_read_cntv_tval_el0, ++ .read_cntpct_el0 = hisi_161010101_read_cntpct_el0, + .read_cntvct_el0 = hisi_161010101_read_cntvct_el0, + .set_next_event_phys = erratum_set_next_event_tval_phys, + .set_next_event_virt = erratum_set_next_event_tval_virt, +@@ -378,6 +400,7 @@ static const struct arch_timer_erratum_w + .match_type = ate_match_local_cap_id, + .id = (void *)ARM64_WORKAROUND_858921, + .desc = "ARM erratum 858921", ++ .read_cntpct_el0 = arm64_858921_read_cntpct_el0, + .read_cntvct_el0 = arm64_858921_read_cntvct_el0, + }, + #endif diff --git a/target/linux/sunxi/patches-4.14/100-clocksource-drivers-arch_timer-Workaround-for-Allwin.patch 
b/target/linux/sunxi/patches-4.14/100-clocksource-drivers-arch_timer-Workaround-for-Allwin.patch new file mode 100644 index 0000000000..6da300312f --- /dev/null +++ b/target/linux/sunxi/patches-4.14/100-clocksource-drivers-arch_timer-Workaround-for-Allwin.patch @@ -0,0 +1,244 @@ +From 7cd6dca3600d8d71328950216688ecd00015d1ce Mon Sep 17 00:00:00 2001 +From: Samuel Holland +Date: Sat, 12 Jan 2019 20:17:18 -0600 +Subject: [PATCH] clocksource/drivers/arch_timer: Workaround for Allwinner A64 + timer instability +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +The Allwinner A64 SoC is known[1] to have an unstable architectural +timer, which manifests itself most obviously in the time jumping forward +a multiple of 95 years[2][3]. This coincides with 2^56 cycles at a +timer frequency of 24 MHz, implying that the time went slightly backward +(and this was interpreted by the kernel as it jumping forward and +wrapping around past the epoch). + +Investigation revealed instability in the low bits of CNTVCT at the +point a high bit rolls over. This leads to power-of-two cycle forward +and backward jumps. (Testing shows that forward jumps are about twice as +likely as backward jumps.) Since the counter value returns to normal +after an indeterminate read, each "jump" really consists of both a +forward and backward jump from the software perspective. + +Unless the kernel is trapping CNTVCT reads, a userspace program is able +to read the register in a loop faster than it changes. 
A test program +running on all 4 CPU cores that reported jumps larger than 100 ms was +run for 13.6 hours and reported the following: + + Count | Event +-------+--------------------------- + 9940 | jumped backward 699ms + 268 | jumped backward 1398ms + 1 | jumped backward 2097ms + 16020 | jumped forward 175ms + 6443 | jumped forward 699ms + 2976 | jumped forward 1398ms + 9 | jumped forward 356516ms + 9 | jumped forward 357215ms + 4 | jumped forward 714430ms + 1 | jumped forward 3578440ms + +This works out to a jump larger than 100 ms about every 5.5 seconds on +each CPU core. + +The largest jump (almost an hour!) was the following sequence of reads: + 0x0000007fffffffff → 0x00000093feffffff → 0x0000008000000000 + +Note that the middle bits don't necessarily all read as all zeroes or +all ones during the anomalous behavior; however the low 10 bits checked +by the function in this patch have never been observed with any other +value. + +Also note that smaller jumps are much more common, with backward jumps +of 2048 (2^11) cycles observed over 400 times per second on each core. +(Of course, this is partially explained by lower bits rolling over more +frequently.) Any one of these could have caused the 95 year time skip. + +Similar anomalies were observed while reading CNTPCT (after patching the +kernel to allow reads from userspace). However, the CNTPCT jumps are +much less frequent, and only small jumps were observed. The same program +as before (except now reading CNTPCT) observed after 72 hours: + + Count | Event +-------+--------------------------- + 17 | jumped backward 699ms + 52 | jumped forward 175ms + 2831 | jumped forward 699ms + 5 | jumped forward 1398ms + +Further investigation showed that the instability in CNTPCT/CNTVCT also +affected the respective timer's TVAL register. 
The following values were +observed immediately after writing CNTV_TVAL to 0x10000000: + + CNTVCT | CNTV_TVAL | CNTV_CVAL | CNTV_TVAL Error +--------------------+------------+--------------------+----------------- + 0x000000d4a2d8bfff | 0x10003fff | 0x000000d4b2d8bfff | +0x00004000 + 0x000000d4a2d94000 | 0x0fffffff | 0x000000d4b2d97fff | -0x00004000 + 0x000000d4a2d97fff | 0x10003fff | 0x000000d4b2d97fff | +0x00004000 + 0x000000d4a2d9c000 | 0x0fffffff | 0x000000d4b2d9ffff | -0x00004000 + +The pattern of errors in CNTV_TVAL seemed to depend on exactly which +value was written to it. For example, after writing 0x10101010: + + CNTVCT | CNTV_TVAL | CNTV_CVAL | CNTV_TVAL Error +--------------------+------------+--------------------+----------------- + 0x000001ac3effffff | 0x1110100f | 0x000001ac4f10100f | +0x1000000 + 0x000001ac40000000 | 0x1010100f | 0x000001ac5110100f | -0x1000000 + 0x000001ac58ffffff | 0x1110100f | 0x000001ac6910100f | +0x1000000 + 0x000001ac66000000 | 0x1010100f | 0x000001ac7710100f | -0x1000000 + 0x000001ac6affffff | 0x1110100f | 0x000001ac7b10100f | +0x1000000 + 0x000001ac6e000000 | 0x1010100f | 0x000001ac7f10100f | -0x1000000 + +I was also twice able to reproduce the issue covered by Allwinner's +workaround[4], that writing to TVAL sometimes fails, and both CVAL and +TVAL are left with entirely bogus values. One was the following values: + + CNTVCT | CNTV_TVAL | CNTV_CVAL +--------------------+------------+-------------------------------------- + 0x000000d4a2d6014c | 0x8fbd5721 | 0x000000d132935fff (615s in the past) +Reviewed-by: Marc Zyngier + +======================================================================== + +Because the CPU can read the CNTPCT/CNTVCT registers faster than they +change, performing two reads of the register and comparing the high bits +(like other workarounds) is not a workable solution. And because the +timer can jump both forward and backward, no pair of reads can +distinguish a good value from a bad one. 
The only way to guarantee a +good value from consecutive reads would be to read _three_ times, and +take the middle value only if the three values are 1) each unique and +2) increasing. This takes at minimum 3 counter cycles (125 ns), or more +if an anomaly is detected. + +However, since there is a distinct pattern to the bad values, we can +optimize the common case (1022/1024 of the time) to a single read by +simply ignoring values that match the error pattern. This still takes no +more than 3 cycles in the worst case, and requires much less code. As an +additional safety check, we still limit the loop iteration to the number +of max-frequency (1.2 GHz) CPU cycles in three 24 MHz counter periods. + +For the TVAL registers, the simple solution is to not use them. Instead, +read or write the CVAL and calculate the TVAL value in software. + +Although the manufacturer is aware of at least part of the erratum[4], +there is no official name for it. For now, use the kernel-internal name +"UNKNOWN1". + +[1]: https://github.com/armbian/build/commit/a08cd6fe7ae9 +[2]: https://forum.armbian.com/topic/3458-a64-datetime-clock-issue/ +[3]: https://irclog.whitequark.org/linux-sunxi/2018-01-26 +[4]: https://github.com/Allwinner-Homlet/H6-BSP4.9-linux/blob/master/drivers/clocksource/arm_arch_timer.c#L272 + +Acked-by: Maxime Ripard +Tested-by: Andre Przywara +Signed-off-by: Samuel Holland +Cc: stable@vger.kernel.org +Signed-off-by: Daniel Lezcano +--- + Documentation/arm64/silicon-errata.txt | 2 + + drivers/clocksource/Kconfig | 10 +++++ + drivers/clocksource/arm_arch_timer.c | 55 ++++++++++++++++++++++++++ + 3 files changed, 67 insertions(+) + +--- a/Documentation/arm64/silicon-errata.txt ++++ b/Documentation/arm64/silicon-errata.txt +@@ -44,6 +44,8 @@ stable kernels. 
+ + | Implementor | Component | Erratum ID | Kconfig | + +----------------+-----------------+-----------------+-----------------------------+ ++| Allwinner | A64/R18 | UNKNOWN1 | SUN50I_ERRATUM_UNKNOWN1 | ++| | | | | + | ARM | Cortex-A53 | #826319 | ARM64_ERRATUM_826319 | + | ARM | Cortex-A53 | #827319 | ARM64_ERRATUM_827319 | + | ARM | Cortex-A53 | #824069 | ARM64_ERRATUM_824069 | +--- a/drivers/clocksource/Kconfig ++++ b/drivers/clocksource/Kconfig +@@ -374,6 +374,16 @@ config ARM64_ERRATUM_858921 + The workaround will be dynamically enabled when an affected + core is detected. + ++config SUN50I_ERRATUM_UNKNOWN1 ++ bool "Workaround for Allwinner A64 erratum UNKNOWN1" ++ default y ++ depends on ARM_ARCH_TIMER && ARM64 && ARCH_SUNXI ++ select ARM_ARCH_TIMER_OOL_WORKAROUND ++ help ++ This option enables a workaround for instability in the timer on ++ the Allwinner A64 SoC. The workaround will only be active if the ++ allwinner,erratum-unknown1 property is found in the timer node. ++ + config ARM_GLOBAL_TIMER + bool "Support for the ARM global timer" if COMPILE_TEST + select TIMER_OF if OF +--- a/drivers/clocksource/arm_arch_timer.c ++++ b/drivers/clocksource/arm_arch_timer.c +@@ -317,6 +317,48 @@ static u64 notrace arm64_858921_read_cnt + } + #endif + ++#ifdef CONFIG_SUN50I_ERRATUM_UNKNOWN1 ++/* ++ * The low bits of the counter registers are indeterminate while bit 10 or ++ * greater is rolling over. Since the counter value can jump both backward ++ * (7ff -> 000 -> 800) and forward (7ff -> fff -> 800), ignore register values ++ * with all ones or all zeros in the low bits. Bound the loop by the maximum ++ * number of CPU cycles in 3 consecutive 24 MHz counter periods. 
++ */ ++#define __sun50i_a64_read_reg(reg) ({ \ ++ u64 _val; \ ++ int _retries = 150; \ ++ \ ++ do { \ ++ _val = read_sysreg(reg); \ ++ _retries--; \ ++ } while (((_val + 1) & GENMASK(9, 0)) <= 1 && _retries); \ ++ \ ++ WARN_ON_ONCE(!_retries); \ ++ _val; \ ++}) ++ ++static u64 notrace sun50i_a64_read_cntpct_el0(void) ++{ ++ return __sun50i_a64_read_reg(cntpct_el0); ++} ++ ++static u64 notrace sun50i_a64_read_cntvct_el0(void) ++{ ++ return __sun50i_a64_read_reg(cntvct_el0); ++} ++ ++static u32 notrace sun50i_a64_read_cntp_tval_el0(void) ++{ ++ return read_sysreg(cntp_cval_el0) - sun50i_a64_read_cntpct_el0(); ++} ++ ++static u32 notrace sun50i_a64_read_cntv_tval_el0(void) ++{ ++ return read_sysreg(cntv_cval_el0) - sun50i_a64_read_cntvct_el0(); ++} ++#endif ++ + #ifdef CONFIG_ARM_ARCH_TIMER_OOL_WORKAROUND + DEFINE_PER_CPU(const struct arch_timer_erratum_workaround *, + timer_unstable_counter_workaround); +@@ -404,6 +446,19 @@ static const struct arch_timer_erratum_w + .read_cntvct_el0 = arm64_858921_read_cntvct_el0, + }, + #endif ++#ifdef CONFIG_SUN50I_ERRATUM_UNKNOWN1 ++ { ++ .match_type = ate_match_dt, ++ .id = "allwinner,erratum-unknown1", ++ .desc = "Allwinner erratum UNKNOWN1", ++ .read_cntp_tval_el0 = sun50i_a64_read_cntp_tval_el0, ++ .read_cntv_tval_el0 = sun50i_a64_read_cntv_tval_el0, ++ .read_cntpct_el0 = sun50i_a64_read_cntpct_el0, ++ .read_cntvct_el0 = sun50i_a64_read_cntvct_el0, ++ .set_next_event_phys = erratum_set_next_event_tval_phys, ++ .set_next_event_virt = erratum_set_next_event_tval_virt, ++ }, ++#endif + }; + + typedef bool (*ate_match_fn_t)(const struct arch_timer_erratum_workaround *, diff --git a/target/linux/sunxi/patches-4.14/101-arm64-dts-allwinner-a64-Enable-A64-timer-workaround.patch b/target/linux/sunxi/patches-4.14/101-arm64-dts-allwinner-a64-Enable-A64-timer-workaround.patch new file mode 100644 index 0000000000..5bfd33b944 --- /dev/null +++ b/target/linux/sunxi/patches-4.14/101-arm64-dts-allwinner-a64-Enable-A64-timer-workaround.patch 
@@ -0,0 +1,26 @@ +From 55ec26d6a4241363fa94f15377ebd8f1116fbfd7 Mon Sep 17 00:00:00 2001 +From: Samuel Holland +Date: Sat, 12 Jan 2019 20:17:19 -0600 +Subject: [PATCH] arm64: dts: allwinner: a64: Enable A64 timer workaround + +As instability in the architectural timer has been observed on multiple +devices using this SoC, including the Pine64 and the Orange Pi Win, +enable the workaround in the SoC's device tree. + +Acked-by: Maxime Ripard +Signed-off-by: Samuel Holland +Signed-off-by: Chen-Yu Tsai +--- + arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi | 1 + + 1 file changed, 1 insertion(+) + +--- a/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi ++++ b/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi +@@ -114,6 +114,7 @@ + + timer { + compatible = "arm,armv8-timer"; ++ allwinner,erratum-unknown1; + interrupts = , + +Date: Sat, 12 Jan 2019 20:17:18 -0600 +Subject: [PATCH] clocksource/drivers/arch_timer: Workaround for Allwinner A64 + timer instability +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +The Allwinner A64 SoC is known[1] to have an unstable architectural +timer, which manifests itself most obviously in the time jumping forward +a multiple of 95 years[2][3]. This coincides with 2^56 cycles at a +timer frequency of 24 MHz, implying that the time went slightly backward +(and this was interpreted by the kernel as it jumping forward and +wrapping around past the epoch). + +Investigation revealed instability in the low bits of CNTVCT at the +point a high bit rolls over. This leads to power-of-two cycle forward +and backward jumps. (Testing shows that forward jumps are about twice as +likely as backward jumps.) Since the counter value returns to normal +after an indeterminate read, each "jump" really consists of both a +forward and backward jump from the software perspective. + +Unless the kernel is trapping CNTVCT reads, a userspace program is able +to read the register in a loop faster than it changes. 
A test program +running on all 4 CPU cores that reported jumps larger than 100 ms was +run for 13.6 hours and reported the following: + + Count | Event +-------+--------------------------- + 9940 | jumped backward 699ms + 268 | jumped backward 1398ms + 1 | jumped backward 2097ms + 16020 | jumped forward 175ms + 6443 | jumped forward 699ms + 2976 | jumped forward 1398ms + 9 | jumped forward 356516ms + 9 | jumped forward 357215ms + 4 | jumped forward 714430ms + 1 | jumped forward 3578440ms + +This works out to a jump larger than 100 ms about every 5.5 seconds on +each CPU core. + +The largest jump (almost an hour!) was the following sequence of reads: + 0x0000007fffffffff → 0x00000093feffffff → 0x0000008000000000 + +Note that the middle bits don't necessarily all read as all zeroes or +all ones during the anomalous behavior; however the low 10 bits checked +by the function in this patch have never been observed with any other +value. + +Also note that smaller jumps are much more common, with backward jumps +of 2048 (2^11) cycles observed over 400 times per second on each core. +(Of course, this is partially explained by lower bits rolling over more +frequently.) Any one of these could have caused the 95 year time skip. + +Similar anomalies were observed while reading CNTPCT (after patching the +kernel to allow reads from userspace). However, the CNTPCT jumps are +much less frequent, and only small jumps were observed. The same program +as before (except now reading CNTPCT) observed after 72 hours: + + Count | Event +-------+--------------------------- + 17 | jumped backward 699ms + 52 | jumped forward 175ms + 2831 | jumped forward 699ms + 5 | jumped forward 1398ms + +Further investigation showed that the instability in CNTPCT/CNTVCT also +affected the respective timer's TVAL register. 
The following values were +observed immediately after writing CNTV_TVAL to 0x10000000: + + CNTVCT | CNTV_TVAL | CNTV_CVAL | CNTV_TVAL Error +--------------------+------------+--------------------+----------------- + 0x000000d4a2d8bfff | 0x10003fff | 0x000000d4b2d8bfff | +0x00004000 + 0x000000d4a2d94000 | 0x0fffffff | 0x000000d4b2d97fff | -0x00004000 + 0x000000d4a2d97fff | 0x10003fff | 0x000000d4b2d97fff | +0x00004000 + 0x000000d4a2d9c000 | 0x0fffffff | 0x000000d4b2d9ffff | -0x00004000 + +The pattern of errors in CNTV_TVAL seemed to depend on exactly which +value was written to it. For example, after writing 0x10101010: + + CNTVCT | CNTV_TVAL | CNTV_CVAL | CNTV_TVAL Error +--------------------+------------+--------------------+----------------- + 0x000001ac3effffff | 0x1110100f | 0x000001ac4f10100f | +0x1000000 + 0x000001ac40000000 | 0x1010100f | 0x000001ac5110100f | -0x1000000 + 0x000001ac58ffffff | 0x1110100f | 0x000001ac6910100f | +0x1000000 + 0x000001ac66000000 | 0x1010100f | 0x000001ac7710100f | -0x1000000 + 0x000001ac6affffff | 0x1110100f | 0x000001ac7b10100f | +0x1000000 + 0x000001ac6e000000 | 0x1010100f | 0x000001ac7f10100f | -0x1000000 + +I was also twice able to reproduce the issue covered by Allwinner's +workaround[4], that writing to TVAL sometimes fails, and both CVAL and +TVAL are left with entirely bogus values. One was the following values: + + CNTVCT | CNTV_TVAL | CNTV_CVAL +--------------------+------------+-------------------------------------- + 0x000000d4a2d6014c | 0x8fbd5721 | 0x000000d132935fff (615s in the past) +Reviewed-by: Marc Zyngier + +======================================================================== + +Because the CPU can read the CNTPCT/CNTVCT registers faster than they +change, performing two reads of the register and comparing the high bits +(like other workarounds) is not a workable solution. And because the +timer can jump both forward and backward, no pair of reads can +distinguish a good value from a bad one. 
The only way to guarantee a +good value from consecutive reads would be to read _three_ times, and +take the middle value only if the three values are 1) each unique and +2) increasing. This takes at minimum 3 counter cycles (125 ns), or more +if an anomaly is detected. + +However, since there is a distinct pattern to the bad values, we can +optimize the common case (1022/1024 of the time) to a single read by +simply ignoring values that match the error pattern. This still takes no +more than 3 cycles in the worst case, and requires much less code. As an +additional safety check, we still limit the loop iteration to the number +of max-frequency (1.2 GHz) CPU cycles in three 24 MHz counter periods. + +For the TVAL registers, the simple solution is to not use them. Instead, +read or write the CVAL and calculate the TVAL value in software. + +Although the manufacturer is aware of at least part of the erratum[4], +there is no official name for it. For now, use the kernel-internal name +"UNKNOWN1". + +[1]: https://github.com/armbian/build/commit/a08cd6fe7ae9 +[2]: https://forum.armbian.com/topic/3458-a64-datetime-clock-issue/ +[3]: https://irclog.whitequark.org/linux-sunxi/2018-01-26 +[4]: https://github.com/Allwinner-Homlet/H6-BSP4.9-linux/blob/master/drivers/clocksource/arm_arch_timer.c#L272 + +Acked-by: Maxime Ripard +Tested-by: Andre Przywara +Signed-off-by: Samuel Holland +Cc: stable@vger.kernel.org +Signed-off-by: Daniel Lezcano +--- + Documentation/arm64/silicon-errata.txt | 2 + + drivers/clocksource/Kconfig | 10 +++++ + drivers/clocksource/arm_arch_timer.c | 55 ++++++++++++++++++++++++++ + 3 files changed, 67 insertions(+) + +--- a/Documentation/arm64/silicon-errata.txt ++++ b/Documentation/arm64/silicon-errata.txt +@@ -44,6 +44,8 @@ stable kernels. 
+ + | Implementor | Component | Erratum ID | Kconfig | + +----------------+-----------------+-----------------+-----------------------------+ ++| Allwinner | A64/R18 | UNKNOWN1 | SUN50I_ERRATUM_UNKNOWN1 | ++| | | | | + | ARM | Cortex-A53 | #826319 | ARM64_ERRATUM_826319 | + | ARM | Cortex-A53 | #827319 | ARM64_ERRATUM_827319 | + | ARM | Cortex-A53 | #824069 | ARM64_ERRATUM_824069 | +--- a/drivers/clocksource/Kconfig ++++ b/drivers/clocksource/Kconfig +@@ -365,6 +365,16 @@ config ARM64_ERRATUM_858921 + The workaround will be dynamically enabled when an affected + core is detected. + ++config SUN50I_ERRATUM_UNKNOWN1 ++ bool "Workaround for Allwinner A64 erratum UNKNOWN1" ++ default y ++ depends on ARM_ARCH_TIMER && ARM64 && ARCH_SUNXI ++ select ARM_ARCH_TIMER_OOL_WORKAROUND ++ help ++ This option enables a workaround for instability in the timer on ++ the Allwinner A64 SoC. The workaround will only be active if the ++ allwinner,erratum-unknown1 property is found in the timer node. ++ + config ARM_GLOBAL_TIMER + bool "Support for the ARM global timer" if COMPILE_TEST + select TIMER_OF if OF +--- a/drivers/clocksource/arm_arch_timer.c ++++ b/drivers/clocksource/arm_arch_timer.c +@@ -319,6 +319,48 @@ static u64 notrace arm64_858921_read_cnt + } + #endif + ++#ifdef CONFIG_SUN50I_ERRATUM_UNKNOWN1 ++/* ++ * The low bits of the counter registers are indeterminate while bit 10 or ++ * greater is rolling over. Since the counter value can jump both backward ++ * (7ff -> 000 -> 800) and forward (7ff -> fff -> 800), ignore register values ++ * with all ones or all zeros in the low bits. Bound the loop by the maximum ++ * number of CPU cycles in 3 consecutive 24 MHz counter periods. 
++ */ ++#define __sun50i_a64_read_reg(reg) ({ \ ++ u64 _val; \ ++ int _retries = 150; \ ++ \ ++ do { \ ++ _val = read_sysreg(reg); \ ++ _retries--; \ ++ } while (((_val + 1) & GENMASK(9, 0)) <= 1 && _retries); \ ++ \ ++ WARN_ON_ONCE(!_retries); \ ++ _val; \ ++}) ++ ++static u64 notrace sun50i_a64_read_cntpct_el0(void) ++{ ++ return __sun50i_a64_read_reg(cntpct_el0); ++} ++ ++static u64 notrace sun50i_a64_read_cntvct_el0(void) ++{ ++ return __sun50i_a64_read_reg(cntvct_el0); ++} ++ ++static u32 notrace sun50i_a64_read_cntp_tval_el0(void) ++{ ++ return read_sysreg(cntp_cval_el0) - sun50i_a64_read_cntpct_el0(); ++} ++ ++static u32 notrace sun50i_a64_read_cntv_tval_el0(void) ++{ ++ return read_sysreg(cntv_cval_el0) - sun50i_a64_read_cntvct_el0(); ++} ++#endif ++ + #ifdef CONFIG_ARM_ARCH_TIMER_OOL_WORKAROUND + DEFINE_PER_CPU(const struct arch_timer_erratum_workaround *, timer_unstable_counter_workaround); + EXPORT_SYMBOL_GPL(timer_unstable_counter_workaround); +@@ -408,6 +450,19 @@ static const struct arch_timer_erratum_w + .read_cntvct_el0 = arm64_858921_read_cntvct_el0, + }, + #endif ++#ifdef CONFIG_SUN50I_ERRATUM_UNKNOWN1 ++ { ++ .match_type = ate_match_dt, ++ .id = "allwinner,erratum-unknown1", ++ .desc = "Allwinner erratum UNKNOWN1", ++ .read_cntp_tval_el0 = sun50i_a64_read_cntp_tval_el0, ++ .read_cntv_tval_el0 = sun50i_a64_read_cntv_tval_el0, ++ .read_cntpct_el0 = sun50i_a64_read_cntpct_el0, ++ .read_cntvct_el0 = sun50i_a64_read_cntvct_el0, ++ .set_next_event_phys = erratum_set_next_event_tval_phys, ++ .set_next_event_virt = erratum_set_next_event_tval_virt, ++ }, ++#endif + }; + + typedef bool (*ate_match_fn_t)(const struct arch_timer_erratum_workaround *, diff --git a/target/linux/sunxi/patches-4.19/101-arm64-dts-allwinner-a64-Enable-A64-timer-workaround.patch b/target/linux/sunxi/patches-4.19/101-arm64-dts-allwinner-a64-Enable-A64-timer-workaround.patch new file mode 100644 index 0000000000..ef7867af81 --- /dev/null +++ 
b/target/linux/sunxi/patches-4.19/101-arm64-dts-allwinner-a64-Enable-A64-timer-workaround.patch @@ -0,0 +1,26 @@ +From 55ec26d6a4241363fa94f15377ebd8f1116fbfd7 Mon Sep 17 00:00:00 2001 +From: Samuel Holland +Date: Sat, 12 Jan 2019 20:17:19 -0600 +Subject: [PATCH] arm64: dts: allwinner: a64: Enable A64 timer workaround + +As instability in the architectural timer has been observed on multiple +devices using this SoC, including the Pine64 and the Orange Pi Win, +enable the workaround in the SoC's device tree. + +Acked-by: Maxime Ripard +Signed-off-by: Samuel Holland +Signed-off-by: Chen-Yu Tsai +--- + arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi | 1 + + 1 file changed, 1 insertion(+) + +--- a/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi ++++ b/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi +@@ -159,6 +159,7 @@ + + timer { + compatible = "arm,armv8-timer"; ++ allwinner,erratum-unknown1; + interrupts = , +