allwinner: Use the arisc to turn off ARM cores
author Andre Przywara <andre.przywara@arm.com>
Thu, 27 Sep 2018 23:43:32 +0000 (00:43 +0100)
committer Andre Przywara <andre.przywara@arm.com>
Sat, 20 Oct 2018 15:23:59 +0000 (16:23 +0100)
PSCI requires a core to turn itself off, which we can't do properly by
just executing an algorithm on that very core. As a consequence we just
put a core into WFI on CPU_OFF right now.
To fix this, let the "arisc" management processor (an OpenRISC core)
assert the core's reset and turn off its power domain on our behalf.
We use a handcrafted sequence of OpenRISC instructions to achieve this,
and hand this data over to the new sunxi_execute_arisc_code() routine.
The commented source code for this routine is provided in a separate file,
but the ATF code contains the already encoded instructions as data.
The H6 uses the same algorithm, but differs in the MMIO addresses, so
provide a SoC (family) specific copy of that code.

Signed-off-by: Andre Przywara <andre.przywara@arm.com>
plat/allwinner/common/arisc_off.S [new file with mode: 0644]
plat/allwinner/common/sunxi_cpu_ops.c
plat/allwinner/common/sunxi_pm.c
plat/allwinner/sun50i_a64/include/core_off_arisc.h [new file with mode: 0644]
plat/allwinner/sun50i_h6/include/core_off_arisc.h [new file with mode: 0644]

diff --git a/plat/allwinner/common/arisc_off.S b/plat/allwinner/common/arisc_off.S
new file mode 100644 (file)
index 0000000..ed10832
--- /dev/null
@@ -0,0 +1,115 @@
+# turn_off_core.S
+#
+# Copyright (c) 2018, Andre Przywara <osp@andrep.de>
+# SPDX-License-Identifier: BSD-3-Clause
+#
+# OpenRISC assembly to turn off an ARM core on an Allwinner SoC from
+# the arisc management controller.
+# Generate a binary representation with:
+# $ or1k-elf-as -c -o turn_off_core.o turn_off_core.S
+# $ or1k-elf-objcopy -O binary --reverse-bytes=4 turn_off_core.o \
+#   turn_off_core.bin
+# The encoded instructions go into an array defined in
+# plat/allwinner/sun50i_*/include/core_off_arisc.h, to be handed off to
+# the arisc processor.
+#
+# This routine is meant to be called directly from arisc reset (put the
+# start address in the reset vector), to be actually triggered by that
+# very ARM core to be turned off.
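+# It expects the core number presented as a mask in the upper half of
+# r3. The first instruction (l.movhi) loads its 16-bit immediate into that
+# upper half, so the mask has to be patched into the lower 16 bits of the
+# first instruction word, overwriting the 0 in this code here.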
+# The code will do the following:
+# - Read the C_CPU_STATUS register, which contains the status of the WFI
+#   lines of each of the four A53 cores.
+# - Loop until the core in question reaches WFI.
+# - Using that mask, activate the core output clamps by setting the
+#   respective core bit in CPUX_PWROFF_GATING_REG (0x1f01500).
+#   Note that the clamp for core 0 covers more than just the core, activating
+#   it hangs the whole system. So we skip this step for core 0.
+# - Using the negated mask, assert the core's reset line by clearing the
+#   respective bit in C_RST_CTRL (0x1f01c30).
+# - Finally turn off the core's power switch by writing 0xff to the
+#   respective CPUx_PWR_SWITCH_REG (0x1f01540 ff.)
+# - Assert the arisc's own reset to end execution.
+#   This also signals other arisc users that the chip is free again.
+# So in C this would look like:
+#      while (!(readl(0x1700030) & (1U << (16 + core_nr))))
+#              ;
+#      if (core_nr != 0)
+#              writel(readl(0x1f01500) | (1U << core_nr), 0x1f01500);
+#      writel(readl(0x1f01c30) & ~(1U << core_nr), 0x1f01c30);
+#      writel(0xff, 0x1f01540 + (core_nr * 4));
+#      writel(0, 0x1f01c00);           /* arisc puts itself into reset */
+# (using A64/H5 addresses)
+
+.text
+# A64/H5 variant. Register usage throughout:
+#   r3:  core mask in the upper 16 bits (patched into the first instruction)
+#   r0:  constant zero
+#   r13: MMIO base address, r5/r6: scratch
+_start:
+       l.movhi r3, 0                           # FIXUP! with core mask
+       l.movhi r0, 0                           # clear r0
+       l.movhi r13, 0x170                      # r13: CPU_CFG_BASE=0x01700000
+wait_wfi:
+       l.lwz   r5, 0x30(r13)                   # load C_CPU_STATUS
+       l.and   r5, r5, r3                      # mask requested core
+       l.sfeq  r5, r0                          # flag set: not yet in WFI
+       l.bf    wait_wfi                        # if so, try again
+
+       l.srli  r6, r3, 16                      # move mask to lower 16 bits
+                                               # (delay slot of l.bf above)
+       l.sfeqi r6, 1                           # core 0 is special
+       l.bf    1f                              # don't touch the bit for core 0
+       l.movhi r13, 0x1f0                      # r13: R_CPUCFG (delay slot)
+       l.lwz   r5, 0x1500(r13)                 # core output clamps
+       l.or    r5, r5, r6                      # set bit to ...
+       l.sw    0x1500(r13), r5                 # ... activate for our core
+
+1:     l.lwz   r5, 0x1c30(r13)                 # CPU power-on reset
+       l.xori  r6, r6, -1                      # negate core mask
+       l.and   r5, r5, r6                      # clear bit to ...
+       l.sw    0x1c30(r13), r5                 # ... assert for our core
+
+       l.ff1   r6, r3                          # get core number from high mask
+       l.addi  r6, r6, -17                     # convert to 0-3
+       l.slli  r6, r6, 2                       # r6: core number*4 (0-12)
+       l.add   r6, r6, r13                     # add to base address
+       l.ori   r5, r0, 0xff                    # 0xff means all switches off
+       l.sw    0x1540(r6), r5                  # core power switch registers
+
+reset: l.sw    0x1c00(r13),r0                  # pull down our own reset line
+
+       l.j     reset                           # just in case ....
+       l.nop   0x0                             # (delay slot)
+
+# Same algorithm as above, but with the MMIO addresses matching the H6 SoC.
+# Register usage is identical: r3 = core mask in the upper 16 bits,
+# r0 = constant zero, r13 = MMIO base address, r5/r6 = scratch.
+_start_h6:
+       l.movhi r3, 0                           # FIXUP! with core mask
+       l.movhi r0, 0                           # clear r0
+       l.movhi r13, 0x901                      # r13: CPU_CFG_BASE=0x09010000
+1:
+       l.lwz   r5, 0x80(r13)                   # load C_CPU_STATUS
+       l.and   r5, r5, r3                      # mask requested core
+       l.sfeq  r5, r0                          # flag set: not yet in WFI
+       l.bf    1b                              # if so, try again
+
+       l.srli  r6, r3, 16                      # move mask to lower 16 bits(ds)
+       l.sfeqi r6, 1                           # core 0 is special
+       l.bf    1f                              # don't touch the bit for core 0
+       l.movhi r13, 0x700                      # address of R_CPUCFG (ds)
+       l.lwz   r5, 0x0444(r13)                 # core output clamps
+       l.or    r5, r5, r6                      # set bit to ...
+       l.sw    0x0444(r13), r5                 # ... activate for our core
+
+1:     l.lwz   r5, 0x0440(r13)                 # CPU power-on reset
+       l.xori  r6, r6, -1                      # negate core mask
+       l.and   r5, r5, r6                      # clear bit to ...
+       l.sw    0x0440(r13), r5                 # ... assert for our core
+
+       l.ff1   r6, r3                          # get core number from high mask
+       l.addi  r6, r6, -17                     # convert to 0-3
+       l.slli  r6, r6, 2                       # r6: core number*4 (0-12)
+       l.add   r6, r6, r13                     # add to base address
+       l.ori   r5, r0, 0xff                    # 0xff means all switches off
+       l.sw    0x0450(r6), r5                  # core power switch registers
+
+1:     l.sw    0x0400(r13),r0                  # pull down our own reset line
+
+       l.j     1b                              # just in case ...
+       l.nop   0x0                             # (delay slot)
index 2db26977d29f745f158cec0002b49b880bc0a838..3b732b5d13a824261c749069ac2b4bb004dbf466 100644 (file)
@@ -4,11 +4,16 @@
  * SPDX-License-Identifier: BSD-3-Clause
  */
 
+#include <arch_helpers.h>
+#include <assert.h>
+#include <core_off_arisc.h>
 #include <debug.h>
+#include <delay_timer.h>
 #include <mmio.h>
+#include <platform.h>
 #include <platform_def.h>
-#include <sunxi_mmap.h>
 #include <sunxi_cpucfg.h>
+#include <sunxi_mmap.h>
 #include <sunxi_private.h>
 #include <utils_def.h>
 
@@ -39,16 +44,37 @@ static void sunxi_cpu_enable_power(unsigned int cluster, unsigned int core)
 
 void sunxi_cpu_off(unsigned int cluster, unsigned int core)
 {
+       int corenr = cluster * PLATFORM_MAX_CPUS_PER_CLUSTER + core;
+
        VERBOSE("PSCI: Powering off cluster %d core %d\n", cluster, core);
 
        /* Deassert DBGPWRDUP */
        mmio_clrbits_32(SUNXI_CPUCFG_DBG_REG0, BIT(core));
-       /* Activate the core output clamps */
-       mmio_setbits_32(SUNXI_POWEROFF_GATING_REG(cluster), BIT(core));
-       /* Assert CPU power-on reset */
-       mmio_clrbits_32(SUNXI_POWERON_RST_REG(cluster), BIT(core));
-       /* Remove power from the CPU */
-       sunxi_cpu_disable_power(cluster, core);
+
+       /* We can't turn ourself off like this, but it works for other cores. */
+       if (plat_my_core_pos() != corenr) {
+               /* Activate the core output clamps, but not for core 0. */
+               if (corenr != 0)
+                       mmio_setbits_32(SUNXI_POWEROFF_GATING_REG(cluster),
+                                       BIT(core));
+               /* Assert CPU power-on reset */
+               mmio_clrbits_32(SUNXI_POWERON_RST_REG(cluster), BIT(core));
+               /* Remove power from the CPU */
+               sunxi_cpu_disable_power(cluster, core);
+
+               return;
+       }
+
+       /* Simplifies assembly, all SoCs so far are single cluster anyway. */
+       assert(cluster == 0);
+
+       /*
+        * If we are supposed to turn ourself off, tell the arisc SCP
+        * to do that work for us. The code expects the core mask to be
+        * patched into the first instruction.
+        */
+       sunxi_execute_arisc_code(arisc_core_off, sizeof(arisc_core_off),
+                                0, BIT_32(core));
 }
 
 void sunxi_cpu_on(unsigned int cluster, unsigned int core)
index 86336f017d80a9392fe83f327be3ecb8de90974b..7d13cdad15dc920fd90094e71f2beccbe485bebd 100644 (file)
@@ -42,6 +42,16 @@ static void sunxi_pwr_domain_off(const psci_power_state_t *target_state)
        gicv2_cpuif_disable();
 }
 
+/*
+ * Final power-down step for the calling core, installed as the
+ * pwr_domain_pwr_down_wfi PSCI operation: request that this core be switched
+ * off, then sit in WFI. This function never returns.
+ */
+static void __dead2 sunxi_pwr_down_wfi(const psci_power_state_t *target_state)
+{
+       const u_register_t self = read_mpidr();
+       unsigned int cluster_id = MPIDR_AFFLVL1_VAL(self);
+       unsigned int core_id = MPIDR_AFFLVL0_VAL(self);
+
+       sunxi_cpu_off(cluster_id, core_id);
+
+       /* Re-enter WFI forever, whatever makes wfi() return. */
+       for (;;)
+               wfi();
+}
+
 static void sunxi_pwr_domain_on_finish(const psci_power_state_t *target_state)
 {
        gicv2_pcpu_distif_init();
@@ -82,6 +92,7 @@ static int sunxi_validate_ns_entrypoint(uintptr_t ns_entrypoint)
 static plat_psci_ops_t sunxi_psci_ops = {
        .pwr_domain_on                  = sunxi_pwr_domain_on,
        .pwr_domain_off                 = sunxi_pwr_domain_off,
+       .pwr_domain_pwr_down_wfi        = sunxi_pwr_down_wfi,
        .pwr_domain_on_finish           = sunxi_pwr_domain_on_finish,
        .system_off                     = sunxi_system_off,
        .system_reset                   = sunxi_system_reset,
diff --git a/plat/allwinner/sun50i_a64/include/core_off_arisc.h b/plat/allwinner/sun50i_a64/include/core_off_arisc.h
new file mode 100644 (file)
index 0000000..ae436ca
--- /dev/null
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2018, ARM Limited and Contributors. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+/*
+ * Pre-encoded OpenRISC instructions which the arisc management processor
+ * executes to power off one ARM core, using the A64/H5 MMIO addresses.
+ * The commented assembly source is in plat/allwinner/common/arisc_off.S.
+ *
+ * The lower 16 bits of the first word are a placeholder: the core mask is
+ * patched in there before the code is handed to the arisc. Branch offsets
+ * in the comments below are given in bytes.
+ */
+static uint32_t arisc_core_off[] = {
+       /* Wait until the requested core signals WFI. */
+       0x18600000, /* l.movhi  r3, <coremask>  */
+       0x18000000, /* l.movhi  r0, 0x0         */
+       0x19a00170, /* l.movhi  r13, 0x170      */
+       0x84ad0030, /* l.lwz    r5, 0x30(r13)   */
+       0xe0a51803, /* l.and    r5, r5, r3      */
+       0xe4050000, /* l.sfeq   r5, r0          */
+       0x13fffffd, /* l.bf     -12             */
+
+       /* Activate the core output clamps, skipped for core 0. */
+       0xb8c30050, /* l.srli   r6, r3, 16      */
+       0xbc060001, /* l.sfeqi  r6, 1           */
+       0x10000005, /* l.bf     +20             */
+       0x19a001f0, /* l.movhi  r13, 0x1f0      */
+       0x84ad1500, /* l.lwz    r5, 0x1500(r13) */
+       0xe0a53004, /* l.or     r5, r5, r6      */
+       0xd44d2d00, /* l.sw     0x1500(r13), r5 */
+
+       /* Assert the core's power-on reset. */
+       0x84ad1c30, /* l.lwz    r5, 0x1c30(r13) */
+       0xacc6ffff, /* l.xori   r6, r6, -1      */
+       0xe0a53003, /* l.and    r5, r5, r6      */
+       0xd46d2c30, /* l.sw     0x1c30(r13), r5 */
+
+       /* Turn off the core's power switch. */
+       0xe0c3000f, /* l.ff1    r6, r3          */
+       0x9cc6ffef, /* l.addi   r6, r6, -17     */
+       0xb8c60002, /* l.slli   r6, r6, 2       */
+       0xe0c66800, /* l.add    r6, r6, r13     */
+       0xa8a000ff, /* l.ori    r5, r0, 0xff    */
+       0xd4462d40, /* l.sw     0x1540(r6), r5  */
+
+       /* Pull down the arisc's own reset line, retrying just in case. */
+       0xd46d0400, /* l.sw     0x1c00(r13), r0 */
+       0x03ffffff, /* l.j      -4              */
+       0x15000000, /* l.nop                    */
+};
diff --git a/plat/allwinner/sun50i_h6/include/core_off_arisc.h b/plat/allwinner/sun50i_h6/include/core_off_arisc.h
new file mode 100644 (file)
index 0000000..63a5d8d
--- /dev/null
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2018, ARM Limited and Contributors. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+/*
+ * Pre-encoded OpenRISC instructions which the arisc management processor
+ * executes to power off one ARM core, using the H6 MMIO addresses.
+ * The commented assembly source is in plat/allwinner/common/arisc_off.S.
+ *
+ * The lower 16 bits of the first word are a placeholder: the core mask is
+ * patched in there before the code is handed to the arisc. Branch offsets
+ * in the comments below are given in bytes.
+ */
+static uint32_t arisc_core_off[] = {
+       /* Wait until the requested core signals WFI. */
+       0x18600000, /* l.movhi  r3, <coremask>  */
+       0x18000000, /* l.movhi  r0, 0x0         */
+       0x19a00901, /* l.movhi  r13, 0x901      */
+       0x84ad0080, /* l.lwz    r5, 0x80(r13)   */
+       0xe0a51803, /* l.and    r5, r5, r3      */
+       0xe4050000, /* l.sfeq   r5, r0          */
+       0x13fffffd, /* l.bf     -12             */
+       0xb8c30050, /* l.srli   r6, r3, 16      */
+
+       /* Activate the core output clamps, skipped for core 0. */
+       0xbc060001, /* l.sfeqi  r6, 1           */
+       0x10000005, /* l.bf     +20             */
+       0x19a00700, /* l.movhi  r13, 0x700      */
+       0x84ad0444, /* l.lwz    r5, 0x0444(r13) */
+       0xe0a53004, /* l.or     r5, r5, r6      */
+       0xd40d2c44, /* l.sw     0x0444(r13), r5 */
+
+       /* Assert the core's power-on reset. */
+       0x84ad0440, /* l.lwz    r5, 0x0440(r13) */
+       0xacc6ffff, /* l.xori   r6, r6, -1      */
+       0xe0a53003, /* l.and    r5, r5, r6      */
+       0xd40d2c40, /* l.sw     0x0440(r13), r5 */
+
+       /* Turn off the core's power switch. */
+       0xe0c3000f, /* l.ff1    r6, r3          */
+       0x9cc6ffef, /* l.addi   r6, r6, -17     */
+       0xb8c60002, /* l.slli   r6, r6, 2       */
+       0xe0c66800, /* l.add    r6, r6, r13     */
+       0xa8a000ff, /* l.ori    r5, r0, 0xff    */
+       0xd4062c50, /* l.sw     0x0450(r6), r5  */
+
+       /* Pull down the arisc's own reset line, retrying just in case. */
+       0xd40d0400, /* l.sw     0x0400(r13), r0 */
+       0x03ffffff, /* l.j      -4              */
+       0x15000000, /* l.nop                    */
+};