mpc85xx: add SPI kernel loader for TP-Link TL-WDR4900 v1
authorMatthias Schiffer <mschiffer@universe-factory.net>
Sun, 27 Mar 2022 18:57:01 +0000 (20:57 +0200)
committerDavid Bauer <mail@david-bauer.net>
Fri, 14 Oct 2022 21:13:02 +0000 (23:13 +0200)
Similar to the lzma-loader on our MIPS targets, the spi-loader acts as
a second-stage loader that will then load and start the actual kernel.
As the TL-WDR4900 uses SPI-NOR and the P1010 family does not have support
for memory mapping of this type of flash, this loader needs to contain a
basic driver for the FSL ESPI controller.

Signed-off-by: Matthias Schiffer <mschiffer@universe-factory.net>
22 files changed:
target/linux/mpc85xx/image/spi-loader/.gitignore [new file with mode: 0644]
target/linux/mpc85xx/image/spi-loader/Makefile [new file with mode: 0644]
target/linux/mpc85xx/image/spi-loader/config/tplink_tl-wdr4900-v1.h [new file with mode: 0644]
target/linux/mpc85xx/image/spi-loader/config/tplink_tl-wdr4900-v1.mk [new file with mode: 0644]
target/linux/mpc85xx/image/spi-loader/drivers/serial/ns16550.c [new file with mode: 0644]
target/linux/mpc85xx/image/spi-loader/drivers/spi/fsl_espi.c [new file with mode: 0644]
target/linux/mpc85xx/image/spi-loader/drivers/spi/spi-nor.c [new file with mode: 0644]
target/linux/mpc85xx/image/spi-loader/head.S [new file with mode: 0644]
target/linux/mpc85xx/image/spi-loader/include/image.h [new file with mode: 0644]
target/linux/mpc85xx/image/spi-loader/include/init.h [new file with mode: 0644]
target/linux/mpc85xx/image/spi-loader/include/io.h [new file with mode: 0644]
target/linux/mpc85xx/image/spi-loader/include/ppc_asm.h [new file with mode: 0644]
target/linux/mpc85xx/image/spi-loader/include/serial.h [new file with mode: 0644]
target/linux/mpc85xx/image/spi-loader/include/spi-nor.h [new file with mode: 0644]
target/linux/mpc85xx/image/spi-loader/include/spi.h [new file with mode: 0644]
target/linux/mpc85xx/image/spi-loader/include/stdio.h [new file with mode: 0644]
target/linux/mpc85xx/image/spi-loader/include/string.h [new file with mode: 0644]
target/linux/mpc85xx/image/spi-loader/include/types.h [new file with mode: 0644]
target/linux/mpc85xx/image/spi-loader/loader.c [new file with mode: 0644]
target/linux/mpc85xx/image/spi-loader/loader.lds [new file with mode: 0644]
target/linux/mpc85xx/image/spi-loader/stdio.c [new file with mode: 0644]
target/linux/mpc85xx/image/spi-loader/string.S [new file with mode: 0644]

diff --git a/target/linux/mpc85xx/image/spi-loader/.gitignore b/target/linux/mpc85xx/image/spi-loader/.gitignore
new file mode 100644 (file)
index 0000000..e2e7327
--- /dev/null
@@ -0,0 +1 @@
+/out
diff --git a/target/linux/mpc85xx/image/spi-loader/Makefile b/target/linux/mpc85xx/image/spi-loader/Makefile
new file mode 100644 (file)
index 0000000..45af2c3
--- /dev/null
@@ -0,0 +1,60 @@
+# SPDX-License-Identifier: BSD-2-Clause
+#
+# Copyright (C) 2022 Matthias Schiffer <mschiffer@universe-factory.net>
+
+CONFIG     ?= $(error No configuration set)
+
+include config/$(CONFIG).mk
+
+MKIMAGE    := mkimage
+KARCH      := powerpc
+CC         := $(CROSS_COMPILE)gcc
+LD         := $(CROSS_COMPILE)ld
+OBJCOPY    := $(CROSS_COMPILE)objcopy
+
+PROGRAM_NAME := MPC85xx SPI loader
+
+BIN_FLAGS  := -O binary --pad-to=$(PAD_TO)
+
+CFLAGS     += -std=gnu17 -Os -Wall -Wstrict-prototypes \
+               -fomit-frame-pointer -ffreestanding \
+               -ffunction-sections -fno-pic \
+               -Iinclude -include ../config/$(CONFIG).h \
+               -DCONFIG_PROGRAM_NAME='"$(PROGRAM_NAME)"' \
+               -DCONFIG_IMAGE_OFFSET=$(IMAGE_OFFSET)
+ASFLAGS    := $(CFLAGS)
+
+LDS        := loader.lds
+LDFLAGS    := -static --gc-sections -T $(LDS) -Ttext $(TEXT_START)
+
+OBJECTS    := head.o loader.o string.o stdio.o  drivers/serial/ns16550.o \
+               drivers/spi/fsl_espi.o drivers/spi/spi-nor.o
+
+OUTDIR     := out
+
+all: $(OUTDIR)/uImage
+
+-include $(OBJECTS:%.o=$(OUTDIR)/%.d)
+
+$(OUTDIR)/%.o: %.c Makefile config/$(CONFIG).mk
+       @mkdir -p $(dir $@)
+       $(CC) $(CFLAGS) -c -o $@ -MD -MP $<
+
+$(OUTDIR)/%.o: %.S Makefile config/$(CONFIG).mk
+       @mkdir -p $(dir $@)
+       $(CC) $(ASFLAGS) -c -o $@ -MD -MP $<
+
+$(OUTDIR)/loader.elf: $(OBJECTS:%=$(OUTDIR)/%) $(LDS)
+       $(LD) $(LDFLAGS) -o $@ $(OBJECTS:%=$(OUTDIR)/%)
+
+$(OUTDIR)/loader.bin: $(OUTDIR)/loader.elf
+       $(OBJCOPY) $(BIN_FLAGS) $< $@
+
+$(OUTDIR)/uImage: $(OUTDIR)/loader.bin
+       $(MKIMAGE) -A $(KARCH) -O linux -T kernel -C none \
+               -a $(TEXT_START) -e $(TEXT_START) -n '$(PROGRAM_NAME)' -d $< $@
+
+clean:
+       rm -rf $(OUTDIR)
+
+.PHONY: all clean
diff --git a/target/linux/mpc85xx/image/spi-loader/config/tplink_tl-wdr4900-v1.h b/target/linux/mpc85xx/image/spi-loader/config/tplink_tl-wdr4900-v1.h
new file mode 100644 (file)
index 0000000..c3aca32
--- /dev/null
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#define CONFIG_FREQ_SYSTEMBUS 399999996
+#define CONFIG_SPI_MAX_HZ 25000000
+
+#define CONFIG_SERIAL_NS16550_REG_BASE 0xffe04500
+#define CONFIG_SERIAL_NS16550_REG_SHIFT 0
+
+#define CONFIG_SPI_FSL_ESPI_REG_BASE 0xffe07000
diff --git a/target/linux/mpc85xx/image/spi-loader/config/tplink_tl-wdr4900-v1.mk b/target/linux/mpc85xx/image/spi-loader/config/tplink_tl-wdr4900-v1.mk
new file mode 100644 (file)
index 0000000..ca8793f
--- /dev/null
@@ -0,0 +1,13 @@
+TEXT_START   := 0x800000
+
+# It seems that the TP-Link TL-WDR4900 U-Boot does not flush the cache before
+# jumping into the uImage. Padding the image to 32k seems make the boot work
+# reliably.
+#
+# We leave 0x40 for the uImage header, which allows us to append the kernel
+# uImage after the loader and read with an added offset 0x8000 from the
+# SPI flash.
+PAD_TO       := 0x807fc0
+
+# Kernel begins at 0x60200 in flash, loader adds 0x8000
+IMAGE_OFFSET := 0x68200
diff --git a/target/linux/mpc85xx/image/spi-loader/drivers/serial/ns16550.c b/target/linux/mpc85xx/image/spi-loader/drivers/serial/ns16550.c
new file mode 100644 (file)
index 0000000..d454917
--- /dev/null
@@ -0,0 +1,53 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * 16550 serial console support.
+ *
+ * Original copied from <file:arch/ppc/boot/common/ns16550.c>
+ * (which had no copyright)
+ * Modifications: 2006 (c) MontaVista Software, Inc.
+ *
+ * Modified by: Mark A. Greer <mgreer@mvista.com>
+ *
+ * Adapted by:
+ *
+ * Copyright (c) 2022 Matthias Schiffer <mschiffer@universe-factory.net>
+ */
+
+#include <io.h>
+#include <serial.h>
+
+#define UART_DLL       0       /* Out: Divisor Latch Low */
+#define UART_DLM       1       /* Out: Divisor Latch High */
+#define UART_FCR       2       /* Out: FIFO Control Register */
+#define UART_LCR       3       /* Out: Line Control Register */
+#define UART_MCR       4       /* Out: Modem Control Register */
+#define UART_LSR       5       /* In:  Line Status Register */
+#define UART_LSR_THRE  0x20    /* Transmit-hold-register empty */
+#define UART_LSR_DR    0x01    /* Receiver data ready */
+#define UART_MSR       6       /* In:  Modem Status Register */
+#define UART_SCR       7       /* I/O: Scratch Register */
+
+static uint8_t *const reg_base = (uint8_t *)CONFIG_SERIAL_NS16550_REG_BASE;
+static const int reg_shift = CONFIG_SERIAL_NS16550_REG_SHIFT;
+
+void serial_console_putchar(char c)
+{
+       while ((in_8(reg_base + (UART_LSR << reg_shift)) & UART_LSR_THRE) == 0);
+       out_8(reg_base, c);
+}
+
+int serial_console_getc(void)
+{
+       while ((in_8(reg_base + (UART_LSR << reg_shift)) & UART_LSR_DR) == 0);
+       return in_8(reg_base);
+}
+
+int serial_console_tstc(void)
+{
+       return ((in_8(reg_base + (UART_LSR << reg_shift)) & UART_LSR_DR) != 0);
+}
+
+void serial_console_init(void)
+{
+       out_8(reg_base + (UART_FCR << reg_shift), 0x06);
+}
diff --git a/target/linux/mpc85xx/image/spi-loader/drivers/spi/fsl_espi.c b/target/linux/mpc85xx/image/spi-loader/drivers/spi/fsl_espi.c
new file mode 100644 (file)
index 0000000..7e9d8ed
--- /dev/null
@@ -0,0 +1,232 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * eSPI controller driver.
+ *
+ * Copyright (c) 2022 Matthias Schiffer <mschiffer@universe-factory.net>
+ *
+ * Based on U-Boot code:
+ *
+ * Copyright 2010-2011 Freescale Semiconductor, Inc.
+ * Copyright 2020 NXP
+ * Author: Mingkai Hu (Mingkai.hu@freescale.com)
+ *        Chuanhua Han (chuanhua.han@nxp.com)
+ */
+
+#include <io.h>
+#include <stdio.h>
+#include <spi.h>
+
+/* eSPI Registers */
+typedef struct ccsr_espi {
+       uint32_t mode;       /* eSPI mode */
+       uint32_t event;      /* eSPI event */
+       uint32_t mask;       /* eSPI mask */
+       uint32_t com;        /* eSPI command */
+       uint32_t tx;         /* eSPI transmit FIFO access */
+       uint32_t rx;         /* eSPI receive FIFO access */
+       uint8_t  res1[8];    /* reserved */
+       uint32_t csmode[4];  /* 0x2c: sSPI CS0/1/2/3 mode */
+       uint8_t  res2[4048]; /* fill up to 0x1000 */
+} ccsr_espi_t;
+
+struct fsl_spi {
+       ccsr_espi_t     *espi;
+       uint32_t        cs;
+       uint32_t        div16;
+       uint32_t        pm;
+       uint32_t        mode;
+};
+
+#define ESPI_MAX_CS_NUM                4
+#define ESPI_FIFO_WIDTH_BIT    32
+
+#define ESPI_EV_RNE            BIT(9)
+#define ESPI_EV_TNF            BIT(8)
+#define ESPI_EV_DON            BIT(14)
+#define ESPI_EV_TXE            BIT(15)
+#define ESPI_EV_RFCNT_SHIFT    24
+#define ESPI_EV_RFCNT_MASK     (0x3f << ESPI_EV_RFCNT_SHIFT)
+
+#define ESPI_MODE_EN           BIT(31) /* Enable interface */
+#define ESPI_MODE_TXTHR(x)     ((x) << 8)      /* Tx FIFO threshold */
+#define ESPI_MODE_RXTHR(x)     ((x) << 0)      /* Rx FIFO threshold */
+
+#define ESPI_COM_CS(x)         ((x) << 30)
+#define ESPI_COM_TRANLEN(x)    ((x) << 0)
+
+#define ESPI_CSMODE_CI_INACTIVEHIGH    BIT(31)
+#define ESPI_CSMODE_CP_BEGIN_EDGCLK    BIT(30)
+#define ESPI_CSMODE_REV_MSB_FIRST      BIT(29)
+#define ESPI_CSMODE_DIV16              BIT(28)
+#define ESPI_CSMODE_PM(x)              ((x) << 24)
+#define ESPI_CSMODE_POL_ASSERTED_LOW   BIT(20)
+#define ESPI_CSMODE_LEN(x)             ((x) << 16)
+#define ESPI_CSMODE_CSBEF(x)           ((x) << 12)
+#define ESPI_CSMODE_CSAFT(x)           ((x) << 8)
+#define ESPI_CSMODE_CSCG(x)            ((x) << 3)
+
+#define ESPI_CSMODE_INIT_VAL (ESPI_CSMODE_POL_ASSERTED_LOW | \
+               ESPI_CSMODE_CSBEF(0) | ESPI_CSMODE_CSAFT(0) | \
+               ESPI_CSMODE_CSCG(1))
+
+#define ESPI_MAX_DATA_TRANSFER_LEN 0x10000
+
+static int espi_xfer(struct fsl_spi *fsl, const struct spi_transfer *msg, int n)
+{
+       ccsr_espi_t *espi = fsl->espi;
+       size_t len = spi_message_len(msg, n);
+
+       if (len > ESPI_MAX_DATA_TRANSFER_LEN)
+               return -1;
+
+       /* clear the RXCNT and TXCNT */
+       out_be32(&espi->mode, in_be32(&espi->mode) & (~ESPI_MODE_EN));
+       out_be32(&espi->mode, in_be32(&espi->mode) | ESPI_MODE_EN);
+       out_be32(&espi->com, ESPI_COM_CS(fsl->cs) | ESPI_COM_TRANLEN(len - 1));
+
+       int last_msg = n - 1;
+       int tx_msg = -1, rx_msg = -1;
+       size_t tx_len = 0, rx_len = 0, tx_pos = 0, rx_pos = 0;
+
+       while (true) {
+               if (tx_pos == tx_len && tx_msg < last_msg) {
+                       tx_msg++;
+                       tx_pos = 0;
+                       tx_len = msg[tx_msg].len;
+               }
+               if (rx_pos == rx_len && rx_msg < last_msg) {
+                       rx_msg++;
+                       rx_pos = 0;
+                       rx_len = msg[rx_msg].len;
+               }
+               if (rx_pos == rx_len)
+                       break;
+
+               const uint8_t *tx_buf = msg[tx_msg].tx_buf;
+               uint8_t *rx_buf = msg[rx_msg].rx_buf;
+
+               uint32_t event = in_be32(&espi->event);
+
+               /* TX */
+               if ((event & ESPI_EV_TNF) && tx_len > 0) {
+                       uint8_t v = 0;
+                       if (tx_buf)
+                               v = tx_buf[tx_pos];
+                       out_8((uint8_t *)&espi->tx, v);
+                       tx_pos++;
+               }
+
+               /* RX */
+               if (event & ESPI_EV_RNE) {
+                       uint8_t v = in_8((uint8_t *)&espi->rx);
+                       if (rx_buf)
+                               rx_buf[rx_pos] = v;
+                       rx_pos++;
+               }
+       }
+
+       return 0;
+}
+
+static void espi_claim_bus(struct fsl_spi *fsl)
+{
+       ccsr_espi_t *espi = fsl->espi;
+       uint32_t csmode;
+       int i;
+
+       /* Enable eSPI interface */
+       out_be32(&espi->mode, ESPI_MODE_RXTHR(3)
+                       | ESPI_MODE_TXTHR(4) | ESPI_MODE_EN);
+
+       out_be32(&espi->mask, 0x00000000); /* Mask  all eSPI interrupts */
+
+       /* Init CS mode interface */
+       for (i = 0; i < ESPI_MAX_CS_NUM; i++)
+               out_be32(&espi->csmode[i], ESPI_CSMODE_INIT_VAL);
+
+       csmode = ESPI_CSMODE_INIT_VAL;
+
+       /* Set eSPI BRG clock source */
+       csmode |= ESPI_CSMODE_PM(fsl->pm) | fsl->div16;
+
+       /* Set eSPI mode */
+       if (fsl->mode & SPI_CPHA)
+               csmode |= ESPI_CSMODE_CP_BEGIN_EDGCLK;
+       if (fsl->mode & SPI_CPOL)
+               csmode |= ESPI_CSMODE_CI_INACTIVEHIGH;
+
+       /* Character bit order: msb first */
+       csmode |= ESPI_CSMODE_REV_MSB_FIRST;
+
+       /* Character length in bits, between 0x3~0xf, i.e. 4bits~16bits */
+       csmode |= ESPI_CSMODE_LEN(7);
+
+       out_be32(&espi->csmode[fsl->cs], csmode);
+}
+
+static void espi_release_bus(struct fsl_spi *fsl)
+{
+       /* Disable the SPI hardware */
+       out_be32(&fsl->espi->mode,
+               in_be32(&fsl->espi->mode) & (~ESPI_MODE_EN));
+}
+
+static void espi_setup_spi(struct fsl_spi *fsl, unsigned int max_hz)
+{
+       unsigned long spibrg;
+       uint32_t pm;
+
+       spibrg = CONFIG_FREQ_SYSTEMBUS / 2;
+       fsl->div16 = 0;
+       if ((spibrg / max_hz) > 32) {
+               fsl->div16 = ESPI_CSMODE_DIV16;
+               pm = spibrg / (max_hz * 16 * 2);
+               if (pm > 16) {
+                       /* max_hz too low */
+                       pm = 16;
+               }
+       } else {
+               pm = spibrg / (max_hz * 2);
+       }
+       if (pm)
+               pm--;
+       fsl->pm = pm;
+}
+
+static struct fsl_spi spi;
+
+int spi_init(unsigned int cs, unsigned int max_hz, unsigned int mode)
+{
+       if (cs >= ESPI_MAX_CS_NUM)
+               return -1;
+
+       spi.espi = (ccsr_espi_t *)CONFIG_SPI_FSL_ESPI_REG_BASE;
+       spi.cs = cs;
+       spi.mode = mode;
+
+       espi_setup_spi(&spi, max_hz);
+
+       return 0;
+}
+
+int spi_claim_bus(void)
+{
+       espi_claim_bus(&spi);
+
+       return 0;
+}
+
+void spi_release_bus(void)
+{
+       espi_release_bus(&spi);
+}
+
+int spi_xfer(const struct spi_transfer *msg, int n)
+{
+       return espi_xfer(&spi, msg, n);
+}
+
+size_t spi_max_xfer(void)
+{
+       return ESPI_MAX_DATA_TRANSFER_LEN;
+}
diff --git a/target/linux/mpc85xx/image/spi-loader/drivers/spi/spi-nor.c b/target/linux/mpc85xx/image/spi-loader/drivers/spi/spi-nor.c
new file mode 100644 (file)
index 0000000..4034348
--- /dev/null
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: BSD-2-Clause
+/*
+ * Copyright (c) 2022 Matthias Schiffer <mschiffer@universe-factory.net>
+ */
+
+#include <spi.h>
+#include <spi-nor.h>
+#include <stdio.h>
+
+#define CMD_READ 0x03
+#define CMD_READ_ID 0x9F
+
+int spi_nor_read_id(void)
+{
+       int ret;
+
+       const uint8_t tx_buf[1] = {CMD_READ_ID};
+       uint8_t rx_buf[3] = {};
+       struct spi_transfer t[2] = {{
+               .tx_buf = tx_buf,
+               .rx_buf = NULL,
+               .len = sizeof(tx_buf),
+       }, {
+               .tx_buf = NULL,
+               .rx_buf = rx_buf,
+               .len = sizeof(rx_buf),
+       }};
+
+       ret = spi_xfer(t, ARRAY_SIZE(t));
+       if (ret) {
+               puts("SPI transfer failed\n");
+               return ret;
+       }
+
+       puts("Flash JECED ID: ");
+       put_array(rx_buf, sizeof(rx_buf));
+
+       return 0;
+}
+
+int spi_nor_read_data(void *dest, size_t pos, size_t len)
+{
+       int ret;
+
+       while (len > 0) {
+               uint8_t cmd[4] = {CMD_READ, pos >> 16, pos >> 8, pos};
+               size_t block_len = min(len, spi_max_xfer() - sizeof(cmd));
+
+               struct spi_transfer t[2] = {{
+                       .tx_buf = cmd,
+                       .rx_buf = NULL,
+                       .len = sizeof(cmd),
+               }, {
+                       .tx_buf = NULL,
+                       .rx_buf = dest,
+                       .len = block_len,
+               }};
+
+               ret = spi_xfer(t, ARRAY_SIZE(t));
+               if (ret) {
+                       puts("SPI transfer failed\n");
+                       return ret;
+               }
+
+               pos += block_len;
+               dest += block_len;
+               len -= block_len;
+       }
+
+       return 0;
+}
+
diff --git a/target/linux/mpc85xx/image/spi-loader/head.S b/target/linux/mpc85xx/image/spi-loader/head.S
new file mode 100644 (file)
index 0000000..032e914
--- /dev/null
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2022 Matthias Schiffer <mschiffer@universe-factory.net>
+ *
+ * Based on Linux arch/powerpc/boot/crt0.S, which is:
+ *
+ * Copyright (C) Paul Mackerras 1997.
+ */
+
+#include <ppc_asm.h>
+
+       .text
+       .global _start
+_start:
+       /* Do a cache flush for our text, in case the loader didn't */
+       lis     r3,_text_start@ha
+       addi    r3,r3,_text_start@l
+       li      r4,_text_len
+       bl      flush_cache
+
+       /* Clear the BSS */
+       lis     r3,_bss_start@ha
+       addi    r3,r3,_bss_start@l
+       li      r4,0
+       li      r5,_bss_len
+       bl      memset
+
+       /* Set up stack */
+       lis     r1,_stack_top@ha
+       addi    r1,r1,_stack_top@l
+       /* Establish a stack frame */
+       li      r0,0
+       stwu    r0,-16(r1)
+
+       /* Call start */
+       b       start
diff --git a/target/linux/mpc85xx/image/spi-loader/include/image.h b/target/linux/mpc85xx/image/spi-loader/include/image.h
new file mode 100644 (file)
index 0000000..0377c29
--- /dev/null
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Derived from U-Boot include/image.h:
+ *
+ * (C) Copyright 2008 Semihalf
+ *
+ * (C) Copyright 2000-2005
+ * Wolfgang Denk, DENX Software Engineering, wd@denx.de.
+ ********************************************************************
+ * NOTE: This header file defines an interface to U-Boot. Including
+ * this (unmodified) header file in another file is considered normal
+ * use of U-Boot, and does *not* fall under the heading of "derived
+ * work".
+ ********************************************************************
+ */
+
+#pragma once
+
+#include <types.h>
+
+/*
+ * Compression Types
+ *
+ * The following are exposed to uImage header.
+ * New IDs *MUST* be appended at the end of the list and *NEVER*
+ * inserted for backward compatibility.
+ */
+enum {
+       IH_COMP_NONE            = 0,    /*  No   Compression Used       */
+       IH_COMP_GZIP,                   /* gzip  Compression Used       */
+       IH_COMP_BZIP2,                  /* bzip2 Compression Used       */
+       IH_COMP_LZMA,                   /* lzma  Compression Used       */
+       IH_COMP_LZO,                    /* lzo   Compression Used       */
+       IH_COMP_LZ4,                    /* lz4   Compression Used       */
+       IH_COMP_ZSTD,                   /* zstd   Compression Used      */
+
+       IH_COMP_COUNT,
+};
+
+#define IH_MAGIC       0x27051956      /* Image Magic Number           */
+#define IH_MAGIC_OKLI  0x4f4b4c49      /* 'OKLI' Magic Number */
+#define IH_NMLEN               32      /* Image Name Length            */
+
+/*
+ * Legacy format image header,
+ * all data in network byte order (aka natural aka bigendian).
+ */
+typedef struct image_header {
+       uint32_t        ih_magic;       /* Image Header Magic Number    */
+       uint32_t        ih_hcrc;        /* Image Header CRC Checksum    */
+       uint32_t        ih_time;        /* Image Creation Timestamp     */
+       uint32_t        ih_size;        /* Image Data Size              */
+       uint32_t        ih_load;        /* Data  Load  Address          */
+       uint32_t        ih_ep;          /* Entry Point Address          */
+       uint32_t        ih_dcrc;        /* Image Data CRC Checksum      */
+       uint8_t         ih_os;          /* Operating System             */
+       uint8_t         ih_arch;        /* CPU architecture             */
+       uint8_t         ih_type;        /* Image Type                   */
+       uint8_t         ih_comp;        /* Compression Type             */
+       uint8_t         ih_name[IH_NMLEN];      /* Image Name           */
+} image_header_t;
diff --git a/target/linux/mpc85xx/image/spi-loader/include/init.h b/target/linux/mpc85xx/image/spi-loader/include/init.h
new file mode 100644 (file)
index 0000000..f3a15f5
--- /dev/null
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: BSD-2-Clause
+/*
+ * Copyright (c) 2022 Matthias Schiffer <mschiffer@universe-factory.net>
+ */
+
+#pragma once
+
+void start(void);
diff --git a/target/linux/mpc85xx/image/spi-loader/include/io.h b/target/linux/mpc85xx/image/spi-loader/include/io.h
new file mode 100644 (file)
index 0000000..d6eed5e
--- /dev/null
@@ -0,0 +1,101 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#pragma once
+
+#include <stdint.h>
+
+/*
+ * Low-level I/O routines.
+ *
+ * Copied from <file:arch/powerpc/include/asm/io.h> (which has no copyright)
+ */
+static inline uint8_t in_8(const volatile uint8_t *addr)
+{
+       int ret;
+
+       __asm__ __volatile__("lbz%U1%X1 %0,%1; twi 0,%0,0; isync"
+                            : "=r" (ret) : "m" (*addr));
+       return ret;
+}
+
+static inline void out_8(volatile uint8_t *addr, uint8_t val)
+{
+       __asm__ __volatile__("stb%U0%X0 %1,%0; sync"
+                            : "=m" (*addr) : "r" (val));
+}
+
+static inline uint16_t in_le16(const volatile uint16_t *addr)
+{
+       uint32_t ret;
+
+       __asm__ __volatile__("lhbrx %0,0,%1; twi 0,%0,0; isync"
+                            : "=r" (ret) : "r" (addr), "m" (*addr));
+
+       return ret;
+}
+
+static inline uint16_t in_be16(const volatile uint16_t *addr)
+{
+       uint32_t ret;
+
+       __asm__ __volatile__("lhz%U1%X1 %0,%1; twi 0,%0,0; isync"
+                            : "=r" (ret) : "m" (*addr));
+       return ret;
+}
+
+static inline void out_le16(volatile uint16_t *addr, uint16_t val)
+{
+       __asm__ __volatile__("sthbrx %1,0,%2; sync" : "=m" (*addr)
+                            : "r" (val), "r" (addr));
+}
+
+static inline void out_be16(volatile uint16_t *addr, uint16_t val)
+{
+       __asm__ __volatile__("sth%U0%X0 %1,%0; sync"
+                            : "=m" (*addr) : "r" (val));
+}
+
+static inline uint32_t in_le32(const volatile uint32_t *addr)
+{
+       uint32_t ret;
+
+       __asm__ __volatile__("lwbrx %0,0,%1; twi 0,%0,0; isync"
+                            : "=r" (ret) : "r" (addr), "m" (*addr));
+       return ret;
+}
+
+static inline uint32_t in_be32(const volatile uint32_t *addr)
+{
+       uint32_t ret;
+
+       __asm__ __volatile__("lwz%U1%X1 %0,%1; twi 0,%0,0; isync"
+                            : "=r" (ret) : "m" (*addr));
+       return ret;
+}
+
+static inline void out_le32(volatile uint32_t *addr, uint32_t val)
+{
+       __asm__ __volatile__("stwbrx %1,0,%2; sync" : "=m" (*addr)
+                            : "r" (val), "r" (addr));
+}
+
+static inline void out_be32(volatile uint32_t *addr, uint32_t val)
+{
+       __asm__ __volatile__("stw%U0%X0 %1,%0; sync"
+                            : "=m" (*addr) : "r" (val));
+}
+
+static inline void sync(void)
+{
+       asm volatile("sync" : : : "memory");
+}
+
+static inline void eieio(void)
+{
+       asm volatile("eieio" : : : "memory");
+}
+
+static inline void barrier(void)
+{
+       asm volatile("" : : : "memory");
+}
diff --git a/target/linux/mpc85xx/image/spi-loader/include/ppc_asm.h b/target/linux/mpc85xx/image/spi-loader/include/ppc_asm.h
new file mode 100644 (file)
index 0000000..5dae34e
--- /dev/null
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#pragma once
+/*
+ *
+ * Definitions used by various bits of low-level assembly code on PowerPC.
+ *
+ * Copyright (C) 1995-1999 Gary Thomas, Paul Mackerras, Cort Dougan.
+ */
+
+/* Condition Register Bit Fields */
+
+#define        cr0     0
+#define        cr1     1
+#define        cr2     2
+#define        cr3     3
+#define        cr4     4
+#define        cr5     5
+#define        cr6     6
+#define        cr7     7
+
+/* General Purpose Registers (GPRs) */
+
+#define        r0      0
+#define        r1      1
+#define        r2      2
+#define        r3      3
+#define        r4      4
+#define        r5      5
+#define        r6      6
+#define        r7      7
+#define        r8      8
+#define        r9      9
+#define        r10     10
+#define        r11     11
+#define        r12     12
+#define        r13     13
+#define        r14     14
+#define        r15     15
+#define        r16     16
+#define        r17     17
+#define        r18     18
+#define        r19     19
+#define        r20     20
+#define        r21     21
+#define        r22     22
+#define        r23     23
+#define        r24     24
+#define        r25     25
+#define        r26     26
+#define        r27     27
+#define        r28     28
+#define        r29     29
+#define        r30     30
+#define        r31     31
diff --git a/target/linux/mpc85xx/image/spi-loader/include/serial.h b/target/linux/mpc85xx/image/spi-loader/include/serial.h
new file mode 100644 (file)
index 0000000..1d80ede
--- /dev/null
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: BSD-2-Clause
+
+#pragma once
+
+int serial_console_getchar(void);
+int serial_console_tstc(void);
+void serial_console_putchar(char c);
+void serial_console_init(void);
+
diff --git a/target/linux/mpc85xx/image/spi-loader/include/spi-nor.h b/target/linux/mpc85xx/image/spi-loader/include/spi-nor.h
new file mode 100644 (file)
index 0000000..efbf386
--- /dev/null
@@ -0,0 +1,11 @@
+// SPDX-License-Identifier: BSD-2-Clause
+/*
+ * Copyright (c) 2022 Matthias Schiffer <mschiffer@universe-factory.net>
+ */
+
+#pragma once
+
+#include <types.h>
+
+int spi_nor_read_id(void);
+int spi_nor_read_data(void *dest, size_t pos, size_t len);
diff --git a/target/linux/mpc85xx/image/spi-loader/include/spi.h b/target/linux/mpc85xx/image/spi-loader/include/spi.h
new file mode 100644 (file)
index 0000000..98e799c
--- /dev/null
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Common SPI Interface: Controller-specific definitions
+ *
+ * Copyright (c) 2022 Matthias Schiffer <mschiffer@universe-factory.net>
+ *
+ * Based on U-boot's spi.h:
+ *
+ * (C) Copyright 2001
+ * Gerald Van Baren, Custom IDEAS, vanbaren@cideas.com.
+ */
+
+#pragma once
+
+#include <types.h>
+
+/* SPI mode flags */
+#define SPI_CPHA       BIT(0)  /* clock phase (1 = SPI_CLOCK_PHASE_SECOND) */
+#define SPI_CPOL       BIT(1)  /* clock polarity (1 = SPI_POLARITY_HIGH) */
+#define SPI_MODE_0     (0|0)                   /* (original MicroWire) */
+#define SPI_MODE_1     (0|SPI_CPHA)
+#define SPI_MODE_2     (SPI_CPOL|0)
+#define SPI_MODE_3     (SPI_CPOL|SPI_CPHA)
+
+struct spi_transfer {
+       const void *tx_buf;
+       void *rx_buf;
+       size_t len;
+};
+
+static inline size_t spi_message_len(const struct spi_transfer *msg, int n) {
+       size_t total = 0;
+       for (int i = 0; i < n; i++) {
+               total += msg[i].len;
+       }
+       return total;
+}
+
+int spi_init(unsigned int cs, unsigned int max_hz, unsigned int mode);
+int spi_claim_bus(void);
+void spi_release_bus(void);
+int spi_xfer(const struct spi_transfer *msg, int n);
+size_t spi_max_xfer(void);
diff --git a/target/linux/mpc85xx/image/spi-loader/include/stdio.h b/target/linux/mpc85xx/image/spi-loader/include/stdio.h
new file mode 100644 (file)
index 0000000..6049430
--- /dev/null
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: BSD-2-Clause
+/*
+ * Copyright (c) 2022 Matthias Schiffer <mschiffer@universe-factory.net>
+ */
+
+#pragma once
+
+#include <serial.h>
+#include <types.h>
+
+static inline int getchar(void)
+{
+       return serial_console_getchar();
+}
+
+static inline int tstc(void)
+{
+       return serial_console_tstc();
+}
+
+static inline int putchar(char c)
+{
+       if (c == '\n')
+               serial_console_putchar('\r');
+       serial_console_putchar(c);
+       return 0;
+}
+
+int puts(const char *s);
+
+/* Utility functions */
+void put_u4(uint8_t v);
+void put_u8(uint8_t v);
+void put_u16(uint16_t v);
+void put_u32(uint32_t v);
+void put_ptr(const void *p);
+void put_array(const void *p, size_t l);
+
+#define put_with_label(label, put, value) do { \
+               puts(label); \
+               put(value); \
+               puts("\n"); \
+       } while (0)
diff --git a/target/linux/mpc85xx/image/spi-loader/include/string.h b/target/linux/mpc85xx/image/spi-loader/include/string.h
new file mode 100644 (file)
index 0000000..f9e6fed
--- /dev/null
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#pragma once
+
+#include <stddef.h>
+
+extern char *strcpy(char *dest, const char *src);
+extern char *strncpy(char *dest, const char *src, size_t n);
+extern char *strcat(char *dest, const char *src);
+extern char *strchr(const char *s, int c);
+extern char *strrchr(const char *s, int c);
+extern int strcmp(const char *s1, const char *s2);
+extern int strncmp(const char *s1, const char *s2, size_t n);
+extern size_t strlen(const char *s);
+extern size_t strnlen(const char *s, size_t count);
+
+extern void *memset(void *s, int c, size_t n);
+extern void *memmove(void *dest, const void *src, unsigned long n);
+extern void *memcpy(void *dest, const void *src, unsigned long n);
+extern void *memchr(const void *s, int c, size_t n);
+extern int memcmp(const void *s1, const void *s2, size_t n);
+
+extern void flush_cache(void *, unsigned long);
diff --git a/target/linux/mpc85xx/image/spi-loader/include/types.h b/target/linux/mpc85xx/image/spi-loader/include/types.h
new file mode 100644 (file)
index 0000000..b80f9e6
--- /dev/null
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Code originates from Linux kernel arch/powerpc/boot
+ * (types.h, swab.h, of.h)
+ */
+
+#pragma once
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+
+#define BIT(nr)        (1UL << (nr))
+
+#define min(x,y) ({ \
+       typeof(x) _x = (x);     \
+       typeof(y) _y = (y);     \
+       (void) (&_x == &_y);    \
+       _x < _y ? _x : _y; })
+
+#define max(x,y) ({ \
+       typeof(x) _x = (x);     \
+       typeof(y) _y = (y);     \
+       (void) (&_x == &_y);    \
+       _x > _y ? _x : _y; })
+
+#define min_t(type, a, b) min(((type) a), ((type) b))
+#define max_t(type, a, b) max(((type) a), ((type) b))
+
+static inline uint16_t swab16(uint16_t x)
+{
+       return  ((x & (uint16_t)0x00ffU) << 8) |
+               ((x & (uint16_t)0xff00U) >> 8);
+}
+
+static inline uint32_t swab32(uint32_t x)
+{
+       return  ((x & (uint32_t)0x000000ffUL) << 24) |
+               ((x & (uint32_t)0x0000ff00UL) <<  8) |
+               ((x & (uint32_t)0x00ff0000UL) >>  8) |
+               ((x & (uint32_t)0xff000000UL) >> 24);
+}
+
+static inline uint64_t swab64(uint64_t x)
+{
+       return  (uint64_t)((x & (uint64_t)0x00000000000000ffULL) << 56) |
+               (uint64_t)((x & (uint64_t)0x000000000000ff00ULL) << 40) |
+               (uint64_t)((x & (uint64_t)0x0000000000ff0000ULL) << 24) |
+               (uint64_t)((x & (uint64_t)0x00000000ff000000ULL) <<  8) |
+               (uint64_t)((x & (uint64_t)0x000000ff00000000ULL) >>  8) |
+               (uint64_t)((x & (uint64_t)0x0000ff0000000000ULL) >> 24) |
+               (uint64_t)((x & (uint64_t)0x00ff000000000000ULL) >> 40) |
+               (uint64_t)((x & (uint64_t)0xff00000000000000ULL) >> 56);
+}
+
+#ifdef __LITTLE_ENDIAN__
+#define cpu_to_be16(x) swab16(x)
+#define be16_to_cpu(x) swab16(x)
+#define cpu_to_be32(x) swab32(x)
+#define be32_to_cpu(x) swab32(x)
+#define cpu_to_be64(x) swab64(x)
+#define be64_to_cpu(x) swab64(x)
+#else
+#define cpu_to_be16(x) (x)
+#define be16_to_cpu(x) (x)
+#define cpu_to_be32(x) (x)
+#define be32_to_cpu(x) (x)
+#define cpu_to_be64(x) (x)
+#define be64_to_cpu(x) (x)
+#endif
diff --git a/target/linux/mpc85xx/image/spi-loader/loader.c b/target/linux/mpc85xx/image/spi-loader/loader.c
new file mode 100644 (file)
index 0000000..145a723
--- /dev/null
@@ -0,0 +1,107 @@
+// SPDX-License-Identifier: BSD-2-Clause
+/*
+ * Copyright (c) 2022 Matthias Schiffer <mschiffer@universe-factory.net>
+ */
+
+#include <image.h>
+#include <init.h>
+#include <spi.h>
+#include <spi-nor.h>
+#include <stdio.h>
+#include <string.h>
+
+static bool check_image_header(const image_header_t *header)
+{
+       if (header->ih_magic != cpu_to_be32(IH_MAGIC_OKLI)) {
+               puts("Invalid image magic\n");
+               return false;
+       }
+
+       if (header->ih_comp != cpu_to_be32(IH_COMP_NONE)) {
+               puts("Unsupported compressed image\n");
+               return false;
+       }
+
+       return true;
+}
+
+static uint32_t do_load(void)
+{
+       image_header_t header;
+       uint32_t ih_size, ih_load, ih_ep;
+
+       if (spi_nor_read_id())
+               return UINT32_MAX;
+
+       puts("Reading image header...\n");
+       if (spi_nor_read_data(&header, CONFIG_IMAGE_OFFSET, sizeof(header)))
+               return UINT32_MAX;
+
+       if (!check_image_header(&header))
+               return UINT32_MAX;
+
+       header.ih_name[sizeof(header.ih_name) - 1] = 0;
+       ih_size = be32_to_cpu(header.ih_size);
+       ih_load = be32_to_cpu(header.ih_load);
+       ih_ep = be32_to_cpu(header.ih_ep);
+
+       put_with_label("Image Name:   ", puts, (const char *)header.ih_name);
+       put_with_label("Data Size:    ", put_u32, ih_size);
+       put_with_label("Load Address: ", put_u32, ih_load);
+       put_with_label("Entry Point:  ", put_u32, ih_ep);
+
+       puts("Reading image data...\n");
+       void *loadaddr = (void *)ih_load;
+       if (spi_nor_read_data(loadaddr, CONFIG_IMAGE_OFFSET + sizeof(header),
+                             ih_size))
+               return false;
+
+       flush_cache(loadaddr, ih_size);
+
+       return ih_ep;
+}
+
+static void enter_image(uint32_t addr)
+{
+       typedef void (*entry_t)(void);
+       entry_t entry = (entry_t)addr;
+
+       puts("Starting image...\n");
+       entry();
+}
+
+static void load(void)
+{
+       uint32_t addr;
+       int ret;
+
+       ret = spi_init(0, CONFIG_SPI_MAX_HZ, SPI_MODE_0);
+       if (ret) {
+               puts("Failed to initialize SPI controller\n");
+               return;
+       }
+
+       ret = spi_claim_bus();
+       if (ret) {
+               puts("Failed to enable SPI controller\n");
+               return;
+       }
+
+       addr = do_load();
+
+       spi_release_bus();
+
+       if (addr != UINT32_MAX)
+               enter_image(addr);
+}
+
+void start(void)
+{
+       serial_console_init();
+       puts("=== " CONFIG_PROGRAM_NAME " ===\n");
+
+       load();
+
+       puts("Halting execution.\n");
+       while (true) {}
+}
diff --git a/target/linux/mpc85xx/image/spi-loader/loader.lds b/target/linux/mpc85xx/image/spi-loader/loader.lds
new file mode 100644 (file)
index 0000000..3af24a1
--- /dev/null
@@ -0,0 +1,31 @@
+OUTPUT_ARCH(powerpc:common)
+ENTRY(_start)
+EXTERN(_start)
+SECTIONS {
+       .text :
+       {
+               _text_start = .;
+               *(.text*)
+               _text_len = ABSOLUTE(. - _text_start);
+       }
+       .data :
+       {
+               _data_start = .;
+               *(.rodata*)
+               *(.data*)
+               *(.sdata*)
+               _data_len = ABSOLUTE(. - _data_start);
+       }
+       . = ALIGN(4096);
+       .bss :
+       {
+               _bss_start = .;
+               . += 4K;
+               _stack_top = .;
+               *(.bss*)
+               *(.sbss*)
+               _bss_len = ABSOLUTE(. - _bss_start);
+       }
+       . = ALIGN(4096);
+       _end = .;
+}
diff --git a/target/linux/mpc85xx/image/spi-loader/stdio.c b/target/linux/mpc85xx/image/spi-loader/stdio.c
new file mode 100644 (file)
index 0000000..3aa55d0
--- /dev/null
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: BSD-2-Clause
+/*
+ * Copyright (c) 2022 Matthias Schiffer <mschiffer@universe-factory.net>
+ */
+
+#include <stdio.h>
+
+int puts(const char *s)
+{
+       while (*s)
+               putchar(*s++);
+       return 0;
+}
+
+void put_u4(uint8_t v)
+{
+       v &= 0xf;
+       switch (v) {
+       case 0x0 ... 0x9:
+               putchar('0' + v);
+               break;
+       case 0xa ... 0xf:
+               putchar('a' + (v - 0xa));
+       }
+}
+
+void put_u8(uint8_t v)
+{
+       put_u4(v >> 4);
+       put_u4(v);
+}
+
+void put_u16(uint16_t v)
+{
+       put_u8(v >> 8);
+       put_u8(v);
+}
+
+void put_u32(uint32_t v)
+{
+       put_u16(v >> 16);
+       put_u16(v);
+}
+
+void put_ptr(const void *p)
+{
+       put_u32((uint32_t)p);
+}
+
+void put_array(const void *p, size_t l)
+{
+       const uint8_t *c = p;
+       size_t i;
+       for (i = 0; i < l; i++) {
+               put_u8(c[i]);
+               putchar(' ');
+       }
+       putchar('\n');
+}
diff --git a/target/linux/mpc85xx/image/spi-loader/string.S b/target/linux/mpc85xx/image/spi-loader/string.S
new file mode 100644 (file)
index 0000000..11d05a8
--- /dev/null
@@ -0,0 +1,265 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) Paul Mackerras 1997.
+ *
+ * NOTE: this code runs in 32 bit mode and is packaged as ELF32.
+ */
+
+#include <ppc_asm.h>
+
+       .text
+       .globl  strcpy
+strcpy:
+       addi    r5,r3,-1
+       addi    r4,r4,-1
+1:     lbzu    r0,1(r4)
+       cmpwi   0,r0,0
+       stbu    r0,1(r5)
+       bne     1b
+       blr
+
+       .globl  strncpy
+strncpy:
+       cmpwi   0,r5,0
+       beqlr
+       mtctr   r5
+       addi    r6,r3,-1
+       addi    r4,r4,-1
+1:     lbzu    r0,1(r4)
+       cmpwi   0,r0,0
+       stbu    r0,1(r6)
+       bdnzf   2,1b            /* dec ctr, branch if ctr != 0 && !cr0.eq */
+       blr
+
+       .globl  strcat
+strcat:
+       addi    r5,r3,-1
+       addi    r4,r4,-1
+1:     lbzu    r0,1(r5)
+       cmpwi   0,r0,0
+       bne     1b
+       addi    r5,r5,-1
+1:     lbzu    r0,1(r4)
+       cmpwi   0,r0,0
+       stbu    r0,1(r5)
+       bne     1b
+       blr
+
+       .globl  strchr
+strchr:
+       addi    r3,r3,-1
+1:     lbzu    r0,1(r3)
+       cmpw    0,r0,r4
+       beqlr
+       cmpwi   0,r0,0
+       bne     1b
+       li      r3,0
+       blr
+
+       .globl  strcmp
+strcmp:
+       addi    r5,r3,-1
+       addi    r4,r4,-1
+1:     lbzu    r3,1(r5)
+       cmpwi   1,r3,0
+       lbzu    r0,1(r4)
+       subf.   r3,r0,r3
+       beqlr   1
+       beq     1b
+       blr
+
+       .globl  strncmp
+strncmp:
+       mtctr   r5
+       addi    r5,r3,-1
+       addi    r4,r4,-1
+1:     lbzu    r3,1(r5)
+       cmpwi   1,r3,0
+       lbzu    r0,1(r4)
+       subf.   r3,r0,r3
+       beqlr   1
+       bdnzt   eq,1b
+       blr
+
+       .globl  strlen
+strlen:
+       addi    r4,r3,-1
+1:     lbzu    r0,1(r4)
+       cmpwi   0,r0,0
+       bne     1b
+       subf    r3,r3,r4
+       blr
+
+       .globl  memset
+memset:
+       rlwimi  r4,r4,8,16,23
+       rlwimi  r4,r4,16,0,15
+       addi    r6,r3,-4
+       cmplwi  0,r5,4
+       blt     7f
+       stwu    r4,4(r6)
+       beqlr
+       andi.   r0,r6,3
+       add     r5,r0,r5
+       subf    r6,r0,r6
+       rlwinm  r0,r5,32-2,2,31
+       mtctr   r0
+       bdz     6f
+1:     stwu    r4,4(r6)
+       bdnz    1b
+6:     andi.   r5,r5,3
+7:     cmpwi   0,r5,0
+       beqlr
+       mtctr   r5
+       addi    r6,r6,3
+8:     stbu    r4,1(r6)
+       bdnz    8b
+       blr
+
+       .globl  memmove
+memmove:
+       cmplw   0,r3,r4
+       bgt     backwards_memcpy
+       /* fall through */
+
+       .globl  memcpy
+memcpy:
+       rlwinm. r7,r5,32-3,3,31         /* r7 = r5 >> 3 */
+       addi    r6,r3,-4
+       addi    r4,r4,-4
+       beq     3f                      /* if less than 8 bytes to do */
+       andi.   r0,r6,3                 /* get dest word aligned */
+       mtctr   r7
+       bne     5f
+       andi.   r0,r4,3                 /* check src word aligned too */
+       bne     3f
+1:     lwz     r7,4(r4)
+       lwzu    r8,8(r4)
+       stw     r7,4(r6)
+       stwu    r8,8(r6)
+       bdnz    1b
+       andi.   r5,r5,7
+2:     cmplwi  0,r5,4
+       blt     3f
+       lwzu    r0,4(r4)
+       addi    r5,r5,-4
+       stwu    r0,4(r6)
+3:     cmpwi   0,r5,0
+       beqlr
+       mtctr   r5
+       addi    r4,r4,3
+       addi    r6,r6,3
+4:     lbzu    r0,1(r4)
+       stbu    r0,1(r6)
+       bdnz    4b
+       blr
+5:     subfic  r0,r0,4
+       cmpw    cr1,r0,r5
+       add     r7,r0,r4
+       andi.   r7,r7,3                 /* will source be word-aligned too? */
+       ble     cr1,3b
+       bne     3b                      /* do byte-by-byte if not */
+       mtctr   r0
+6:     lbz     r7,4(r4)
+       addi    r4,r4,1
+       stb     r7,4(r6)
+       addi    r6,r6,1
+       bdnz    6b
+       subf    r5,r0,r5
+       rlwinm. r7,r5,32-3,3,31
+       beq     2b
+       mtctr   r7
+       b       1b
+
+       .globl  backwards_memcpy
+backwards_memcpy:
+       rlwinm. r7,r5,32-3,3,31         /* r7 = r5 >> 3 */
+       add     r6,r3,r5
+       add     r4,r4,r5
+       beq     3f
+       andi.   r0,r6,3
+       mtctr   r7
+       bne     5f
+       andi.   r0,r4,3
+       bne     3f
+1:     lwz     r7,-4(r4)
+       lwzu    r8,-8(r4)
+       stw     r7,-4(r6)
+       stwu    r8,-8(r6)
+       bdnz    1b
+       andi.   r5,r5,7
+2:     cmplwi  0,r5,4
+       blt     3f
+       lwzu    r0,-4(r4)
+       subi    r5,r5,4
+       stwu    r0,-4(r6)
+3:     cmpwi   0,r5,0
+       beqlr
+       mtctr   r5
+4:     lbzu    r0,-1(r4)
+       stbu    r0,-1(r6)
+       bdnz    4b
+       blr
+5:     cmpw    cr1,r0,r5
+       subf    r7,r0,r4
+       andi.   r7,r7,3
+       ble     cr1,3b
+       bne     3b
+       mtctr   r0
+6:     lbzu    r7,-1(r4)
+       stbu    r7,-1(r6)
+       bdnz    6b
+       subf    r5,r0,r5
+       rlwinm. r7,r5,32-3,3,31
+       beq     2b
+       mtctr   r7
+       b       1b
+
+       .globl  memchr
+memchr:
+       cmpwi   0,r5,0
+       blelr
+       mtctr   r5
+       addi    r3,r3,-1
+1:     lbzu    r0,1(r3)
+       cmpw    r0,r4
+       beqlr
+       bdnz    1b
+       li      r3,0
+       blr
+
+       .globl  memcmp
+memcmp:
+       cmpwi   0,r5,0
+       ble     2f
+       mtctr   r5
+       addi    r6,r3,-1
+       addi    r4,r4,-1
+1:     lbzu    r3,1(r6)
+       lbzu    r0,1(r4)
+       subf.   r3,r0,r3
+       bdnzt   2,1b
+       blr
+2:     li      r3,0
+       blr
+
+
+/*
+ * Flush the dcache and invalidate the icache for a range of addresses.
+ *
+ * flush_cache(addr, len)
+ */
+       .global flush_cache
+flush_cache:
+       addi    4,4,0x1f        /* len = (len + 0x1f) / 0x20 */
+       rlwinm. 4,4,27,5,31
+       mtctr   4
+       beqlr
+1:     dcbf    0,3
+       icbi    0,3
+       addi    3,3,0x20
+       bdnz    1b
+       sync
+       isync
+       blr
+