crypto: arm64/aegis128 - implement plain NEON version

author Ard Biesheuvel <ard.biesheuvel@linaro.org>

Sun, 11 Aug 2019 22:59:12 +0000 (01:59 +0300)

committer Herbert Xu <herbert@gondor.apana.org.au>

Thu, 15 Aug 2019 11:52:15 +0000 (21:52 +1000)
author Ard Biesheuvel <ard.biesheuvel@linaro.org>
Sun, 11 Aug 2019 22:59:12 +0000 (01:59 +0300)
committer Herbert Xu <herbert@gondor.apana.org.au>
Thu, 15 Aug 2019 11:52:15 +0000 (21:52 +1000)
diff --git a/crypto/Makefile b/crypto/Makefile

index 99a9fa9087d1fcb89c289a7590a9aecb2b2cd476..0d2cdd523fd982cf532451d07ff32e7a3d6e9802 100644 (file)
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -98,7 +98,14 @@ CFLAGS_aegis128-neon-inner.o += -mfpu=crypto-neon-fp-armv8
  aegis128-$(CONFIG_CRYPTO_AEGIS128_SIMD) += aegis128-neon.o aegis128-neon-inner.o
  endif
  ifeq ($(ARCH),arm64)
-CFLAGS_aegis128-neon-inner.o += -ffreestanding -mcpu=generic+crypto
+aegis128-cflags-y := -ffreestanding -mcpu=generic+crypto
+aegis128-cflags-$(CONFIG_CC_IS_GCC) += -ffixed-q16 -ffixed-q17 -ffixed-q18 \
+                                      -ffixed-q19 -ffixed-q20 -ffixed-q21 \
+                                      -ffixed-q22 -ffixed-q23 -ffixed-q24 \
+                                      -ffixed-q25 -ffixed-q26 -ffixed-q27 \
+                                      -ffixed-q28 -ffixed-q29 -ffixed-q30 \
+                                      -ffixed-q31
+CFLAGS_aegis128-neon-inner.o += $(aegis128-cflags-y)
  CFLAGS_REMOVE_aegis128-neon-inner.o += -mgeneral-regs-only
  aegis128-$(CONFIG_CRYPTO_AEGIS128_SIMD) += aegis128-neon.o aegis128-neon-inner.o
  endif
diff --git a/crypto/aegis128-neon-inner.c b/crypto/aegis128-neon-inner.c

index 3d8043c4832b35c84100a88edeb3b70aa0baa2e8..ed55568afd1be7ab184015fe625ecc10f46740ba 100644 (file)
--- a/crypto/aegis128-neon-inner.c
+++ b/crypto/aegis128-neon-inner.c
@@ -17,6 +17,8 @@
  
  #include <stddef.h>
  
+extern int aegis128_have_aes_insn;
+
  void *memcpy(void *dest, const void *src, size_t n);
  void *memset(void *s, int c, size_t n);
  
@@ -24,6 +26,8 @@ struct aegis128_state {
         uint8x16_t v[5];
  };
  
+extern const uint8x16x4_t crypto_aes_sbox[];
+
  static struct aegis128_state aegis128_load_state_neon(const void *state)
  {
         return (struct aegis128_state){ {
@@ -49,6 +53,46 @@ uint8x16_t aegis_aes_round(uint8x16_t w)
  {
         uint8x16_t z = {};
  
+#ifdef CONFIG_ARM64
+       if (!__builtin_expect(aegis128_have_aes_insn, 1)) {
+               static const uint8x16_t shift_rows = {
+                       0x0, 0x5, 0xa, 0xf, 0x4, 0x9, 0xe, 0x3,
+                       0x8, 0xd, 0x2, 0x7, 0xc, 0x1, 0x6, 0xb,
+               };
+               static const uint8x16_t ror32by8 = {
+                       0x1, 0x2, 0x3, 0x0, 0x5, 0x6, 0x7, 0x4,
+                       0x9, 0xa, 0xb, 0x8, 0xd, 0xe, 0xf, 0xc,
+               };
+               uint8x16_t v;
+
+               // shift rows
+               w = vqtbl1q_u8(w, shift_rows);
+
+               // sub bytes
+               if (!IS_ENABLED(CONFIG_CC_IS_GCC)) {
+                       v = vqtbl4q_u8(crypto_aes_sbox[0], w);
+                       v = vqtbx4q_u8(v, crypto_aes_sbox[1], w - 0x40);
+                       v = vqtbx4q_u8(v, crypto_aes_sbox[2], w - 0x80);
+                       v = vqtbx4q_u8(v, crypto_aes_sbox[3], w - 0xc0);
+               } else {
+                       asm("tbl %0.16b, {v16.16b-v19.16b}, %1.16b" : "=w"(v) : "w"(w));
+                       w -= 0x40;
+                       asm("tbx %0.16b, {v20.16b-v23.16b}, %1.16b" : "+w"(v) : "w"(w));
+                       w -= 0x40;
+                       asm("tbx %0.16b, {v24.16b-v27.16b}, %1.16b" : "+w"(v) : "w"(w));
+                       w -= 0x40;
+                       asm("tbx %0.16b, {v28.16b-v31.16b}, %1.16b" : "+w"(v) : "w"(w));
+               }
+
+               // mix columns
+               w = (v << 1) ^ (uint8x16_t)(((int8x16_t)v >> 7) & 0x1b);
+               w ^= (uint8x16_t)vrev32q_u16((uint16x8_t)v);
+               w ^= vqtbl1q_u8(v ^ w, ror32by8);
+
+               return w;
+       }
+#endif
+
         /*
          * We use inline asm here instead of the vaeseq_u8/vaesmcq_u8 intrinsics
          * to force the compiler to issue the aese/aesmc instructions in pairs.
@@ -73,10 +117,27 @@ struct aegis128_state aegis128_update_neon(struct aegis128_state st,
         return st;
  }
  
+static inline __attribute__((always_inline))
+void preload_sbox(void)
+{
+       if (!IS_ENABLED(CONFIG_ARM64) ||
+           !IS_ENABLED(CONFIG_CC_IS_GCC) ||
+           __builtin_expect(aegis128_have_aes_insn, 1))
+               return;
+
+       asm("ld1        {v16.16b-v19.16b}, [%0], #64    \n\t"
+           "ld1        {v20.16b-v23.16b}, [%0], #64    \n\t"
+           "ld1        {v24.16b-v27.16b}, [%0], #64    \n\t"
+           "ld1        {v28.16b-v31.16b}, [%0]         \n\t"
+           :: "r"(crypto_aes_sbox));
+}
+
  void crypto_aegis128_update_neon(void *state, const void *msg)
  {
         struct aegis128_state st = aegis128_load_state_neon(state);
  
+       preload_sbox();
+
         st = aegis128_update_neon(st, vld1q_u8(msg));
  
         aegis128_save_state_neon(st, state);
@@ -88,6 +149,8 @@ void crypto_aegis128_encrypt_chunk_neon(void *state, void *dst, const void *src,
         struct aegis128_state st = aegis128_load_state_neon(state);
         uint8x16_t msg;
  
+       preload_sbox();
+
         while (size >= AEGIS_BLOCK_SIZE) {
                 uint8x16_t s = st.v[1] ^ (st.v[2] & st.v[3]) ^ st.v[4];
  
@@ -120,6 +183,8 @@ void crypto_aegis128_decrypt_chunk_neon(void *state, void *dst, const void *src,
         struct aegis128_state st = aegis128_load_state_neon(state);
         uint8x16_t msg;
  
+       preload_sbox();
+
         while (size >= AEGIS_BLOCK_SIZE) {
                 msg = vld1q_u8(src) ^ st.v[1] ^ (st.v[2] & st.v[3]) ^ st.v[4];
                 st = aegis128_update_neon(st, msg);
diff --git a/crypto/aegis128-neon.c b/crypto/aegis128-neon.c

index c1c0a1686f67a1511ebb06f82f237c2eef43d745..751f9c195aa42a0b43916c16362e15b32497000c 100644 (file)
--- a/crypto/aegis128-neon.c
+++ b/crypto/aegis128-neon.c
@@ -14,9 +14,15 @@ void crypto_aegis128_encrypt_chunk_neon(void *state, void *dst, const void *src,
  void crypto_aegis128_decrypt_chunk_neon(void *state, void *dst, const void *src,
                                         unsigned int size);
  
+int aegis128_have_aes_insn __ro_after_init;
+
  bool crypto_aegis128_have_simd(void)
  {
-       return cpu_have_feature(cpu_feature(AES));
+       if (cpu_have_feature(cpu_feature(AES))) {
+               aegis128_have_aes_insn = 1;
+               return true;
+       }
+       return IS_ENABLED(CONFIG_ARM64);
  }
  
  void crypto_aegis128_update_simd(union aegis_block *state, const void *msg)
author	Ard Biesheuvel <ard.biesheuvel@linaro.org>
	Sun, 11 Aug 2019 22:59:12 +0000 (01:59 +0300)
committer	Herbert Xu <herbert@gondor.apana.org.au>
	Thu, 15 Aug 2019 11:52:15 +0000 (21:52 +1000)
crypto/Makefile		patch \| blob \| history
crypto/aegis128-neon-inner.c		patch \| blob \| history
crypto/aegis128-neon.c		patch \| blob \| history