crypto: aegis128 - duplicate init() and final() hooks in SIMD code
authorArd Biesheuvel <ard.biesheuvel@linaro.org>
Mon, 14 Oct 2019 16:16:45 +0000 (18:16 +0200)
committerHerbert Xu <herbert@gondor.apana.org.au>
Fri, 25 Oct 2019 15:06:05 +0000 (02:06 +1100)
In order to speed up aegis128 processing even more, duplicate the init()
and final() routines as SIMD versions in their entirety. This results
in a 2x speedup on ARM Cortex-A57 for ~1500 byte packets (using AES
instructions).

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
crypto/aegis128-core.c
crypto/aegis128-neon-inner.c
crypto/aegis128-neon.c

index fe7ab66dd8f98ea752eafd8fbc98f28bd66adbe7..71c11cb5bad13ea15915df514cee5506b66febbc 100644 (file)
@@ -60,10 +60,16 @@ static bool aegis128_do_simd(void)
 
 bool crypto_aegis128_have_simd(void);
 void crypto_aegis128_update_simd(struct aegis_state *state, const void *msg);
+void crypto_aegis128_init_simd(struct aegis_state *state,
+                              const union aegis_block *key,
+                              const u8 *iv);
 void crypto_aegis128_encrypt_chunk_simd(struct aegis_state *state, u8 *dst,
                                        const u8 *src, unsigned int size);
 void crypto_aegis128_decrypt_chunk_simd(struct aegis_state *state, u8 *dst,
                                        const u8 *src, unsigned int size);
+void crypto_aegis128_final_simd(struct aegis_state *state,
+                               union aegis_block *tag_xor,
+                               u64 assoclen, u64 cryptlen);
 
 static void crypto_aegis128_update(struct aegis_state *state)
 {
@@ -395,17 +401,21 @@ static int crypto_aegis128_encrypt(struct aead_request *req)
        struct skcipher_walk walk;
        struct aegis_state state;
 
-       crypto_aegis128_init(&state, &ctx->key, req->iv);
-       crypto_aegis128_process_ad(&state, req->src, req->assoclen);
-
        skcipher_walk_aead_encrypt(&walk, req, false);
-       if (aegis128_do_simd())
+       if (aegis128_do_simd()) {
+               crypto_aegis128_init_simd(&state, &ctx->key, req->iv);
+               crypto_aegis128_process_ad(&state, req->src, req->assoclen);
                crypto_aegis128_process_crypt(&state, req, &walk,
                                              crypto_aegis128_encrypt_chunk_simd);
-       else
+               crypto_aegis128_final_simd(&state, &tag, req->assoclen,
+                                          cryptlen);
+       } else {
+               crypto_aegis128_init(&state, &ctx->key, req->iv);
+               crypto_aegis128_process_ad(&state, req->src, req->assoclen);
                crypto_aegis128_process_crypt(&state, req, &walk,
                                              crypto_aegis128_encrypt_chunk);
-       crypto_aegis128_final(&state, &tag, req->assoclen, cryptlen);
+               crypto_aegis128_final(&state, &tag, req->assoclen, cryptlen);
+       }
 
        scatterwalk_map_and_copy(tag.bytes, req->dst, req->assoclen + cryptlen,
                                 authsize, 1);
@@ -426,17 +436,21 @@ static int crypto_aegis128_decrypt(struct aead_request *req)
        scatterwalk_map_and_copy(tag.bytes, req->src, req->assoclen + cryptlen,
                                 authsize, 0);
 
-       crypto_aegis128_init(&state, &ctx->key, req->iv);
-       crypto_aegis128_process_ad(&state, req->src, req->assoclen);
-
        skcipher_walk_aead_decrypt(&walk, req, false);
-       if (aegis128_do_simd())
+       if (aegis128_do_simd()) {
+               crypto_aegis128_init_simd(&state, &ctx->key, req->iv);
+               crypto_aegis128_process_ad(&state, req->src, req->assoclen);
                crypto_aegis128_process_crypt(&state, req, &walk,
                                              crypto_aegis128_decrypt_chunk_simd);
-       else
+               crypto_aegis128_final_simd(&state, &tag, req->assoclen,
+                                          cryptlen);
+       } else {
+               crypto_aegis128_init(&state, &ctx->key, req->iv);
+               crypto_aegis128_process_ad(&state, req->src, req->assoclen);
                crypto_aegis128_process_crypt(&state, req, &walk,
                                              crypto_aegis128_decrypt_chunk);
-       crypto_aegis128_final(&state, &tag, req->assoclen, cryptlen);
+               crypto_aegis128_final(&state, &tag, req->assoclen, cryptlen);
+       }
 
        return crypto_memneq(tag.bytes, zeros, authsize) ? -EBADMSG : 0;
 }
index f05310ca22aa8bee8403355edeb72e4afcafb8a3..2a660ac1bc3a363b3a75219b3034a7417cef7cea 100644 (file)
@@ -132,6 +132,36 @@ void preload_sbox(void)
            :: "r"(crypto_aes_sbox));
 }
 
+void crypto_aegis128_init_neon(void *state, const void *key, const void *iv)
+{
+       static const uint8_t const0[] = {
+               0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d,
+               0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62,
+       };
+       static const uint8_t const1[] = {
+               0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1,
+               0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd,
+       };
+       uint8x16_t k = vld1q_u8(key);
+       uint8x16_t kiv = k ^ vld1q_u8(iv);
+       struct aegis128_state st = {{
+               kiv,
+               vld1q_u8(const1),
+               vld1q_u8(const0),
+               k ^ vld1q_u8(const0),
+               k ^ vld1q_u8(const1),
+       }};
+       int i;
+
+       preload_sbox();
+
+       for (i = 0; i < 5; i++) {
+               st = aegis128_update_neon(st, k);
+               st = aegis128_update_neon(st, kiv);
+       }
+       aegis128_save_state_neon(st, state);
+}
+
 void crypto_aegis128_update_neon(void *state, const void *msg)
 {
        struct aegis128_state st = aegis128_load_state_neon(state);
@@ -210,3 +240,23 @@ void crypto_aegis128_decrypt_chunk_neon(void *state, void *dst, const void *src,
 
        aegis128_save_state_neon(st, state);
 }
+
+void crypto_aegis128_final_neon(void *state, void *tag_xor, uint64_t assoclen,
+                               uint64_t cryptlen)
+{
+       struct aegis128_state st = aegis128_load_state_neon(state);
+       uint8x16_t v;
+       int i;
+
+       preload_sbox();
+
+       v = st.v[3] ^ (uint8x16_t)vcombine_u64(vmov_n_u64(8 * assoclen),
+                                              vmov_n_u64(8 * cryptlen));
+
+       for (i = 0; i < 7; i++)
+               st = aegis128_update_neon(st, v);
+
+       v = vld1q_u8(tag_xor);
+       v ^= st.v[0] ^ st.v[1] ^ st.v[2] ^ st.v[3] ^ st.v[4];
+       vst1q_u8(tag_xor, v);
+}
index 751f9c195aa42a0b43916c16362e15b32497000c..8271b1fa0fbc35418e2c6c016a504fc7d41a8a79 100644 (file)
@@ -8,11 +8,14 @@
 
 #include "aegis.h"
 
+void crypto_aegis128_init_neon(void *state, const void *key, const void *iv);
 void crypto_aegis128_update_neon(void *state, const void *msg);
 void crypto_aegis128_encrypt_chunk_neon(void *state, void *dst, const void *src,
                                        unsigned int size);
 void crypto_aegis128_decrypt_chunk_neon(void *state, void *dst, const void *src,
                                        unsigned int size);
+void crypto_aegis128_final_neon(void *state, void *tag_xor, uint64_t assoclen,
+                               uint64_t cryptlen);
 
 int aegis128_have_aes_insn __ro_after_init;
 
@@ -25,6 +28,15 @@ bool crypto_aegis128_have_simd(void)
        return IS_ENABLED(CONFIG_ARM64);
 }
 
+void crypto_aegis128_init_simd(union aegis_block *state,
+                              const union aegis_block *key,
+                              const u8 *iv)
+{
+       kernel_neon_begin();
+       crypto_aegis128_init_neon(state, key, iv);
+       kernel_neon_end();
+}
+
 void crypto_aegis128_update_simd(union aegis_block *state, const void *msg)
 {
        kernel_neon_begin();
@@ -47,3 +59,12 @@ void crypto_aegis128_decrypt_chunk_simd(union aegis_block *state, u8 *dst,
        crypto_aegis128_decrypt_chunk_neon(state, dst, src, size);
        kernel_neon_end();
 }
+
+void crypto_aegis128_final_simd(union aegis_block *state,
+                               union aegis_block *tag_xor,
+                               u64 assoclen, u64 cryptlen)
+{
+       kernel_neon_begin();
+       crypto_aegis128_final_neon(state, tag_xor, assoclen, cryptlen);
+       kernel_neon_end();
+}