crypto: arm/crct10dif - revert to C code for short inputs
author Ard Biesheuvel <ard.biesheuvel@linaro.org>
Sun, 27 Jan 2019 09:16:52 +0000 (10:16 +0100)
committer Herbert Xu <herbert@gondor.apana.org.au>
Fri, 1 Feb 2019 06:44:39 +0000 (14:44 +0800)
The SIMD routine ported from x86 used to have a special code path
for inputs < 16 bytes, which got lost somewhere along the way.
Instead, the current glue code aligns the input pointer to permit
the NEON routine to use special versions of the vld1 instructions
that assume 16 byte alignment, but this could result in inputs of
less than 16 bytes being passed in. This not only fails the new
extended tests that Eric has implemented, it also results in the
code reading past the end of the input, which could potentially
result in crashes when dealing with less than 16 bytes of input
at the end of a page which is followed by an unmapped page.

So update the glue code to only invoke the NEON routine if the
input is at least 16 bytes.

Reported-by: Eric Biggers <ebiggers@kernel.org>
Reviewed-by: Eric Biggers <ebiggers@kernel.org>
Fixes: 1d481f1cd892 ("crypto: arm/crct10dif - port x86 SSE implementation to ARM")
Cc: <stable@vger.kernel.org> # v4.10+
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
arch/arm/crypto/crct10dif-ce-core.S
arch/arm/crypto/crct10dif-ce-glue.c

index ce45ba0c06879b8c748ae763603cff14ef68aa60..16019b5961e7890709eb29d7f76e4b0b8a13000d 100644 (file)
@@ -124,10 +124,10 @@ ENTRY(crc_t10dif_pmull)
        vext.8          q10, qzr, q0, #4
 
        // receive the initial 64B data, xor the initial crc value
-       vld1.64         {q0-q1}, [arg2, :128]!
-       vld1.64         {q2-q3}, [arg2, :128]!
-       vld1.64         {q4-q5}, [arg2, :128]!
-       vld1.64         {q6-q7}, [arg2, :128]!
+       vld1.64         {q0-q1}, [arg2]!
+       vld1.64         {q2-q3}, [arg2]!
+       vld1.64         {q4-q5}, [arg2]!
+       vld1.64         {q6-q7}, [arg2]!
 CPU_LE(        vrev64.8        q0, q0                  )
 CPU_LE(        vrev64.8        q1, q1                  )
 CPU_LE(        vrev64.8        q2, q2                  )
@@ -167,7 +167,7 @@ CPU_LE(     vrev64.8        q7, q7                  )
 _fold_64_B_loop:
 
        .macro          fold64, reg1, reg2
-       vld1.64         {q11-q12}, [arg2, :128]!
+       vld1.64         {q11-q12}, [arg2]!
 
        vmull.p64       q8, \reg1\()h, d21
        vmull.p64       \reg1, \reg1\()l, d20
@@ -238,7 +238,7 @@ _16B_reduction_loop:
        vmull.p64       q7, d15, d21
        veor.8          q7, q7, q8
 
-       vld1.64         {q0}, [arg2, :128]!
+       vld1.64         {q0}, [arg2]!
 CPU_LE(        vrev64.8        q0, q0          )
        vswp            d0, d1
        veor.8          q7, q7, q0
@@ -335,7 +335,7 @@ _less_than_128:
        vmov.i8         q0, #0
        vmov            s3, arg1_low32          // get the initial crc value
 
-       vld1.64         {q7}, [arg2, :128]!
+       vld1.64         {q7}, [arg2]!
 CPU_LE(        vrev64.8        q7, q7          )
        vswp            d14, d15
        veor.8          q7, q7, q0
index d428355cf38d9b848c88e5024d8d4d03badeb754..14c19c70a8416bec170eebef2ee7f3fb1fb6b7ba 100644 (file)
@@ -35,26 +35,15 @@ static int crct10dif_update(struct shash_desc *desc, const u8 *data,
                            unsigned int length)
 {
        u16 *crc = shash_desc_ctx(desc);
-       unsigned int l;
 
-       if (!may_use_simd()) {
-               *crc = crc_t10dif_generic(*crc, data, length);
+       if (length >= CRC_T10DIF_PMULL_CHUNK_SIZE && may_use_simd()) {
+               kernel_neon_begin();
+               *crc = crc_t10dif_pmull(*crc, data, length);
+               kernel_neon_end();
        } else {
-               if (unlikely((u32)data % CRC_T10DIF_PMULL_CHUNK_SIZE)) {
-                       l = min_t(u32, length, CRC_T10DIF_PMULL_CHUNK_SIZE -
-                                 ((u32)data % CRC_T10DIF_PMULL_CHUNK_SIZE));
-
-                       *crc = crc_t10dif_generic(*crc, data, l);
-
-                       length -= l;
-                       data += l;
-               }
-               if (length > 0) {
-                       kernel_neon_begin();
-                       *crc = crc_t10dif_pmull(*crc, data, length);
-                       kernel_neon_end();
-               }
+               *crc = crc_t10dif_generic(*crc, data, length);
        }
+
        return 0;
 }