crypto: arm - add support for GHASH using ARMv8 Crypto Extensions
author Ard Biesheuvel <ard.biesheuvel@linaro.org>
Tue, 10 Mar 2015 08:47:48 +0000 (09:47 +0100)
committer Herbert Xu <herbert@gondor.apana.org.au>
Thu, 12 Mar 2015 10:13:36 +0000 (21:13 +1100)
This implements the GHASH hash algorithm (as used by the GCM AEAD
chaining mode) using the AArch32 version of the 64x64 to 128 bit
polynomial multiplication instruction (vmull.p64) that is part of
the ARMv8 Crypto Extensions.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
arch/arm/crypto/Kconfig
arch/arm/crypto/Makefile
arch/arm/crypto/ghash-ce-core.S [new file with mode: 0644]
arch/arm/crypto/ghash-ce-glue.c [new file with mode: 0644]

index 63588bdf3b5d4c292cf83bc76dcd5c0faf9f7157..d63f319924d292592f5295dda7db1206473f9600 100644 (file)
@@ -110,4 +110,14 @@ config CRYPTO_AES_ARM_CE
          Use an implementation of AES in CBC, CTR and XTS modes that uses
          ARMv8 Crypto Extensions
 
+config CRYPTO_GHASH_ARM_CE
+       tristate "PMULL-accelerated GHASH using ARMv8 Crypto Extensions"
+       depends on KERNEL_MODE_NEON
+       select CRYPTO_HASH
+       select CRYPTO_CRYPTD
+       help
+         Use an implementation of GHASH (used by the GCM AEAD chaining mode)
+         that uses the 64x64 to 128 bit polynomial multiplication instruction
+         (vmull.p64) that is part of the ARMv8 Crypto Extensions.
+
 endif
index 2514c420e8d3c026537aec991db8f0b6880fe376..9a273bd7dffd3a264d8a601cbc410ef7a770751e 100644 (file)
@@ -10,6 +10,7 @@ obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o
 obj-$(CONFIG_CRYPTO_SHA512_ARM_NEON) += sha512-arm-neon.o
 obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o
 obj-$(CONFIG_CRYPTO_SHA2_ARM_CE) += sha2-arm-ce.o
+obj-$(CONFIG_CRYPTO_GHASH_ARM_CE) += ghash-arm-ce.o
 
 aes-arm-y      := aes-armv4.o aes_glue.o
 aes-arm-bs-y   := aesbs-core.o aesbs-glue.o
@@ -19,6 +20,7 @@ sha512-arm-neon-y := sha512-armv7-neon.o sha512_neon_glue.o
 sha1-arm-ce-y  := sha1-ce-core.o sha1-ce-glue.o
 sha2-arm-ce-y  := sha2-ce-core.o sha2-ce-glue.o
 aes-arm-ce-y   := aes-ce-core.o aes-ce-glue.o
+ghash-arm-ce-y := ghash-ce-core.o ghash-ce-glue.o
 
 quiet_cmd_perl = PERL    $@
       cmd_perl = $(PERL) $(<) > $(@)
diff --git a/arch/arm/crypto/ghash-ce-core.S b/arch/arm/crypto/ghash-ce-core.S
new file mode 100644 (file)
index 0000000..e643a15
--- /dev/null
@@ -0,0 +1,94 @@
+/*
+ * Accelerated GHASH implementation with ARMv8 vmull.p64 instructions.
+ *
+ * Copyright (C) 2015 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+       /* Symbolic names for the 128-bit NEON registers used below. */
+       SHASH           .req    q0
+       SHASH2          .req    q1
+       T1              .req    q2
+       T2              .req    q3
+       MASK            .req    q4
+       XL              .req    q5
+       XM              .req    q6
+       XH              .req    q7
+       /* IN1 shares q7 with XH; IN1 is only used before XH is written. */
+       IN1             .req    q7
+
+       /*
+        * Names for the 64-bit halves of the registers above that the code
+        * addresses individually (_L = low half, _H = high half).
+        */
+       SHASH_L         .req    d0
+       SHASH_H         .req    d1
+       SHASH2_L        .req    d2
+       T1_L            .req    d4
+       MASK_L          .req    d8
+       XL_L            .req    d10
+       XL_H            .req    d11
+       XM_L            .req    d12
+       XM_H            .req    d13
+       XH_L            .req    d14
+
+       .text
+       /* vmull.p64 is only accepted with the crypto extensions enabled. */
+       .fpu            crypto-neon-fp-armv8
+
+       /*
+        * void pmull_ghash_update(int blocks, u64 dg[], const char *src,
+        *                         struct ghash_key const *k, const char *head)
+        *
+        * On entry r0 = blocks, r1 = dg, r2 = src, r3 = k; head is the fifth
+        * argument and is read from the stack at [sp].
+        *
+        * Folds the optional head block (when head is non-NULL) and then
+        * 'blocks' 16-byte blocks read from src into the 128-bit digest at
+        * dg, which is loaded at the start and stored back at the end.
+        */
+ENTRY(pmull_ghash_update)
+       /*
+        * The key is kept in memory as two native-endian u64 values, so it
+        * is loaded with 64-bit elements, exactly like the digest. A
+        * byte-wise load would leave each half byte-reversed on big-endian
+        * kernels.
+        */
+       vld1.64         {SHASH}, [r3]
+       vld1.64         {XL}, [r1]
+
+       /*
+        * MASK: 0xe1 in every byte, then each 64-bit lane shifted left by 57.
+        * SHASH2: SHASH XORed with a copy of itself that has its two halves
+        * swapped, i.e. both halves hold (a1 + a0) for the middle product.
+        */
+       vmov.i8         MASK, #0xe1
+       vext.8          SHASH2, SHASH, SHASH, #8
+       vshl.u64        MASK, MASK, #57
+       veor            SHASH2, SHASH2, SHASH
+
+       /* do the head block first, if supplied */
+       ldr             ip, [sp]
+       teq             ip, #0
+       beq             0f
+       vld1.64         {T1}, [ip]
+       /* set Z if no further blocks follow; tested by the bne below */
+       teq             r0, #0
+       b               1f
+
+       /* fetch the next block from src and count it; flags feed the bne */
+0:     vld1.64         {T1}, [r2]!
+       subs            r0, r0, #1
+
+1:     /* multiply XL by SHASH in GF(2^128) */
+#ifndef CONFIG_CPU_BIG_ENDIAN
+       vrev64.8        T1, T1
+#endif
+       /* XOR the input block into the digest (with halves swapped) */
+       vext.8          T2, XL, XL, #8
+       vext.8          IN1, T1, T1, #8
+       veor            T1, T1, T2
+       veor            XL, XL, IN1
+
+       /* three 64x64 multiplies: high, low and middle partial products */
+       vmull.p64       XH, SHASH_H, XL_H               @ a1 * b1
+       veor            T1, T1, XL
+       vmull.p64       XL, SHASH_L, XL_L               @ a0 * b0
+       vmull.p64       XM, SHASH2_L, T1_L              @ (a1 + a0)(b1 + b0)
+
+       /* combine the partial products and start the reduction using MASK */
+       vext.8          T1, XL, XH, #8
+       veor            T2, XL, XH
+       veor            XM, XM, T1
+       veor            XM, XM, T2
+       vmull.p64       T2, XL_L, MASK_L
+
+       vmov            XH_L, XM_H
+       vmov            XM_H, XL_L
+
+       /* second multiply by MASK; the result in XL is the new digest */
+       veor            XL, XM, T2
+       vext.8          T2, XL, XL, #8
+       vmull.p64       XL, XL_L, MASK_L
+       veor            T2, T2, XH
+       veor            XL, XL, T2
+
+       /* loop while the teq/subs above left Z clear */
+       bne             0b
+
+       vst1.64         {XL}, [r1]
+       bx              lr
+ENDPROC(pmull_ghash_update)
diff --git a/arch/arm/crypto/ghash-ce-glue.c b/arch/arm/crypto/ghash-ce-glue.c
new file mode 100644 (file)
index 0000000..8c959d1
--- /dev/null
@@ -0,0 +1,318 @@
+/*
+ * Accelerated GHASH implementation with ARMv8 vmull.p64 instructions.
+ *
+ * Copyright (C) 2015 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#include <asm/hwcap.h>
+#include <asm/neon.h>
+#include <asm/simd.h>
+#include <asm/unaligned.h>
+#include <crypto/cryptd.h>
+#include <crypto/internal/hash.h>
+#include <crypto/gf128mul.h>
+#include <linux/crypto.h>
+#include <linux/module.h>
+
+MODULE_DESCRIPTION("GHASH secure hash using ARMv8 Crypto Extensions");
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+
+/* Size in bytes of one input block; also the only key length accepted. */
+#define GHASH_BLOCK_SIZE       16
+/* Size in bytes of the digest written by ghash_final(). */
+#define GHASH_DIGEST_SIZE      16
+
+/*
+ * Hash key in the form produced by ghash_setkey() and consumed by
+ * pmull_ghash_update(), which loads these 16 bytes directly into a NEON
+ * register. Member order and size must therefore not change.
+ */
+struct ghash_key {
+       u64     a;      /* derived from key bytes 8..15 (see ghash_setkey) */
+       u64     b;      /* derived from key bytes 0..7 (see ghash_setkey) */
+};
+
+/* Per-request hash state stored in the shash descriptor context. */
+struct ghash_desc_ctx {
+       /* running digest, read and written by pmull_ghash_update() */
+       u64 digest[GHASH_DIGEST_SIZE/sizeof(u64)];
+       /* bytes of a not yet complete block, carried between update calls */
+       u8 buf[GHASH_BLOCK_SIZE];
+       /* bytes fed so far; only count % GHASH_BLOCK_SIZE is ever used */
+       u32 count;
+};
+
+/* Transform context of the async "ghash-ce" algorithm. */
+struct ghash_async_ctx {
+       /* set up in ghash_async_init_tfm(), freed in ghash_async_exit_tfm() */
+       struct cryptd_ahash *cryptd_tfm;
+};
+
+/*
+ * Assembly routine (ghash-ce-core.S). Hashes the optional 16-byte block at
+ * @head (skipped when NULL) followed by @blocks 16-byte blocks from @src
+ * into the digest @dg using key @k. Callers in this file bracket it with
+ * kernel_neon_begin()/kernel_neon_end().
+ */
+asmlinkage void pmull_ghash_update(int blocks, u64 dg[], const char *src,
+                                  struct ghash_key const *k, const char *head);
+
+/* Reset the descriptor state (digest, buffer and byte count) to zero. */
+static int ghash_init(struct shash_desc *desc)
+{
+       struct ghash_desc_ctx *state = shash_desc_ctx(desc);
+
+       *state = (struct ghash_desc_ctx){ .count = 0 };
+
+       return 0;
+}
+
+/*
+ * Absorb @len bytes from @src into the hash state.
+ *
+ * Input that does not complete a 16-byte block is only copied into
+ * ctx->buf. Otherwise any buffered bytes are topped up to a full block
+ * (handed to the assembly routine as the head block), all whole blocks in
+ * @src are hashed between kernel_neon_begin() and kernel_neon_end(), and
+ * the remaining tail is buffered for the next call. Always returns 0.
+ */
+static int ghash_update(struct shash_desc *desc, const u8 *src,
+                       unsigned int len)
+{
+       struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);
+       unsigned int buffered = ctx->count % GHASH_BLOCK_SIZE;
+       struct ghash_key *key;
+       int blocks;
+
+       ctx->count += len;
+
+       if ((buffered + len) < GHASH_BLOCK_SIZE) {
+               /* still short of a full block: just stash the bytes */
+               if (len)
+                       memcpy(ctx->buf + buffered, src, len);
+               return 0;
+       }
+
+       key = crypto_shash_ctx(desc->tfm);
+
+       if (buffered) {
+               int fill = GHASH_BLOCK_SIZE - buffered;
+
+               /* complete the buffered block with the start of src */
+               memcpy(ctx->buf + buffered, src, fill);
+               src += fill;
+               len -= fill;
+       }
+
+       blocks = len / GHASH_BLOCK_SIZE;
+       len %= GHASH_BLOCK_SIZE;
+
+       kernel_neon_begin();
+       pmull_ghash_update(blocks, ctx->digest, src, key,
+                          buffered ? ctx->buf : NULL);
+       kernel_neon_end();
+
+       /* keep whatever is left over at the start of the buffer */
+       src += blocks * GHASH_BLOCK_SIZE;
+       if (len)
+               memcpy(ctx->buf, src, len);
+
+       return 0;
+}
+
+/*
+ * Finish the hash: zero-pad and process a trailing partial block if there
+ * is one, write the 16-byte digest to @dst and clear the descriptor state.
+ * Always returns 0.
+ */
+static int ghash_final(struct shash_desc *desc, u8 *dst)
+{
+       struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);
+       unsigned int tail = ctx->count % GHASH_BLOCK_SIZE;
+
+       if (tail != 0) {
+               struct ghash_key *key = crypto_shash_ctx(desc->tfm);
+
+               memset(&ctx->buf[tail], 0, GHASH_BLOCK_SIZE - tail);
+
+               kernel_neon_begin();
+               pmull_ghash_update(1, ctx->digest, ctx->buf, key, NULL);
+               kernel_neon_end();
+       }
+
+       /* digest[1] goes out first, then digest[0], each as big-endian */
+       put_unaligned_be64(ctx->digest[1], dst);
+       put_unaligned_be64(ctx->digest[0], dst + 8);
+
+       /* leave no hash state behind in the descriptor */
+       *ctx = (struct ghash_desc_ctx){};
+
+       return 0;
+}
+
+/*
+ * Install a 16-byte hash key.
+ *
+ * The key is read as two big-endian 64-bit words and stored multiplied by
+ * 'x' in GF(2^128): a 1-bit left shift across the 128-bit value, with the
+ * bit shifted out of the top word wrapped into the bottom and folded back
+ * in by XORing a constant into key->b.
+ *
+ * Returns 0 on success, or -EINVAL (with CRYPTO_TFM_RES_BAD_KEY_LEN set on
+ * @tfm) when @keylen is not GHASH_BLOCK_SIZE.
+ */
+static int ghash_setkey(struct crypto_shash *tfm,
+                       const u8 *inkey, unsigned int keylen)
+{
+       struct ghash_key *key = crypto_shash_ctx(tfm);
+       u64 a, b;
+
+       if (keylen != GHASH_BLOCK_SIZE) {
+               crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+               return -EINVAL;
+       }
+
+       /* perform multiplication by 'x' in GF(2^128) */
+       b = get_unaligned_be64(inkey);
+       a = get_unaligned_be64(inkey + 8);
+
+       key->a = (a << 1) | (b >> 63);
+       key->b = (b << 1) | (a >> 63);
+
+       /*
+        * The constant needs 64 bits while unsigned long is only 32 bits
+        * wide on 32-bit ARM, hence the explicit ULL suffix.
+        */
+       if (b >> 63)
+               key->b ^= 0xc200000000000000ULL;
+
+       return 0;
+}
+
+/*
+ * Synchronous GHASH implementation, registered with priority 0 under the
+ * driver name that ghash_async_init_tfm() hands to cryptd_alloc_ahash().
+ */
+static struct shash_alg ghash_alg = {
+       .init                   = ghash_init,
+       .update                 = ghash_update,
+       .final                  = ghash_final,
+       .setkey                 = ghash_setkey,
+       .digestsize             = GHASH_DIGEST_SIZE,
+       .descsize               = sizeof(struct ghash_desc_ctx),
+
+       .base.cra_name          = "ghash",
+       .base.cra_driver_name   = "__driver-ghash-ce",
+       .base.cra_priority      = 0,
+       .base.cra_flags         = CRYPTO_ALG_TYPE_SHASH,
+       .base.cra_blocksize     = GHASH_BLOCK_SIZE,
+       .base.cra_ctxsize       = sizeof(struct ghash_key),
+       .base.cra_module        = THIS_MODULE,
+};
+
+/*
+ * Start a hash request. When SIMD may be used, the inner shash is
+ * initialised directly on the descriptor embedded in the request context;
+ * otherwise a copy of the request is retargeted at the cryptd transform
+ * and passed to crypto_ahash_init().
+ */
+static int ghash_async_init(struct ahash_request *req)
+{
+       struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+       struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
+       struct ahash_request *cryptd_req = ahash_request_ctx(req);
+       struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;
+       struct shash_desc *desc;
+
+       if (may_use_simd()) {
+               desc = cryptd_shash_desc(cryptd_req);
+               desc->tfm = cryptd_ahash_child(cryptd_tfm);
+               desc->flags = req->base.flags;
+               return crypto_shash_init(desc);
+       }
+
+       memcpy(cryptd_req, req, sizeof(*req));
+       ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base);
+       return crypto_ahash_init(cryptd_req);
+}
+
+/*
+ * Feed more data into a hash request: directly through the embedded shash
+ * descriptor when SIMD may be used, otherwise via a copy of the request
+ * retargeted at the cryptd transform.
+ */
+static int ghash_async_update(struct ahash_request *req)
+{
+       struct ahash_request *cryptd_req = ahash_request_ctx(req);
+       struct crypto_ahash *tfm;
+       struct ghash_async_ctx *ctx;
+
+       if (may_use_simd())
+               return shash_ahash_update(req, cryptd_shash_desc(cryptd_req));
+
+       tfm = crypto_ahash_reqtfm(req);
+       ctx = crypto_ahash_ctx(tfm);
+
+       memcpy(cryptd_req, req, sizeof(*req));
+       ahash_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
+       return crypto_ahash_update(cryptd_req);
+}
+
+/*
+ * Produce the digest for a hash request: directly into req->result through
+ * the embedded shash descriptor when SIMD may be used, otherwise via a copy
+ * of the request retargeted at the cryptd transform.
+ */
+static int ghash_async_final(struct ahash_request *req)
+{
+       struct ahash_request *cryptd_req = ahash_request_ctx(req);
+       struct crypto_ahash *tfm;
+       struct ghash_async_ctx *ctx;
+
+       if (may_use_simd())
+               return crypto_shash_final(cryptd_shash_desc(cryptd_req),
+                                         req->result);
+
+       tfm = crypto_ahash_reqtfm(req);
+       ctx = crypto_ahash_ctx(tfm);
+
+       memcpy(cryptd_req, req, sizeof(*req));
+       ahash_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
+       return crypto_ahash_final(cryptd_req);
+}
+
+/*
+ * One-shot digest of a request. When SIMD may be used, the embedded shash
+ * descriptor is set up and shash_ahash_digest() is called on it; otherwise
+ * a copy of the request is retargeted at the cryptd transform and passed
+ * to crypto_ahash_digest().
+ */
+static int ghash_async_digest(struct ahash_request *req)
+{
+       struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+       struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
+       struct ahash_request *cryptd_req = ahash_request_ctx(req);
+       struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;
+       struct shash_desc *desc;
+
+       if (may_use_simd()) {
+               desc = cryptd_shash_desc(cryptd_req);
+               desc->tfm = cryptd_ahash_child(cryptd_tfm);
+               desc->flags = req->base.flags;
+               return shash_ahash_digest(req, desc);
+       }
+
+       memcpy(cryptd_req, req, sizeof(*req));
+       ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base);
+       return crypto_ahash_digest(cryptd_req);
+}
+
+/*
+ * Set the key on the cryptd transform behind @tfm. The request flags of
+ * @tfm are copied to the inner transform before the call, and the inner
+ * transform's result flags are copied back to @tfm afterwards. Returns
+ * whatever crypto_ahash_setkey() returned.
+ */
+static int ghash_async_setkey(struct crypto_ahash *tfm, const u8 *key,
+                             unsigned int keylen)
+{
+       struct ghash_async_ctx *actx = crypto_ahash_ctx(tfm);
+       struct crypto_ahash *inner = &actx->cryptd_tfm->base;
+       u32 flags;
+       int ret;
+
+       crypto_ahash_clear_flags(inner, CRYPTO_TFM_REQ_MASK);
+       flags = crypto_ahash_get_flags(tfm) & CRYPTO_TFM_REQ_MASK;
+       crypto_ahash_set_flags(inner, flags);
+
+       ret = crypto_ahash_setkey(inner, key, keylen);
+
+       flags = crypto_ahash_get_flags(inner) & CRYPTO_TFM_RES_MASK;
+       crypto_ahash_set_flags(tfm, flags);
+
+       return ret;
+}
+
+/*
+ * Transform constructor: allocate the cryptd wrapper around
+ * "__driver-ghash-ce", remember it in the context, and size the request
+ * context to hold a struct ahash_request plus the wrapper's own request
+ * context. Returns 0, or the error from cryptd_alloc_ahash().
+ */
+static int ghash_async_init_tfm(struct crypto_tfm *tfm)
+{
+       struct ghash_async_ctx *actx = crypto_tfm_ctx(tfm);
+       struct cryptd_ahash *wrapper;
+       unsigned int reqsize;
+
+       wrapper = cryptd_alloc_ahash("__driver-ghash-ce", 0, 0);
+       if (IS_ERR(wrapper))
+               return PTR_ERR(wrapper);
+
+       actx->cryptd_tfm = wrapper;
+
+       reqsize = sizeof(struct ahash_request) +
+                 crypto_ahash_reqsize(&wrapper->base);
+       crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), reqsize);
+
+       return 0;
+}
+
+/* Transform destructor: release the cryptd wrapper stored in the context. */
+static void ghash_async_exit_tfm(struct crypto_tfm *tfm)
+{
+       struct ghash_async_ctx *actx = crypto_tfm_ctx(tfm);
+       struct cryptd_ahash *wrapper = actx->cryptd_tfm;
+
+       cryptd_free_ahash(wrapper);
+}
+
+/*
+ * Asynchronous "ghash" front end, registered as "ghash-ce" with priority
+ * 300. Its handlers choose between the direct shash path and the cryptd
+ * transform depending on may_use_simd().
+ */
+static struct ahash_alg ghash_async_alg = {
+       .init                   = ghash_async_init,
+       .update                 = ghash_async_update,
+       .final                  = ghash_async_final,
+       .digest                 = ghash_async_digest,
+       .setkey                 = ghash_async_setkey,
+       .halg                   = {
+               .digestsize     = GHASH_DIGEST_SIZE,
+               .base           = {
+                       .cra_name        = "ghash",
+                       .cra_driver_name = "ghash-ce",
+                       .cra_priority    = 300,
+                       .cra_flags       = CRYPTO_ALG_TYPE_AHASH |
+                                          CRYPTO_ALG_ASYNC,
+                       .cra_blocksize   = GHASH_BLOCK_SIZE,
+                       .cra_type        = &crypto_ahash_type,
+                       .cra_ctxsize     = sizeof(struct ghash_async_ctx),
+                       .cra_module      = THIS_MODULE,
+                       .cra_init        = ghash_async_init_tfm,
+                       .cra_exit        = ghash_async_exit_tfm,
+               },
+       },
+};
+
+/*
+ * Module init: bail out with -ENODEV unless the CPU advertises PMULL, then
+ * register the shash followed by the ahash. If the ahash registration
+ * fails, the shash is unregistered again and the error is returned.
+ */
+static int __init ghash_ce_mod_init(void)
+{
+       int ret;
+
+       if ((elf_hwcap2 & HWCAP2_PMULL) == 0)
+               return -ENODEV;
+
+       ret = crypto_register_shash(&ghash_alg);
+       if (ret)
+               return ret;
+
+       ret = crypto_register_ahash(&ghash_async_alg);
+       if (ret)
+               crypto_unregister_shash(&ghash_alg);
+
+       return ret;
+}
+
+/*
+ * Module exit: unregister both algorithms, in the reverse of the order in
+ * which ghash_ce_mod_init() registered them.
+ */
+static void __exit ghash_ce_mod_exit(void)
+{
+       crypto_unregister_ahash(&ghash_async_alg);
+       crypto_unregister_shash(&ghash_alg);
+}
+
+/* Hook the init and exit routines above into module load and unload. */
+module_init(ghash_ce_mod_init);
+module_exit(ghash_ce_mod_exit);