i386: move crypto
author Thomas Gleixner <tglx@linutronix.de>
Thu, 11 Oct 2007 09:16:21 +0000 (11:16 +0200)
committer Thomas Gleixner <tglx@linutronix.de>
Thu, 11 Oct 2007 09:16:21 +0000 (11:16 +0200)
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
14 files changed:
arch/i386/Makefile
arch/i386/crypto/Makefile [deleted file]
arch/i386/crypto/Makefile_32 [deleted file]
arch/i386/crypto/aes-i586-asm_32.S [deleted file]
arch/i386/crypto/aes_32.c [deleted file]
arch/i386/crypto/twofish-i586-asm_32.S [deleted file]
arch/i386/crypto/twofish_32.c [deleted file]
arch/x86/crypto/Makefile [new file with mode: 0644]
arch/x86/crypto/Makefile_32 [new file with mode: 0644]
arch/x86/crypto/aes-i586-asm_32.S [new file with mode: 0644]
arch/x86/crypto/aes_32.c [new file with mode: 0644]
arch/x86/crypto/twofish-i586-asm_32.S [new file with mode: 0644]
arch/x86/crypto/twofish_32.c [new file with mode: 0644]
arch/x86_64/crypto/Makefile

diff --git a/arch/i386/Makefile b/arch/i386/Makefile
index 45409c13f6efd690aa8792b83059d3704caa0456..dca07ae933d753e5643d0bf552f678fbaf06c4e4 100644 (file)
@@ -105,7 +105,7 @@ libs-y                                      += arch/i386/lib/
 core-y                                 += arch/i386/kernel/ \
                                           arch/i386/mm/ \
                                           $(mcore-y)/ \
-                                          arch/i386/crypto/
+                                          arch/x86/crypto/
 drivers-$(CONFIG_MATH_EMULATION)       += arch/i386/math-emu/
 drivers-$(CONFIG_PCI)                  += arch/i386/pci/
 # must be linked after kernel/
diff --git a/arch/i386/crypto/Makefile b/arch/i386/crypto/Makefile
deleted file mode 100644 (file)
index fbd34ac..0000000
+++ /dev/null
@@ -1,5 +0,0 @@
-ifeq ($(CONFIG_X86_32),y)
-include ${srctree}/arch/i386/crypto/Makefile_32
-else
-include ${srctree}/arch/x86_64/crypto/Makefile_64
-endif
diff --git a/arch/i386/crypto/Makefile_32 b/arch/i386/crypto/Makefile_32
deleted file mode 100644 (file)
index 7154b14..0000000
+++ /dev/null
@@ -1,12 +0,0 @@
-# 
-# i386/crypto/Makefile 
-# 
-# Arch-specific CryptoAPI modules.
-# 
-
-obj-$(CONFIG_CRYPTO_AES_586) += aes-i586.o
-obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o
-
-aes-i586-y := aes-i586-asm_32.o aes_32.o
-twofish-i586-y := twofish-i586-asm_32.o twofish_32.o
-
diff --git a/arch/i386/crypto/aes-i586-asm_32.S b/arch/i386/crypto/aes-i586-asm_32.S
deleted file mode 100644 (file)
index f942f0c..0000000
+++ /dev/null
@@ -1,373 +0,0 @@
-// -------------------------------------------------------------------------
-// Copyright (c) 2001, Dr Brian Gladman <                 >, Worcester, UK.
-// All rights reserved.
-//
-// LICENSE TERMS
-//
-// The free distribution and use of this software in both source and binary 
-// form is allowed (with or without changes) provided that:
-//
-//   1. distributions of this source code include the above copyright 
-//      notice, this list of conditions and the following disclaimer//
-//
-//   2. distributions in binary form include the above copyright
-//      notice, this list of conditions and the following disclaimer
-//      in the documentation and/or other associated materials//
-//
-//   3. the copyright holder's name is not used to endorse products 
-//      built using this software without specific written permission.
-//
-//
-// ALTERNATIVELY, provided that this notice is retained in full, this product
-// may be distributed under the terms of the GNU General Public License (GPL),
-// in which case the provisions of the GPL apply INSTEAD OF those given above.
-//
-// Copyright (c) 2004 Linus Torvalds <torvalds@osdl.org>
-// Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com>
-
-// DISCLAIMER
-//
-// This software is provided 'as is' with no explicit or implied warranties
-// in respect of its properties including, but not limited to, correctness 
-// and fitness for purpose.
-// -------------------------------------------------------------------------
-// Issue Date: 29/07/2002
-
-.file "aes-i586-asm.S"
-.text
-
-#include <asm/asm-offsets.h>
-
-#define tlen 1024   // length of each of 4 'xor' arrays (256 32-bit words)
-
-/* offsets to parameters with one register pushed onto stack */
-#define tfm 8
-#define out_blk 12
-#define in_blk 16
-
-/* offsets in crypto_tfm structure */
-#define ekey (crypto_tfm_ctx_offset + 0)
-#define nrnd (crypto_tfm_ctx_offset + 256)
-#define dkey (crypto_tfm_ctx_offset + 260)
-
-// register mapping for encrypt and decrypt subroutines
-
-#define r0  eax
-#define r1  ebx
-#define r2  ecx
-#define r3  edx
-#define r4  esi
-#define r5  edi
-
-#define eaxl  al
-#define eaxh  ah
-#define ebxl  bl
-#define ebxh  bh
-#define ecxl  cl
-#define ecxh  ch
-#define edxl  dl
-#define edxh  dh
-
-#define _h(reg) reg##h
-#define h(reg) _h(reg)
-
-#define _l(reg) reg##l
-#define l(reg) _l(reg)
-
-// This macro takes a 32-bit word representing a column and uses
-// each of its four bytes to index into four tables of 256 32-bit
-// words to obtain values that are then xored into the appropriate
-// output registers r0, r1, r4 or r5.  
-
-// Parameters:
-// table table base address
-//   %1  out_state[0]
-//   %2  out_state[1]
-//   %3  out_state[2]
-//   %4  out_state[3]
-//   idx input register for the round (destroyed)
-//   tmp scratch register for the round
-// sched key schedule
-
-#define do_col(table, a1,a2,a3,a4, idx, tmp)   \
-       movzx   %l(idx),%tmp;                   \
-       xor     table(,%tmp,4),%a1;             \
-       movzx   %h(idx),%tmp;                   \
-       shr     $16,%idx;                       \
-       xor     table+tlen(,%tmp,4),%a2;        \
-       movzx   %l(idx),%tmp;                   \
-       movzx   %h(idx),%idx;                   \
-       xor     table+2*tlen(,%tmp,4),%a3;      \
-       xor     table+3*tlen(,%idx,4),%a4;
-
-// initialise output registers from the key schedule
-// NB1: original value of a3 is in idx on exit
-// NB2: original values of a1,a2,a4 aren't used
-#define do_fcol(table, a1,a2,a3,a4, idx, tmp, sched) \
-       mov     0 sched,%a1;                    \
-       movzx   %l(idx),%tmp;                   \
-       mov     12 sched,%a2;                   \
-       xor     table(,%tmp,4),%a1;             \
-       mov     4 sched,%a4;                    \
-       movzx   %h(idx),%tmp;                   \
-       shr     $16,%idx;                       \
-       xor     table+tlen(,%tmp,4),%a2;        \
-       movzx   %l(idx),%tmp;                   \
-       movzx   %h(idx),%idx;                   \
-       xor     table+3*tlen(,%idx,4),%a4;      \
-       mov     %a3,%idx;                       \
-       mov     8 sched,%a3;                    \
-       xor     table+2*tlen(,%tmp,4),%a3;
-
-// initialise output registers from the key schedule
-// NB1: original value of a3 is in idx on exit
-// NB2: original values of a1,a2,a4 aren't used
-#define do_icol(table, a1,a2,a3,a4, idx, tmp, sched) \
-       mov     0 sched,%a1;                    \
-       movzx   %l(idx),%tmp;                   \
-       mov     4 sched,%a2;                    \
-       xor     table(,%tmp,4),%a1;             \
-       mov     12 sched,%a4;                   \
-       movzx   %h(idx),%tmp;                   \
-       shr     $16,%idx;                       \
-       xor     table+tlen(,%tmp,4),%a2;        \
-       movzx   %l(idx),%tmp;                   \
-       movzx   %h(idx),%idx;                   \
-       xor     table+3*tlen(,%idx,4),%a4;      \
-       mov     %a3,%idx;                       \
-       mov     8 sched,%a3;                    \
-       xor     table+2*tlen(,%tmp,4),%a3;
-
-
-// original Gladman had conditional saves to MMX regs.
-#define save(a1, a2)           \
-       mov     %a2,4*a1(%esp)
-
-#define restore(a1, a2)                \
-       mov     4*a2(%esp),%a1
-
-// These macros perform a forward encryption cycle. They are entered with
-// the first previous round column values in r0,r1,r4,r5 and
-// exit with the final values in the same registers, using stack
-// for temporary storage.
-
-// round column values
-// on entry: r0,r1,r4,r5
-// on exit:  r2,r1,r4,r5
-#define fwd_rnd1(arg, table)                                           \
-       save   (0,r1);                                                  \
-       save   (1,r5);                                                  \
-                                                                       \
-       /* compute new column values */                                 \
-       do_fcol(table, r2,r5,r4,r1, r0,r3, arg);        /* idx=r0 */    \
-       do_col (table, r4,r1,r2,r5, r0,r3);             /* idx=r4 */    \
-       restore(r0,0);                                                  \
-       do_col (table, r1,r2,r5,r4, r0,r3);             /* idx=r1 */    \
-       restore(r0,1);                                                  \
-       do_col (table, r5,r4,r1,r2, r0,r3);             /* idx=r5 */
-
-// round column values
-// on entry: r2,r1,r4,r5
-// on exit:  r0,r1,r4,r5
-#define fwd_rnd2(arg, table)                                           \
-       save   (0,r1);                                                  \
-       save   (1,r5);                                                  \
-                                                                       \
-       /* compute new column values */                                 \
-       do_fcol(table, r0,r5,r4,r1, r2,r3, arg);        /* idx=r2 */    \
-       do_col (table, r4,r1,r0,r5, r2,r3);             /* idx=r4 */    \
-       restore(r2,0);                                                  \
-       do_col (table, r1,r0,r5,r4, r2,r3);             /* idx=r1 */    \
-       restore(r2,1);                                                  \
-       do_col (table, r5,r4,r1,r0, r2,r3);             /* idx=r5 */
-
-// These macros performs an inverse encryption cycle. They are entered with
-// the first previous round column values in r0,r1,r4,r5 and
-// exit with the final values in the same registers, using stack
-// for temporary storage
-
-// round column values
-// on entry: r0,r1,r4,r5
-// on exit:  r2,r1,r4,r5
-#define inv_rnd1(arg, table)                                           \
-       save    (0,r1);                                                 \
-       save    (1,r5);                                                 \
-                                                                       \
-       /* compute new column values */                                 \
-       do_icol(table, r2,r1,r4,r5, r0,r3, arg);        /* idx=r0 */    \
-       do_col (table, r4,r5,r2,r1, r0,r3);             /* idx=r4 */    \
-       restore(r0,0);                                                  \
-       do_col (table, r1,r4,r5,r2, r0,r3);             /* idx=r1 */    \
-       restore(r0,1);                                                  \
-       do_col (table, r5,r2,r1,r4, r0,r3);             /* idx=r5 */
-
-// round column values
-// on entry: r2,r1,r4,r5
-// on exit:  r0,r1,r4,r5
-#define inv_rnd2(arg, table)                                           \
-       save    (0,r1);                                                 \
-       save    (1,r5);                                                 \
-                                                                       \
-       /* compute new column values */                                 \
-       do_icol(table, r0,r1,r4,r5, r2,r3, arg);        /* idx=r2 */    \
-       do_col (table, r4,r5,r0,r1, r2,r3);             /* idx=r4 */    \
-       restore(r2,0);                                                  \
-       do_col (table, r1,r4,r5,r0, r2,r3);             /* idx=r1 */    \
-       restore(r2,1);                                                  \
-       do_col (table, r5,r0,r1,r4, r2,r3);             /* idx=r5 */
-
-// AES (Rijndael) Encryption Subroutine
-/* void aes_enc_blk(struct crypto_tfm *tfm, u8 *out_blk, const u8 *in_blk) */
-
-.global  aes_enc_blk
-
-.extern  ft_tab
-.extern  fl_tab
-
-.align 4
-
-aes_enc_blk:
-       push    %ebp
-       mov     tfm(%esp),%ebp
-
-// CAUTION: the order and the values used in these assigns 
-// rely on the register mappings
-
-1:     push    %ebx
-       mov     in_blk+4(%esp),%r2
-       push    %esi
-       mov     nrnd(%ebp),%r3   // number of rounds
-       push    %edi
-#if ekey != 0
-       lea     ekey(%ebp),%ebp  // key pointer
-#endif
-
-// input four columns and xor in first round key
-
-       mov     (%r2),%r0
-       mov     4(%r2),%r1
-       mov     8(%r2),%r4
-       mov     12(%r2),%r5
-       xor     (%ebp),%r0
-       xor     4(%ebp),%r1
-       xor     8(%ebp),%r4
-       xor     12(%ebp),%r5
-
-       sub     $8,%esp         // space for register saves on stack
-       add     $16,%ebp        // increment to next round key
-       cmp     $12,%r3
-       jb      4f              // 10 rounds for 128-bit key
-       lea     32(%ebp),%ebp
-       je      3f              // 12 rounds for 192-bit key
-       lea     32(%ebp),%ebp
-
-2:     fwd_rnd1( -64(%ebp) ,ft_tab)    // 14 rounds for 256-bit key
-       fwd_rnd2( -48(%ebp) ,ft_tab)
-3:     fwd_rnd1( -32(%ebp) ,ft_tab)    // 12 rounds for 192-bit key
-       fwd_rnd2( -16(%ebp) ,ft_tab)
-4:     fwd_rnd1(    (%ebp) ,ft_tab)    // 10 rounds for 128-bit key
-       fwd_rnd2( +16(%ebp) ,ft_tab)
-       fwd_rnd1( +32(%ebp) ,ft_tab)
-       fwd_rnd2( +48(%ebp) ,ft_tab)
-       fwd_rnd1( +64(%ebp) ,ft_tab)
-       fwd_rnd2( +80(%ebp) ,ft_tab)
-       fwd_rnd1( +96(%ebp) ,ft_tab)
-       fwd_rnd2(+112(%ebp) ,ft_tab)
-       fwd_rnd1(+128(%ebp) ,ft_tab)
-       fwd_rnd2(+144(%ebp) ,fl_tab)    // last round uses a different table
-
-// move final values to the output array.  CAUTION: the 
-// order of these assigns rely on the register mappings
-
-       add     $8,%esp
-       mov     out_blk+12(%esp),%ebp
-       mov     %r5,12(%ebp)
-       pop     %edi
-       mov     %r4,8(%ebp)
-       pop     %esi
-       mov     %r1,4(%ebp)
-       pop     %ebx
-       mov     %r0,(%ebp)
-       pop     %ebp
-       mov     $1,%eax
-       ret
-
-// AES (Rijndael) Decryption Subroutine
-/* void aes_dec_blk(struct crypto_tfm *tfm, u8 *out_blk, const u8 *in_blk) */
-
-.global  aes_dec_blk
-
-.extern  it_tab
-.extern  il_tab
-
-.align 4
-
-aes_dec_blk:
-       push    %ebp
-       mov     tfm(%esp),%ebp
-
-// CAUTION: the order and the values used in these assigns 
-// rely on the register mappings
-
-1:     push    %ebx
-       mov     in_blk+4(%esp),%r2
-       push    %esi
-       mov     nrnd(%ebp),%r3   // number of rounds
-       push    %edi
-#if dkey != 0
-       lea     dkey(%ebp),%ebp  // key pointer
-#endif
-       mov     %r3,%r0
-       shl     $4,%r0
-       add     %r0,%ebp
-       
-// input four columns and xor in first round key
-
-       mov     (%r2),%r0
-       mov     4(%r2),%r1
-       mov     8(%r2),%r4
-       mov     12(%r2),%r5
-       xor     (%ebp),%r0
-       xor     4(%ebp),%r1
-       xor     8(%ebp),%r4
-       xor     12(%ebp),%r5
-
-       sub     $8,%esp         // space for register saves on stack
-       sub     $16,%ebp        // increment to next round key
-       cmp     $12,%r3
-       jb      4f              // 10 rounds for 128-bit key
-       lea     -32(%ebp),%ebp
-       je      3f              // 12 rounds for 192-bit key
-       lea     -32(%ebp),%ebp
-
-2:     inv_rnd1( +64(%ebp), it_tab)    // 14 rounds for 256-bit key
-       inv_rnd2( +48(%ebp), it_tab)
-3:     inv_rnd1( +32(%ebp), it_tab)    // 12 rounds for 192-bit key
-       inv_rnd2( +16(%ebp), it_tab)
-4:     inv_rnd1(    (%ebp), it_tab)    // 10 rounds for 128-bit key
-       inv_rnd2( -16(%ebp), it_tab)
-       inv_rnd1( -32(%ebp), it_tab)
-       inv_rnd2( -48(%ebp), it_tab)
-       inv_rnd1( -64(%ebp), it_tab)
-       inv_rnd2( -80(%ebp), it_tab)
-       inv_rnd1( -96(%ebp), it_tab)
-       inv_rnd2(-112(%ebp), it_tab)
-       inv_rnd1(-128(%ebp), it_tab)
-       inv_rnd2(-144(%ebp), il_tab)    // last round uses a different table
-
-// move final values to the output array.  CAUTION: the 
-// order of these assigns rely on the register mappings
-
-       add     $8,%esp
-       mov     out_blk+12(%esp),%ebp
-       mov     %r5,12(%ebp)
-       pop     %edi
-       mov     %r4,8(%ebp)
-       pop     %esi
-       mov     %r1,4(%ebp)
-       pop     %ebx
-       mov     %r0,(%ebp)
-       pop     %ebp
-       mov     $1,%eax
-       ret
-
diff --git a/arch/i386/crypto/aes_32.c b/arch/i386/crypto/aes_32.c
deleted file mode 100644 (file)
index 49aad93..0000000
+++ /dev/null
@@ -1,515 +0,0 @@
-/* 
- * 
- * Glue Code for optimized 586 assembler version of AES
- *
- * Copyright (c) 2002, Dr Brian Gladman <>, Worcester, UK.
- * All rights reserved.
- *
- * LICENSE TERMS
- *
- * The free distribution and use of this software in both source and binary
- * form is allowed (with or without changes) provided that:
- *
- *   1. distributions of this source code include the above copyright
- *      notice, this list of conditions and the following disclaimer;
- *
- *   2. distributions in binary form include the above copyright
- *      notice, this list of conditions and the following disclaimer
- *      in the documentation and/or other associated materials;
- *
- *   3. the copyright holder's name is not used to endorse products
- *      built using this software without specific written permission.
- *
- * ALTERNATIVELY, provided that this notice is retained in full, this product
- * may be distributed under the terms of the GNU General Public License (GPL),
- * in which case the provisions of the GPL apply INSTEAD OF those given above.
- *
- * DISCLAIMER
- *
- * This software is provided 'as is' with no explicit or implied warranties
- * in respect of its properties, including, but not limited to, correctness
- * and/or fitness for purpose.
- *
- * Copyright (c) 2003, Adam J. Richter <adam@yggdrasil.com> (conversion to
- * 2.5 API).
- * Copyright (c) 2003, 2004 Fruhwirth Clemens <clemens@endorphin.org>
- * Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com>
- *
- */
-
-#include <asm/byteorder.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/types.h>
-#include <linux/crypto.h>
-#include <linux/linkage.h>
-
-asmlinkage void aes_enc_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
-asmlinkage void aes_dec_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
-
-#define AES_MIN_KEY_SIZE       16
-#define AES_MAX_KEY_SIZE       32
-#define AES_BLOCK_SIZE         16
-#define AES_KS_LENGTH          4 * AES_BLOCK_SIZE
-#define RC_LENGTH              29
-
-struct aes_ctx {
-       u32 ekey[AES_KS_LENGTH];
-       u32 rounds;
-       u32 dkey[AES_KS_LENGTH];
-};
-
-#define WPOLY 0x011b
-#define bytes2word(b0, b1, b2, b3)  \
-       (((u32)(b3) << 24) | ((u32)(b2) << 16) | ((u32)(b1) << 8) | (b0))
-
-/* define the finite field multiplies required for Rijndael */
-#define f2(x) ((x) ? pow[log[x] + 0x19] : 0)
-#define f3(x) ((x) ? pow[log[x] + 0x01] : 0)
-#define f9(x) ((x) ? pow[log[x] + 0xc7] : 0)
-#define fb(x) ((x) ? pow[log[x] + 0x68] : 0)
-#define fd(x) ((x) ? pow[log[x] + 0xee] : 0)
-#define fe(x) ((x) ? pow[log[x] + 0xdf] : 0)
-#define fi(x) ((x) ?   pow[255 - log[x]]: 0)
-
-static inline u32 upr(u32 x, int n)
-{
-       return (x << 8 * n) | (x >> (32 - 8 * n));
-}
-
-static inline u8 bval(u32 x, int n)
-{
-       return x >> 8 * n;
-}
-
-/* The forward and inverse affine transformations used in the S-box */
-#define fwd_affine(x) \
-       (w = (u32)x, w ^= (w<<1)^(w<<2)^(w<<3)^(w<<4), 0x63^(u8)(w^(w>>8)))
-
-#define inv_affine(x) \
-       (w = (u32)x, w = (w<<1)^(w<<3)^(w<<6), 0x05^(u8)(w^(w>>8)))
-
-static u32 rcon_tab[RC_LENGTH];
-
-u32 ft_tab[4][256];
-u32 fl_tab[4][256];
-static u32 im_tab[4][256];
-u32 il_tab[4][256];
-u32 it_tab[4][256];
-
-static void gen_tabs(void)
-{
-       u32 i, w;
-       u8 pow[512], log[256];
-
-       /*
-        * log and power tables for GF(2^8) finite field with
-        * WPOLY as modular polynomial - the simplest primitive
-        * root is 0x03, used here to generate the tables.
-        */
-       i = 0; w = 1; 
-       
-       do {
-               pow[i] = (u8)w;
-               pow[i + 255] = (u8)w;
-               log[w] = (u8)i++;
-               w ^=  (w << 1) ^ (w & 0x80 ? WPOLY : 0);
-       } while (w != 1);
-       
-       for(i = 0, w = 1; i < RC_LENGTH; ++i) {
-               rcon_tab[i] = bytes2word(w, 0, 0, 0);
-               w = f2(w);
-       }
-
-       for(i = 0; i < 256; ++i) {
-               u8 b;
-               
-               b = fwd_affine(fi((u8)i));
-               w = bytes2word(f2(b), b, b, f3(b));
-
-               /* tables for a normal encryption round */
-               ft_tab[0][i] = w;
-               ft_tab[1][i] = upr(w, 1);
-               ft_tab[2][i] = upr(w, 2);
-               ft_tab[3][i] = upr(w, 3);
-               w = bytes2word(b, 0, 0, 0);
-               
-               /*
-                * tables for last encryption round
-                * (may also be used in the key schedule)
-                */
-               fl_tab[0][i] = w;
-               fl_tab[1][i] = upr(w, 1);
-               fl_tab[2][i] = upr(w, 2);
-               fl_tab[3][i] = upr(w, 3);
-               
-               b = fi(inv_affine((u8)i));
-               w = bytes2word(fe(b), f9(b), fd(b), fb(b));
-
-               /* tables for the inverse mix column operation  */
-               im_tab[0][b] = w;
-               im_tab[1][b] = upr(w, 1);
-               im_tab[2][b] = upr(w, 2);
-               im_tab[3][b] = upr(w, 3);
-
-               /* tables for a normal decryption round */
-               it_tab[0][i] = w;
-               it_tab[1][i] = upr(w,1);
-               it_tab[2][i] = upr(w,2);
-               it_tab[3][i] = upr(w,3);
-
-               w = bytes2word(b, 0, 0, 0);
-               
-               /* tables for last decryption round */
-               il_tab[0][i] = w;
-               il_tab[1][i] = upr(w,1);
-               il_tab[2][i] = upr(w,2);
-               il_tab[3][i] = upr(w,3);
-    }
-}
-
-#define four_tables(x,tab,vf,rf,c)             \
-(      tab[0][bval(vf(x,0,c),rf(0,c))] ^       \
-       tab[1][bval(vf(x,1,c),rf(1,c))] ^       \
-       tab[2][bval(vf(x,2,c),rf(2,c))] ^       \
-       tab[3][bval(vf(x,3,c),rf(3,c))]         \
-)
-
-#define vf1(x,r,c)  (x)
-#define rf1(r,c)    (r)
-#define rf2(r,c)    ((r-c)&3)
-
-#define inv_mcol(x) four_tables(x,im_tab,vf1,rf1,0)
-#define ls_box(x,c) four_tables(x,fl_tab,vf1,rf2,c)
-
-#define ff(x) inv_mcol(x)
-
-#define ke4(k,i)                                                       \
-{                                                                      \
-       k[4*(i)+4] = ss[0] ^= ls_box(ss[3],3) ^ rcon_tab[i];            \
-       k[4*(i)+5] = ss[1] ^= ss[0];                                    \
-       k[4*(i)+6] = ss[2] ^= ss[1];                                    \
-       k[4*(i)+7] = ss[3] ^= ss[2];                                    \
-}
-
-#define kel4(k,i)                                                      \
-{                                                                      \
-       k[4*(i)+4] = ss[0] ^= ls_box(ss[3],3) ^ rcon_tab[i];            \
-       k[4*(i)+5] = ss[1] ^= ss[0];                                    \
-       k[4*(i)+6] = ss[2] ^= ss[1]; k[4*(i)+7] = ss[3] ^= ss[2];       \
-}
-
-#define ke6(k,i)                                                       \
-{                                                                      \
-       k[6*(i)+ 6] = ss[0] ^= ls_box(ss[5],3) ^ rcon_tab[i];           \
-       k[6*(i)+ 7] = ss[1] ^= ss[0];                                   \
-       k[6*(i)+ 8] = ss[2] ^= ss[1];                                   \
-       k[6*(i)+ 9] = ss[3] ^= ss[2];                                   \
-       k[6*(i)+10] = ss[4] ^= ss[3];                                   \
-       k[6*(i)+11] = ss[5] ^= ss[4];                                   \
-}
-
-#define kel6(k,i)                                                      \
-{                                                                      \
-       k[6*(i)+ 6] = ss[0] ^= ls_box(ss[5],3) ^ rcon_tab[i];           \
-       k[6*(i)+ 7] = ss[1] ^= ss[0];                                   \
-       k[6*(i)+ 8] = ss[2] ^= ss[1];                                   \
-       k[6*(i)+ 9] = ss[3] ^= ss[2];                                   \
-}
-
-#define ke8(k,i)                                                       \
-{                                                                      \
-       k[8*(i)+ 8] = ss[0] ^= ls_box(ss[7],3) ^ rcon_tab[i];           \
-       k[8*(i)+ 9] = ss[1] ^= ss[0];                                   \
-       k[8*(i)+10] = ss[2] ^= ss[1];                                   \
-       k[8*(i)+11] = ss[3] ^= ss[2];                                   \
-       k[8*(i)+12] = ss[4] ^= ls_box(ss[3],0);                         \
-       k[8*(i)+13] = ss[5] ^= ss[4];                                   \
-       k[8*(i)+14] = ss[6] ^= ss[5];                                   \
-       k[8*(i)+15] = ss[7] ^= ss[6];                                   \
-}
-
-#define kel8(k,i)                                                      \
-{                                                                      \
-       k[8*(i)+ 8] = ss[0] ^= ls_box(ss[7],3) ^ rcon_tab[i];           \
-       k[8*(i)+ 9] = ss[1] ^= ss[0];                                   \
-       k[8*(i)+10] = ss[2] ^= ss[1];                                   \
-       k[8*(i)+11] = ss[3] ^= ss[2];                                   \
-}
-
-#define kdf4(k,i)                                                      \
-{                                                                      \
-       ss[0] = ss[0] ^ ss[2] ^ ss[1] ^ ss[3];                          \
-       ss[1] = ss[1] ^ ss[3];                                          \
-       ss[2] = ss[2] ^ ss[3];                                          \
-       ss[3] = ss[3];                                                  \
-       ss[4] = ls_box(ss[(i+3) % 4], 3) ^ rcon_tab[i];                 \
-       ss[i % 4] ^= ss[4];                                             \
-       ss[4] ^= k[4*(i)];                                              \
-       k[4*(i)+4] = ff(ss[4]);                                         \
-       ss[4] ^= k[4*(i)+1];                                            \
-       k[4*(i)+5] = ff(ss[4]);                                         \
-       ss[4] ^= k[4*(i)+2];                                            \
-       k[4*(i)+6] = ff(ss[4]);                                         \
-       ss[4] ^= k[4*(i)+3];                                            \
-       k[4*(i)+7] = ff(ss[4]);                                         \
-}
-
-#define kd4(k,i)                                                       \
-{                                                                      \
-       ss[4] = ls_box(ss[(i+3) % 4], 3) ^ rcon_tab[i];                 \
-       ss[i % 4] ^= ss[4];                                             \
-       ss[4] = ff(ss[4]);                                              \
-       k[4*(i)+4] = ss[4] ^= k[4*(i)];                                 \
-       k[4*(i)+5] = ss[4] ^= k[4*(i)+1];                               \
-       k[4*(i)+6] = ss[4] ^= k[4*(i)+2];                               \
-       k[4*(i)+7] = ss[4] ^= k[4*(i)+3];                               \
-}
-
-#define kdl4(k,i)                                                      \
-{                                                                      \
-       ss[4] = ls_box(ss[(i+3) % 4], 3) ^ rcon_tab[i];                 \
-       ss[i % 4] ^= ss[4];                                             \
-       k[4*(i)+4] = (ss[0] ^= ss[1]) ^ ss[2] ^ ss[3];                  \
-       k[4*(i)+5] = ss[1] ^ ss[3];                                     \
-       k[4*(i)+6] = ss[0];                                             \
-       k[4*(i)+7] = ss[1];                                             \
-}
-
-#define kdf6(k,i)                                                      \
-{                                                                      \
-       ss[0] ^= ls_box(ss[5],3) ^ rcon_tab[i];                         \
-       k[6*(i)+ 6] = ff(ss[0]);                                        \
-       ss[1] ^= ss[0];                                                 \
-       k[6*(i)+ 7] = ff(ss[1]);                                        \
-       ss[2] ^= ss[1];                                                 \
-       k[6*(i)+ 8] = ff(ss[2]);                                        \
-       ss[3] ^= ss[2];                                                 \
-       k[6*(i)+ 9] = ff(ss[3]);                                        \
-       ss[4] ^= ss[3];                                                 \
-       k[6*(i)+10] = ff(ss[4]);                                        \
-       ss[5] ^= ss[4];                                                 \
-       k[6*(i)+11] = ff(ss[5]);                                        \
-}
-
-#define kd6(k,i)                                                       \
-{                                                                      \
-       ss[6] = ls_box(ss[5],3) ^ rcon_tab[i];                          \
-       ss[0] ^= ss[6]; ss[6] = ff(ss[6]);                              \
-       k[6*(i)+ 6] = ss[6] ^= k[6*(i)];                                \
-       ss[1] ^= ss[0];                                                 \
-       k[6*(i)+ 7] = ss[6] ^= k[6*(i)+ 1];                             \
-       ss[2] ^= ss[1];                                                 \
-       k[6*(i)+ 8] = ss[6] ^= k[6*(i)+ 2];                             \
-       ss[3] ^= ss[2];                                                 \
-       k[6*(i)+ 9] = ss[6] ^= k[6*(i)+ 3];                             \
-       ss[4] ^= ss[3];                                                 \
-       k[6*(i)+10] = ss[6] ^= k[6*(i)+ 4];                             \
-       ss[5] ^= ss[4];                                                 \
-       k[6*(i)+11] = ss[6] ^= k[6*(i)+ 5];                             \
-}
-
-#define kdl6(k,i)                                                      \
-{                                                                      \
-       ss[0] ^= ls_box(ss[5],3) ^ rcon_tab[i];                         \
-       k[6*(i)+ 6] = ss[0];                                            \
-       ss[1] ^= ss[0];                                                 \
-       k[6*(i)+ 7] = ss[1];                                            \
-       ss[2] ^= ss[1];                                                 \
-       k[6*(i)+ 8] = ss[2];                                            \
-       ss[3] ^= ss[2];                                                 \
-       k[6*(i)+ 9] = ss[3];                                            \
-}
-
-#define kdf8(k,i)                                                      \
-{                                                                      \
-       ss[0] ^= ls_box(ss[7],3) ^ rcon_tab[i];                         \
-       k[8*(i)+ 8] = ff(ss[0]);                                        \
-       ss[1] ^= ss[0];                                                 \
-       k[8*(i)+ 9] = ff(ss[1]);                                        \
-       ss[2] ^= ss[1];                                                 \
-       k[8*(i)+10] = ff(ss[2]);                                        \
-       ss[3] ^= ss[2];                                                 \
-       k[8*(i)+11] = ff(ss[3]);                                        \
-       ss[4] ^= ls_box(ss[3],0);                                       \
-       k[8*(i)+12] = ff(ss[4]);                                        \
-       ss[5] ^= ss[4];                                                 \
-       k[8*(i)+13] = ff(ss[5]);                                        \
-       ss[6] ^= ss[5];                                                 \
-       k[8*(i)+14] = ff(ss[6]);                                        \
-       ss[7] ^= ss[6];                                                 \
-       k[8*(i)+15] = ff(ss[7]);                                        \
-}
-
-#define kd8(k,i)                                                       \
-{                                                                      \
-       u32 __g = ls_box(ss[7],3) ^ rcon_tab[i];                        \
-       ss[0] ^= __g;                                                   \
-       __g = ff(__g);                                                  \
-       k[8*(i)+ 8] = __g ^= k[8*(i)];                                  \
-       ss[1] ^= ss[0];                                                 \
-       k[8*(i)+ 9] = __g ^= k[8*(i)+ 1];                               \
-       ss[2] ^= ss[1];                                                 \
-       k[8*(i)+10] = __g ^= k[8*(i)+ 2];                               \
-       ss[3] ^= ss[2];                                                 \
-       k[8*(i)+11] = __g ^= k[8*(i)+ 3];                               \
-       __g = ls_box(ss[3],0);                                          \
-       ss[4] ^= __g;                                                   \
-       __g = ff(__g);                                                  \
-       k[8*(i)+12] = __g ^= k[8*(i)+ 4];                               \
-       ss[5] ^= ss[4];                                                 \
-       k[8*(i)+13] = __g ^= k[8*(i)+ 5];                               \
-       ss[6] ^= ss[5];                                                 \
-       k[8*(i)+14] = __g ^= k[8*(i)+ 6];                               \
-       ss[7] ^= ss[6];                                                 \
-       k[8*(i)+15] = __g ^= k[8*(i)+ 7];                               \
-}
-
-#define kdl8(k,i)                                                      \
-{                                                                      \
-       ss[0] ^= ls_box(ss[7],3) ^ rcon_tab[i];                         \
-       k[8*(i)+ 8] = ss[0];                                            \
-       ss[1] ^= ss[0];                                                 \
-       k[8*(i)+ 9] = ss[1];                                            \
-       ss[2] ^= ss[1];                                                 \
-       k[8*(i)+10] = ss[2];                                            \
-       ss[3] ^= ss[2];                                                 \
-       k[8*(i)+11] = ss[3];                                            \
-}
-
-static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
-                      unsigned int key_len)
-{
-       int i;
-       u32 ss[8];
-       struct aes_ctx *ctx = crypto_tfm_ctx(tfm);
-       const __le32 *key = (const __le32 *)in_key;
-       u32 *flags = &tfm->crt_flags;
-
-       /* encryption schedule */
-       
-       ctx->ekey[0] = ss[0] = le32_to_cpu(key[0]);
-       ctx->ekey[1] = ss[1] = le32_to_cpu(key[1]);
-       ctx->ekey[2] = ss[2] = le32_to_cpu(key[2]);
-       ctx->ekey[3] = ss[3] = le32_to_cpu(key[3]);
-
-       switch(key_len) {
-       case 16:
-               for (i = 0; i < 9; i++)
-                       ke4(ctx->ekey, i);
-               kel4(ctx->ekey, 9);
-               ctx->rounds = 10;
-               break;
-               
-       case 24:
-               ctx->ekey[4] = ss[4] = le32_to_cpu(key[4]);
-               ctx->ekey[5] = ss[5] = le32_to_cpu(key[5]);
-               for (i = 0; i < 7; i++)
-                       ke6(ctx->ekey, i);
-               kel6(ctx->ekey, 7); 
-               ctx->rounds = 12;
-               break;
-
-       case 32:
-               ctx->ekey[4] = ss[4] = le32_to_cpu(key[4]);
-               ctx->ekey[5] = ss[5] = le32_to_cpu(key[5]);
-               ctx->ekey[6] = ss[6] = le32_to_cpu(key[6]);
-               ctx->ekey[7] = ss[7] = le32_to_cpu(key[7]);
-               for (i = 0; i < 6; i++)
-                       ke8(ctx->ekey, i);
-               kel8(ctx->ekey, 6);
-               ctx->rounds = 14;
-               break;
-
-       default:
-               *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
-               return -EINVAL;
-       }
-       
-       /* decryption schedule */
-       
-       ctx->dkey[0] = ss[0] = le32_to_cpu(key[0]);
-       ctx->dkey[1] = ss[1] = le32_to_cpu(key[1]);
-       ctx->dkey[2] = ss[2] = le32_to_cpu(key[2]);
-       ctx->dkey[3] = ss[3] = le32_to_cpu(key[3]);
-
-       switch (key_len) {
-       case 16:
-               kdf4(ctx->dkey, 0);
-               for (i = 1; i < 9; i++)
-                       kd4(ctx->dkey, i);
-               kdl4(ctx->dkey, 9);
-               break;
-               
-       case 24:
-               ctx->dkey[4] = ff(ss[4] = le32_to_cpu(key[4]));
-               ctx->dkey[5] = ff(ss[5] = le32_to_cpu(key[5]));
-               kdf6(ctx->dkey, 0);
-               for (i = 1; i < 7; i++)
-                       kd6(ctx->dkey, i);
-               kdl6(ctx->dkey, 7);
-               break;
-
-       case 32:
-               ctx->dkey[4] = ff(ss[4] = le32_to_cpu(key[4]));
-               ctx->dkey[5] = ff(ss[5] = le32_to_cpu(key[5]));
-               ctx->dkey[6] = ff(ss[6] = le32_to_cpu(key[6]));
-               ctx->dkey[7] = ff(ss[7] = le32_to_cpu(key[7]));
-               kdf8(ctx->dkey, 0);
-               for (i = 1; i < 6; i++)
-                       kd8(ctx->dkey, i);
-               kdl8(ctx->dkey, 6);
-               break;
-       }
-       return 0;
-}
-
-static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
-{
-       aes_enc_blk(tfm, dst, src);
-}
-
-static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
-{
-       aes_dec_blk(tfm, dst, src);
-}
-
-static struct crypto_alg aes_alg = {
-       .cra_name               =       "aes",
-       .cra_driver_name        =       "aes-i586",
-       .cra_priority           =       200,
-       .cra_flags              =       CRYPTO_ALG_TYPE_CIPHER,
-       .cra_blocksize          =       AES_BLOCK_SIZE,
-       .cra_ctxsize            =       sizeof(struct aes_ctx),
-       .cra_module             =       THIS_MODULE,
-       .cra_list               =       LIST_HEAD_INIT(aes_alg.cra_list),
-       .cra_u                  =       {
-               .cipher = {
-                       .cia_min_keysize        =       AES_MIN_KEY_SIZE,
-                       .cia_max_keysize        =       AES_MAX_KEY_SIZE,
-                       .cia_setkey             =       aes_set_key,
-                       .cia_encrypt            =       aes_encrypt,
-                       .cia_decrypt            =       aes_decrypt
-               }
-       }
-};
-
-static int __init aes_init(void)
-{
-       gen_tabs();
-       return crypto_register_alg(&aes_alg);
-}
-
-static void __exit aes_fini(void)
-{
-       crypto_unregister_alg(&aes_alg);
-}
-
-module_init(aes_init);
-module_exit(aes_fini);
-
-MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm, i586 asm optimized");
-MODULE_LICENSE("Dual BSD/GPL");
-MODULE_AUTHOR("Fruhwirth Clemens, James Morris, Brian Gladman, Adam Richter");
-MODULE_ALIAS("aes");
diff --git a/arch/i386/crypto/twofish-i586-asm_32.S b/arch/i386/crypto/twofish-i586-asm_32.S
deleted file mode 100644 (file)
index 39b98ed..0000000
+++ /dev/null
@@ -1,335 +0,0 @@
-/***************************************************************************
-*   Copyright (C) 2006 by Joachim Fritschi, <jfritschi@freenet.de>        *
-*                                                                         *
-*   This program is free software; you can redistribute it and/or modify  *
-*   it under the terms of the GNU General Public License as published by  *
-*   the Free Software Foundation; either version 2 of the License, or     *
-*   (at your option) any later version.                                   *
-*                                                                         *
-*   This program is distributed in the hope that it will be useful,       *
-*   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
-*   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
-*   GNU General Public License for more details.                          *
-*                                                                         *
-*   You should have received a copy of the GNU General Public License     *
-*   along with this program; if not, write to the                         *
-*   Free Software Foundation, Inc.,                                       *
-*   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
-***************************************************************************/
-
-.file "twofish-i586-asm.S"
-.text
-
-#include <asm/asm-offsets.h>
-
-/* return adress at 0 */
-
-#define in_blk    12  /* input byte array address parameter*/
-#define out_blk   8  /* output byte array address parameter*/
-#define tfm       4  /* Twofish context structure */
-
-#define a_offset       0
-#define b_offset       4
-#define c_offset       8
-#define d_offset       12
-
-/* Structure of the crypto context struct*/
-
-#define s0     0       /* S0 Array 256 Words each */
-#define s1     1024    /* S1 Array */
-#define s2     2048    /* S2 Array */
-#define s3     3072    /* S3 Array */
-#define w      4096    /* 8 whitening keys (word) */
-#define k      4128    /* key 1-32 ( word ) */
-
-/* define a few register aliases to allow macro substitution */
-
-#define R0D    %eax
-#define R0B    %al
-#define R0H    %ah
-
-#define R1D    %ebx
-#define R1B    %bl
-#define R1H    %bh
-
-#define R2D    %ecx
-#define R2B    %cl
-#define R2H    %ch
-
-#define R3D    %edx
-#define R3B    %dl
-#define R3H    %dh
-
-
-/* performs input whitening */
-#define input_whitening(src,context,offset)\
-       xor     w+offset(context),      src;
-
-/* performs input whitening */
-#define output_whitening(src,context,offset)\
-       xor     w+16+offset(context),   src;
-
-/*
- * a input register containing a (rotated 16)
- * b input register containing b
- * c input register containing c
- * d input register containing d (already rol $1)
- * operations on a and b are interleaved to increase performance
- */
-#define encrypt_round(a,b,c,d,round)\
-       push    d ## D;\
-       movzx   b ## B,         %edi;\
-       mov     s1(%ebp,%edi,4),d ## D;\
-       movzx   a ## B,         %edi;\
-       mov     s2(%ebp,%edi,4),%esi;\
-       movzx   b ## H,         %edi;\
-       ror     $16,            b ## D;\
-       xor     s2(%ebp,%edi,4),d ## D;\
-       movzx   a ## H,         %edi;\
-       ror     $16,            a ## D;\
-       xor     s3(%ebp,%edi,4),%esi;\
-       movzx   b ## B,         %edi;\
-       xor     s3(%ebp,%edi,4),d ## D;\
-       movzx   a ## B,         %edi;\
-       xor     (%ebp,%edi,4),  %esi;\
-       movzx   b ## H,         %edi;\
-       ror     $15,            b ## D;\
-       xor     (%ebp,%edi,4),  d ## D;\
-       movzx   a ## H,         %edi;\
-       xor     s1(%ebp,%edi,4),%esi;\
-       pop     %edi;\
-       add     d ## D,         %esi;\
-       add     %esi,           d ## D;\
-       add     k+round(%ebp),  %esi;\
-       xor     %esi,           c ## D;\
-       rol     $15,            c ## D;\
-       add     k+4+round(%ebp),d ## D;\
-       xor     %edi,           d ## D;
-
-/*
- * a input register containing a (rotated 16)
- * b input register containing b
- * c input register containing c
- * d input register containing d (already rol $1)
- * operations on a and b are interleaved to increase performance
- * last round has different rotations for the output preparation
- */
-#define encrypt_last_round(a,b,c,d,round)\
-       push    d ## D;\
-       movzx   b ## B,         %edi;\
-       mov     s1(%ebp,%edi,4),d ## D;\
-       movzx   a ## B,         %edi;\
-       mov     s2(%ebp,%edi,4),%esi;\
-       movzx   b ## H,         %edi;\
-       ror     $16,            b ## D;\
-       xor     s2(%ebp,%edi,4),d ## D;\
-       movzx   a ## H,         %edi;\
-       ror     $16,            a ## D;\
-       xor     s3(%ebp,%edi,4),%esi;\
-       movzx   b ## B,         %edi;\
-       xor     s3(%ebp,%edi,4),d ## D;\
-       movzx   a ## B,         %edi;\
-       xor     (%ebp,%edi,4),  %esi;\
-       movzx   b ## H,         %edi;\
-       ror     $16,            b ## D;\
-       xor     (%ebp,%edi,4),  d ## D;\
-       movzx   a ## H,         %edi;\
-       xor     s1(%ebp,%edi,4),%esi;\
-       pop     %edi;\
-       add     d ## D,         %esi;\
-       add     %esi,           d ## D;\
-       add     k+round(%ebp),  %esi;\
-       xor     %esi,           c ## D;\
-       ror     $1,             c ## D;\
-       add     k+4+round(%ebp),d ## D;\
-       xor     %edi,           d ## D;
-
-/*
- * a input register containing a
- * b input register containing b (rotated 16)
- * c input register containing c
- * d input register containing d (already rol $1)
- * operations on a and b are interleaved to increase performance
- */
-#define decrypt_round(a,b,c,d,round)\
-       push    c ## D;\
-       movzx   a ## B,         %edi;\
-       mov     (%ebp,%edi,4),  c ## D;\
-       movzx   b ## B,         %edi;\
-       mov     s3(%ebp,%edi,4),%esi;\
-       movzx   a ## H,         %edi;\
-       ror     $16,            a ## D;\
-       xor     s1(%ebp,%edi,4),c ## D;\
-       movzx   b ## H,         %edi;\
-       ror     $16,            b ## D;\
-       xor     (%ebp,%edi,4),  %esi;\
-       movzx   a ## B,         %edi;\
-       xor     s2(%ebp,%edi,4),c ## D;\
-       movzx   b ## B,         %edi;\
-       xor     s1(%ebp,%edi,4),%esi;\
-       movzx   a ## H,         %edi;\
-       ror     $15,            a ## D;\
-       xor     s3(%ebp,%edi,4),c ## D;\
-       movzx   b ## H,         %edi;\
-       xor     s2(%ebp,%edi,4),%esi;\
-       pop     %edi;\
-       add     %esi,           c ## D;\
-       add     c ## D,         %esi;\
-       add     k+round(%ebp),  c ## D;\
-       xor     %edi,           c ## D;\
-       add     k+4+round(%ebp),%esi;\
-       xor     %esi,           d ## D;\
-       rol     $15,            d ## D;
-
-/*
- * a input register containing a
- * b input register containing b (rotated 16)
- * c input register containing c
- * d input register containing d (already rol $1)
- * operations on a and b are interleaved to increase performance
- * last round has different rotations for the output preparation
- */
-#define decrypt_last_round(a,b,c,d,round)\
-       push    c ## D;\
-       movzx   a ## B,         %edi;\
-       mov     (%ebp,%edi,4),  c ## D;\
-       movzx   b ## B,         %edi;\
-       mov     s3(%ebp,%edi,4),%esi;\
-       movzx   a ## H,         %edi;\
-       ror     $16,            a ## D;\
-       xor     s1(%ebp,%edi,4),c ## D;\
-       movzx   b ## H,         %edi;\
-       ror     $16,            b ## D;\
-       xor     (%ebp,%edi,4),  %esi;\
-       movzx   a ## B,         %edi;\
-       xor     s2(%ebp,%edi,4),c ## D;\
-       movzx   b ## B,         %edi;\
-       xor     s1(%ebp,%edi,4),%esi;\
-       movzx   a ## H,         %edi;\
-       ror     $16,            a ## D;\
-       xor     s3(%ebp,%edi,4),c ## D;\
-       movzx   b ## H,         %edi;\
-       xor     s2(%ebp,%edi,4),%esi;\
-       pop     %edi;\
-       add     %esi,           c ## D;\
-       add     c ## D,         %esi;\
-       add     k+round(%ebp),  c ## D;\
-       xor     %edi,           c ## D;\
-       add     k+4+round(%ebp),%esi;\
-       xor     %esi,           d ## D;\
-       ror     $1,             d ## D;
-
-.align 4
-.global twofish_enc_blk
-.global twofish_dec_blk
-
-twofish_enc_blk:
-       push    %ebp                    /* save registers according to calling convention*/
-       push    %ebx
-       push    %esi
-       push    %edi
-
-       mov     tfm + 16(%esp), %ebp    /* abuse the base pointer: set new base bointer to the crypto tfm */
-       add     $crypto_tfm_ctx_offset, %ebp    /* ctx adress */
-       mov     in_blk+16(%esp),%edi    /* input adress in edi */
-
-       mov     (%edi),         %eax
-       mov     b_offset(%edi), %ebx
-       mov     c_offset(%edi), %ecx
-       mov     d_offset(%edi), %edx
-       input_whitening(%eax,%ebp,a_offset)
-       ror     $16,    %eax
-       input_whitening(%ebx,%ebp,b_offset)
-       input_whitening(%ecx,%ebp,c_offset)
-       input_whitening(%edx,%ebp,d_offset)
-       rol     $1,     %edx
-
-       encrypt_round(R0,R1,R2,R3,0);
-       encrypt_round(R2,R3,R0,R1,8);
-       encrypt_round(R0,R1,R2,R3,2*8);
-       encrypt_round(R2,R3,R0,R1,3*8);
-       encrypt_round(R0,R1,R2,R3,4*8);
-       encrypt_round(R2,R3,R0,R1,5*8);
-       encrypt_round(R0,R1,R2,R3,6*8);
-       encrypt_round(R2,R3,R0,R1,7*8);
-       encrypt_round(R0,R1,R2,R3,8*8);
-       encrypt_round(R2,R3,R0,R1,9*8);
-       encrypt_round(R0,R1,R2,R3,10*8);
-       encrypt_round(R2,R3,R0,R1,11*8);
-       encrypt_round(R0,R1,R2,R3,12*8);
-       encrypt_round(R2,R3,R0,R1,13*8);
-       encrypt_round(R0,R1,R2,R3,14*8);
-       encrypt_last_round(R2,R3,R0,R1,15*8);
-
-       output_whitening(%eax,%ebp,c_offset)
-       output_whitening(%ebx,%ebp,d_offset)
-       output_whitening(%ecx,%ebp,a_offset)
-       output_whitening(%edx,%ebp,b_offset)
-       mov     out_blk+16(%esp),%edi;
-       mov     %eax,           c_offset(%edi)
-       mov     %ebx,           d_offset(%edi)
-       mov     %ecx,           (%edi)
-       mov     %edx,           b_offset(%edi)
-
-       pop     %edi
-       pop     %esi
-       pop     %ebx
-       pop     %ebp
-       mov     $1,     %eax
-       ret
-
-twofish_dec_blk:
-       push    %ebp                    /* save registers according to calling convention*/
-       push    %ebx
-       push    %esi
-       push    %edi
-
-
-       mov     tfm + 16(%esp), %ebp    /* abuse the base pointer: set new base bointer to the crypto tfm */
-       add     $crypto_tfm_ctx_offset, %ebp    /* ctx adress */
-       mov     in_blk+16(%esp),%edi    /* input adress in edi */
-
-       mov     (%edi),         %eax
-       mov     b_offset(%edi), %ebx
-       mov     c_offset(%edi), %ecx
-       mov     d_offset(%edi), %edx
-       output_whitening(%eax,%ebp,a_offset)
-       output_whitening(%ebx,%ebp,b_offset)
-       ror     $16,    %ebx
-       output_whitening(%ecx,%ebp,c_offset)
-       output_whitening(%edx,%ebp,d_offset)
-       rol     $1,     %ecx
-
-       decrypt_round(R0,R1,R2,R3,15*8);
-       decrypt_round(R2,R3,R0,R1,14*8);
-       decrypt_round(R0,R1,R2,R3,13*8);
-       decrypt_round(R2,R3,R0,R1,12*8);
-       decrypt_round(R0,R1,R2,R3,11*8);
-       decrypt_round(R2,R3,R0,R1,10*8);
-       decrypt_round(R0,R1,R2,R3,9*8);
-       decrypt_round(R2,R3,R0,R1,8*8);
-       decrypt_round(R0,R1,R2,R3,7*8);
-       decrypt_round(R2,R3,R0,R1,6*8);
-       decrypt_round(R0,R1,R2,R3,5*8);
-       decrypt_round(R2,R3,R0,R1,4*8);
-       decrypt_round(R0,R1,R2,R3,3*8);
-       decrypt_round(R2,R3,R0,R1,2*8);
-       decrypt_round(R0,R1,R2,R3,1*8);
-       decrypt_last_round(R2,R3,R0,R1,0);
-
-       input_whitening(%eax,%ebp,c_offset)
-       input_whitening(%ebx,%ebp,d_offset)
-       input_whitening(%ecx,%ebp,a_offset)
-       input_whitening(%edx,%ebp,b_offset)
-       mov     out_blk+16(%esp),%edi;
-       mov     %eax,           c_offset(%edi)
-       mov     %ebx,           d_offset(%edi)
-       mov     %ecx,           (%edi)
-       mov     %edx,           b_offset(%edi)
-
-       pop     %edi
-       pop     %esi
-       pop     %ebx
-       pop     %ebp
-       mov     $1,     %eax
-       ret
diff --git a/arch/i386/crypto/twofish_32.c b/arch/i386/crypto/twofish_32.c
deleted file mode 100644 (file)
index e3004df..0000000
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- *  Glue Code for optimized 586 assembler version of TWOFISH
- *
- * Originally Twofish for GPG
- * By Matthew Skala <mskala@ansuz.sooke.bc.ca>, July 26, 1998
- * 256-bit key length added March 20, 1999
- * Some modifications to reduce the text size by Werner Koch, April, 1998
- * Ported to the kerneli patch by Marc Mutz <Marc@Mutz.com>
- * Ported to CryptoAPI by Colin Slater <hoho@tacomeat.net>
- *
- * The original author has disclaimed all copyright interest in this
- * code and thus put it in the public domain. The subsequent authors
- * have put this under the GNU General Public License.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
- * USA
- *
- * This code is a "clean room" implementation, written from the paper
- * _Twofish: A 128-Bit Block Cipher_ by Bruce Schneier, John Kelsey,
- * Doug Whiting, David Wagner, Chris Hall, and Niels Ferguson, available
- * through http://www.counterpane.com/twofish.html
- *
- * For background information on multiplication in finite fields, used for
- * the matrix operations in the key schedule, see the book _Contemporary
- * Abstract Algebra_ by Joseph A. Gallian, especially chapter 22 in the
- * Third Edition.
- */
-
-#include <crypto/twofish.h>
-#include <linux/crypto.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/types.h>
-
-
-asmlinkage void twofish_enc_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
-asmlinkage void twofish_dec_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
-
-static void twofish_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
-{
-       twofish_enc_blk(tfm, dst, src);
-}
-
-static void twofish_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
-{
-       twofish_dec_blk(tfm, dst, src);
-}
-
-static struct crypto_alg alg = {
-       .cra_name               =       "twofish",
-       .cra_driver_name        =       "twofish-i586",
-       .cra_priority           =       200,
-       .cra_flags              =       CRYPTO_ALG_TYPE_CIPHER,
-       .cra_blocksize          =       TF_BLOCK_SIZE,
-       .cra_ctxsize            =       sizeof(struct twofish_ctx),
-       .cra_alignmask          =       3,
-       .cra_module             =       THIS_MODULE,
-       .cra_list               =       LIST_HEAD_INIT(alg.cra_list),
-       .cra_u                  =       {
-               .cipher = {
-                       .cia_min_keysize        =       TF_MIN_KEY_SIZE,
-                       .cia_max_keysize        =       TF_MAX_KEY_SIZE,
-                       .cia_setkey             =       twofish_setkey,
-                       .cia_encrypt            =       twofish_encrypt,
-                       .cia_decrypt            =       twofish_decrypt
-               }
-       }
-};
-
-static int __init init(void)
-{
-       return crypto_register_alg(&alg);
-}
-
-static void __exit fini(void)
-{
-       crypto_unregister_alg(&alg);
-}
-
-module_init(init);
-module_exit(fini);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION ("Twofish Cipher Algorithm, i586 asm optimized");
-MODULE_ALIAS("twofish");
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
new file mode 100644 (file)
index 0000000..b1bcf7c
--- /dev/null
@@ -0,0 +1,5 @@
+ifeq ($(CONFIG_X86_32),y)
+include ${srctree}/arch/x86/crypto/Makefile_32
+else
+include ${srctree}/arch/x86_64/crypto/Makefile_64
+endif
diff --git a/arch/x86/crypto/Makefile_32 b/arch/x86/crypto/Makefile_32
new file mode 100644 (file)
index 0000000..2d873a2
--- /dev/null
@@ -0,0 +1,12 @@
+# 
+# x86/crypto/Makefile 
+# 
+# Arch-specific CryptoAPI modules.
+# 
+
+obj-$(CONFIG_CRYPTO_AES_586) += aes-i586.o
+obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o
+
+aes-i586-y := aes-i586-asm_32.o aes_32.o
+twofish-i586-y := twofish-i586-asm_32.o twofish_32.o
+
diff --git a/arch/x86/crypto/aes-i586-asm_32.S b/arch/x86/crypto/aes-i586-asm_32.S
new file mode 100644 (file)
index 0000000..f942f0c
--- /dev/null
@@ -0,0 +1,373 @@
+// -------------------------------------------------------------------------
+// Copyright (c) 2001, Dr Brian Gladman <                 >, Worcester, UK.
+// All rights reserved.
+//
+// LICENSE TERMS
+//
+// The free distribution and use of this software in both source and binary 
+// form is allowed (with or without changes) provided that:
+//
+//   1. distributions of this source code include the above copyright 
+//      notice, this list of conditions and the following disclaimer//
+//
+//   2. distributions in binary form include the above copyright
+//      notice, this list of conditions and the following disclaimer
+//      in the documentation and/or other associated materials//
+//
+//   3. the copyright holder's name is not used to endorse products 
+//      built using this software without specific written permission.
+//
+//
+// ALTERNATIVELY, provided that this notice is retained in full, this product
+// may be distributed under the terms of the GNU General Public License (GPL),
+// in which case the provisions of the GPL apply INSTEAD OF those given above.
+//
+// Copyright (c) 2004 Linus Torvalds <torvalds@osdl.org>
+// Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com>
+
+// DISCLAIMER
+//
+// This software is provided 'as is' with no explicit or implied warranties
+// in respect of its properties including, but not limited to, correctness 
+// and fitness for purpose.
+// -------------------------------------------------------------------------
+// Issue Date: 29/07/2002
+
+.file "aes-i586-asm.S"
+.text
+
+#include <asm/asm-offsets.h>
+
+#define tlen 1024   // length of each of 4 'xor' arrays (256 32-bit words)
+
+/* offsets to parameters with one register pushed onto stack */
+#define tfm 8
+#define out_blk 12
+#define in_blk 16
+
+/* offsets in crypto_tfm structure */
+#define ekey (crypto_tfm_ctx_offset + 0)
+#define nrnd (crypto_tfm_ctx_offset + 256)
+#define dkey (crypto_tfm_ctx_offset + 260)
+
+// register mapping for encrypt and decrypt subroutines
+
+#define r0  eax
+#define r1  ebx
+#define r2  ecx
+#define r3  edx
+#define r4  esi
+#define r5  edi
+
+#define eaxl  al
+#define eaxh  ah
+#define ebxl  bl
+#define ebxh  bh
+#define ecxl  cl
+#define ecxh  ch
+#define edxl  dl
+#define edxh  dh
+
+#define _h(reg) reg##h
+#define h(reg) _h(reg)
+
+#define _l(reg) reg##l
+#define l(reg) _l(reg)
+
+// This macro takes a 32-bit word representing a column and uses
+// each of its four bytes to index into four tables of 256 32-bit
+// words to obtain values that are then xored into the appropriate
+// output registers r0, r1, r4 or r5.  
+
+// Parameters:
+// table table base address
+//   a1  out_state[0]
+//   a2  out_state[1]
+//   a3  out_state[2]
+//   a4  out_state[3]
+//   idx input register for the round (destroyed)
+//   tmp scratch register for the round
+// sched key schedule
+
+#define do_col(table, a1,a2,a3,a4, idx, tmp)   \
+       movzx   %l(idx),%tmp;                   \
+       xor     table(,%tmp,4),%a1;             \
+       movzx   %h(idx),%tmp;                   \
+       shr     $16,%idx;                       \
+       xor     table+tlen(,%tmp,4),%a2;        \
+       movzx   %l(idx),%tmp;                   \
+       movzx   %h(idx),%idx;                   \
+       xor     table+2*tlen(,%tmp,4),%a3;      \
+       xor     table+3*tlen(,%idx,4),%a4;
+
+// initialise output registers from the key schedule
+// NB1: original value of a3 is in idx on exit
+// NB2: original values of a1,a2,a4 aren't used
+#define do_fcol(table, a1,a2,a3,a4, idx, tmp, sched) \
+       mov     0 sched,%a1;                    \
+       movzx   %l(idx),%tmp;                   \
+       mov     12 sched,%a2;                   \
+       xor     table(,%tmp,4),%a1;             \
+       mov     4 sched,%a4;                    \
+       movzx   %h(idx),%tmp;                   \
+       shr     $16,%idx;                       \
+       xor     table+tlen(,%tmp,4),%a2;        \
+       movzx   %l(idx),%tmp;                   \
+       movzx   %h(idx),%idx;                   \
+       xor     table+3*tlen(,%idx,4),%a4;      \
+       mov     %a3,%idx;                       \
+       mov     8 sched,%a3;                    \
+       xor     table+2*tlen(,%tmp,4),%a3;
+
+// initialise output registers from the key schedule
+// NB1: original value of a3 is in idx on exit
+// NB2: original values of a1,a2,a4 aren't used
+#define do_icol(table, a1,a2,a3,a4, idx, tmp, sched) \
+       mov     0 sched,%a1;                    \
+       movzx   %l(idx),%tmp;                   \
+       mov     4 sched,%a2;                    \
+       xor     table(,%tmp,4),%a1;             \
+       mov     12 sched,%a4;                   \
+       movzx   %h(idx),%tmp;                   \
+       shr     $16,%idx;                       \
+       xor     table+tlen(,%tmp,4),%a2;        \
+       movzx   %l(idx),%tmp;                   \
+       movzx   %h(idx),%idx;                   \
+       xor     table+3*tlen(,%idx,4),%a4;      \
+       mov     %a3,%idx;                       \
+       mov     8 sched,%a3;                    \
+       xor     table+2*tlen(,%tmp,4),%a3;
+
+
+// original Gladman had conditional saves to MMX regs.
+#define save(a1, a2)           \
+       mov     %a2,4*a1(%esp)
+
+#define restore(a1, a2)                \
+       mov     4*a2(%esp),%a1
+
+// These macros perform a forward encryption cycle. They are entered with
+// the first previous round column values in r0,r1,r4,r5 and
+// exit with the final values in the same registers, using stack
+// for temporary storage.
+
+// round column values
+// on entry: r0,r1,r4,r5
+// on exit:  r2,r1,r4,r5
+#define fwd_rnd1(arg, table)                                           \
+       save   (0,r1);                                                  \
+       save   (1,r5);                                                  \
+                                                                       \
+       /* compute new column values */                                 \
+       do_fcol(table, r2,r5,r4,r1, r0,r3, arg);        /* idx=r0 */    \
+       do_col (table, r4,r1,r2,r5, r0,r3);             /* idx=r4 */    \
+       restore(r0,0);                                                  \
+       do_col (table, r1,r2,r5,r4, r0,r3);             /* idx=r1 */    \
+       restore(r0,1);                                                  \
+       do_col (table, r5,r4,r1,r2, r0,r3);             /* idx=r5 */
+
+// round column values
+// on entry: r2,r1,r4,r5
+// on exit:  r0,r1,r4,r5
+#define fwd_rnd2(arg, table)                                           \
+       save   (0,r1);                                                  \
+       save   (1,r5);                                                  \
+                                                                       \
+       /* compute new column values */                                 \
+       do_fcol(table, r0,r5,r4,r1, r2,r3, arg);        /* idx=r2 */    \
+       do_col (table, r4,r1,r0,r5, r2,r3);             /* idx=r4 */    \
+       restore(r2,0);                                                  \
+       do_col (table, r1,r0,r5,r4, r2,r3);             /* idx=r1 */    \
+       restore(r2,1);                                                  \
+       do_col (table, r5,r4,r1,r0, r2,r3);             /* idx=r5 */
+
+// Each of these macros performs one inverse (decryption) round. An
+// inv_rnd1/inv_rnd2 pair is entered with the previous round's column values
+// in r0,r1,r4,r5 and exits with the new values in the same registers, using
+// the stack for temporary storage.
+
+// inv_rnd1(arg, table): one inverse round; arg = round-key operand, table = lookup table symbol
+// on entry: round column values in r0,r1,r4,r5
+// on exit:  round column values in r2,r1,r4,r5
+#define inv_rnd1(arg, table)                                           \
+       save    (0,r1);         /* keep the incoming r1 column */       \
+       save    (1,r5);         /* keep the incoming r5 column */       \
+       /* save/restore are defined earlier in the file; presumably they use the caller's 8-byte stack scratch area */ \
+       /* compute new column values */                                 \
+       do_icol(table, r2,r1,r4,r5, r0,r3, arg);        /* idx=r0 */    \
+       do_col (table, r4,r5,r2,r1, r0,r3);             /* idx=r4 */    \
+       restore(r0,0);          /* r0 <- slot 0 (the saved r1) */       \
+       do_col (table, r1,r4,r5,r2, r0,r3);             /* idx=r1 */    \
+       restore(r0,1);          /* r0 <- slot 1 (the saved r5) */       \
+       do_col (table, r5,r2,r1,r4, r0,r3);             /* idx=r5 */
+
+// inv_rnd2(arg, table): one inverse round; same as inv_rnd1 with the roles of r0 and r2 swapped
+// on entry: round column values in r2,r1,r4,r5
+// on exit:  round column values in r0,r1,r4,r5
+#define inv_rnd2(arg, table)                                           \
+       save    (0,r1);         /* keep the incoming r1 column */       \
+       save    (1,r5);         /* keep the incoming r5 column */       \
+       /* save/restore are defined earlier in the file; presumably they use the caller's 8-byte stack scratch area */ \
+       /* compute new column values */                                 \
+       do_icol(table, r0,r1,r4,r5, r2,r3, arg);        /* idx=r2 */    \
+       do_col (table, r4,r5,r0,r1, r2,r3);             /* idx=r4 */    \
+       restore(r2,0);          /* r2 <- slot 0 (the saved r1) */       \
+       do_col (table, r1,r4,r5,r0, r2,r3);             /* idx=r1 */    \
+       restore(r2,1);          /* r2 <- slot 1 (the saved r5) */       \
+       do_col (table, r5,r0,r1,r4, r2,r3);             /* idx=r5 */
+
+// AES (Rijndael) Encryption Subroutine: encrypts the 16 bytes at in_blk into out_blk
+/* void aes_enc_blk(struct crypto_tfm *tfm, u8 *out_blk, const u8 *in_blk) */
+
+.global  aes_enc_blk
+
+.extern  ft_tab                 // table used for every round but the last
+.extern  fl_tab                 // table used for the last round
+
+.align 4
+
+aes_enc_blk:
+       push    %ebp
+       mov     tfm(%esp),%ebp          // %ebp = tfm argument
+
+// CAUTION: the order and the values used in these assignments
+// rely on the register mappings
+
+1:     push    %ebx
+       mov     in_blk+4(%esp),%r2      // +4: %ebx has been pushed since tfm was read
+       push    %esi
+       mov     nrnd(%ebp),%r3   // number of rounds
+       push    %edi
+#if ekey != 0
+       lea     ekey(%ebp),%ebp  // key pointer
+#endif
+
+// input four columns and xor in first round key
+
+       mov     (%r2),%r0
+       mov     4(%r2),%r1
+       mov     8(%r2),%r4
+       mov     12(%r2),%r5
+       xor     (%ebp),%r0
+       xor     4(%ebp),%r1
+       xor     8(%ebp),%r4
+       xor     12(%ebp),%r5
+
+       sub     $8,%esp         // space for register saves on stack
+       add     $16,%ebp        // increment to next round key
+       cmp     $12,%r3         // flags from this compare feed both jb and je below
+       jb      4f              // 10 rounds for 128-bit key
+       lea     32(%ebp),%ebp   // skip two round keys; lea leaves the flags untouched
+       je      3f              // 12 rounds for 192-bit key
+       lea     32(%ebp),%ebp   // skip two more round keys for the 14-round case
+
+2:     fwd_rnd1( -64(%ebp) ,ft_tab)    // 14 rounds for 256-bit key
+       fwd_rnd2( -48(%ebp) ,ft_tab)
+3:     fwd_rnd1( -32(%ebp) ,ft_tab)    // 12 rounds for 192-bit key
+       fwd_rnd2( -16(%ebp) ,ft_tab)
+4:     fwd_rnd1(    (%ebp) ,ft_tab)    // 10 rounds for 128-bit key
+       fwd_rnd2( +16(%ebp) ,ft_tab)
+       fwd_rnd1( +32(%ebp) ,ft_tab)
+       fwd_rnd2( +48(%ebp) ,ft_tab)
+       fwd_rnd1( +64(%ebp) ,ft_tab)
+       fwd_rnd2( +80(%ebp) ,ft_tab)
+       fwd_rnd1( +96(%ebp) ,ft_tab)
+       fwd_rnd2(+112(%ebp) ,ft_tab)
+       fwd_rnd1(+128(%ebp) ,ft_tab)
+       fwd_rnd2(+144(%ebp) ,fl_tab)    // last round uses a different table
+
+// move final values to the output array.  CAUTION: the
+// order of these assignments relies on the register mappings
+
+       add     $8,%esp                 // release the two scratch slots
+       mov     out_blk+12(%esp),%ebp   // +12: %ebx, %esi and %edi are still on the stack
+       mov     %r5,12(%ebp)
+       pop     %edi
+       mov     %r4,8(%ebp)
+       pop     %esi
+       mov     %r1,4(%ebp)
+       pop     %ebx
+       mov     %r0,(%ebp)
+       pop     %ebp
+       mov     $1,%eax                 // NOTE(review): the C prototype is void; value presumably unused
+       ret
+
+// AES (Rijndael) Decryption Subroutine: decrypts the 16 bytes at in_blk into out_blk
+/* void aes_dec_blk(struct crypto_tfm *tfm, u8 *out_blk, const u8 *in_blk) */
+
+.global  aes_dec_blk
+
+.extern  it_tab                 // table used for every round but the last
+.extern  il_tab                 // table used for the last round
+
+.align 4
+
+aes_dec_blk:
+       push    %ebp
+       mov     tfm(%esp),%ebp          // %ebp = tfm argument
+
+// CAUTION: the order and the values used in these assignments
+// rely on the register mappings
+
+1:     push    %ebx
+       mov     in_blk+4(%esp),%r2      // +4: %ebx has been pushed since tfm was read
+       push    %esi
+       mov     nrnd(%ebp),%r3   // number of rounds
+       push    %edi
+#if dkey != 0
+       lea     dkey(%ebp),%ebp  // key pointer
+#endif
+       mov     %r3,%r0
+       shl     $4,%r0                  // rounds * 16 bytes per round key
+       add     %r0,%ebp                // start at the far end of the decryption schedule
+       
+// input four columns and xor in first round key
+
+       mov     (%r2),%r0
+       mov     4(%r2),%r1
+       mov     8(%r2),%r4
+       mov     12(%r2),%r5
+       xor     (%ebp),%r0
+       xor     4(%ebp),%r1
+       xor     8(%ebp),%r4
+       xor     12(%ebp),%r5
+
+       sub     $8,%esp         // space for register saves on stack
+       sub     $16,%ebp        // step back to the next round key (schedule is walked downwards)
+       cmp     $12,%r3         // flags from this compare feed both jb and je below
+       jb      4f              // 10 rounds for 128-bit key
+       lea     -32(%ebp),%ebp  // skip two round keys; lea leaves the flags untouched
+       je      3f              // 12 rounds for 192-bit key
+       lea     -32(%ebp),%ebp  // skip two more round keys for the 14-round case
+
+2:     inv_rnd1( +64(%ebp), it_tab)    // 14 rounds for 256-bit key
+       inv_rnd2( +48(%ebp), it_tab)
+3:     inv_rnd1( +32(%ebp), it_tab)    // 12 rounds for 192-bit key
+       inv_rnd2( +16(%ebp), it_tab)
+4:     inv_rnd1(    (%ebp), it_tab)    // 10 rounds for 128-bit key
+       inv_rnd2( -16(%ebp), it_tab)
+       inv_rnd1( -32(%ebp), it_tab)
+       inv_rnd2( -48(%ebp), it_tab)
+       inv_rnd1( -64(%ebp), it_tab)
+       inv_rnd2( -80(%ebp), it_tab)
+       inv_rnd1( -96(%ebp), it_tab)
+       inv_rnd2(-112(%ebp), it_tab)
+       inv_rnd1(-128(%ebp), it_tab)
+       inv_rnd2(-144(%ebp), il_tab)    // last round uses a different table
+
+// move final values to the output array.  CAUTION: the
+// order of these assignments relies on the register mappings
+
+       add     $8,%esp                 // release the two scratch slots
+       mov     out_blk+12(%esp),%ebp   // +12: %ebx, %esi and %edi are still on the stack
+       mov     %r5,12(%ebp)
+       pop     %edi
+       mov     %r4,8(%ebp)
+       pop     %esi
+       mov     %r1,4(%ebp)
+       pop     %ebx
+       mov     %r0,(%ebp)
+       pop     %ebp
+       mov     $1,%eax                 // NOTE(review): the C prototype is void; value presumably unused
+       ret
+
diff --git a/arch/x86/crypto/aes_32.c b/arch/x86/crypto/aes_32.c
new file mode 100644 (file)
index 0000000..49aad93
--- /dev/null
@@ -0,0 +1,515 @@
+/* 
+ * 
+ * Glue Code for optimized 586 assembler version of AES
+ *
+ * Copyright (c) 2002, Dr Brian Gladman <>, Worcester, UK.
+ * All rights reserved.
+ *
+ * LICENSE TERMS
+ *
+ * The free distribution and use of this software in both source and binary
+ * form is allowed (with or without changes) provided that:
+ *
+ *   1. distributions of this source code include the above copyright
+ *      notice, this list of conditions and the following disclaimer;
+ *
+ *   2. distributions in binary form include the above copyright
+ *      notice, this list of conditions and the following disclaimer
+ *      in the documentation and/or other associated materials;
+ *
+ *   3. the copyright holder's name is not used to endorse products
+ *      built using this software without specific written permission.
+ *
+ * ALTERNATIVELY, provided that this notice is retained in full, this product
+ * may be distributed under the terms of the GNU General Public License (GPL),
+ * in which case the provisions of the GPL apply INSTEAD OF those given above.
+ *
+ * DISCLAIMER
+ *
+ * This software is provided 'as is' with no explicit or implied warranties
+ * in respect of its properties, including, but not limited to, correctness
+ * and/or fitness for purpose.
+ *
+ * Copyright (c) 2003, Adam J. Richter <adam@yggdrasil.com> (conversion to
+ * 2.5 API).
+ * Copyright (c) 2003, 2004 Fruhwirth Clemens <clemens@endorphin.org>
+ * Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com>
+ *
+ */
+
+#include <asm/byteorder.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/crypto.h>
+#include <linux/linkage.h>
+
+asmlinkage void aes_enc_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
+asmlinkage void aes_dec_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
+
+#define AES_MIN_KEY_SIZE       16      /* smallest accepted key, in bytes */
+#define AES_MAX_KEY_SIZE       32      /* largest accepted key, in bytes */
+#define AES_BLOCK_SIZE         16      /* cipher block size, in bytes */
+#define AES_KS_LENGTH          (4 * AES_BLOCK_SIZE)    /* u32 words per key schedule; parenthesized so it is safe inside larger expressions */
+#define RC_LENGTH              29      /* number of entries in rcon_tab[] */
+
+struct aes_ctx {       /* per-transform context; NOTE(review): layout presumably must match the ekey/nrnd/dkey offsets used by the asm */
+       u32 ekey[AES_KS_LENGTH];        /* encryption key schedule, filled by aes_set_key() */
+       u32 rounds;                     /* 10, 12 or 14, set by aes_set_key() */
+       u32 dkey[AES_KS_LENGTH];        /* decryption key schedule, filled by aes_set_key() */
+};
+
+#define WPOLY 0x011b   /* modular polynomial of the GF(2^8) field used by gen_tabs() */
+#define bytes2word(b0, b1, b2, b3)  /* pack four bytes into a u32, b0 in the low byte */ \
+       (((u32)(b3) << 24) | ((u32)(b2) << 16) | ((u32)(b1) << 8) | (b0))
+
+/* define the finite field multiplies required for Rijndael; they need the local pow[] and log[] tables of gen_tabs() in scope */
+#define f2(x) ((x) ? pow[log[x] + 0x19] : 0)   /* x * 0x02 */
+#define f3(x) ((x) ? pow[log[x] + 0x01] : 0)   /* x * 0x03 */
+#define f9(x) ((x) ? pow[log[x] + 0xc7] : 0)   /* x * 0x09 */
+#define fb(x) ((x) ? pow[log[x] + 0x68] : 0)   /* x * 0x0b */
+#define fd(x) ((x) ? pow[log[x] + 0xee] : 0)   /* x * 0x0d */
+#define fe(x) ((x) ? pow[log[x] + 0xdf] : 0)   /* x * 0x0e */
+#define fi(x) ((x) ?   pow[255 - log[x]]: 0)   /* multiplicative inverse of x; 0 maps to 0 */
+
+static inline u32 upr(u32 x, int n)    /* rotate x left by n bytes, n in 0..3 */
+{
+       return (x << 8 * n) | (x >> ((32 - 8 * n) & 31));       /* count masked: n == 0 no longer shifts by 32 (undefined) */
+}
+
+static inline u8 bval(u32 x, int n)    /* extract byte n of x, byte 0 being the least significant */
+{
+       return x >> 8 * n;      /* the u8 return type truncates to the low byte */
+}
+
+/* The forward and inverse affine transformations used in the S-box; both use the caller's u32 'w' as scratch */
+#define fwd_affine(x) \
+       (w = (u32)(x), w ^= (w<<1)^(w<<2)^(w<<3)^(w<<4), 0x63^(u8)(w^(w>>8)))
+/* the argument is parenthesized so that an expression argument is cast as a whole */
+#define inv_affine(x) \
+       (w = (u32)(x), w = (w<<1)^(w<<3)^(w<<6), 0x05^(u8)(w^(w>>8)))
+
+static u32 rcon_tab[RC_LENGTH];        /* round constants for the key schedule, filled by gen_tabs() */
+/* The non-static tables below are referenced from the assembler via .extern, so they need external linkage. */
+u32 ft_tab[4][256];            /* normal encryption round */
+u32 fl_tab[4][256];            /* last encryption round; also used by ls_box() in the key schedule */
+static u32 im_tab[4][256];     /* inverse mix column, used only by the C key schedule */
+u32 il_tab[4][256];            /* last decryption round */
+u32 it_tab[4][256];            /* normal decryption round */
+
+static void gen_tabs(void)
+{      /* Fill rcon_tab and the ft/fl/im/it/il lookup tables; called from aes_init(). */
+       u32 i, w;
+       u8 pow[512], log[256];  /* pow[] is stored twice (i and i + 255) so pow[log[x] + c] needs no wrap-around */
+
+       /*
+        * log and power tables for GF(2^8) finite field with
+        * WPOLY as modular polynomial - the simplest primitive
+        * root is 0x03, used here to generate the tables.
+        */
+       i = 0; w = 1; 
+       /* each iteration records w = 0x03^i, then multiplies w by 0x03 */
+       do {
+               pow[i] = (u8)w;
+               pow[i + 255] = (u8)w;
+               log[w] = (u8)i++;
+               w ^=  (w << 1) ^ (w & 0x80 ? WPOLY : 0);        /* w ^ 2w, reduced by WPOLY on overflow */
+       } while (w != 1);
+       /* round constants: successive doublings of 1, stored in the low byte */
+       for(i = 0, w = 1; i < RC_LENGTH; ++i) {
+               rcon_tab[i] = bytes2word(w, 0, 0, 0);
+               w = f2(w);
+       }
+
+       for(i = 0; i < 256; ++i) {
+               u8 b;
+               /* b = forward S-box value of i: field inverse followed by the affine map */
+               b = fwd_affine(fi((u8)i));
+               w = bytes2word(f2(b), b, b, f3(b));
+
+               /* tables for a normal encryption round */
+               ft_tab[0][i] = w;
+               ft_tab[1][i] = upr(w, 1);
+               ft_tab[2][i] = upr(w, 2);
+               ft_tab[3][i] = upr(w, 3);
+               w = bytes2word(b, 0, 0, 0);
+               
+               /*
+                * tables for last encryption round
+                * (may also be used in the key schedule)
+                */
+               fl_tab[0][i] = w;
+               fl_tab[1][i] = upr(w, 1);
+               fl_tab[2][i] = upr(w, 2);
+               fl_tab[3][i] = upr(w, 3);
+               /* b = inverse S-box value of i: inverse affine map followed by the field inverse */
+               b = fi(inv_affine((u8)i));
+               w = bytes2word(fe(b), f9(b), fd(b), fb(b));
+
+               /* tables for the inverse mix column operation (indexed by b, not by i)  */
+               im_tab[0][b] = w;
+               im_tab[1][b] = upr(w, 1);
+               im_tab[2][b] = upr(w, 2);
+               im_tab[3][b] = upr(w, 3);
+
+               /* tables for a normal decryption round */
+               it_tab[0][i] = w;
+               it_tab[1][i] = upr(w,1);
+               it_tab[2][i] = upr(w,2);
+               it_tab[3][i] = upr(w,3);
+
+               w = bytes2word(b, 0, 0, 0);
+               
+               /* tables for last decryption round */
+               il_tab[0][i] = w;
+               il_tab[1][i] = upr(w,1);
+               il_tab[2][i] = upr(w,2);
+               il_tab[3][i] = upr(w,3);
+    }
+}
+
+#define four_tables(x,tab,vf,rf,c)     /* XOR of tab[0..3], each indexed by one byte of vf(x,...) chosen by rf */ \
+(      tab[0][bval(vf(x,0,c),rf(0,c))] ^       \
+       tab[1][bval(vf(x,1,c),rf(1,c))] ^       \
+       tab[2][bval(vf(x,2,c),rf(2,c))] ^       \
+       tab[3][bval(vf(x,3,c),rf(3,c))]         \
+)
+/* vf1: use the word unchanged; rf1: byte r; rf2: byte (r - c) mod 4, arguments parenthesized for macro safety */
+#define vf1(x,r,c)  (x)
+#define rf1(r,c)    (r)
+#define rf2(r,c)    (((r)-(c))&3)
+/* inv_mcol: inverse mix column via im_tab; ls_box: fl_tab lookup with the byte order rotated by c */
+#define inv_mcol(x) four_tables(x,im_tab,vf1,rf1,0)
+#define ls_box(x,c) four_tables(x,fl_tab,vf1,rf2,c)
+/* ff: short name for inv_mcol, used by the decryption key schedule macros */
+#define ff(x) inv_mcol(x)
+
+#define ke4(k,i)                                                       \
+{      /* 16-byte key: write schedule words 4i+4..4i+7; ss[0..3] is the running state */ \
+       k[4*(i)+4] = ss[0] ^= ls_box(ss[3],3) ^ rcon_tab[i];            \
+       k[4*(i)+5] = ss[1] ^= ss[0];                                    \
+       k[4*(i)+6] = ss[2] ^= ss[1];                                    \
+       k[4*(i)+7] = ss[3] ^= ss[2];                                    \
+}
+
+#define kel4(k,i)                                                      \
+{      /* final 16-byte-key step (called with i = 9); same operations as ke4 */ \
+       k[4*(i)+4] = ss[0] ^= ls_box(ss[3],3) ^ rcon_tab[i];            \
+       k[4*(i)+5] = ss[1] ^= ss[0];                                    \
+       k[4*(i)+6] = ss[2] ^= ss[1]; k[4*(i)+7] = ss[3] ^= ss[2];       \
+}
+
+#define ke6(k,i)                                                       \
+{      /* 24-byte key: write schedule words 6i+6..6i+11; ss[0..5] is the running state */ \
+       k[6*(i)+ 6] = ss[0] ^= ls_box(ss[5],3) ^ rcon_tab[i];           \
+       k[6*(i)+ 7] = ss[1] ^= ss[0];                                   \
+       k[6*(i)+ 8] = ss[2] ^= ss[1];                                   \
+       k[6*(i)+ 9] = ss[3] ^= ss[2];                                   \
+       k[6*(i)+10] = ss[4] ^= ss[3];                                   \
+       k[6*(i)+11] = ss[5] ^= ss[4];                                   \
+}
+
+#define kel6(k,i)                                                      \
+{      /* final 24-byte-key step (called with i = 7): only four more words are written */ \
+       k[6*(i)+ 6] = ss[0] ^= ls_box(ss[5],3) ^ rcon_tab[i];           \
+       k[6*(i)+ 7] = ss[1] ^= ss[0];                                   \
+       k[6*(i)+ 8] = ss[2] ^= ss[1];                                   \
+       k[6*(i)+ 9] = ss[3] ^= ss[2];                                   \
+}
+
+#define ke8(k,i)                                                       \
+{      /* 32-byte key: write schedule words 8i+8..8i+15; ss[0..7] is the running state */ \
+       k[8*(i)+ 8] = ss[0] ^= ls_box(ss[7],3) ^ rcon_tab[i];           \
+       k[8*(i)+ 9] = ss[1] ^= ss[0];                                   \
+       k[8*(i)+10] = ss[2] ^= ss[1];                                   \
+       k[8*(i)+11] = ss[3] ^= ss[2];                                   \
+       k[8*(i)+12] = ss[4] ^= ls_box(ss[3],0); /* no rotation, no rcon */ \
+       k[8*(i)+13] = ss[5] ^= ss[4];                                   \
+       k[8*(i)+14] = ss[6] ^= ss[5];                                   \
+       k[8*(i)+15] = ss[7] ^= ss[6];                                   \
+}
+
+#define kel8(k,i)                                                      \
+{      /* final 32-byte-key step (called with i = 6): only four more words are written */ \
+       k[8*(i)+ 8] = ss[0] ^= ls_box(ss[7],3) ^ rcon_tab[i];           \
+       k[8*(i)+ 9] = ss[1] ^= ss[0];                                   \
+       k[8*(i)+10] = ss[2] ^= ss[1];                                   \
+       k[8*(i)+11] = ss[3] ^= ss[2];                                   \
+}
+
+#define kdf4(k,i)                                                      \
+{      /* 16-byte key, decryption schedule, first step (called with i = 0); ss[4] is scratch */ \
+       ss[0] = ss[0] ^ ss[2] ^ ss[1] ^ ss[3];                          \
+       ss[1] = ss[1] ^ ss[3];                                          \
+       ss[2] = ss[2] ^ ss[3];                                          \
+       ss[3] = ss[3];                                                  \
+       ss[4] = ls_box(ss[(i+3) % 4], 3) ^ rcon_tab[i];                 \
+       ss[i % 4] ^= ss[4];                                             \
+       ss[4] ^= k[4*(i)];                                              \
+       k[4*(i)+4] = ff(ss[4]);         /* ff = inverse mix column */   \
+       ss[4] ^= k[4*(i)+1];                                            \
+       k[4*(i)+5] = ff(ss[4]);                                         \
+       ss[4] ^= k[4*(i)+2];                                            \
+       k[4*(i)+6] = ff(ss[4]);                                         \
+       ss[4] ^= k[4*(i)+3];                                            \
+       k[4*(i)+7] = ff(ss[4]);                                         \
+}
+
+#define kd4(k,i)                                                       \
+{      /* 16-byte key, decryption schedule, middle steps (called with i = 1..8) */ \
+       ss[4] = ls_box(ss[(i+3) % 4], 3) ^ rcon_tab[i];                 \
+       ss[i % 4] ^= ss[4];                                             \
+       ss[4] = ff(ss[4]);                                              \
+       k[4*(i)+4] = ss[4] ^= k[4*(i)];         /* new word = running value ^ previous round's word */ \
+       k[4*(i)+5] = ss[4] ^= k[4*(i)+1];                               \
+       k[4*(i)+6] = ss[4] ^= k[4*(i)+2];                               \
+       k[4*(i)+7] = ss[4] ^= k[4*(i)+3];                               \
+}
+
+#define kdl4(k,i)                                                      \
+{      /* 16-byte key, decryption schedule, last step (called with i = 9): ff() is not applied */ \
+       ss[4] = ls_box(ss[(i+3) % 4], 3) ^ rcon_tab[i];                 \
+       ss[i % 4] ^= ss[4];                                             \
+       k[4*(i)+4] = (ss[0] ^= ss[1]) ^ ss[2] ^ ss[3];                  \
+       k[4*(i)+5] = ss[1] ^ ss[3];                                     \
+       k[4*(i)+6] = ss[0];                                             \
+       k[4*(i)+7] = ss[1];                                             \
+}
+
+#define kdf6(k,i)                                                      \
+{      /* 24-byte key, decryption schedule, first step (called with i = 0): ff() of each new word */ \
+       ss[0] ^= ls_box(ss[5],3) ^ rcon_tab[i];                         \
+       k[6*(i)+ 6] = ff(ss[0]);                                        \
+       ss[1] ^= ss[0];                                                 \
+       k[6*(i)+ 7] = ff(ss[1]);                                        \
+       ss[2] ^= ss[1];                                                 \
+       k[6*(i)+ 8] = ff(ss[2]);                                        \
+       ss[3] ^= ss[2];                                                 \
+       k[6*(i)+ 9] = ff(ss[3]);                                        \
+       ss[4] ^= ss[3];                                                 \
+       k[6*(i)+10] = ff(ss[4]);                                        \
+       ss[5] ^= ss[4];                                                 \
+       k[6*(i)+11] = ff(ss[5]);                                        \
+}
+
+#define kd6(k,i)                                                       \
+{      /* 24-byte key, decryption schedule, middle steps (called with i = 1..6); ss[6] is scratch */ \
+       ss[6] = ls_box(ss[5],3) ^ rcon_tab[i];                          \
+       ss[0] ^= ss[6]; ss[6] = ff(ss[6]);                              \
+       k[6*(i)+ 6] = ss[6] ^= k[6*(i)];        /* new word = running value ^ previous group's word */ \
+       ss[1] ^= ss[0];                                                 \
+       k[6*(i)+ 7] = ss[6] ^= k[6*(i)+ 1];                             \
+       ss[2] ^= ss[1];                                                 \
+       k[6*(i)+ 8] = ss[6] ^= k[6*(i)+ 2];                             \
+       ss[3] ^= ss[2];                                                 \
+       k[6*(i)+ 9] = ss[6] ^= k[6*(i)+ 3];                             \
+       ss[4] ^= ss[3];                                                 \
+       k[6*(i)+10] = ss[6] ^= k[6*(i)+ 4];                             \
+       ss[5] ^= ss[4];                                                 \
+       k[6*(i)+11] = ss[6] ^= k[6*(i)+ 5];                             \
+}
+
+#define kdl6(k,i)                                                      \
+{      /* 24-byte key, decryption schedule, last step (called with i = 7): four words, ff() not applied */ \
+       ss[0] ^= ls_box(ss[5],3) ^ rcon_tab[i];                         \
+       k[6*(i)+ 6] = ss[0];                                            \
+       ss[1] ^= ss[0];                                                 \
+       k[6*(i)+ 7] = ss[1];                                            \
+       ss[2] ^= ss[1];                                                 \
+       k[6*(i)+ 8] = ss[2];                                            \
+       ss[3] ^= ss[2];                                                 \
+       k[6*(i)+ 9] = ss[3];                                            \
+}
+
+#define kdf8(k,i)                                                      \
+{      /* 32-byte key, decryption schedule, first step (called with i = 0): ff() of each new word */ \
+       ss[0] ^= ls_box(ss[7],3) ^ rcon_tab[i];                         \
+       k[8*(i)+ 8] = ff(ss[0]);                                        \
+       ss[1] ^= ss[0];                                                 \
+       k[8*(i)+ 9] = ff(ss[1]);                                        \
+       ss[2] ^= ss[1];                                                 \
+       k[8*(i)+10] = ff(ss[2]);                                        \
+       ss[3] ^= ss[2];                                                 \
+       k[8*(i)+11] = ff(ss[3]);                                        \
+       ss[4] ^= ls_box(ss[3],0);       /* no rotation, no rcon */      \
+       k[8*(i)+12] = ff(ss[4]);                                        \
+       ss[5] ^= ss[4];                                                 \
+       k[8*(i)+13] = ff(ss[5]);                                        \
+       ss[6] ^= ss[5];                                                 \
+       k[8*(i)+14] = ff(ss[6]);                                        \
+       ss[7] ^= ss[6];                                                 \
+       k[8*(i)+15] = ff(ss[7]);                                        \
+}
+
+#define kd8(k,i)                                                       \
+{      /* 32-byte key, decryption schedule, middle steps (called with i = 1..5) */ \
+       u32 __g = ls_box(ss[7],3) ^ rcon_tab[i];                        \
+       ss[0] ^= __g;                                                   \
+       __g = ff(__g);                                                  \
+       k[8*(i)+ 8] = __g ^= k[8*(i)];          /* new word = running value ^ previous group's word */ \
+       ss[1] ^= ss[0];                                                 \
+       k[8*(i)+ 9] = __g ^= k[8*(i)+ 1];                               \
+       ss[2] ^= ss[1];                                                 \
+       k[8*(i)+10] = __g ^= k[8*(i)+ 2];                               \
+       ss[3] ^= ss[2];                                                 \
+       k[8*(i)+11] = __g ^= k[8*(i)+ 3];                               \
+       __g = ls_box(ss[3],0);          /* second half: no rotation, no rcon */ \
+       ss[4] ^= __g;                                                   \
+       __g = ff(__g);                                                  \
+       k[8*(i)+12] = __g ^= k[8*(i)+ 4];                               \
+       ss[5] ^= ss[4];                                                 \
+       k[8*(i)+13] = __g ^= k[8*(i)+ 5];                               \
+       ss[6] ^= ss[5];                                                 \
+       k[8*(i)+14] = __g ^= k[8*(i)+ 6];                               \
+       ss[7] ^= ss[6];                                                 \
+       k[8*(i)+15] = __g ^= k[8*(i)+ 7];                               \
+}
+
+#define kdl8(k,i)                                                      \
+{      /* 32-byte key, decryption schedule, last step (called with i = 6): four words, ff() not applied */ \
+       ss[0] ^= ls_box(ss[7],3) ^ rcon_tab[i];                         \
+       k[8*(i)+ 8] = ss[0];                                            \
+       ss[1] ^= ss[0];                                                 \
+       k[8*(i)+ 9] = ss[1];                                            \
+       ss[2] ^= ss[1];                                                 \
+       k[8*(i)+10] = ss[2];                                            \
+       ss[3] ^= ss[2];                                                 \
+       k[8*(i)+11] = ss[3];                                            \
+}
+
+static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+                      unsigned int key_len)
+{      /* Expand in_key (16, 24 or 32 bytes) into ctx->ekey and ctx->dkey; returns 0, or -EINVAL for any other key_len. */
+       int i;
+       u32 ss[8];      /* running key state used by the ke and kd macros; some also use a spare element as scratch */
+       struct aes_ctx *ctx = crypto_tfm_ctx(tfm);
+       const __le32 *key = (const __le32 *)in_key;     /* key material is read as little-endian 32-bit words */
+       u32 *flags = &tfm->crt_flags;
+
+       /* encryption schedule */
+       /* NOTE(review): key[0..3] are read before key_len is checked; presumably the caller enforces cia_min_keysize */
+       ctx->ekey[0] = ss[0] = le32_to_cpu(key[0]);
+       ctx->ekey[1] = ss[1] = le32_to_cpu(key[1]);
+       ctx->ekey[2] = ss[2] = le32_to_cpu(key[2]);
+       ctx->ekey[3] = ss[3] = le32_to_cpu(key[3]);
+
+       switch(key_len) {
+       case 16:
+               for (i = 0; i < 9; i++)
+                       ke4(ctx->ekey, i);
+               kel4(ctx->ekey, 9);
+               ctx->rounds = 10;
+               break;
+               
+       case 24:
+               ctx->ekey[4] = ss[4] = le32_to_cpu(key[4]);
+               ctx->ekey[5] = ss[5] = le32_to_cpu(key[5]);
+               for (i = 0; i < 7; i++)
+                       ke6(ctx->ekey, i);
+               kel6(ctx->ekey, 7); 
+               ctx->rounds = 12;
+               break;
+
+       case 32:
+               ctx->ekey[4] = ss[4] = le32_to_cpu(key[4]);
+               ctx->ekey[5] = ss[5] = le32_to_cpu(key[5]);
+               ctx->ekey[6] = ss[6] = le32_to_cpu(key[6]);
+               ctx->ekey[7] = ss[7] = le32_to_cpu(key[7]);
+               for (i = 0; i < 6; i++)
+                       ke8(ctx->ekey, i);
+               kel8(ctx->ekey, 6);
+               ctx->rounds = 14;
+               break;
+
+       default:        /* unsupported length: flag it on the tfm and fail */
+               *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+               return -EINVAL;
+       }
+       
+       /* decryption schedule */
+       /* the first four words are copied unchanged; later words of the raw key go through ff() */
+       ctx->dkey[0] = ss[0] = le32_to_cpu(key[0]);
+       ctx->dkey[1] = ss[1] = le32_to_cpu(key[1]);
+       ctx->dkey[2] = ss[2] = le32_to_cpu(key[2]);
+       ctx->dkey[3] = ss[3] = le32_to_cpu(key[3]);
+
+       switch (key_len) {      /* no default needed: other lengths returned above */
+       case 16:
+               kdf4(ctx->dkey, 0);
+               for (i = 1; i < 9; i++)
+                       kd4(ctx->dkey, i);
+               kdl4(ctx->dkey, 9);
+               break;
+               
+       case 24:
+               ctx->dkey[4] = ff(ss[4] = le32_to_cpu(key[4]));
+               ctx->dkey[5] = ff(ss[5] = le32_to_cpu(key[5]));
+               kdf6(ctx->dkey, 0);
+               for (i = 1; i < 7; i++)
+                       kd6(ctx->dkey, i);
+               kdl6(ctx->dkey, 7);
+               break;
+
+       case 32:
+               ctx->dkey[4] = ff(ss[4] = le32_to_cpu(key[4]));
+               ctx->dkey[5] = ff(ss[5] = le32_to_cpu(key[5]));
+               ctx->dkey[6] = ff(ss[6] = le32_to_cpu(key[6]));
+               ctx->dkey[7] = ff(ss[7] = le32_to_cpu(key[7]));
+               kdf8(ctx->dkey, 0);
+               for (i = 1; i < 6; i++)
+                       kd8(ctx->dkey, i);
+               kdl8(ctx->dkey, 6);
+               break;
+       }
+       return 0;
+}
+
+static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)       /* cia_encrypt hook of aes_alg */
+{
+       aes_enc_blk(tfm, dst, src);     /* hand straight to the assembler routine */
+}
+
+static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)       /* cia_decrypt hook of aes_alg */
+{
+       aes_dec_blk(tfm, dst, src);     /* hand straight to the assembler routine */
+}
+
+static struct crypto_alg aes_alg = {   /* algorithm descriptor registered by aes_init() */
+       .cra_name               =       "aes",
+       .cra_driver_name        =       "aes-i586",
+       .cra_priority           =       200,
+       .cra_flags              =       CRYPTO_ALG_TYPE_CIPHER,
+       .cra_blocksize          =       AES_BLOCK_SIZE,
+       .cra_ctxsize            =       sizeof(struct aes_ctx), /* context holds both key schedules and the round count */
+       .cra_module             =       THIS_MODULE,
+       .cra_list               =       LIST_HEAD_INIT(aes_alg.cra_list),
+       .cra_u                  =       {
+               .cipher = {
+                       .cia_min_keysize        =       AES_MIN_KEY_SIZE,
+                       .cia_max_keysize        =       AES_MAX_KEY_SIZE,
+                       .cia_setkey             =       aes_set_key,
+                       .cia_encrypt            =       aes_encrypt,
+                       .cia_decrypt            =       aes_decrypt
+               }
+       }
+};
+
+static int __init aes_init(void)       /* module entry point */
+{
+       gen_tabs();     /* the lookup tables are built before the algorithm is registered */
+       return crypto_register_alg(&aes_alg);   /* the registration result is returned unchanged */
+}
+
+static void __exit aes_fini(void)      /* module exit point */
+{
+       crypto_unregister_alg(&aes_alg);        /* undo the registration made in aes_init() */
+}
+
+module_init(aes_init);
+module_exit(aes_fini);
+
+MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm, i586 asm optimized");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_AUTHOR("Fruhwirth Clemens, James Morris, Brian Gladman, Adam Richter");
+MODULE_ALIAS("aes");
diff --git a/arch/x86/crypto/twofish-i586-asm_32.S b/arch/x86/crypto/twofish-i586-asm_32.S
new file mode 100644 (file)
index 0000000..39b98ed
--- /dev/null
@@ -0,0 +1,335 @@
+/***************************************************************************
+*   Copyright (C) 2006 by Joachim Fritschi, <jfritschi@freenet.de>        *
+*                                                                         *
+*   This program is free software; you can redistribute it and/or modify  *
+*   it under the terms of the GNU General Public License as published by  *
+*   the Free Software Foundation; either version 2 of the License, or     *
+*   (at your option) any later version.                                   *
+*                                                                         *
+*   This program is distributed in the hope that it will be useful,       *
+*   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+*   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
+*   GNU General Public License for more details.                          *
+*                                                                         *
+*   You should have received a copy of the GNU General Public License     *
+*   along with this program; if not, write to the                         *
+*   Free Software Foundation, Inc.,                                       *
+*   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
+***************************************************************************/
+
+.file "twofish-i586-asm.S"
+.text
+
+#include <asm/asm-offsets.h>
+
+/* stack offsets of the arguments at function entry; return address at 0 */
+
+#define in_blk    12  /* input byte array address parameter*/
+#define out_blk   8  /* output byte array address parameter*/
+#define tfm       4  /* crypto tfm pointer; the Twofish context lies crypto_tfm_ctx_offset bytes into it */
+
+#define a_offset       0       /* byte offsets of the four 32-bit words a..d within a block */
+#define b_offset       4
+#define c_offset       8
+#define d_offset       12
+
+/* Byte offsets into the crypto context struct (NOTE(review): presumably struct twofish_ctx - must stay in sync, confirm) */
+
+#define s0     0       /* S0 Array 256 Words each; the round macros address it with no displacement */
+#define s1     1024    /* S1 Array */
+#define s2     2048    /* S2 Array */
+#define s3     3072    /* S3 Array */
+#define w      4096    /* 8 whitening keys (word) */
+#define k      4128    /* key 1-32 ( word ) */
+
+/* define a few register aliases to allow macro substitution: the round macros paste D (32-bit), B (low byte) or H (second byte) onto R0..R3 */
+
+#define R0D    %eax
+#define R0B    %al
+#define R0H    %ah
+
+#define R1D    %ebx
+#define R1B    %bl
+#define R1H    %bh
+
+#define R2D    %ecx
+#define R2B    %cl
+#define R2H    %ch
+
+#define R3D    %edx
+#define R3B    %dl
+#define R3H    %dh
+
+
+/* performs input whitening: xors src with the whitening key word at w+offset of the context */
+#define input_whitening(src,context,offset)\
+       xor     w+offset(context),      src;
+
+/* performs output whitening: xors src with the whitening key word at w+16+offset of the context */
+#define output_whitening(src,context,offset)\
+       xor     w+16+offset(context),   src;
+
+/* One encryption round. %ebp must hold the context base; %esi and %edi are clobbered; the old d is parked on the stack.
+ * a input register containing a (rotated 16)
+ * b input register containing b
+ * c input register containing c
+ * d input register containing d (already rol $1); round is the byte offset of this round's subkey pair within k
+ * operations on a and b are interleaved to increase performance
+ */
+#define encrypt_round(a,b,c,d,round)\
+       push    d ## D;\
+       movzx   b ## B,         %edi;\
+       mov     s1(%ebp,%edi,4),d ## D;\
+       movzx   a ## B,         %edi;\
+       mov     s2(%ebp,%edi,4),%esi;\
+       movzx   b ## H,         %edi;\
+       ror     $16,            b ## D;\
+       xor     s2(%ebp,%edi,4),d ## D;\
+       movzx   a ## H,         %edi;\
+       ror     $16,            a ## D;\
+       xor     s3(%ebp,%edi,4),%esi;\
+       movzx   b ## B,         %edi;\
+       xor     s3(%ebp,%edi,4),d ## D;\
+       movzx   a ## B,         %edi;\
+       xor     (%ebp,%edi,4),  %esi;\
+       movzx   b ## H,         %edi;\
+       ror     $15,            b ## D;\
+       xor     (%ebp,%edi,4),  d ## D;\
+       movzx   a ## H,         %edi;\
+       xor     s1(%ebp,%edi,4),%esi;\
+       pop     %edi;\
+       add     d ## D,         %esi;\
+       add     %esi,           d ## D;\
+       add     k+round(%ebp),  %esi;\
+       xor     %esi,           c ## D;\
+       rol     $15,            c ## D;\
+       add     k+4+round(%ebp),d ## D;\
+       xor     %edi,           d ## D;
+
+/* Final encryption round. %ebp must hold the context base; %esi and %edi are clobbered; the old d is parked on the stack.
+ * a input register containing a (rotated 16)
+ * b input register containing b
+ * c input register containing c
+ * d input register containing d (already rol $1); round is the byte offset of this round's subkey pair within k
+ * operations on a and b are interleaved to increase performance
+ * last round has different rotations for the output preparation: b gets a second ror $16 (not ror $15), c gets ror $1 (not rol $15)
+ */
+#define encrypt_last_round(a,b,c,d,round)\
+       push    d ## D;\
+       movzx   b ## B,         %edi;\
+       mov     s1(%ebp,%edi,4),d ## D;\
+       movzx   a ## B,         %edi;\
+       mov     s2(%ebp,%edi,4),%esi;\
+       movzx   b ## H,         %edi;\
+       ror     $16,            b ## D;\
+       xor     s2(%ebp,%edi,4),d ## D;\
+       movzx   a ## H,         %edi;\
+       ror     $16,            a ## D;\
+       xor     s3(%ebp,%edi,4),%esi;\
+       movzx   b ## B,         %edi;\
+       xor     s3(%ebp,%edi,4),d ## D;\
+       movzx   a ## B,         %edi;\
+       xor     (%ebp,%edi,4),  %esi;\
+       movzx   b ## H,         %edi;\
+       ror     $16,            b ## D;\
+       xor     (%ebp,%edi,4),  d ## D;\
+       movzx   a ## H,         %edi;\
+       xor     s1(%ebp,%edi,4),%esi;\
+       pop     %edi;\
+       add     d ## D,         %esi;\
+       add     %esi,           d ## D;\
+       add     k+round(%ebp),  %esi;\
+       xor     %esi,           c ## D;\
+       ror     $1,             c ## D;\
+       add     k+4+round(%ebp),d ## D;\
+       xor     %edi,           d ## D;
+
+/* One decryption round. %ebp must hold the context base; %esi and %edi are clobbered; the old c is parked on the stack.
+ * a input register containing a
+ * b input register containing b (rotated 16)
+ * c input register containing c (already rol $1)
+ * d input register containing d; round is the byte offset of this round's subkey pair within k
+ * operations on a and b are interleaved to increase performance
+ */
+#define decrypt_round(a,b,c,d,round)\
+       push    c ## D;\
+       movzx   a ## B,         %edi;\
+       mov     (%ebp,%edi,4),  c ## D;\
+       movzx   b ## B,         %edi;\
+       mov     s3(%ebp,%edi,4),%esi;\
+       movzx   a ## H,         %edi;\
+       ror     $16,            a ## D;\
+       xor     s1(%ebp,%edi,4),c ## D;\
+       movzx   b ## H,         %edi;\
+       ror     $16,            b ## D;\
+       xor     (%ebp,%edi,4),  %esi;\
+       movzx   a ## B,         %edi;\
+       xor     s2(%ebp,%edi,4),c ## D;\
+       movzx   b ## B,         %edi;\
+       xor     s1(%ebp,%edi,4),%esi;\
+       movzx   a ## H,         %edi;\
+       ror     $15,            a ## D;\
+       xor     s3(%ebp,%edi,4),c ## D;\
+       movzx   b ## H,         %edi;\
+       xor     s2(%ebp,%edi,4),%esi;\
+       pop     %edi;\
+       add     %esi,           c ## D;\
+       add     c ## D,         %esi;\
+       add     k+round(%ebp),  c ## D;\
+       xor     %edi,           c ## D;\
+       add     k+4+round(%ebp),%esi;\
+       xor     %esi,           d ## D;\
+       rol     $15,            d ## D;
+
+/* Final decryption round. %ebp must hold the context base; %esi and %edi are clobbered; the old c is parked on the stack.
+ * a input register containing a
+ * b input register containing b (rotated 16)
+ * c input register containing c (already rol $1)
+ * d input register containing d; round is the byte offset of this round's subkey pair within k
+ * operations on a and b are interleaved to increase performance
+ * last round has different rotations for the output preparation: a gets a second ror $16 (not ror $15), d gets ror $1 (not rol $15)
+ */
+#define decrypt_last_round(a,b,c,d,round)\
+       push    c ## D;\
+       movzx   a ## B,         %edi;\
+       mov     (%ebp,%edi,4),  c ## D;\
+       movzx   b ## B,         %edi;\
+       mov     s3(%ebp,%edi,4),%esi;\
+       movzx   a ## H,         %edi;\
+       ror     $16,            a ## D;\
+       xor     s1(%ebp,%edi,4),c ## D;\
+       movzx   b ## H,         %edi;\
+       ror     $16,            b ## D;\
+       xor     (%ebp,%edi,4),  %esi;\
+       movzx   a ## B,         %edi;\
+       xor     s2(%ebp,%edi,4),c ## D;\
+       movzx   b ## B,         %edi;\
+       xor     s1(%ebp,%edi,4),%esi;\
+       movzx   a ## H,         %edi;\
+       ror     $16,            a ## D;\
+       xor     s3(%ebp,%edi,4),c ## D;\
+       movzx   b ## H,         %edi;\
+       xor     s2(%ebp,%edi,4),%esi;\
+       pop     %edi;\
+       add     %esi,           c ## D;\
+       add     c ## D,         %esi;\
+       add     k+round(%ebp),  c ## D;\
+       xor     %edi,           c ## D;\
+       add     k+4+round(%ebp),%esi;\
+       xor     %esi,           d ## D;\
+       ror     $1,             d ## D;
+
+.align 4
+.global twofish_enc_blk
+.global twofish_dec_blk
+
+twofish_enc_blk:       /* encrypts the 16-byte block at in_blk into out_blk using the context inside tfm */
+       push    %ebp                    /* save registers according to calling convention*/
+       push    %ebx
+       push    %esi
+       push    %edi
+
+       mov     tfm + 16(%esp), %ebp    /* abuse the base pointer: set new base pointer to the crypto tfm; +16 skips the four registers pushed above */
+       add     $crypto_tfm_ctx_offset, %ebp    /* ctx address */
+       mov     in_blk+16(%esp),%edi    /* input address in edi */
+
+       mov     (%edi),         %eax
+       mov     b_offset(%edi), %ebx
+       mov     c_offset(%edi), %ecx
+       mov     d_offset(%edi), %edx
+       input_whitening(%eax,%ebp,a_offset)
+       ror     $16,    %eax            /* encrypt_round expects a rotated by 16 */
+       input_whitening(%ebx,%ebp,b_offset)
+       input_whitening(%ecx,%ebp,c_offset)
+       input_whitening(%edx,%ebp,d_offset)
+       rol     $1,     %edx            /* encrypt_round expects d already rol $1 */
+
+       encrypt_round(R0,R1,R2,R3,0);   /* register pairs swap roles every round; round offset advances 8 bytes (two subkey words) */
+       encrypt_round(R2,R3,R0,R1,8);
+       encrypt_round(R0,R1,R2,R3,2*8);
+       encrypt_round(R2,R3,R0,R1,3*8);
+       encrypt_round(R0,R1,R2,R3,4*8);
+       encrypt_round(R2,R3,R0,R1,5*8);
+       encrypt_round(R0,R1,R2,R3,6*8);
+       encrypt_round(R2,R3,R0,R1,7*8);
+       encrypt_round(R0,R1,R2,R3,8*8);
+       encrypt_round(R2,R3,R0,R1,9*8);
+       encrypt_round(R0,R1,R2,R3,10*8);
+       encrypt_round(R2,R3,R0,R1,11*8);
+       encrypt_round(R0,R1,R2,R3,12*8);
+       encrypt_round(R2,R3,R0,R1,13*8);
+       encrypt_round(R0,R1,R2,R3,14*8);
+       encrypt_last_round(R2,R3,R0,R1,15*8);
+
+       output_whitening(%eax,%ebp,c_offset)    /* eax/ebx are whitened and stored as output words c/d, ecx/edx as a/b */
+       output_whitening(%ebx,%ebp,d_offset)
+       output_whitening(%ecx,%ebp,a_offset)
+       output_whitening(%edx,%ebp,b_offset)
+       mov     out_blk+16(%esp),%edi;  /* output address in edi; +16 again skips the saved registers */
+       mov     %eax,           c_offset(%edi)
+       mov     %ebx,           d_offset(%edi)
+       mov     %ecx,           (%edi)
+       mov     %edx,           b_offset(%edi)
+
+       pop     %edi                    /* restore the saved registers in reverse push order */
+       pop     %esi
+       pop     %ebx
+       pop     %ebp
+       mov     $1,     %eax            /* leaves 1 in eax; the C prototype in twofish_32.c declares this routine void */
+       ret
+
+twofish_dec_blk:       /* decrypts the 16-byte block at in_blk into out_blk using the context inside tfm */
+       push    %ebp                    /* save registers according to calling convention*/
+       push    %ebx
+       push    %esi
+       push    %edi
+
+
+       mov     tfm + 16(%esp), %ebp    /* abuse the base pointer: set new base pointer to the crypto tfm; +16 skips the four registers pushed above */
+       add     $crypto_tfm_ctx_offset, %ebp    /* ctx address */
+       mov     in_blk+16(%esp),%edi    /* input address in edi */
+
+       mov     (%edi),         %eax
+       mov     b_offset(%edi), %ebx
+       mov     c_offset(%edi), %ecx
+       mov     d_offset(%edi), %edx
+       output_whitening(%eax,%ebp,a_offset)    /* decryption starts with the output whitening keys (w+16..) */
+       output_whitening(%ebx,%ebp,b_offset)
+       ror     $16,    %ebx            /* decrypt_round expects b rotated by 16 */
+       output_whitening(%ecx,%ebp,c_offset)
+       output_whitening(%edx,%ebp,d_offset)
+       rol     $1,     %ecx            /* decrypt_round expects c already rol $1 */
+
+       decrypt_round(R0,R1,R2,R3,15*8);        /* subkey offsets run from 15*8 down to 0; register pairs swap roles every round */
+       decrypt_round(R2,R3,R0,R1,14*8);
+       decrypt_round(R0,R1,R2,R3,13*8);
+       decrypt_round(R2,R3,R0,R1,12*8);
+       decrypt_round(R0,R1,R2,R3,11*8);
+       decrypt_round(R2,R3,R0,R1,10*8);
+       decrypt_round(R0,R1,R2,R3,9*8);
+       decrypt_round(R2,R3,R0,R1,8*8);
+       decrypt_round(R0,R1,R2,R3,7*8);
+       decrypt_round(R2,R3,R0,R1,6*8);
+       decrypt_round(R0,R1,R2,R3,5*8);
+       decrypt_round(R2,R3,R0,R1,4*8);
+       decrypt_round(R0,R1,R2,R3,3*8);
+       decrypt_round(R2,R3,R0,R1,2*8);
+       decrypt_round(R0,R1,R2,R3,1*8);
+       decrypt_last_round(R2,R3,R0,R1,0);
+
+       input_whitening(%eax,%ebp,c_offset)     /* eax/ebx are whitened and stored as output words c/d, ecx/edx as a/b */
+       input_whitening(%ebx,%ebp,d_offset)
+       input_whitening(%ecx,%ebp,a_offset)
+       input_whitening(%edx,%ebp,b_offset)
+       mov     out_blk+16(%esp),%edi;  /* output address in edi; +16 again skips the saved registers */
+       mov     %eax,           c_offset(%edi)
+       mov     %ebx,           d_offset(%edi)
+       mov     %ecx,           (%edi)
+       mov     %edx,           b_offset(%edi)
+
+       pop     %edi                    /* restore the saved registers in reverse push order */
+       pop     %esi
+       pop     %ebx
+       pop     %ebp
+       mov     $1,     %eax            /* leaves 1 in eax; the C prototype in twofish_32.c declares this routine void */
+       ret
diff --git a/arch/x86/crypto/twofish_32.c b/arch/x86/crypto/twofish_32.c
new file mode 100644 (file)
index 0000000..e3004df
--- /dev/null
@@ -0,0 +1,97 @@
+/*
+ *  Glue Code for optimized 586 assembler version of TWOFISH
+ *
+ * Originally Twofish for GPG
+ * By Matthew Skala <mskala@ansuz.sooke.bc.ca>, July 26, 1998
+ * 256-bit key length added March 20, 1999
+ * Some modifications to reduce the text size by Werner Koch, April, 1998
+ * Ported to the kerneli patch by Marc Mutz <Marc@Mutz.com>
+ * Ported to CryptoAPI by Colin Slater <hoho@tacomeat.net>
+ *
+ * The original author has disclaimed all copyright interest in this
+ * code and thus put it in the public domain. The subsequent authors
+ * have put this under the GNU General Public License.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
+ * USA
+ *
+ * This code is a "clean room" implementation, written from the paper
+ * _Twofish: A 128-Bit Block Cipher_ by Bruce Schneier, John Kelsey,
+ * Doug Whiting, David Wagner, Chris Hall, and Niels Ferguson, available
+ * through http://www.counterpane.com/twofish.html
+ *
+ * For background information on multiplication in finite fields, used for
+ * the matrix operations in the key schedule, see the book _Contemporary
+ * Abstract Algebra_ by Joseph A. Gallian, especially chapter 22 in the
+ * Third Edition.
+ */
+
+#include <crypto/twofish.h>
+#include <linux/crypto.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/types.h>
+
+/* single-block routines implemented in assembler (twofish-i586-asm_32.S exports both symbols) */
+asmlinkage void twofish_enc_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
+asmlinkage void twofish_dec_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
+
+static void twofish_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+{
+       twofish_enc_blk(tfm, dst, src); /* .cia_encrypt hook of alg: hands tfm, dst and src unchanged to the asm routine */
+}
+
+static void twofish_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+{
+       twofish_dec_blk(tfm, dst, src); /* .cia_decrypt hook of alg: hands tfm, dst and src unchanged to the asm routine */
+}
+
+static struct crypto_alg alg = {       /* registered in init(), unregistered in fini() */
+       .cra_name               =       "twofish",
+       .cra_driver_name        =       "twofish-i586",
+       .cra_priority           =       200,    /* NOTE(review): presumably chosen to outrank the generic C Twofish - confirm */
+       .cra_flags              =       CRYPTO_ALG_TYPE_CIPHER,
+       .cra_blocksize          =       TF_BLOCK_SIZE,
+       .cra_ctxsize            =       sizeof(struct twofish_ctx),
+       .cra_alignmask          =       3,      /* NOTE(review): presumably requests 4-byte aligned buffers for the 32-bit loads/stores in the asm - confirm */
+       .cra_module             =       THIS_MODULE,
+       .cra_list               =       LIST_HEAD_INIT(alg.cra_list),
+       .cra_u                  =       {
+               .cipher = {
+                       .cia_min_keysize        =       TF_MIN_KEY_SIZE,
+                       .cia_max_keysize        =       TF_MAX_KEY_SIZE,
+                       .cia_setkey             =       twofish_setkey, /* not defined in this file; presumably declared by <crypto/twofish.h> - confirm */
+                       .cia_encrypt            =       twofish_encrypt,        /* thin wrapper around twofish_enc_blk() */
+                       .cia_decrypt            =       twofish_decrypt /* thin wrapper around twofish_dec_blk() */
+               }
+       }
+};
+
+static int __init init(void)
+{
+       return crypto_register_alg(&alg);       /* module entry point: registration result is passed back unchanged */
+}
+
+static void __exit fini(void)
+{
+       crypto_unregister_alg(&alg);    /* undo the registration made in init() */
+}
+
+module_init(init);
+module_exit(fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION ("Twofish Cipher Algorithm, i586 asm optimized");
+MODULE_ALIAS("twofish");
index fbd34ac2cda2d89d40a95bf5572b0ea4e031635b..b1bcf7c63028822ef0c19d5509d40c7806c09190 100644 (file)
@@ -1,5 +1,5 @@
 ifeq ($(CONFIG_X86_32),y)
-include ${srctree}/arch/i386/crypto/Makefile_32
+include ${srctree}/arch/x86/crypto/Makefile_32
 else
 include ${srctree}/arch/x86_64/crypto/Makefile_64
 endif