#ifdef __x86_64__
-.data
+# constants in mergeable sections, linker can reorder and merge
+.section .rodata.cst16.gf128mul_x_ble_mask, "aM", @progbits, 16
.align 16
.Lgf128mul_x_ble_mask:
.octa 0x00000000000000010000000000000087
+.section .rodata.cst16.POLY, "aM", @progbits, 16
+.align 16
POLY: .octa 0xC2000000000000000000000000000001
+.section .rodata.cst16.TWOONE, "aM", @progbits, 16
+.align 16
TWOONE: .octa 0x00000001000000000000000000000001
-# order of these constants should not change.
-# more specifically, ALL_F should follow SHIFT_MASK,
-# and ZERO should follow ALL_F
-
+.section .rodata.cst16.SHUF_MASK, "aM", @progbits, 16
+.align 16
SHUF_MASK: .octa 0x000102030405060708090A0B0C0D0E0F
+.section .rodata.cst16.MASK1, "aM", @progbits, 16
+.align 16
MASK1: .octa 0x0000000000000000ffffffffffffffff
+.section .rodata.cst16.MASK2, "aM", @progbits, 16
+.align 16
MASK2: .octa 0xffffffffffffffff0000000000000000
-SHIFT_MASK: .octa 0x0f0e0d0c0b0a09080706050403020100
-ALL_F: .octa 0xffffffffffffffffffffffffffffffff
-ZERO: .octa 0x00000000000000000000000000000000
+.section .rodata.cst16.ONE, "aM", @progbits, 16
+.align 16
ONE: .octa 0x00000000000000000000000000000001
+.section .rodata.cst16.F_MIN_MASK, "aM", @progbits, 16
+.align 16
F_MIN_MASK: .octa 0xf1f2f3f4f5f6f7f8f9fafbfcfdfeff0
+.section .rodata.cst16.dec, "aM", @progbits, 16
+.align 16
dec: .octa 0x1
+.section .rodata.cst16.enc, "aM", @progbits, 16
+.align 16
enc: .octa 0x2
+# order of these constants should not change.
+# more specifically, ALL_F should follow SHIFT_MASK,
+# and zero should follow ALL_F
+.section .rodata, "a", @progbits
+.align 16
+SHIFT_MASK: .octa 0x0f0e0d0c0b0a09080706050403020100
+ALL_F: .octa 0xffffffffffffffffffffffffffffffff
+ .octa 0x00000000000000000000000000000000
+
.text
#include <linux/linkage.h>
#include <asm/inst.h>
-.data
+# constants in mergeable sections, linker can reorder and merge
+.section .rodata.cst16.POLY, "aM", @progbits, 16
.align 16
-
POLY: .octa 0xC2000000000000000000000000000001
+
+.section .rodata.cst16.POLY2, "aM", @progbits, 16
+.align 16
POLY2: .octa 0xC20000000000000000000001C2000000
-TWOONE: .octa 0x00000001000000000000000000000001
-# order of these constants should not change.
-# more specifically, ALL_F should follow SHIFT_MASK, and ZERO should follow ALL_F
+.section .rodata.cst16.TWOONE, "aM", @progbits, 16
+.align 16
+TWOONE: .octa 0x00000001000000000000000000000001
+.section .rodata.cst16.SHUF_MASK, "aM", @progbits, 16
+.align 16
SHUF_MASK: .octa 0x000102030405060708090A0B0C0D0E0F
-SHIFT_MASK: .octa 0x0f0e0d0c0b0a09080706050403020100
-ALL_F: .octa 0xffffffffffffffffffffffffffffffff
-ZERO: .octa 0x00000000000000000000000000000000
+
+.section .rodata.cst16.ONE, "aM", @progbits, 16
+.align 16
ONE: .octa 0x00000000000000000000000000000001
+
+.section .rodata.cst16.ONEf, "aM", @progbits, 16
+.align 16
ONEf: .octa 0x01000000000000000000000000000000
+# order of these constants should not change.
+# more specifically, ALL_F should follow SHIFT_MASK, and zero should follow ALL_F
+.section .rodata, "a", @progbits
+.align 16
+SHIFT_MASK: .octa 0x0f0e0d0c0b0a09080706050403020100
+ALL_F: .octa 0xffffffffffffffffffffffffffffffff
+ .octa 0x00000000000000000000000000000000
+
.text
vmovdqu y6, 14 * 16(rio); \
vmovdqu y7, 15 * 16(rio);
-.data
+
+/* NB: section is mergeable, all elements must be aligned 16-byte blocks */
+.section .rodata.cst16, "aM", @progbits, 16
.align 16
#define SHUFB_BYTES(idx) \
.byte 0x08, 0x05, 0x02, 0x0f, 0x0c, 0x09, 0x06, 0x03
/* 4-bit mask */
+.section .rodata.cst4.L0f0f0f0f, "aM", @progbits, 4
.align 4
.L0f0f0f0f:
.long 0x0f0f0f0f
vmovdqu y6, 14 * 32(rio); \
vmovdqu y7, 15 * 32(rio);
-.data
-.align 32
+.section .rodata.cst32.shufb_16x16b, "aM", @progbits, 32
+.align 32
#define SHUFB_BYTES(idx) \
0 + (idx), 4 + (idx), 8 + (idx), 12 + (idx)
-
.Lshufb_16x16b:
.byte SHUFB_BYTES(0), SHUFB_BYTES(1), SHUFB_BYTES(2), SHUFB_BYTES(3)
.byte SHUFB_BYTES(0), SHUFB_BYTES(1), SHUFB_BYTES(2), SHUFB_BYTES(3)
+.section .rodata.cst32.pack_bswap, "aM", @progbits, 32
+.align 32
.Lpack_bswap:
.long 0x00010203, 0x04050607, 0x80808080, 0x80808080
.long 0x00010203, 0x04050607, 0x80808080, 0x80808080
+/* NB: section is mergeable, all elements must be aligned 16-byte blocks */
+.section .rodata.cst16, "aM", @progbits, 16
+.align 16
+
/* For CTR-mode IV byteswap */
.Lbswap128_mask:
.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
.byte 0x00, 0x0d, 0x0a, 0x07, 0x04, 0x01, 0x0e, 0x0b
.byte 0x08, 0x05, 0x02, 0x0f, 0x0c, 0x09, 0x06, 0x03
+.section .rodata.cst4.L0f0f0f0f, "aM", @progbits, 4
.align 4
/* 4-bit mask */
.L0f0f0f0f:
vpshufb rmask, x0, x0; \
vpshufb rmask, x1, x1;
-.data
-
+.section .rodata.cst16.bswap_mask, "aM", @progbits, 16
.align 16
.Lbswap_mask:
.byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12
+.section .rodata.cst16.bswap128_mask, "aM", @progbits, 16
+.align 16
.Lbswap128_mask:
.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
+.section .rodata.cst16.bswap_iv_mask, "aM", @progbits, 16
+.align 16
.Lbswap_iv_mask:
.byte 7, 6, 5, 4, 3, 2, 1, 0, 7, 6, 5, 4, 3, 2, 1, 0
+
+.section .rodata.cst4.16_mask, "aM", @progbits, 4
+.align 4
.L16_mask:
.byte 16, 16, 16, 16
+.section .rodata.cst4.32_mask, "aM", @progbits, 4
+.align 4
.L32_mask:
.byte 32, 0, 0, 0
+.section .rodata.cst4.first_mask, "aM", @progbits, 4
+.align 4
.Lfirst_mask:
.byte 0x1f, 0, 0, 0
vpshufb rmask, x2, x2; \
vpshufb rmask, x3, x3;
-.data
-
+.section .rodata.cst16, "aM", @progbits, 16
.align 16
.Lxts_gf128mul_and_shl1_mask:
.byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0
.byte 12, 13, 14, 15, 8, 9, 10, 11, 7, 6, 5, 4, 3, 2, 1, 0
.Lrkr_dec_QBAR_QBAR_QBAR_QBAR:
.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
+
+.section .rodata.cst4.L16_mask, "aM", @progbits, 4
+.align 4
.L16_mask:
.byte 16, 16, 16, 16
+
+.section .rodata.cst4.L32_mask, "aM", @progbits, 4
+.align 4
.L32_mask:
.byte 32, 0, 0, 0
+
+.section .rodata.cst4.first_mask, "aM", @progbits, 4
+.align 4
.Lfirst_mask:
.byte 0x1f, 0, 0, 0
#include <linux/linkage.h>
-.data
+.section .rodata.cst32.ROT8, "aM", @progbits, 32
.align 32
-
ROT8: .octa 0x0e0d0c0f0a09080b0605040702010003
.octa 0x0e0d0c0f0a09080b0605040702010003
+
+.section .rodata.cst32.ROT16, "aM", @progbits, 32
+.align 32
ROT16: .octa 0x0d0c0f0e09080b0a0504070601000302
.octa 0x0d0c0f0e09080b0a0504070601000302
+
+.section .rodata.cst32.CTRINC, "aM", @progbits, 32
+.align 32
CTRINC: .octa 0x00000003000000020000000100000000
.octa 0x00000007000000060000000500000004
#include <linux/linkage.h>
-.data
+.section .rodata.cst16.ROT8, "aM", @progbits, 16
.align 16
-
ROT8: .octa 0x0e0d0c0f0a09080b0605040702010003
+.section .rodata.cst16.ROT16, "aM", @progbits, 16
+.align 16
ROT16: .octa 0x0d0c0f0e09080b0a0504070601000302
+.section .rodata.cst16.CTRINC, "aM", @progbits, 16
+.align 16
CTRINC: .octa 0x00000003000000020000000100000000
.text
ENDPROC(crc_t10dif_pcl)
-.data
-
+.section .rodata, "a", @progbits
+.align 16
# precomputed constants
# these constants are precomputed from the poly:
# 0x8bb70000 (0x8bb7 scaled to 32 bits)
-.align 16
# Q = 0x18BB70000
# rk1 = 2^(32*3) mod Q << 32
# rk2 = 2^(32*5) mod Q << 32
+.section .rodata.cst16.mask1, "aM", @progbits, 16
+.align 16
mask1:
.octa 0x80808080808080808080808080808080
+
+.section .rodata.cst16.mask2, "aM", @progbits, 16
+.align 16
mask2:
.octa 0x00000000FFFFFFFFFFFFFFFFFFFFFFFF
+.section .rodata.cst16.SHUF_MASK, "aM", @progbits, 16
+.align 16
SHUF_MASK:
.octa 0x000102030405060708090A0B0C0D0E0F
+.section .rodata.cst32.pshufb_shf_table, "aM", @progbits, 32
+.align 32
pshufb_shf_table:
# use these values for shift constants for the pshufb instruction
# different alignments result in values as shown:
ret;
ENDPROC(des3_ede_x86_64_crypt_blk_3way)
-.data
+.section .rodata, "a", @progbits
.align 16
.L_s1:
.quad 0x0010100001010400, 0x0000000000000000
#include <asm/inst.h>
#include <asm/frame.h>
-.data
-
+.section .rodata.cst16.bswap_mask, "aM", @progbits, 16
.align 16
.Lbswap_mask:
.octa 0x000102030405060708090a0b0c0d0e0f
#include <linux/linkage.h>
-.data
+.section .rodata.cst32.ANMASK, "aM", @progbits, 32
.align 32
-
ANMASK: .octa 0x0000000003ffffff0000000003ffffff
.octa 0x0000000003ffffff0000000003ffffff
+
+.section .rodata.cst32.ORMASK, "aM", @progbits, 32
+.align 32
ORMASK: .octa 0x00000000010000000000000001000000
.octa 0x00000000010000000000000001000000
#include <linux/linkage.h>
-.data
+.section .rodata.cst16.ANMASK, "aM", @progbits, 16
.align 16
-
ANMASK: .octa 0x0000000003ffffff0000000003ffffff
+
+.section .rodata.cst16.ORMASK, "aM", @progbits, 16
+.align 16
ORMASK: .octa 0x00000000010000000000000001000000
.text
.file "serpent-avx-x86_64-asm_64.S"
-.data
+.section .rodata.cst16.bswap128_mask, "aM", @progbits, 16
.align 16
-
.Lbswap128_mask:
.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
+.section .rodata.cst16.xts_gf128mul_and_shl1_mask, "aM", @progbits, 16
+.align 16
.Lxts_gf128mul_and_shl1_mask:
.byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0
.file "serpent-avx2-asm_64.S"
-.data
+.section .rodata.cst16.bswap128_mask, "aM", @progbits, 16
.align 16
-
.Lbswap128_mask:
.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
+
+.section .rodata.cst16.xts_gf128mul_and_shl1_mask_0, "aM", @progbits, 16
+.align 16
.Lxts_gf128mul_and_shl1_mask_0:
.byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0
+
+.section .rodata.cst16.xts_gf128mul_and_shl1_mask_1, "aM", @progbits, 16
+.align 16
.Lxts_gf128mul_and_shl1_mask_1:
.byte 0x0e, 1, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0
ret
ENDPROC(sha1_mb_mgr_get_comp_job_avx2)
-.data
-
+.section .rodata.cst16.clear_low_nibble, "aM", @progbits, 16
.align 16
clear_low_nibble:
.octa 0x000000000000000000000000FFFFFFF0
+
+.section .rodata.cst8, "aM", @progbits, 8
+.align 8
one:
.quad 1
two:
ENDPROC(sha1_mb_mgr_submit_avx2)
-.data
-
+.section .rodata.cst16.clear_low_nibble, "aM", @progbits, 16
.align 16
clear_low_nibble:
.octa 0x000000000000000000000000FFFFFFF0
ENDPROC(sha1_x8_avx2)
-.data
-
+.section .rodata.cst32.K00_19, "aM", @progbits, 32
.align 32
K00_19:
.octa 0x5A8279995A8279995A8279995A827999
.octa 0x5A8279995A8279995A8279995A827999
+
+.section .rodata.cst32.K20_39, "aM", @progbits, 32
+.align 32
K20_39:
.octa 0x6ED9EBA16ED9EBA16ED9EBA16ED9EBA1
.octa 0x6ED9EBA16ED9EBA16ED9EBA16ED9EBA1
+
+.section .rodata.cst32.K40_59, "aM", @progbits, 32
+.align 32
K40_59:
.octa 0x8F1BBCDC8F1BBCDC8F1BBCDC8F1BBCDC
.octa 0x8F1BBCDC8F1BBCDC8F1BBCDC8F1BBCDC
+
+.section .rodata.cst32.K60_79, "aM", @progbits, 32
+.align 32
K60_79:
.octa 0xCA62C1D6CA62C1D6CA62C1D6CA62C1D6
.octa 0xCA62C1D6CA62C1D6CA62C1D6CA62C1D6
+
+.section .rodata.cst32.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 32
+.align 32
PSHUFFLE_BYTE_FLIP_MASK:
.octa 0x0c0d0e0f08090a0b0405060700010203
.octa 0x0c0d0e0f08090a0b0405060700010203
ret
ENDPROC(sha1_ni_transform)
-.data
-
-.align 64
+.section .rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16
+.align 16
PSHUFFLE_BYTE_FLIP_MASK:
.octa 0x000102030405060708090a0b0c0d0e0f
+
+.section .rodata.cst16.UPPER_WORD_MASK, "aM", @progbits, 16
+.align 16
UPPER_WORD_MASK:
.octa 0xFFFFFFFF000000000000000000000000
ret
ENDPROC(sha256_transform_avx)
-.data
+.section .rodata.cst256.K256, "aM", @progbits, 256
.align 64
K256:
.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
+.section .rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16
+.align 16
PSHUFFLE_BYTE_FLIP_MASK:
.octa 0x0c0d0e0f08090a0b0405060700010203
+.section .rodata.cst16._SHUF_00BA, "aM", @progbits, 16
+.align 16
# shuffle xBxA -> 00BA
_SHUF_00BA:
.octa 0xFFFFFFFFFFFFFFFF0b0a090803020100
+.section .rodata.cst16._SHUF_DC00, "aM", @progbits, 16
+.align 16
# shuffle xDxC -> DC00
_SHUF_DC00:
.octa 0x0b0a090803020100FFFFFFFFFFFFFFFF
+
#endif
ret
ENDPROC(sha256_transform_rorx)
-.data
+.section .rodata.cst512.K256, "aM", @progbits, 512
.align 64
K256:
.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
+.section .rodata.cst32.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 32
+.align 32
PSHUFFLE_BYTE_FLIP_MASK:
.octa 0x0c0d0e0f08090a0b0405060700010203,0x0c0d0e0f08090a0b0405060700010203
# shuffle xBxA -> 00BA
+.section .rodata.cst32._SHUF_00BA, "aM", @progbits, 32
+.align 32
_SHUF_00BA:
.octa 0xFFFFFFFFFFFFFFFF0b0a090803020100,0xFFFFFFFFFFFFFFFF0b0a090803020100
# shuffle xDxC -> DC00
+.section .rodata.cst32._SHUF_DC00, "aM", @progbits, 32
+.align 32
_SHUF_DC00:
.octa 0x0b0a090803020100FFFFFFFFFFFFFFFF,0x0b0a090803020100FFFFFFFFFFFFFFFF
+
#endif
ret
ENDPROC(sha256_mb_mgr_get_comp_job_avx2)
-.data
-
+.section .rodata.cst16.clear_low_nibble, "aM", @progbits, 16
.align 16
clear_low_nibble:
.octa 0x000000000000000000000000FFFFFFF0
+
+.section .rodata.cst8, "aM", @progbits, 8
+.align 8
one:
.quad 1
two:
ENDPROC(sha256_mb_mgr_submit_avx2)
-.data
-
+.section .rodata.cst16.clear_low_nibble, "aM", @progbits, 16
.align 16
clear_low_nibble:
.octa 0x000000000000000000000000FFFFFFF0
ret
ENDPROC(sha256_x8_avx2)
-.data
+
+.section .rodata.K256_8, "a", @progbits
.align 64
K256_8:
.octa 0x428a2f98428a2f98428a2f98428a2f98
.octa 0xbef9a3f7bef9a3f7bef9a3f7bef9a3f7
.octa 0xc67178f2c67178f2c67178f2c67178f2
.octa 0xc67178f2c67178f2c67178f2c67178f2
+
+.section .rodata.cst32.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 32
+.align 32
PSHUFFLE_BYTE_FLIP_MASK:
.octa 0x0c0d0e0f08090a0b0405060700010203
.octa 0x0c0d0e0f08090a0b0405060700010203
+.section .rodata.cst256.K256, "aM", @progbits, 256
.align 64
.global K256
K256:
ret
ENDPROC(sha256_transform_ssse3)
-.data
+.section .rodata.cst256.K256, "aM", @progbits, 256
.align 64
K256:
.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
+.section .rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16
+.align 16
PSHUFFLE_BYTE_FLIP_MASK:
.octa 0x0c0d0e0f08090a0b0405060700010203
+.section .rodata.cst16._SHUF_00BA, "aM", @progbits, 16
+.align 16
# shuffle xBxA -> 00BA
_SHUF_00BA:
.octa 0xFFFFFFFFFFFFFFFF0b0a090803020100
+.section .rodata.cst16._SHUF_DC00, "aM", @progbits, 16
+.align 16
# shuffle xDxC -> DC00
_SHUF_DC00:
.octa 0x0b0a090803020100FFFFFFFFFFFFFFFF
ret
ENDPROC(sha256_ni_transform)
-.data
+.section .rodata.cst256.K256, "aM", @progbits, 256
.align 64
K256:
.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
+.section .rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16
+.align 16
PSHUFFLE_BYTE_FLIP_MASK:
.octa 0x0c0d0e0f08090a0b0405060700010203
########################################################################
### Binary Data
-.data
-
+.section .rodata.cst16.XMM_QWORD_BSWAP, "aM", @progbits, 16
.align 16
-
# Mask for byte-swapping a couple of qwords in an XMM register using (v)pshufb.
XMM_QWORD_BSWAP:
.octa 0x08090a0b0c0d0e0f0001020304050607
+# Mergeable 640-byte rodata section. This allows linker to merge the table
+# with other, exactly the same 640-byte fragment of another rodata section
+# (if such section exists).
+.section .rodata.cst640.K512, "aM", @progbits, 640
+.align 64
# K[t] used in SHA512 hashing
K512:
.quad 0x428a2f98d728ae22,0x7137449123ef65cd
########################################################################
### Binary Data
-.data
+# Mergeable 640-byte rodata section. This allows linker to merge the table
+# with other, exactly the same 640-byte fragment of another rodata section
+# (if such section exists).
+.section .rodata.cst640.K512, "aM", @progbits, 640
.align 64
# K[t] used in SHA512 hashing
K512:
.quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a
.quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817
+.section .rodata.cst32.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 32
.align 32
-
# Mask for byte-swapping a couple of qwords in an XMM register using (v)pshufb.
PSHUFFLE_BYTE_FLIP_MASK:
.octa 0x08090a0b0c0d0e0f0001020304050607
.octa 0x18191a1b1c1d1e1f1011121314151617
+.section .rodata.cst32.MASK_YMM_LO, "aM", @progbits, 32
+.align 32
MASK_YMM_LO:
.octa 0x00000000000000000000000000000000
.octa 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
+
#endif
pop %rbx
ret
ENDPROC(sha512_mb_mgr_get_comp_job_avx2)
-.data
-.align 16
+.section .rodata.cst8.one, "aM", @progbits, 8
+.align 8
one:
.quad 1
+
+.section .rodata.cst8.two, "aM", @progbits, 8
+.align 8
two:
.quad 2
+
+.section .rodata.cst8.three, "aM", @progbits, 8
+.align 8
three:
.quad 3
xor job_rax, job_rax
jmp return
ENDPROC(sha512_mb_mgr_submit_avx2)
-.data
+/* UNUSED?
+.section .rodata.cst16, "aM", @progbits, 16
.align 16
H0: .int 0x6a09e667
H1: .int 0xbb67ae85
H5: .int 0x9b05688c
H6: .int 0x1f83d9ab
H7: .int 0x5be0cd19
+*/
ret
ENDPROC(sha512_x4_avx2)
-.data
+.section .rodata.K512_4, "a", @progbits
.align 64
K512_4:
.octa 0x428a2f98d728ae22428a2f98d728ae22,\
.octa 0x6c44198c4a4758176c44198c4a475817,\
0x6c44198c4a4758176c44198c4a475817
+.section .rodata.cst32.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 32
+.align 32
PSHUFFLE_BYTE_FLIP_MASK: .octa 0x08090a0b0c0d0e0f0001020304050607
.octa 0x18191a1b1c1d1e1f1011121314151617
########################################################################
### Binary Data
-.data
-
+.section .rodata.cst16.XMM_QWORD_BSWAP, "aM", @progbits, 16
.align 16
-
# Mask for byte-swapping a couple of qwords in an XMM register using (v)pshufb.
XMM_QWORD_BSWAP:
.octa 0x08090a0b0c0d0e0f0001020304050607
+# Mergeable 640-byte rodata section. This allows linker to merge the table
+# with other, exactly the same 640-byte fragment of another rodata section
+# (if such section exists).
+.section .rodata.cst640.K512, "aM", @progbits, 640
+.align 64
# K[t] used in SHA512 hashing
K512:
.quad 0x428a2f98d728ae22,0x7137449123ef65cd
.file "twofish-avx-x86_64-asm_64.S"
-.data
+.section .rodata.cst16.bswap128_mask, "aM", @progbits, 16
.align 16
-
.Lbswap128_mask:
.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
+
+.section .rodata.cst16.xts_gf128mul_and_shl1_mask, "aM", @progbits, 16
+.align 16
.Lxts_gf128mul_and_shl1_mask:
.byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0