0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016 #include <linux/linkage.h>
0017 #include <asm/frame.h>
0018
/*
 * 16-byte shuffle-control constant used with pshufb.
 *
 * .octa stores the value little-endian, so the bytes in memory are
 * 0x0f, 0x0e, ..., 0x01, 0x00.  Used as a pshufb mask this selects source
 * byte 15 for destination byte 0, source byte 14 for destination byte 1, and
 * so on, i.e. it reverses the byte order of a 128-bit register.
 *
 * The section is allocatable and mergeable ("aM") with an entity size of 16,
 * and the constant is 16-byte aligned because it is loaded with movaps, which
 * requires an aligned memory operand.
 */
0019 .section .rodata.cst16.bswap_mask, "aM", @progbits, 16
0020 .align 16
0021 .Lbswap_mask:
0022 .octa 0x000102030405060708090a0b0c0d0e0f
0023
/*
 * Symbolic names for the XMM registers used by the routines below.
 * Comments are kept on separate lines so they never become part of a
 * macro body.
 */
/* Running 128-bit value: loaded from and stored back to (%rdi); in/out operand of the multiply helper. */
0024 #define DATA %xmm0
/* Second multiplicand: loaded from (%rsi) or (%rcx) by the entry points; only read by the helper. */
0025 #define SHASH %xmm1
/* T1, T2, T3: scratch registers overwritten by the multiply helper. */
0026 #define T1 %xmm2
0027 #define T2 %xmm3
0028 #define T3 %xmm4
/* Holds the byte-reversal mask for pshufb. */
0029 #define BSWAP %xmm5
/* Holds the current 16-byte input block inside the update loop. */
0030 #define IN1 %xmm6
0031
0032 .text
0033
0034
0035
0036
0037
0038
0039
0040
0041
0042
0043
0044
0045
/*
 * __clmul_gf128mul_ble: file-local helper with a register-based interface.
 *
 * Input:
 *	DATA	first 128-bit operand, written a1:a0 (high:low 64-bit halves)
 *	SHASH	second 128-bit operand, written b1:b0; read only, never modified
 * Output:
 *	DATA	the 256-bit carry-less product of the two operands, folded back
 *		to 128 bits by the two reduction phases below
 * Clobbers:
 *	T1, T2, T3
 *
 * The 128x128 multiplication uses three 64x64 pclmulqdq products
 * (Karatsuba): a0*b0, a1*b1 and (a1 + a0)*(b1 + b0), where "+" in the
 * comments below means XOR.
 *
 * NOTE(review): judging by the name this is a GF(2^128) multiplication as
 * used by GHASH; the field polynomial and bit ordering are not spelled out in
 * the visible code -- confirm against the file header or the callers.
 */
0046 SYM_FUNC_START_LOCAL(__clmul_gf128mul_ble)
0047 movaps DATA, T1 # T1 = copy of a, used for the high-half product
0048 pshufd $0b01001110, DATA, T2 # T2 = a with its 64-bit halves swapped
0049 pshufd $0b01001110, SHASH, T3 # T3 = b with its 64-bit halves swapped
0050 pxor DATA, T2 # T2 = a1 + a0 (in both halves)
0051 pxor SHASH, T3 # T3 = b1 + b0 (in both halves)
0052
0053 pclmulqdq $0x00, SHASH, DATA # DATA = a0 * b0
0054 pclmulqdq $0x11, SHASH, T1 # T1 = a1 * b1
0055 pclmulqdq $0x00, T3, T2 # T2 = (a1 + a0) * (b1 + b0)
0056 pxor DATA, T2
0057 pxor T1, T2 # T2 = a0 * b1 + a1 * b0
0058
0059 movaps T2, T3 # split the 128-bit middle term across the two halves
0060 pslldq $8, T3 # T3 = low 64 bits of middle term, moved to the upper half
0061 psrldq $8, T2 # T2 = high 64 bits of middle term, moved to the lower half
0062 pxor T3, DATA
0063 pxor T2, T1 # <T1:DATA> is result of
0064 # carry-less multiplication
0065
0066 # first phase of the reduction
0067 movaps DATA, T3
0068 psllq $1, T3
0069 pxor DATA, T3
0070 psllq $5, T3
0071 pxor DATA, T3
0072 psllq $57, T3 # per 64-bit lane: T3 = DATA<<63 + DATA<<62 + DATA<<57
0073 movaps T3, T2
0074 pslldq $8, T2 # T2 = low lane of T3, moved to the upper half
0075 psrldq $8, T3 # T3 = high lane of T3, moved to the lower half
0076 pxor T2, DATA
0077 pxor T3, T1
0078
0079 # second phase of the reduction
0080 movaps DATA, T2
0081 psrlq $5, T2
0082 pxor DATA, T2
0083 psrlq $1, T2
0084 pxor DATA, T2
0085 psrlq $1, T2 # per 64-bit lane: T2 = DATA>>7 + DATA>>2 + DATA>>1
0086 pxor T2, T1
0087 pxor T1, DATA # DATA = DATA + T1 + T2: the 128-bit result
0088 RET
0089 SYM_FUNC_END(__clmul_gf128mul_ble)
0090
0091
/*
 * clmul_ghash_mul: multiply the 16-byte value at (%rdi) by the 16-byte value
 * at (%rsi) with __clmul_gf128mul_ble and store the product back to (%rdi).
 *
 * In:
 *	%rdi	pointer to the 16-byte value, updated in place
 *	%rsi	pointer to the 16-byte multiplier, read only
 * Both pointers are accessed with movups, so no alignment is required.
 * Clobbers: DATA, SHASH, BSWAP, T1, T2, T3.  No return value is set.
 *
 * Only the value at (%rdi) is byte-reversed before and after the multiply;
 * the multiplier at (%rsi) is used exactly as stored.
 *
 * NOTE(review): presumably called from C as void f(dst, key) -- confirm the
 * prototype on the C side.
 */
0092 SYM_FUNC_START(clmul_ghash_mul)
0093 FRAME_BEGIN
0094 movups (%rdi), DATA
0095 movups (%rsi), SHASH
0096 movaps .Lbswap_mask(%rip), BSWAP # RIP-relative load: position-independent, no absolute relocation
0097 pshufb BSWAP, DATA # reverse byte order before the multiply
0098 call __clmul_gf128mul_ble
0099 pshufb BSWAP, DATA # restore the original byte order
0100 movups DATA, (%rdi)
0101 FRAME_END
0102 RET
0103 SYM_FUNC_END(clmul_ghash_mul)
0104
0105
0106
0107
0108
/*
 * clmul_ghash_update: fold whole 16-byte input blocks into the 16-byte
 * running value at (%rdi).  For each block: value = (value XOR block)
 * multiplied by the key via __clmul_gf128mul_ble.
 *
 * In:
 *	%rdi	pointer to the 16-byte running value, updated in place
 *	%rsi	pointer to the input data
 *	%rdx	input length in bytes, treated as an unsigned 64-bit count
 *	%rcx	pointer to the 16-byte key, read only
 * All memory is accessed with movups, so no alignment is required.
 *
 * If the length is below 16 the function returns without reading or writing
 * any memory.  Otherwise every complete 16-byte block is consumed; a trailing
 * partial block is ignored.  On exit %rsi has been advanced past the consumed
 * blocks and %rdx holds the number of unconsumed bytes; no return value is
 * placed in %rax.
 *
 * Clobbers: %rsi, %rdx, flags, DATA, SHASH, BSWAP, IN1, T1, T2, T3.
 */
0109 SYM_FUNC_START(clmul_ghash_update)
0110 FRAME_BEGIN
0111 cmp $16, %rdx
0112 jb .Lupdate_just_ret # check length
0113 movaps .Lbswap_mask(%rip), BSWAP # RIP-relative load: position-independent, no absolute relocation
0114 movups (%rdi), DATA
0115 movups (%rcx), SHASH
0116 pshufb BSWAP, DATA # reverse byte order of the running value
0117 .align 4
0118 .Lupdate_loop:
0119 movups (%rsi), IN1
0120 pshufb BSWAP, IN1 # reverse byte order of the input block
0121 pxor IN1, DATA
0122 call __clmul_gf128mul_ble
0123 sub $16, %rdx
0124 add $16, %rsi
0125 cmp $16, %rdx
0126 jae .Lupdate_loop # unsigned test, consistent with the jb entry check
0127 pshufb BSWAP, DATA # restore the original byte order
0128 movups DATA, (%rdi)
0129 .Lupdate_just_ret:
0130 FRAME_END
0131 RET
0132 SYM_FUNC_END(clmul_ghash_update)