0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024
0025
0026
0027
0028
0029
0030
0031
0032
0033
0034
0035
0036
0037
0038
0039
0040
0041
0042
0043
0044
0045
0046
0047
0048
0049
0050
0051
0052
0053
0054
0055
0056 #include <linux/linkage.h>
0057
0058 #define DIGEST_PTR %rdi  /* pointer to the 32-byte hash state (1st integer argument register in the System V AMD64 convention) */
0059 #define DATA_PTR %rsi  /* pointer to the input data (2nd argument register), advanced by 64 per block */
0060 #define NUM_BLKS %rdx  /* block count on entry (3rd argument register), reused as the end-of-data pointer */
0061
0062 #define SHA256CONSTANTS %rax  /* base address of the K256 table */
0063
0064 #define MSG %xmm0  /* W + K input of sha256rnds2, which reads xmm0 implicitly, so this alias must stay xmm0 */
0065 #define STATE0 %xmm1  /* one half of the working state (ABEF at the top of each block) */
0066 #define STATE1 %xmm2  /* other half of the working state (CDGH at the top of each block) */
0067 #define MSGTMP0 %xmm3  /* MSGTMP0..3 hold a sliding window of 16 message schedule words */
0068 #define MSGTMP1 %xmm4
0069 #define MSGTMP2 %xmm5
0070 #define MSGTMP3 %xmm6
0071 #define MSGTMP4 %xmm7  /* scratch register for palignr results and state shuffling */
0072
0073 #define SHUF_MASK %xmm8  /* pshufb control loaded from PSHUFFLE_BYTE_FLIP_MASK */
0074
0075 #define ABEF_SAVE %xmm9  /* copy of STATE0 taken at the start of each block */
0076 #define CDGH_SAVE %xmm10  /* copy of STATE1 taken at the start of each block */
0077
0078
0079 /* ---------------------------------------------------------------------- */
0080 /* sha256_ni_transform                                                    */
0081 /*                                                                        */
0082 /* Runs the 64 SHA-256 rounds over consecutive 64-byte blocks using the   */
0083 /* sha256rnds2, sha256msg1 and sha256msg2 instructions and updates a      */
0084 /* 32-byte hash state in place.                                           */
0085 /*                                                                        */
0086 /* In:    DIGEST_PTR (rdi)  32-byte state, loaded on entry and stored     */
0087 /*                          back on exit with unaligned moves             */
0088 /*        DATA_PTR   (rsi)  input bytes, consumed 64 at a time            */
0089 /*        NUM_BLKS   (rdx)  number of 64-byte blocks, a value of zero     */
0090 /*                          returns without touching the state            */
0091 /* Out:   updated state stored at DIGEST_PTR. rax is left holding the     */
0092 /*        K256 address, presumably not a return value - TODO confirm      */
0093 /*        against the C prototype, which is not visible in this file      */
0094 /* Clob:  rax, rdx, rsi, xmm0 to xmm10, flags                             */
0095 /*                                                                        */
0096 /* Only whole blocks are processed. No padding or partial-block handling  */
0097 /* appears here, so callers presumably take care of that.                 */
0098 .text
0099 .align 32
0100 SYM_FUNC_START(sha256_ni_transform)
0101
0102 shl $6, NUM_BLKS  /* block count to byte count (times 64), ZF is set when the result is zero */
0103 jz .Ldone_hash  /* nothing to hash, leave the state untouched */
0104 add DATA_PTR, NUM_BLKS  /* NUM_BLKS now holds the end-of-data pointer */
0105
0106 /* Load the 32-byte state and regroup its eight words into the two */
0107 /* register layouts used by sha256rnds2. Naming the stored words a..h */
0108 /* in memory order and writing registers high dword first, the pair */
0109 /* goes from DCBA, HGFE to ABEF, CDGH. */
0110
0111 movdqu 0*16(DIGEST_PTR), STATE0
0112 movdqu 1*16(DIGEST_PTR), STATE1
0113
0114 pshufd $0xB1, STATE0, STATE0  /* CDAB */
0115 pshufd $0x1B, STATE1, STATE1  /* EFGH */
0116 movdqa STATE0, MSGTMP4  /* keep CDAB for the blend below */
0117 palignr $8, STATE1, STATE0  /* ABEF */
0118 pblendw $0xF0, MSGTMP4, STATE1  /* CDGH */
0119
0120 movdqa PSHUFFLE_BYTE_FLIP_MASK(%rip), SHUF_MASK  /* aligned load of the byte-swap mask */
0121 lea K256(%rip), SHA256CONSTANTS  /* base of the round-constant table */
0122
0123 .Lloop0:
0124 /* One iteration per 64-byte block. Save the incoming state so it can be added back after the rounds. */
0125 movdqa STATE0, ABEF_SAVE
0126 movdqa STATE1, CDGH_SAVE
0127 /* Each group below covers four rounds: MSG = W + K, sha256rnds2 uses the low two dwords of MSG, pshufd 0x0E moves the high two down, sha256rnds2 runs again with the state registers swapped. */
0128 /* Rounds 0-3 */
0129 movdqu 0*16(DATA_PTR), MSG  /* message bytes 0-15, unaligned load */
0130 pshufb SHUF_MASK, MSG  /* reverse the bytes inside each dword */
0131 movdqa MSG, MSGTMP0  /* keep W[0..3] for the message schedule */
0132 paddd 0*16(SHA256CONSTANTS), MSG  /* add the four round constants */
0133 sha256rnds2 STATE0, STATE1  /* two rounds, result lands in STATE1 */
0134 pshufd $0x0E, MSG, MSG  /* high two dwords of MSG to the low half */
0135 sha256rnds2 STATE1, STATE0  /* two more rounds, result lands in STATE0 */
0136
0137 /* Rounds 4-7 */
0138 movdqu 1*16(DATA_PTR), MSG
0139 pshufb SHUF_MASK, MSG
0140 movdqa MSG, MSGTMP1  /* keep W[4..7] */
0141 paddd 1*16(SHA256CONSTANTS), MSG
0142 sha256rnds2 STATE0, STATE1
0143 pshufd $0x0E, MSG, MSG
0144 sha256rnds2 STATE1, STATE0
0145 sha256msg1 MSGTMP1, MSGTMP0  /* first schedule step toward W[16..19], accumulated in MSGTMP0 */
0146
0147 /* Rounds 8-11 */
0148 movdqu 2*16(DATA_PTR), MSG
0149 pshufb SHUF_MASK, MSG
0150 movdqa MSG, MSGTMP2  /* keep W[8..11] */
0151 paddd 2*16(SHA256CONSTANTS), MSG
0152 sha256rnds2 STATE0, STATE1
0153 pshufd $0x0E, MSG, MSG
0154 sha256rnds2 STATE1, STATE0
0155 sha256msg1 MSGTMP2, MSGTMP1  /* first schedule step toward W[20..23] */
0156
0157 /* Rounds 12-15, also the first group that finishes new schedule words */
0158 movdqu 3*16(DATA_PTR), MSG
0159 pshufb SHUF_MASK, MSG
0160 movdqa MSG, MSGTMP3  /* keep W[12..15] */
0161 paddd 3*16(SHA256CONSTANTS), MSG
0162 sha256rnds2 STATE0, STATE1
0163 movdqa MSGTMP3, MSGTMP4
0164 palignr $4, MSGTMP2, MSGTMP4  /* MSGTMP4 = W[9..12], taken from the MSGTMP3:MSGTMP2 pair shifted by one dword */
0165 paddd MSGTMP4, MSGTMP0  /* add those words into the partial result */
0166 sha256msg2 MSGTMP3, MSGTMP0  /* final schedule step, MSGTMP0 = W[16..19] */
0167 pshufd $0x0E, MSG, MSG
0168 sha256rnds2 STATE1, STATE0
0169 sha256msg1 MSGTMP3, MSGTMP2  /* first schedule step toward W[24..27] */
0170
0171 /* Rounds 16-19, from here on the same pattern repeats with MSGTMP0..3 rotated */
0172 movdqa MSGTMP0, MSG
0173 paddd 4*16(SHA256CONSTANTS), MSG
0174 sha256rnds2 STATE0, STATE1
0175 movdqa MSGTMP0, MSGTMP4
0176 palignr $4, MSGTMP3, MSGTMP4
0177 paddd MSGTMP4, MSGTMP1
0178 sha256msg2 MSGTMP0, MSGTMP1  /* MSGTMP1 = W[20..23] */
0179 pshufd $0x0E, MSG, MSG
0180 sha256rnds2 STATE1, STATE0
0181 sha256msg1 MSGTMP0, MSGTMP3
0182
0183 /* Rounds 20-23 */
0184 movdqa MSGTMP1, MSG
0185 paddd 5*16(SHA256CONSTANTS), MSG
0186 sha256rnds2 STATE0, STATE1
0187 movdqa MSGTMP1, MSGTMP4
0188 palignr $4, MSGTMP0, MSGTMP4
0189 paddd MSGTMP4, MSGTMP2
0190 sha256msg2 MSGTMP1, MSGTMP2  /* MSGTMP2 = W[24..27] */
0191 pshufd $0x0E, MSG, MSG
0192 sha256rnds2 STATE1, STATE0
0193 sha256msg1 MSGTMP1, MSGTMP0
0194
0195 /* Rounds 24-27 */
0196 movdqa MSGTMP2, MSG
0197 paddd 6*16(SHA256CONSTANTS), MSG
0198 sha256rnds2 STATE0, STATE1
0199 movdqa MSGTMP2, MSGTMP4
0200 palignr $4, MSGTMP1, MSGTMP4
0201 paddd MSGTMP4, MSGTMP3
0202 sha256msg2 MSGTMP2, MSGTMP3  /* MSGTMP3 = W[28..31] */
0203 pshufd $0x0E, MSG, MSG
0204 sha256rnds2 STATE1, STATE0
0205 sha256msg1 MSGTMP2, MSGTMP1
0206
0207 /* Rounds 28-31 */
0208 movdqa MSGTMP3, MSG
0209 paddd 7*16(SHA256CONSTANTS), MSG
0210 sha256rnds2 STATE0, STATE1
0211 movdqa MSGTMP3, MSGTMP4
0212 palignr $4, MSGTMP2, MSGTMP4
0213 paddd MSGTMP4, MSGTMP0
0214 sha256msg2 MSGTMP3, MSGTMP0  /* MSGTMP0 = W[32..35] */
0215 pshufd $0x0E, MSG, MSG
0216 sha256rnds2 STATE1, STATE0
0217 sha256msg1 MSGTMP3, MSGTMP2
0218
0219 /* Rounds 32-35 */
0220 movdqa MSGTMP0, MSG
0221 paddd 8*16(SHA256CONSTANTS), MSG
0222 sha256rnds2 STATE0, STATE1
0223 movdqa MSGTMP0, MSGTMP4
0224 palignr $4, MSGTMP3, MSGTMP4
0225 paddd MSGTMP4, MSGTMP1
0226 sha256msg2 MSGTMP0, MSGTMP1  /* MSGTMP1 = W[36..39] */
0227 pshufd $0x0E, MSG, MSG
0228 sha256rnds2 STATE1, STATE0
0229 sha256msg1 MSGTMP0, MSGTMP3
0230
0231 /* Rounds 36-39 */
0232 movdqa MSGTMP1, MSG
0233 paddd 9*16(SHA256CONSTANTS), MSG
0234 sha256rnds2 STATE0, STATE1
0235 movdqa MSGTMP1, MSGTMP4
0236 palignr $4, MSGTMP0, MSGTMP4
0237 paddd MSGTMP4, MSGTMP2
0238 sha256msg2 MSGTMP1, MSGTMP2  /* MSGTMP2 = W[40..43] */
0239 pshufd $0x0E, MSG, MSG
0240 sha256rnds2 STATE1, STATE0
0241 sha256msg1 MSGTMP1, MSGTMP0
0242
0243 /* Rounds 40-43 */
0244 movdqa MSGTMP2, MSG
0245 paddd 10*16(SHA256CONSTANTS), MSG
0246 sha256rnds2 STATE0, STATE1
0247 movdqa MSGTMP2, MSGTMP4
0248 palignr $4, MSGTMP1, MSGTMP4
0249 paddd MSGTMP4, MSGTMP3
0250 sha256msg2 MSGTMP2, MSGTMP3  /* MSGTMP3 = W[44..47] */
0251 pshufd $0x0E, MSG, MSG
0252 sha256rnds2 STATE1, STATE0
0253 sha256msg1 MSGTMP2, MSGTMP1
0254
0255 /* Rounds 44-47 */
0256 movdqa MSGTMP3, MSG
0257 paddd 11*16(SHA256CONSTANTS), MSG
0258 sha256rnds2 STATE0, STATE1
0259 movdqa MSGTMP3, MSGTMP4
0260 palignr $4, MSGTMP2, MSGTMP4
0261 paddd MSGTMP4, MSGTMP0
0262 sha256msg2 MSGTMP3, MSGTMP0  /* MSGTMP0 = W[48..51] */
0263 pshufd $0x0E, MSG, MSG
0264 sha256rnds2 STATE1, STATE0
0265 sha256msg1 MSGTMP3, MSGTMP2
0266
0267 /* Rounds 48-51 */
0268 movdqa MSGTMP0, MSG
0269 paddd 12*16(SHA256CONSTANTS), MSG
0270 sha256rnds2 STATE0, STATE1
0271 movdqa MSGTMP0, MSGTMP4
0272 palignr $4, MSGTMP3, MSGTMP4
0273 paddd MSGTMP4, MSGTMP1
0274 sha256msg2 MSGTMP0, MSGTMP1  /* MSGTMP1 = W[52..55] */
0275 pshufd $0x0E, MSG, MSG
0276 sha256rnds2 STATE1, STATE0
0277 sha256msg1 MSGTMP0, MSGTMP3  /* last sha256msg1, it feeds W[60..63] */
0278
0279 /* Rounds 52-55, no sha256msg1 from here on because no words past W[63] are needed */
0280 movdqa MSGTMP1, MSG
0281 paddd 13*16(SHA256CONSTANTS), MSG
0282 sha256rnds2 STATE0, STATE1
0283 movdqa MSGTMP1, MSGTMP4
0284 palignr $4, MSGTMP0, MSGTMP4
0285 paddd MSGTMP4, MSGTMP2
0286 sha256msg2 MSGTMP1, MSGTMP2  /* MSGTMP2 = W[56..59] */
0287 pshufd $0x0E, MSG, MSG
0288 sha256rnds2 STATE1, STATE0
0289
0290 /* Rounds 56-59 */
0291 movdqa MSGTMP2, MSG
0292 paddd 14*16(SHA256CONSTANTS), MSG
0293 sha256rnds2 STATE0, STATE1
0294 movdqa MSGTMP2, MSGTMP4
0295 palignr $4, MSGTMP1, MSGTMP4
0296 paddd MSGTMP4, MSGTMP3
0297 sha256msg2 MSGTMP2, MSGTMP3  /* MSGTMP3 = W[60..63] */
0298 pshufd $0x0E, MSG, MSG
0299 sha256rnds2 STATE1, STATE0
0300
0301 /* Rounds 60-63, rounds only, the schedule is complete */
0302 movdqa MSGTMP3, MSG
0303 paddd 15*16(SHA256CONSTANTS), MSG
0304 sha256rnds2 STATE0, STATE1
0305 pshufd $0x0E, MSG, MSG
0306 sha256rnds2 STATE1, STATE0
0307
0308 /* Add the state saved at the top of the block to the result of the rounds */
0309 paddd ABEF_SAVE, STATE0
0310 paddd CDGH_SAVE, STATE1
0311
0312 /* Advance to the next block and loop until the end pointer is reached */
0313 add $64, DATA_PTR
0314 cmp NUM_BLKS, DATA_PTR
0315 jne .Lloop0
0316
0317 /* Undo the entry shuffle so the state is stored in its original word order */
0318 pshufd $0x1B, STATE0, STATE0  /* FEBA */
0319 pshufd $0xB1, STATE1, STATE1  /* DCHG */
0320 movdqa STATE0, MSGTMP4  /* keep FEBA for the palignr below */
0321 pblendw $0xF0, STATE1, STATE0  /* DCBA */
0322 palignr $8, MSGTMP4, STATE1  /* HGFE */
0323
0324 movdqu STATE0, 0*16(DIGEST_PTR)
0325 movdqu STATE1, 1*16(DIGEST_PTR)
0326
0327 .Ldone_hash:
0328 /* RET is not defined in this file, presumably a return macro supplied through linux/linkage.h */
0329 RET
0330 SYM_FUNC_END(sha256_ni_transform)
0331
0332 .section .rodata.cst256.K256, "aM", @progbits, 256  /* allocatable, mergeable read-only data with a 256-byte entity size */
0333 .align 64
0334 K256:  /* SHA-256 round constants, 64 dwords in round order, sha256_ni_transform adds row n via n*16(SHA256CONSTANTS) */
0335 .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5  /* rounds 0-3 */
0336 .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5  /* rounds 4-7 */
0337 .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3  /* rounds 8-11 */
0338 .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174  /* rounds 12-15 */
0339 .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc  /* rounds 16-19 */
0340 .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da  /* rounds 20-23 */
0341 .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7  /* rounds 24-27 */
0342 .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967  /* rounds 28-31 */
0343 .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13  /* rounds 32-35 */
0344 .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85  /* rounds 36-39 */
0345 .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3  /* rounds 40-43 */
0346 .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070  /* rounds 44-47 */
0347 .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5  /* rounds 48-51 */
0348 .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3  /* rounds 52-55 */
0349 .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208  /* rounds 56-59 */
0350 .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2  /* rounds 60-63 */
0351
0352 .section .rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16  /* allocatable, mergeable read-only data with a 16-byte entity size */
0353 .align 16  /* 16-byte alignment, the mask is loaded with movdqa in sha256_ni_transform */
0354 PSHUFFLE_BYTE_FLIP_MASK:
0355 .octa 0x0c0d0e0f08090a0b0405060700010203  /* pshufb control that reverses the four bytes inside each dword */