0001
0002
0003
0004
0005
0006
0007
0008
0009 #include <linux/linkage.h>
0010 #include <asm/frame.h>
0011
/*
 * Register aliases used throughout this file.
 *
 * General-purpose registers.  The bulk and tail routines receive their
 * arguments in these registers; init and final address some arguments by
 * raw register name instead.
 */
#define STATEP	%rdi	/* pointer to the five 16-byte state blocks */
#define LEN	%rsi	/* byte count */
#define SRC	%rdx	/* input pointer */
#define DST	%rcx	/* output pointer */

/* The five state blocks live in %xmm0-%xmm4 while a routine runs. */
#define STATE0	%xmm0
#define STATE1	%xmm1
#define STATE2	%xmm2
#define STATE3	%xmm3
#define STATE4	%xmm4

/* KEY and MSG are two names for the same register, %xmm5. */
#define KEY	%xmm5
#define MSG	%xmm5

/* Scratch registers; aegis128_update overwrites T0. */
#define T0	%xmm6
#define T1	%xmm7
0026
/*
 * Initialization constants: two 16-byte blocks emitted back to back as a
 * single 32-byte mergeable entry.  Read as one 32-byte string, the values
 * follow the Fibonacci sequence modulo 256.
 */
.section .rodata.cst16.aegis128_const, "aM", @progbits, 32
.balign 16
.Laegis128_const_0:
	.byte	0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d
	.byte	0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62
.Laegis128_const_1:
	.byte	0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1
	.byte	0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd
0035
/*
 * Byte-index vector 0, 1, ..., 15.  crypto_aegis128_aesni_dec_tail compares
 * it against a broadcast byte count to build a "first LEN bytes" mask.
 */
.section .rodata.cst16.aegis128_counter, "aM", @progbits, 16
.balign 16
.Laegis128_counter:
	.byte	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
	.byte	0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f

/* Everything below is code. */
.text
0043
0044
0045
0046
0047
0048
0049
0050
0051
0052
/*
 * aegis128_update - one state-update step, without the message XOR.
 *
 * input:
 *   STATE[0-4] - current state
 * output:
 *   STATE[0-4] - updated state; every register receives one AES round of
 *                its own old value keyed with the old value of the next
 *                register (STATE3 is keyed with the old STATE4, STATE4 with
 *                the old STATE0), so the logical block order is rotated by
 *                one register afterwards
 * changed:
 *   T0 (holds the pre-update STATE4)
 *
 * The instruction order is significant: each aesenc must read its round key
 * before the instruction that overwrites that register.
 */
.macro aegis128_update
	movdqa	STATE4, T0		/* keep old STATE4 for the last round */
	aesenc	STATE0, STATE4
	aesenc	STATE1, STATE0
	aesenc	STATE2, STATE1
	aesenc	STATE3, STATE2
	aesenc	T0,     STATE3
.endm
0061
0062
0063
0064
0065
0066
0067
0068
0069
0070
0071
0072
0073
/*
 * __load_partial - gather a partial (< 16 byte) block into MSG, zero padded.
 *
 * Internal calling convention:
 * input:
 *   LEN - byte count; bits 0-3 select which 1/2/4/8-byte pieces exist
 *   SRC - source pointer
 * output:
 *   MSG - the (LEN & 0xF) bytes found at SRC + (LEN & 0x10), remaining
 *         bytes zero
 * changed:
 *   T0, %r8, %r9, flags
 *
 * The pieces are read from the end of the data towards its start (byte,
 * word, dword, qword), each at the offset obtained by clearing the lower
 * size bits of LEN, so no byte beyond the LEN-byte range is touched.
 */
SYM_FUNC_START_LOCAL(__load_partial)
	xor	%r9d, %r9d		/* accumulates the low 8 bytes */
	pxor	MSG, MSG

	/* 1-byte piece */
	test	$0x1, LEN
	jz	.Lld_partial_1

	mov	LEN, %r8
	and	$0x1E, %r8
	mov	(SRC, %r8), %r9b

.Lld_partial_1:
	/* 2-byte piece: make room below what was gathered so far */
	test	$0x2, LEN
	jz	.Lld_partial_2

	mov	LEN, %r8
	and	$0x1C, %r8
	shl	$0x10, %r9
	mov	(SRC, %r8), %r9w

.Lld_partial_2:
	/* 4-byte piece */
	test	$0x4, LEN
	jz	.Lld_partial_4

	mov	LEN, %r8
	and	$0x18, %r8
	shl	$32, %r9
	mov	(SRC, %r8), %r8d	/* 32-bit load zero-extends %r8 */
	xor	%r8, %r9

.Lld_partial_4:
	movq	%r9, MSG

	/* 8-byte piece: move the gathered bytes to the high half first */
	test	$0x8, LEN
	jz	.Lld_partial_8

	mov	LEN, %r8
	and	$0x10, %r8
	pslldq	$8, MSG
	movq	(SRC, %r8), T0
	pxor	T0, MSG

.Lld_partial_8:
	RET
SYM_FUNC_END(__load_partial)
0127
0128
0129
0130
0131
0132
0133
0134
0135
0136
0137
0138
0139
/*
 * __store_partial - write the leading bytes of T0 to DST.
 *
 * Internal calling convention:
 * input:
 *   LEN - number of bytes to store (callers pass a partial block, < 16)
 *   DST - destination pointer
 *   T0  - block to store
 * changed:
 *   T0 (shifted right by 8 bytes once the low half has been written),
 *   %r8, %r9, %r10, flags
 *
 * Bytes are emitted front to back in pieces of 8, 4, 2 and 1; a piece is
 * written whenever at least that many bytes remain.
 */
SYM_FUNC_START_LOCAL(__store_partial)
	movq	T0, %r10		/* bytes waiting to be written */
	mov	DST, %r9		/* running output pointer */
	mov	LEN, %r8		/* bytes still to store */

	cmp	$8, %r8
	jl	.Lst_partial_8

	mov	%r10, (%r9)
	add	$8, %r9
	sub	$8, %r8
	psrldq	$8, T0			/* continue with the high half */
	movq	T0, %r10

.Lst_partial_8:
	cmp	$4, %r8
	jl	.Lst_partial_4

	mov	%r10d, (%r9)
	add	$4, %r9
	sub	$4, %r8
	shr	$32, %r10

.Lst_partial_4:
	cmp	$2, %r8
	jl	.Lst_partial_2

	mov	%r10w, (%r9)
	add	$2, %r9
	sub	$2, %r8
	shr	$0x10, %r10

.Lst_partial_2:
	cmp	$1, %r8
	jl	.Lst_partial_1

	mov	%r10b, (%r9)

.Lst_partial_1:
	RET
SYM_FUNC_END(__store_partial)
0185
0186
0187
0188
/*
 * crypto_aegis128_aesni_init - build the initial five-block state.
 *
 * input:
 *   %rdi (STATEP) - receives the 5 x 16-byte state (unaligned stores)
 *   %rsi          - 16-byte key; loaded with movdqa, so it must be
 *                   16-byte aligned (NOTE(review): confirm against caller)
 *   %rdx          - 16-byte block XORed with the key, loaded unaligned
 *                   (presumably the IV/nonce; confirm against the C
 *                   prototype)
 * changed:
 *   %xmm0-%xmm7, flags
 */
SYM_FUNC_START(crypto_aegis128_aesni_init)
	FRAME_BEGIN

	/* T1 = block at (%rdx) */
	movdqu	(%rdx), T1

	/* load the key; T1 becomes key ^ (%rdx) block */
	movdqa	(%rsi), KEY
	pxor	KEY, T1
	movdqa	T1, STATE0
	movdqa	KEY, STATE3
	movdqa	KEY, STATE4

	/*
	 * Load the constants.  RIP-relative addressing keeps these loads
	 * position-independent and avoids absolute relocations.
	 */
	movdqa	.Laegis128_const_0(%rip), STATE2
	movdqa	.Laegis128_const_1(%rip), STATE1
	pxor	STATE2, STATE3
	pxor	STATE1, STATE4

	/*
	 * Ten update rounds, alternately mixing in KEY and T1.  The target
	 * register moves down by one each round because aegis128_update
	 * rotates the logical block order.
	 */
	aegis128_update; pxor KEY, STATE4
	aegis128_update; pxor T1,  STATE3
	aegis128_update; pxor KEY, STATE2
	aegis128_update; pxor T1,  STATE1
	aegis128_update; pxor KEY, STATE0
	aegis128_update; pxor T1,  STATE4
	aegis128_update; pxor KEY, STATE3
	aegis128_update; pxor T1,  STATE2
	aegis128_update; pxor KEY, STATE1
	aegis128_update; pxor T1,  STATE0

	/* after 10 rounds the registers are back in natural order */
	movdqu	STATE0, 0x00(STATEP)
	movdqu	STATE1, 0x10(STATEP)
	movdqu	STATE2, 0x20(STATEP)
	movdqu	STATE3, 0x30(STATEP)
	movdqu	STATE4, 0x40(STATEP)

	FRAME_END
	RET
SYM_FUNC_END(crypto_aegis128_aesni_init)
0230
0231
0232
0233
0234
/*
 * aegis128_ad_block - absorb one 16-byte block (private to the ad routine).
 *
 *   a   - 'a' for an aligned load (movdqa), 'u' for unaligned (movdqu)
 *   i   - block index inside the 5-block unrolled iteration
 *   dst - register that receives the block XOR after the update
 *   out - suffix of the .Lad_out_* label taken when fewer than 16 bytes
 *         remain after this block
 */
.macro aegis128_ad_block a i dst out
	movdq\a	(\i * 0x10)(SRC), MSG
	aegis128_update
	pxor	MSG, \dst
	sub	$0x10, LEN
	cmp	$0x10, LEN
	jl	.Lad_out_\out
.endm

/*
 * aegis128_ad_store - write five registers to the state buffer, in the
 * order given (first argument goes to offset 0x00).
 */
.macro aegis128_ad_store r0 r1 r2 r3 r4
	movdqu	\r0, 0x00(STATEP)
	movdqu	\r1, 0x10(STATEP)
	movdqu	\r2, 0x20(STATEP)
	movdqu	\r3, 0x30(STATEP)
	movdqu	\r4, 0x40(STATEP)
.endm

/*
 * crypto_aegis128_aesni_ad - absorb whole 16-byte blocks into the state.
 *
 * input:
 *   STATEP - 5 x 16-byte state, read and written back with unaligned moves
 *   LEN    - byte count; only complete 16-byte blocks are consumed, and
 *            nothing at all happens when LEN < 16
 *   SRC    - data pointer; an aligned-load loop is used when SRC is
 *            16-byte aligned
 * changed:
 *   LEN, SRC, %xmm0-%xmm6, flags
 *
 * NOTE(review): LEN is compared as a full 64-bit register and with a signed
 * jl inside the loop; if the C prototype passes a 32-bit length the upper
 * half of %rsi is not guaranteed to be zero.  Confirm against the caller.
 */
SYM_FUNC_START(crypto_aegis128_aesni_ad)
	FRAME_BEGIN

	cmp	$0x10, LEN
	jb	.Lad_out

	/* load the state */
	movdqu	0x00(STATEP), STATE0
	movdqu	0x10(STATEP), STATE1
	movdqu	0x20(STATEP), STATE2
	movdqu	0x30(STATEP), STATE3
	movdqu	0x40(STATEP), STATE4

	/* pick the loop matching the alignment of SRC */
	test	$0xF, SRC
	jnz	.Lad_u_loop

.align 8
.Lad_a_loop:
	aegis128_ad_block a 0 STATE4 1
	aegis128_ad_block a 1 STATE3 2
	aegis128_ad_block a 2 STATE2 3
	aegis128_ad_block a 3 STATE1 4
	aegis128_ad_block a 4 STATE0 0

	add	$0x50, SRC
	jmp	.Lad_a_loop

.align 8
.Lad_u_loop:
	aegis128_ad_block u 0 STATE4 1
	aegis128_ad_block u 1 STATE3 2
	aegis128_ad_block u 2 STATE2 3
	aegis128_ad_block u 3 STATE1 4
	aegis128_ad_block u 4 STATE0 0

	add	$0x50, SRC
	jmp	.Lad_u_loop

	/*
	 * Exit points.  Each one stores the registers in the rotated order
	 * that matches the number of updates done in the current iteration.
	 */
.Lad_out_0:
	aegis128_ad_store STATE0 STATE1 STATE2 STATE3 STATE4
	FRAME_END
	RET

.Lad_out_1:
	aegis128_ad_store STATE4 STATE0 STATE1 STATE2 STATE3
	FRAME_END
	RET

.Lad_out_2:
	aegis128_ad_store STATE3 STATE4 STATE0 STATE1 STATE2
	FRAME_END
	RET

.Lad_out_3:
	aegis128_ad_store STATE2 STATE3 STATE4 STATE0 STATE1
	FRAME_END
	RET

.Lad_out_4:
	aegis128_ad_store STATE1 STATE2 STATE3 STATE4 STATE0
	FRAME_END
	RET

.Lad_out:
	/* less than one block: state untouched */
	FRAME_END
	RET
SYM_FUNC_END(crypto_aegis128_aesni_ad)
0382
/*
 * encrypt_block - encrypt one 16-byte block and absorb the plaintext.
 *
 *   a        - 'a' for aligned moves (movdqa), 'u' for unaligned (movdqu)
 *   s0 .. s4 - registers currently holding logical state blocks 0..4
 *              (s0 is not referenced by the body)
 *   i        - block index inside the 5-block unrolled iteration; selects
 *              the SRC/DST offset and the .Lenc_out_<i> exit label
 *
 * ciphertext = plaintext ^ s1 ^ s4 ^ (s2 & s3)
 * changed: MSG, T0, T1, LEN, the state registers, flags
 */
.macro encrypt_block a s0 s1 s2 s3 s4 i
	/* T1 = s2 & s3 */
	movdqa	\s2, T1
	pand	\s3, T1

	/* T0 = plaintext ^ s1 ^ s4 ^ T1 */
	movdq\a	(\i * 0x10)(SRC), MSG
	movdqa	MSG, T0
	pxor	\s1, T0
	pxor	\s4, T0
	pxor	T1, T0
	movdq\a	T0, (\i * 0x10)(DST)

	/* absorb the plaintext */
	aegis128_update
	pxor	MSG, \s4

	/* leave once fewer than 16 bytes remain */
	sub	$0x10, LEN
	cmp	$0x10, LEN
	jl	.Lenc_out_\i
.endm
0400
0401
0402
0403
0404
/*
 * crypto_aegis128_aesni_enc - encrypt whole 16-byte blocks.
 *
 * input:
 *   STATEP - 5 x 16-byte state, read and written back with unaligned moves
 *   LEN    - byte count; only complete 16-byte blocks are processed, and
 *            nothing at all happens when LEN < 16
 *   SRC    - plaintext pointer
 *   DST    - ciphertext pointer
 * changed:
 *   LEN, SRC, DST, %r8, %xmm0-%xmm7, flags
 *
 * The aligned loop is taken only when both SRC and DST are 16-byte aligned.
 *
 * NOTE(review): LEN is used as a full 64-bit register; confirm the caller
 * passes a zero-extended value.
 */
SYM_FUNC_START(crypto_aegis128_aesni_enc)
	FRAME_BEGIN

	cmp	$0x10, LEN
	jb	.Lenc_out

	/* load the state */
	movdqu	0x00(STATEP), STATE0
	movdqu	0x10(STATEP), STATE1
	movdqu	0x20(STATEP), STATE2
	movdqu	0x30(STATEP), STATE3
	movdqu	0x40(STATEP), STATE4

	/* any low address bit set in either pointer -> unaligned loop */
	mov	SRC, %r8
	or	DST, %r8
	test	$0xF, %r8
	jnz	.Lenc_u_loop

.align 8
.Lenc_a_loop:
	encrypt_block a STATE0 STATE1 STATE2 STATE3 STATE4 0
	encrypt_block a STATE4 STATE0 STATE1 STATE2 STATE3 1
	encrypt_block a STATE3 STATE4 STATE0 STATE1 STATE2 2
	encrypt_block a STATE2 STATE3 STATE4 STATE0 STATE1 3
	encrypt_block a STATE1 STATE2 STATE3 STATE4 STATE0 4

	add	$0x50, SRC
	add	$0x50, DST
	jmp	.Lenc_a_loop

.align 8
.Lenc_u_loop:
	encrypt_block u STATE0 STATE1 STATE2 STATE3 STATE4 0
	encrypt_block u STATE4 STATE0 STATE1 STATE2 STATE3 1
	encrypt_block u STATE3 STATE4 STATE0 STATE1 STATE2 2
	encrypt_block u STATE2 STATE3 STATE4 STATE0 STATE1 3
	encrypt_block u STATE1 STATE2 STATE3 STATE4 STATE0 4

	add	$0x50, SRC
	add	$0x50, DST
	jmp	.Lenc_u_loop

	/*
	 * Exit points: .Lenc_out_<i> is reached after block i of an
	 * iteration, i.e. after i + 1 updates, and stores the registers in
	 * the correspondingly rotated order.
	 */
.Lenc_out_0:
	movdqu	STATE4, 0x00(STATEP)
	movdqu	STATE0, 0x10(STATEP)
	movdqu	STATE1, 0x20(STATEP)
	movdqu	STATE2, 0x30(STATEP)
	movdqu	STATE3, 0x40(STATEP)
	FRAME_END
	RET

.Lenc_out_1:
	movdqu	STATE3, 0x00(STATEP)
	movdqu	STATE4, 0x10(STATEP)
	movdqu	STATE0, 0x20(STATEP)
	movdqu	STATE1, 0x30(STATEP)
	movdqu	STATE2, 0x40(STATEP)
	FRAME_END
	RET

.Lenc_out_2:
	movdqu	STATE2, 0x00(STATEP)
	movdqu	STATE3, 0x10(STATEP)
	movdqu	STATE4, 0x20(STATEP)
	movdqu	STATE0, 0x30(STATEP)
	movdqu	STATE1, 0x40(STATEP)
	FRAME_END
	RET

.Lenc_out_3:
	movdqu	STATE1, 0x00(STATEP)
	movdqu	STATE2, 0x10(STATEP)
	movdqu	STATE3, 0x20(STATEP)
	movdqu	STATE4, 0x30(STATEP)
	movdqu	STATE0, 0x40(STATEP)
	FRAME_END
	RET

.Lenc_out_4:
	movdqu	STATE0, 0x00(STATEP)
	movdqu	STATE1, 0x10(STATEP)
	movdqu	STATE2, 0x20(STATEP)
	movdqu	STATE3, 0x30(STATEP)
	movdqu	STATE4, 0x40(STATEP)
	FRAME_END
	RET

.Lenc_out:
	/* less than one block: state untouched */
	FRAME_END
	RET
SYM_FUNC_END(crypto_aegis128_aesni_enc)
0497
0498
0499
0500
0501
/*
 * crypto_aegis128_aesni_enc_tail - encrypt a final partial block.
 *
 * input:
 *   STATEP - 5 x 16-byte state, read and written back with unaligned moves
 *   LEN    - byte count of the partial block (handed to __load_partial and
 *            __store_partial unchanged)
 *   SRC    - plaintext pointer
 *   DST    - ciphertext pointer
 * changed:
 *   %r8-%r10, %xmm0-%xmm7, flags
 */
SYM_FUNC_START(crypto_aegis128_aesni_enc_tail)
	FRAME_BEGIN

	/* load the state */
	movdqu	0x00(STATEP), STATE0
	movdqu	0x10(STATEP), STATE1
	movdqu	0x20(STATEP), STATE2
	movdqu	0x30(STATEP), STATE3
	movdqu	0x40(STATEP), STATE4

	/* MSG = zero-padded plaintext */
	call	__load_partial

	/* T0 = MSG ^ STATE1 ^ STATE4 ^ (STATE2 & STATE3) */
	movdqa	STATE2, T1
	pand	STATE3, T1
	movdqa	MSG, T0
	pxor	T1, T0
	pxor	STATE1, T0
	pxor	STATE4, T0

	/* write only the first LEN ciphertext bytes */
	call	__store_partial

	/* absorb the zero-padded plaintext */
	aegis128_update
	pxor	MSG, STATE4

	/* one update was done: store in order rotated by one register */
	movdqu	STATE4, 0x00(STATEP)
	movdqu	STATE0, 0x10(STATEP)
	movdqu	STATE1, 0x20(STATEP)
	movdqu	STATE2, 0x30(STATEP)
	movdqu	STATE3, 0x40(STATEP)

	FRAME_END
	RET
SYM_FUNC_END(crypto_aegis128_aesni_enc_tail)
0537
/*
 * decrypt_block - decrypt one 16-byte block and absorb the plaintext.
 *
 *   a        - 'a' for aligned moves (movdqa), 'u' for unaligned (movdqu)
 *   s0 .. s4 - registers currently holding logical state blocks 0..4
 *              (s0 is not referenced by the body)
 *   i        - block index inside the 5-block unrolled iteration; selects
 *              the SRC/DST offset and the .Ldec_out_<i> exit label
 *
 * plaintext = ciphertext ^ s1 ^ s4 ^ (s2 & s3)
 * changed: MSG, T0 (via aegis128_update), T1, LEN, the state registers,
 *          flags
 */
.macro decrypt_block a s0 s1 s2 s3 s4 i
	/* T1 = s2 & s3 */
	movdqa	\s2, T1
	pand	\s3, T1

	/* MSG = ciphertext ^ s1 ^ s4 ^ T1 */
	movdq\a	(\i * 0x10)(SRC), MSG
	pxor	T1, MSG
	pxor	\s1, MSG
	pxor	\s4, MSG
	movdq\a	MSG, (\i * 0x10)(DST)

	/* absorb the recovered plaintext */
	aegis128_update
	pxor	MSG, \s4

	/* leave once fewer than 16 bytes remain */
	sub	$0x10, LEN
	cmp	$0x10, LEN
	jl	.Ldec_out_\i
.endm
0554
0555
0556
0557
0558
/*
 * crypto_aegis128_aesni_dec - decrypt whole 16-byte blocks.
 *
 * input:
 *   STATEP - 5 x 16-byte state, read and written back with unaligned moves
 *   LEN    - byte count; only complete 16-byte blocks are processed, and
 *            nothing at all happens when LEN < 16
 *   SRC    - ciphertext pointer
 *   DST    - plaintext pointer
 * changed:
 *   LEN, SRC, DST, %r8, %xmm0-%xmm7, flags
 *
 * The aligned loop is taken only when both SRC and DST are 16-byte aligned.
 *
 * NOTE(review): LEN is used as a full 64-bit register; confirm the caller
 * passes a zero-extended value.
 */
SYM_FUNC_START(crypto_aegis128_aesni_dec)
	FRAME_BEGIN

	cmp	$0x10, LEN
	jb	.Ldec_out

	/* load the state */
	movdqu	0x00(STATEP), STATE0
	movdqu	0x10(STATEP), STATE1
	movdqu	0x20(STATEP), STATE2
	movdqu	0x30(STATEP), STATE3
	movdqu	0x40(STATEP), STATE4

	/* any low address bit set in either pointer -> unaligned loop */
	mov	SRC, %r8
	or	DST, %r8
	test	$0xF, %r8
	jnz	.Ldec_u_loop

.align 8
.Ldec_a_loop:
	decrypt_block a STATE0 STATE1 STATE2 STATE3 STATE4 0
	decrypt_block a STATE4 STATE0 STATE1 STATE2 STATE3 1
	decrypt_block a STATE3 STATE4 STATE0 STATE1 STATE2 2
	decrypt_block a STATE2 STATE3 STATE4 STATE0 STATE1 3
	decrypt_block a STATE1 STATE2 STATE3 STATE4 STATE0 4

	add	$0x50, SRC
	add	$0x50, DST
	jmp	.Ldec_a_loop

.align 8
.Ldec_u_loop:
	decrypt_block u STATE0 STATE1 STATE2 STATE3 STATE4 0
	decrypt_block u STATE4 STATE0 STATE1 STATE2 STATE3 1
	decrypt_block u STATE3 STATE4 STATE0 STATE1 STATE2 2
	decrypt_block u STATE2 STATE3 STATE4 STATE0 STATE1 3
	decrypt_block u STATE1 STATE2 STATE3 STATE4 STATE0 4

	add	$0x50, SRC
	add	$0x50, DST
	jmp	.Ldec_u_loop

	/*
	 * Exit points: .Ldec_out_<i> is reached after block i of an
	 * iteration, i.e. after i + 1 updates, and stores the registers in
	 * the correspondingly rotated order.
	 */
.Ldec_out_0:
	movdqu	STATE4, 0x00(STATEP)
	movdqu	STATE0, 0x10(STATEP)
	movdqu	STATE1, 0x20(STATEP)
	movdqu	STATE2, 0x30(STATEP)
	movdqu	STATE3, 0x40(STATEP)
	FRAME_END
	RET

.Ldec_out_1:
	movdqu	STATE3, 0x00(STATEP)
	movdqu	STATE4, 0x10(STATEP)
	movdqu	STATE0, 0x20(STATEP)
	movdqu	STATE1, 0x30(STATEP)
	movdqu	STATE2, 0x40(STATEP)
	FRAME_END
	RET

.Ldec_out_2:
	movdqu	STATE2, 0x00(STATEP)
	movdqu	STATE3, 0x10(STATEP)
	movdqu	STATE4, 0x20(STATEP)
	movdqu	STATE0, 0x30(STATEP)
	movdqu	STATE1, 0x40(STATEP)
	FRAME_END
	RET

.Ldec_out_3:
	movdqu	STATE1, 0x00(STATEP)
	movdqu	STATE2, 0x10(STATEP)
	movdqu	STATE3, 0x20(STATEP)
	movdqu	STATE4, 0x30(STATEP)
	movdqu	STATE0, 0x40(STATEP)
	FRAME_END
	RET

.Ldec_out_4:
	movdqu	STATE0, 0x00(STATEP)
	movdqu	STATE1, 0x10(STATEP)
	movdqu	STATE2, 0x20(STATEP)
	movdqu	STATE3, 0x30(STATEP)
	movdqu	STATE4, 0x40(STATEP)
	FRAME_END
	RET

.Ldec_out:
	/* less than one block: state untouched */
	FRAME_END
	RET
SYM_FUNC_END(crypto_aegis128_aesni_dec)
0651
0652
0653
0654
0655
/*
 * crypto_aegis128_aesni_dec_tail - decrypt a final partial block.
 *
 * input:
 *   STATEP - 5 x 16-byte state, read and written back with unaligned moves
 *   LEN    - byte count of the partial block (handed to __load_partial and
 *            __store_partial unchanged; its low byte builds the mask)
 *   SRC    - ciphertext pointer
 *   DST    - plaintext pointer
 * changed:
 *   %r8-%r10, %xmm0-%xmm7, flags
 */
SYM_FUNC_START(crypto_aegis128_aesni_dec_tail)
	FRAME_BEGIN

	/* load the state */
	movdqu	0x00(STATEP), STATE0
	movdqu	0x10(STATEP), STATE1
	movdqu	0x20(STATEP), STATE2
	movdqu	0x30(STATEP), STATE3
	movdqu	0x40(STATEP), STATE4

	/* MSG = zero-padded ciphertext */
	call	__load_partial

	/* MSG ^= STATE1 ^ STATE4 ^ (STATE2 & STATE3) */
	pxor	STATE1, MSG
	pxor	STATE4, MSG
	movdqa	STATE2, T1
	pand	STATE3, T1
	pxor	T1, MSG

	/* write only the first LEN plaintext bytes */
	movdqa	MSG, T0
	call	__store_partial

	/*
	 * The padding bytes of MSG now hold keystream rather than zero.
	 * Broadcast the low byte of LEN to all 16 lanes and compare it with
	 * the byte indices 0..15: lanes with index < LEN become 0xff, the
	 * rest 0x00.  Masking MSG with that clears the padding again.
	 */
	movq	LEN, T0
	punpcklbw T0, T0
	punpcklbw T0, T0
	punpcklbw T0, T0
	punpcklbw T0, T0
	/* RIP-relative: position-independent, no absolute relocation */
	movdqa	.Laegis128_counter(%rip), T1
	pcmpgtb	T1, T0
	pand	T0, MSG

	/* absorb the zero-padded plaintext */
	aegis128_update
	pxor	MSG, STATE4

	/* one update was done: store in order rotated by one register */
	movdqu	STATE4, 0x00(STATEP)
	movdqu	STATE0, 0x10(STATEP)
	movdqu	STATE1, 0x20(STATEP)
	movdqu	STATE2, 0x30(STATEP)
	movdqu	STATE3, 0x40(STATEP)

	FRAME_END
	RET
SYM_FUNC_END(crypto_aegis128_aesni_dec_tail)
0701
0702
0703
0704
0705
/*
 * crypto_aegis128_aesni_final - finalize and fold the state into the tag.
 *
 * input:
 *   %rdi (STATEP) - 5 x 16-byte state (read only; not written back)
 *   %rsi          - 16-byte buffer, read and written with unaligned moves:
 *                   on return it holds its old contents XORed with all
 *                   five state blocks
 *   %rdx, %rcx    - two 64-bit counts forming the low and high half of the
 *                   length block; each is multiplied by 8 (presumably the
 *                   associated-data and message byte lengths, converted to
 *                   bit counts; confirm against the C prototype)
 * changed:
 *   %xmm0-%xmm6, flags
 *
 * NOTE(review): %rdx and %rcx are consumed as full 64-bit values; if the C
 * prototype declares them as 32-bit integers their upper halves are not
 * guaranteed to be zero.
 */
SYM_FUNC_START(crypto_aegis128_aesni_final)
	FRAME_BEGIN

	/* load the state */
	movdqu	0x00(STATEP), STATE0
	movdqu	0x10(STATEP), STATE1
	movdqu	0x20(STATEP), STATE2
	movdqu	0x30(STATEP), STATE3
	movdqu	0x40(STATEP), STATE4

	/* MSG = { %rcx * 8 (high qword), %rdx * 8 (low qword) } */
	movq	%rcx, T0
	pslldq	$8, T0
	movq	%rdx, MSG
	pxor	T0, MSG
	psllq	$3, MSG

	pxor	STATE3, MSG

	/* seven update rounds, each mixing in the same MSG block */
	aegis128_update; pxor MSG, STATE4
	aegis128_update; pxor MSG, STATE3
	aegis128_update; pxor MSG, STATE2
	aegis128_update; pxor MSG, STATE1
	aegis128_update; pxor MSG, STATE0
	aegis128_update; pxor MSG, STATE4
	aegis128_update; pxor MSG, STATE3

	/* fold all five state blocks into the buffer at (%rsi) */
	movdqu	(%rsi), MSG

	pxor	STATE4, MSG
	pxor	STATE3, MSG
	pxor	STATE2, MSG
	pxor	STATE1, MSG
	pxor	STATE0, MSG

	movdqu	MSG, (%rsi)

	FRAME_END
	RET
SYM_FUNC_END(crypto_aegis128_aesni_final)