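/*
 * Chaining mode wrappers for the AES core transform: ECB, CBC (plus the
 * ESSIV and ciphertext-stealing variants), CTR, XCTR, XTS and CBC-MAC.
 * The single-block encrypt_block/decrypt_block primitives, the interleaved
 * encrypt_blockNx/decrypt_blockNx primitives and the AES_FUNC_START/
 * AES_FUNC_END markers are not defined here and are assumed to be provided
 * by the file that includes this one.
 */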
	.text
	.align		4

#ifndef MAX_STRIDE
#define MAX_STRIDE	4
#endif

#if MAX_STRIDE == 4
#define ST4(x...) x
#define ST5(x...)
#else
#define ST4(x...)
#define ST5(x...) x
#endif
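	/*
	 * Local helpers that process four (or, when MAX_STRIDE == 5, five)
	 * AES blocks in parallel in v0-v3 (v0-v4), using the round keys in x2
	 * and the round count in w3.  (Register usage inferred from the
	 * encrypt_block4x/5x and decrypt_block4x/5x invocations.)
	 */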
SYM_FUNC_START_LOCAL(aes_encrypt_block4x)
	encrypt_block4x	v0, v1, v2, v3, w3, x2, x8, w7
	ret
SYM_FUNC_END(aes_encrypt_block4x)

SYM_FUNC_START_LOCAL(aes_decrypt_block4x)
	decrypt_block4x	v0, v1, v2, v3, w3, x2, x8, w7
	ret
SYM_FUNC_END(aes_decrypt_block4x)

#if MAX_STRIDE == 5
SYM_FUNC_START_LOCAL(aes_encrypt_block5x)
	encrypt_block5x	v0, v1, v2, v3, v4, w3, x2, x8, w7
	ret
SYM_FUNC_END(aes_encrypt_block5x)

SYM_FUNC_START_LOCAL(aes_decrypt_block5x)
	decrypt_block5x	v0, v1, v2, v3, v4, w3, x2, x8, w7
	ret
SYM_FUNC_END(aes_decrypt_block5x)
#endif
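	/*
	 * aes_ecb_encrypt / aes_ecb_decrypt
	 *
	 * Register interface (inferred from the code below):
	 *   x0 - out, x1 - in, x2 - round keys, w3 - rounds, w4 - blocks
	 */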
AES_FUNC_START(aes_ecb_encrypt)
	stp		x29, x30, [sp, #-16]!
	mov		x29, sp

	enc_prepare	w3, x2, x5

.LecbencloopNx:
	subs		w4, w4, #MAX_STRIDE
	bmi		.Lecbenc1x
	ld1		{v0.16b-v3.16b}, [x1], #64
ST4(	bl		aes_encrypt_block4x	)
ST5(	ld1		{v4.16b}, [x1], #16	)
ST5(	bl		aes_encrypt_block5x	)
	st1		{v0.16b-v3.16b}, [x0], #64
ST5(	st1		{v4.16b}, [x0], #16	)
	b		.LecbencloopNx
.Lecbenc1x:
	adds		w4, w4, #MAX_STRIDE
	beq		.Lecbencout
.Lecbencloop:
	ld1		{v0.16b}, [x1], #16
	encrypt_block	v0, w3, x2, x5, w6
	st1		{v0.16b}, [x0], #16
	subs		w4, w4, #1
	bne		.Lecbencloop
.Lecbencout:
	ldp		x29, x30, [sp], #16
	ret
AES_FUNC_END(aes_ecb_encrypt)


AES_FUNC_START(aes_ecb_decrypt)
	stp		x29, x30, [sp, #-16]!
	mov		x29, sp

	dec_prepare	w3, x2, x5

.LecbdecloopNx:
	subs		w4, w4, #MAX_STRIDE
	bmi		.Lecbdec1x
	ld1		{v0.16b-v3.16b}, [x1], #64
ST4(	bl		aes_decrypt_block4x	)
ST5(	ld1		{v4.16b}, [x1], #16	)
ST5(	bl		aes_decrypt_block5x	)
	st1		{v0.16b-v3.16b}, [x0], #64
ST5(	st1		{v4.16b}, [x0], #16	)
	b		.LecbdecloopNx
.Lecbdec1x:
	adds		w4, w4, #MAX_STRIDE
	beq		.Lecbdecout
.Lecbdecloop:
	ld1		{v0.16b}, [x1], #16
	decrypt_block	v0, w3, x2, x5, w6
	st1		{v0.16b}, [x0], #16
	subs		w4, w4, #1
	bne		.Lecbdecloop
.Lecbdecout:
	ldp		x29, x30, [sp], #16
	ret
AES_FUNC_END(aes_ecb_decrypt)
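	/*
	 * aes_cbc_encrypt / aes_cbc_decrypt
	 *   x0 - out, x1 - in, x2 - round keys, w3 - rounds, w4 - blocks,
	 *   x5 - IV (updated on return)
	 *
	 * aes_essiv_cbc_encrypt / aes_essiv_cbc_decrypt
	 *   As above, plus x6 - ESSIV tweak round keys (14 rounds, i.e.
	 *   AES-256) used to encrypt the IV before the CBC pass.
	 *
	 * (Register/argument mapping inferred from the code below.)
	 */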
AES_FUNC_START(aes_essiv_cbc_encrypt)
	ld1		{v4.16b}, [x5]			/* get iv */

	mov		w8, #14				/* AES-256: 14 rounds */
	enc_prepare	w8, x6, x7
	encrypt_block	v4, w8, x6, x7, w9
	enc_switch_key	w3, x2, x6
	b		.Lcbcencloop4x

AES_FUNC_START(aes_cbc_encrypt)
	ld1		{v4.16b}, [x5]			/* get iv */
	enc_prepare	w3, x2, x6

.Lcbcencloop4x:
	subs		w4, w4, #4
	bmi		.Lcbcenc1x
	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
	eor		v0.16b, v0.16b, v4.16b		/* ..and xor with iv */
	encrypt_block	v0, w3, x2, x6, w7
	eor		v1.16b, v1.16b, v0.16b
	encrypt_block	v1, w3, x2, x6, w7
	eor		v2.16b, v2.16b, v1.16b
	encrypt_block	v2, w3, x2, x6, w7
	eor		v3.16b, v3.16b, v2.16b
	encrypt_block	v3, w3, x2, x6, w7
	st1		{v0.16b-v3.16b}, [x0], #64
	mov		v4.16b, v3.16b
	b		.Lcbcencloop4x
.Lcbcenc1x:
	adds		w4, w4, #4
	beq		.Lcbcencout
.Lcbcencloop:
	ld1		{v0.16b}, [x1], #16		/* get next pt block */
	eor		v4.16b, v4.16b, v0.16b		/* ..and xor with iv */
	encrypt_block	v4, w3, x2, x6, w7
	st1		{v4.16b}, [x0], #16
	subs		w4, w4, #1
	bne		.Lcbcencloop
.Lcbcencout:
	st1		{v4.16b}, [x5]			/* return iv */
	ret
AES_FUNC_END(aes_cbc_encrypt)
AES_FUNC_END(aes_essiv_cbc_encrypt)

AES_FUNC_START(aes_essiv_cbc_decrypt)
	stp		x29, x30, [sp, #-16]!
	mov		x29, sp

	ld1		{cbciv.16b}, [x5]		/* get iv */

	mov		w8, #14				/* AES-256: 14 rounds */
	enc_prepare	w8, x6, x7
	encrypt_block	cbciv, w8, x6, x7, w9
	b		.Lessivcbcdecstart

AES_FUNC_START(aes_cbc_decrypt)
	stp		x29, x30, [sp, #-16]!
	mov		x29, sp

	ld1		{cbciv.16b}, [x5]		/* get iv */
.Lessivcbcdecstart:
	dec_prepare	w3, x2, x6

.LcbcdecloopNx:
	subs		w4, w4, #MAX_STRIDE
	bmi		.Lcbcdec1x
	ld1		{v0.16b-v3.16b}, [x1], #64
#if MAX_STRIDE == 5
	ld1		{v4.16b}, [x1], #16
	mov		v5.16b, v0.16b
	mov		v6.16b, v1.16b
	mov		v7.16b, v2.16b
	bl		aes_decrypt_block5x
	sub		x1, x1, #32
	eor		v0.16b, v0.16b, cbciv.16b
	eor		v1.16b, v1.16b, v5.16b
	ld1		{v5.16b}, [x1], #16
	ld1		{cbciv.16b}, [x1], #16
	eor		v2.16b, v2.16b, v6.16b
	eor		v3.16b, v3.16b, v7.16b
	eor		v4.16b, v4.16b, v5.16b
#else
	mov		v4.16b, v0.16b
	mov		v5.16b, v1.16b
	mov		v6.16b, v2.16b
	bl		aes_decrypt_block4x
	sub		x1, x1, #16
	eor		v0.16b, v0.16b, cbciv.16b
	eor		v1.16b, v1.16b, v4.16b
	ld1		{cbciv.16b}, [x1], #16
	eor		v2.16b, v2.16b, v5.16b
	eor		v3.16b, v3.16b, v6.16b
#endif
	st1		{v0.16b-v3.16b}, [x0], #64
ST5(	st1		{v4.16b}, [x0], #16	)
	b		.LcbcdecloopNx
.Lcbcdec1x:
	adds		w4, w4, #MAX_STRIDE
	beq		.Lcbcdecout
.Lcbcdecloop:
	ld1		{v1.16b}, [x1], #16		/* get next ct block */
	mov		v0.16b, v1.16b			/* ...and copy to v0 */
	decrypt_block	v0, w3, x2, x6, w7
	eor		v0.16b, v0.16b, cbciv.16b	/* xor with iv => pt */
	mov		cbciv.16b, v1.16b		/* ct is next iv */
	st1		{v0.16b}, [x0], #16
	subs		w4, w4, #1
	bne		.Lcbcdecloop
.Lcbcdecout:
	st1		{cbciv.16b}, [x5]		/* return iv */
	ldp		x29, x30, [sp], #16
	ret
AES_FUNC_END(aes_cbc_decrypt)
AES_FUNC_END(aes_essiv_cbc_decrypt)
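	/*
	 * aes_cbc_cts_encrypt / aes_cbc_cts_decrypt
	 *   x0 - out, x1 - in, x2 - round keys, w3 - rounds, w4 - bytes,
	 *   x5 - IV
	 *
	 * CBC with ciphertext stealing for the last two blocks (one full
	 * block plus a final partial block), merged via .Lcts_permute_table.
	 * (Register/argument mapping inferred from the code below.)
	 */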
AES_FUNC_START(aes_cbc_cts_encrypt)
	adr_l		x8, .Lcts_permute_table
	sub		x4, x4, #16
	add		x9, x8, #32
	add		x8, x8, x4
	sub		x9, x9, x4
	ld1		{v3.16b}, [x8]
	ld1		{v4.16b}, [x9]

	ld1		{v0.16b}, [x1], x4		/* overlapping loads */
	ld1		{v1.16b}, [x1]

	ld1		{v5.16b}, [x5]			/* get iv */
	enc_prepare	w3, x2, x6

	eor		v0.16b, v0.16b, v5.16b		/* xor with iv */
	tbl		v1.16b, {v1.16b}, v4.16b
	encrypt_block	v0, w3, x2, x6, w7

	eor		v1.16b, v1.16b, v0.16b
	tbl		v0.16b, {v0.16b}, v3.16b
	encrypt_block	v1, w3, x2, x6, w7

	add		x4, x0, x4
	st1		{v0.16b}, [x4]			/* overlapping stores */
	st1		{v1.16b}, [x0]
	ret
AES_FUNC_END(aes_cbc_cts_encrypt)

AES_FUNC_START(aes_cbc_cts_decrypt)
	adr_l		x8, .Lcts_permute_table
	sub		x4, x4, #16
	add		x9, x8, #32
	add		x8, x8, x4
	sub		x9, x9, x4
	ld1		{v3.16b}, [x8]
	ld1		{v4.16b}, [x9]

	ld1		{v0.16b}, [x1], x4		/* overlapping loads */
	ld1		{v1.16b}, [x1]

	ld1		{v5.16b}, [x5]			/* get iv */
	dec_prepare	w3, x2, x6

	decrypt_block	v0, w3, x2, x6, w7
	tbl		v2.16b, {v0.16b}, v3.16b
	eor		v2.16b, v2.16b, v1.16b

	tbx		v0.16b, {v1.16b}, v4.16b
	decrypt_block	v0, w3, x2, x6, w7
	eor		v0.16b, v0.16b, v5.16b		/* xor with iv */

	add		x4, x0, x4
	st1		{v2.16b}, [x4]			/* overlapping stores */
	st1		{v0.16b}, [x0]
	ret
AES_FUNC_END(aes_cbc_cts_decrypt)

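/*
 * Permute table used by the CTS and CTR/XCTR tail handling above: 16 bytes
 * of 0xff, the identity permutation 0..15, then 16 more bytes of 0xff.
 * Indexing it at an offset and feeding the result to tbl/tbx shifts data
 * into place while zeroing (tbl) or preserving (tbx) the remaining lanes.
 * (Description inferred from its uses above.)
 */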
	.section	".rodata", "a"
	.align		6
.Lcts_permute_table:
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.byte		 0x0,  0x1,  0x2,  0x3,  0x4,  0x5,  0x6,  0x7
	.byte		 0x8,  0x9,  0xa,  0xb,  0xc,  0xd,  0xe,  0xf
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.previous
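	/*
	 * ctr_encrypt generates the bulk of the CTR and XCTR code; it is
	 * expanded below with \xctr == 0 for aes_ctr_encrypt and \xctr == 1
	 * for aes_xctr_encrypt.
	 */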
.macro ctr_encrypt xctr
	// Arguments
	OUT		.req x0
	IN		.req x1
	KEY		.req x2
	ROUNDS_W	.req w3
	BYTES_W		.req w4
	IV		.req x5
	BYTE_CTR_W	.req w6		// XCTR only
	// Intermediate values
	CTR_W		.req w11	// XCTR only
	CTR		.req x11	// XCTR only
	IV_PART		.req x12
	BLOCKS		.req x13
	BLOCKS_W	.req w13

	stp		x29, x30, [sp, #-16]!
	mov		x29, sp

	enc_prepare	ROUNDS_W, KEY, IV_PART
	ld1		{vctr.16b}, [IV]
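	/*
	 * Keep 64 bits of the IV in a general-purpose register: for CTR mode
	 * this is the low (big-endian) half of the counter, which is what
	 * gets incremented; for XCTR mode it is the IV half that is XORed
	 * with the 64-bit block counter.  (Inferred from the code below.)
	 */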
	.if \xctr
		umov		IV_PART, vctr.d[0]
		lsr		CTR_W, BYTE_CTR_W, #4
	.else
		umov		IV_PART, vctr.d[1]
		rev		IV_PART, IV_PART
	.endif

.LctrloopNx\xctr:
	add		BLOCKS_W, BYTES_W, #15
	sub		BYTES_W, BYTES_W, #MAX_STRIDE << 4
	lsr		BLOCKS_W, BLOCKS_W, #4
	mov		w8, #MAX_STRIDE
	cmp		BLOCKS_W, w8
	csel		BLOCKS_W, BLOCKS_W, w8, lt
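	/*
	 * Set up the counter values in v0-v3 (and v4 when MAX_STRIDE == 5).
	 * The tail handling further down expects the keystream block for the
	 * final (possibly partial) block to end up in v{MAX_STRIDE-1}.
	 * (Inferred from the tail code below.)
	 */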
	.if \xctr
		add		CTR, CTR, BLOCKS
	.else
		adds		IV_PART, IV_PART, BLOCKS
	.endif
	mov		v0.16b, vctr.16b
	mov		v1.16b, vctr.16b
	mov		v2.16b, vctr.16b
	mov		v3.16b, vctr.16b
ST5(	mov		v4.16b, vctr.16b	)
	.if \xctr
		sub		x6, CTR, #MAX_STRIDE - 1
		sub		x7, CTR, #MAX_STRIDE - 2
		sub		x8, CTR, #MAX_STRIDE - 3
		sub		x9, CTR, #MAX_STRIDE - 4
ST5(		sub		x10, CTR, #MAX_STRIDE - 5	)
		eor		x6, x6, IV_PART
		eor		x7, x7, IV_PART
		eor		x8, x8, IV_PART
		eor		x9, x9, IV_PART
ST5(		eor		x10, x10, IV_PART	)
		mov		v0.d[0], x6
		mov		v1.d[0], x7
		mov		v2.d[0], x8
		mov		v3.d[0], x9
ST5(		mov		v4.d[0], x10	)
	.else
		bcs		0f
		.subsection	1
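		/*
		 * This subsection handles the carry out of the low 64 counter
		 * bits.  The branches here depend only on the IV, never on the
		 * plaintext or the key, so they do not introduce secret-
		 * dependent timing.  First apply the carry to the outer
		 * counter block:
		 */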
0:		umov		x8, vctr.d[0]
		rev		x8, x8
		add		x8, x8, #1
		rev		x8, x8
		ins		vctr.d[0], x8
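		/*
		 * Propagate the carry into the per-block counters as well.
		 * Since the carry flag was set, 0 <= IV_PART < MAX_STRIDE
		 * here, and its value gives the number of trailing counter
		 * blocks whose d[0] element must pick up the carried value.
		 */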
		cbz		IV_PART, 2f
		adr		x16, 1f
		sub		x16, x16, IV_PART, lsl #3
		br		x16
		bti		c
		mov		v0.d[0], vctr.d[0]
		bti		c
		mov		v1.d[0], vctr.d[0]
		bti		c
		mov		v2.d[0], vctr.d[0]
		bti		c
		mov		v3.d[0], vctr.d[0]
ST5(		bti		c	)
ST5(		mov		v4.d[0], vctr.d[0]	)
1:		b		2f
		.previous

2:		rev		x7, IV_PART
		ins		vctr.d[1], x7
		sub		x7, IV_PART, #MAX_STRIDE - 1
		sub		x8, IV_PART, #MAX_STRIDE - 2
		sub		x9, IV_PART, #MAX_STRIDE - 3
		rev		x7, x7
		rev		x8, x8
		mov		v1.d[1], x7
		rev		x9, x9
ST5(		sub		x10, IV_PART, #MAX_STRIDE - 4	)
		mov		v2.d[1], x8
ST5(		rev		x10, x10	)
		mov		v3.d[1], x9
ST5(		mov		v4.d[1], x10	)
	.endif
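	/*
	 * If at least MAX_STRIDE blocks remain, encrypt the counters, XOR
	 * the keystream with the input and store; otherwise fall through to
	 * the tail handling.
	 */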
	tbnz		BYTES_W, #31, .Lctrtail\xctr
	ld1		{v5.16b-v7.16b}, [IN], #48
ST4(	bl		aes_encrypt_block4x	)
ST5(	bl		aes_encrypt_block5x	)
	eor		v0.16b, v5.16b, v0.16b
ST4(	ld1		{v5.16b}, [IN], #16	)
	eor		v1.16b, v6.16b, v1.16b
ST5(	ld1		{v5.16b-v6.16b}, [IN], #32	)
	eor		v2.16b, v7.16b, v2.16b
	eor		v3.16b, v5.16b, v3.16b
ST5(	eor		v4.16b, v6.16b, v4.16b	)
	st1		{v0.16b-v3.16b}, [OUT], #64
ST5(	st1		{v4.16b}, [OUT], #16	)
	cbz		BYTES_W, .Lctrout\xctr
	b		.LctrloopNx\xctr

.Lctrout\xctr:
	.if !\xctr
	st1		{vctr.16b}, [IV]		/* return next CTR value */
	.endif
	ldp		x29, x30, [sp], #16
	ret

.Lctrtail\xctr:
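	/*
	 * Tail: up to MAX_STRIDE * 16 - 1 bytes remain.  The keystream block
	 * for the final (possibly partial) block is expected in
	 * v{MAX_STRIDE-1}, and the output is written with overlapping stores
	 * so that blocks containing invalid bytes are overwritten by
	 * correctly computed data.  (Summary inferred from the code below.)
	 */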
	mov		x16, #16
	ands		w7, BYTES_W, #0xf
	csel		x13, x7, x16, ne

ST5(	cmp		BYTES_W, #64 - (MAX_STRIDE << 4))
ST5(	csel		x14, x16, xzr, gt	)
	cmp		BYTES_W, #48 - (MAX_STRIDE << 4)
	csel		x15, x16, xzr, gt
	cmp		BYTES_W, #32 - (MAX_STRIDE << 4)
	csel		x16, x16, xzr, gt
	cmp		BYTES_W, #16 - (MAX_STRIDE << 4)

	adr_l		x9, .Lcts_permute_table
	add		x9, x9, x13
	ble		.Lctrtail1x\xctr

ST5(	ld1		{v5.16b}, [IN], x14	)
	ld1		{v6.16b}, [IN], x15
	ld1		{v7.16b}, [IN], x16

ST4(	bl		aes_encrypt_block4x	)
ST5(	bl		aes_encrypt_block5x	)

	ld1		{v8.16b}, [IN], x13
	ld1		{v9.16b}, [IN]
	ld1		{v10.16b}, [x9]

ST4(	eor		v6.16b, v6.16b, v0.16b	)
ST4(	eor		v7.16b, v7.16b, v1.16b	)
ST4(	tbl		v3.16b, {v3.16b}, v10.16b	)
ST4(	eor		v8.16b, v8.16b, v2.16b	)
ST4(	eor		v9.16b, v9.16b, v3.16b	)

ST5(	eor		v5.16b, v5.16b, v0.16b	)
ST5(	eor		v6.16b, v6.16b, v1.16b	)
ST5(	tbl		v4.16b, {v4.16b}, v10.16b	)
ST5(	eor		v7.16b, v7.16b, v2.16b	)
ST5(	eor		v8.16b, v8.16b, v3.16b	)
ST5(	eor		v9.16b, v9.16b, v4.16b	)

ST5(	st1		{v5.16b}, [OUT], x14	)
	st1		{v6.16b}, [OUT], x15
	st1		{v7.16b}, [OUT], x16
	add		x13, x13, OUT
	st1		{v9.16b}, [x13]			// overlapping stores
	st1		{v8.16b}, [OUT]
	b		.Lctrout\xctr

.Lctrtail1x\xctr:
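	/*
	 * Handle a tail of at most 16 bytes.  A full 16 bytes are always
	 * read and written, addressed relative to the end of the buffers,
	 * so for requests shorter than 16 bytes the caller must supply
	 * 16-byte buffers with the data placed at their end.  (Summary
	 * inferred from the loads and stores below.)
	 */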
	sub		x8, x7, #16
	csel		x7, x7, x8, eq
	add		IN, IN, x7
	add		OUT, OUT, x7
	ld1		{v5.16b}, [IN]
	ld1		{v6.16b}, [OUT]
ST5(	mov		v3.16b, v4.16b	)
	encrypt_block	v3, ROUNDS_W, KEY, x8, w7
	ld1		{v10.16b-v11.16b}, [x9]
	tbl		v3.16b, {v3.16b}, v10.16b
	sshr		v11.16b, v11.16b, #7
	eor		v5.16b, v5.16b, v3.16b
	bif		v5.16b, v6.16b, v11.16b
	st1		{v5.16b}, [OUT]
	b		.Lctrout\xctr

	// Arguments
	.unreq OUT
	.unreq IN
	.unreq KEY
	.unreq ROUNDS_W
	.unreq BYTES_W
	.unreq IV
	.unreq BYTE_CTR_W	// XCTR only
	// Intermediate values
	.unreq CTR_W		// XCTR only
	.unreq CTR		// XCTR only
	.unreq IV_PART
	.unreq BLOCKS
	.unreq BLOCKS_W
.endm
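	/*
	 * aes_ctr_encrypt
	 *   x0 - out, x1 - in, x2 - round keys, w3 - rounds, w4 - bytes,
	 *   x5 - 16-byte big-endian counter block (updated on return)
	 *
	 * For requests shorter than 16 bytes the in/out buffers must still be
	 * 16 bytes long, with the data at their end (see the tail handling in
	 * the ctr_encrypt macro).  (Argument mapping inferred from the macro.)
	 */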
AES_FUNC_START(aes_ctr_encrypt)
	ctr_encrypt	0
AES_FUNC_END(aes_ctr_encrypt)
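	/*
	 * aes_xctr_encrypt
	 *   x0 - out, x1 - in, x2 - round keys, w3 - rounds, w4 - bytes,
	 *   x5 - IV, w6 - byte counter from which the 64-bit block counter
	 *   is derived
	 *
	 * Same 16-byte minimum buffer requirement as aes_ctr_encrypt.
	 * (Argument mapping inferred from the ctr_encrypt macro.)
	 */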
AES_FUNC_START(aes_xctr_encrypt)
	ctr_encrypt	1
AES_FUNC_END(aes_xctr_encrypt)
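	/*
	 * aes_xts_encrypt / aes_xts_decrypt
	 *   x0 - out, x1 - in, x2 - data round keys, w3 - rounds, w4 - bytes,
	 *   x5 - tweak round keys, x6 - tweak/IV (updated on return),
	 *   w7 - first-pass/CTS control flag
	 *
	 * next_tweak below multiplies the current tweak by x in GF(2^128),
	 * using the reduction polynomial 0x87 loaded by xts_load_mask.
	 * (Register/argument mapping inferred from the code below.)
	 */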
	.macro		next_tweak, out, in, tmp
	sshr		\tmp\().2d,  \in\().2d,   #63
	and		\tmp\().16b, \tmp\().16b, xtsmask.16b
	add		\out\().2d,  \in\().2d,   \in\().2d
	ext		\tmp\().16b, \tmp\().16b, \tmp\().16b, #8
	eor		\out\().16b, \out\().16b, \tmp\().16b
	.endm

	.macro		xts_load_mask, tmp
	movi		xtsmask.2s, #0x1
	movi		\tmp\().2s, #0x87
	uzp1		xtsmask.4s, xtsmask.4s, \tmp\().4s
	.endm

AES_FUNC_START(aes_xts_encrypt)
	stp		x29, x30, [sp, #-16]!
	mov		x29, sp

	ld1		{v4.16b}, [x6]
	xts_load_mask	v8
	cbz		w7, .Lxtsencnotfirst

	enc_prepare	w3, x5, x8
	xts_cts_skip_tw	w7, .LxtsencNx
	encrypt_block	v4, w3, x5, x8, w7
	enc_switch_key	w3, x2, x8
	b		.LxtsencNx

.Lxtsencnotfirst:
	enc_prepare	w3, x2, x8
.LxtsencloopNx:
	next_tweak	v4, v4, v8
.LxtsencNx:
	subs		w4, w4, #64
	bmi		.Lxtsenc1x
	ld1		{v0.16b-v3.16b}, [x1], #64
	next_tweak	v5, v4, v8
	eor		v0.16b, v0.16b, v4.16b
	next_tweak	v6, v5, v8
	eor		v1.16b, v1.16b, v5.16b
	eor		v2.16b, v2.16b, v6.16b
	next_tweak	v7, v6, v8
	eor		v3.16b, v3.16b, v7.16b
	bl		aes_encrypt_block4x
	eor		v3.16b, v3.16b, v7.16b
	eor		v0.16b, v0.16b, v4.16b
	eor		v1.16b, v1.16b, v5.16b
	eor		v2.16b, v2.16b, v6.16b
	st1		{v0.16b-v3.16b}, [x0], #64
	mov		v4.16b, v7.16b
	cbz		w4, .Lxtsencret
	xts_reload_mask	v8
	b		.LxtsencloopNx
.Lxtsenc1x:
	adds		w4, w4, #64
	beq		.Lxtsencout
	subs		w4, w4, #16
	bmi		.LxtsencctsNx
.Lxtsencloop:
	ld1		{v0.16b}, [x1], #16
.Lxtsencctsout:
	eor		v0.16b, v0.16b, v4.16b
	encrypt_block	v0, w3, x2, x8, w7
	eor		v0.16b, v0.16b, v4.16b
	cbz		w4, .Lxtsencout
	subs		w4, w4, #16
	next_tweak	v4, v4, v8
	bmi		.Lxtsenccts
	st1		{v0.16b}, [x0], #16
	b		.Lxtsencloop
.Lxtsencout:
	st1		{v0.16b}, [x0]
.Lxtsencret:
	st1		{v4.16b}, [x6]
	ldp		x29, x30, [sp], #16
	ret

.LxtsencctsNx:
	mov		v0.16b, v3.16b
	sub		x0, x0, #16
.Lxtsenccts:
	adr_l		x8, .Lcts_permute_table

	add		x1, x1, w4, sxtw		/* rewind input pointer */
	add		w4, w4, #16			/* # bytes in final block */
	add		x9, x8, #32
	add		x8, x8, x4
	sub		x9, x9, x4
	add		x4, x0, x4			/* output address of final block */

	ld1		{v1.16b}, [x1]			/* load final block */
	ld1		{v2.16b}, [x8]
	ld1		{v3.16b}, [x9]

	tbl		v2.16b, {v0.16b}, v2.16b
	tbx		v0.16b, {v1.16b}, v3.16b
	st1		{v2.16b}, [x4]			/* overlapping stores */
	mov		w4, wzr
	b		.Lxtsencctsout
AES_FUNC_END(aes_xts_encrypt)

AES_FUNC_START(aes_xts_decrypt)
	stp		x29, x30, [sp, #-16]!
	mov		x29, sp

	/* subtract 16 bytes if we are doing CTS */
	sub		w8, w4, #0x10
	tst		w4, #0xf
	csel		w4, w4, w8, eq

	ld1		{v4.16b}, [x6]
	xts_load_mask	v8
	xts_cts_skip_tw	w7, .Lxtsdecskiptw
	cbz		w7, .Lxtsdecnotfirst

	enc_prepare	w3, x5, x8
	encrypt_block	v4, w3, x5, x8, w7		/* first tweak */
.Lxtsdecskiptw:
	dec_prepare	w3, x2, x8
	b		.LxtsdecNx

.Lxtsdecnotfirst:
	dec_prepare	w3, x2, x8
.LxtsdecloopNx:
	next_tweak	v4, v4, v8
.LxtsdecNx:
	subs		w4, w4, #64
	bmi		.Lxtsdec1x
	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
	next_tweak	v5, v4, v8
	eor		v0.16b, v0.16b, v4.16b
	next_tweak	v6, v5, v8
	eor		v1.16b, v1.16b, v5.16b
	eor		v2.16b, v2.16b, v6.16b
	next_tweak	v7, v6, v8
	eor		v3.16b, v3.16b, v7.16b
	bl		aes_decrypt_block4x
	eor		v3.16b, v3.16b, v7.16b
	eor		v0.16b, v0.16b, v4.16b
	eor		v1.16b, v1.16b, v5.16b
	eor		v2.16b, v2.16b, v6.16b
	st1		{v0.16b-v3.16b}, [x0], #64
	mov		v4.16b, v7.16b
	cbz		w4, .Lxtsdecout
	xts_reload_mask	v8
	b		.LxtsdecloopNx
.Lxtsdec1x:
	adds		w4, w4, #64
	beq		.Lxtsdecout
	subs		w4, w4, #16
.Lxtsdecloop:
	ld1		{v0.16b}, [x1], #16
	bmi		.Lxtsdeccts
.Lxtsdecctsout:
	eor		v0.16b, v0.16b, v4.16b
	decrypt_block	v0, w3, x2, x8, w7
	eor		v0.16b, v0.16b, v4.16b
	st1		{v0.16b}, [x0], #16
	cbz		w4, .Lxtsdecout
	subs		w4, w4, #16
	next_tweak	v4, v4, v8
	b		.Lxtsdecloop
.Lxtsdecout:
	st1		{v4.16b}, [x6]
	ldp		x29, x30, [sp], #16
	ret

.Lxtsdeccts:
	adr_l		x8, .Lcts_permute_table

	add		x1, x1, w4, sxtw		/* rewind input pointer */
	add		w4, w4, #16			/* # bytes in final block */
	add		x9, x8, #32
	add		x8, x8, x4
	sub		x9, x9, x4
	add		x4, x0, x4			/* output address of final block */

	next_tweak	v5, v4, v8

	ld1		{v1.16b}, [x1]			/* load final block */
	ld1		{v2.16b}, [x8]
	ld1		{v3.16b}, [x9]

	eor		v0.16b, v0.16b, v5.16b
	decrypt_block	v0, w3, x2, x8, w7
	eor		v0.16b, v0.16b, v5.16b

	tbl		v2.16b, {v0.16b}, v2.16b
	tbx		v0.16b, {v1.16b}, v3.16b

	st1		{v2.16b}, [x4]			/* overlapping stores */
	mov		w4, wzr
	b		.Lxtsdecctsout
AES_FUNC_END(aes_xts_decrypt)
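	/*
	 * aes_mac_update (CBC-MAC)
	 *   x0 - in, x1 - round keys, w2 - rounds, w3 - blocks,
	 *   x4 - MAC/digest state, w5 - encrypt-before flag,
	 *   x6 - encrypt-after flag
	 *
	 * Returns in w0 the number of blocks left unprocessed when the loop
	 * stops early (e.g. via cond_yield).  (Register/argument mapping
	 * inferred from the code below.)
	 */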
AES_FUNC_START(aes_mac_update)
	ld1		{v0.16b}, [x4]			/* get dg */
	enc_prepare	w2, x1, x7
	cbz		w5, .Lmacloop4x

	encrypt_block	v0, w2, x1, x7, w8

.Lmacloop4x:
	subs		w3, w3, #4
	bmi		.Lmac1x
	ld1		{v1.16b-v4.16b}, [x0], #64	/* get next pt block */
	eor		v0.16b, v0.16b, v1.16b		/* ..and xor with dg */
	encrypt_block	v0, w2, x1, x7, w8
	eor		v0.16b, v0.16b, v2.16b
	encrypt_block	v0, w2, x1, x7, w8
	eor		v0.16b, v0.16b, v3.16b
	encrypt_block	v0, w2, x1, x7, w8
	eor		v0.16b, v0.16b, v4.16b
	cmp		w3, wzr
	csinv		x5, x6, xzr, eq
	cbz		w5, .Lmacout
	encrypt_block	v0, w2, x1, x7, w8
	st1		{v0.16b}, [x4]			/* return dg */
	cond_yield	.Lmacout, x7, x8
	b		.Lmacloop4x
.Lmac1x:
	add		w3, w3, #4
.Lmacloop:
	cbz		w3, .Lmacout
	ld1		{v1.16b}, [x0], #16		/* get next pt block */
	eor		v0.16b, v0.16b, v1.16b		/* ..and xor with dg */

	subs		w3, w3, #1
	csinv		x5, x6, xzr, eq
	cbz		w5, .Lmacout

.Lmacenc:
	encrypt_block	v0, w2, x1, x7, w8
	b		.Lmacloop

.Lmacout:
	st1		{v0.16b}, [x4]			/* return dg */
	mov		w0, w3
	ret
AES_FUNC_END(aes_mac_update)