Back to home page

OSCL-LXR

 
 

    


0001 /* SPDX-License-Identifier: GPL-2.0-only */
0002 /*
0003  * AES-NI + SSE2 implementation of AEGIS-128
0004  *
0005  * Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
0006  * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
0007  */
0008 
0009 #include <linux/linkage.h>
0010 #include <asm/frame.h>
0011 
0012 #define STATE0  %xmm0   /* STATE0-STATE4: the five 128-bit state words */
0013 #define STATE1  %xmm1
0014 #define STATE2  %xmm2
0015 #define STATE3  %xmm3
0016 #define STATE4  %xmm4
0017 #define KEY %xmm5       /* KEY and MSG are two names for %xmm5; in this */
0018 #define MSG %xmm5       /* file KEY is used only by the init routine    */
0019 #define T0  %xmm6       /* T0, T1: scratch registers */
0020 #define T1  %xmm7
0021 
0022 #define STATEP  %rdi    /* address of the five-word state in memory */
0023 #define LEN %rsi        /* byte count (names the full 64-bit register) */
0024 #define SRC %rdx        /* address the data is read from */
0025 #define DST %rcx        /* address the data is written to */
0026 
0027 .section .rodata.cst16.aegis128_const, "aM", @progbits, 32
     /*
      * Two 16-byte constants stored back to back in a mergeable section whose
      * entity size (32) covers both of them. crypto_aegis128_aesni_init loads
      * .Laegis128_const_0 into STATE2 and .Laegis128_const_1 into STATE1.
      * Read in order, the 32 bytes are the Fibonacci numbers modulo 256.
      */
0028 .align 16
0029 .Laegis128_const_0:
0030     .byte 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d
0031     .byte 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62
0032 .Laegis128_const_1:
0033     .byte 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1
0034     .byte 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd
0035 
0036 .section .rodata.cst16.aegis128_counter, "aM", @progbits, 16
     /*
      * Byte i of this constant holds the value i. The decrypt tail routine
      * compares it with the broadcast byte count to build a mask that keeps
      * only the first LEN bytes of a block.
      */
0037 .align 16
0038 .Laegis128_counter:
0039     .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
0040     .byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
0041 
0042 .text
0043 
0044 /*
0045  * aegis128_update
      *
      * One state-update round without the message XOR. In AT&T operand order
      * "aesenc A, B" replaces B with one AES round of B using A as the round
      * key, so every register becomes AESRound(itself) XOR the old value of
      * the next register: STATE4 pairs with STATE0, and STATE3 pairs with the
      * copy of the old STATE4 saved in T0.
      *
      * The register names are fixed, so each expansion moves the logical
      * state words round by one register. The callers in this file follow
      * that by XORing the message into STATE4, STATE3, STATE2, STATE1, STATE0
      * on successive expansions and by storing the registers in the matching
      * rotated order.
      *
0046  * input:
0047  *   STATE[0-4] - input state
0048  * output:
0049  *   STATE[0-4] - output state (shifted positions)
0050  * changed:
0051  *   T0
0052  */
0053 .macro aegis128_update
0054     movdqa STATE4, T0       /* save old STATE4; it is the key for STATE3 */
0055     aesenc STATE0, STATE4
0056     aesenc STATE1, STATE0
0057     aesenc STATE2, STATE1
0058     aesenc STATE3, STATE2
0059     aesenc T0,     STATE3
0060 .endm
0061 
0062 /*
0063  * __load_partial: internal ABI
      *
      * Assembles a zero-padded block in MSG from 1-, 2-, 4- and 8-byte reads
      * selected by bits 0-3 of LEN. Each piece is read at SRC plus LEN with
      * the bit for that piece size and all lower bits cleared. Pieces from
      * higher addresses are read first and shifted up as lower-address pieces
      * are merged in. For LEN below 16 the result is the first LEN bytes at
      * SRC followed by zeros.
      *
      * LEN is always masked after being copied, so only its low five bits
      * influence this routine.
      *
0064  * input:
0065  *   LEN - bytes
0066  *   SRC - src
0067  * output:
0068  *   MSG  - message block
0069  * changed:
0070  *   T0
0071  *   %r8
0072  *   %r9
0073  */
0074 SYM_FUNC_START_LOCAL(__load_partial)
0075     xor %r9d, %r9d          /* %r9 accumulates the low (up to) 7 bytes */
0076     pxor MSG, MSG
0077 
0078     mov LEN, %r8
0079     and $0x1, %r8
0080     jz .Lld_partial_1
0081 
         /* odd length: the last byte sits at SRC + (LEN with bit 0 cleared) */
0082     mov LEN, %r8
0083     and $0x1E, %r8
0084     add SRC, %r8
0085     mov (%r8), %r9b
0086 
0087 .Lld_partial_1:
0088     mov LEN, %r8
0089     and $0x2, %r8
0090     jz .Lld_partial_2
0091 
         /* 2-byte piece: move what was gathered so far above it */
0092     mov LEN, %r8
0093     and $0x1C, %r8
0094     add SRC, %r8
0095     shl $0x10, %r9
0096     mov (%r8), %r9w
0097 
0098 .Lld_partial_2:
0099     mov LEN, %r8
0100     and $0x4, %r8
0101     jz .Lld_partial_4
0102 
         /* 4-byte piece; the 32-bit load zero-extends into %r8 */
0103     mov LEN, %r8
0104     and $0x18, %r8
0105     add SRC, %r8
0106     shl $32, %r9
0107     mov (%r8), %r8d
0108     xor %r8, %r9
0109 
0110 .Lld_partial_4:
0111     movq %r9, MSG           /* low qword = gathered bytes, high qword = 0 */
0112 
0113     mov LEN, %r8
0114     and $0x8, %r8
0115     jz .Lld_partial_8
0116 
         /* 8-byte piece: shift the gathered bytes into the high qword */
0117     mov LEN, %r8
0118     and $0x10, %r8
0119     add SRC, %r8
0120     pslldq $8, MSG
0121     movq (%r8), T0
0122     pxor T0, MSG
0123 
0124 .Lld_partial_8:
0125     RET
0126 SYM_FUNC_END(__load_partial)
0127 
0128 /*
0129  * __store_partial: internal ABI
      *
      * Writes the low LEN bytes of T0 to DST using at most one 8-, one 4-,
      * one 2- and one 1-byte store, so no more than 15 bytes are written.
      *
      * The length reaches this routine from C prototypes that declare it as
      * a 32-bit unsigned int, so bits 63:32 of LEN may hold anything. Only
      * the low 32 bits are copied (with zero extension) before the 64-bit
      * compares; otherwise a stray upper bit could make a short length look
      * large and store past the end of the buffer.
      *
0130  * input:
0131  *   LEN - bytes (only the low 32 bits are read)
0132  *   DST - dst
0133  *   T0   - message block
0134  * output:
      *   LEN bytes at DST
0135  * changed:
      *   T0
0136  *   %r8
0137  *   %r9
0138  *   %r10
0139  */
0140 SYM_FUNC_START_LOCAL(__store_partial)
0141     mov %esi, %r8d  /* low half of LEN; a 32-bit move clears bits 63:32 */
0142     mov DST, %r9
0143 
0144     movq T0, %r10           /* %r10 = low 8 bytes still to be written */
0145 
0146     cmp $8, %r8
0147     jl .Lst_partial_8
0148 
         /* write 8 bytes, then continue with the high qword of T0 */
0149     mov %r10, (%r9)
0150     psrldq $8, T0
0151     movq T0, %r10
0152 
0153     sub $8, %r8
0154     add $8, %r9
0155 
0156 .Lst_partial_8:
0157     cmp $4, %r8
0158     jl .Lst_partial_4
0159 
0160     mov %r10d, (%r9)
0161     shr $32, %r10
0162 
0163     sub $4, %r8
0164     add $4, %r9
0165 
0166 .Lst_partial_4:
0167     cmp $2, %r8
0168     jl .Lst_partial_2
0169 
0170     mov %r10w, (%r9)
0171     shr $0x10, %r10
0172 
0173     sub $2, %r8
0174     add $2, %r9
0175 
0176 .Lst_partial_2:
0177     cmp $1, %r8
0178     jl .Lst_partial_1
0179 
0180     mov %r10b, (%r9)
0181 
0182 .Lst_partial_1:
0183     RET
0184 SYM_FUNC_END(__store_partial)
0185 
0186 /*
0187  * void crypto_aegis128_aesni_init(void *state, const void *key, const void *iv);
      *
      * Builds the initial state from the 16-byte key (%rsi) and 16-byte IV
      * (%rdx), runs ten state updates that alternately absorb KEY and
      * KEY xor IV, and stores the five state words at state (%rdi).
      *
      * The key is read with movdqa, which faults unless the address is
      * 16-byte aligned; the IV is read with movdqu and may be unaligned.
      * The constants are addressed relative to %rip, so the code needs no
      * absolute relocation for them.
0188  */
0189 SYM_FUNC_START(crypto_aegis128_aesni_init)
0190     FRAME_BEGIN
0191 
0192     /* load IV: */
0193     movdqu (%rdx), T1
0194 
0195     /* load key: */
0196     movdqa (%rsi), KEY
0197     pxor KEY, T1            /* T1 = KEY xor IV from here on */
0198     movdqa T1, STATE0
0199     movdqa KEY, STATE3
0200     movdqa KEY, STATE4
0201 
0202     /* load the constants: */
0203     movdqa .Laegis128_const_0(%rip), STATE2
0204     movdqa .Laegis128_const_1(%rip), STATE1
0205     pxor STATE2, STATE3     /* STATE3 = KEY xor const_0 */
0206     pxor STATE1, STATE4     /* STATE4 = KEY xor const_1 */
0207 
0208     /* update 10 times with KEY / KEY xor IV: */
0209     aegis128_update; pxor KEY, STATE4
0210     aegis128_update; pxor T1,  STATE3
0211     aegis128_update; pxor KEY, STATE2
0212     aegis128_update; pxor T1,  STATE1
0213     aegis128_update; pxor KEY, STATE0
0214     aegis128_update; pxor T1,  STATE4
0215     aegis128_update; pxor KEY, STATE3
0216     aegis128_update; pxor T1,  STATE2
0217     aegis128_update; pxor KEY, STATE1
0218     aegis128_update; pxor T1,  STATE0
0219 
0220     /* store the state (ten updates bring the words back in order): */
0221     movdqu STATE0, 0x00(STATEP)
0222     movdqu STATE1, 0x10(STATEP)
0223     movdqu STATE2, 0x20(STATEP)
0224     movdqu STATE3, 0x30(STATEP)
0225     movdqu STATE4, 0x40(STATEP)
0226 
0227     FRAME_END
0228     RET
0229 SYM_FUNC_END(crypto_aegis128_aesni_init)
0230 
0231 /*
0232  * void crypto_aegis128_aesni_ad(void *state, unsigned int length,
0233  *                               const void *data);
      *
      * Absorbs whole 16-byte blocks from data into the state. Returns without
      * touching the state when length is below 16, and stops as soon as fewer
      * than 16 bytes remain; the remaining bytes are not read.
      *
      * The loop is unrolled five times because every state update moves the
      * logical state words round by one register. Each exit label stores the
      * registers in the order that matches the number of blocks processed.
0234  */
0235 SYM_FUNC_START(crypto_aegis128_aesni_ad)
0236     FRAME_BEGIN
0237 
         /*
          * length is a 32-bit unsigned int, so bits 63:32 of LEN are not
          * defined on entry. A 32-bit register move zero-extends, which makes
          * the 64-bit compares and subtractions below safe.
          */
         mov %esi, %esi

0238     cmp $0x10, LEN
0239     jb .Lad_out
0240 
0241     /* load the state: */
0242     movdqu 0x00(STATEP), STATE0
0243     movdqu 0x10(STATEP), STATE1
0244     movdqu 0x20(STATEP), STATE2
0245     movdqu 0x30(STATEP), STATE3
0246     movdqu 0x40(STATEP), STATE4
0247 
         /* use the aligned-load loop only when SRC is 16-byte aligned */
0248     mov SRC, %r8
0249     and $0xF, %r8
0250     jnz .Lad_u_loop
0251 
0252 .align 8
0253 .Lad_a_loop:
0254     movdqa 0x00(SRC), MSG
0255     aegis128_update
0256     pxor MSG, STATE4
0257     sub $0x10, LEN
0258     cmp $0x10, LEN
0259     jl .Lad_out_1
0260 
0261     movdqa 0x10(SRC), MSG
0262     aegis128_update
0263     pxor MSG, STATE3
0264     sub $0x10, LEN
0265     cmp $0x10, LEN
0266     jl .Lad_out_2
0267 
0268     movdqa 0x20(SRC), MSG
0269     aegis128_update
0270     pxor MSG, STATE2
0271     sub $0x10, LEN
0272     cmp $0x10, LEN
0273     jl .Lad_out_3
0274 
0275     movdqa 0x30(SRC), MSG
0276     aegis128_update
0277     pxor MSG, STATE1
0278     sub $0x10, LEN
0279     cmp $0x10, LEN
0280     jl .Lad_out_4
0281 
0282     movdqa 0x40(SRC), MSG
0283     aegis128_update
0284     pxor MSG, STATE0
0285     sub $0x10, LEN
0286     cmp $0x10, LEN
0287     jl .Lad_out_0
0288 
0289     add $0x50, SRC
0290     jmp .Lad_a_loop
0291 
0292 .align 8
0293 .Lad_u_loop:
0294     movdqu 0x00(SRC), MSG
0295     aegis128_update
0296     pxor MSG, STATE4
0297     sub $0x10, LEN
0298     cmp $0x10, LEN
0299     jl .Lad_out_1
0300 
0301     movdqu 0x10(SRC), MSG
0302     aegis128_update
0303     pxor MSG, STATE3
0304     sub $0x10, LEN
0305     cmp $0x10, LEN
0306     jl .Lad_out_2
0307 
0308     movdqu 0x20(SRC), MSG
0309     aegis128_update
0310     pxor MSG, STATE2
0311     sub $0x10, LEN
0312     cmp $0x10, LEN
0313     jl .Lad_out_3
0314 
0315     movdqu 0x30(SRC), MSG
0316     aegis128_update
0317     pxor MSG, STATE1
0318     sub $0x10, LEN
0319     cmp $0x10, LEN
0320     jl .Lad_out_4
0321 
0322     movdqu 0x40(SRC), MSG
0323     aegis128_update
0324     pxor MSG, STATE0
0325     sub $0x10, LEN
0326     cmp $0x10, LEN
0327     jl .Lad_out_0
0328 
0329     add $0x50, SRC
0330     jmp .Lad_u_loop
0331 
0332     /* store the state: */
         /* .Lad_out_N: N blocks of the current group of five were absorbed */
         /* (0 means all five, so the registers are back in order)          */
0333 .Lad_out_0:
0334     movdqu STATE0, 0x00(STATEP)
0335     movdqu STATE1, 0x10(STATEP)
0336     movdqu STATE2, 0x20(STATEP)
0337     movdqu STATE3, 0x30(STATEP)
0338     movdqu STATE4, 0x40(STATEP)
0339     FRAME_END
0340     RET
0341 
0342 .Lad_out_1:
0343     movdqu STATE4, 0x00(STATEP)
0344     movdqu STATE0, 0x10(STATEP)
0345     movdqu STATE1, 0x20(STATEP)
0346     movdqu STATE2, 0x30(STATEP)
0347     movdqu STATE3, 0x40(STATEP)
0348     FRAME_END
0349     RET
0350 
0351 .Lad_out_2:
0352     movdqu STATE3, 0x00(STATEP)
0353     movdqu STATE4, 0x10(STATEP)
0354     movdqu STATE0, 0x20(STATEP)
0355     movdqu STATE1, 0x30(STATEP)
0356     movdqu STATE2, 0x40(STATEP)
0357     FRAME_END
0358     RET
0359 
0360 .Lad_out_3:
0361     movdqu STATE2, 0x00(STATEP)
0362     movdqu STATE3, 0x10(STATEP)
0363     movdqu STATE4, 0x20(STATEP)
0364     movdqu STATE0, 0x30(STATEP)
0365     movdqu STATE1, 0x40(STATEP)
0366     FRAME_END
0367     RET
0368 
0369 .Lad_out_4:
0370     movdqu STATE1, 0x00(STATEP)
0371     movdqu STATE2, 0x10(STATEP)
0372     movdqu STATE3, 0x20(STATEP)
0373     movdqu STATE4, 0x30(STATEP)
0374     movdqu STATE0, 0x40(STATEP)
0375     FRAME_END
0376     RET
0377 
0378 .Lad_out:
0379     FRAME_END
0380     RET
0381 SYM_FUNC_END(crypto_aegis128_aesni_ad)
0382 
0383 .macro encrypt_block sfx w0 w1 w2 w3 w4 blk
     /*
      * Encrypts the 16-byte block at index blk of the current 80-byte stride.
      * sfx is "a" or "u" and selects movdqa or movdqu for the load and store.
      * w0-w4 name the registers holding the logical state words 0-4 on
      * entry (w0 is not referenced). The output block is
      * MSG xor w1 xor w4 xor (w2 and w3). After the state update the
      * plaintext is XORed into w4, and the macro leaves through
      * .Lenc_out_<blk> once fewer than 16 bytes remain in LEN.
      * Changes MSG, T0, T1, LEN and the five state registers.
      */
0384     movdqa \w2, T1
0385     pand \w3, T1
0386     movdq\sfx (\blk * 0x10)(SRC), MSG
0387     movdqa MSG, T0
0388     pxor T1, T0
0389     pxor \w4, T0
0390     pxor \w1, T0
0391     movdq\sfx T0, (\blk * 0x10)(DST)
0392 
0393     aegis128_update
0394     pxor MSG, \w4
0395 
0396     sub $0x10, LEN
0397     cmp $0x10, LEN
0398     jl .Lenc_out_\blk
0399 .endm
0400 
0401 /*
0402  * void crypto_aegis128_aesni_enc(void *state, unsigned int length,
0403  *                                const void *src, void *dst);
      *
      * Encrypts whole 16-byte blocks from src to dst and absorbs the
      * plaintext into the state. Returns without touching anything when
      * length is below 16, and stops once fewer than 16 bytes remain.
      *
      * Each encrypt_block expansion is given the register names in the order
      * that matches the rotation caused by the preceding state updates, and
      * leaves through .Lenc_out_<index> when the input runs out.
0404  */
0405 SYM_FUNC_START(crypto_aegis128_aesni_enc)
0406     FRAME_BEGIN
0407 
         /*
          * length is a 32-bit unsigned int, so bits 63:32 of LEN are not
          * defined on entry. A 32-bit register move zero-extends, which makes
          * the 64-bit compares and subtractions below safe.
          */
         mov %esi, %esi

0408     cmp $0x10, LEN
0409     jb .Lenc_out
0410 
0411     /* load the state: */
0412     movdqu 0x00(STATEP), STATE0
0413     movdqu 0x10(STATEP), STATE1
0414     movdqu 0x20(STATEP), STATE2
0415     movdqu 0x30(STATEP), STATE3
0416     movdqu 0x40(STATEP), STATE4
0417 
         /* aligned loop only when both SRC and DST are 16-byte aligned */
0418     mov  SRC,  %r8
0419     or   DST,  %r8
0420     and $0xF, %r8
0421     jnz .Lenc_u_loop
0422 
0423 .align 8
0424 .Lenc_a_loop:
0425     encrypt_block a STATE0 STATE1 STATE2 STATE3 STATE4 0
0426     encrypt_block a STATE4 STATE0 STATE1 STATE2 STATE3 1
0427     encrypt_block a STATE3 STATE4 STATE0 STATE1 STATE2 2
0428     encrypt_block a STATE2 STATE3 STATE4 STATE0 STATE1 3
0429     encrypt_block a STATE1 STATE2 STATE3 STATE4 STATE0 4
0430 
0431     add $0x50, SRC
0432     add $0x50, DST
0433     jmp .Lenc_a_loop
0434 
0435 .align 8
0436 .Lenc_u_loop:
0437     encrypt_block u STATE0 STATE1 STATE2 STATE3 STATE4 0
0438     encrypt_block u STATE4 STATE0 STATE1 STATE2 STATE3 1
0439     encrypt_block u STATE3 STATE4 STATE0 STATE1 STATE2 2
0440     encrypt_block u STATE2 STATE3 STATE4 STATE0 STATE1 3
0441     encrypt_block u STATE1 STATE2 STATE3 STATE4 STATE0 4
0442 
0443     add $0x50, SRC
0444     add $0x50, DST
0445     jmp .Lenc_u_loop
0446 
0447     /* store the state: */
         /* .Lenc_out_N is taken after block index N, i.e. after N + 1 updates */
0448 .Lenc_out_0:
0449     movdqu STATE4, 0x00(STATEP)
0450     movdqu STATE0, 0x10(STATEP)
0451     movdqu STATE1, 0x20(STATEP)
0452     movdqu STATE2, 0x30(STATEP)
0453     movdqu STATE3, 0x40(STATEP)
0454     FRAME_END
0455     RET
0456 
0457 .Lenc_out_1:
0458     movdqu STATE3, 0x00(STATEP)
0459     movdqu STATE4, 0x10(STATEP)
0460     movdqu STATE0, 0x20(STATEP)
0461     movdqu STATE1, 0x30(STATEP)
0462     movdqu STATE2, 0x40(STATEP)
0463     FRAME_END
0464     RET
0465 
0466 .Lenc_out_2:
0467     movdqu STATE2, 0x00(STATEP)
0468     movdqu STATE3, 0x10(STATEP)
0469     movdqu STATE4, 0x20(STATEP)
0470     movdqu STATE0, 0x30(STATEP)
0471     movdqu STATE1, 0x40(STATEP)
0472     FRAME_END
0473     RET
0474 
0475 .Lenc_out_3:
0476     movdqu STATE1, 0x00(STATEP)
0477     movdqu STATE2, 0x10(STATEP)
0478     movdqu STATE3, 0x20(STATEP)
0479     movdqu STATE4, 0x30(STATEP)
0480     movdqu STATE0, 0x40(STATEP)
0481     FRAME_END
0482     RET
0483 
0484 .Lenc_out_4:
0485     movdqu STATE0, 0x00(STATEP)
0486     movdqu STATE1, 0x10(STATEP)
0487     movdqu STATE2, 0x20(STATEP)
0488     movdqu STATE3, 0x30(STATEP)
0489     movdqu STATE4, 0x40(STATEP)
0490     FRAME_END
0491     RET
0492 
0493 .Lenc_out:
0494     FRAME_END
0495     RET
0496 SYM_FUNC_END(crypto_aegis128_aesni_enc)
0497 
0498 /*
0499  * void crypto_aegis128_aesni_enc_tail(void *state, unsigned int length,
0500  *                                     const void *src, void *dst);
      *
      * Encrypts a final partial block: loads length bytes from src
      * zero-padded to 16 bytes, XORs them with the keystream word, writes
      * the first length bytes of the result to dst, and absorbs the
      * zero-padded plaintext into the state with one update.
0501  */
0502 SYM_FUNC_START(crypto_aegis128_aesni_enc_tail)
0503     FRAME_BEGIN
0504 
         /*
          * length is a 32-bit unsigned int, so bits 63:32 of LEN are not
          * defined on entry. Zero-extend it before the helpers below read
          * LEN as a 64-bit register.
          */
         mov %esi, %esi

0505     /* load the state: */
0506     movdqu 0x00(STATEP), STATE0
0507     movdqu 0x10(STATEP), STATE1
0508     movdqu 0x20(STATEP), STATE2
0509     movdqu 0x30(STATEP), STATE3
0510     movdqu 0x40(STATEP), STATE4
0511 
0512     /* encrypt message: */
0513     call __load_partial
0514 
         /* T0 = MSG xor STATE1 xor STATE4 xor (STATE2 and STATE3) */
0515     movdqa MSG, T0
0516     pxor STATE1, T0
0517     pxor STATE4, T0
0518     movdqa STATE2, T1
0519     pand STATE3, T1
0520     pxor T1, T0
0521 
0522     call __store_partial
0523 
0524     aegis128_update
0525     pxor MSG, STATE4
0526 
0527     /* store the state: */
0528     movdqu STATE4, 0x00(STATEP)
0529     movdqu STATE0, 0x10(STATEP)
0530     movdqu STATE1, 0x20(STATEP)
0531     movdqu STATE2, 0x30(STATEP)
0532     movdqu STATE3, 0x40(STATEP)
0533 
0534     FRAME_END
0535     RET
0536 SYM_FUNC_END(crypto_aegis128_aesni_enc_tail)
0537 
0538 .macro decrypt_block sfx w0 w1 w2 w3 w4 blk
     /*
      * Decrypts the 16-byte block at index blk of the current 80-byte stride.
      * sfx is "a" or "u" and selects movdqa or movdqu for the load and store.
      * w0-w4 name the registers holding the logical state words 0-4 on
      * entry (w0 is not referenced). The output block is
      * input xor w1 xor w4 xor (w2 and w3); it is left in MSG, written to
      * DST, and XORed into w4 after the state update. The macro leaves
      * through .Ldec_out_<blk> once fewer than 16 bytes remain in LEN.
      * Changes MSG, T0, T1, LEN and the five state registers.
      */
0539     movdqa \w2, T1
0540     pand \w3, T1
0541     movdq\sfx (\blk * 0x10)(SRC), MSG
0542     pxor T1, MSG
0543     pxor \w4, MSG
0544     pxor \w1, MSG
0545     movdq\sfx MSG, (\blk * 0x10)(DST)
0546 
0547     aegis128_update
0548     pxor MSG, \w4
0549 
0550     sub $0x10, LEN
0551     cmp $0x10, LEN
0552     jl .Ldec_out_\blk
0553 .endm
0554 
0555 /*
0556  * void crypto_aegis128_aesni_dec(void *state, unsigned int length,
0557  *                                const void *src, void *dst);
      *
      * Decrypts whole 16-byte blocks from src to dst and absorbs the
      * recovered plaintext into the state. Returns without touching anything
      * when length is below 16, and stops once fewer than 16 bytes remain.
      *
      * Each decrypt_block expansion is given the register names in the order
      * that matches the rotation caused by the preceding state updates, and
      * leaves through .Ldec_out_<index> when the input runs out.
0558  */
0559 SYM_FUNC_START(crypto_aegis128_aesni_dec)
0560     FRAME_BEGIN
0561 
         /*
          * length is a 32-bit unsigned int, so bits 63:32 of LEN are not
          * defined on entry. A 32-bit register move zero-extends, which makes
          * the 64-bit compares and subtractions below safe.
          */
         mov %esi, %esi

0562     cmp $0x10, LEN
0563     jb .Ldec_out
0564 
0565     /* load the state: */
0566     movdqu 0x00(STATEP), STATE0
0567     movdqu 0x10(STATEP), STATE1
0568     movdqu 0x20(STATEP), STATE2
0569     movdqu 0x30(STATEP), STATE3
0570     movdqu 0x40(STATEP), STATE4
0571 
         /* aligned loop only when both SRC and DST are 16-byte aligned */
0572     mov  SRC, %r8
0573     or   DST, %r8
0574     and $0xF, %r8
0575     jnz .Ldec_u_loop
0576 
0577 .align 8
0578 .Ldec_a_loop:
0579     decrypt_block a STATE0 STATE1 STATE2 STATE3 STATE4 0
0580     decrypt_block a STATE4 STATE0 STATE1 STATE2 STATE3 1
0581     decrypt_block a STATE3 STATE4 STATE0 STATE1 STATE2 2
0582     decrypt_block a STATE2 STATE3 STATE4 STATE0 STATE1 3
0583     decrypt_block a STATE1 STATE2 STATE3 STATE4 STATE0 4
0584 
0585     add $0x50, SRC
0586     add $0x50, DST
0587     jmp .Ldec_a_loop
0588 
0589 .align 8
0590 .Ldec_u_loop:
0591     decrypt_block u STATE0 STATE1 STATE2 STATE3 STATE4 0
0592     decrypt_block u STATE4 STATE0 STATE1 STATE2 STATE3 1
0593     decrypt_block u STATE3 STATE4 STATE0 STATE1 STATE2 2
0594     decrypt_block u STATE2 STATE3 STATE4 STATE0 STATE1 3
0595     decrypt_block u STATE1 STATE2 STATE3 STATE4 STATE0 4
0596 
0597     add $0x50, SRC
0598     add $0x50, DST
0599     jmp .Ldec_u_loop
0600 
0601     /* store the state: */
         /* .Ldec_out_N is taken after block index N, i.e. after N + 1 updates */
0602 .Ldec_out_0:
0603     movdqu STATE4, 0x00(STATEP)
0604     movdqu STATE0, 0x10(STATEP)
0605     movdqu STATE1, 0x20(STATEP)
0606     movdqu STATE2, 0x30(STATEP)
0607     movdqu STATE3, 0x40(STATEP)
0608     FRAME_END
0609     RET
0610 
0611 .Ldec_out_1:
0612     movdqu STATE3, 0x00(STATEP)
0613     movdqu STATE4, 0x10(STATEP)
0614     movdqu STATE0, 0x20(STATEP)
0615     movdqu STATE1, 0x30(STATEP)
0616     movdqu STATE2, 0x40(STATEP)
0617     FRAME_END
0618     RET
0619 
0620 .Ldec_out_2:
0621     movdqu STATE2, 0x00(STATEP)
0622     movdqu STATE3, 0x10(STATEP)
0623     movdqu STATE4, 0x20(STATEP)
0624     movdqu STATE0, 0x30(STATEP)
0625     movdqu STATE1, 0x40(STATEP)
0626     FRAME_END
0627     RET
0628 
0629 .Ldec_out_3:
0630     movdqu STATE1, 0x00(STATEP)
0631     movdqu STATE2, 0x10(STATEP)
0632     movdqu STATE3, 0x20(STATEP)
0633     movdqu STATE4, 0x30(STATEP)
0634     movdqu STATE0, 0x40(STATEP)
0635     FRAME_END
0636     RET
0637 
0638 .Ldec_out_4:
0639     movdqu STATE0, 0x00(STATEP)
0640     movdqu STATE1, 0x10(STATEP)
0641     movdqu STATE2, 0x20(STATEP)
0642     movdqu STATE3, 0x30(STATEP)
0643     movdqu STATE4, 0x40(STATEP)
0644     FRAME_END
0645     RET
0646 
0647 .Ldec_out:
0648     FRAME_END
0649     RET
0650 SYM_FUNC_END(crypto_aegis128_aesni_dec)
0651 
0652 /*
0653  * void crypto_aegis128_aesni_dec_tail(void *state, unsigned int length,
0654  *                                     const void *src, void *dst);
      *
      * Decrypts a final partial block: loads length bytes from src
      * zero-padded to 16 bytes, XORs them with the keystream word and writes
      * the first length bytes of the result to dst. The bytes beyond length
      * are then cleared in MSG, so the state absorbs the zero-padded
      * plaintext with one update.
      *
      * The counter constant is addressed relative to %rip, so the code needs
      * no absolute relocation for it.
0655  */
0656 SYM_FUNC_START(crypto_aegis128_aesni_dec_tail)
0657     FRAME_BEGIN
0658 
         /*
          * length is a 32-bit unsigned int, so bits 63:32 of LEN are not
          * defined on entry. Zero-extend it before the helpers below read
          * LEN as a 64-bit register.
          */
         mov %esi, %esi

0659     /* load the state: */
0660     movdqu 0x00(STATEP), STATE0
0661     movdqu 0x10(STATEP), STATE1
0662     movdqu 0x20(STATEP), STATE2
0663     movdqu 0x30(STATEP), STATE3
0664     movdqu 0x40(STATEP), STATE4
0665 
0666     /* decrypt message: */
0667     call __load_partial
0668 
         /* MSG = MSG xor STATE1 xor STATE4 xor (STATE2 and STATE3) */
0669     pxor STATE1, MSG
0670     pxor STATE4, MSG
0671     movdqa STATE2, T1
0672     pand STATE3, T1
0673     pxor T1, MSG
0674 
0675     movdqa MSG, T0
0676     call __store_partial
0677 
0678     /* mask with byte count: */
         /* four unpacks copy the low byte of LEN into all 16 bytes of T0; */
         /* the signed byte compare then sets byte i to 0xff when LEN > i  */
0679     movq LEN, T0
0680     punpcklbw T0, T0
0681     punpcklbw T0, T0
0682     punpcklbw T0, T0
0683     punpcklbw T0, T0
0684     movdqa .Laegis128_counter(%rip), T1
0685     pcmpgtb T1, T0
0686     pand T0, MSG
0687 
0688     aegis128_update
0689     pxor MSG, STATE4
0690 
0691     /* store the state: */
0692     movdqu STATE4, 0x00(STATEP)
0693     movdqu STATE0, 0x10(STATEP)
0694     movdqu STATE1, 0x20(STATEP)
0695     movdqu STATE2, 0x30(STATEP)
0696     movdqu STATE3, 0x40(STATEP)
0697 
0698     FRAME_END
0699     RET
0700 SYM_FUNC_END(crypto_aegis128_aesni_dec_tail)
0701 
0702 /*
0703  * void crypto_aegis128_aesni_final(void *state, void *tag_xor,
0704  *                                  u64 assoclen, u64 cryptlen);
      *
      * Runs seven state updates that all absorb the same block, built from
      * the two lengths converted to bit counts and XORed with state word 3.
      * The five resulting state words are then XORed into the 16 bytes at
      * tag_xor (%rsi). The state in memory is read but not written back.
0705  */
0706 SYM_FUNC_START(crypto_aegis128_aesni_final)
0707     FRAME_BEGIN
0708 
0709     /* fetch the five state words: */
0710     movdqu 0x40(STATEP), STATE4
0711     movdqu 0x30(STATEP), STATE3
0712     movdqu 0x20(STATEP), STATE2
0713     movdqu 0x10(STATEP), STATE1
0714     movdqu 0x00(STATEP), STATE0
0715 
0716     /* length block: assoclen * 8 in the low qword, cryptlen * 8 above it */
0717     movq %rdx, MSG
0718     movq %rcx, T0
0719     psllq $3, MSG
0720     psllq $3, T0
0721     punpcklqdq T0, MSG
0722 
0723     pxor STATE3, MSG
0724 
0725     /* seven updates, each absorbing the same block: */
0726     aegis128_update; pxor MSG, STATE4
0727     aegis128_update; pxor MSG, STATE3
0728     aegis128_update; pxor MSG, STATE2
0729     aegis128_update; pxor MSG, STATE1
0730     aegis128_update; pxor MSG, STATE0
0731     aegis128_update; pxor MSG, STATE4
0732     aegis128_update; pxor MSG, STATE3
0733 
0734     /* fold all five state words into the tag buffer: */
0735     movdqu (%rsi), MSG
0736 
0737     pxor STATE4, MSG
0738     pxor STATE3, MSG
0739     pxor STATE2, MSG
0740     pxor STATE1, MSG
0741     pxor STATE0, MSG
0742 
0743     movdqu MSG, (%rsi)
0744 
0745     FRAME_END
0746     RET
0747 SYM_FUNC_END(crypto_aegis128_aesni_final)