/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * linux/arch/arm64/crypto/aes-neon.S - AES cipher for ARMv8 NEON
 *
 * Copyright (C) 2013 - 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

#define AES_FUNC_START(func)        SYM_FUNC_START(neon_ ## func)
#define AES_FUNC_END(func)      SYM_FUNC_END(neon_ ## func)

    xtsmask     .req    v7
    cbciv       .req    v7
    vctr        .req    v4
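
    /*
     * These aliases name the registers that the shared mode code included
     * from aes-modes.S expects: v7 doubles as the XTS tweak mask and the
     * CBC IV (the two are never live at the same time), and v4 carries the
     * CTR counter block.
     */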

    .macro      xts_reload_mask, tmp
    xts_load_mask   \tmp
    .endm

    /* special case for the neon-bs driver calling into this one for CTS */
    .macro      xts_cts_skip_tw, reg, lbl
    tbnz        \reg, #1, \lbl
    .endm

    /* multiply by polynomial 'x' in GF(2^8) */
    .macro      mul_by_x, out, in, temp, const
    sshr        \temp, \in, #7
    shl     \out, \in, #1
    and     \temp, \temp, \const
    eor     \out, \out, \temp
    .endm
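
    /*
     * Scalar sketch of mul_by_x (illustrative only): per byte b, the
     * instructions above compute
     *
     *    out = (b << 1) ^ ((b & 0x80) ? 0x1b : 0)
     *
     * sshr #7 smears the top bit across the byte, and the AND with the
     * constant (0x1b, loaded into v12 by 'prepare') selects the GF(2^8)
     * reduction polynomial when that bit was set.
     */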

    /* multiply by polynomial 'x^2' in GF(2^8) */
    .macro      mul_by_x2, out, in, temp, const
    ushr        \temp, \in, #6
    shl     \out, \in, #2
    pmul        \temp, \temp, \const
    eor     \out, \out, \temp
    .endm
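
    /*
     * Scalar sketch of mul_by_x2 (illustrative only): multiplying by x^2
     * shifts each byte left by two and folds the two bits shifted out back
     * in via the reduction polynomial.  pmul is a carry-less multiply, so
     * per byte b this is roughly
     *
     *    out = (b << 2) ^ clmul(b >> 6, 0x1b)
     *
     * where clmul(b >> 6, 0x1b) reduces the two high bits of b.
     */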

    /* preload the entire Sbox */
    .macro      prepare, sbox, shiftrows, temp
    movi        v12.16b, #0x1b
    ldr_l       q13, \shiftrows, \temp
    ldr_l       q14, .Lror32by8, \temp
    adr_l       \temp, \sbox
    ld1     {v16.16b-v19.16b}, [\temp], #64
    ld1     {v20.16b-v23.16b}, [\temp], #64
    ld1     {v24.16b-v27.16b}, [\temp], #64
    ld1     {v28.16b-v31.16b}, [\temp]
    .endm
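
    /*
     * Fixed register assignment established by 'prepare' and relied on by
     * the macros below:
     *
     *    v12        GF(2^8) reduction constant 0x1b
     *    v13        ShiftRows (or inverse ShiftRows) tbl permutation
     *    v14        per-column rotate-by-one-byte tbl permutation
     *    v16-v31    the full 256-byte S-box (or inverse S-box)
     *
     * Keeping the whole S-box resident in registers lets SubBytes run as
     * chained 64-byte table lookups with no per-byte memory accesses.
     */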

    /* do preload for encryption */
    .macro      enc_prepare, ignore0, ignore1, temp
    prepare     crypto_aes_sbox, .LForward_ShiftRows, \temp
    .endm

    .macro      enc_switch_key, ignore0, ignore1, temp
    /* do nothing */
    .endm

    /* do preload for decryption */
    .macro      dec_prepare, ignore0, ignore1, temp
    prepare     crypto_aes_inv_sbox, .LReverse_ShiftRows, \temp
    .endm

    /* apply SubBytes transformation using the preloaded Sbox */
    .macro      sub_bytes, in
    sub     v9.16b, \in\().16b, v15.16b
    tbl     \in\().16b, {v16.16b-v19.16b}, \in\().16b
    sub     v10.16b, v9.16b, v15.16b
    tbx     \in\().16b, {v20.16b-v23.16b}, v9.16b
    sub     v11.16b, v10.16b, v15.16b
    tbx     \in\().16b, {v24.16b-v27.16b}, v10.16b
    tbx     \in\().16b, {v28.16b-v31.16b}, v11.16b
    .endm
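
    /*
     * sub_bytes walks the preloaded S-box in 64-byte chunks, since one
     * tbl/tbx can only address four table registers.  v15 holds #0x40 at
     * this point (set up in do_block), so for a state byte b:
     *
     *    b in [0x00, 0x3f]: tbl on v16-v19 with index b
     *    b in [0x40, 0x7f]: tbx on v20-v23 with index b - 0x40
     *    b in [0x80, 0xbf]: tbx on v24-v27 with index b - 0x80
     *    b in [0xc0, 0xff]: tbx on v28-v31 with index b - 0xc0
     *
     * tbx leaves the destination byte untouched when its index is out of
     * range, which is what lets the four lookups be chained.
     */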

    /* apply MixColumns transformation */
    .macro      mix_columns, in, enc
    .if     \enc == 0
    /* Inverse MixColumns: pre-multiply by { 5, 0, 4, 0 } */
    mul_by_x2   v8.16b, \in\().16b, v9.16b, v12.16b
    eor     \in\().16b, \in\().16b, v8.16b
    rev32       v8.8h, v8.8h
    eor     \in\().16b, \in\().16b, v8.16b
    .endif

    mul_by_x    v9.16b, \in\().16b, v8.16b, v12.16b
    rev32       v8.8h, \in\().8h
    eor     v8.16b, v8.16b, v9.16b
    eor     \in\().16b, \in\().16b, v8.16b
    tbl     \in\().16b, {\in\().16b}, v14.16b
    eor     \in\().16b, \in\().16b, v8.16b
    .endm
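
    /*
     * Sketch of the algebra (per column, byte indices mod 4, products in
     * GF(2^8)): the forward path builds t_i = x*a_i ^ a_(i+2) via mul_by_x
     * and rev32, and the trailing tbl/eor pair then yields
     *
     *    out_i = 2*a_i ^ 3*a_(i+1) ^ a_(i+2) ^ a_(i+3)
     *
     * i.e. the standard MixColumns circulant { 2, 3, 1, 1 }.  For
     * decryption, the { 5, 0, 4, 0 } pre-multiply first forms
     * b_i = 5*a_i ^ 4*a_(i+2); pushing that through the same forward path
     * gives 14*a_i ^ 11*a_(i+1) ^ 13*a_(i+2) ^ 9*a_(i+3), which is inverse
     * MixColumns, so both directions share one code path.
     */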

    .macro      do_block, enc, in, rounds, rk, rkp, i
    ld1     {v15.4s}, [\rk]
    add     \rkp, \rk, #16
    mov     \i, \rounds
1111:   eor     \in\().16b, \in\().16b, v15.16b     /* ^round key */
    movi        v15.16b, #0x40
    tbl     \in\().16b, {\in\().16b}, v13.16b   /* ShiftRows */
    sub_bytes   \in
    subs        \i, \i, #1
    ld1     {v15.4s}, [\rkp], #16
    beq     2222f
    mix_columns \in, \enc
    b       1111b
2222:   eor     \in\().16b, \in\().16b, v15.16b     /* ^round key */
    .endm
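
    /*
     * Round loop structure: v15 does double duty, holding the current
     * round key for the AddRoundKey eor, then the #0x40 chunk offset for
     * sub_bytes, before being reloaded with the next round key.  In
     * pseudo-code:
     *
     *    state ^= rk[0];
     *    for (r = 1; r < rounds; r++) {
     *        state = mix_columns(sub_bytes(shift_rows(state)));
     *        state ^= rk[r];
     *    }
     *    state = sub_bytes(shift_rows(state));
     *    state ^= rk[rounds];
     *
     * ShiftRows is applied before SubBytes here; the two commute since
     * SubBytes operates on each byte independently.
     */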

    .macro      encrypt_block, in, rounds, rk, rkp, i
    do_block    1, \in, \rounds, \rk, \rkp, \i
    .endm

    .macro      decrypt_block, in, rounds, rk, rkp, i
    do_block    0, \in, \rounds, \rk, \rkp, \i
    .endm

    /*
     * Interleaved versions: functionally equivalent to the
     * ones above, but applied to AES states in parallel.
     */

    .macro      sub_bytes_4x, in0, in1, in2, in3
    sub     v8.16b, \in0\().16b, v15.16b
    tbl     \in0\().16b, {v16.16b-v19.16b}, \in0\().16b
    sub     v9.16b, \in1\().16b, v15.16b
    tbl     \in1\().16b, {v16.16b-v19.16b}, \in1\().16b
    sub     v10.16b, \in2\().16b, v15.16b
    tbl     \in2\().16b, {v16.16b-v19.16b}, \in2\().16b
    sub     v11.16b, \in3\().16b, v15.16b
    tbl     \in3\().16b, {v16.16b-v19.16b}, \in3\().16b
    tbx     \in0\().16b, {v20.16b-v23.16b}, v8.16b
    tbx     \in1\().16b, {v20.16b-v23.16b}, v9.16b
    sub     v8.16b, v8.16b, v15.16b
    tbx     \in2\().16b, {v20.16b-v23.16b}, v10.16b
    sub     v9.16b, v9.16b, v15.16b
    tbx     \in3\().16b, {v20.16b-v23.16b}, v11.16b
    sub     v10.16b, v10.16b, v15.16b
    tbx     \in0\().16b, {v24.16b-v27.16b}, v8.16b
    sub     v11.16b, v11.16b, v15.16b
    tbx     \in1\().16b, {v24.16b-v27.16b}, v9.16b
    sub     v8.16b, v8.16b, v15.16b
    tbx     \in2\().16b, {v24.16b-v27.16b}, v10.16b
    sub     v9.16b, v9.16b, v15.16b
    tbx     \in3\().16b, {v24.16b-v27.16b}, v11.16b
    sub     v10.16b, v10.16b, v15.16b
    tbx     \in0\().16b, {v28.16b-v31.16b}, v8.16b
    sub     v11.16b, v11.16b, v15.16b
    tbx     \in1\().16b, {v28.16b-v31.16b}, v9.16b
    tbx     \in2\().16b, {v28.16b-v31.16b}, v10.16b
    tbx     \in3\().16b, {v28.16b-v31.16b}, v11.16b
    .endm
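
    /*
     * sub_bytes_4x performs the same chained 64-byte tbl/tbx lookups as
     * sub_bytes, but interleaves four independent states so that the index
     * adjustments (sub) for one state can issue while the table lookups of
     * another are still in flight, helping to hide the tbl/tbx latency.
     */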

    .macro      mul_by_x_2x, out0, out1, in0, in1, tmp0, tmp1, const
    sshr        \tmp0\().16b, \in0\().16b, #7
    shl     \out0\().16b, \in0\().16b, #1
    sshr        \tmp1\().16b, \in1\().16b, #7
    and     \tmp0\().16b, \tmp0\().16b, \const\().16b
    shl     \out1\().16b, \in1\().16b, #1
    and     \tmp1\().16b, \tmp1\().16b, \const\().16b
    eor     \out0\().16b, \out0\().16b, \tmp0\().16b
    eor     \out1\().16b, \out1\().16b, \tmp1\().16b
    .endm

    .macro      mul_by_x2_2x, out0, out1, in0, in1, tmp0, tmp1, const
    ushr        \tmp0\().16b, \in0\().16b, #6
    shl     \out0\().16b, \in0\().16b, #2
    ushr        \tmp1\().16b, \in1\().16b, #6
    pmul        \tmp0\().16b, \tmp0\().16b, \const\().16b
    shl     \out1\().16b, \in1\().16b, #2
    pmul        \tmp1\().16b, \tmp1\().16b, \const\().16b
    eor     \out0\().16b, \out0\().16b, \tmp0\().16b
    eor     \out1\().16b, \out1\().16b, \tmp1\().16b
    .endm

    .macro      mix_columns_2x, in0, in1, enc
    .if     \enc == 0
    /* Inverse MixColumns: pre-multiply by { 5, 0, 4, 0 } */
    mul_by_x2_2x    v8, v9, \in0, \in1, v10, v11, v12
    eor     \in0\().16b, \in0\().16b, v8.16b
    rev32       v8.8h, v8.8h
    eor     \in1\().16b, \in1\().16b, v9.16b
    rev32       v9.8h, v9.8h
    eor     \in0\().16b, \in0\().16b, v8.16b
    eor     \in1\().16b, \in1\().16b, v9.16b
    .endif

    mul_by_x_2x v8, v9, \in0, \in1, v10, v11, v12
    rev32       v10.8h, \in0\().8h
    rev32       v11.8h, \in1\().8h
    eor     v10.16b, v10.16b, v8.16b
    eor     v11.16b, v11.16b, v9.16b
    eor     \in0\().16b, \in0\().16b, v10.16b
    eor     \in1\().16b, \in1\().16b, v11.16b
    tbl     \in0\().16b, {\in0\().16b}, v14.16b
    tbl     \in1\().16b, {\in1\().16b}, v14.16b
    eor     \in0\().16b, \in0\().16b, v10.16b
    eor     \in1\().16b, \in1\().16b, v11.16b
    .endm

    .macro      do_block_4x, enc, in0, in1, in2, in3, rounds, rk, rkp, i
    ld1     {v15.4s}, [\rk]
    add     \rkp, \rk, #16
    mov     \i, \rounds
1111:   eor     \in0\().16b, \in0\().16b, v15.16b   /* ^round key */
    eor     \in1\().16b, \in1\().16b, v15.16b   /* ^round key */
    eor     \in2\().16b, \in2\().16b, v15.16b   /* ^round key */
    eor     \in3\().16b, \in3\().16b, v15.16b   /* ^round key */
    movi        v15.16b, #0x40
    tbl     \in0\().16b, {\in0\().16b}, v13.16b /* ShiftRows */
    tbl     \in1\().16b, {\in1\().16b}, v13.16b /* ShiftRows */
    tbl     \in2\().16b, {\in2\().16b}, v13.16b /* ShiftRows */
    tbl     \in3\().16b, {\in3\().16b}, v13.16b /* ShiftRows */
    sub_bytes_4x    \in0, \in1, \in2, \in3
    subs        \i, \i, #1
    ld1     {v15.4s}, [\rkp], #16
    beq     2222f
    mix_columns_2x  \in0, \in1, \enc
    mix_columns_2x  \in2, \in3, \enc
    b       1111b
2222:   eor     \in0\().16b, \in0\().16b, v15.16b   /* ^round key */
    eor     \in1\().16b, \in1\().16b, v15.16b   /* ^round key */
    eor     \in2\().16b, \in2\().16b, v15.16b   /* ^round key */
    eor     \in3\().16b, \in3\().16b, v15.16b   /* ^round key */
    .endm
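
    /*
     * do_block_4x mirrors do_block but keeps four AES states live at once.
     * MixColumns is done as two mix_columns_2x invocations because the _2x
     * helpers use v8-v11 as scratch, which only covers two states at a
     * time.
     */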

    .macro      encrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i
    do_block_4x 1, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i
    .endm

    .macro      decrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i
    do_block_4x 0, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i
    .endm

#include "aes-modes.S"

    .section    ".rodata", "a"
    .align      4
.LForward_ShiftRows:
    .octa       0x0b06010c07020d08030e09040f0a0500

.LReverse_ShiftRows:
    .octa       0x0306090c0f0205080b0e0104070a0d00

.Lror32by8:
    .octa       0x0c0f0e0d080b0a090407060500030201
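
    /*
     * The constants above are tbl permutation vectors (byte 0 of the
     * permutation is the least significant byte of each .octa):
     *
     *    .LForward_ShiftRows / .LReverse_ShiftRows
     *        source-byte indices implementing AES (inverse) ShiftRows on
     *        the column-major 4x4 state, e.g. forward entry i equals
     *        4 * ((i / 4 + i % 4) % 4) + i % 4
     *    .Lror32by8
     *        rotates each 32-bit column by one byte (output byte i takes
     *        input byte i + 1 of the same column), used by mix_columns to
     *        reach the a_(i+1) and a_(i+3) terms
     */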