/*
 * Table-based AES cipher for arm64, accelerated with NEON permute
 * instructions: S-box lookups are performed with tbl/tbx against
 * registers preloaded with the full 256-byte (inverse) S-box.
 *
 * The block mode routines come from aes-modes.S (included below) and
 * are instantiated here with a neon_ symbol prefix.
 */
#include <linux/linkage.h>
#include <asm/assembler.h>

#define AES_FUNC_START(func)	SYM_FUNC_START(neon_ ## func)
#define AES_FUNC_END(func)	SYM_FUNC_END(neon_ ## func)

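	/* register aliases referenced by the mode routines in aes-modes.S */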
	xtsmask		.req	v7
	cbciv		.req	v7
	vctr		.req	v4

	.macro		xts_reload_mask, tmp
	xts_load_mask	\tmp
	.endm

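	/* special case for the neon-bs driver calling into this one for CTS */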
	.macro		xts_cts_skip_tw, reg, lbl
	tbnz		\reg, #1, \lbl
	.endm

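	/* multiply by polynomial 'x' in GF(2^8) */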
	.macro		mul_by_x, out, in, temp, const
	sshr		\temp, \in, #7			/* replicate MSB: 0xff where in >= 0x80 */
	shl		\out, \in, #1			/* multiply by x */
	and		\temp, \temp, \const		/* 0x1b where the MSB was set */
	eor		\out, \out, \temp		/* reduce mod x^8 + x^4 + x^3 + x + 1 */
	.endm

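	/* multiply by polynomial 'x^2' in GF(2^8) */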
	.macro		mul_by_x2, out, in, temp, const
	ushr		\temp, \in, #6			/* top two bits select the reduction */
	shl		\out, \in, #2			/* multiply by x^2 */
	pmul		\temp, \temp, \const		/* carryless multiply by 0x1b */
	eor		\out, \out, \temp		/* fold the reduction back in */
	.endm

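	/* preload the entire Sbox */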
	.macro		prepare, sbox, shiftrows, temp
	movi		v12.16b, #0x1b
	ldr_l		q13, \shiftrows, \temp
	ldr_l		q14, .Lror32by8, \temp
	adr_l		\temp, \sbox
	ld1		{v16.16b-v19.16b}, [\temp], #64
	ld1		{v20.16b-v23.16b}, [\temp], #64
	ld1		{v24.16b-v27.16b}, [\temp], #64
	ld1		{v28.16b-v31.16b}, [\temp]
	.endm

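	/* do preload for encryption */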
	.macro		enc_prepare, ignore0, ignore1, temp
	prepare		crypto_aes_sbox, .LForward_ShiftRows, \temp
	.endm

	.macro		enc_switch_key, ignore0, ignore1, temp
	/* do nothing */
	.endm

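	/* do preload for decryption */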
	.macro		dec_prepare, ignore0, ignore1, temp
	prepare		crypto_aes_inv_sbox, .LReverse_ShiftRows, \temp
	.endm

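	/* apply SubBytes transformation using the preloaded Sbox */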
	.macro		sub_bytes, in
	sub		v9.16b, \in\().16b, v15.16b	/* v15 == #0x40: step to next Sbox quarter */
	tbl		\in\().16b, {v16.16b-v19.16b}, \in\().16b
	sub		v10.16b, v9.16b, v15.16b
	tbx		\in\().16b, {v20.16b-v23.16b}, v9.16b
	sub		v11.16b, v10.16b, v15.16b
	tbx		\in\().16b, {v24.16b-v27.16b}, v10.16b
	tbx		\in\().16b, {v28.16b-v31.16b}, v11.16b
	.endm

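	/* apply MixColumns transformation */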
	.macro		mix_columns, in, enc
	.if		\enc == 0
	/* Inverse MixColumns: pre-multiply by x^2 */
	mul_by_x2	v8.16b, \in\().16b, v9.16b, v12.16b
	eor		\in\().16b, \in\().16b, v8.16b
	rev32		v8.8h, v8.8h
	eor		\in\().16b, \in\().16b, v8.16b
	.endif

	mul_by_x	v9.16b, \in\().16b, v8.16b, v12.16b
	rev32		v8.8h, \in\().8h			/* rotate each 32-bit word by 16 */
	eor		v8.16b, v8.16b, v9.16b
	eor		\in\().16b, \in\().16b, v8.16b
	tbl		\in\().16b, {\in\().16b}, v14.16b	/* rotate each 32-bit word by 8 */
	eor		\in\().16b, \in\().16b, v8.16b
	.endm

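	/*
	 * Encrypt/decrypt a single AES block: AddRoundKey, ShiftRows and
	 * SubBytes every round, MixColumns on all but the final round.
	 */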
	.macro		do_block, enc, in, rounds, rk, rkp, i
	ld1		{v15.4s}, [\rk]
	add		\rkp, \rk, #16
	mov		\i, \rounds
1111:	eor		\in\().16b, \in\().16b, v15.16b		/* ^round key */
	movi		v15.16b, #0x40
	tbl		\in\().16b, {\in\().16b}, v13.16b	/* ShiftRows */
	sub_bytes	\in
	subs		\i, \i, #1
	ld1		{v15.4s}, [\rkp], #16
	beq		2222f
	mix_columns	\in, \enc
	b		1111b
2222:	eor		\in\().16b, \in\().16b, v15.16b		/* ^round key */
	.endm

	.macro		encrypt_block, in, rounds, rk, rkp, i
	do_block	1, \in, \rounds, \rk, \rkp, \i
	.endm

	.macro		decrypt_block, in, rounds, rk, rkp, i
	do_block	0, \in, \rounds, \rk, \rkp, \i
	.endm

	/*
	 * Interleaved versions: functionally equivalent to the ones above,
	 * but applied to AES states of 4 blocks at a time.
	 */

	.macro		sub_bytes_4x, in0, in1, in2, in3
	sub		v8.16b, \in0\().16b, v15.16b
	tbl		\in0\().16b, {v16.16b-v19.16b}, \in0\().16b
	sub		v9.16b, \in1\().16b, v15.16b
	tbl		\in1\().16b, {v16.16b-v19.16b}, \in1\().16b
	sub		v10.16b, \in2\().16b, v15.16b
	tbl		\in2\().16b, {v16.16b-v19.16b}, \in2\().16b
	sub		v11.16b, \in3\().16b, v15.16b
	tbl		\in3\().16b, {v16.16b-v19.16b}, \in3\().16b
	tbx		\in0\().16b, {v20.16b-v23.16b}, v8.16b
	tbx		\in1\().16b, {v20.16b-v23.16b}, v9.16b
	sub		v8.16b, v8.16b, v15.16b
	tbx		\in2\().16b, {v20.16b-v23.16b}, v10.16b
	sub		v9.16b, v9.16b, v15.16b
	tbx		\in3\().16b, {v20.16b-v23.16b}, v11.16b
	sub		v10.16b, v10.16b, v15.16b
	tbx		\in0\().16b, {v24.16b-v27.16b}, v8.16b
	sub		v11.16b, v11.16b, v15.16b
	tbx		\in1\().16b, {v24.16b-v27.16b}, v9.16b
	sub		v8.16b, v8.16b, v15.16b
	tbx		\in2\().16b, {v24.16b-v27.16b}, v10.16b
	sub		v9.16b, v9.16b, v15.16b
	tbx		\in3\().16b, {v24.16b-v27.16b}, v11.16b
	sub		v10.16b, v10.16b, v15.16b
	tbx		\in0\().16b, {v28.16b-v31.16b}, v8.16b
	sub		v11.16b, v11.16b, v15.16b
	tbx		\in1\().16b, {v28.16b-v31.16b}, v9.16b
	tbx		\in2\().16b, {v28.16b-v31.16b}, v10.16b
	tbx		\in3\().16b, {v28.16b-v31.16b}, v11.16b
	.endm

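	/* 2-way interleaved version of mul_by_x */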
	.macro		mul_by_x_2x, out0, out1, in0, in1, tmp0, tmp1, const
	sshr		\tmp0\().16b, \in0\().16b, #7
	shl		\out0\().16b, \in0\().16b, #1
	sshr		\tmp1\().16b, \in1\().16b, #7
	and		\tmp0\().16b, \tmp0\().16b, \const\().16b
	shl		\out1\().16b, \in1\().16b, #1
	and		\tmp1\().16b, \tmp1\().16b, \const\().16b
	eor		\out0\().16b, \out0\().16b, \tmp0\().16b
	eor		\out1\().16b, \out1\().16b, \tmp1\().16b
	.endm

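	/* 2-way interleaved version of mul_by_x2 */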
	.macro		mul_by_x2_2x, out0, out1, in0, in1, tmp0, tmp1, const
	ushr		\tmp0\().16b, \in0\().16b, #6
	shl		\out0\().16b, \in0\().16b, #2
	ushr		\tmp1\().16b, \in1\().16b, #6
	pmul		\tmp0\().16b, \tmp0\().16b, \const\().16b
	shl		\out1\().16b, \in1\().16b, #2
	pmul		\tmp1\().16b, \tmp1\().16b, \const\().16b
	eor		\out0\().16b, \out0\().16b, \tmp0\().16b
	eor		\out1\().16b, \out1\().16b, \tmp1\().16b
	.endm

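	/* 2-way interleaved version of mix_columns */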
	.macro		mix_columns_2x, in0, in1, enc
	.if		\enc == 0
	/* Inverse MixColumns: pre-multiply by x^2 */
	mul_by_x2_2x	v8, v9, \in0, \in1, v10, v11, v12
	eor		\in0\().16b, \in0\().16b, v8.16b
	rev32		v8.8h, v8.8h
	eor		\in1\().16b, \in1\().16b, v9.16b
	rev32		v9.8h, v9.8h
	eor		\in0\().16b, \in0\().16b, v8.16b
	eor		\in1\().16b, \in1\().16b, v9.16b
	.endif

	mul_by_x_2x	v8, v9, \in0, \in1, v10, v11, v12
	rev32		v10.8h, \in0\().8h
	rev32		v11.8h, \in1\().8h
	eor		v10.16b, v10.16b, v8.16b
	eor		v11.16b, v11.16b, v9.16b
	eor		\in0\().16b, \in0\().16b, v10.16b
	eor		\in1\().16b, \in1\().16b, v11.16b
	tbl		\in0\().16b, {\in0\().16b}, v14.16b
	tbl		\in1\().16b, {\in1\().16b}, v14.16b
	eor		\in0\().16b, \in0\().16b, v10.16b
	eor		\in1\().16b, \in1\().16b, v11.16b
	.endm

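	/* 4-way interleaved version of do_block, sharing one round key load */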
	.macro		do_block_4x, enc, in0, in1, in2, in3, rounds, rk, rkp, i
	ld1		{v15.4s}, [\rk]
	add		\rkp, \rk, #16
	mov		\i, \rounds
1111:	eor		\in0\().16b, \in0\().16b, v15.16b	/* ^round key */
	eor		\in1\().16b, \in1\().16b, v15.16b	/* ^round key */
	eor		\in2\().16b, \in2\().16b, v15.16b	/* ^round key */
	eor		\in3\().16b, \in3\().16b, v15.16b	/* ^round key */
	movi		v15.16b, #0x40
	tbl		\in0\().16b, {\in0\().16b}, v13.16b	/* ShiftRows */
	tbl		\in1\().16b, {\in1\().16b}, v13.16b	/* ShiftRows */
	tbl		\in2\().16b, {\in2\().16b}, v13.16b	/* ShiftRows */
	tbl		\in3\().16b, {\in3\().16b}, v13.16b	/* ShiftRows */
	sub_bytes_4x	\in0, \in1, \in2, \in3
	subs		\i, \i, #1
	ld1		{v15.4s}, [\rkp], #16
	beq		2222f
	mix_columns_2x	\in0, \in1, \enc
	mix_columns_2x	\in2, \in3, \enc
	b		1111b
2222:	eor		\in0\().16b, \in0\().16b, v15.16b	/* ^round key */
	eor		\in1\().16b, \in1\().16b, v15.16b	/* ^round key */
	eor		\in2\().16b, \in2\().16b, v15.16b	/* ^round key */
	eor		\in3\().16b, \in3\().16b, v15.16b	/* ^round key */
	.endm

	.macro		encrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i
	do_block_4x	1, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i
	.endm

	.macro		decrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i
	do_block_4x	0, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i
	.endm

#include "aes-modes.S"

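	/*
	 * Permute vectors for tbl: the (Inv)ShiftRows byte order, and a
	 * rotate-right-by-8 within each 32-bit word (used by MixColumns).
	 */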
	.section	".rodata", "a"
	.align		4
.LForward_ShiftRows:
	.octa		0x0b06010c07020d08030e09040f0a0500

.LReverse_ShiftRows:
	.octa		0x0306090c0f0205080b0e0104070a0d00

.Lror32by8:
	.octa		0x0c0f0e0d080b0a090407060500030201