/*
 * Bit sliced AES using NEON instructions
 *
 * The core routines below process eight AES blocks in parallel in a
 * bit-sliced representation, following the approach described by Emilia
 * Kaesper and Peter Schwabe in 'Faster and Timing-Attack Resistant AES-GCM'.
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

        .text

rounds  .req    x11
bskey   .req    x12

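        /*
         * Basis change macros for the bit-sliced S-box: in_bs_ch/out_bs_ch
         * move the eight bit planes into and out of the basis used by
         * inv_gf256 on the encryption path; inv_in_bs_ch/inv_out_bs_ch do
         * the same for the inverse S-box.
         */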
        .macro in_bs_ch, b0, b1, b2, b3, b4, b5, b6, b7
        eor \b2, \b2, \b1
        eor \b5, \b5, \b6
        eor \b3, \b3, \b0
        eor \b6, \b6, \b2
        eor \b5, \b5, \b0
        eor \b6, \b6, \b3
        eor \b3, \b3, \b7
        eor \b7, \b7, \b5
        eor \b3, \b3, \b4
        eor \b4, \b4, \b5
        eor \b2, \b2, \b7
        eor \b3, \b3, \b1
        eor \b1, \b1, \b5
        .endm

        .macro out_bs_ch, b0, b1, b2, b3, b4, b5, b6, b7
        eor \b0, \b0, \b6
        eor \b1, \b1, \b4
        eor \b4, \b4, \b6
        eor \b2, \b2, \b0
        eor \b6, \b6, \b1
        eor \b1, \b1, \b5
        eor \b5, \b5, \b3
        eor \b3, \b3, \b7
        eor \b7, \b7, \b5
        eor \b2, \b2, \b5
        eor \b4, \b4, \b7
        .endm

        .macro inv_in_bs_ch, b6, b1, b2, b4, b7, b0, b3, b5
        eor \b1, \b1, \b7
        eor \b4, \b4, \b7
        eor \b7, \b7, \b5
        eor \b1, \b1, \b3
        eor \b2, \b2, \b5
        eor \b3, \b3, \b7
        eor \b6, \b6, \b1
        eor \b2, \b2, \b0
        eor \b5, \b5, \b3
        eor \b4, \b4, \b6
        eor \b0, \b0, \b6
        eor \b1, \b1, \b4
        .endm

        .macro inv_out_bs_ch, b6, b5, b0, b3, b7, b1, b4, b2
        eor \b1, \b1, \b5
        eor \b2, \b2, \b7
        eor \b3, \b3, \b1
        eor \b4, \b4, \b5
        eor \b7, \b7, \b5
        eor \b3, \b3, \b4
        eor \b5, \b5, \b0
        eor \b3, \b3, \b7
        eor \b6, \b6, \b2
        eor \b2, \b2, \b1
        eor \b6, \b6, \b3
        eor \b3, \b3, \b0
        eor \b5, \b5, \b6
        .endm

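        /*
         * Multiplication helpers for the tower field arithmetic behind the
         * bit-sliced S-box: mul_gf4 multiplies 2-bit subfield elements,
         * mul_gf4_n_gf4 interleaves two such multiplications, and mul_gf16_2
         * combines them into two 4-bit (GF(2^4)) multiplications that share
         * one operand.
         */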
        .macro mul_gf4, x0, x1, y0, y1, t0, t1
        eor \t0, \y0, \y1
        and \t0, \t0, \x0
        eor \x0, \x0, \x1
        and \t1, \x1, \y0
        and \x0, \x0, \y1
        eor \x1, \t1, \t0
        eor \x0, \x0, \t1
        .endm

        .macro mul_gf4_n_gf4, x0, x1, y0, y1, t0, x2, x3, y2, y3, t1
        eor \t0, \y0, \y1
        eor \t1, \y2, \y3
        and \t0, \t0, \x0
        and \t1, \t1, \x2
        eor \x0, \x0, \x1
        eor \x2, \x2, \x3
        and \x1, \x1, \y0
        and \x3, \x3, \y2
        and \x0, \x0, \y1
        and \x2, \x2, \y3
        eor \x1, \x1, \x0
        eor \x2, \x2, \x3
        eor \x0, \x0, \t0
        eor \x3, \x3, \t1
        .endm

        .macro mul_gf16_2, x0, x1, x2, x3, x4, x5, x6, x7, \
                           y0, y1, y2, y3, t0, t1, t2, t3
        eor \t0, \x0, \x2
        eor \t1, \x1, \x3
        mul_gf4 \x0, \x1, \y0, \y1, \t2, \t3
        eor \y0, \y0, \y2
        eor \y1, \y1, \y3
        mul_gf4_n_gf4 \t0, \t1, \y0, \y1, \t3, \x2, \x3, \y2, \y3, \t2
        eor \x0, \x0, \t0
        eor \x2, \x2, \t0
        eor \x1, \x1, \t1
        eor \x3, \x3, \t1
        eor \t0, \x4, \x6
        eor \t1, \x5, \x7
        mul_gf4_n_gf4 \t0, \t1, \y0, \y1, \t3, \x6, \x7, \y2, \y3, \t2
        eor \y0, \y0, \y2
        eor \y1, \y1, \y3
        mul_gf4 \x4, \x5, \y0, \y1, \t2, \t3
        eor \x4, \x4, \t0
        eor \x6, \x6, \t0
        eor \x5, \x5, \t1
        eor \x7, \x7, \t1
        .endm

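        /*
         * inv_gf256 computes the multiplicative inverse in GF(2^8) of all
         * 128 byte slices at once (8 blocks x 16 bytes, one bit plane per
         * input register); this is the non-linear core of SubBytes.
         */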
        .macro inv_gf256, x0, x1, x2, x3, x4, x5, x6, x7, \
                          t0, t1, t2, t3, s0, s1, s2, s3
        eor \t3, \x4, \x6
        eor \t0, \x5, \x7
        eor \t1, \x1, \x3
        eor \s1, \x7, \x6
        eor \s0, \x0, \x2
        eor \s3, \t3, \t0
        orr \t2, \t0, \t1
        and \s2, \t3, \s0
        orr \t3, \t3, \s0
        eor \s0, \s0, \t1
        and \t0, \t0, \t1
        eor \t1, \x3, \x2
        and \s3, \s3, \s0
        and \s1, \s1, \t1
        eor \t1, \x4, \x5
        eor \s0, \x1, \x0
        eor \t3, \t3, \s1
        eor \t2, \t2, \s1
        and \s1, \t1, \s0
        orr \t1, \t1, \s0
        eor \t3, \t3, \s3
        eor \t0, \t0, \s1
        eor \t2, \t2, \s2
        eor \t1, \t1, \s3
        eor \t0, \t0, \s2
        and \s0, \x7, \x3
        eor \t1, \t1, \s2
        and \s1, \x6, \x2
        and \s2, \x5, \x1
        orr \s3, \x4, \x0
        eor \t3, \t3, \s0
        eor \t1, \t1, \s2
        eor \s0, \t0, \s3
        eor \t2, \t2, \s1
        and \s2, \t3, \t1
        eor \s1, \t2, \s2
        eor \s3, \s0, \s2
        bsl \s1, \t1, \s0
        not \t0, \s0
        bsl \s0, \s1, \s3
        bsl \t0, \s1, \s3
        bsl \s3, \t3, \t2
        eor \t3, \t3, \t2
        and \s2, \s0, \s3
        eor \t1, \t1, \t0
        eor \s2, \s2, \t3
        mul_gf16_2 \x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \
                   \s3, \s2, \s1, \t1, \s0, \t0, \t2, \t3
        .endm

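        /*
         * Full bit-sliced SubBytes/InvSubBytes: change basis, invert in
         * GF(2^8), change back. The result comes back with the registers
         * permuted, which the callers account for in the operand order of
         * the subsequent macros.
         */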
        .macro sbox, b0, b1, b2, b3, b4, b5, b6, b7, \
                     t0, t1, t2, t3, s0, s1, s2, s3
        in_bs_ch \b0\().16b, \b1\().16b, \b2\().16b, \b3\().16b, \
                 \b4\().16b, \b5\().16b, \b6\().16b, \b7\().16b
        inv_gf256 \b6\().16b, \b5\().16b, \b0\().16b, \b3\().16b, \
                  \b7\().16b, \b1\().16b, \b4\().16b, \b2\().16b, \
                  \t0\().16b, \t1\().16b, \t2\().16b, \t3\().16b, \
                  \s0\().16b, \s1\().16b, \s2\().16b, \s3\().16b
        out_bs_ch \b7\().16b, \b1\().16b, \b4\().16b, \b2\().16b, \
                  \b6\().16b, \b5\().16b, \b0\().16b, \b3\().16b
        .endm

        .macro inv_sbox, b0, b1, b2, b3, b4, b5, b6, b7, \
                         t0, t1, t2, t3, s0, s1, s2, s3
        inv_in_bs_ch \b0\().16b, \b1\().16b, \b2\().16b, \b3\().16b, \
                     \b4\().16b, \b5\().16b, \b6\().16b, \b7\().16b
        inv_gf256 \b5\().16b, \b1\().16b, \b2\().16b, \b6\().16b, \
                  \b3\().16b, \b7\().16b, \b0\().16b, \b4\().16b, \
                  \t0\().16b, \t1\().16b, \t2\().16b, \t3\().16b, \
                  \s0\().16b, \s1\().16b, \s2\().16b, \s3\().16b
        inv_out_bs_ch \b3\().16b, \b7\().16b, \b0\().16b, \b4\().16b, \
                      \b5\().16b, \b1\().16b, \b2\().16b, \b6\().16b
        .endm

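        /*
         * Load the next eight bit-sliced round key planes into v16-v23:
         * enc_next_rk walks the key schedule forwards, dec_next_rk walks it
         * backwards. add_round_key then XORs them into the state.
         */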
        .macro enc_next_rk
        ldp q16, q17, [bskey], #128
        ldp q18, q19, [bskey, #-96]
        ldp q20, q21, [bskey, #-64]
        ldp q22, q23, [bskey, #-32]
        .endm

        .macro dec_next_rk
        ldp q16, q17, [bskey, #-128]!
        ldp q18, q19, [bskey, #32]
        ldp q20, q21, [bskey, #64]
        ldp q22, q23, [bskey, #96]
        .endm

        .macro add_round_key, x0, x1, x2, x3, x4, x5, x6, x7
        eor \x0\().16b, \x0\().16b, v16.16b
        eor \x1\().16b, \x1\().16b, v17.16b
        eor \x2\().16b, \x2\().16b, v18.16b
        eor \x3\().16b, \x3\().16b, v19.16b
        eor \x4\().16b, \x4\().16b, v20.16b
        eor \x5\().16b, \x5\().16b, v21.16b
        eor \x6\().16b, \x6\().16b, v22.16b
        eor \x7\().16b, \x7\().16b, v23.16b
        .endm

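        /*
         * ShiftRows/InvShiftRows as a byte permutation of each bit plane,
         * using one of the tbl mask constants defined further down.
         */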
        .macro shift_rows, x0, x1, x2, x3, x4, x5, x6, x7, mask
        tbl \x0\().16b, {\x0\().16b}, \mask\().16b
        tbl \x1\().16b, {\x1\().16b}, \mask\().16b
        tbl \x2\().16b, {\x2\().16b}, \mask\().16b
        tbl \x3\().16b, {\x3\().16b}, \mask\().16b
        tbl \x4\().16b, {\x4\().16b}, \mask\().16b
        tbl \x5\().16b, {\x5\().16b}, \mask\().16b
        tbl \x6\().16b, {\x6\().16b}, \mask\().16b
        tbl \x7\().16b, {\x7\().16b}, \mask\().16b
        .endm

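        /*
         * MixColumns on the bit-sliced state: byte rotations of the columns
         * become ext #12 / ext #8 on each bit plane. \inv is left blank on
         * the encryption path and is set by inv_mix_cols, which selects the
         * output register arrangement used on the decryption path.
         */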
        .macro mix_cols, x0, x1, x2, x3, x4, x5, x6, x7, \
                         t0, t1, t2, t3, t4, t5, t6, t7, inv
        ext \t0\().16b, \x0\().16b, \x0\().16b, #12
        ext \t1\().16b, \x1\().16b, \x1\().16b, #12
        eor \x0\().16b, \x0\().16b, \t0\().16b
        ext \t2\().16b, \x2\().16b, \x2\().16b, #12
        eor \x1\().16b, \x1\().16b, \t1\().16b
        ext \t3\().16b, \x3\().16b, \x3\().16b, #12
        eor \x2\().16b, \x2\().16b, \t2\().16b
        ext \t4\().16b, \x4\().16b, \x4\().16b, #12
        eor \x3\().16b, \x3\().16b, \t3\().16b
        ext \t5\().16b, \x5\().16b, \x5\().16b, #12
        eor \x4\().16b, \x4\().16b, \t4\().16b
        ext \t6\().16b, \x6\().16b, \x6\().16b, #12
        eor \x5\().16b, \x5\().16b, \t5\().16b
        ext \t7\().16b, \x7\().16b, \x7\().16b, #12
        eor \x6\().16b, \x6\().16b, \t6\().16b
        eor \t1\().16b, \t1\().16b, \x0\().16b
        eor \x7\().16b, \x7\().16b, \t7\().16b
        ext \x0\().16b, \x0\().16b, \x0\().16b, #8
        eor \t2\().16b, \t2\().16b, \x1\().16b
        eor \t0\().16b, \t0\().16b, \x7\().16b
        eor \t1\().16b, \t1\().16b, \x7\().16b
        ext \x1\().16b, \x1\().16b, \x1\().16b, #8
        eor \t5\().16b, \t5\().16b, \x4\().16b
        eor \x0\().16b, \x0\().16b, \t0\().16b
        eor \t6\().16b, \t6\().16b, \x5\().16b
        eor \x1\().16b, \x1\().16b, \t1\().16b
        ext \t0\().16b, \x4\().16b, \x4\().16b, #8
        eor \t4\().16b, \t4\().16b, \x3\().16b
        ext \t1\().16b, \x5\().16b, \x5\().16b, #8
        eor \t7\().16b, \t7\().16b, \x6\().16b
        ext \x4\().16b, \x3\().16b, \x3\().16b, #8
        eor \t3\().16b, \t3\().16b, \x2\().16b
        ext \x5\().16b, \x7\().16b, \x7\().16b, #8
        eor \t4\().16b, \t4\().16b, \x7\().16b
        ext \x3\().16b, \x6\().16b, \x6\().16b, #8
        eor \t3\().16b, \t3\().16b, \x7\().16b
        ext \x6\().16b, \x2\().16b, \x2\().16b, #8
        eor \x7\().16b, \t1\().16b, \t5\().16b
        .ifb \inv
        eor \x2\().16b, \t0\().16b, \t4\().16b
        eor \x4\().16b, \x4\().16b, \t3\().16b
        eor \x5\().16b, \x5\().16b, \t7\().16b
        eor \x3\().16b, \x3\().16b, \t6\().16b
        eor \x6\().16b, \x6\().16b, \t2\().16b
        .else
        eor \t3\().16b, \t3\().16b, \x4\().16b
        eor \x5\().16b, \x5\().16b, \t7\().16b
        eor \x2\().16b, \x3\().16b, \t6\().16b
        eor \x3\().16b, \t0\().16b, \t4\().16b
        eor \x4\().16b, \x6\().16b, \t2\().16b
        mov \x6\().16b, \t3\().16b
        .endif
        .endm

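        /*
         * InvMixColumns, implemented as a preprocessing step followed by the
         * forward MixColumns: each column is first multiplied by
         * 0x04.x^2 + 0x05 (the ext #8 / eor sequence below computes
         * x ^= 0x04.(x ^ rot(x, 2))), which turns the forward MixColumns
         * matrix into the inverse one.
         */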
        .macro inv_mix_cols, x0, x1, x2, x3, x4, x5, x6, x7, \
                             t0, t1, t2, t3, t4, t5, t6, t7
        ext \t0\().16b, \x0\().16b, \x0\().16b, #8
        ext \t6\().16b, \x6\().16b, \x6\().16b, #8
        ext \t7\().16b, \x7\().16b, \x7\().16b, #8
        eor \t0\().16b, \t0\().16b, \x0\().16b
        ext \t1\().16b, \x1\().16b, \x1\().16b, #8
        eor \t6\().16b, \t6\().16b, \x6\().16b
        ext \t2\().16b, \x2\().16b, \x2\().16b, #8
        eor \t7\().16b, \t7\().16b, \x7\().16b
        ext \t3\().16b, \x3\().16b, \x3\().16b, #8
        eor \t1\().16b, \t1\().16b, \x1\().16b
        ext \t4\().16b, \x4\().16b, \x4\().16b, #8
        eor \t2\().16b, \t2\().16b, \x2\().16b
        ext \t5\().16b, \x5\().16b, \x5\().16b, #8
        eor \t3\().16b, \t3\().16b, \x3\().16b
        eor \t4\().16b, \t4\().16b, \x4\().16b
        eor \t5\().16b, \t5\().16b, \x5\().16b
        eor \x0\().16b, \x0\().16b, \t6\().16b
        eor \x1\().16b, \x1\().16b, \t6\().16b
        eor \x2\().16b, \x2\().16b, \t0\().16b
        eor \x4\().16b, \x4\().16b, \t2\().16b
        eor \x3\().16b, \x3\().16b, \t1\().16b
        eor \x1\().16b, \x1\().16b, \t7\().16b
        eor \x2\().16b, \x2\().16b, \t7\().16b
        eor \x4\().16b, \x4\().16b, \t6\().16b
        eor \x5\().16b, \x5\().16b, \t3\().16b
        eor \x3\().16b, \x3\().16b, \t6\().16b
        eor \x6\().16b, \x6\().16b, \t4\().16b
        eor \x4\().16b, \x4\().16b, \t7\().16b
        eor \x5\().16b, \x5\().16b, \t7\().16b
        eor \x7\().16b, \x7\().16b, \t5\().16b
        mix_cols \x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \
                 \t0, \t1, \t2, \t3, \t4, \t5, \t6, \t7, 1
        .endm

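        /*
         * swapmove_2x: exchange the bit groups selected by \mask between
         * registers \a and \b (with \b taken \n bits higher), for two pairs
         * at once. This is the standard building block for (un)bitslicing.
         */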
        .macro swapmove_2x, a0, b0, a1, b1, n, mask, t0, t1
        ushr \t0\().2d, \b0\().2d, #\n
        ushr \t1\().2d, \b1\().2d, #\n
        eor \t0\().16b, \t0\().16b, \a0\().16b
        eor \t1\().16b, \t1\().16b, \a1\().16b
        and \t0\().16b, \t0\().16b, \mask\().16b
        and \t1\().16b, \t1\().16b, \mask\().16b
        eor \a0\().16b, \a0\().16b, \t0\().16b
        shl \t0\().2d, \t0\().2d, #\n
        eor \a1\().16b, \a1\().16b, \t1\().16b
        shl \t1\().2d, \t1\().2d, #\n
        eor \b0\().16b, \b0\().16b, \t0\().16b
        eor \b1\().16b, \b1\().16b, \t1\().16b
        .endm

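        /*
         * Convert eight 128-bit registers to the bit-sliced representation,
         * where register k holds bit k of every byte, and back again: the
         * same sequence of swapmoves performs both the forward and the
         * inverse transform.
         */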
        .macro bitslice, x7, x6, x5, x4, x3, x2, x1, x0, t0, t1, t2, t3
        movi \t0\().16b, #0x55
        movi \t1\().16b, #0x33
        swapmove_2x \x0, \x1, \x2, \x3, 1, \t0, \t2, \t3
        swapmove_2x \x4, \x5, \x6, \x7, 1, \t0, \t2, \t3
        movi \t0\().16b, #0x0f
        swapmove_2x \x0, \x2, \x1, \x3, 2, \t1, \t2, \t3
        swapmove_2x \x4, \x6, \x5, \x7, 2, \t1, \t2, \t3
        swapmove_2x \x0, \x4, \x1, \x5, 4, \t0, \t2, \t3
        swapmove_2x \x2, \x6, \x3, \x7, 4, \t0, \t2, \t3
        .endm

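        /*
         * tbl permutation vectors: M0 is the byte order of the bit-sliced
         * representation, SR/ISR implement ShiftRows/InvShiftRows, and
         * M0SR/SRM0/M0ISR/ISRM0 are fused combinations so that a single tbl
         * per register suffices on the way into and out of bit-sliced form.
         */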
        .align 6
M0:     .octa 0x0004080c0105090d02060a0e03070b0f

M0SR:   .octa 0x0004080c05090d010a0e02060f03070b
SR:     .octa 0x0f0e0d0c0a09080b0504070600030201
SRM0:   .octa 0x01060b0c0207080d0304090e00050a0f

M0ISR:  .octa 0x0004080c0d0105090a0e0206070b0f03
ISR:    .octa 0x0f0e0d0c080b0a090504070602010003
ISRM0:  .octa 0x0306090c00070a0d01040b0e0205080f

        /*
         * void aesbs_convert_key(u8 out[], u32 const rk[], int rounds)
         */
SYM_FUNC_START(aesbs_convert_key)
        ld1 {v7.4s}, [x1], #16          // load round 0 key
        ld1 {v17.4s}, [x1], #16         // load round 1 key

        movi v8.16b, #0x01              // bit masks
        movi v9.16b, #0x02
        movi v10.16b, #0x04
        movi v11.16b, #0x08
        movi v12.16b, #0x10
        movi v13.16b, #0x20
        movi v14.16b, #0x40
        movi v15.16b, #0x80
        ldr q16, M0

        sub x2, x2, #1
        str q7, [x0], #16               // save round 0 key

.Lkey_loop:
        tbl v7.16b, {v17.16b}, v16.16b
        ld1 {v17.4s}, [x1], #16         // load next round key

        cmtst v0.16b, v7.16b, v8.16b
        cmtst v1.16b, v7.16b, v9.16b
        cmtst v2.16b, v7.16b, v10.16b
        cmtst v3.16b, v7.16b, v11.16b
        cmtst v4.16b, v7.16b, v12.16b
        cmtst v5.16b, v7.16b, v13.16b
        cmtst v6.16b, v7.16b, v14.16b
        cmtst v7.16b, v7.16b, v15.16b
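        /*
         * Inverting bit planes 0, 1, 5 and 6 XORs 0x63 into every byte of
         * this round key, accounting for the affine constant of the S-box
         * which the bit-sliced sbox macro does not add itself.
         */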
        not v0.16b, v0.16b
        not v1.16b, v1.16b
        not v5.16b, v5.16b
        not v6.16b, v6.16b

        subs x2, x2, #1
        stp q0, q1, [x0], #128
        stp q2, q3, [x0, #-96]
        stp q4, q5, [x0, #-64]
        stp q6, q7, [x0, #-32]
        b.ne .Lkey_loop

        movi v7.16b, #0x63              // compose the 0x63 constant
        eor v17.16b, v17.16b, v7.16b    // XOR it into the final round key
        str q17, [x0]
        ret
SYM_FUNC_END(aesbs_convert_key)

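        /*
         * aesbs_encrypt8 - encrypt 8 blocks in parallel
         *
         * The blocks are expected in v0-v7, with bskey pointing at the
         * bit-sliced key schedule and rounds holding the round count. The
         * result is returned in v0-v7, in the permuted register order that
         * the callers pass as output operands.
         */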
        .align 4
SYM_FUNC_START_LOCAL(aesbs_encrypt8)
        ldr q9, [bskey], #16            // round 0 key
        ldr q8, M0SR
        ldr q24, SR

        eor v10.16b, v0.16b, v9.16b     // xor with round0 key
        eor v11.16b, v1.16b, v9.16b
        tbl v0.16b, {v10.16b}, v8.16b
        eor v12.16b, v2.16b, v9.16b
        tbl v1.16b, {v11.16b}, v8.16b
        eor v13.16b, v3.16b, v9.16b
        tbl v2.16b, {v12.16b}, v8.16b
        eor v14.16b, v4.16b, v9.16b
        tbl v3.16b, {v13.16b}, v8.16b
        eor v15.16b, v5.16b, v9.16b
        tbl v4.16b, {v14.16b}, v8.16b
        eor v10.16b, v6.16b, v9.16b
        tbl v5.16b, {v15.16b}, v8.16b
        eor v11.16b, v7.16b, v9.16b
        tbl v6.16b, {v10.16b}, v8.16b
        tbl v7.16b, {v11.16b}, v8.16b

        bitslice v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11

        sub rounds, rounds, #1
        b .Lenc_sbox

.Lenc_loop:
        shift_rows v0, v1, v2, v3, v4, v5, v6, v7, v24
.Lenc_sbox:
        sbox v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, \
             v13, v14, v15
        subs rounds, rounds, #1
        b.cc .Lenc_done

        enc_next_rk

        mix_cols v0, v1, v4, v6, v3, v7, v2, v5, v8, v9, v10, v11, v12, \
                 v13, v14, v15

        add_round_key v0, v1, v2, v3, v4, v5, v6, v7

        b.ne .Lenc_loop
        ldr q24, SRM0
        b .Lenc_loop

.Lenc_done:
        ldr q12, [bskey]                // last round key

        bitslice v0, v1, v4, v6, v3, v7, v2, v5, v8, v9, v10, v11

        eor v0.16b, v0.16b, v12.16b
        eor v1.16b, v1.16b, v12.16b
        eor v4.16b, v4.16b, v12.16b
        eor v6.16b, v6.16b, v12.16b
        eor v3.16b, v3.16b, v12.16b
        eor v7.16b, v7.16b, v12.16b
        eor v2.16b, v2.16b, v12.16b
        eor v5.16b, v5.16b, v12.16b
        ret
SYM_FUNC_END(aesbs_encrypt8)

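        /*
         * aesbs_decrypt8 - decrypt 8 blocks in parallel, with the same
         * calling convention as aesbs_encrypt8 but a different output
         * register order.
         */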
        .align 4
SYM_FUNC_START_LOCAL(aesbs_decrypt8)
        lsl x9, rounds, #7
        add bskey, bskey, x9

        ldr q9, [bskey, #-112]!         // round 0 key
        ldr q8, M0ISR
        ldr q24, ISR

        eor v10.16b, v0.16b, v9.16b     // xor with round0 key
        eor v11.16b, v1.16b, v9.16b
        tbl v0.16b, {v10.16b}, v8.16b
        eor v12.16b, v2.16b, v9.16b
        tbl v1.16b, {v11.16b}, v8.16b
        eor v13.16b, v3.16b, v9.16b
        tbl v2.16b, {v12.16b}, v8.16b
        eor v14.16b, v4.16b, v9.16b
        tbl v3.16b, {v13.16b}, v8.16b
        eor v15.16b, v5.16b, v9.16b
        tbl v4.16b, {v14.16b}, v8.16b
        eor v10.16b, v6.16b, v9.16b
        tbl v5.16b, {v15.16b}, v8.16b
        eor v11.16b, v7.16b, v9.16b
        tbl v6.16b, {v10.16b}, v8.16b
        tbl v7.16b, {v11.16b}, v8.16b

        bitslice v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11

        sub rounds, rounds, #1
        b .Ldec_sbox

.Ldec_loop:
        shift_rows v0, v1, v2, v3, v4, v5, v6, v7, v24
.Ldec_sbox:
        inv_sbox v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, \
                 v13, v14, v15
        subs rounds, rounds, #1
        b.cc .Ldec_done

        dec_next_rk

        add_round_key v0, v1, v6, v4, v2, v7, v3, v5

        inv_mix_cols v0, v1, v6, v4, v2, v7, v3, v5, v8, v9, v10, v11, v12, \
                     v13, v14, v15

        b.ne .Ldec_loop
        ldr q24, ISRM0
        b .Ldec_loop
.Ldec_done:
        ldr q12, [bskey, #-16]          // last round key

        bitslice v0, v1, v6, v4, v2, v7, v3, v5, v8, v9, v10, v11

        eor v0.16b, v0.16b, v12.16b
        eor v1.16b, v1.16b, v12.16b
        eor v6.16b, v6.16b, v12.16b
        eor v4.16b, v4.16b, v12.16b
        eor v2.16b, v2.16b, v12.16b
        eor v7.16b, v7.16b, v12.16b
        eor v3.16b, v3.16b, v12.16b
        eor v5.16b, v5.16b, v12.16b
        ret
SYM_FUNC_END(aesbs_decrypt8)

        /*
         * aesbs_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[],
         *                   int rounds, int blocks)
         * aesbs_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[],
         *                   int rounds, int blocks)
         */
        .macro __ecb_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7
        frame_push 5

        mov x19, x0
        mov x20, x1
        mov x21, x2
        mov x22, x3
        mov x23, x4

99:     mov x5, #1
        lsl x5, x5, x23
        subs w23, w23, #8
        csel x23, x23, xzr, pl
        csel x5, x5, xzr, mi

        ld1 {v0.16b}, [x20], #16
        tbnz x5, #1, 0f
        ld1 {v1.16b}, [x20], #16
        tbnz x5, #2, 0f
        ld1 {v2.16b}, [x20], #16
        tbnz x5, #3, 0f
        ld1 {v3.16b}, [x20], #16
        tbnz x5, #4, 0f
        ld1 {v4.16b}, [x20], #16
        tbnz x5, #5, 0f
        ld1 {v5.16b}, [x20], #16
        tbnz x5, #6, 0f
        ld1 {v6.16b}, [x20], #16
        tbnz x5, #7, 0f
        ld1 {v7.16b}, [x20], #16

0:      mov bskey, x21
        mov rounds, x22
        bl \do8

        st1 {\o0\().16b}, [x19], #16
        tbnz x5, #1, 1f
        st1 {\o1\().16b}, [x19], #16
        tbnz x5, #2, 1f
        st1 {\o2\().16b}, [x19], #16
        tbnz x5, #3, 1f
        st1 {\o3\().16b}, [x19], #16
        tbnz x5, #4, 1f
        st1 {\o4\().16b}, [x19], #16
        tbnz x5, #5, 1f
        st1 {\o5\().16b}, [x19], #16
        tbnz x5, #6, 1f
        st1 {\o6\().16b}, [x19], #16
        tbnz x5, #7, 1f
        st1 {\o7\().16b}, [x19], #16

        cbz x23, 1f
        b 99b

1:      frame_pop
        ret
        .endm

        .align 4
SYM_FUNC_START(aesbs_ecb_encrypt)
        __ecb_crypt aesbs_encrypt8, v0, v1, v4, v6, v3, v7, v2, v5
SYM_FUNC_END(aesbs_ecb_encrypt)

        .align 4
SYM_FUNC_START(aesbs_ecb_decrypt)
        __ecb_crypt aesbs_decrypt8, v0, v1, v6, v4, v2, v7, v3, v5
SYM_FUNC_END(aesbs_ecb_decrypt)

        /*
         * aesbs_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[],
         *                   int rounds, int blocks, u8 iv[])
         */
        .align 4
SYM_FUNC_START(aesbs_cbc_decrypt)
        frame_push 6

        mov x19, x0
        mov x20, x1
        mov x21, x2
        mov x22, x3
        mov x23, x4
        mov x24, x5

99:     mov x6, #1
        lsl x6, x6, x23
        subs w23, w23, #8
        csel x23, x23, xzr, pl
        csel x6, x6, xzr, mi

        ld1 {v0.16b}, [x20], #16
        mov v25.16b, v0.16b
        tbnz x6, #1, 0f
        ld1 {v1.16b}, [x20], #16
        mov v26.16b, v1.16b
        tbnz x6, #2, 0f
        ld1 {v2.16b}, [x20], #16
        mov v27.16b, v2.16b
        tbnz x6, #3, 0f
        ld1 {v3.16b}, [x20], #16
        mov v28.16b, v3.16b
        tbnz x6, #4, 0f
        ld1 {v4.16b}, [x20], #16
        mov v29.16b, v4.16b
        tbnz x6, #5, 0f
        ld1 {v5.16b}, [x20], #16
        mov v30.16b, v5.16b
        tbnz x6, #6, 0f
        ld1 {v6.16b}, [x20], #16
        mov v31.16b, v6.16b
        tbnz x6, #7, 0f
        ld1 {v7.16b}, [x20]

0:      mov bskey, x21
        mov rounds, x22
        bl aesbs_decrypt8

        ld1 {v24.16b}, [x24]            // load IV

        eor v1.16b, v1.16b, v25.16b
        eor v6.16b, v6.16b, v26.16b
        eor v4.16b, v4.16b, v27.16b
        eor v2.16b, v2.16b, v28.16b
        eor v7.16b, v7.16b, v29.16b
        eor v0.16b, v0.16b, v24.16b
        eor v3.16b, v3.16b, v30.16b
        eor v5.16b, v5.16b, v31.16b

        st1 {v0.16b}, [x19], #16
        mov v24.16b, v25.16b
        tbnz x6, #1, 1f
        st1 {v1.16b}, [x19], #16
        mov v24.16b, v26.16b
        tbnz x6, #2, 1f
        st1 {v6.16b}, [x19], #16
        mov v24.16b, v27.16b
        tbnz x6, #3, 1f
        st1 {v4.16b}, [x19], #16
        mov v24.16b, v28.16b
        tbnz x6, #4, 1f
        st1 {v2.16b}, [x19], #16
        mov v24.16b, v29.16b
        tbnz x6, #5, 1f
        st1 {v7.16b}, [x19], #16
        mov v24.16b, v30.16b
        tbnz x6, #6, 1f
        st1 {v3.16b}, [x19], #16
        mov v24.16b, v31.16b
        tbnz x6, #7, 1f
        ld1 {v24.16b}, [x20], #16
        st1 {v5.16b}, [x19], #16
1:      st1 {v24.16b}, [x24]            // store IV

        cbz x23, 2f
        b 99b

2:      frame_pop
        ret
SYM_FUNC_END(aesbs_cbc_decrypt)

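        /*
         * Derive the next XTS tweak from \in: multiply by x in GF(2^128),
         * i.e. shift the 128-bit tweak left by one bit and XOR in the
         * reduction constant 0x87 (held in \const) when a bit falls off
         * the top.
         */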
        .macro next_tweak, out, in, const, tmp
        sshr \tmp\().2d, \in\().2d, #63
        and \tmp\().16b, \tmp\().16b, \const\().16b
        add \out\().2d, \in\().2d, \in\().2d
        ext \tmp\().16b, \tmp\().16b, \tmp\().16b, #8
        eor \out\().16b, \out\().16b, \tmp\().16b
        .endm

        /*
         * aesbs_xts_encrypt(u8 out[], u8 const in[], u8 const rk[],
         *                   int rounds, int blocks, u8 iv[])
         * aesbs_xts_decrypt(u8 out[], u8 const in[], u8 const rk[],
         *                   int rounds, int blocks, u8 iv[])
         */
SYM_FUNC_START_LOCAL(__xts_crypt8)
        movi v18.2s, #0x1
        movi v19.2s, #0x87
        uzp1 v18.4s, v18.4s, v19.4s

        ld1 {v0.16b-v3.16b}, [x1], #64
        ld1 {v4.16b-v7.16b}, [x1], #64

        next_tweak v26, v25, v18, v19
        next_tweak v27, v26, v18, v19
        next_tweak v28, v27, v18, v19
        next_tweak v29, v28, v18, v19
        next_tweak v30, v29, v18, v19
        next_tweak v31, v30, v18, v19
        next_tweak v16, v31, v18, v19
        next_tweak v17, v16, v18, v19

        eor v0.16b, v0.16b, v25.16b
        eor v1.16b, v1.16b, v26.16b
        eor v2.16b, v2.16b, v27.16b
        eor v3.16b, v3.16b, v28.16b
        eor v4.16b, v4.16b, v29.16b
        eor v5.16b, v5.16b, v30.16b
        eor v6.16b, v6.16b, v31.16b
        eor v7.16b, v7.16b, v16.16b

        stp q16, q17, [sp, #16]

        mov bskey, x2
        mov rounds, x3
        br x16
SYM_FUNC_END(__xts_crypt8)

        .macro __xts_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7
        stp x29, x30, [sp, #-48]!
        mov x29, sp

        ld1 {v25.16b}, [x5]

0:      adr x16, \do8
        bl __xts_crypt8

        eor v16.16b, \o0\().16b, v25.16b
        eor v17.16b, \o1\().16b, v26.16b
        eor v18.16b, \o2\().16b, v27.16b
        eor v19.16b, \o3\().16b, v28.16b

        ldp q24, q25, [sp, #16]

        eor v20.16b, \o4\().16b, v29.16b
        eor v21.16b, \o5\().16b, v30.16b
        eor v22.16b, \o6\().16b, v31.16b
        eor v23.16b, \o7\().16b, v24.16b

        st1 {v16.16b-v19.16b}, [x0], #64
        st1 {v20.16b-v23.16b}, [x0], #64

        subs x4, x4, #8
        b.gt 0b

        st1 {v25.16b}, [x5]
        ldp x29, x30, [sp], #48
        ret
        .endm

SYM_FUNC_START(aesbs_xts_encrypt)
        __xts_crypt aesbs_encrypt8, v0, v1, v4, v6, v3, v7, v2, v5
SYM_FUNC_END(aesbs_xts_encrypt)

SYM_FUNC_START(aesbs_xts_decrypt)
        __xts_crypt aesbs_decrypt8, v0, v1, v6, v4, v2, v7, v3, v5
SYM_FUNC_END(aesbs_xts_decrypt)

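        /*
         * Generate the next counter block: the 128-bit big-endian counter is
         * kept in x7 (high) and x8 (low), incremented, and byte-swapped back
         * into the output vector register.
         */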
        .macro next_ctr, v
        mov \v\().d[1], x8
        adds x8, x8, #1
        mov \v\().d[0], x7
        adc x7, x7, xzr
        rev64 \v\().16b, \v\().16b
        .endm

        /*
         * aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[],
         *                   int rounds, int blocks, u8 iv[])
         */
SYM_FUNC_START(aesbs_ctr_encrypt)
        stp x29, x30, [sp, #-16]!
        mov x29, sp

        ldp x7, x8, [x5]
        ld1 {v0.16b}, [x5]
CPU_LE( rev x7, x7 )
CPU_LE( rev x8, x8 )
        adds x8, x8, #1
        adc x7, x7, xzr

0:      next_ctr v1
        next_ctr v2
        next_ctr v3
        next_ctr v4
        next_ctr v5
        next_ctr v6
        next_ctr v7

        mov bskey, x2
        mov rounds, x3
        bl aesbs_encrypt8

        ld1 { v8.16b-v11.16b}, [x1], #64
        ld1 {v12.16b-v15.16b}, [x1], #64

        eor v8.16b, v0.16b, v8.16b
        eor v9.16b, v1.16b, v9.16b
        eor v10.16b, v4.16b, v10.16b
        eor v11.16b, v6.16b, v11.16b
        eor v12.16b, v3.16b, v12.16b
        eor v13.16b, v7.16b, v13.16b
        eor v14.16b, v2.16b, v14.16b
        eor v15.16b, v5.16b, v15.16b

        st1 { v8.16b-v11.16b}, [x0], #64
        st1 {v12.16b-v15.16b}, [x0], #64

        next_ctr v0
        subs x4, x4, #8
        b.gt 0b

        st1 {v0.16b}, [x5]
        ldp x29, x30, [sp], #16
        ret
SYM_FUNC_END(aesbs_ctr_encrypt)