0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012 #include <linux/linkage.h>
0013 #include <asm/assembler.h>
0014
// Map each vector register name to its encoding number: defines the
// assembler symbols .Lv<n>.2d and .Lv<n>.16b = <n> for n in 0..31, so the
// macros below can turn an operand like "v5.2d" into the 5-bit register
// field of a hand-encoded instruction.
0015 .irp b,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
0016 .set .Lv\b\().2d, \b
0017 .set .Lv\b\().16b, \b
0018 .endr
0019
0020
0021
0022
// eor3 rd, rn, rm, ra:  rd = rn ^ rm ^ ra   (ARMv8.2-SHA3 EOR3)
// Emitted as a raw .inst word so this file assembles even on toolchains
// that do not know the SHA3 extension mnemonics.
0023 .macro eor3, rd, rn, rm, ra
0024 .inst 0xce000000 | .L\rd | (.L\rn << 5) | (.L\ra << 10) | (.L\rm << 16)
0025 .endm
0026
// rax1 rd, rn, rm:  rd = rn ^ rol64(rm, 1)   (ARMv8.2-SHA3 RAX1)
// Hand-encoded via .inst for toolchains without SHA3 support.
0027 .macro rax1, rd, rn, rm
0028 .inst 0xce608c00 | .L\rd | (.L\rn << 5) | (.L\rm << 16)
0029 .endm
0030
// bcax rd, rn, rm, ra:  rd = rn ^ (rm & ~ra)   (ARMv8.2-SHA3 BCAX)
// Hand-encoded via .inst for toolchains without SHA3 support.
0031 .macro bcax, rd, rn, rm, ra
0032 .inst 0xce200000 | .L\rd | (.L\rn << 5) | (.L\ra << 10) | (.L\rm << 16)
0033 .endm
0034
// xar rd, rn, rm, imm6:  rd = ror64(rn ^ rm, imm6)   (ARMv8.2-SHA3 XAR)
// Note the callers pass (64 - r) to get a LEFT rotate by r.
// Hand-encoded via .inst for toolchains without SHA3 support.
0035 .macro xar, rd, rn, rm, imm6
0036 .inst 0xce800000 | .L\rd | (.L\rn << 5) | ((\imm6) << 10) | (.L\rm << 16)
0037 .endm
0038
0039
0040
0041
0042 .text
/*
 * sha3_ce_transform(state, data, blocks, digest_size)
 *
 *   x0: pointer to the 25-lane (25 x u64 = 200 byte) Keccak state
 *   x1: input data to absorb
 *   w2: number of input blocks
 *   x3: digest size in bytes; its bits select the variant/rate below
 *       (bit 6 set -> SHA3-512; else bit 4 / bit 2 pick 384/256/224)
 *
 * Returns in w0 the number of blocks left unprocessed: 0 on normal
 * completion, non-zero only when cond_yield bailed out early.
 *
 * The state lives in the low 64 bits of v0..v24 (loaded/stored as .1d).
 * The SHA3 instructions operate on both 64-bit halves of each vector
 * register; the upper halves are don't-care here.
 */
0043 SYM_FUNC_START(sha3_ce_transform)
0044
	// Load the whole Keccak state into v0..v24, one lane per register.
0045 add x8, x0, #32
0046 ld1 { v0.1d- v3.1d}, [x0]
0047 ld1 { v4.1d- v7.1d}, [x8], #32
0048 ld1 { v8.1d-v11.1d}, [x8], #32
0049 ld1 {v12.1d-v15.1d}, [x8], #32
0050 ld1 {v16.1d-v19.1d}, [x8], #32
0051 ld1 {v20.1d-v23.1d}, [x8], #32
0052 ld1 {v24.1d}, [x8]
0053
	// Outer loop: absorb one block per iteration (w2 counts down).
0054 0: sub w2, w2, #1
0055 mov w8, #24 // 24 rounds of Keccak-p per block
0056 adr_l x9, .Lsha3_rcon // x9 -> round constant table
0057
0058
	// XOR the input into the rate portion of the state. The first
	// 7 lanes are within the rate for every SHA3 variant.
0059 ld1 {v25.8b-v28.8b}, [x1], #32
0060 ld1 {v29.8b-v31.8b}, [x1], #24
0061 eor v0.8b, v0.8b, v25.8b
0062 eor v1.8b, v1.8b, v26.8b
0063 eor v2.8b, v2.8b, v27.8b
0064 eor v3.8b, v3.8b, v28.8b
0065 eor v4.8b, v4.8b, v29.8b
0066 eor v5.8b, v5.8b, v30.8b
0067 eor v6.8b, v6.8b, v31.8b
0068
0069 tbnz x3, #6, 2f // SHA3-512
0070
	// Lanes 7..12 are absorbed by every variant except SHA3-512.
0071 ld1 {v25.8b-v28.8b}, [x1], #32
0072 ld1 {v29.8b-v30.8b}, [x1], #16
0073 eor v7.8b, v7.8b, v25.8b
0074 eor v8.8b, v8.8b, v26.8b
0075 eor v9.8b, v9.8b, v27.8b
0076 eor v10.8b, v10.8b, v28.8b
0077 eor v11.8b, v11.8b, v29.8b
0078 eor v12.8b, v12.8b, v30.8b
0079
0080 tbnz x3, #4, 1f // SHA3-384 or SHA3-224
0081
0082 // SHA3-256: rate is 17 lanes (136 bytes); absorb lanes 13..16
0083 ld1 {v25.8b-v28.8b}, [x1], #32
0084 eor v13.8b, v13.8b, v25.8b
0085 eor v14.8b, v14.8b, v26.8b
0086 eor v15.8b, v15.8b, v27.8b
0087 eor v16.8b, v16.8b, v28.8b
0088 b 3f
0089
0090 1: tbz x3, #2, 3f // bit 2 cleared? SHA-384 (rate = 13 lanes)
0091
0092 // SHA3-224: rate is 18 lanes (144 bytes); absorb lanes 13..17
0093 ld1 {v25.8b-v28.8b}, [x1], #32
0094 ld1 {v29.8b}, [x1], #8
0095 eor v13.8b, v13.8b, v25.8b
0096 eor v14.8b, v14.8b, v26.8b
0097 eor v15.8b, v15.8b, v27.8b
0098 eor v16.8b, v16.8b, v28.8b
0099 eor v17.8b, v17.8b, v29.8b
0100 b 3f
0101
0102 // SHA3-512: rate is only 9 lanes (72 bytes); absorb lanes 7..8
0103 2: ld1 {v25.8b-v26.8b}, [x1], #16
0104 eor v7.8b, v7.8b, v25.8b
0105 eor v8.8b, v8.8b, v26.8b
0106
	// Inner loop: one Keccak-p round per iteration, w8 = rounds left.
0107 3: sub w8, w8, #1
0108
	// Theta, part 1: column parities c[x] = XOR of the five lanes in
	// column x (lanes x, x+5, x+10, x+15, x+20).
	//   v25 = c[0], v26 = c[1], v27 = c[2], v28 = c[3], v29 = c[4]
0109 eor3 v29.16b, v4.16b, v9.16b, v14.16b
0110 eor3 v26.16b, v1.16b, v6.16b, v11.16b
0111 eor3 v28.16b, v3.16b, v8.16b, v13.16b
0112 eor3 v25.16b, v0.16b, v5.16b, v10.16b
0113 eor3 v27.16b, v2.16b, v7.16b, v12.16b
0114 eor3 v29.16b, v29.16b, v19.16b, v24.16b
0115 eor3 v26.16b, v26.16b, v16.16b, v21.16b
0116 eor3 v28.16b, v28.16b, v18.16b, v23.16b
0117 eor3 v25.16b, v25.16b, v15.16b, v20.16b
0118 eor3 v27.16b, v27.16b, v17.16b, v22.16b
0119
	// Theta, part 2: bc[x] = c[x-1] ^ rol64(c[x+1], 1)
0120 rax1 v30.2d, v29.2d, v26.2d // bc[0]
0121 rax1 v26.2d, v26.2d, v28.2d // bc[2]
0122 rax1 v28.2d, v28.2d, v25.2d // bc[4]
0123 rax1 v25.2d, v25.2d, v27.2d // bc[1]
0124 rax1 v27.2d, v27.2d, v29.2d // bc[3]
0125
	// Combined theta (xor bc[x] into each lane of column x), rho
	// (rotate each lane by its fixed offset; xar imm is 64 - offset)
	// and pi (lane permutation, done by the register renaming below).
	// Lane 0's rho offset is 0, hence the plain eor.
0126 eor v0.16b, v0.16b, v30.16b
0127 xar v29.2d, v1.2d, v25.2d, (64 - 1)
0128 xar v1.2d, v6.2d, v25.2d, (64 - 44)
0129 xar v6.2d, v9.2d, v28.2d, (64 - 20)
0130 xar v9.2d, v22.2d, v26.2d, (64 - 61)
0131 xar v22.2d, v14.2d, v28.2d, (64 - 39)
0132 xar v14.2d, v20.2d, v30.2d, (64 - 18)
0133 xar v31.2d, v2.2d, v26.2d, (64 - 62)
0134 xar v2.2d, v12.2d, v26.2d, (64 - 43)
0135 xar v12.2d, v13.2d, v27.2d, (64 - 25)
0136 xar v13.2d, v19.2d, v28.2d, (64 - 8)
0137 xar v19.2d, v23.2d, v27.2d, (64 - 56)
0138 xar v23.2d, v15.2d, v30.2d, (64 - 41)
0139 xar v15.2d, v4.2d, v28.2d, (64 - 27)
0140 xar v28.2d, v24.2d, v28.2d, (64 - 14)
0141 xar v24.2d, v21.2d, v25.2d, (64 - 2)
0142 xar v8.2d, v8.2d, v27.2d, (64 - 55)
0143 xar v4.2d, v16.2d, v25.2d, (64 - 45)
0144 xar v16.2d, v5.2d, v30.2d, (64 - 36)
0145 xar v5.2d, v3.2d, v27.2d, (64 - 28)
0146 xar v27.2d, v18.2d, v27.2d, (64 - 21)
0147 xar v3.2d, v17.2d, v26.2d, (64 - 15)
0148 xar v25.2d, v11.2d, v25.2d, (64 - 10)
0149 xar v26.2d, v7.2d, v26.2d, (64 - 6)
0150 xar v30.2d, v10.2d, v30.2d, (64 - 3)
0151
	// Chi: a[x] = a[x] ^ (~a[x+1] & a[x+2]), one row (5 lanes) per
	// group; bcax computes rn ^ (rm & ~ra) directly.
0152 bcax v20.16b, v31.16b, v22.16b, v8.16b
0153 bcax v21.16b, v8.16b, v23.16b, v22.16b
0154 bcax v22.16b, v22.16b, v24.16b, v23.16b
0155 bcax v23.16b, v23.16b, v31.16b, v24.16b
0156 bcax v24.16b, v24.16b, v8.16b, v31.16b
0157
	// Load the next round constant, replicated into both 64-bit halves.
0158 ld1r {v31.2d}, [x9], #8
0159
0160 bcax v17.16b, v25.16b, v19.16b, v3.16b
0161 bcax v18.16b, v3.16b, v15.16b, v19.16b
0162 bcax v19.16b, v19.16b, v16.16b, v15.16b
0163 bcax v15.16b, v15.16b, v25.16b, v16.16b
0164 bcax v16.16b, v16.16b, v3.16b, v25.16b
0165
0166 bcax v10.16b, v29.16b, v12.16b, v26.16b
0167 bcax v11.16b, v26.16b, v13.16b, v12.16b
0168 bcax v12.16b, v12.16b, v14.16b, v13.16b
0169 bcax v13.16b, v13.16b, v29.16b, v14.16b
0170 bcax v14.16b, v14.16b, v26.16b, v29.16b
0171
0172 bcax v7.16b, v30.16b, v9.16b, v4.16b
0173 bcax v8.16b, v4.16b, v5.16b, v9.16b
0174 bcax v9.16b, v9.16b, v6.16b, v5.16b
0175 bcax v5.16b, v5.16b, v30.16b, v6.16b
0176 bcax v6.16b, v6.16b, v4.16b, v30.16b
0177
0178 bcax v3.16b, v27.16b, v0.16b, v28.16b
0179 bcax v4.16b, v28.16b, v1.16b, v0.16b
0180 bcax v0.16b, v0.16b, v2.16b, v1.16b
0181 bcax v1.16b, v1.16b, v27.16b, v2.16b
0182 bcax v2.16b, v2.16b, v28.16b, v27.16b
0183
	// Iota: xor the round constant into lane 0.
0184 eor v0.16b, v0.16b, v31.16b
0185
0186 cbnz w8, 3b // next of the 24 rounds
	// Between blocks, yield the CPU if the kernel needs it: jumps to 4f
	// (storing state and returning the remaining block count in w0).
	// x8/x9 are clobbered as scratch by the macro.
0187 cond_yield 4f, x8, x9
0188 cbnz w2, 0b // next input block
0189
0190
	// Write the 25-lane state back to memory.
0191 4: st1 { v0.1d- v3.1d}, [x0], #32
0192 st1 { v4.1d- v7.1d}, [x0], #32
0193 st1 { v8.1d-v11.1d}, [x0], #32
0194 st1 {v12.1d-v15.1d}, [x0], #32
0195 st1 {v16.1d-v19.1d}, [x0], #32
0196 st1 {v20.1d-v23.1d}, [x0], #32
0197 st1 {v24.1d}, [x0]
0198 mov w0, w2 // return blocks left unprocessed (0 = all done)
0199 ret
0200 SYM_FUNC_END(sha3_ce_transform)
0201
0202 .section ".rodata", "a"
0203 .align 8
	// The 24 Keccak-f[1600] round constants, consumed one per round
	// by the iota step (ld1r at label 3 above).
0204 .Lsha3_rcon:
0205 .quad 0x0000000000000001, 0x0000000000008082, 0x800000000000808a
0206 .quad 0x8000000080008000, 0x000000000000808b, 0x0000000080000001
0207 .quad 0x8000000080008081, 0x8000000000008009, 0x000000000000008a
0208 .quad 0x0000000000000088, 0x0000000080008009, 0x000000008000000a
0209 .quad 0x000000008000808b, 0x800000000000008b, 0x8000000000008089
0210 .quad 0x8000000000008003, 0x8000000000008002, 0x8000000000000080
0211 .quad 0x000000000000800a, 0x800000008000000a, 0x8000000080008081
0212 .quad 0x8000000000008080, 0x0000000080000001, 0x8000000080008008