/*
 * sha512-ce-core.S - core SHA-384/SHA-512 transform using ARMv8 Crypto
 * Extensions
 */
#include <linux/linkage.h>
#include <asm/assembler.h>

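	/*
	 * The SHA-512 instructions are emitted below as raw .inst opcodes, so
	 * the file assembles even when the toolchain lacks the SHA-512
	 * extension. This block defines the symbols .Lq<n> and .Lv<n>.2d that
	 * translate each register operand into the numeric value OR-ed into
	 * the opcode's register fields.
	 */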
	.irp		b,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
	.set		.Lq\b, \b
	.set		.Lv\b\().2d, \b
	.endr

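	/*
	 * sha512h/sha512h2 perform the hashing of two rounds, and
	 * sha512su0/sha512su1 update the message schedule. The register
	 * numbers are OR-ed into the Rd (bits [4:0]), Rn (bits [9:5]) and
	 * Rm (bits [20:16]) fields of each fixed base opcode.
	 */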
	.macro		sha512h, rd, rn, rm
	.inst		0xce608000 | .L\rd | (.L\rn << 5) | (.L\rm << 16)
	.endm

	.macro		sha512h2, rd, rn, rm
	.inst		0xce608400 | .L\rd | (.L\rn << 5) | (.L\rm << 16)
	.endm

	.macro		sha512su0, rd, rn
	.inst		0xcec08000 | .L\rd | (.L\rn << 5)
	.endm

	.macro		sha512su1, rd, rn, rm
	.inst		0xce608800 | .L\rd | (.L\rn << 5) | (.L\rm << 16)
	.endm

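	/*
	 * The SHA-512 round constants K[0..79]
	 */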
	.section	".rodata", "a"
	.align		4
.Lsha512_rcon:
	.quad		0x428a2f98d728ae22, 0x7137449123ef65cd
	.quad		0xb5c0fbcfec4d3b2f, 0xe9b5dba58189dbbc
	.quad		0x3956c25bf348b538, 0x59f111f1b605d019
	.quad		0x923f82a4af194f9b, 0xab1c5ed5da6d8118
	.quad		0xd807aa98a3030242, 0x12835b0145706fbe
	.quad		0x243185be4ee4b28c, 0x550c7dc3d5ffb4e2
	.quad		0x72be5d74f27b896f, 0x80deb1fe3b1696b1
	.quad		0x9bdc06a725c71235, 0xc19bf174cf692694
	.quad		0xe49b69c19ef14ad2, 0xefbe4786384f25e3
	.quad		0x0fc19dc68b8cd5b5, 0x240ca1cc77ac9c65
	.quad		0x2de92c6f592b0275, 0x4a7484aa6ea6e483
	.quad		0x5cb0a9dcbd41fbd4, 0x76f988da831153b5
	.quad		0x983e5152ee66dfab, 0xa831c66d2db43210
	.quad		0xb00327c898fb213f, 0xbf597fc7beef0ee4
	.quad		0xc6e00bf33da88fc2, 0xd5a79147930aa725
	.quad		0x06ca6351e003826f, 0x142929670a0e6e70
	.quad		0x27b70a8546d22ffc, 0x2e1b21385c26c926
	.quad		0x4d2c6dfc5ac42aed, 0x53380d139d95b3df
	.quad		0x650a73548baf63de, 0x766a0abb3c77b2a8
	.quad		0x81c2c92e47edaee6, 0x92722c851482353b
	.quad		0xa2bfe8a14cf10364, 0xa81a664bbc423001
	.quad		0xc24b8b70d0f89791, 0xc76c51a30654be30
	.quad		0xd192e819d6ef5218, 0xd69906245565a910
	.quad		0xf40e35855771202a, 0x106aa07032bbd1b8
	.quad		0x19a4c116b8d2d0c8, 0x1e376c085141ab53
	.quad		0x2748774cdf8eeb99, 0x34b0bcb5e19b48a8
	.quad		0x391c0cb3c5c95a63, 0x4ed8aa4ae3418acb
	.quad		0x5b9cca4f7763e373, 0x682e6ff3d6b2b8a3
	.quad		0x748f82ee5defb2fc, 0x78a5636f43172f60
	.quad		0x84c87814a1f0ab72, 0x8cc702081a6439ec
	.quad		0x90befffa23631e28, 0xa4506cebde82bde9
	.quad		0xbef9a3f7b2c67915, 0xc67178f2e372532b
	.quad		0xca273eceea26619c, 0xd186b8c721c0c207
	.quad		0xeada7dd6cde0eb1e, 0xf57d4f7fee6ed178
	.quad		0x06f067aa72176fba, 0x0a637dc5a2c898a6
	.quad		0x113f9804bef90dae, 0x1b710b35131c471b
	.quad		0x28db77f523047d84, 0x32caab7b40c72493
	.quad		0x3c9ebe0a15c9bebc, 0x431d67c49c100d4c
	.quad		0x4cc5d4becb3e42b6, 0x597f299cfc657e2a
	.quad		0x5fcb6fab3ad6faec, 0x6c44198c4a475817

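	/*
	 * Perform one "double round", i.e. two of the 80 SHA-512 rounds, per
	 * invocation:
	 *   i0-i4    the five state/scratch registers, rotated between calls
	 *   rc0      register holding the round constants for this double round
	 *   rc1      if non-blank, the register into which the constants for a
	 *            later double round are preloaded
	 *   in0-in4  message schedule registers; if in1 is blank (the final
	 *            eight calls), the schedule update of in0 is skipped
	 */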
	.macro		dround, i0, i1, i2, i3, i4, rc0, rc1, in0, in1, in2, in3, in4
	.ifnb		\rc1
	ld1		{v\rc1\().2d}, [x4], #16
	.endif
	add		v5.2d, v\rc0\().2d, v\in0\().2d
	ext		v6.16b, v\i2\().16b, v\i3\().16b, #8
	ext		v5.16b, v5.16b, v5.16b, #8
	ext		v7.16b, v\i1\().16b, v\i2\().16b, #8
	add		v\i3\().2d, v\i3\().2d, v5.2d
	.ifnb		\in1
	ext		v5.16b, v\in3\().16b, v\in4\().16b, #8
	sha512su0	v\in0\().2d, v\in1\().2d
	.endif
	sha512h		q\i3, q6, v7.2d
	.ifnb		\in1
	sha512su1	v\in0\().2d, v\in2\().2d, v5.2d
	.endif
	add		v\i4\().2d, v\i1\().2d, v\i3\().2d
	sha512h2	q\i3, q\i1, v\i0\().2d
	.endm

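	/*
	 * The main transform:
	 *
	 *   x0: pointer to the SHA-512 state (eight 64-bit words)
	 *   x1: pointer to the input data
	 *   w2: number of 128-byte input blocks to process
	 *
	 * Returns, in w0, the number of blocks left unprocessed: zero on
	 * completion, non-zero when cond_yield exits the loop early.
	 */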
	.text
SYM_FUNC_START(sha512_ce_transform)
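	/* load state */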
	ld1		{v8.2d-v11.2d}, [x0]

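	/* load the round constants for the first 4 double rounds */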
	adr_l		x3, .Lsha512_rcon
	ld1		{v20.2d-v23.2d}, [x3], #64

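	/* load a 128-byte input block */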
0:	ld1		{v12.2d-v15.2d}, [x1], #64
	ld1		{v16.2d-v19.2d}, [x1], #64
	sub		w2, w2, #1

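	/* SHA-512 reads big-endian 64-bit words: byte-swap on LE CPUs */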
CPU_LE(	rev64		v12.16b, v12.16b	)
CPU_LE(	rev64		v13.16b, v13.16b	)
CPU_LE(	rev64		v14.16b, v14.16b	)
CPU_LE(	rev64		v15.16b, v15.16b	)
CPU_LE(	rev64		v16.16b, v16.16b	)
CPU_LE(	rev64		v17.16b, v17.16b	)
CPU_LE(	rev64		v18.16b, v18.16b	)
CPU_LE(	rev64		v19.16b, v19.16b	)

	mov		x4, x3			// rc pointer

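	/* take a working copy of the state */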
	mov		v0.16b, v8.16b
	mov		v1.16b, v9.16b
	mov		v2.16b, v10.16b
	mov		v3.16b, v11.16b

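	// The table below tracks which pair of state words (ab, cd, ef, gh)
	// each of v0-v4 holds as the registers rotate through successive
	// dround invocations ('--' denotes the scratch slot):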
	// v0  ab  cd  --  ef  gh  ab
	// v1  cd  --  ef  gh  ab  cd
	// v2  ef  gh  ab  cd  --  ef
	// v3  gh  ab  cd  --  ef  gh
	// v4  --  ef  gh  ab  cd  --

	dround		0, 1, 2, 3, 4, 20, 24, 12, 13, 19, 16, 17
	dround		3, 0, 4, 2, 1, 21, 25, 13, 14, 12, 17, 18
	dround		2, 3, 1, 4, 0, 22, 26, 14, 15, 13, 18, 19
	dround		4, 2, 0, 1, 3, 23, 27, 15, 16, 14, 19, 12
	dround		1, 4, 3, 0, 2, 24, 28, 16, 17, 15, 12, 13

	dround		0, 1, 2, 3, 4, 25, 29, 17, 18, 16, 13, 14
	dround		3, 0, 4, 2, 1, 26, 30, 18, 19, 17, 14, 15
	dround		2, 3, 1, 4, 0, 27, 31, 19, 12, 18, 15, 16
	dround		4, 2, 0, 1, 3, 28, 24, 12, 13, 19, 16, 17
	dround		1, 4, 3, 0, 2, 29, 25, 13, 14, 12, 17, 18

	dround		0, 1, 2, 3, 4, 30, 26, 14, 15, 13, 18, 19
	dround		3, 0, 4, 2, 1, 31, 27, 15, 16, 14, 19, 12
	dround		2, 3, 1, 4, 0, 24, 28, 16, 17, 15, 12, 13
	dround		4, 2, 0, 1, 3, 25, 29, 17, 18, 16, 13, 14
	dround		1, 4, 3, 0, 2, 26, 30, 18, 19, 17, 14, 15

	dround		0, 1, 2, 3, 4, 27, 31, 19, 12, 18, 15, 16
	dround		3, 0, 4, 2, 1, 28, 24, 12, 13, 19, 16, 17
	dround		2, 3, 1, 4, 0, 29, 25, 13, 14, 12, 17, 18
	dround		4, 2, 0, 1, 3, 30, 26, 14, 15, 13, 18, 19
	dround		1, 4, 3, 0, 2, 31, 27, 15, 16, 14, 19, 12

	dround		0, 1, 2, 3, 4, 24, 28, 16, 17, 15, 12, 13
	dround		3, 0, 4, 2, 1, 25, 29, 17, 18, 16, 13, 14
	dround		2, 3, 1, 4, 0, 26, 30, 18, 19, 17, 14, 15
	dround		4, 2, 0, 1, 3, 27, 31, 19, 12, 18, 15, 16
	dround		1, 4, 3, 0, 2, 28, 24, 12, 13, 19, 16, 17

	dround		0, 1, 2, 3, 4, 29, 25, 13, 14, 12, 17, 18
	dround		3, 0, 4, 2, 1, 30, 26, 14, 15, 13, 18, 19
	dround		2, 3, 1, 4, 0, 31, 27, 15, 16, 14, 19, 12
	dround		4, 2, 0, 1, 3, 24, 28, 16, 17, 15, 12, 13
	dround		1, 4, 3, 0, 2, 25, 29, 17, 18, 16, 13, 14

	dround		0, 1, 2, 3, 4, 26, 30, 18, 19, 17, 14, 15
	dround		3, 0, 4, 2, 1, 27, 31, 19, 12, 18, 15, 16
	dround		2, 3, 1, 4, 0, 28, 24, 12
	dround		4, 2, 0, 1, 3, 29, 25, 13
	dround		1, 4, 3, 0, 2, 30, 26, 14

	dround		0, 1, 2, 3, 4, 31, 27, 15
	dround		3, 0, 4, 2, 1, 24, , 16
	dround		2, 3, 1, 4, 0, 25, , 17
	dround		4, 2, 0, 1, 3, 26, , 18
	dround		1, 4, 3, 0, 2, 27, , 19

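	/* update state */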
	add		v8.2d, v8.2d, v0.2d
	add		v9.2d, v9.2d, v1.2d
	add		v10.2d, v10.2d, v2.2d
	add		v11.2d, v11.2d, v3.2d

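	/* return early if the scheduler needs the CPU */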
	cond_yield	3f, x4, x5

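	/* handled all input blocks? */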
	cbnz		w2, 0b

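	/* store new state */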
3:	st1		{v8.2d-v11.2d}, [x0]
	mov		w0, w2			// number of blocks left unprocessed
	ret
SYM_FUNC_END(sha512_ce_transform)