/*
 * SHA-256 block transform for AArch64 using the ARMv8 Crypto Extensions
 * instructions sha256h, sha256h2, sha256su0 and sha256su1.
 *
 * Syntax: GNU as, run through the C preprocessor (uses #include and the
 * kernel helper macros from <linux/linkage.h> and <asm/assembler.h>).
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

	.text
	.arch		armv8-a+crypto		// allow the SHA-256 instructions

	/*
	 * Digest carried from block to block (v20/v21). The q-form and
	 * v-form names refer to the same physical registers.
	 */
	dga		.req	q20
	dgav		.req	v20
	dgb		.req	q21
	dgbv		.req	v21

	/*
	 * Two temporaries that alternately hold "schedule words + round
	 * constants" for the round instructions (see add_only).
	 */
	t0		.req	v22
	t1		.req	v23

	/*
	 * Working state for the block being processed: dg0/dg1 start as
	 * copies of dga/dgb; dg2 is scratch holding a copy of dg0.
	 */
	dg0q		.req	q24
	dg0v		.req	v24
	dg1q		.req	q25
	dg1v		.req	v25
	dg2q		.req	q26
	dg2v		.req	v26

/*
 * add_only ev, rc, s0
 *
 * Emits one sha256h/sha256h2 pair that advances the working state
 * dg0/dg1. The pair consumes a vector of "schedule words + round
 * constants" that an earlier step left in t0 or t1, and the same
 * invocation prepares the vector for the following step in the other
 * temporary, so t0 and t1 alternate:
 *
 *   ev == 0:  t1 = v<s0> + rc;  the rounds consume t0
 *   ev != 0:  t0 = v<s0> + rc (only when s0 is given);  the rounds consume t1
 *
 *   ev  - 0 or non-zero; selects which temporary is consumed
 *   rc  - vector register holding the constants to pre-add (e.g. v13)
 *   s0  - number N of the register vN to pre-add; may be omitted when
 *         ev != 0, in which case nothing is prepared for a later step
 *
 * Clobbers dg2 and the temporary that is written.
 */
	.macro		add_only, ev, rc, s0
	mov		dg2v.16b, dg0v.16b	// keep old dg0: sha256h overwrites it, sha256h2 reads it
	.ifeq		\ev
	add		t1.4s, v\s0\().4s, \rc\().4s
	sha256h		dg0q, dg1q, t0.4s
	sha256h2	dg1q, dg2q, t0.4s
	.else
	.ifnb		\s0
	add		t0.4s, v\s0\().4s, \rc\().4s
	.endif
	sha256h		dg0q, dg1q, t1.4s
	sha256h2	dg1q, dg2q, t1.4s
	.endif
	.endm

/*
 * add_update ev, rc, s0, s1, s2, s3
 *
 * One add_only step (pre-adding rc to v<s1>) wrapped in a
 * sha256su0/sha256su1 pair. The pair rewrites v<s0> in place from
 * v<s0>, v<s1>, v<s2> and v<s3>, producing schedule words that a later
 * step will consume.
 *
 *   ev, rc  - passed through to add_only
 *   s0..s3  - register numbers N (for vN) of the four schedule vectors;
 *             callers rotate them from one invocation to the next
 */
	.macro		add_update, ev, rc, s0, s1, s2, s3
	sha256su0	v\s0\().4s, v\s1\().4s
	add_only	\ev, \rc, \s1
	sha256su1	v\s0\().4s, v\s2\().4s, v\s3\().4s
	.endm

/*
 * The 64 SHA-256 round constants (K[0..63] of FIPS 180-4), stored as
 * read-only data. Four 32-bit words per row, so each row fills one
 * 128-bit vector register when loaded with ld1 {vN.4s}.
 */
	.section	".rodata", "a"
	.align		4			// 2^4 = 16-byte alignment
.Lsha2_rcon:
	.word		0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5
	.word		0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5
	.word		0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3
	.word		0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174
	.word		0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc
	.word		0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da
	.word		0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7
	.word		0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967
	.word		0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13
	.word		0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85
	.word		0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3
	.word		0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070
	.word		0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5
	.word		0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3
	.word		0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208
	.word		0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2

/*
 * sha2_ce_transform - run the SHA-256 compression function over whole
 * 64-byte blocks and, when requested, over a final padding block.
 *
 * In:   x0 = state block. The digest (two 128-bit vectors) sits at
 *            offset 0; a 32-bit 'finalize' field and a 64-bit 'count'
 *            field sit at the offsets stored in the symbols
 *            sha256_ce_offsetof_finalize / sha256_ce_offsetof_count,
 *            which are defined outside this file.
 *       x1 = input data, consumed 64 bytes per block
 *       w2 = number of blocks. Must be at least 1: the counter is
 *            decremented before it is first tested.
 * Out:  w0 = number of blocks left unprocessed. This is 0 unless
 *            cond_yield stopped the loop early.
 *       The updated digest is stored back at [x0].
 *
 * Clobbers x1, w2, x4-x8 and v0-v26.
 * NOTE(review): v8-v15 are written without being saved although AAPCS64
 * makes their low halves callee-saved; presumably callers own the SIMD
 * unit for the duration of the call - confirm against the C glue code.
 */
	.text
SYM_FUNC_START(sha2_ce_transform)
	/* v0-v15 = the 64 round constants, four per register */
	adr_l		x8, .Lsha2_rcon
	ld1		{ v0.4s- v3.4s}, [x8], #64
	ld1		{ v4.4s- v7.4s}, [x8], #64
	ld1		{ v8.4s-v11.4s}, [x8], #64
	ld1		{v12.4s-v15.4s}, [x8]

	/* dga:dgb = current digest from the start of the state block */
	ld1		{dgav.4s, dgbv.4s}, [x0]

	/* w4 = 'finalize' field: fetch its offset, then the field itself */
	ldr_l		w4, sha256_ce_offsetof_finalize, x4
	ldr		w4, [x0, x4]

	/* v16-v19 = next 64-byte input block; x1 steps past it */
0:	ld1		{v16.4s-v19.4s}, [x1], #64
	sub		w2, w2, #1

	/* byte-reverse every 32-bit input word (wrapped in CPU_LE) */
CPU_LE(	rev32		v16.16b, v16.16b	)
CPU_LE(	rev32		v17.16b, v17.16b	)
CPU_LE(	rev32		v18.16b, v18.16b	)
CPU_LE(	rev32		v19.16b, v19.16b	)

	/*
	 * Round sequence for the block held in v16-v19. The padding path
	 * further down re-enters here with a block it built in registers.
	 */
1:	add		t0.4s, v16.4s, v0.4s	// prime t0 for the first step
	mov		dg0v.16b, dgav.16b	// working state = digest
	mov		dg1v.16b, dgbv.16b

	add_update	0,  v1, 16, 17, 18, 19
	add_update	1,  v2, 17, 18, 19, 16
	add_update	0,  v3, 18, 19, 16, 17
	add_update	1,  v4, 19, 16, 17, 18

	add_update	0,  v5, 16, 17, 18, 19
	add_update	1,  v6, 17, 18, 19, 16
	add_update	0,  v7, 18, 19, 16, 17
	add_update	1,  v8, 19, 16, 17, 18

	add_update	0,  v9, 16, 17, 18, 19
	add_update	1, v10, 17, 18, 19, 16
	add_update	0, v11, 18, 19, 16, 17
	add_update	1, v12, 19, 16, 17, 18

	/* the last four steps need no further schedule expansion */
	add_only	0, v13, 17
	add_only	1, v14, 18
	add_only	0, v15, 19
	add_only	1

	/* fold the working state back into the digest */
	add		dgav.4s, dgav.4s, dg0v.4s
	add		dgbv.4s, dgbv.4s, dg1v.4s

	/*
	 * No blocks left: go and see whether padding is wanted. Otherwise
	 * let cond_yield (from <asm/assembler.h>, x5/x6 passed as its
	 * scratch registers) divert to the exit at 3 if this code should
	 * stop now, else continue with the next block.
	 */
	cbz		w2, 2f
	cond_yield	3f, x5, x6
	b		0b

	/*
	 * Optional final block, taken only when 'finalize' is non-zero.
	 * The block is built directly in v16-v19 as already-swapped words:
	 * word 0 = 0x80000000, words 1-13 = 0, words 14/15 = 'count' shifted
	 * left by 3, high half first. x4 is cleared before
	 * re-entering the rounds so the second visit to label 2 exits.
	 * When 'finalize' is zero nothing is appended here (presumably the
	 * caller handles padding itself in that case).
	 */
2:	cbz		x4, 3f
	ldr_l		w4, sha256_ce_offsetof_count, x4
	ldr		x4, [x0, x4]
	movi		v17.2d, #0
	mov		x8, #0x80000000
	movi		v18.2d, #0
	ror		x7, x4, #29		// ror(lsl(x4, 3), 32)
	fmov		d16, x8			// v16 = { 0x80000000, 0, 0, 0 }
	mov		x4, #0
	mov		v19.d[0], xzr
	mov		v19.d[1], x7
	b		1b

	/* write the digest back and return the remaining block count */
3:	st1		{dgav.4s, dgbv.4s}, [x0]
	mov		w0, w2
	ret
SYM_FUNC_END(sha2_ce_transform)