0001
0002
0003
0004
0005
0006
0007
0008 #include <linux/linkage.h>
0009 #include <asm/assembler.h>
0010
0011 .text
0012 .arch armv8-a+crypto	// lets the assembler accept the sha1* instructions used below
0013
/*
 * Register aliases shared by the macros and by sha1_ce_transform.
 *
 * k0-k3: the four constants that loadrc broadcasts into every 32-bit lane;
 *        they are added to the message schedule words before each step.
 */
0014 k0 .req v0
0015 k1 .req v1
0016 k2 .req v2
0017 k3 .req v3
0018
/*
 * t0/t1: two temporaries holding "schedule words + constant". The add_only
 * macro alternates between them: one is consumed by the current step while
 * the other is being filled for the next step.
 */
0019 t0 .req v4
0020 t1 .req v5
0021
/*
 * dga/dgb: the digest as kept across blocks. dga (q6/v6) holds the four
 * words loaded from offset 0 of the state, dgb (s7/v7) the single word
 * loaded from offset 16. The q/s/v names are different views of the same
 * two registers.
 */
0022 dga .req q6
0023 dgav .req v6
0024 dgb .req s7
0025 dgbv .req v7
0026
/*
 * dg0*: working copy of dga that is updated by the sha1c/sha1p/sha1m steps.
 * dg1* and dg2s: alternately receive the sha1h result of one step and act
 * as the scalar operand of the following step.
 */
0027 dg0q .req q12
0028 dg0s .req s12
0029 dg0v .req v12
0030 dg1s .req s13
0031 dg1v .req v13
0032 dg2s .req s14
0033
/*
 * add_only op, ev, rc, s0, dg1
 *
 * Emit one hash update step (a single sha1<op> instruction on dg0q) and,
 * interleaved with it, the addition that prepares the input of the step
 * that follows.
 *
 *   op  - suffix pasted onto "sha1" to pick the update instruction
 *         (the caller in this file passes c, p or m)
 *   ev  - the literal token "ev" selects the first variant below; any other
 *         token (the caller passes "od") selects the second variant
 *   rc  - name of the vector register holding the constant to add
 *   s0  - number N of the register vN whose words are added to rc for the
 *         NEXT step; must be given in the ev variant, may be left blank in
 *         the other variant, in which case no add is emitted
 *   dg1 - optional scalar register used as the second sha1<op> operand in
 *         the ev variant instead of dg1s; not used by the other variant
 *
 * The two variants ping-pong their operands: the ev variant consumes t0 and
 * dg1s while producing t1 and dg2s, the other variant consumes t1 and dg2s
 * while producing t0 and dg1s. Steps therefore have to alternate ev/od.
 */
0034 .macro add_only, op, ev, rc, s0, dg1
0035 .ifc \ev, ev
0036 add t1.4s, v\s0\().4s, \rc\().4s	// t1 = input for the next (od) step
0037 sha1h dg2s, dg0s	// derive the scalar operand of the next step from word 0 of dg0
0038 .ifnb \dg1
0039 sha1\op dg0q, \dg1, t0.4s	// caller supplied the scalar operand explicitly
0040 .else
0041 sha1\op dg0q, dg1s, t0.4s	// scalar operand left behind by the previous od step
0042 .endif
0043 .else
0044 .ifnb \s0
0045 add t0.4s, v\s0\().4s, \rc\().4s	// t0 = input for the next (ev) step
0046 .endif
0047 sha1h dg1s, dg0s	// derive the scalar operand of the next step from word 0 of dg0
0048 sha1\op dg0q, dg2s, t1.4s	// scalar operand left behind by the previous ev step
0049 .endif
0050 .endm
0051
/*
 * add_update op, ev, rc, s0, s1, s2, s3, dg1
 *
 * One hash update step (see add_only) wrapped in a message schedule update:
 * register v<s0> is overwritten with new schedule words computed by the
 * sha1su0/sha1su1 pair from v<s0>, v<s1>, v<s2> and v<s3>.
 *
 * Note that the add inside add_only is given s1, not s0: it prepares the
 * input of the next step from v<s1>, which this invocation does not modify.
 * op, ev, rc and dg1 are passed through to add_only unchanged.
 */
0052 .macro add_update, op, ev, rc, s0, s1, s2, s3, dg1
0053 sha1su0 v\s0\().4s, v\s1\().4s, v\s2\().4s	// schedule update, part 1
0054 add_only \op, \ev, \rc, \s1, \dg1
0055 sha1su1 v\s0\().4s, v\s3\().4s	// schedule update, part 2
0056 .endm
0057
/*
 * loadrc k, val, tmp
 *
 * Materialise the 32-bit constant val in the general purpose register tmp
 * and copy it into every lane of the vector operand k.
 *
 *   k   - destination vector operand including its arrangement (e.g. k0.4s)
 *   val - 32-bit constant
 *   tmp - scratch general purpose register; holds val afterwards
 */
0058 .macro loadrc, k, val, tmp
0059 movz \tmp, :abs_g0_nc:\val	// bits 15:0 of val, remaining bits cleared
0060 movk \tmp, :abs_g1:\val	// insert bits 31:16 of val
0061 dup \k, \tmp	// replicate into all lanes of k
0062 .endm
0063
0064
0065
0066
0067
/*
 * sha1_ce_transform
 *
 * Process 64-byte input blocks with the SHA-1 Crypto Extension instructions
 * and update the digest stored at x0.
 *
 * Register usage, as established by the code below:
 *   x0 - base address of the state. The digest is read from and written
 *        back to offsets 0..15 (four words) and 16..19 (one word). Two more
 *        fields are read at the offsets held in the variables
 *        sha1_ce_offsetof_finalize (32-bit load) and sha1_ce_offsetof_count
 *        (64-bit load), both defined outside this file.
 *   x1 - input pointer, post-incremented by 64 for every block consumed
 *   w2 - number of blocks. It is decremented before it is tested, so at
 *        least one block is always processed; a count of zero on entry is
 *        not checked for.
 *   w0 - on return, the value of w2 at exit: zero when all blocks were
 *        consumed, otherwise the number of blocks left when the loop was
 *        left through cond_yield.
 *
 * Modifies x4-x8 and v0-v14.
 * NOTE(review): v8-v14 are used without being saved, although AAPCS64
 * treats the low halves of v8-v15 as callee-saved. Presumably the caller
 * saves and restores the SIMD state around this call - confirm.
 * NOTE(review): the C prototype is not visible in this file; the pointer
 * and integer types of the arguments should be checked against the caller.
 */
0068 SYM_FUNC_START(sha1_ce_transform)
0069
/* Broadcast the four SHA-1 round constants into k0-k3; w6 is scratch. */
0070 loadrc k0.4s, 0x5a827999, w6
0071 loadrc k1.4s, 0x6ed9eba1, w6
0072 loadrc k2.4s, 0x8f1bbcdc, w6
0073 loadrc k3.4s, 0xca62c1d6, w6
0074
0075
/* Load the current digest: four words into dga, the fifth into dgb. */
0076 ld1 {dgav.4s}, [x0]
0077 ldr dgb, [x0, #16]
0078
0079
/*
 * Load the 32-bit "finalize" field of the state into w4. ldr_l is a macro
 * defined outside this file; here it is used to fetch the field offset from
 * sha1_ce_offsetof_finalize with x4 as its scratch register. The 32-bit
 * load clears the upper half of x4, which the cbz x4 at label 2 relies on.
 */
0080 ldr_l w4, sha1_ce_offsetof_finalize, x4
0081 ldr w4, [x0, x4]
0082
0083
/* 0: top of the per-block loop - fetch 64 bytes of input into v8-v11. */
0084 0: ld1 {v8.4s-v11.4s}, [x1], #64
0085 sub w2, w2, #1	// one block fewer remaining
0086
/*
 * Byte-swap every 32-bit word of the block. CPU_LE is defined outside this
 * file; presumably it emits its argument on little-endian builds only.
 */
0087 CPU_LE( rev32 v8.16b, v8.16b )
0088 CPU_LE( rev32 v9.16b, v9.16b )
0089 CPU_LE( rev32 v10.16b, v10.16b )
0090 CPU_LE( rev32 v11.16b, v11.16b )
0091
/*
 * 1: start of the 20 hash update steps. This label is also the entry point
 * for the padding block built at label 2, which thereby skips the load and
 * the byte swap above. Prime t0 for the first step and take a working copy
 * of the digest.
 */
0092 1: add t0.4s, v8.4s, k0.4s
0093 mov dg0v.16b, dgav.16b
0094
/*
 * Steps 1-5 use sha1c. Each step also prepares the input of the following
 * step, which is why the last line of every group already names the
 * constant of the next group. The first step passes dgb explicitly because
 * no earlier step has produced dg1s yet.
 */
0095 add_update c, ev, k0, 8, 9, 10, 11, dgb
0096 add_update c, od, k0, 9, 10, 11, 8
0097 add_update c, ev, k0, 10, 11, 8, 9
0098 add_update c, od, k0, 11, 8, 9, 10
0099 add_update c, ev, k1, 8, 9, 10, 11
0100
/* Steps 6-10 use sha1p. */
0101 add_update p, od, k1, 9, 10, 11, 8
0102 add_update p, ev, k1, 10, 11, 8, 9
0103 add_update p, od, k1, 11, 8, 9, 10
0104 add_update p, ev, k1, 8, 9, 10, 11
0105 add_update p, od, k2, 9, 10, 11, 8
0106
/* Steps 11-15 use sha1m. */
0107 add_update m, ev, k2, 10, 11, 8, 9
0108 add_update m, od, k2, 11, 8, 9, 10
0109 add_update m, ev, k2, 8, 9, 10, 11
0110 add_update m, od, k2, 9, 10, 11, 8
0111 add_update m, ev, k3, 10, 11, 8, 9
0112
/*
 * Steps 16-20 use sha1p again. Only the first of them still updates the
 * message schedule; the remaining ones use add_only, and the very last one
 * omits the register argument so that no further input is prepared.
 */
0113 add_update p, od, k3, 11, 8, 9, 10
0114 add_only p, ev, k3, 9
0115 add_only p, od, k3, 10
0116 add_only p, ev, k3, 11
0117 add_only p, od
0118
0119
/*
 * Fold the working state into the digest. The last step left its sha1h
 * result in dg1; of the two lanes added into dgbv only lane 0 is stored
 * later (str dgb).
 */
0120 add dgbv.2s, dgbv.2s, dg1v.2s
0121 add dgav.4s, dgav.4s, dg0v.4s
0122
0123 cbz w2, 2f	// no blocks left: see whether a padding block is due
/*
 * cond_yield is a macro defined outside this file. As used here it appears
 * to branch to 3f, where the digest is stored and the remaining block count
 * returned, when the loop should be interrupted, with x5/x6 as scratch.
 * NOTE(review): confirm against the macro definition.
 */
0124 cond_yield 3f, x5, x6
0125 b 0b
0126
0127
0128
0129
0130
0131
/*
 * 2: all input blocks consumed. If the finalize field loaded on entry is
 * non-zero, build one more block in v8-v11 directly in word form - the word
 * 0x80000000 followed by zero words, with a 64-bit length in the last two
 * words - and run it through the steps at label 1. x4 is cleared before
 * branching back so that the second pass through label 2 falls through to
 * label 3.
 */
0132 2: cbz x4, 3f
0133 ldr_l w4, sha1_ce_offsetof_count, x4
0134 ldr x4, [x0, x4]	// 64-bit count field of the state
0135 movi v9.2d, #0
0136 mov x8, #0x80000000
0137 movi v10.2d, #0
/*
 * Multiply the count by 8 and swap the two 32-bit halves in one rotate, so
 * that the upper half of the result lands in the lower-numbered word lane.
 * This equals the expression in the trailing comment as long as the top
 * three bits of the count are zero.
 */
0138 ror x7, x4, #29 // ror(lsl(x4, 3), 32)
0139 fmov d8, x8	// v8 = { 0x80000000, 0, 0, 0 }
0140 mov x4, #0	// padding block is generated only once
0141 mov v11.d[0], xzr
0142 mov v11.d[1], x7	// last two words of the block = scaled count
0143 b 1b
0144
0145
/* 3: write the digest back and return the number of unprocessed blocks. */
0146 3: st1 {dgav.4s}, [x0]
0147 str dgb, [x0, #16]
0148 mov w0, w2
0149 ret
0150 SYM_FUNC_END(sha1_ce_transform)