0001
0002
0003
0004
0005
0006
0007
0008 #include <linux/linkage.h>
0009 #include <asm/assembler.h>
0010
/*
 * Define one assembler symbol per vector-register operand spelling:
 * .Lv0.4s = 0, .Lv1.4s = 1, ... .Lv12.4s = 12.
 *
 * The instruction-encoding macros in this file prepend ".L" to an operand
 * such as "v5.4s" to obtain its register number.  Only v0-v12 with the
 * .4s arrangement are defined here, so only those spellings can be passed
 * to those macros.
 */
0011 .irp b, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12
0012 .set .Lv\b\().4s, \b
0013 .endr
0014
/*
 * sm3partw1 rd, rn, rm
 *
 * Emit the raw instruction word 0xce60c000 with the register numbers of
 * rd, rn and rm OR-ed into bits [4:0], [9:5] and [20:16] respectively.
 * Each operand must be spelled "vN.4s" with N in 0..12 so that the
 * matching .LvN.4s symbol exists.
 * NOTE(review): presumably hand-encoded so the file assembles with
 * toolchains that lack the SM3 mnemonics - confirm.
 */
0015 .macro sm3partw1, rd, rn, rm
0016 .inst 0xce60c000 | .L\rd | (.L\rn << 5) | (.L\rm << 16)
0017 .endm
0018
/*
 * sm3partw2 rd, rn, rm
 *
 * Emit the raw instruction word 0xce60c400 with the register numbers of
 * rd, rn and rm OR-ed into bits [4:0], [9:5] and [20:16] respectively.
 * Each operand must be spelled "vN.4s" with N in 0..12 so that the
 * matching .LvN.4s symbol exists.
 */
0019 .macro sm3partw2, rd, rn, rm
0020 .inst 0xce60c400 | .L\rd | (.L\rn << 5) | (.L\rm << 16)
0021 .endm
0022
/*
 * sm3ss1 rd, rn, rm, ra
 *
 * Emit the raw instruction word 0xce400000 with the register numbers of
 * rd, rn, ra and rm OR-ed into bits [4:0], [9:5], [14:10] and [20:16]
 * respectively.  Note the macro argument order (rd, rn, rm, ra) differs
 * from the order of the fields in the encoding.
 * Each operand must be spelled "vN.4s" with N in 0..12 so that the
 * matching .LvN.4s symbol exists.
 */
0023 .macro sm3ss1, rd, rn, rm, ra
0024 .inst 0xce400000 | .L\rd | (.L\rn << 5) | (.L\ra << 10) | (.L\rm << 16)
0025 .endm
0026
/*
 * sm3tt1a rd, rn, rm, imm2
 *
 * Emit the raw instruction word 0xce408000 with the register numbers of
 * rd, rn and rm OR-ed into bits [4:0], [9:5] and [20:16], and imm2
 * OR-ed in at bit 12.  imm2 is not masked or range-checked here; the
 * round macro in this file only passes 0..3.
 * Register operands must be spelled "vN.4s" with N in 0..12.
 */
0027 .macro sm3tt1a, rd, rn, rm, imm2
0028 .inst 0xce408000 | .L\rd | (.L\rn << 5) | ((\imm2) << 12) | (.L\rm << 16)
0029 .endm
0030
/*
 * sm3tt1b rd, rn, rm, imm2
 *
 * Emit the raw instruction word 0xce408400 with the register numbers of
 * rd, rn and rm OR-ed into bits [4:0], [9:5] and [20:16], and imm2
 * OR-ed in at bit 12.  imm2 is not masked or range-checked here; the
 * round macro in this file only passes 0..3.
 * Register operands must be spelled "vN.4s" with N in 0..12.
 */
0031 .macro sm3tt1b, rd, rn, rm, imm2
0032 .inst 0xce408400 | .L\rd | (.L\rn << 5) | ((\imm2) << 12) | (.L\rm << 16)
0033 .endm
0034
/*
 * sm3tt2a rd, rn, rm, imm2
 *
 * Emit the raw instruction word 0xce408800 with the register numbers of
 * rd, rn and rm OR-ed into bits [4:0], [9:5] and [20:16], and imm2
 * OR-ed in at bit 12.  imm2 is not masked or range-checked here; the
 * round macro in this file only passes 0..3.
 * Register operands must be spelled "vN.4s" with N in 0..12.
 */
0035 .macro sm3tt2a, rd, rn, rm, imm2
0036 .inst 0xce408800 | .L\rd | (.L\rn << 5) | ((\imm2) << 12) | (.L\rm << 16)
0037 .endm
0038
/*
 * sm3tt2b rd, rn, rm, imm2
 *
 * Emit the raw instruction word 0xce408c00 with the register numbers of
 * rd, rn and rm OR-ed into bits [4:0], [9:5] and [20:16], and imm2
 * OR-ed in at bit 12.  imm2 is not masked or range-checked here; the
 * round macro in this file only passes 0..3.
 * Register operands must be spelled "vN.4s" with N in 0..12.
 */
0039 .macro sm3tt2b, rd, rn, rm, imm2
0040 .inst 0xce408c00 | .L\rd | (.L\rn << 5) | ((\imm2) << 12) | (.L\rm << 16)
0041 .endm
0042
/*
 * round ab, s0, t0, t1, i
 *
 * Emit one sm3ss1 / sm3tt1<ab> / sm3tt2<ab> sequence.
 *
 *   ab  - "a" or "b"; pasted onto sm3tt1/sm3tt2 to select the variant
 *   s0  - bare register name (no arrangement) used as the rm operand of
 *         sm3tt2<ab>
 *   t0  - bare register name used as the rm operand of sm3ss1
 *   t1  - bare register name that receives t0 with every 32-bit lane
 *         rotated left by one bit
 *   i   - immediate forwarded as imm2 to sm3tt1<ab> and sm3tt2<ab>
 *
 * Fixed registers: v5 is overwritten by sm3ss1, v8 and v9 are both read
 * and updated, and v10 is read as the rm operand of sm3tt1<ab>.
 */
0043 .macro round, ab, s0, t0, t1, i
0044 sm3ss1 v5.4s, v8.4s, \t0\().4s, v9.4s
/* t1 = rol32(t0, 1) per lane: shift left by 1, then insert old bit 31 at bit 0 */
0045 shl \t1\().4s, \t0\().4s, #1
0046 sri \t1\().4s, \t0\().4s, #31
0047 sm3tt1\ab v8.4s, v5.4s, v10.4s, \i
0048 sm3tt2\ab v9.4s, v5.4s, \s0\().4s, \i
0049 .endm
0050
/*
 * qround ab, s0, s1, s2, s3, s4
 *
 * Emit four consecutive rounds (imm2 = 0, 1, 2, 3) that all use s0 as the
 * sm3tt2<ab> operand and v10 = s0 ^ s1 as the sm3tt1<ab> operand.
 *
 *   ab     - "a" or "b", forwarded to the round macro
 *   s0, s1 - bare register names (no arrangement); always required
 *   s2, s3 - bare register names; only referenced when s4 is given
 *   s4     - optional bare register name.  When present, s4 is computed
 *            from s0..s3 by the ext / sm3partw1 / sm3partw2 sequence
 *            below (presumably the next four message-schedule words -
 *            confirm against the SM3 instruction definitions).
 *            When blank, that sequence is omitted entirely.
 *
 * Because s0..s4 reach the hand-encoded macros, they must be among v0-v12.
 *
 * Registers written: v5 (via round), v10, v11 and v12, plus v6, v7 and s4
 * when s4 is given; v8/v9 are updated by the rounds.  v11 must hold the
 * t0 input of the first round on entry.  v11 and v12 alternate as t0/t1,
 * so after the four rounds v11 again holds the most recently rotated
 * value, ready for the next qround.
 */
0051 .macro qround, ab, s0, s1, s2, s3, s4
0052 .ifnb \s4
0053 ext \s4\().16b, \s1\().16b, \s2\().16b, #12
0054 ext v6.16b, \s0\().16b, \s1\().16b, #12
0055 ext v7.16b, \s2\().16b, \s3\().16b, #8
0056 sm3partw1 \s4\().4s, \s0\().4s, \s3\().4s
0057 .endif
0058
0059 eor v10.16b, \s0\().16b, \s1\().16b
0060
/* v11 and v12 swap the t0/t1 roles on every round */
0061 round \ab, \s0, v11, v12, 0
0062 round \ab, \s0, v12, v11, 1
0063 round \ab, \s0, v11, v12, 2
0064 round \ab, \s0, v12, v11, 3
0065
/* second half of the s4 computation, using the v6/v7 values prepared above */
0066 .ifnb \s4
0067 sm3partw2 \s4\().4s, v7.4s, v6.4s
0068 .endif
0069 .endm
0070
0071
0072
0073
0074
/*
 * sm3_ce_transform
 *
 * Register interface, as used by the code below:
 *   x0 - address of eight 32-bit state words; read on entry and written
 *        back before returning
 *   x1 - address of the input data; 64 bytes are consumed per block and
 *        x1 is post-incremented
 *   w2 - number of 64-byte blocks to process.  The count is tested only
 *        at the bottom of the loop, so it must be non-zero on entry.
 *
 * Register usage:
 *   v0-v4    message words for the current block and values derived
 *            from them by qround
 *   v5-v7, v10-v12  scratch used by the round/qround macros
 *   v8, v9   working state
 *   v13, v14 the two 32-bit constants loaded from .Lt
 *   v15, v16 copy of the state taken at the start of each block
 *   x8       address of .Lt
 *
 * NOTE(review): v8-v16 are modified and not restored here; presumably the
 * caller is responsible for preserving the FP/SIMD register state (e.g.
 * kernel_neon_begin()/kernel_neon_end()) - confirm against the caller.
 */
0075 .text
0076 SYM_FUNC_START(sm3_ce_transform)
0077
/*
 * Load the state.  rev64 swaps the two 32-bit words inside each 64-bit
 * half and ext #8 swaps the halves, so together they reverse the order of
 * the four 32-bit lanes in each of v8 and v9.
 */
0078 ld1 {v8.4s-v9.4s}, [x0]
0079 rev64 v8.4s, v8.4s
0080 rev64 v9.4s, v9.4s
0081 ext v8.16b, v8.16b, v8.16b, #8
0082 ext v9.16b, v9.16b, v9.16b, #8
0083
/* s13 = first word of .Lt, s14 = second word */
0084 adr_l x8, .Lt
0085 ldp s13, s14, [x8]
0086
0087
/* Per-block loop: load 64 bytes of input into v0-v3 and advance x1 */
0088 0: ld1 {v0.16b-v3.16b}, [x1], #64
0089 sub w2, w2, #1
0090
/* keep a copy of the incoming state for the XOR at the end of the block */
0091 mov v15.16b, v8.16b
0092 mov v16.16b, v9.16b
0093
/*
 * Byte-reverse each 32-bit input word.  CPU_LE() comes from the included
 * headers; presumably it emits its argument only on little-endian builds
 * - confirm.
 */
0094 CPU_LE( rev32 v0.16b, v0.16b )
0095 CPU_LE( rev32 v1.16b, v1.16b )
0096 CPU_LE( rev32 v2.16b, v2.16b )
0097 CPU_LE( rev32 v3.16b, v3.16b )
0098
/* rotate v13 by 4 bytes so the first constant sits in the top 32-bit lane of v11 */
0099 ext v11.16b, v13.16b, v13.16b, #4
0100
/* 4 qrounds = 16 rounds using the "a" instruction variants */
0101 qround a, v0, v1, v2, v3, v4
0102 qround a, v1, v2, v3, v4, v0
0103 qround a, v2, v3, v4, v0, v1
0104 qround a, v3, v4, v0, v1, v2
0105
/* restart the rotating constant in v11 from the second .Lt word */
0106 ext v11.16b, v14.16b, v14.16b, #4
0107
/*
 * 12 qrounds = 48 rounds using the "b" instruction variants.  The last
 * three omit s2..s4, so qround skips the s4 computation for them.
 */
0108 qround b, v4, v0, v1, v2, v3
0109 qround b, v0, v1, v2, v3, v4
0110 qround b, v1, v2, v3, v4, v0
0111 qround b, v2, v3, v4, v0, v1
0112 qround b, v3, v4, v0, v1, v2
0113 qround b, v4, v0, v1, v2, v3
0114 qround b, v0, v1, v2, v3, v4
0115 qround b, v1, v2, v3, v4, v0
0116 qround b, v2, v3, v4, v0, v1
0117 qround b, v3, v4
0118 qround b, v4, v0
0119 qround b, v0, v1
0120
/* fold the state saved at the top of the loop back in with XOR */
0121 eor v8.16b, v8.16b, v15.16b
0122 eor v9.16b, v9.16b, v16.16b
0123
0124
/* loop while blocks remain (w2 was decremented at the top of the loop) */
0125 cbnz w2, 0b
0126
0127
/* undo the lane reversal applied at load time and store the state back */
0128 rev64 v8.4s, v8.4s
0129 rev64 v9.4s, v9.4s
0130 ext v8.16b, v8.16b, v8.16b, #8
0131 ext v9.16b, v9.16b, v9.16b, #8
0132 st1 {v8.4s-v9.4s}, [x0]
0133 ret
0134 SYM_FUNC_END(sm3_ce_transform)
0135
/*
 * Two 32-bit constants, 8-byte aligned, loaded as a pair into s13/s14 by
 * sm3_ce_transform.  The first seeds v11 for the 16 "a" rounds and the
 * second for the 48 "b" rounds.
 * NOTE(review): presumably the SM3 round constants T_j, with the second
 * value stored pre-rotated to match its first use at round 16 - confirm
 * against the SM3 specification.
 */
0136 .section ".rodata", "a"
0137 .align 3
0138 .Lt: .word 0x79cc4519, 0x9d8a7a87