0001
0002
0003
0004
0005
0006
0007
0008
0009 #include <linux/linkage.h>
0010 #include <asm/assembler.h>
0011
0012
0013
0014
0015
0016
0017
0018 #define L(label) .L ## label
0019
0020 #define REP8_01 0x0101010101010101
0021 #define REP8_7f 0x7f7f7f7f7f7f7f7f
0022
0023
0024 #define src1 x0
0025 #define src2 x1
0026 #define limit x2
0027 #define result x0
0028
0029
0030 #define data1 x3
0031 #define data1w w3
0032 #define data2 x4
0033 #define data2w w4
0034 #define has_nul x5
0035 #define diff x6
0036 #define syndrome x7
0037 #define tmp1 x8
0038 #define tmp2 x9
0039 #define tmp3 x10
0040 #define zeroones x11
0041 #define pos x12
0042 #define mask x13
0043 #define endloop x14
0044 #define count mask
0045 #define offset pos
0046 #define neg_offset x15
0047
0048
0049
0050
0051
0052
0053 #ifdef __AARCH64EB__
0054 #define LS_FW lsl
0055 #define LS_BK lsr
0056 #else
0057 #define LS_FW lsr
0058 #define LS_BK lsl
0059 #endif
0060
0061 SYM_FUNC_START(__pi_strncmp)
0062 cbz limit, L(ret0)
0063 eor tmp1, src1, src2
0064 mov zeroones, #REP8_01
0065 tst tmp1, #7
0066 and count, src1, #7
0067 b.ne L(misaligned8)
0068 cbnz count, L(mutual_align)
0069
0070
0071
0072
0073 .p2align 4
0074 L(loop_aligned):
0075 ldr data1, [src1], #8
0076 ldr data2, [src2], #8
0077 L(start_realigned):
0078 subs limit, limit, #8
0079 sub tmp1, data1, zeroones
0080 orr tmp2, data1, #REP8_7f
0081 eor diff, data1, data2
0082 csinv endloop, diff, xzr, hi
0083 bics has_nul, tmp1, tmp2
0084 ccmp endloop, #0, #0, eq
0085 b.eq L(loop_aligned)
0086
0087
0088 L(full_check):
0089 #ifndef __AARCH64EB__
0090 orr syndrome, diff, has_nul
0091 add limit, limit, 8
0092 L(syndrome_check):
0093
0094
0095 rev syndrome, syndrome
0096 rev data1, data1
0097 clz pos, syndrome
0098 rev data2, data2
0099 lsl data1, data1, pos
0100 cmp limit, pos, lsr #3
0101 lsl data2, data2, pos
0102
0103
0104 lsr data1, data1, #56
0105 sub result, data1, data2, lsr #56
0106 csel result, result, xzr, hi
0107 ret
0108 #else
0109
0110 tbz limit, #63, L(not_limit)
0111 add tmp1, limit, 8
0112 cbz limit, L(not_limit)
0113
0114 lsl limit, tmp1, #3
0115 mov mask, #~0
0116 lsr mask, mask, limit
0117 bic data1, data1, mask
0118 bic data2, data2, mask
0119
0120
0121 orr has_nul, has_nul, mask
0122
0123 L(not_limit):
0124
0125
0126
0127
0128
0129
0130 cbnz has_nul, 1f
0131 cmp data1, data2
0132 cset result, ne
0133 cneg result, result, lo
0134 ret
0135 1:
0136
0137 rev tmp3, data1
0138 sub tmp1, tmp3, zeroones
0139 orr tmp2, tmp3, #REP8_7f
0140 bic has_nul, tmp1, tmp2
0141 rev has_nul, has_nul
0142 orr syndrome, diff, has_nul
0143 clz pos, syndrome
0144
0145
0146
0147
0148 L(end_quick):
0149 lsl data1, data1, pos
0150 lsl data2, data2, pos
0151
0152
0153 lsr data1, data1, #56
0154 sub result, data1, data2, lsr #56
0155 ret
0156 #endif
0157
0158 L(mutual_align):
0159
0160
0161
0162
0163
0164 bic src1, src1, #7
0165 bic src2, src2, #7
0166 ldr data1, [src1], #8
0167 neg tmp3, count, lsl #3
0168 ldr data2, [src2], #8
0169 mov tmp2, #~0
0170 LS_FW tmp2, tmp2, tmp3
0171
0172 adds limit, limit, count
0173 csinv limit, limit, xzr, lo
0174 orr data1, data1, tmp2
0175 orr data2, data2, tmp2
0176 b L(start_realigned)
0177
0178 .p2align 4
0179
0180 L(misaligned8):
0181 cmp limit, #16
0182 b.hs L(try_misaligned_words)
0183
0184 L(byte_loop):
0185
0186 ldrb data1w, [src1], #1
0187 ldrb data2w, [src2], #1
0188 subs limit, limit, #1
0189 ccmp data1w, #1, #0, hi
0190 ccmp data1w, data2w, #0, cs
0191 b.eq L(byte_loop)
0192 L(done):
0193 sub result, data1, data2
0194 ret
0195
0196
0197 L(try_misaligned_words):
0198 cbz count, L(src1_aligned)
0199
0200 neg count, count
0201 and count, count, #7
0202 sub limit, limit, count
0203
0204 L(page_end_loop):
0205 ldrb data1w, [src1], #1
0206 ldrb data2w, [src2], #1
0207 cmp data1w, #1
0208 ccmp data1w, data2w, #0, cs
0209 b.ne L(done)
0210 subs count, count, #1
0211 b.hi L(page_end_loop)
0212
0213
0214
0215
0216
0217
0218
0219
0220
0221
0222
0223
0224
0225
0226
0227
0228
0229
0230 L(src1_aligned):
0231
0232
0233 lsl offset, src2, #3
0234 bic src2, src2, #0xf
0235 mov mask, -1
0236 neg neg_offset, offset
0237 ldr data1, [src1], #8
0238 ldp tmp1, tmp2, [src2], #16
0239 LS_BK mask, mask, neg_offset
0240 and neg_offset, neg_offset, #63
0241
0242 tbnz offset, 6, L(misaligned_mid_loop)
0243
0244 L(loop_misaligned):
0245
0246 LS_FW data2, tmp1, offset
0247 LS_BK tmp1, tmp2, neg_offset
0248 subs limit, limit, #8
0249 orr data2, data2, tmp1
0250 sub has_nul, data1, zeroones
0251 eor diff, data1, data2
0252 orr tmp3, data1, #REP8_7f
0253 csinv endloop, diff, xzr, hi
0254 bic has_nul, has_nul, tmp3
0255 orr tmp3, endloop, has_nul
0256 cbnz tmp3, L(full_check)
0257
0258 ldr data1, [src1], #8
0259 L(misaligned_mid_loop):
0260
0261 LS_FW data2, tmp2, offset
0262 #ifdef __AARCH64EB__
0263
0264
0265
0266 rev tmp3, data1
0267 #define data1_fixed tmp3
0268 #else
0269 #define data1_fixed data1
0270 #endif
0271 sub has_nul, data1_fixed, zeroones
0272 orr tmp3, data1_fixed, #REP8_7f
0273 eor diff, data2, data1
0274 bic has_nul, has_nul, tmp3
0275 #ifdef __AARCH64EB__
0276 rev has_nul, has_nul
0277 #endif
0278 cmp limit, neg_offset, lsr #3
0279 orr syndrome, diff, has_nul
0280 bic syndrome, syndrome, mask
0281 csinv tmp3, syndrome, xzr, hi
0282 cbnz tmp3, L(syndrome_check)
0283
0284
0285 ldp tmp1, tmp2, [src2], #16
0286 cmp limit, #8
0287 LS_BK data2, tmp1, neg_offset
0288 eor diff, data2, data1
0289 orr syndrome, diff, has_nul
0290 and syndrome, syndrome, mask
0291 csinv tmp3, syndrome, xzr, hi
0292 cbnz tmp3, L(syndrome_check)
0293
0294 ldr data1, [src1], #8
0295 sub limit, limit, #8
0296 b L(loop_misaligned)
0297
0298 #ifdef __AARCH64EB__
0299 L(syndrome_check):
0300 clz pos, syndrome
0301 cmp pos, limit, lsl #3
0302 b.lo L(end_quick)
0303 #endif
0304
0305 L(ret0):
0306 mov result, #0
0307 ret
0308 SYM_FUNC_END(__pi_strncmp)
0309 SYM_FUNC_ALIAS_WEAK(strncmp, __pi_strncmp)
0310 EXPORT_SYMBOL_NOKASAN(strncmp)