0001
0002
0003
0004
0005
0006
0007
0008
0009 #include <linux/linkage.h>
0010 #include <asm/assembler.h>
0011 #include <asm/mte-def.h>
0012
0013
0014
0015
0016
0017
0018 #define L(label) .L ## label
0019 /* L(name) above token-pastes ".L" onto name; GAS keeps .L-prefixed labels out of the symbol table. */
0020 /* Argument and result. Both names are x0, so the first write to len destroys srcin. */
0021 #define srcin x0
0022 #define len x0
0023 /* Working registers. has_nul1/tmp1 share x4 and has_nul2/tmp2 share x5: */
0024 /* a NUL syndrome is always computed in place over the temporaries it was derived from. */
0025 #define src x1
0026 #define data1 x2
0027 #define data2 x3
0028 #define has_nul1 x4
0029 #define has_nul2 x5
0030 #define tmp1 x4
0031 #define tmp2 x5
0032 #define tmp3 x6
0033 #define tmp4 x7
0034 #define zeroones x8
0035
0036 /* Byte-replicated constants used by the two NUL tests in the function below. */
0037 /* Accurate test: (X - REP8_01) & ~(X | REP8_7f) is non-zero iff some byte of X is zero. */
0038 /* Fast test: (X - REP8_01) & REP8_80 (formed there as zeroones << 7) is zero when every */
0039 /* byte of X is in 0x01..0x80; it also fires for bytes 0x81..0xff, so a hit has to be */
0040 /* confirmed with the accurate test. */
0041
0042 #define REP8_01 0x0101010101010101
0043 #define REP8_7f 0x7f7f7f7f7f7f7f7f
0044 #define REP8_80 0x8080808080808080
0045
0046 /* MIN_PAGE_SIZE is the boundary that the single unaligned 16-byte load at function */
0047 /* entry must not straddle. With CONFIG_KASAN_HW_TAGS it shrinks to MTE_GRANULE_SIZE, */
0048 /* which is defined outside this file (presumably by <asm/mte-def.h>, and presumably */
0049 /* 16 bytes -- confirm there). If it is 16, every pointer that is not 16-byte aligned */
0050 /* fails the entry check and takes the aligned page_cross path instead. */
0051 #ifdef CONFIG_KASAN_HW_TAGS
0052 #define MIN_PAGE_SIZE MTE_GRANULE_SIZE
0053 #else
0054 #define MIN_PAGE_SIZE 4096
0055 #endif
0056
0057
0058
0059
0060
0061
0062
0063
0064
0065
0066
0067
0068
0069
0070
0071
0072
0073
0074
0075
0076
0077
0078
0079
0080
0081
/*
 * __pi_strlen: count the bytes that precede the first NUL byte of a string.
 * The same code is also given the weak alias `strlen` at the bottom.
 *
 * In:       srcin (x0) = address of the first byte of the string
 * Out:      len   (x0) = number of bytes before the first NUL
 * Writes:   x1-x8 and the condition flags. The instructions below perform
 *           no stores and never reference sp.
 *
 * Shape of the routine:
 *   1. If 16 bytes starting at srcin do not straddle a MIN_PAGE_SIZE
 *      boundary, load them unaligned and test them with the accurate NUL
 *      check. A hit returns straight away.
 *   2. Otherwise (or after a miss) continue with 16-byte-aligned loads,
 *      which cannot straddle such a boundary. The main loop consumes
 *      32 bytes per iteration using the cheaper "fast" check.
 *   3. A fast-check hit is re-tested accurately. If it was a false
 *      positive, the routine moves permanently to nonascii_loop, which
 *      applies the accurate check to every 16-byte block.
 *   4. page_cross handles a start that is too close to the boundary: it
 *      loads the aligned 16-byte block containing srcin, neutralises the
 *      bytes that lie before srcin, and joins the main loop.
 *
 * Flag convention after every "bics ... ; bic ... ; ccmp has_nul2, 0, 0, eq":
 *   Z set   <=> has_nul1 == 0 and has_nul2 == 0 (no NUL in the 16 bytes)
 *   C clear <=> has_nul1 != 0 (the first NUL is in the first 8-byte word)
 * The later csel instructions rely on C surviving the mov/rev/clz/sub/add
 * in between; none of those instructions write the flags.
 */
0082 SYM_FUNC_START(__pi_strlen)
0083 and tmp1, srcin, MIN_PAGE_SIZE - 1 /* tmp1 = offset of srcin inside its MIN_PAGE_SIZE block */
0084 mov zeroones, REP8_01
0085 cmp tmp1, MIN_PAGE_SIZE - 16
0086 b.gt L(page_cross) /* fewer than 16 bytes left before the boundary */
0087 ldp data1, data2, [srcin] /* unaligned 16-byte load of the start of the string */
0088 #ifdef __AARCH64EB__
0089 /* Big-endian: the first string byte sits in the most significant byte of data1, but the */
0090 /* borrow of the subtraction below runs from the least significant byte upwards, so the */
0091 /* syndrome could not be trusted for the earliest NUL. Byte-swap both words first so */
0092 /* that the first byte is the least significant one, exactly as on little-endian. */
0093 rev data1, data1
0094 rev data2, data2
0095 #endif
0096 sub tmp1, data1, zeroones
0097 orr tmp2, data1, REP8_7f
0098 sub tmp3, data2, zeroones
0099 orr tmp4, data2, REP8_7f
0100 bics has_nul1, tmp1, tmp2 /* (data1 - 0x01..) & ~(data1 | 0x7f..); Z set when zero */
0101 bic has_nul2, tmp3, tmp4 /* same for data2; flags from bics are left intact */
0102 ccmp has_nul2, 0, 0, eq /* if has_nul1 == 0: compare has_nul2 with 0, else NZCV = 0 */
0103 beq L(main_loop_entry) /* both syndromes zero: no NUL in the first 16 bytes */
0104 /* A NUL is within the first 16 bytes. C clear means it is in the first word. */
0105 /* rev + clz turn the lowest set syndrome bit into 8 * (index of the first NUL byte). */
0106 csel has_nul1, has_nul1, has_nul2, cc /* keep the syndrome of the word holding the NUL */
0107 mov len, 8 /* overwrites srcin (same register); it is not needed again on this path */
0108 rev has_nul1, has_nul1
0109 clz tmp1, has_nul1
0110 csel len, xzr, len, cc /* base offset: 0 for the first word, 8 for the second */
0111 add len, len, tmp1, lsr 3 /* plus the byte index inside that word */
0112 ret
0113
0114 /* Main loop: 32 bytes per iteration in two aligned 16-byte loads, fast check only. */
0115 /* src starts 16 below the aligned block holding srcin, so the first pre-indexed load */
0116 /* reads (srcin & ~15) + 16, which is no further than the 16 bytes already checked. */
0117 .p2align 4
0118 L(main_loop_entry):
0119 bic src, srcin, 15
0120 sub src, src, 16
0121 L(main_loop):
0122 ldp data1, data2, [src, 32]! /* src += 32, then load 16 bytes from the new src */
0123 L(page_cross_entry): /* entered from page_cross with src, data1 and data2 already set */
0124 sub tmp1, data1, zeroones
0125 sub tmp3, data2, zeroones
0126 orr tmp2, tmp1, tmp3
0127 tst tmp2, zeroones, lsl 7 /* any byte with bit 7 set after the subtraction? */
0128 bne 1f /* possible NUL in the block at src */
0129 ldp data1, data2, [src, 16] /* second half of the iteration; src is not updated */
0130 sub tmp1, data1, zeroones
0131 sub tmp3, data2, zeroones
0132 orr tmp2, tmp1, tmp3
0133 tst tmp2, zeroones, lsl 7
0134 beq L(main_loop)
0135 add src, src, 16 /* make src point at the block now held in data1/data2 */
0136 1:
0137 /* Fast check fired: redo it accurately. tmp1/tmp3 still hold dataN - zeroones. */
0138 orr tmp2, data1, REP8_7f
0139 orr tmp4, data2, REP8_7f
0140 bics has_nul1, tmp1, tmp2
0141 bic has_nul2, tmp3, tmp4
0142 ccmp has_nul2, 0, 0, eq
0143 beq L(nonascii_loop) /* false positive: give up on the fast check from here on */
0144 /* Tail: on entry src is the address of the 16-byte block held in data1/data2 and */
0145 /* C is clear when the first NUL lies in data1 (see the flag convention above). */
0146 L(tail):
0147 #ifdef __AARCH64EB__
0148 /* Big-endian: the syndromes computed on the unswapped loop data only prove that a NUL */
0149 /* exists and which word holds it; borrow propagation makes the bit position unreliable. */
0150 /* Select the data word with the NUL, byte-swap it and recompute its syndrome. The final */
0151 /* step uses bic (not bics) so that C stays valid for the csel further down. */
0152 csel data1, data1, data2, cc
0153 rev data1, data1
0154 sub tmp1, data1, zeroones
0155 orr tmp2, data1, REP8_7f
0156 bic has_nul1, tmp1, tmp2
0157 #else
0158 csel has_nul1, has_nul1, has_nul2, cc
0159 #endif
0160 sub len, src, srcin /* block offset from the string start; may be negative after page_cross */
0161 rev has_nul1, has_nul1
0162 add tmp2, len, 8 /* offset to use if the NUL is in the second word */
0163 clz tmp1, has_nul1 /* 8 * (index of the first NUL byte inside its word) */
0164 csel len, len, tmp2, cc
0165 add len, len, tmp1, lsr 3
0166 ret
0167 /* Accurate-check loop: 16 bytes per step, unrolled twice; each load advances src by 16. */
0168 L(nonascii_loop):
0169 ldp data1, data2, [src, 16]!
0170 sub tmp1, data1, zeroones
0171 orr tmp2, data1, REP8_7f
0172 sub tmp3, data2, zeroones
0173 orr tmp4, data2, REP8_7f
0174 bics has_nul1, tmp1, tmp2
0175 bic has_nul2, tmp3, tmp4
0176 ccmp has_nul2, 0, 0, eq
0177 bne L(tail)
0178 ldp data1, data2, [src, 16]!
0179 sub tmp1, data1, zeroones
0180 orr tmp2, data1, REP8_7f
0181 sub tmp3, data2, zeroones
0182 orr tmp4, data2, REP8_7f
0183 bics has_nul1, tmp1, tmp2
0184 bic has_nul2, tmp3, tmp4
0185 ccmp has_nul2, 0, 0, eq
0186 beq L(nonascii_loop)
0187 b L(tail)
0188
0189 /* Start too close to the boundary: load the aligned 16-byte block containing srcin and */
0190 /* OR 0x7f into every byte that precedes srcin, so none of them can be taken for a NUL. */
0191 /* Bytes at or after srcin are left unchanged. Then join the main loop. */
0192 L(page_cross):
0193 bic src, srcin, 15
0194 ldp data1, data2, [src]
0195 lsl tmp1, srcin, 3 /* bit offset; the register shifts below use only its low 6 bits */
0196 mov tmp4, -1
0197 #ifdef __AARCH64EB__
0198 /* Big-endian: earlier bytes are the more significant ones, so clear the mask from the top. */
0199 lsr tmp1, tmp4, tmp1
0200 #else
0201 /* Little-endian: earlier bytes are the less significant ones, so clear the mask from the bottom. */
0202 lsl tmp1, tmp4, tmp1
0203 #endif
0204 orr tmp1, tmp1, REP8_80 /* ~tmp1 is now 0x7f in preceding bytes and 0x00 elsewhere */
0205 orn data1, data1, tmp1 /* data1 |= ~tmp1 */
0206 orn tmp2, data2, tmp1 /* masked copy of data2, used when srcin is in the second word */
0207 tst srcin, 8 /* bit 3 of srcin: which 8-byte word of the block holds the start */
0208 csel data1, data1, tmp4, eq /* start in word 2: whole first word becomes 0xff.. (no NUL) */
0209 csel data2, data2, tmp2, eq /* start in word 1: second word is used as loaded */
0210 b L(page_cross_entry)
0211 SYM_FUNC_END(__pi_strlen)
0212 SYM_FUNC_ALIAS_WEAK(strlen, __pi_strlen)
0213 EXPORT_SYMBOL_NOKASAN(strlen) /* macro defined outside this file; presumably exports strlen unless KASAN supplies its own -- confirm */