0001
0002
0003
0004
0005
0006
0007
0008
0009 #include <linux/linkage.h>
0010 #include <asm/assembler.h>
0011
0012
0013
0014
0015
0016
/* L(label) pastes the ".L" prefix onto `label`, giving an assembler-local
   (non-exported) label name for use inside the routine.  */
0017 #define L(label) .L ## label
0018
0019
/* Inputs and result.  src1/src2 are used as the base addresses of the two
   buffers being compared and limit as the byte count; result is the 32-bit
   view of x0, so writing it overwrites src1.  */
0020 #define src1 x0
0021 #define src2 x1
0022 #define limit x2
0023 #define result w0
0024
0025
/* Scratch registers.  data1/data2 hold the current chunk loaded from
   src1/src2; data1h/data2h hold the second 8 bytes of a 16-byte pair load.
   data1w/data2w are the 32-bit views of the same registers as data1/data2
   (w3/x3 and w5/x5).  tmp1 is a temporary; tmp2 is defined here but not
   referenced by the code visible in this file.  */
0026 #define data1 x3
0027 #define data1w w3
0028 #define data1h x4
0029 #define data2 x5
0030 #define data2w w5
0031 #define data2h x6
0032 #define tmp1 x7
0033 #define tmp2 x8
0034
/*
 * __pi_memcmp - compare two byte ranges.
 *
 * In:    src1 (x0), src2 (x1) = start of the two buffers
 *        limit (x2)           = number of bytes to compare
 * Out:   result (w0)          = 0 if all compared bytes are equal.
 *                               On the word/doubleword paths: -1 or 1.
 *                               On the byte-loop path: the difference of the
 *                               first differing byte pair.
 *                               In both cases the sign follows the unsigned
 *                               order of the first differing byte.
 * Uses:  x0-x7 and the condition flags; no stack accesses.
 *
 * Strategy: sizes below 8 use a 4-byte compare plus a byte loop; 8..16 and
 * 17..32 bytes use overlapping 8/16-byte loads anchored at the end of the
 * buffers; larger sizes run a 16-bytes-per-iteration loop, and sizes above
 * 128 bytes first round src1 down to a 16-byte boundary.
 */
0035 SYM_FUNC_START(__pi_memcmp)
/* limit = n - 8; a borrow (lo) means n < 8.  */
0036 subs limit, limit, 8
0037 b.lo L(less8)
0038
/* n >= 8: compare the first 8 bytes, advancing both pointers by 8.  */
0039 ldr data1, [src1], 8
0040 ldr data2, [src2], 8
0041 cmp data1, data2
0042 b.ne L(return)
0043
/* limit = n - 16; signed greater-than-zero means n > 16.  */
0044 subs limit, limit, 8
0045 b.gt L(more16)
0046
/* 8 <= n <= 16: limit is in [-8..0], so [src + limit] addresses the final
   8 bytes of each buffer (possibly overlapping the bytes already compared).
   L(return) performs the comparison.  */
0047 ldr data1, [src1, limit]
0048 ldr data2, [src2, limit]
0049 b L(return)
0050
0051 L(more16):
/* n > 16: compare bytes 8..15, leaving both pointers at offset 16.  */
0052 ldr data1, [src1], 8
0053 ldr data2, [src2], 8
0054 cmp data1, data2
0055 bne L(return)
0056
0057
0058
/* limit = n - 32.  If n <= 32 (unsigned lower-or-same), the remaining bytes
   fit in one 16-byte compare of the buffer tails.  */
0059 subs limit, limit, 16
0060 b.ls L(last_bytes)
0061
0062
0063
/* Skip the alignment step unless limit > 96, i.e. n > 128.  */
0064 cmp limit, 96
0065 b.ls L(loop16)
0066
0067
/* Round src1 down to a 16-byte boundary.  src2 is moved back by the same
   amount and limit grows to match, so up to 15 bytes that already compared
   equal are compared again.  */
0068 and tmp1, src1, 15
0069 add limit, limit, tmp1
0070 sub src1, src1, tmp1
0071 sub src2, src2, tmp1
0072
0073
0074
0075
/* Main loop: 16 bytes per iteration.  Invariant on entry to each iteration:
   bytes remaining = limit + 16, with limit > 0.  */
0076 .p2align 4
0077 L(loop16):
0078 ldp data1, data1h, [src1], 16
0079 ldp data2, data2h, [src2], 16
0080 subs limit, limit, 16
/* If limit is still above zero (hi) compare the low halves, otherwise force
   NZCV = 0 (reads as "ne") so the loop exits.  */
0081 ccmp data1, data2, 0, hi
/* Compare the high halves only if the previous condition left "eq".  */
0082 ccmp data1h, data2h, 0, eq
0083 b.eq L(loop16)
0084
/* Loop exit: either a mismatch was loaded or at most 16 bytes remain.
   Re-check both halves of the last pair; for the high half the values are
   moved into data1/data2 because L(return) compares those registers.  */
0085 cmp data1, data2
0086 bne L(return)
0087 mov data1, data1h
0088 mov data2, data2h
0089 cmp data1, data2
0090 bne L(return)
0091
0092
0093 L(last_bytes):
/* limit is <= 0 here; adding it steps the pointers back so that the 16-byte
   loads end exactly at the end of each buffer.  */
0094 add src1, src1, limit
0095 add src2, src2, limit
0096 ldp data1, data1h, [src1]
0097 ldp data2, data2h, [src2]
0098 cmp data1, data2
0099 bne L(return)
0100 mov data1, data1h
0101 mov data2, data2h
0102 cmp data1, data2
0103
0104
/* Turn the chunks in data1/data2 into the return value.  */
0105 L(return):
/* When __AARCH64EB__ is not defined, byte-reverse both values so the byte at
   the lowest address becomes the most significant; the unsigned compare
   below then orders the chunks by their first differing byte.  */
0106 #ifndef __AARCH64EB__
0107 rev data1, data1
0108 rev data2, data2
0109 #endif
0110 cmp data1, data2
/* Also entered from L(less4) with Z set, which yields result = 0.  */
0111 L(ret_eq):
0112 cset result, ne /* 1 if the chunks differ, else 0.  */
0113 cneg result, result, lo /* Negate when data1 < data2 (unsigned).  */
0114 ret
0115
0116 .p2align 4
0117
/* n < 8.  On entry limit = n - 8, in [-8..-1].  */
0118 L(less8):
/* limit = n - 4; no carry out (lo) means n < 4.  */
0119 adds limit, limit, 4
0120 b.lo L(less4)
/* 4 <= n <= 7: compare the first 4 bytes.  The w-register loads zero the
   upper halves of data1/data2, so the 64-bit handling in L(return) is safe
   for these values.  */
0121 ldr data1w, [src1], 4
0122 ldr data2w, [src2], 4
0123 cmp data1w, data2w
0124 b.ne L(return)
0125 sub limit, limit, 4
0126 L(less4):
/* limit = number of bytes still to compare (0..3); none left means equal.  */
0127 adds limit, limit, 4
0128 beq L(ret_eq)
0129 L(byte_loop):
0130 ldrb data1w, [src1], 1
0131 ldrb data2w, [src2], 1
0132 subs limit, limit, 1
/* While bytes remain compare this pair; once the count hits zero force
   NZCV = 0 ("ne") to leave the loop.  */
0133 ccmp data1w, data2w, 0, ne
0134 b.eq L(byte_loop)
/* Difference of the last byte pair loaded: zero when the count ran out on
   equal bytes, otherwise signed by which byte is larger.  */
0135 sub result, data1w, data2w
0136 ret
0137 SYM_FUNC_END(__pi_memcmp)
/* NOTE(review): the two macros below come from kernel headers not visible
   here; by their names they presumably make memcmp a weak alias of
   __pi_memcmp and export it - confirm against linux/linkage.h.  */
0138 SYM_FUNC_ALIAS_WEAK(memcmp, __pi_memcmp)
0139 EXPORT_SYMBOL_NOKASAN(memcmp)