0001
0002
0003
0004
0005
0006
0007
0008
0009 #include <linux/linkage.h>
0010 #include <asm/assembler.h>
0011
0012
0013
0014
0015
0016
0017
0018 #define L(label) .L ## label /* L(name) pastes to .Lname; the .L prefix marks a non-exported assembler label */
0019 /* Register aliases used by the copy routine in this file. */
0020 #define dstin x0 /* destination pointer as passed in; the routine never writes it */
0021 #define src x1 /* source pointer */
0022 #define count x2 /* number of bytes to copy */
0023 #define dst x3 /* destination cursor for the long forward copy (dstin rounded down to 16) */
0024 #define srcend x4 /* src + count */
0025 #define dstend x5 /* dstin + count */
0026 #define A_l x6 /* A..F: 16-byte data pairs (_l/_h), with w-register views (_lw) for narrower accesses */
0027 #define A_lw w6
0028 #define A_h x7
0029 #define B_l x8
0030 #define B_lw w8
0031 #define B_h x9
0032 #define C_l x10
0033 #define C_lw w10
0034 #define C_h x11
0035 #define D_l x12
0036 #define D_h x13
0037 #define E_l x14 /* same register as tmp1 below */
0038 #define E_h x15
0039 #define F_l x16
0040 #define F_h x17
0041 #define G_l count /* G/H reuse count, dst, src and srcend as extra data registers; */
0042 #define G_h dst /* loading G or H therefore destroys those values */
0043 #define H_l src
0044 #define H_h srcend
0045 #define tmp1 x14 /* scratch; same register as E_l */
0046
0047 /* __pi_memcpy: copy count bytes from src to dstin. */
0048 /* In:  dstin (x0) = destination, src (x1) = source, count (x2) = length in bytes. */
0049 /* Out: x0 is never written, so it still holds the destination pointer at ret. */
0050 /* Writes x1-x17 (see the register aliases above) and the condition flags; sp is not used. */
0051 /* */
0052 /* The routine dispatches on count: 0..32, 33..128 and more than 128 bytes. */
0053 /* For counts up to 128 every load is issued before the first store. */
0054 /* For larger counts the copy runs backwards when dstin lies inside */
0055 /* [src, src + count) and forwards otherwise; dstin == src returns at once. */
0056 /* Most paths copy a block addressed from the start and a block addressed */
0057 /* from the end; the two blocks may overlap in the middle, which removes */
0058 /* the need for a byte-exact tail loop. */
0059
0060 SYM_FUNC_START(__pi_memcpy)
0061 add srcend, src, count /* srcend = one past the last source byte */
0062 add dstend, dstin, count /* dstend = one past the last destination byte */
0063 cmp count, 128
0064 b.hi L(copy_long) /* count > 128 (unsigned) */
0065 cmp count, 32
0066 b.hi L(copy32_128) /* 33 <= count <= 128 */
0067
0068 /* Small copies: 0..32 bytes. */
0069 cmp count, 16
0070 b.lo L(copy16) /* count < 16 */
0071 ldp A_l, A_h, [src] /* 16..32 bytes: first 16 and last 16, which may overlap */
0072 ldp D_l, D_h, [srcend, -16]
0073 stp A_l, A_h, [dstin]
0074 stp D_l, D_h, [dstend, -16]
0075 ret
0076
0077 /* Copy 8-15 bytes. */
0078 L(copy16):
0079 tbz count, 3, L(copy8) /* count < 16 here, so bit 3 clear means count < 8 */
0080 ldr A_l, [src] /* first 8 and last 8 bytes */
0081 ldr A_h, [srcend, -8]
0082 str A_l, [dstin]
0083 str A_h, [dstend, -8]
0084 ret
0085
0086 .p2align 3
0087 /* Copy 4-7 bytes. */
0088 L(copy8):
0089 tbz count, 2, L(copy4) /* count < 8 here, so bit 2 clear means count < 4 */
0090 ldr A_lw, [src] /* first 4 and last 4 bytes */
0091 ldr B_lw, [srcend, -4]
0092 str A_lw, [dstin]
0093 str B_lw, [dstend, -4]
0094 ret
0095
0096 /* Copy 0..3 bytes. For count 1..3 the bytes at offsets 0, count/2 and count-1 cover the whole range. */
0097 L(copy4):
0098 cbz count, L(copy0) /* nothing to copy */
0099 lsr tmp1, count, 1 /* tmp1 = count / 2 */
0100 ldrb A_lw, [src]
0101 ldrb C_lw, [srcend, -1]
0102 ldrb B_lw, [src, tmp1]
0103 strb A_lw, [dstin]
0104 strb B_lw, [dstin, tmp1]
0105 strb C_lw, [dstend, -1]
0106 L(copy0):
0107 ret
0108
0109 .p2align 4
0110 /* Medium copies: 33..128 bytes. */
0111 L(copy32_128):
0112 ldp A_l, A_h, [src] /* first 32 bytes into A and B */
0113 ldp B_l, B_h, [src, 16]
0114 ldp C_l, C_h, [srcend, -32] /* last 32 bytes into C and D */
0115 ldp D_l, D_h, [srcend, -16]
0116 cmp count, 64
0117 b.hi L(copy128) /* 65 <= count <= 128 */
0118 stp A_l, A_h, [dstin] /* 33..64 bytes: first 32 plus last 32 */
0119 stp B_l, B_h, [dstin, 16]
0120 stp C_l, C_h, [dstend, -32]
0121 stp D_l, D_h, [dstend, -16]
0122 ret
0123
0124 .p2align 4
0125 /* Copy 65..128 bytes. A..D already hold the first 32 and last 32 bytes. */
0126 L(copy128):
0127 ldp E_l, E_h, [src, 32] /* bytes 32..63 into E and F */
0128 ldp F_l, F_h, [src, 48]
0129 cmp count, 96
0130 b.ls L(copy96) /* count <= 96: first 64 plus last 32 is enough */
0131 ldp G_l, G_h, [srcend, -64] /* overwrites count and dst (aliases of G) */
0132 ldp H_l, H_h, [srcend, -48] /* last use of srcend; overwrites src and srcend (aliases of H) */
0133 stp G_l, G_h, [dstend, -64]
0134 stp H_l, H_h, [dstend, -48]
0135 L(copy96):
0136 stp A_l, A_h, [dstin]
0137 stp B_l, B_h, [dstin, 16]
0138 stp E_l, E_h, [dstin, 32]
0139 stp F_l, F_h, [dstin, 48]
0140 stp C_l, C_h, [dstend, -32]
0141 stp D_l, D_h, [dstend, -16]
0142 ret
0143
0144 .p2align 4
0145 /* Copy more than 128 bytes. */
0146 L(copy_long):
0147 /* Use the backwards copy when dstin lies inside [src, src + count). */
0148 sub tmp1, dstin, src
0149 cbz tmp1, L(copy0) /* dstin == src: return without copying */
0150 cmp tmp1, count
0151 b.lo L(copy_long_backwards) /* unsigned (dstin - src) < count */
0152
0153 /* Copy the first 16 bytes, then round dst down to a 16-byte boundary. */
0154 /* src is moved back by the same amount so both cursors stay in step. */
0155 ldp D_l, D_h, [src]
0156 and tmp1, dstin, 15 /* tmp1 = misalignment of dstin */
0157 bic dst, dstin, 15 /* dst = dstin rounded down to 16 */
0158 sub src, src, tmp1
0159 add count, count, tmp1 /* count is now 16 more than the bytes beyond dst + 16 */
0160 ldp A_l, A_h, [src, 16]
0161 stp D_l, D_h, [dstin] /* first 16 bytes, stored at the original dstin */
0162 ldp B_l, B_h, [src, 32]
0163 ldp C_l, C_h, [src, 48]
0164 ldp D_l, D_h, [src, 64]! /* pre-index writeback: src += 64 */
0165 subs count, count, 128 + 16 /* count = bytes beyond dst + 16, minus 128 */
0166 b.ls L(copy64_from_end) /* result <= 0: the 64 bytes held plus the last 64 cover the rest */
0167 /* Each pass stores the 64 bytes held in A..D and loads the next 64. */
0168 L(loop64):
0169 stp A_l, A_h, [dst, 16]
0170 ldp A_l, A_h, [src, 16]
0171 stp B_l, B_h, [dst, 32]
0172 ldp B_l, B_h, [src, 32]
0173 stp C_l, C_h, [dst, 48]
0174 ldp C_l, C_h, [src, 48]
0175 stp D_l, D_h, [dst, 64]! /* pre-index writeback: dst += 64 */
0176 ldp D_l, D_h, [src, 64]! /* pre-index writeback: src += 64 */
0177 subs count, count, 64
0178 b.hi L(loop64)
0179
0180 /* Store the 64 bytes still held in A..D, then copy the last 64 bytes addressed from the end. */
0181 L(copy64_from_end):
0182 ldp E_l, E_h, [srcend, -64] /* E shares x14 with tmp1, which is no longer needed */
0183 stp A_l, A_h, [dst, 16]
0184 ldp A_l, A_h, [srcend, -48] /* each register pair is reloaded only after its old value was stored */
0185 stp B_l, B_h, [dst, 32]
0186 ldp B_l, B_h, [srcend, -32]
0187 stp C_l, C_h, [dst, 48]
0188 ldp C_l, C_h, [srcend, -16]
0189 stp D_l, D_h, [dst, 64]
0190 stp E_l, E_h, [dstend, -64]
0191 stp A_l, A_h, [dstend, -48]
0192 stp B_l, B_h, [dstend, -32]
0193 stp C_l, C_h, [dstend, -16]
0194 ret
0195
0196 .p2align 4
0197 /* Large backwards copy, taken when dstin lies inside the source range. */
0198 /* Copy the last 16 bytes, then round dstend down to a 16-byte boundary; */
0199 /* srcend is moved back by the same amount so both cursors stay in step. */
0200 L(copy_long_backwards):
0201 ldp D_l, D_h, [srcend, -16]
0202 and tmp1, dstend, 15 /* tmp1 = misalignment of dstend */
0203 sub srcend, srcend, tmp1
0204 sub count, count, tmp1 /* count = bytes below the aligned dstend */
0205 ldp A_l, A_h, [srcend, -16]
0206 stp D_l, D_h, [dstend, -16] /* last 16 bytes, stored at the original dstend */
0207 ldp B_l, B_h, [srcend, -32]
0208 ldp C_l, C_h, [srcend, -48]
0209 ldp D_l, D_h, [srcend, -64]! /* pre-index writeback: srcend -= 64 */
0210 sub dstend, dstend, tmp1 /* dstend is now 16-byte aligned */
0211 subs count, count, 128
0212 b.ls L(copy64_from_start) /* result <= 0: the 64 bytes held plus the first 64 cover the rest */
0213 /* Each pass stores the 64 bytes held in A..D and loads the 64 below them. */
0214 L(loop64_backwards):
0215 stp A_l, A_h, [dstend, -16]
0216 ldp A_l, A_h, [srcend, -16]
0217 stp B_l, B_h, [dstend, -32]
0218 ldp B_l, B_h, [srcend, -32]
0219 stp C_l, C_h, [dstend, -48]
0220 ldp C_l, C_h, [srcend, -48]
0221 stp D_l, D_h, [dstend, -64]! /* pre-index writeback: dstend -= 64 */
0222 ldp D_l, D_h, [srcend, -64]! /* pre-index writeback: srcend -= 64 */
0223 subs count, count, 64
0224 b.hi L(loop64_backwards)
0225
0226 /* Store the 64 bytes still held in A..D, then copy the first 64 bytes addressed from the start. */
0227 L(copy64_from_start):
0228 ldp G_l, G_h, [src, 48] /* overwrites count and dst (aliases of G); neither is read again */
0229 stp A_l, A_h, [dstend, -16]
0230 ldp A_l, A_h, [src, 32] /* each register pair is reloaded only after its old value was stored */
0231 stp B_l, B_h, [dstend, -32]
0232 ldp B_l, B_h, [src, 16]
0233 stp C_l, C_h, [dstend, -48]
0234 ldp C_l, C_h, [src]
0235 stp D_l, D_h, [dstend, -64]
0236 stp G_l, G_h, [dstin, 48]
0237 stp A_l, A_h, [dstin, 32]
0238 stp B_l, B_h, [dstin, 16]
0239 stp C_l, C_h, [dstin]
0240 ret
0241 SYM_FUNC_END(__pi_memcpy)
0242
0243 SYM_FUNC_ALIAS(__memcpy, __pi_memcpy) /* NOTE(review): the SYM_FUNC_ALIAS*, EXPORT_SYMBOL macros are defined outside this file; comments here follow their names */
0244 EXPORT_SYMBOL(__memcpy)
0245 SYM_FUNC_ALIAS_WEAK(memcpy, __memcpy) /* presumably a weak alias, so another definition of memcpy may take precedence; confirm against the macro */
0246 EXPORT_SYMBOL(memcpy)
0247 /* The memmove names alias the same code as memcpy: __pi_memmove -> __pi_memcpy. */
0248 SYM_FUNC_ALIAS(__pi_memmove, __pi_memcpy)
0249 /* __memmove and memmove mirror the __memcpy and memcpy pair above. */
0250 SYM_FUNC_ALIAS(__memmove, __pi_memmove)
0251 EXPORT_SYMBOL(__memmove)
0252 SYM_FUNC_ALIAS_WEAK(memmove, __memmove)
0253 EXPORT_SYMBOL(memmove)