0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024 dstin .req x0  /* destination pointer as passed in; in the visible code it is only read (copied into dst) */
0025 src .req x1  /* source pointer; passed as the address argument to every ld* macro */
0026 count .req x2  /* byte count; decremented and bit-tested as the copy proceeds */
0027 tmp1 .req x3  /* scratch: carries 8-byte copies, and the masked count in .Ltail63 */
0028 tmp1w .req w3  /* 32-bit view of tmp1, used for the 1-, 2- and 4-byte copies */
0029 tmp2 .req x4  /* scratch: number of bytes needed to bring src to a 16-byte boundary */
0030 tmp2w .req w4  /* 32-bit view of tmp2; not referenced in the visible code */
0031 dst .req x6  /* working destination pointer, initialised from dstin */
0032 /* A_l..D_h: four register pairs; each pair carries 16 bytes through the ldp1/stp1 macros. */
0033 A_l .req x7
0034 A_h .req x8
0035 B_l .req x9
0036 B_h .req x10
0037 C_l .req x11
0038 C_h .req x12
0039 D_l .req x13
0040 D_h .req x14
0041
0042 mov dst, dstin  /* work on a copy; dstin (x0) is not written anywhere in the visible code */
0043 cmp count, #16
0044 /* Fewer than 16 bytes (unsigned compare): skip the alignment step and use the bit-tested tail copy. */
0045 b.lo .Ltiny15
0046 /* tmp2 = (-src) & 15 = number of bytes needed to advance src to a 16-byte boundary. */
0047 neg tmp2, src
0048 ands tmp2, tmp2, #15
0049 b.eq .LSrcAligned  /* src is already 16-byte aligned */
0050 sub count, count, tmp2  /* count >= 16 > tmp2 here, so this cannot underflow */
0051 /* Copy the tmp2 leading bytes: each set bit of tmp2 (1, 2, 4, 8) selects one copy of that size. */
0052 /* NOTE(review): ldrb1/strb1, ldrh1/strh1 and ldr1/str1 are macros defined outside this view; */
0053 /* the comments here assume each performs the access and then advances its pointer argument */
0054 /* by the immediate - confirm against the including file. */
0055
0056
0057 tbz tmp2, #0, 1f  /* bit 0 clear: no single byte to copy */
0058 ldrb1 tmp1w, src, #1
0059 strb1 tmp1w, dst, #1
0060 1:
0061 tbz tmp2, #1, 2f  /* bit 1 clear: no halfword to copy */
0062 ldrh1 tmp1w, src, #2
0063 strh1 tmp1w, dst, #2
0064 2:
0065 tbz tmp2, #2, 3f  /* bit 2 clear: no 4-byte word to copy */
0066 ldr1 tmp1w, src, #4
0067 str1 tmp1w, dst, #4
0068 3:
0069 tbz tmp2, #3, .LSrcAligned  /* bit 3 clear: no 8-byte doubleword to copy */
0070 ldr1 tmp1, src, #8
0071 str1 tmp1, dst, #8
0072 /* Falls through to .LSrcAligned. */
0073 .LSrcAligned:
0074 cmp count, #64
0075 b.ge .Lcpy_over64  /* 64 or more bytes left; NOTE(review): signed compare, unlike the unsigned b.lo at entry */
0076 /* Otherwise fall through with fewer than 64 bytes remaining. */
0077
0078
0079
0080 .Ltail63:
0081 /* Copy up to 48 bytes as 16-byte pairs. Only bits 4-5 of count are examined, so this also works */
0082 /* when count still carries the -128 bias applied at .Lcpy_over64: that bias and the 64-byte loop */
0083 /* decrement are multiples of 64 and leave the low six bits of count unchanged. */
0084 /* NOTE(review): assumes ldp1/stp1 (macros defined outside this view) advance src/dst by #16 - confirm. */
0085 ands tmp1, count, #0x30
0086 b.eq .Ltiny15  /* no whole 16-byte chunk left */
0087 cmp tmp1w, #0x20
0088 b.eq 1f  /* 0x20: two chunks */
0089 b.lt 2f  /* 0x10: one chunk */
0090 ldp1 A_l, A_h, src, #16  /* 0x30: three chunks, fall through all three copies */
0091 stp1 A_l, A_h, dst, #16
0092 1:
0093 ldp1 A_l, A_h, src, #16
0094 stp1 A_l, A_h, dst, #16
0095 2:
0096 ldp1 A_l, A_h, src, #16
0097 stp1 A_l, A_h, dst, #16
0098 .Ltiny15:
0099 /* Copy the final 0-15 bytes. Bits 3, 2, 1 and 0 of count select an 8-, 4-, 2- and 1-byte copy, */
0100 /* in that order; the higher bits of count are ignored here. */
0101 /* Reached from the entry check (count < 16), by branch from .Ltail63, or by falling through from it. */
0102 /* NOTE(review): ldr1/str1, ldrh1/strh1 and ldrb1/strb1 are macros defined outside this view; */
0103 /* the comments assume each performs the access and then advances its pointer argument by the */
0104 /* immediate - confirm against the including file. */
0105
0106
0107
0108
0109
0110 tbz count, #3, 1f  /* bit 3 clear: no 8-byte copy */
0111 ldr1 tmp1, src, #8
0112 str1 tmp1, dst, #8
0113 1:
0114 tbz count, #2, 2f  /* bit 2 clear: no 4-byte copy */
0115 ldr1 tmp1w, src, #4
0116 str1 tmp1w, dst, #4
0117 2:
0118 tbz count, #1, 3f  /* bit 1 clear: no 2-byte copy */
0119 ldrh1 tmp1w, src, #2
0120 strh1 tmp1w, dst, #2
0121 3:
0122 tbz count, #0, .Lexitfunc  /* bit 0 clear: nothing left to copy */
0123 ldrb1 tmp1w, src, #1
0124 strb1 tmp1w, dst, #1
0125
0126 b .Lexitfunc
0127
0128 .Lcpy_over64:
0129 subs count, count, #128  /* bias count by -128; the flags tell whether 128 or more bytes remain */
0130 b.ge .Lcpy_body_large
0131 /* Fewer than 128 bytes remain (in the visible code this label is only reached with at least 64): */
0132 /* copy exactly 64 bytes as four 16-byte pairs, with loads and stores interleaved. */
0133 /* NOTE(review): assumes ldp1/stp1 (macros defined outside this view) advance src/dst by #16 - confirm. */
0134
0135 ldp1 A_l, A_h, src, #16
0136 stp1 A_l, A_h, dst, #16
0137 ldp1 B_l, B_h, src, #16
0138 ldp1 C_l, C_h, src, #16
0139 stp1 B_l, B_h, dst, #16
0140 stp1 C_l, C_h, dst, #16
0141 ldp1 D_l, D_h, src, #16
0142 stp1 D_l, D_h, dst, #16
0143
0144 tst count, #0x3f  /* the low six bits are unaffected by the -128 bias: bytes still to copy */
0145 b.ne .Ltail63
0146 b .Lexitfunc
0147 /* Main loop for copies of 128 bytes or more; entered with count already reduced by 128. */
0148 /* .Lcpy_body_large is aligned to 2^L1_CACHE_SHIFT bytes (L1_CACHE_SHIFT is defined outside this view). */
0149 /* NOTE(review): assumes ldp1/stp1 (macros defined outside this view) advance src/dst by #16 - confirm. */
0150
0151
0152 .p2align L1_CACHE_SHIFT
0153 .Lcpy_body_large:
0154 /* Pre-load the first 64 bytes into the A..D pairs. */
0155 ldp1 A_l, A_h, src, #16
0156 ldp1 B_l, B_h, src, #16
0157 ldp1 C_l, C_h, src, #16
0158 ldp1 D_l, D_h, src, #16
0159 1:
0160 /* Each iteration stores the 64 bytes loaded previously while loading the next 64, */
0161 /* alternating stp1 and ldp1 pair by pair. */
0162
0163
0164 stp1 A_l, A_h, dst, #16
0165 ldp1 A_l, A_h, src, #16
0166 stp1 B_l, B_h, dst, #16
0167 ldp1 B_l, B_h, src, #16
0168 stp1 C_l, C_h, dst, #16
0169 ldp1 C_l, C_h, src, #16
0170 stp1 D_l, D_h, dst, #16
0171 ldp1 D_l, D_h, src, #16
0172 subs count, count, #64
0173 b.ge 1b  /* continue while the biased count is still non-negative */
0174 stp1 A_l, A_h, dst, #16  /* drain: store the last 64 bytes that were loaded */
0175 stp1 B_l, B_h, dst, #16
0176 stp1 C_l, C_h, dst, #16
0177 stp1 D_l, D_h, dst, #16
0178
0179 tst count, #0x3f  /* any remainder below 64 bytes? */
0180 b.ne .Ltail63
0181 .Lexitfunc:  /* common exit label; the code that follows it is outside this view */