0001
0002
0003
0004
0005
0006
0007 #include <asm/assembler.h>
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
/*
 * Symbolic register names used by the rest of this file.  Each .req
 * directive binds a name to a fixed core register:
 *   src (r0) - tested for source alignment and rounded down to a word
 *              boundary in .Lsrc_not_aligned
 *   dst (r1) - store pointer, post-incremented by every strb/str/stmia
 *   len (r2) - number of bytes still to copy
 *   sum (r3) - running checksum accumulator (updated with adcs)
 */
0018 src .req r0
0019 dst .req r1
0020 len .req r2
0021 sum .req r3
0022
/*
 * .Lzero -- exit taken from .Lless8 when len == 0.
 *
 * Copies nothing and returns the current value of sum in r0.  Note that
 * this path does not go through .Ldone, so no carry fold or rotation is
 * applied.  load_regs is a macro defined outside this view; presumably
 * it undoes save_regs and returns to the caller -- TODO confirm.
 */
0023 .Lzero: mov r0, sum
0024 load_regs
0025
0026
0027
0028
0029
0030
0031
/*
 * .Ldst_unaligned -- bring dst up to a 32-bit boundary.
 *
 * Reached with "blne" from the function entry, i.e. only when
 * (dst & 3) != 0, and returns through lr.  The entry code has already
 * checked len >= 8, so the 1 to 3 bytes copied here need no length
 * check.
 *
 * Every copied byte is also added into sum with adcs.  The sub (no "s"
 * suffix), strb and tst instructions in between are relied upon to
 * leave C untouched, so the carry flows from one adcs to the next and
 * back to the caller.
 *
 * load1b/load2b and put_byte_0/put_byte_1 are defined outside this
 * view; presumably they fetch one/two bytes via src and select the
 * even/odd byte lane of the checksum -- TODO confirm.
 */
0032 .Ldst_unaligned:
0033 tst dst, #1
0034 beq .Ldst_16bit
0035
/* dst is odd: copy a single byte, after which dst is 16-bit aligned. */
0036 load1b ip
0037 sub len, len, #1
0038 adcs sum, sum, ip, put_byte_1 @ update checksum
0039 strb ip, [dst], #1
0040 tst dst, #2
0041 reteq lr @ dst is now 32bit aligned
0042
/* dst is 16-bit but not 32-bit aligned: copy two more bytes. */
0043 .Ldst_16bit: load2b r8, ip
0044 sub len, len, #2
0045 adcs sum, sum, r8, put_byte_0
0046 strb r8, [dst], #1
0047 adcs sum, sum, ip, put_byte_1
0048 strb ip, [dst], #1
0049 ret lr @ dst is now 32bit aligned
0050
0051
0052
0053
0054
/*
 * .Lless8 -- copy and checksum fewer than 8 bytes, one byte at a time,
 * for any alignment of src and dst.
 *
 * Reached via "blo" after "cmp len, #8" in the function entry, so C is
 * clear on arrival.  teq/tst and the flag-less sub/strb are relied upon
 * not to disturb C between the adcs instructions.
 *
 * load1b/load2b and put_byte_0/put_byte_1 are defined outside this
 * view; presumably they read bytes via src and place them in the
 * even/odd checksum byte lane -- TODO confirm.
 */
0055 .Lless8: teq len, #0 @ check for zero count
0056 beq .Lzero
0057
0058
/* At least one byte remains from here on. */
0059 tst dst, #1 @ dst 16-bit aligned
0060 beq .Lless8_aligned
0061
0062
/*
 * dst is odd: copy one byte first.  If no byte pair remains afterwards
 * ((len & 6) == 0), skip straight to the single-byte tail.
 */
0063 load1b ip
0064 sub len, len, #1
0065 adcs sum, sum, ip, put_byte_1 @ update checksum
0066 strb ip, [dst], #1
0067 tst len, #6
0068 beq .Lless8_byteonly
0069
/* Copy two bytes per iteration while (len & 6) != 0. */
0070 1: load2b r8, ip
0071 sub len, len, #2
0072 adcs sum, sum, r8, put_byte_0
0073 strb r8, [dst], #1
0074 adcs sum, sum, ip, put_byte_1
0075 strb ip, [dst], #1
0076 .Lless8_aligned:
0077 tst len, #6
0078 bne 1b
/* At most one byte is left; copy it if bit 0 of len is set. */
0079 .Lless8_byteonly:
0080 tst len, #1
0081 beq .Ldone
0082 load1b r8
0083 adcs sum, sum, r8, put_byte_0 @ update checksum
0084 strb r8, [dst], #1
0085 b .Ldone
0086
/*
 * Function entry, followed by the copy path for a word-aligned source.
 *
 * In:  src, dst, len (see the .req aliases: r0, r1, r2).
 * Out: r0 = checksum (produced at .Ldone or .Lzero).
 *
 * FN_ENTRY, save_regs and the loadNb/loadNl macros are supplied from
 * outside this view.  Presumably FN_ENTRY emits the entry label,
 * save_regs pushes the working registers (including dst, which .Ldone
 * reads back from [sp, #0]) and loadNl reads N words via src -- TODO
 * confirm.
 *
 * sum is seeded with 0xffffffff (mov sum, #-1); the reason for that
 * seed is not visible here.
 *
 * The checksum is accumulated with a chain of adcs instructions.  The
 * code relies on the instructions in between (flag-less sub, stores,
 * and tst/teq/ands/bics with small immediates) leaving C unchanged.
 */
0087 FN_ENTRY
0088 save_regs
0089 mov sum, #-1
0090
0091 cmp len, #8 @ Ensure that we have at least
0092 blo .Lless8 @ 8 bytes to copy.
0093
0094 adds sum, sum, #0 @ C = 0
0095 tst dst, #3 @ Test destination alignment
0096 blne .Ldst_unaligned @ align destination, return here
0097
0098
0099
0100
0101
0102
0103
/*
 * dst is now 32-bit aligned; C holds the carry left by the alignment
 * subroutine (or 0 if it was not called).
 */
0104 tst src, #3 @ Test source alignment
0105 bne .Lsrc_not_aligned
0106
0107
0108
/*
 * src and dst both word aligned.
 * ip = len & ~15 = bytes to move in 16-byte chunks; skip if zero.
 */
0109 bics ip, len, #15
0110 beq 2f
0111
/* Four words per iteration: load, store, then add each into sum. */
0112 1: load4l r4, r5, r6, r7
0113 stmia dst!, {r4, r5, r6, r7}
0114 adcs sum, sum, r4
0115 adcs sum, sum, r5
0116 adcs sum, sum, r6
0117 adcs sum, sum, r7
0118 sub ip, ip, #16
0119 teq ip, #0
0120 bne 1b
0121
/* Remaining whole words: bit 3 of len -> two words, bit 2 -> one word. */
0122 2: ands ip, len, #12
0123 beq 4f
0124 tst ip, #8
0125 beq 3f
0126 load2l r4, r5
0127 stmia dst!, {r4, r5}
0128 adcs sum, sum, r4
0129 adcs sum, sum, r5
0130 tst ip, #4
0131 beq 4f
0132
0133 3: load1l r4
0134 str r4, [dst], #4
0135 adcs sum, sum, r4
0136
/*
 * Trailing 1 to 3 bytes (len & 3).  One more word is loaded into r4
 * and its bytes are stored individually.  get_byte_N and lspush are
 * defined outside this view; presumably get_byte_N selects the N-th
 * byte in memory order and "lspush #16" keeps only the first two bytes
 * for the checksum -- TODO confirm.
 */
0137 4: ands len, len, #3
0138 beq .Ldone
0139 load1l r4
0140 tst len, #2
0141 mov r5, r4, get_byte_0
0142 beq .Lexit
0143 adcs sum, sum, r4, lspush #16
0144 strb r5, [dst], #1
0145 mov r5, r4, get_byte_1
0146 strb r5, [dst], #1
0147 mov r5, r4, get_byte_2
/*
 * .Lexit -- shared tail, also branched to from the unaligned-source
 * variants.  If bit 0 of len is set, store the byte held in r5, mask
 * it to 8 bits and add it to sum; otherwise fall through to .Ldone
 * without doing anything.
 */
0148 .Lexit: tst len, #1
0149 strbne r5, [dst], #1
0150 andne r5, r5, #255
0151 adcsne sum, sum, r5, put_byte_0
0152
0153
0154
0155
0156
0157
0158
/*
 * .Ldone -- common exit for every path that copied at least one byte.
 *
 * Folds the last outstanding carry into the result (adc r0, sum, #0),
 * then reloads the word at [sp, #0].  According to the existing comment
 * this is the original dst, presumably stored there by save_regs
 * (defined outside this view) -- TODO confirm.  sum is reused as
 * scratch for it because the checksum already lives in r0.
 *
 * If the original dst was odd, the result is rotated right by 8 bits.
 * Presumably this compensates for bytes having been accumulated in
 * lanes relative to dst's alignment -- TODO confirm.
 *
 * load_regs is defined outside this view; presumably it restores the
 * saved registers and returns to the caller.
 */
0159 .Ldone: adc r0, sum, #0
0160 ldr sum, [sp, #0] @ dst
0161 tst sum, #1
0162 movne r0, r0, ror #8
0163 load_regs
0164
/*
 * .Lsrc_not_aligned -- dst is word aligned but src is not.
 *
 * The pending carry from dst alignment is folded into sum, the low two
 * bits of src are saved in ip, and src is rounded down to a word
 * boundary.  The first aligned word is then loaded into r5 and the
 * code dispatches on the original offset:
 *   ip == 2 -> .Lsrc2_aligned
 *   ip == 3 -> .Lsrc3_aligned
 *   ip == 1 -> falls through to the code below
 *
 * Throughout, r4 carries source bytes that were loaded but not yet
 * stored.  Each output word is built by OR-ing r4 with the leading
 * part of the next loaded word.  lspull/lspush are shift operators
 * defined outside this view; presumably the endian-appropriate right/
 * left shifts -- TODO confirm.  With pull 8 / push 24, r4 presumably
 * holds three pending bytes in this variant.
 *
 * The loop structure (16-byte chunks, then 8, then 4, then 1 to 3
 * bytes) mirrors the aligned path after the function entry.
 */
0165 .Lsrc_not_aligned:
0166 adc sum, sum, #0 @ include C from dst alignment
0167 and ip, src, #3
0168 bic src, src, #3
0169 load1l r5
0170 cmp ip, #2
0171 beq .Lsrc2_aligned
0172 bhi .Lsrc3_aligned
/* Offset 1: cmp above computed 1 - 2, which borrows and leaves C clear. */
0173 mov r4, r5, lspull #8 @ C = 0
0174 bics ip, len, #15
0175 beq 2f
/* 16 bytes per iteration; r8 supplies the carry-over for the next one. */
0176 1: load4l r5, r6, r7, r8
0177 orr r4, r4, r5, lspush #24
0178 mov r5, r5, lspull #8
0179 orr r5, r5, r6, lspush #24
0180 mov r6, r6, lspull #8
0181 orr r6, r6, r7, lspush #24
0182 mov r7, r7, lspull #8
0183 orr r7, r7, r8, lspush #24
0184 stmia dst!, {r4, r5, r6, r7}
0185 adcs sum, sum, r4
0186 adcs sum, sum, r5
0187 adcs sum, sum, r6
0188 adcs sum, sum, r7
0189 mov r4, r8, lspull #8
0190 sub ip, ip, #16
0191 teq ip, #0
0192 bne 1b
/* Remaining whole words: bit 3 of len -> two words, bit 2 -> one word. */
0193 2: ands ip, len, #12
0194 beq 4f
0195 tst ip, #8
0196 beq 3f
0197 load2l r5, r6
0198 orr r4, r4, r5, lspush #24
0199 mov r5, r5, lspull #8
0200 orr r5, r5, r6, lspush #24
0201 stmia dst!, {r4, r5}
0202 adcs sum, sum, r4
0203 adcs sum, sum, r5
0204 mov r4, r6, lspull #8
0205 tst ip, #4
0206 beq 4f
0207 3: load1l r5
0208 orr r4, r4, r5, lspush #24
0209 str r4, [dst], #4
0210 adcs sum, sum, r4
0211 mov r4, r5, lspull #8
/*
 * Trailing 1 to 3 bytes: all of them are taken from r4, no further
 * load is issued.  The last byte (if len is odd) is left in r5 for
 * .Lexit to store and add.
 */
0212 4: ands len, len, #3
0213 beq .Ldone
0214 mov r5, r4, get_byte_0
0215 tst len, #2
0216 beq .Lexit
0217 adcs sum, sum, r4, lspush #16
0218 strb r5, [dst], #1
0219 mov r5, r4, get_byte_1
0220 strb r5, [dst], #1
0221 mov r5, r4, get_byte_2
0222 b .Lexit
0223
/*
 * .Lsrc2_aligned -- unaligned-source variant for (src & 3) == 2.
 *
 * Entered from .Lsrc_not_aligned with the first aligned source word in
 * r5 and src already rounded down to a word boundary.  r4 carries the
 * source bytes loaded but not yet stored (presumably two of them, given
 * the pull 16 / push 16 shift pair; lspull/lspush are defined outside
 * this view -- TODO confirm).
 *
 * The "cmp ip, #2" that selected this path compared equal and therefore
 * left C set; "adds sum, sum, #0" clears it again without changing sum.
 */
0224 .Lsrc2_aligned: mov r4, r5, lspull #16
0225 adds sum, sum, #0
0226 bics ip, len, #15
0227 beq 2f
/* 16 bytes per iteration; r8 supplies the carry-over for the next one. */
0228 1: load4l r5, r6, r7, r8
0229 orr r4, r4, r5, lspush #16
0230 mov r5, r5, lspull #16
0231 orr r5, r5, r6, lspush #16
0232 mov r6, r6, lspull #16
0233 orr r6, r6, r7, lspush #16
0234 mov r7, r7, lspull #16
0235 orr r7, r7, r8, lspush #16
0236 stmia dst!, {r4, r5, r6, r7}
0237 adcs sum, sum, r4
0238 adcs sum, sum, r5
0239 adcs sum, sum, r6
0240 adcs sum, sum, r7
0241 mov r4, r8, lspull #16
0242 sub ip, ip, #16
0243 teq ip, #0
0244 bne 1b
/* Remaining whole words: bit 3 of len -> two words, bit 2 -> one word. */
0245 2: ands ip, len, #12
0246 beq 4f
0247 tst ip, #8
0248 beq 3f
0249 load2l r5, r6
0250 orr r4, r4, r5, lspush #16
0251 mov r5, r5, lspull #16
0252 orr r5, r5, r6, lspush #16
0253 stmia dst!, {r4, r5}
0254 adcs sum, sum, r4
0255 adcs sum, sum, r5
0256 mov r4, r6, lspull #16
0257 tst ip, #4
0258 beq 4f
0259 3: load1l r5
0260 orr r4, r4, r5, lspush #16
0261 str r4, [dst], #4
0262 adcs sum, sum, r4
0263 mov r4, r5, lspull #16
/*
 * Trailing 1 to 3 bytes.  The first two come from r4, which is added
 * to sum as a whole.  If a third byte is needed it is fetched with
 * load1b and handed to .Lexit in r5.
 */
0264 4: ands len, len, #3
0265 beq .Ldone
0266 mov r5, r4, get_byte_0
0267 tst len, #2
0268 beq .Lexit
0269 adcs sum, sum, r4
0270 strb r5, [dst], #1
0271 mov r5, r4, get_byte_1
0272 strb r5, [dst], #1
0273 tst len, #1
0274 beq .Ldone
0275 load1b r5
0276 b .Lexit
0277
/*
 * .Lsrc3_aligned -- unaligned-source variant for (src & 3) == 3.
 *
 * Entered from .Lsrc_not_aligned with the first aligned source word in
 * r5 and src already rounded down to a word boundary.  r4 carries the
 * source bytes loaded but not yet stored (presumably a single byte,
 * given the pull 24 / push 8 shift pair; lspull/lspush are defined
 * outside this view -- TODO confirm).
 *
 * The "cmp ip, #2" that selected this path (ip == 3) left C set;
 * "adds sum, sum, #0" clears it again without changing sum.
 */
0278 .Lsrc3_aligned: mov r4, r5, lspull #24
0279 adds sum, sum, #0
0280 bics ip, len, #15
0281 beq 2f
/* 16 bytes per iteration; r8 supplies the carry-over for the next one. */
0282 1: load4l r5, r6, r7, r8
0283 orr r4, r4, r5, lspush #8
0284 mov r5, r5, lspull #24
0285 orr r5, r5, r6, lspush #8
0286 mov r6, r6, lspull #24
0287 orr r6, r6, r7, lspush #8
0288 mov r7, r7, lspull #24
0289 orr r7, r7, r8, lspush #8
0290 stmia dst!, {r4, r5, r6, r7}
0291 adcs sum, sum, r4
0292 adcs sum, sum, r5
0293 adcs sum, sum, r6
0294 adcs sum, sum, r7
0295 mov r4, r8, lspull #24
0296 sub ip, ip, #16
0297 teq ip, #0
0298 bne 1b
/* Remaining whole words: bit 3 of len -> two words, bit 2 -> one word. */
0299 2: ands ip, len, #12
0300 beq 4f
0301 tst ip, #8
0302 beq 3f
0303 load2l r5, r6
0304 orr r4, r4, r5, lspush #8
0305 mov r5, r5, lspull #24
0306 orr r5, r5, r6, lspush #8
0307 stmia dst!, {r4, r5}
0308 adcs sum, sum, r4
0309 adcs sum, sum, r5
0310 mov r4, r6, lspull #24
0311 tst ip, #4
0312 beq 4f
0313 3: load1l r5
0314 orr r4, r4, r5, lspush #8
0315 str r4, [dst], #4
0316 adcs sum, sum, r4
0317 mov r4, r5, lspull #24
/*
 * Trailing 1 to 3 bytes.  Only the first comes from r4.  When two or
 * more are needed, the next word is loaded with load1l: its first byte
 * is stored and added ("lspush #24" presumably isolates that byte in an
 * odd checksum lane -- TODO confirm) and its second byte is handed to
 * .Lexit in r5.
 */
0318 4: ands len, len, #3
0319 beq .Ldone
0320 mov r5, r4, get_byte_0
0321 tst len, #2
0322 beq .Lexit
0323 strb r5, [dst], #1
0324 adcs sum, sum, r4
0325 load1l r4
0326 mov r5, r4, get_byte_0
0327 strb r5, [dst], #1
0328 adcs sum, sum, r4, lspush #24
0329 mov r5, r4, get_byte_1
0330 b .Lexit
/* FN_EXIT is defined outside this view; presumably it closes FN_ENTRY. */
0331 FN_EXIT