/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		IP/TCP/UDP checksumming routines
 *
 * Authors:	Jorge Cwik, <jorge@laser.satlink.net>
 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *		Tom May, <ftom@netcom.com>
 *		Pentium Pro/II routines:
 *		Alexander Kjeldaas <astor@guardian.no>
 *		Finn Arne Gangstad <finnag@guardian.no>
 *		Lots of code moved from tcp.c and ip.c; see those files
 *		for more names.
 *
 * Changes:	Ingo Molnar, converted csum_partial_copy() to 2.1 exception
 *			     handling.
 *		Andi Kleen,  add zeroing on error
 *		converted to pure assembler
 */
#include <linux/linkage.h>
#include <asm/errno.h>
#include <asm/asm.h>
#include <asm/export.h>
#include <asm/nospec-branch.h>

/*
 * computes a partial checksum, e.g. for TCP/UDP fragments
 */

/*
unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
 */

.text

#ifndef CONFIG_X86_USE_PPRO_CHECKSUM

	  /*
	   * Experiments with Ethernet and SLIP connections show that buff
	   * is aligned on either a 2-byte or 4-byte boundary.  We get at
	   * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
	   * Fortunately, it is easy to convert 2-byte alignment to 4-byte
	   * alignment for the unrolled loop.
	   */
SYM_FUNC_START(csum_partial)
	pushl %esi
	pushl %ebx
	movl 20(%esp),%eax	# Function arg: unsigned int sum
	movl 16(%esp),%ecx	# Function arg: int len
	movl 12(%esp),%esi	# Function arg: unsigned char *buff
	testl $3, %esi		# Check alignment.
	jz 2f			# Jump if alignment is ok.
	testl $1, %esi		# Check alignment.
	jz 10f			# Jump if alignment is boundary of 2 bytes.

	# buf is odd
	dec %ecx
	jl 8f
	movzbl (%esi), %ebx
	adcl %ebx, %eax
	roll $8, %eax
	inc %esi
	testl $2, %esi
	jz 2f
10:
	subl $2, %ecx		# Alignment uses up two bytes.
	jae 1f			# Jump if we had at least two bytes.
	addl $2, %ecx		# ecx was < 2.  Deal with it.
	jmp 4f
1:	movw (%esi), %bx
	addl $2, %esi
	addw %bx, %ax
	adcl $0, %eax
2:
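	# %edx keeps the remaining byte count; %ecx becomes the number of
	# 32-byte blocks for the unrolled loop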
	movl %ecx, %edx
	shrl $5, %ecx
	jz 2f
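	# the testl below clears CF so the adcl chain starts carry-free;
	# dec/jne in the loop preserve the carry between iterations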
	testl %esi, %esi
1:	movl (%esi), %ebx
	adcl %ebx, %eax
	movl 4(%esi), %ebx
	adcl %ebx, %eax
	movl 8(%esi), %ebx
	adcl %ebx, %eax
	movl 12(%esi), %ebx
	adcl %ebx, %eax
	movl 16(%esi), %ebx
	adcl %ebx, %eax
	movl 20(%esi), %ebx
	adcl %ebx, %eax
	movl 24(%esi), %ebx
	adcl %ebx, %eax
	movl 28(%esi), %ebx
	adcl %ebx, %eax
	lea 32(%esi), %esi
	dec %ecx
	jne 1b
	adcl $0, %eax
2:	movl %edx, %ecx
	andl $0x1c, %edx
	je 4f
	shrl $2, %edx		# This clears CF
3:	adcl (%esi), %eax
	lea 4(%esi), %esi
	dec %edx
	jne 3b
	adcl $0, %eax
4:	andl $3, %ecx
	jz 7f
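	# 1-3 trailing bytes: fold in a word if at least two remain (the
	# je below still sees the cmpl flags), then the last odd byte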
	cmpl $2, %ecx
	jb 5f
	movw (%esi),%cx
	leal 2(%esi),%esi
	je 6f
	shll $16,%ecx
5:	movb (%esi),%cl
6:	addl %ecx,%eax
	adcl $0, %eax
7:
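	# buff was odd-aligned, so the sum was rotated at entry: rotate
	# it back before returning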
	testb $1, 12(%esp)
	jz 8f
	roll $8, %eax
8:
	popl %ebx
	popl %esi
	RET
SYM_FUNC_END(csum_partial)

#else

/* Version for PentiumII/PPro */

SYM_FUNC_START(csum_partial)
	pushl %esi
	pushl %ebx
	movl 20(%esp),%eax	# Function arg: unsigned int sum
	movl 16(%esp),%ecx	# Function arg: int len
	movl 12(%esp),%esi	# Function arg: const unsigned char *buf

	testl $3, %esi
	jnz 25f
10:
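	# 4-byte aligned: handle the (len & 0x7c) head bytes by computing
	# a jump into the unrolled loop, then run len/128 full blocks
	# through it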
	movl %ecx, %edx
	movl %ecx, %ebx
	andl $0x7c, %ebx
	shrl $7, %ecx
	addl %ebx,%esi
	shrl $2, %ebx
	negl %ebx
	lea 45f(%ebx,%ebx,2), %ebx
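	# each adcl in the unrolled loop below is 3 bytes of code, so
	# 45f - 3*(dwords) lands on the entry that sums exactly the
	# (len & 0x7c)/4 head dwords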
	testl %esi, %esi
	JMP_NOSPEC ebx

	# Handle 2-byte-aligned regions
20:	addw (%esi), %ax
	lea 2(%esi), %esi
	adcl $0, %eax
	jmp 10b
25:
	testl $1, %esi
	jz 30f
	# buf is odd
	dec %ecx
	jl 90f
	movzbl (%esi), %ebx
	addl %ebx, %eax
	adcl $0, %eax
	roll $8, %eax
	inc %esi
	testl $2, %esi
	jz 10b

30:	subl $2, %ecx
	ja 20b
	je 32f
	addl $2, %ecx
	jz 80f
	movzbl (%esi),%ebx	# csumming 1 byte, 2-aligned
	addl %ebx, %eax
	adcl $0, %eax
	jmp 80f
32:
	addw (%esi), %ax	# csumming 2 bytes, 2-aligned
	adcl $0, %eax
	jmp 80f

40:
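	# sum one 128-byte block; %esi already points just past it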
	addl -128(%esi), %eax
	adcl -124(%esi), %eax
	adcl -120(%esi), %eax
	adcl -116(%esi), %eax
	adcl -112(%esi), %eax
	adcl -108(%esi), %eax
	adcl -104(%esi), %eax
	adcl -100(%esi), %eax
	adcl -96(%esi), %eax
	adcl -92(%esi), %eax
	adcl -88(%esi), %eax
	adcl -84(%esi), %eax
	adcl -80(%esi), %eax
	adcl -76(%esi), %eax
	adcl -72(%esi), %eax
	adcl -68(%esi), %eax
	adcl -64(%esi), %eax
	adcl -60(%esi), %eax
	adcl -56(%esi), %eax
	adcl -52(%esi), %eax
	adcl -48(%esi), %eax
	adcl -44(%esi), %eax
	adcl -40(%esi), %eax
	adcl -36(%esi), %eax
	adcl -32(%esi), %eax
	adcl -28(%esi), %eax
	adcl -24(%esi), %eax
	adcl -20(%esi), %eax
	adcl -16(%esi), %eax
	adcl -12(%esi), %eax
	adcl -8(%esi), %eax
	adcl -4(%esi), %eax
45:
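	# fall-through point for the computed jump and for every loop pass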
	lea 128(%esi), %esi
	adcl $0, %eax
	dec %ecx
	jge 40b
	movl %edx, %ecx
50:	andl $3, %ecx
	jz 80f

	# Handle the last 1-3 bytes without jumping
	notl %ecx		# 1->2, 2->1, 3->0, higher bits are masked
	movl $0xffffff,%ebx	# by the shll and shrl instructions
	shll $3,%ecx
	shrl %cl,%ebx
	andl -128(%esi),%ebx	# esi is 4-aligned so should be ok
	addl %ebx,%eax
	adcl $0,%eax
80:
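	# undo the entry rotation if buf was odd-aligned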
	testb $1, 12(%esp)
	jz 90f
	roll $8, %eax
90:
	popl %ebx
	popl %esi
	RET
SYM_FUNC_END(csum_partial)

#endif
EXPORT_SYMBOL(csum_partial)

/*
unsigned int csum_partial_copy_generic (const char *src, char *dst,
				  int len)
 */

/*
 * Copy from ds while checksumming, otherwise like csum_partial
 */

#define EXC(y...)			\
	9999: y;			\
	_ASM_EXTABLE_TYPE(9999b, 7f, EX_TYPE_UACCESS | EX_FLAG_CLEAR_AX)
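
/*
 * A faulting access inside EXC() is fixed up via the exception table:
 * control resumes at label 7f with %eax (the returned sum) cleared,
 * so the caller sees a zero checksum on failure.
 */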

#ifndef CONFIG_X86_USE_PPRO_CHECKSUM

#define ARGBASE 16
#define FP 12
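
/*
 * %esp offsets valid after the "subl $4,%esp" plus three pushes below:
 * the arguments live at ARGBASE+4(%esp) and up, and FP(%esp) is the
 * scratch slot that keeps the remaining length across the main loop.
 */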

SYM_FUNC_START(csum_partial_copy_generic)
	subl  $4,%esp
	pushl %edi
	pushl %esi
	pushl %ebx
	movl ARGBASE+12(%esp),%ecx	# len
	movl ARGBASE+4(%esp),%esi	# src
	movl ARGBASE+8(%esp),%edi	# dst

	movl $-1, %eax			# sum
	testl $2, %edi			# Check alignment.
	jz 2f				# Jump if alignment is ok.
	subl $2, %ecx			# Alignment uses up two bytes.
	jae 1f				# Jump if we had at least two bytes.
	addl $2, %ecx			# ecx was < 2.  Deal with it.
	jmp 4f
EXC(1:	movw (%esi), %bx	)
	addl $2, %esi
EXC(	movw %bx, (%edi)	)
	addl $2, %edi
	addw %bx, %ax
	adcl $0, %eax
2:
	movl %ecx, FP(%esp)
	shrl $5, %ecx
	jz 2f
	testl %esi, %esi		# what's wrong with clc?
EXC(1:	movl (%esi), %ebx	)
EXC(	movl 4(%esi), %edx	)
	adcl %ebx, %eax
EXC(	movl %ebx, (%edi)	)
	adcl %edx, %eax
EXC(	movl %edx, 4(%edi)	)

EXC(	movl 8(%esi), %ebx	)
EXC(	movl 12(%esi), %edx	)
	adcl %ebx, %eax
EXC(	movl %ebx, 8(%edi)	)
	adcl %edx, %eax
EXC(	movl %edx, 12(%edi)	)

EXC(	movl 16(%esi), %ebx	)
EXC(	movl 20(%esi), %edx	)
	adcl %ebx, %eax
EXC(	movl %ebx, 16(%edi)	)
	adcl %edx, %eax
EXC(	movl %edx, 20(%edi)	)

EXC(	movl 24(%esi), %ebx	)
EXC(	movl 28(%esi), %edx	)
	adcl %ebx, %eax
EXC(	movl %ebx, 24(%edi)	)
	adcl %edx, %eax
EXC(	movl %edx, 28(%edi)	)

	lea 32(%esi), %esi
	lea 32(%edi), %edi
	dec %ecx
	jne 1b
	adcl $0, %eax
2:	movl FP(%esp), %edx
	movl %edx, %ecx
	andl $0x1c, %edx
	je 4f
	shrl $2, %edx			# This clears CF
EXC(3:	movl (%esi), %ebx	)
	adcl %ebx, %eax
EXC(	movl %ebx, (%edi)	)
	lea 4(%esi), %esi
	lea 4(%edi), %edi
	dec %edx
	jne 3b
	adcl $0, %eax
4:	andl $3, %ecx
	jz 7f
	cmpl $2, %ecx
	jb 5f
EXC(	movw (%esi), %cx	)
	leal 2(%esi), %esi
EXC(	movw %cx, (%edi)	)
	leal 2(%edi), %edi
	je 6f
	shll $16,%ecx
EXC(5:	movb (%esi), %cl	)
EXC(	movb %cl, (%edi)	)
6:	addl %ecx, %eax
	adcl $0, %eax
7:

	popl %ebx
	popl %esi
	popl %edi
	popl %ecx			# equivalent to addl $4,%esp
	RET
SYM_FUNC_END(csum_partial_copy_generic)

#else

/* Version for PentiumII/PPro */

#define ROUND1(x) \
	EXC(movl x(%esi), %ebx	)	;	\
	addl %ebx, %eax			;	\
	EXC(movl %ebx, x(%edi)	)	;

#define ROUND(x) \
	EXC(movl x(%esi), %ebx	)	;	\
	adcl %ebx, %eax			;	\
	EXC(movl %ebx, x(%edi)	)	;

#define ARGBASE 12

SYM_FUNC_START(csum_partial_copy_generic)
	pushl %ebx
	pushl %edi
	pushl %esi
	movl ARGBASE+4(%esp),%esi	#src
	movl ARGBASE+8(%esp),%edi	#dst
	movl ARGBASE+12(%esp),%ecx	#len
	movl $-1, %eax			#sum
#	movl %ecx, %edx
	movl %ecx, %ebx
	movl %esi, %edx
	shrl $6, %ecx
	andl $0x3c, %ebx
	negl %ebx
	subl %ebx, %esi
	subl %ebx, %edi
	lea  -1(%esi),%edx
	andl $-32,%edx
	lea 3f(%ebx,%ebx), %ebx
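	# each ROUND expands to 8 bytes of code, so 3f - 2*(len & 0x3c)
	# is the entry that copies exactly the (len & 0x3c)/4 head dwords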
	testl %esi, %esi
	JMP_NOSPEC ebx
1:	addl $64,%esi
	addl $64,%edi
	EXC(movb -32(%edx),%bl)	; EXC(movb (%edx),%bl)
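	# %edx is a 32-byte-aligned read-ahead pointer into the source;
	# the two byte loads above touch upcoming source cache lines
	# early (a software prefetch before the ROUNDs below)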
	ROUND1(-64) ROUND(-60) ROUND(-56) ROUND(-52)
	ROUND (-48) ROUND(-44) ROUND(-40) ROUND(-36)
	ROUND (-32) ROUND(-28) ROUND(-24) ROUND(-20)
	ROUND (-16) ROUND(-12) ROUND(-8)  ROUND(-4)
3:	adcl $0,%eax
	addl $64, %edx
	dec %ecx
	jge 1b
4:	movl ARGBASE+12(%esp),%edx	#len
	andl $3, %edx
	jz 7f
	cmpl $2, %edx
	jb 5f
EXC(	movw (%esi), %dx	)
	leal 2(%esi), %esi
EXC(	movw %dx, (%edi)	)
	leal 2(%edi), %edi
	je 6f
	shll $16,%edx
5:
EXC(	movb (%esi), %dl	)
EXC(	movb %dl, (%edi)	)
6:	addl %edx, %eax
	adcl $0, %eax
7:

	popl %esi
	popl %edi
	popl %ebx
	RET
SYM_FUNC_END(csum_partial_copy_generic)

#undef ROUND
#undef ROUND1

#endif
EXPORT_SYMBOL(csum_partial_copy_generic)