/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * INET     An implementation of the TCP/IP protocol suite for the LINUX
 *      operating system.  INET is implemented using the  BSD Socket
 *      interface as the means of communication with the user level.
 *
 *      IP/TCP/UDP checksumming routines
 *
 * Authors: Jorge Cwik, <jorge@laser.satlink.net>
 *      Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *      Tom May, <ftom@netcom.com>
 *              Pentium Pro/II routines:
 *              Alexander Kjeldaas <astor@guardian.no>
 *              Finn Arne Gangstad <finnag@guardian.no>
 *      Lots of code moved from tcp.c and ip.c; see those files
 *      for more names.
 *
 * Changes:     Ingo Molnar, converted csum_partial_copy() to 2.1 exception
 *               handling.
 *      Andi Kleen,  add zeroing on error
 *                   converted to pure assembler
 */

#include <linux/linkage.h>
#include <asm/errno.h>
#include <asm/asm.h>
#include <asm/export.h>
#include <asm/nospec-branch.h>

/*
 * computes a partial checksum, e.g. for TCP/UDP fragments
 */

/*
unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
 */
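
/*
 * For reference, a rough C model of those semantics (a sketch only, not
 * the kernel's actual helper; the name csum_partial_sketch is ours, and
 * it assumes x86 behaviour: little-endian byte order and tolerated
 * unaligned 16-bit loads):
 *
 *	unsigned int csum_partial_sketch(const unsigned char *buff,
 *					 int len, unsigned int sum)
 *	{
 *		unsigned long long acc = sum;
 *
 *		while (len >= 2) {		// sum 16-bit words
 *			acc += *(const unsigned short *)buff;
 *			buff += 2;
 *			len -= 2;
 *		}
 *		if (len)			// trailing odd byte
 *			acc += *buff;
 *		acc = (acc & 0xffffffffULL) + (acc >> 32);  // fold the
 *		acc = (acc & 0xffffffffULL) + (acc >> 32);  // carries back in
 *		return (unsigned int)acc;
 *	}
 *
 * The assembler below computes a value that is one's-complement
 * equivalent to this; csum_fold() reduces either to the same 16-bit
 * checksum.
 */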

.text

#ifndef CONFIG_X86_USE_PPRO_CHECKSUM

      /*
       * Experiments with Ethernet and SLIP connections show that buff
       * is aligned on either a 2-byte or 4-byte boundary.  We get at
       * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
       * Fortunately, it is easy to convert 2-byte alignment to 4-byte
       * alignment for the unrolled loop.
       */
SYM_FUNC_START(csum_partial)
    pushl %esi
    pushl %ebx
    movl 20(%esp),%eax  # Function arg: unsigned int sum
    movl 16(%esp),%ecx  # Function arg: int len
    movl 12(%esp),%esi  # Function arg: unsigned char *buff
    testl $3, %esi      # Check alignment.
    jz 2f               # Jump if alignment is ok (4-byte aligned).
    testl $1, %esi      # Check for an odd address.
    jz 10f              # Jump if buff is 2-byte aligned.

    # buf is odd
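    # Starting one byte off swaps the byte lanes of every 16-bit word
    # that follows.  Byte-swapping commutes with the one's-complement
    # sum, so rotating the accumulator by 8 here, and once more at 7:
    # below (keyed off the low bit of buff), puts the lanes right.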
    dec %ecx
    jl 8f               # len was <= 0: nothing to do.
    movzbl (%esi), %ebx # sum in the leading odd byte
    adcl %ebx, %eax     # CF is 0 here (testl cleared it)
    roll $8, %eax
    inc %esi
    testl $2, %esi
    jz 2f               # now 4-byte aligned
10:
    subl $2, %ecx       # Alignment uses up two bytes.
    jae 1f              # Jump if we had at least two bytes.
    addl $2, %ecx       # ecx was < 2.  Deal with it.
    jmp 4f
1:  movw (%esi), %bx
    addl $2, %esi
    addw %bx, %ax
    adcl $0, %eax
2:
    movl %ecx, %edx     # save len for the tail
    shrl $5, %ecx       # ecx = number of 32-byte chunks
    jz 2f
    testl %esi, %esi    # clear CF before the adcl chain
1:  movl (%esi), %ebx
    adcl %ebx, %eax
    movl 4(%esi), %ebx
    adcl %ebx, %eax
    movl 8(%esi), %ebx
    adcl %ebx, %eax
    movl 12(%esi), %ebx
    adcl %ebx, %eax
    movl 16(%esi), %ebx
    adcl %ebx, %eax
    movl 20(%esi), %ebx
    adcl %ebx, %eax
    movl 24(%esi), %ebx
    adcl %ebx, %eax
    movl 28(%esi), %ebx
    adcl %ebx, %eax
    lea 32(%esi), %esi
    dec %ecx
    jne 1b
    adcl $0, %eax
2:  movl %edx, %ecx
    andl $0x1c, %edx
    je 4f
    shrl $2, %edx       # This clears CF
3:  adcl (%esi), %eax
    lea 4(%esi), %esi
    dec %edx
    jne 3b
    adcl $0, %eax
4:  andl $3, %ecx
    jz 7f
    cmpl $2, %ecx
    jb 5f
    movw (%esi),%cx
    leal 2(%esi),%esi
    je 6f
    shll $16,%ecx
5:  movb (%esi),%cl
6:  addl %ecx,%eax
    adcl $0, %eax
7:
    testb $1, 12(%esp)  # was buff odd-aligned?
    jz 8f
    roll $8, %eax       # undo the byte-lane swap from above
8:
    popl %ebx
    popl %esi
    RET
SYM_FUNC_END(csum_partial)

#else

/* Version for PentiumII/PPro */

SYM_FUNC_START(csum_partial)
    pushl %esi
    pushl %ebx
    movl 20(%esp),%eax  # Function arg: unsigned int sum
    movl 16(%esp),%ecx  # Function arg: int len
    movl 12(%esp),%esi  # Function arg: const unsigned char *buf

    testl $3, %esi      # 4-byte aligned?
    jnz 25f             # no: handle misalignment first
10:
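    # Dispatch into the unrolled loop below: ecx becomes the number of
    # full 128-byte blocks and ebx the dword count of the partial
    # block.  Each "adcl off(%esi), %eax" at 40: encodes to 3 bytes,
    # so jumping to 45f minus 3*ebx runs exactly the last ebx adds.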
    movl %ecx, %edx     # save len for the tail
    movl %ecx, %ebx
    andl $0x7c, %ebx    # ebx = len % 128, rounded down to a dword
    shrl $7, %ecx       # ecx = number of full 128-byte blocks
    addl %ebx,%esi      # skip ahead; the partial block is summed backwards
    shrl $2, %ebx       # ebx = dwords in the partial block
    negl %ebx
    lea 45f(%ebx,%ebx,2), %ebx
    testl %esi, %esi    # clear CF
    JMP_NOSPEC ebx

    # Handle 2-byte-aligned regions
20: addw (%esi), %ax
    lea 2(%esi), %esi
    adcl $0, %eax
    jmp 10b
25:
    testl $1, %esi      # odd address?
    jz 30f              # no: buf is 2-byte aligned
    # buf is odd
    dec %ecx
    jl 90f              # len was <= 0: done
    movzbl (%esi), %ebx
    addl %ebx, %eax
    adcl $0, %eax
    roll $8, %eax       # swap byte lanes; undone at 80: if buf was odd
    inc %esi
    testl $2, %esi
    jz 10b              # now 4-byte aligned

30: subl $2, %ecx
    ja 20b              # len > 2: sum a word, then back to the main path
    je 32f              # len == 2
    addl $2, %ecx
    jz 80f              # len == 0: done
    movzbl (%esi),%ebx  # csumming 1 byte, 2-aligned
    addl %ebx, %eax
    adcl $0, %eax
    jmp 80f
32:
    addw (%esi), %ax    # csumming 2 bytes, 2-aligned
    adcl $0, %eax
    jmp 80f

40:
    addl -128(%esi), %eax
    adcl -124(%esi), %eax
    adcl -120(%esi), %eax
    adcl -116(%esi), %eax
    adcl -112(%esi), %eax
    adcl -108(%esi), %eax
    adcl -104(%esi), %eax
    adcl -100(%esi), %eax
    adcl -96(%esi), %eax
    adcl -92(%esi), %eax
    adcl -88(%esi), %eax
    adcl -84(%esi), %eax
    adcl -80(%esi), %eax
    adcl -76(%esi), %eax
    adcl -72(%esi), %eax
    adcl -68(%esi), %eax
    adcl -64(%esi), %eax
    adcl -60(%esi), %eax
    adcl -56(%esi), %eax
    adcl -52(%esi), %eax
    adcl -48(%esi), %eax
    adcl -44(%esi), %eax
    adcl -40(%esi), %eax
    adcl -36(%esi), %eax
    adcl -32(%esi), %eax
    adcl -28(%esi), %eax
    adcl -24(%esi), %eax
    adcl -20(%esi), %eax
    adcl -16(%esi), %eax
    adcl -12(%esi), %eax
    adcl -8(%esi), %eax
    adcl -4(%esi), %eax
45:
    lea 128(%esi), %esi
    adcl $0, %eax
    dec %ecx
    jge 40b
    movl %edx, %ecx
50: andl $3, %ecx
    jz 80f

    # Handle the last 1-3 bytes without jumping
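    # Worked example for len%4 == 2: notl turns 2 into 0xfffffffd,
    # shll $3 leaves 0xe8 in %cl, and shrl masks the shift count to
    # 5 bits, so %ebx = 0xffffff >> 8 = 0xffff -- keeping exactly the
    # two valid bytes of the final dword.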
    notl %ecx           # 1->2, 2->1, 3->0, higher bits are masked
    movl $0xffffff,%ebx # by the shll and shrl instructions
    shll $3,%ecx
    shrl %cl,%ebx
    andl -128(%esi),%ebx    # esi is 4-aligned so should be ok
    addl %ebx,%eax
    adcl $0,%eax
80:
    testb $1, 12(%esp)  # was buf odd-aligned?
    jz 90f
    roll $8, %eax       # undo the byte-lane swap from 25:
90:
    popl %ebx
    popl %esi
    RET
SYM_FUNC_END(csum_partial)

#endif
EXPORT_SYMBOL(csum_partial)

/*
unsigned int csum_partial_copy_generic (const char *src, char *dst,
                  int len)
 */

/*
 * Copy from ds while checksumming, otherwise like csum_partial
 */
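
/*
 * The sum is seeded with ~0 ("movl $-1, %eax" below), so a successful
 * copy never returns 0, and the EXC() fixups below send any faulting
 * access to local label 7 (the epilogue) with %eax cleared
 * (EX_FLAG_CLEAR_AX).  A 0 return therefore means the access faulted.
 */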

#define EXC(y...)                       \
    9999: y;                        \
    _ASM_EXTABLE_TYPE(9999b, 7f, EX_TYPE_UACCESS | EX_FLAG_CLEAR_AX)

#ifndef CONFIG_X86_USE_PPRO_CHECKSUM

#define ARGBASE 16
#define FP      12

SYM_FUNC_START(csum_partial_copy_generic)
    subl  $4,%esp
    pushl %edi
    pushl %esi
    pushl %ebx
    movl ARGBASE+12(%esp),%ecx  # len
    movl ARGBASE+4(%esp),%esi   # src
    movl ARGBASE+8(%esp),%edi   # dst

    movl $-1, %eax          # sum = ~0, so success never returns 0
    testl $2, %edi          # Check alignment.
    jz 2f                   # Jump if alignment is ok.
    subl $2, %ecx           # Alignment uses up two bytes.
    jae 1f                  # Jump if we had at least two bytes.
    addl $2, %ecx           # ecx was < 2.  Deal with it.
    jmp 4f
EXC(1:  movw (%esi), %bx    )
    addl $2, %esi
EXC(    movw %bx, (%edi)    )
    addl $2, %edi
    addw %bx, %ax
    adcl $0, %eax
2:
    movl %ecx, FP(%esp)
    shrl $5, %ecx
    jz 2f
    testl %esi, %esi        # clear CF before the adcl chain (what's wrong with clc?)
EXC(1:  movl (%esi), %ebx   )
EXC(    movl 4(%esi), %edx  )
    adcl %ebx, %eax
EXC(    movl %ebx, (%edi)   )
    adcl %edx, %eax
EXC(    movl %edx, 4(%edi)  )

EXC(    movl 8(%esi), %ebx  )
EXC(    movl 12(%esi), %edx )
    adcl %ebx, %eax
EXC(    movl %ebx, 8(%edi)  )
    adcl %edx, %eax
EXC(    movl %edx, 12(%edi) )

EXC(    movl 16(%esi), %ebx )
EXC(    movl 20(%esi), %edx )
    adcl %ebx, %eax
EXC(    movl %ebx, 16(%edi) )
    adcl %edx, %eax
EXC(    movl %edx, 20(%edi) )

EXC(    movl 24(%esi), %ebx )
EXC(    movl 28(%esi), %edx )
    adcl %ebx, %eax
EXC(    movl %ebx, 24(%edi) )
    adcl %edx, %eax
EXC(    movl %edx, 28(%edi) )

    lea 32(%esi), %esi
    lea 32(%edi), %edi
    dec %ecx
    jne 1b
    adcl $0, %eax
2:  movl FP(%esp), %edx
    movl %edx, %ecx
    andl $0x1c, %edx
    je 4f
    shrl $2, %edx           # This clears CF
EXC(3:  movl (%esi), %ebx   )
    adcl %ebx, %eax
EXC(    movl %ebx, (%edi)   )
    lea 4(%esi), %esi
    lea 4(%edi), %edi
    dec %edx
    jne 3b
    adcl $0, %eax
4:  andl $3, %ecx
    jz 7f
    cmpl $2, %ecx
    jb 5f
EXC(    movw (%esi), %cx    )
    leal 2(%esi), %esi
EXC(    movw %cx, (%edi)    )
    leal 2(%edi), %edi
    je 6f
    shll $16,%ecx
EXC(5:  movb (%esi), %cl    )
EXC(    movb %cl, (%edi)    )
6:  addl %ecx, %eax
    adcl $0, %eax
7:

    popl %ebx
    popl %esi
    popl %edi
    popl %ecx           # equivalent to addl $4,%esp
    RET
SYM_FUNC_END(csum_partial_copy_generic)

#else

/* Version for PentiumII/PPro */

#define ROUND1(x) \
    EXC(movl x(%esi), %ebx  )   ;   \
    addl %ebx, %eax         ;   \
    EXC(movl %ebx, x(%edi)  )   ;

#define ROUND(x) \
    EXC(movl x(%esi), %ebx  )   ;   \
    adcl %ebx, %eax         ;   \
    EXC(movl %ebx, x(%edi)  )   ;
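
/*
 * ROUND1 opens a chain with addl (ignoring any stale carry); each
 * following ROUND folds the previous carry in with adcl.  With a
 * one-byte displacement each expansion is 8 bytes of code (3-byte
 * load + 2-byte add + 3-byte store), which the computed jump below
 * depends on.
 */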

#define ARGBASE 12

SYM_FUNC_START(csum_partial_copy_generic)
    pushl %ebx
    pushl %edi
    pushl %esi
    movl ARGBASE+4(%esp),%esi   # src
    movl ARGBASE+8(%esp),%edi   # dst
    movl ARGBASE+12(%esp),%ecx  # len
    movl $-1, %eax              # sum = ~0, so success never returns 0
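    # Dispatch into the unrolled copy loop: ecx becomes the number of
    # full 64-byte blocks and ebx minus the dword-aligned byte count of
    # the partial block.  Each ROUND expands to 8 bytes of code, so
    # "lea 3f(%ebx,%ebx)" backs up by exactly one ROUND per remaining
    # dword; esi and edi are pre-advanced to match.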
#   movl %ecx, %edx
    movl %ecx, %ebx
    movl %esi, %edx
    shrl $6, %ecx       # ecx = number of full 64-byte blocks
    andl $0x3c, %ebx    # ebx = len % 64, rounded down to a dword
    negl %ebx
    subl %ebx, %esi     # pre-advance src past the partial block
    subl %ebx, %edi     # pre-advance dst likewise
    lea  -1(%esi),%edx
    andl $-32,%edx      # edx = 32-byte-aligned src cursor for the pre-touch
    lea 3f(%ebx,%ebx), %ebx
    testl %esi, %esi    # clear CF
    JMP_NOSPEC ebx
1:  addl $64,%esi
    addl $64,%edi
    EXC(movb -32(%edx),%bl) ; EXC(movb (%edx),%bl) # touch both 32-byte src lines of this block early (EXC catches a fault)
    ROUND1(-64) ROUND(-60) ROUND(-56) ROUND(-52)
    ROUND (-48) ROUND(-44) ROUND(-40) ROUND(-36)
    ROUND (-32) ROUND(-28) ROUND(-24) ROUND(-20)
    ROUND (-16) ROUND(-12) ROUND(-8)  ROUND(-4)
3:  adcl $0,%eax
    addl $64, %edx      # advance the pre-touch cursor in step with esi
    dec %ecx
    jge 1b
4:  movl ARGBASE+12(%esp),%edx  # len
    andl $3, %edx
    jz 7f
    cmpl $2, %edx
    jb 5f
EXC(    movw (%esi), %dx         )
    leal 2(%esi), %esi
EXC(    movw %dx, (%edi)         )
    leal 2(%edi), %edi
    je 6f
    shll $16,%edx
5:
EXC(    movb (%esi), %dl         )
EXC(    movb %dl, (%edi)         )
6:  addl %edx, %eax
    adcl $0, %eax
7:

    popl %esi
    popl %edi
    popl %ebx
    RET
SYM_FUNC_END(csum_partial_copy_generic)

#undef ROUND
#undef ROUND1

#endif
EXPORT_SYMBOL(csum_partial_copy_generic)