/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * INET     An implementation of the TCP/IP protocol suite for the LINUX
 *      operating system.  INET is implemented using the BSD Socket
 *      interface as the means of communication with the user level.
 *
 *      IP/TCP/UDP checksumming routines
 *
 * Authors: Jorge Cwik, <jorge@laser.satlink.net>
 *      Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *      Tom May, <ftom@netcom.com>
 *      Pentium Pro/II routines:
 *      Alexander Kjeldaas <astor@guardian.no>
 *      Finn Arne Gangstad <finnag@guardian.no>
 *      Lots of code moved from tcp.c and ip.c; see those files
 *      for more names.
 *
 * Changes: Ingo Molnar, converted csum_partial_copy() to 2.1 exception
 *          handling.
 *      Andi Kleen, add zeroing on error
 *                  converted to pure assembler
 */

#include <asm/errno.h>
#include <asm/asm.h>
#include <asm/export.h>

/*
 * computes a partial checksum, e.g. for TCP/UDP fragments
 */

/*
unsigned int csum_partial(const unsigned char *buff, int len, unsigned int sum)
 */
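
/*
 * For reference, a rough C sketch of what this routine computes (an
 * illustration only, assuming a little-endian machine and tolerating
 * the misaligned 16-bit loads that x86 permits; the kernel's portable
 * version is do_csum() in lib/checksum.c).  The exact 32-bit value may
 * differ from the assembly's, but it folds to the same 16-bit checksum:
 *
 *	unsigned int csum_sketch(const unsigned char *buff, int len,
 *				 unsigned int sum)
 *	{
 *		unsigned long long acc = sum;
 *
 *		while (len >= 2) {
 *			acc += *(const unsigned short *)buff;
 *			buff += 2;
 *			len -= 2;
 *		}
 *		if (len)
 *			acc += *buff;	// trailing byte, low bits
 *		// End-around carry: fold the high half back in twice.
 *		acc = (acc & 0xffffffffULL) + (acc >> 32);
 *		acc = (acc & 0xffffffffULL) + (acc >> 32);
 *		return (unsigned int)acc;
 *	}
 */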

.text
.align 4
.globl csum_partial

#ifndef CONFIG_X86_USE_PPRO_CHECKSUM

    /*
     * Experiments with Ethernet and SLIP connections show that buff
     * is aligned on either a 2-byte or 4-byte boundary.  We get at
     * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
     * Fortunately, it is easy to convert 2-byte alignment to 4-byte
     * alignment for the unrolled loop.
     */
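    /*
     * Stack layout after the two pushes below (32-bit cdecl):
     *     0(%esp)  saved %ebx
     *     4(%esp)  saved %esi
     *     8(%esp)  return address
     *    12(%esp)  buff
     *    16(%esp)  len
     *    20(%esp)  sum
     */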
csum_partial:
    pushl %esi
    pushl %ebx
    movl 20(%esp),%eax  # Function arg: unsigned int sum
    movl 16(%esp),%ecx  # Function arg: int len
    movl 12(%esp),%esi  # Function arg: unsigned char *buff
    testl $2, %esi      # Check alignment.
    jz 2f           # Jump if alignment is ok.
    subl $2, %ecx       # Alignment uses up two bytes.
    jae 1f          # Jump if we had at least two bytes.
    addl $2, %ecx       # ecx was < 2.  Deal with it.
    jmp 4f
1:  movw (%esi), %bx    # Fold the leading 16-bit word into the sum
    addl $2, %esi       # so that the main loop runs 4-byte aligned.
    addw %bx, %ax
    adcl $0, %eax       # End-around carry: fold CF back into the sum.
2:
    movl %ecx, %edx     # Save len; the low bits are needed again below.
    shrl $5, %ecx       # ecx = number of 32-byte blocks.
    jz 2f
    testl %esi, %esi    # Clears CF for the adcl chain below.
1:  movl (%esi), %ebx
    adcl %ebx, %eax
    movl 4(%esi), %ebx
    adcl %ebx, %eax
    movl 8(%esi), %ebx
    adcl %ebx, %eax
    movl 12(%esi), %ebx
    adcl %ebx, %eax
    movl 16(%esi), %ebx
    adcl %ebx, %eax
    movl 20(%esi), %ebx
    adcl %ebx, %eax
    movl 24(%esi), %ebx
    adcl %ebx, %eax
    movl 28(%esi), %ebx
    adcl %ebx, %eax
    lea 32(%esi), %esi
    dec %ecx
    jne 1b
    adcl $0, %eax
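    /*
     * Each adcl above feeds its carry into the next addition; the
     * final adcl $0 folds the last carry back into the sum (the
     * end-around carry of one's complement arithmetic).
     */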
2:  movl %edx, %ecx     # Recover len; handle the remaining dwords.
    andl $0x1c, %edx
    je 4f
    shrl $2, %edx       # This clears CF
3:  adcl (%esi), %eax
    lea 4(%esi), %esi
    dec %edx
    jne 3b
    adcl $0, %eax
4:  andl $3, %ecx       # 0-3 trailing bytes left.
    jz 7f
    cmpl $2, %ecx
    jb 5f           # Exactly one byte left.
    movw (%esi),%cx     # At least two: load them (flags are unchanged).
    leal 2(%esi),%esi
    je 6f           # Exactly two bytes left.
    shll $16,%ecx       # Three bytes: make room for the last one.
5:  movb (%esi),%cl
6:  addl %ecx,%eax
    adcl $0, %eax
7:
    popl %ebx
    popl %esi
    RET

#else

/* Version for PentiumII/PPro */
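
/*
 * Strategy: len is split as 128*K + 4*N + R (R = len & 3).  The N
 * leading dwords are summed first by jumping into the middle of the
 * unrolled adcl chain at 40: below, Duff's-device style (each adcl is
 * 3 bytes long, hence the 45f(%ebx,%ebx,2) address computation); the
 * K full 128-byte blocks then run through the whole chain; the final
 * R bytes are masked out of the last dword at 50:.
 */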

csum_partial:
    pushl %esi
    pushl %ebx
    movl 20(%esp),%eax  # Function arg: unsigned int sum
    movl 16(%esp),%ecx  # Function arg: int len
    movl 12(%esp),%esi  # Function arg: const unsigned char *buf

    testl $2, %esi
    jnz 30f             # Buffer is only 2-byte aligned.
10:
    movl %ecx, %edx     # Save len for the 1-3 byte tail.
    movl %ecx, %ebx
    andl $0x7c, %ebx    # ebx = bytes in the leading partial block.
    shrl $7, %ecx       # ecx = number of full 128-byte blocks.
    addl %ebx,%esi      # Point esi past the partial block ...
    shrl $2, %ebx       # ... and convert its size to dwords.
    negl %ebx
    lea 45f(%ebx,%ebx,2), %ebx  # Entry point: 45f minus 3 bytes per dword.
    testl %esi, %esi    # Clears CF before entering the adcl chain.
    jmp *%ebx

    # Handle 2-byte-aligned regions
20: addw (%esi), %ax
    lea 2(%esi), %esi
    adcl $0, %eax
    jmp 10b

30: subl $2, %ecx       # More than two bytes: csum one word to reach
    ja 20b              # 4-byte alignment, then restart at 10.
    je 32f
    movzbl (%esi),%ebx  # csumming 1 byte, 2-aligned
    addl %ebx, %eax
    adcl $0, %eax
    jmp 80f
32:
    addw (%esi), %ax    # csumming 2 bytes, 2-aligned
    adcl $0, %eax
    jmp 80f

40: addl -128(%esi), %eax   # One full 128-byte block per pass; esi has
    adcl -124(%esi), %eax   # already been advanced past the block.
    adcl -120(%esi), %eax
    adcl -116(%esi), %eax
    adcl -112(%esi), %eax
    adcl -108(%esi), %eax
    adcl -104(%esi), %eax
    adcl -100(%esi), %eax
    adcl -96(%esi), %eax
    adcl -92(%esi), %eax
    adcl -88(%esi), %eax
    adcl -84(%esi), %eax
    adcl -80(%esi), %eax
    adcl -76(%esi), %eax
    adcl -72(%esi), %eax
    adcl -68(%esi), %eax
    adcl -64(%esi), %eax
    adcl -60(%esi), %eax
    adcl -56(%esi), %eax
    adcl -52(%esi), %eax
    adcl -48(%esi), %eax
    adcl -44(%esi), %eax
    adcl -40(%esi), %eax
    adcl -36(%esi), %eax
    adcl -32(%esi), %eax
    adcl -28(%esi), %eax
    adcl -24(%esi), %eax
    adcl -20(%esi), %eax
    adcl -16(%esi), %eax
    adcl -12(%esi), %eax
    adcl -8(%esi), %eax
    adcl -4(%esi), %eax
45:
    lea 128(%esi), %esi     # Pre-advance esi for the next block; after
    adcl $0, %eax           # the last pass it is 128 beyond the data,
    dec %ecx                # which is why the tail uses -128(%esi).
    jge 40b                 # One pass per full 128-byte block.
    movl %edx, %ecx         # Recover len for the 1-3 byte tail.
50: andl $3, %ecx
    jz 80f

    # Handle the last 1-3 bytes without jumping
    notl %ecx       # 1->2, 2->1, 3->0, higher bits are masked
    movl $0xffffff,%ebx # by the shll and shrl instructions
    shll $3,%ecx
    shrl %cl,%ebx
    andl -128(%esi),%ebx    # esi is 4-aligned so should be ok
    addl %ebx,%eax
    adcl $0,%eax
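    /*
     * Worked example: len & 3 == 1 gives cl = 16, so the mask is
     * 0xffffff >> 16 = 0xff and only the first trailing byte of the
     * dword survives; len & 3 == 3 gives cl = 0 and the full 0xffffff
     * mask keeps all three trailing bytes (little-endian order).
     */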
80:
    popl %ebx
    popl %esi
    RET

#endif
    EXPORT_SYMBOL(csum_partial)
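
/*
 * Usage sketch (illustrative C, not part of this file): callers reduce
 * the 32-bit partial sum to the final 16-bit Internet checksum with a
 * fold such as csum_fold():
 *
 *	unsigned int sum = csum_partial(buf, len, 0);
 *	sum = (sum & 0xffff) + (sum >> 16);	// fold high half in
 *	sum = (sum & 0xffff) + (sum >> 16);	// absorb the new carry
 *	unsigned short csum = (unsigned short)~sum;
 */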