/*
 * Copyright 2002, 2003 Andi Kleen, SuSE Labs.
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file COPYING in the main directory of this archive
 * for more details. No warranty for anything given at all.
 */
#include <linux/linkage.h>
#include <asm/errno.h>
#include <asm/asm.h>

/*
 * Checksum copy with exception handling.
 * On an exception the function returns 0 (see .Lfault below); the
 * wrappers are expected to report the fault and zero the destination.
 *
 * Input
 * rdi  source
 * rsi  destination
 * edx  len (32bit)
 *
 * Output
 * eax  32-bit folded sum; 0 in case of an exception.
 *
 * Wrappers need to take care of a valid exception sum and of zeroing.
 * They should also align the source or destination to 8 bytes.
 */

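/*
 * For orientation, the function behaves roughly like the following C
 * sketch (illustrative only; the helper names are made up):
 *
 *	u64 sum = 0xffffffff;		// movl $-1, %eax below
 *	while (len >= 8) {
 *		u64 v = *src++;		// may fault -> .Lfault
 *		*dst++ = v;		// may fault -> .Lfault
 *		sum = addc64(sum, v);	// 64-bit end-around-carry add
 *		len -= 8;
 *	}
 *	// 2- and 1-byte tails are accumulated the same way, then the
 *	// accumulator is folded down to 32 bits (.Lfold).
 */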
    .macro source
10:
    _ASM_EXTABLE_UA(10b, .Lfault)
    .endm

    .macro dest
20:
    _ASM_EXTABLE_UA(20b, .Lfault)
    .endm

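/*
 * Note: each use of the macros above plants a local label (10:/20:) in
 * front of the following load or store and records it in the kernel
 * exception table, mapping a fault at that instruction to .Lfault
 * instead of an oops. The _UA flavor marks it as a user-space access
 * for the exception fixup code.
 */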
SYM_FUNC_START(csum_partial_copy_generic)
    subq  $5*8, %rsp        /* save the callee-saved registers we use */
    movq  %rbx, 0*8(%rsp)
    movq  %r12, 1*8(%rsp)
    movq  %r14, 2*8(%rsp)
    movq  %r13, 3*8(%rsp)
    movq  %r15, 4*8(%rsp)

    movl  $-1, %eax         /* initial sum: ~0U */
    xorl  %r9d, %r9d        /* r9 stays zero, used to fold carries in */
    movl  %edx, %ecx
    cmpl  $8, %ecx
    jb    .Lshort

    testb  $7, %sil         /* destination already 8-byte aligned? */
    jne   .Lunaligned
.Laligned:
    movl  %ecx, %r12d

    shrq  $6, %r12
    jz  .Lhandle_tail       /* < 64 */

    clc

    /* main loop: copy and sum in 64-byte blocks */
    /* r9: zero, r8: temp2, rbx: temp1, rax: sum, rcx: saved length */
    /* r11: temp3, rdx: temp4, r12 loopcnt */
    /* r10: temp5, r15: temp6, r14 temp7, r13 temp8 */
    .p2align 4
.Lloop:
    source
    movq  (%rdi), %rbx
    source
    movq  8(%rdi), %r8
    source
    movq  16(%rdi), %r11
    source
    movq  24(%rdi), %rdx

    source
    movq  32(%rdi), %r10
    source
    movq  40(%rdi), %r15
    source
    movq  48(%rdi), %r14
    source
    movq  56(%rdi), %r13

30:
    /*
     * No _ASM_EXTABLE_UA; this is used for an intentional prefetch on a
     * potentially unmapped kernel address.
     */
    _ASM_EXTABLE(30b, 2f)
    prefetcht0 5*64(%rdi)
2:
    adcq  %rbx, %rax
    adcq  %r8, %rax
    adcq  %r11, %rax
    adcq  %rdx, %rax
    adcq  %r10, %rax
    adcq  %r15, %rax
    adcq  %r14, %rax
    adcq  %r13, %rax

    decl %r12d      /* does not clobber CF, so the adc chain survives */

    dest
    movq %rbx, (%rsi)
    dest
    movq %r8, 8(%rsi)
    dest
    movq %r11, 16(%rsi)
    dest
    movq %rdx, 24(%rsi)

    dest
    movq %r10, 32(%rsi)
    dest
    movq %r15, 40(%rsi)
    dest
    movq %r14, 48(%rsi)
    dest
    movq %r13, 56(%rsi)

    leaq 64(%rdi), %rdi /* leaq rather than addq: leaves CF intact */
    leaq 64(%rsi), %rsi

    jnz .Lloop

    adcq  %r9, %rax     /* fold in the final carry (r9 is zero) */

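/*
 * Net effect of the adcq chain: 64-bit ones'-complement (end-around
 * carry) addition of eight quadwords into %rax. Illustrative C, with a
 * made-up helper name:
 *
 *	static inline u64 addc64(u64 sum, u64 v)
 *	{
 *		sum += v;
 *		return sum + (sum < v);	// wrap the carry back in
 *	}
 */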
    /* do last up to 56 bytes */
.Lhandle_tail:
    /* ecx: count; rcx bit 63: set if the result must be rotated by 8 */
    movq %rcx, %r10
    andl $63, %ecx
    shrl $3, %ecx
    jz  .Lfold
    clc
    .p2align 4
.Lloop_8:
    source
    movq (%rdi), %rbx
    adcq %rbx, %rax
    decl %ecx
    dest
    movq %rbx, (%rsi)
    leaq 8(%rsi), %rsi /* leaq preserves the carry */
    leaq 8(%rdi), %rdi
    jnz .Lloop_8
    adcq %r9, %rax  /* add in carry */

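/*
 * Note the pattern shared by all copy loops here: clc clears CF on
 * entry, pointer updates use leaq and the counter uses decl, both of
 * which leave CF untouched, so a single adc carry chain can run across
 * loop iterations, with a final adc against the zero register (%r9)
 * collecting the last carry.
 */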
.Lfold:
    /* reduce checksum to 32 bits */
    movl %eax, %ebx
    shrq $32, %rax
    addl %ebx, %eax
    adcl %r9d, %eax

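/*
 * Equivalent C for the fold above (illustrative): add the high and low
 * 32-bit halves, then the adcl adds the carry of that addition back
 * in. A second carry cannot occur here, so one adcl suffices:
 *
 *	u64 t = (u32)sum + (sum >> 32);
 *	sum = (u32)t + (u32)(t >> 32);	// t >> 32 is 0 or 1
 */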
    /* do the last up to 6 bytes, in 2-byte steps */
.Lhandle_7:
    movl %r10d, %ecx
    andl $7, %ecx
.L1:                /* .Lshort rejoins the common path here */
    shrl $1, %ecx
    jz   .Lhandle_1
    movl $2, %edx
    xorl %ebx, %ebx
    clc
    .p2align 4
.Lloop_1:
    source
    movw (%rdi), %bx
    adcl %ebx, %eax
    decl %ecx
    dest
    movw %bx, (%rsi)
    leaq 2(%rdi), %rdi
    leaq 2(%rsi), %rsi
    jnz .Lloop_1
    adcl %r9d, %eax /* add in carry */

    /* handle the last odd byte */
.Lhandle_1:
    testb $1, %r10b
    jz    .Lende
    xorl  %ebx, %ebx
    source
    movb (%rdi), %bl
    dest
    movb %bl, (%rsi)
    addl %ebx, %eax
    adcl %r9d, %eax     /* carry */

.Lende:
    testq %r10, %r10    /* bit 63 set by .Lodd: result needs a rol8 */
    js  .Lwas_odd
.Lout:
    movq 0*8(%rsp), %rbx
    movq 1*8(%rsp), %r12
    movq 2*8(%rsp), %r14
    movq 3*8(%rsp), %r13
    movq 4*8(%rsp), %r15
    addq $5*8, %rsp
    RET
.Lshort:
    movl %ecx, %r10d
    jmp  .L1
.Lunaligned:
    xorl %ebx, %ebx
    testb $1, %sil
    jne  .Lodd
1:  testb $2, %sil
    je   2f
    source
    movw (%rdi), %bx
    dest
    movw %bx, (%rsi)
    leaq 2(%rdi), %rdi
    subq $2, %rcx
    leaq 2(%rsi), %rsi
    addq %rbx, %rax
2:  testb $4, %sil
    je .Laligned
    source
    movl (%rdi), %ebx
    dest
    movl %ebx, (%rsi)
    leaq 4(%rdi), %rdi
    subq $4, %rcx
    leaq 4(%rsi), %rsi
    addq %rbx, %rax
    jmp .Laligned

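/*
 * Alignment peel: together with .Lodd below, the code above brings the
 * destination (%rsi) up to 8-byte alignment, summing the peeled bytes
 * on the way, then rejoins .Laligned. Roughly (illustrative C, made-up
 * helper names):
 *
 *	if (dst & 1) copy_and_sum_byte();	// .Lodd; flags "rol8 needed"
 *	if (dst & 2) copy_and_sum_word();
 *	if (dst & 4) copy_and_sum_dword();
 */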
.Lodd:
    source
    movb (%rdi), %bl
    dest
    movb %bl, (%rsi)
    leaq 1(%rdi), %rdi
    leaq 1(%rsi), %rsi
    /* rcx = (rcx*2 - 1) ror 1: decrement the count and set bit 63 */
    leaq -1(%rcx, %rcx), %rcx
    rorq $1, %rcx
    shll $8, %ebx       /* the odd byte goes into the high lane */
    addq %rbx, %rax
    jmp 1b

.Lwas_odd:
    roll $8, %eax
    jmp .Lout

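/*
 * Why rol8 works: the ones'-complement sum is invariant under byte
 * rotation (RFC 1071); summing the same bytes one lane over yields the
 * byte-swapped sum. With an odd start every byte landed in the
 * opposite lane (see the shll $8 in .Lodd), so rotating the folded
 * 32-bit result by 8 restores the correct byte order.
 */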
    /* Exception: just return 0 */
.Lfault:
    xorl %eax, %eax
    jmp  .Lout
SYM_FUNC_END(csum_partial_copy_generic)