/* SPDX-License-Identifier: GPL-2.0 */
/* Written 2003 by Andi Kleen, based on a kernel by Evandro Menezes */

#include <linux/linkage.h>
#include <asm/cpufeatures.h>
#include <asm/alternative.h>
#include <asm/export.h>

/*
 * Some CPUs run faster using the string copy instructions (sane microcode).
 * It is also a lot simpler. Use this when possible. But, don't use streaming
 * copy unless the CPU indicates X86_FEATURE_REP_GOOD. Could vary the
 * prefetch distance based on SMP/UP.
 */
0015     ALIGN
0016 SYM_FUNC_START(copy_page)
0017     ALTERNATIVE "jmp copy_page_regs", "", X86_FEATURE_REP_GOOD
0018     movl    $4096/8, %ecx
0019     rep movsq
0020     RET
0021 SYM_FUNC_END(copy_page)
0022 EXPORT_SYMBOL(copy_page)
0023 
/*
 * copy_page_regs - fallback page copy for CPUs without
 * X86_FEATURE_REP_GOOD (i.e. slow string-instruction microcode).
 *
 * In:	%rdi = destination page, %rsi = source page (4 KiB each)
 * Uses: %rax, %rbx, %rcx, %rdx, %r8-%r12 as an 8-quadword copy
 * buffer; %rbx and %r12 are callee-saved, so they are spilled to
 * the stack around the copy.
 *
 * Each iteration moves 64 bytes (one cache line) via eight
 * load/store pairs.  The main loop prefetches 5 iterations
 * (5*64 bytes) ahead; the final 5 iterations run in a second loop
 * with no prefetch, so we never prefetch past the end of the
 * source page.
 */
SYM_FUNC_START_LOCAL(copy_page_regs)
	/* Spill the two callee-saved registers used as copy buffers. */
	subq	$2*8, %rsp
	movq	%rbx, (%rsp)
	movq	%r12, 1*8(%rsp)

	movl	$(4096/64)-5, %ecx	/* 59 prefetching iterations */
	.p2align 4
.Loop64:
	dec	%rcx			/* dec early: mov/lea/prefetch leave flags intact */
	movq	0x8*0(%rsi), %rax
	movq	0x8*1(%rsi), %rbx
	movq	0x8*2(%rsi), %rdx
	movq	0x8*3(%rsi), %r8
	movq	0x8*4(%rsi), %r9
	movq	0x8*5(%rsi), %r10
	movq	0x8*6(%rsi), %r11
	movq	0x8*7(%rsi), %r12

	prefetcht0 5*64(%rsi)		/* stay 5 cache lines ahead of the loads */

	movq	%rax, 0x8*0(%rdi)
	movq	%rbx, 0x8*1(%rdi)
	movq	%rdx, 0x8*2(%rdi)
	movq	%r8,  0x8*3(%rdi)
	movq	%r9,  0x8*4(%rdi)
	movq	%r10, 0x8*5(%rdi)
	movq	%r11, 0x8*6(%rdi)
	movq	%r12, 0x8*7(%rdi)

	leaq	64(%rsi), %rsi		/* lea: advance pointers without touching flags */
	leaq	64(%rdi), %rdi

	jnz	.Loop64			/* tests the flags set by "dec %rcx" above */

	movl	$5, %ecx		/* last 5 lines: copy without prefetching */
	.p2align 4
.Loop2:
	decl	%ecx

	movq	0x8*0(%rsi), %rax
	movq	0x8*1(%rsi), %rbx
	movq	0x8*2(%rsi), %rdx
	movq	0x8*3(%rsi), %r8
	movq	0x8*4(%rsi), %r9
	movq	0x8*5(%rsi), %r10
	movq	0x8*6(%rsi), %r11
	movq	0x8*7(%rsi), %r12

	movq	%rax, 0x8*0(%rdi)
	movq	%rbx, 0x8*1(%rdi)
	movq	%rdx, 0x8*2(%rdi)
	movq	%r8,  0x8*3(%rdi)
	movq	%r9,  0x8*4(%rdi)
	movq	%r10, 0x8*5(%rdi)
	movq	%r11, 0x8*6(%rdi)
	movq	%r12, 0x8*7(%rdi)

	leaq	64(%rdi), %rdi
	leaq	64(%rsi), %rsi
	jnz	.Loop2

	/* Restore callee-saved registers and release the spill slots. */
	movq	(%rsp), %rbx
	movq	1*8(%rsp), %r12
	addq	$2*8, %rsp
	RET
SYM_FUNC_END(copy_page_regs)