/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright 2008 Vitaly Mayatskikh <vmayatsk@redhat.com>
 * Copyright 2002 Andi Kleen, SuSE Labs.
 *
 * Functions to copy from and to user space.
 */

#include <linux/linkage.h>
#include <asm/current.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
#include <asm/cpufeatures.h>
#include <asm/alternative.h>
#include <asm/asm.h>
#include <asm/smap.h>
#include <asm/export.h>
#include <asm/trapnr.h>

.macro ALIGN_DESTINATION
    /* check for bad alignment of destination */
    movl %edi,%ecx
    andl $7,%ecx
    jz 102f             /* already aligned */
    subl $8,%ecx
    negl %ecx
    subl %ecx,%edx
100:    movb (%rsi),%al
101:    movb %al,(%rdi)
    incq %rsi
    incq %rdi
    decl %ecx
    jnz 100b
102:

    _ASM_EXTABLE_CPY(100b, .Lcopy_user_handle_align)
    _ASM_EXTABLE_CPY(101b, .Lcopy_user_handle_align)
.endm
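
/*
 * ALIGN_DESTINATION above byte-copies just enough to make %rdi 8-byte
 * aligned and subtracts that amount from the remaining count in %edx.
 * Below is a minimal standalone C sketch of the same arithmetic, added
 * here only for illustration; the names are not from the kernel.
 *
 *   #include <stdint.h>
 *
 *   // Number of leading bytes to copy one at a time so that 'dst'
 *   // becomes 8-byte aligned (0 if it already is).
 *   static unsigned int align_prologue_bytes(uintptr_t dst)
 *   {
 *           unsigned int mis = dst & 7;     // movl %edi,%ecx; andl $7,%ecx
 *           return mis ? 8 - mis : 0;       // subl $8,%ecx; negl %ecx
 *   }
 *
 * The remaining count is then reduced by this value (subl %ecx,%edx)
 * before the aligned copy loops run.
 */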

/*
 * copy_user_generic_unrolled - memory copy with exception handling.
 * This version is for CPUs like the P4 that don't have efficient
 * microcode for rep movsq.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
SYM_FUNC_START(copy_user_generic_unrolled)
    ASM_STAC
    cmpl $8,%edx
    jb .Lcopy_user_short_string_bytes
    ALIGN_DESTINATION
    movl %edx,%ecx
    andl $63,%edx
    shrl $6,%ecx
    jz copy_user_short_string
    /* unrolled loop: copy 64 bytes per iteration */
1:  movq (%rsi),%r8
2:  movq 1*8(%rsi),%r9
3:  movq 2*8(%rsi),%r10
4:  movq 3*8(%rsi),%r11
5:  movq %r8,(%rdi)
6:  movq %r9,1*8(%rdi)
7:  movq %r10,2*8(%rdi)
8:  movq %r11,3*8(%rdi)
9:  movq 4*8(%rsi),%r8
10: movq 5*8(%rsi),%r9
11: movq 6*8(%rsi),%r10
12: movq 7*8(%rsi),%r11
13: movq %r8,4*8(%rdi)
14: movq %r9,5*8(%rdi)
15: movq %r10,6*8(%rdi)
16: movq %r11,7*8(%rdi)
    leaq 64(%rsi),%rsi
    leaq 64(%rdi),%rdi
    decl %ecx
    jnz 1b
    jmp copy_user_short_string

    /* exception fixup: convert the 64-byte blocks left in %ecx to bytes, add the tail in %edx */
30: shll $6,%ecx
    addl %ecx,%edx
    jmp .Lcopy_user_handle_tail

    _ASM_EXTABLE_CPY(1b, 30b)
    _ASM_EXTABLE_CPY(2b, 30b)
    _ASM_EXTABLE_CPY(3b, 30b)
    _ASM_EXTABLE_CPY(4b, 30b)
    _ASM_EXTABLE_CPY(5b, 30b)
    _ASM_EXTABLE_CPY(6b, 30b)
    _ASM_EXTABLE_CPY(7b, 30b)
    _ASM_EXTABLE_CPY(8b, 30b)
    _ASM_EXTABLE_CPY(9b, 30b)
    _ASM_EXTABLE_CPY(10b, 30b)
    _ASM_EXTABLE_CPY(11b, 30b)
    _ASM_EXTABLE_CPY(12b, 30b)
    _ASM_EXTABLE_CPY(13b, 30b)
    _ASM_EXTABLE_CPY(14b, 30b)
    _ASM_EXTABLE_CPY(15b, 30b)
    _ASM_EXTABLE_CPY(16b, 30b)
SYM_FUNC_END(copy_user_generic_unrolled)
EXPORT_SYMBOL(copy_user_generic_unrolled)
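
/*
 * A hedged standalone C model of the unrolled path above (illustrative
 * only, not kernel code): the count is split into 64-byte blocks plus a
 * remainder, and the 30: fixup converts the blocks still pending back
 * into bytes so .Lcopy_user_handle_tail knows how much was not copied.
 *
 *   #include <stddef.h>
 *   #include <string.h>
 *
 *   // 'fault_blocks' simulates a fault with that many 64-byte blocks
 *   // still pending (0 = no fault).  Returns bytes not copied, as the
 *   // asm returns in %eax.
 *   static size_t unrolled_copy_model(char *dst, const char *src,
 *                                     size_t len, size_t fault_blocks)
 *   {
 *           size_t blocks = len >> 6;       // shrl $6,%ecx
 *           size_t tail   = len & 63;       // andl $63,%edx
 *
 *           for (size_t i = 0; i < blocks; i++) {
 *                   if (blocks - i == fault_blocks)
 *                           return (blocks - i) * 64 + tail;  // 30: fixup
 *                   memcpy(dst + i * 64, src + i * 64, 64);
 *           }
 *           memcpy(dst + blocks * 64, src + blocks * 64, tail);
 *           return 0;
 *   }
 */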

/* Some CPUs run faster using the string copy instructions.
 * This is also a lot simpler. Use them when possible.
 *
 * Only 4GB of copy is supported. This shouldn't be a problem
 * because the kernel normally only writes from/to page sized chunks
 * even if user space passed a longer buffer.
 * Copying more would also be dangerous because both Intel and AMD
 * have errata with rep movsq > 4GB. If someone feels the need to
 * fix this, please take those errata into account.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
SYM_FUNC_START(copy_user_generic_string)
    ASM_STAC
    cmpl $8,%edx
    jb 2f       /* less than 8 bytes, go to byte copy loop */
    ALIGN_DESTINATION
    movl %edx,%ecx
    shrl $3,%ecx
    andl $7,%edx
1:  rep movsq
2:  movl %edx,%ecx
3:  rep movsb
    xorl %eax,%eax
    ASM_CLAC
    RET

    /* exception fixups: compute the number of uncopied bytes */
11: leal (%rdx,%rcx,8),%ecx     /* %rcx qwords left, %edx tail bytes */
12: movl %ecx,%edx      /* %ecx also holds the uncopied byte count */
    jmp .Lcopy_user_handle_tail

    _ASM_EXTABLE_CPY(1b, 11b)
    _ASM_EXTABLE_CPY(3b, 12b)
SYM_FUNC_END(copy_user_generic_string)
EXPORT_SYMBOL(copy_user_generic_string)
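
/*
 * The string variant copies count/8 qwords with rep movsq and the
 * count%8 remainder with rep movsb.  When rep movsq faults, %rcx holds
 * the qwords it had left, and the 11: fixup scales that back to bytes.
 * A hedged standalone C sketch of that bookkeeping (names illustrative):
 *
 *   #include <stddef.h>
 *
 *   // Split a byte count the way the asm does before rep movsq.
 *   static void split_count(size_t len, size_t *qwords, size_t *tail)
 *   {
 *           *qwords = len >> 3;             // shrl $3,%ecx
 *           *tail   = len & 7;              // andl $7,%edx
 *   }
 *
 *   // Value reported as "bytes not copied" by the 11:/12: fixups:
 *   // leal (%rdx,%rcx,8),%ecx with %rcx qwords left and %edx the tail.
 *   static size_t string_copy_uncopied(size_t qwords_left, size_t tail)
 *   {
 *           return tail + qwords_left * 8;
 *   }
 */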

/*
 * Some CPUs support the enhanced REP MOVSB/STOSB instructions.
 * It is recommended to use them when they are available.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
SYM_FUNC_START(copy_user_enhanced_fast_string)
    ASM_STAC
    /* CPUs without FSRM should avoid rep movsb for short copies */
    ALTERNATIVE "cmpl $64, %edx; jb copy_user_short_string", "", X86_FEATURE_FSRM
    movl %edx,%ecx
1:  rep movsb
    xorl %eax,%eax
    ASM_CLAC
    RET

12: movl %ecx,%edx      /* %ecx holds the uncopied byte count */
    jmp .Lcopy_user_handle_tail

    _ASM_EXTABLE_CPY(1b, 12b)
SYM_FUNC_END(copy_user_enhanced_fast_string)
EXPORT_SYMBOL(copy_user_enhanced_fast_string)
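
/*
 * The ALTERNATIVE above is patched at boot: with X86_FEATURE_FSRM (Fast
 * Short REP MOVSB) the length check disappears and rep movsb handles
 * every size; without it, copies under 64 bytes branch to
 * copy_user_short_string.  A hedged C sketch of that dispatch (the
 * helper names and the feature-flag parameter are illustrative only):
 *
 *   #include <stdbool.h>
 *   #include <stddef.h>
 *   #include <string.h>
 *
 *   static void rep_movsb_copy(void *d, const void *s, size_t n)
 *   {
 *           memcpy(d, s, n);                // stands in for rep movsb
 *   }
 *
 *   static void short_string_copy(void *d, const void *s, size_t n)
 *   {
 *           memcpy(d, s, n);                // stands in for copy_user_short_string
 *   }
 *
 *   static void enhanced_copy_model(void *d, const void *s, size_t n,
 *                                   bool cpu_has_fsrm)
 *   {
 *           if (!cpu_has_fsrm && n < 64)    // check is patched out with FSRM
 *                   short_string_copy(d, s, n);
 *           else
 *                   rep_movsb_copy(d, s, n);
 *   }
 */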

/*
 * Try to copy the last bytes and clear the rest if needed.
 * Since a protection fault in copy_from/to_user is not a normal situation,
 * it is not necessary to optimize tail handling.
 * Don't try to copy the tail if a machine check happened.
 *
 * Input:
 * eax trap number written by ex_handler_copy()
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
SYM_CODE_START_LOCAL(.Lcopy_user_handle_tail)
    cmp $X86_TRAP_MC,%eax
    je 3f

    movl %edx,%ecx
1:  rep movsb
2:  mov %ecx,%eax
    ASM_CLAC
    RET

3:
    movl %edx,%eax
    ASM_CLAC
    RET

    _ASM_EXTABLE_CPY(1b, 2b)

.Lcopy_user_handle_align:
    addl %ecx,%edx          /* add back the alignment bytes still pending in %ecx */
    jmp .Lcopy_user_handle_tail

SYM_CODE_END(.Lcopy_user_handle_tail)
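
/*
 * A hedged C model of the tail handler above (illustrative only): on a
 * machine check nothing more is touched and the whole remainder is
 * reported; otherwise a byte copy is attempted and whatever still
 * faults is reported as uncopied.
 *
 *   #include <stdbool.h>
 *   #include <stddef.h>
 *
 *   // 'copy_byte' stands in for the faulting byte copy at label 1:;
 *   // returning false models the exception fixup taken to label 2:.
 *   static size_t handle_tail_model(size_t remaining,
 *                                   bool was_machine_check,
 *                                   bool (*copy_byte)(size_t idx))
 *   {
 *           if (was_machine_check)          // cmp $X86_TRAP_MC,%eax; je 3f
 *                   return remaining;
 *
 *           for (size_t i = 0; i < remaining; i++)
 *                   if (!copy_byte(i))      // rep movsb faulted here
 *                           return remaining - i;
 *           return 0;
 *   }
 */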

/*
 * Finish a memcpy of less than 64 bytes.  The AC flag (set via STAC) is
 * expected to already be set.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count (< 64)
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
SYM_CODE_START_LOCAL(copy_user_short_string)
    movl %edx,%ecx
    andl $7,%edx
    shrl $3,%ecx
    jz .Lcopy_user_short_string_bytes
18: movq (%rsi),%r8
19: movq %r8,(%rdi)
    leaq 8(%rsi),%rsi
    leaq 8(%rdi),%rdi
    decl %ecx
    jnz 18b
.Lcopy_user_short_string_bytes:
    andl %edx,%edx
    jz 23f
    movl %edx,%ecx
21: movb (%rsi),%al
22: movb %al,(%rdi)
    incq %rsi
    incq %rdi
    decl %ecx
    jnz 21b
23: xor %eax,%eax
    ASM_CLAC
    RET

    /* exception fixups: compute the number of uncopied bytes */
40: leal (%rdx,%rcx,8),%edx     /* %rcx qwords left, %edx tail bytes */
    jmp 60f
50: movl %ecx,%edx      /* %ecx holds the uncopied byte count */
60: jmp .Lcopy_user_handle_tail

    _ASM_EXTABLE_CPY(18b, 40b)
    _ASM_EXTABLE_CPY(19b, 40b)
    _ASM_EXTABLE_CPY(21b, 50b)
    _ASM_EXTABLE_CPY(22b, 50b)
SYM_CODE_END(copy_user_short_string)
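
/*
 * A hedged standalone C model of copy_user_short_string (illustrative
 * only): up to seven 8-byte moves followed by a byte loop, with the 40:
 * fixup converting the qwords still pending back into bytes.
 *
 *   #include <stddef.h>
 *   #include <string.h>
 *
 *   // 'fault_qwords' simulates a fault with that many qwords still
 *   // pending (0 = no fault); mirrors 40: leal (%rdx,%rcx,8),%edx.
 *   static size_t short_string_model(char *dst, const char *src,
 *                                    size_t len, size_t fault_qwords)
 *   {
 *           size_t qwords = len >> 3;       // shrl $3,%ecx
 *           size_t tail   = len & 7;        // andl $7,%edx
 *
 *           for (size_t left = qwords; left > 0; left--) {
 *                   if (left == fault_qwords)
 *                           return left * 8 + tail;   // bytes not copied
 *                   memcpy(dst, src, 8);
 *                   dst += 8;
 *                   src += 8;
 *           }
 *           for (size_t i = 0; i < tail; i++)
 *                   dst[i] = src[i];
 *           return 0;
 *   }
 */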

/*
 * copy_user_nocache - Uncached memory copy with exception handling.
 * This keeps the destination data out of the cache for better performance.
 *
 * Note: a cached memory copy is used when the destination or size is not
 * naturally aligned. That is:
 *  - 8-byte alignment is required when the size is 8 bytes or larger.
 *  - 4-byte alignment is required when the size is 4 bytes.
 */
SYM_FUNC_START(__copy_user_nocache)
    ASM_STAC

    /* If size is less than 8 bytes, go to 4-byte copy */
    cmpl $8,%edx
    jb .L_4b_nocache_copy_entry

    /* If destination is not 8-byte aligned, "cache" copy to align it */
    ALIGN_DESTINATION

    /* Set 4x8-byte copy count and remainder */
    movl %edx,%ecx
    andl $63,%edx
    shrl $6,%ecx
    jz .L_8b_nocache_copy_entry /* jump if count is 0 */

    /* Perform 4x8-byte nocache loop-copy */
.L_4x8b_nocache_copy_loop:
1:  movq (%rsi),%r8
2:  movq 1*8(%rsi),%r9
3:  movq 2*8(%rsi),%r10
4:  movq 3*8(%rsi),%r11
5:  movnti %r8,(%rdi)
6:  movnti %r9,1*8(%rdi)
7:  movnti %r10,2*8(%rdi)
8:  movnti %r11,3*8(%rdi)
9:  movq 4*8(%rsi),%r8
10: movq 5*8(%rsi),%r9
11: movq 6*8(%rsi),%r10
12: movq 7*8(%rsi),%r11
13: movnti %r8,4*8(%rdi)
14: movnti %r9,5*8(%rdi)
15: movnti %r10,6*8(%rdi)
16: movnti %r11,7*8(%rdi)
    leaq 64(%rsi),%rsi
    leaq 64(%rdi),%rdi
    decl %ecx
    jnz .L_4x8b_nocache_copy_loop

    /* Set 8-byte copy count and remainder */
.L_8b_nocache_copy_entry:
    movl %edx,%ecx
    andl $7,%edx
    shrl $3,%ecx
    jz .L_4b_nocache_copy_entry /* jump if count is 0 */

    /* Perform 8-byte nocache loop-copy */
.L_8b_nocache_copy_loop:
20: movq (%rsi),%r8
21: movnti %r8,(%rdi)
    leaq 8(%rsi),%rsi
    leaq 8(%rdi),%rdi
    decl %ecx
    jnz .L_8b_nocache_copy_loop

    /* If no byte left, we're done */
.L_4b_nocache_copy_entry:
    andl %edx,%edx
    jz .L_finish_copy

    /* If destination is not 4-byte aligned, go to byte copy: */
    movl %edi,%ecx
    andl $3,%ecx
    jnz .L_1b_cache_copy_entry

    /* Set 4-byte copy count (1 or 0) and remainder */
    movl %edx,%ecx
    andl $3,%edx
    shrl $2,%ecx
    jz .L_1b_cache_copy_entry   /* jump if count is 0 */

    /* Perform 4-byte nocache copy: */
30: movl (%rsi),%r8d
31: movnti %r8d,(%rdi)
    leaq 4(%rsi),%rsi
    leaq 4(%rdi),%rdi

    /* If no bytes left, we're done: */
    andl %edx,%edx
    jz .L_finish_copy

    /* Perform byte "cache" loop-copy for the remainder */
.L_1b_cache_copy_entry:
    movl %edx,%ecx
.L_1b_cache_copy_loop:
40: movb (%rsi),%al
41: movb %al,(%rdi)
    incq %rsi
    incq %rdi
    decl %ecx
    jnz .L_1b_cache_copy_loop

    /* Finished copying; fence the prior stores */
.L_finish_copy:
    xorl %eax,%eax
    ASM_CLAC
    sfence
    RET

.L_fixup_4x8b_copy:
    shll $6,%ecx
    addl %ecx,%edx
    jmp .L_fixup_handle_tail
.L_fixup_8b_copy:
    lea (%rdx,%rcx,8),%rdx
    jmp .L_fixup_handle_tail
.L_fixup_4b_copy:
    lea (%rdx,%rcx,4),%rdx
    jmp .L_fixup_handle_tail
.L_fixup_1b_copy:
    movl %ecx,%edx
.L_fixup_handle_tail:
    sfence
    jmp .Lcopy_user_handle_tail

    _ASM_EXTABLE_CPY(1b, .L_fixup_4x8b_copy)
    _ASM_EXTABLE_CPY(2b, .L_fixup_4x8b_copy)
    _ASM_EXTABLE_CPY(3b, .L_fixup_4x8b_copy)
    _ASM_EXTABLE_CPY(4b, .L_fixup_4x8b_copy)
    _ASM_EXTABLE_CPY(5b, .L_fixup_4x8b_copy)
    _ASM_EXTABLE_CPY(6b, .L_fixup_4x8b_copy)
    _ASM_EXTABLE_CPY(7b, .L_fixup_4x8b_copy)
    _ASM_EXTABLE_CPY(8b, .L_fixup_4x8b_copy)
    _ASM_EXTABLE_CPY(9b, .L_fixup_4x8b_copy)
    _ASM_EXTABLE_CPY(10b, .L_fixup_4x8b_copy)
    _ASM_EXTABLE_CPY(11b, .L_fixup_4x8b_copy)
    _ASM_EXTABLE_CPY(12b, .L_fixup_4x8b_copy)
    _ASM_EXTABLE_CPY(13b, .L_fixup_4x8b_copy)
    _ASM_EXTABLE_CPY(14b, .L_fixup_4x8b_copy)
    _ASM_EXTABLE_CPY(15b, .L_fixup_4x8b_copy)
    _ASM_EXTABLE_CPY(16b, .L_fixup_4x8b_copy)
    _ASM_EXTABLE_CPY(20b, .L_fixup_8b_copy)
    _ASM_EXTABLE_CPY(21b, .L_fixup_8b_copy)
    _ASM_EXTABLE_CPY(30b, .L_fixup_4b_copy)
    _ASM_EXTABLE_CPY(31b, .L_fixup_4b_copy)
    _ASM_EXTABLE_CPY(40b, .L_fixup_1b_copy)
    _ASM_EXTABLE_CPY(41b, .L_fixup_1b_copy)
SYM_FUNC_END(__copy_user_nocache)
EXPORT_SYMBOL(__copy_user_nocache)
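
/*
 * A hedged C model of __copy_user_nocache's size/alignment dispatch
 * (illustrative userspace code, not the kernel implementation; fault
 * handling is omitted): 8-byte chunks use non-temporal stores once the
 * destination is 8-byte aligned, a 4-byte-aligned remainder gets one
 * 4-byte non-temporal store, everything else is an ordinary cached byte
 * copy, and a store fence finishes the sequence.  SSE2 intrinsics stand
 * in for movnti.
 *
 *   #include <emmintrin.h>  // _mm_stream_si64, _mm_stream_si32, _mm_sfence
 *   #include <stdint.h>
 *   #include <string.h>
 *
 *   static void nocache_copy_model(void *dstp, const void *srcp, size_t len)
 *   {
 *           char *dst = dstp;
 *           const char *src = srcp;
 *
 *           if (len >= 8) {
 *                   // cached byte copy to align dst (ALIGN_DESTINATION)
 *                   while ((uintptr_t)dst & 7) {
 *                           *dst++ = *src++;
 *                           len--;
 *                   }
 *                   // the 4x8-byte and 8-byte movnti loops, collapsed
 *                   while (len >= 8) {
 *                           long long q;
 *                           memcpy(&q, src, 8);
 *                           _mm_stream_si64((long long *)dst, q);
 *                           dst += 8;
 *                           src += 8;
 *                           len -= 8;
 *                   }
 *           }
 *           if (len >= 4 && ((uintptr_t)dst & 3) == 0) {
 *                   int d;                  // single 4-byte movnti
 *                   memcpy(&d, src, 4);
 *                   _mm_stream_si32((int *)dst, d);
 *                   dst += 4;
 *                   src += 4;
 *                   len -= 4;
 *           }
 *           while (len--)                   // cached byte copy for the rest
 *                   *dst++ = *src++;
 *           _mm_sfence();                   // matches the final sfence
 *   }
 */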