/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Memory copy functions for 32-bit PowerPC.
 *
 * Copyright (C) 1996-2005 Paul Mackerras.
 */
#include <asm/processor.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/ppc_asm.h>
#include <asm/export.h>
#include <asm/code-patching-asm.h>
#include <asm/kasan.h>

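/*
 * Copy 16 bytes through r7-r10.  r4 (source) and r6 (destination) are
 * biased 4 bytes low on entry; the trailing lwzu/stwu advance them by 16
 * so the macro can simply be repeated for each 16-byte chunk of a line.
 */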
#define COPY_16_BYTES       \
    lwz r7,4(r4);   \
    lwz r8,8(r4);   \
    lwz r9,12(r4);  \
    lwzu    r10,16(r4); \
    stw r7,4(r6);   \
    stw r8,8(r6);   \
    stw r9,12(r6);  \
    stwu    r10,16(r6)

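/*
 * The same 16-byte copy, but with a local label (8n0-8n7) on every load
 * and store so that COPY_16_BYTES_EXCODE(n) can attach an exception-table
 * entry to each access.
 */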
#define COPY_16_BYTES_WITHEX(n) \
8 ## n ## 0:            \
    lwz r7,4(r4);   \
8 ## n ## 1:            \
    lwz r8,8(r4);   \
8 ## n ## 2:            \
    lwz r9,12(r4);  \
8 ## n ## 3:            \
    lwzu    r10,16(r4); \
8 ## n ## 4:            \
    stw r7,4(r6);   \
8 ## n ## 5:            \
    stw r8,8(r6);   \
8 ## n ## 6:            \
    stw r9,12(r6);  \
8 ## n ## 7:            \
    stwu    r10,16(r6)

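/*
 * Fixup code for COPY_16_BYTES_WITHEX(n).  A fault in copy n of the
 * unrolled cacheline loop means 16*n bytes of the current line were
 * already copied, so back them out of the residual count in r5, then
 * join the common read-fault (104) or write-fault (105) path.  The
 * EX_TABLE entries route loads to 9n0 and stores to 9n1.
 */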
#define COPY_16_BYTES_EXCODE(n)         \
9 ## n ## 0:                    \
    addi    r5,r5,-(16 * n);        \
    b   104f;               \
9 ## n ## 1:                    \
    addi    r5,r5,-(16 * n);        \
    b   105f;               \
    EX_TABLE(8 ## n ## 0b,9 ## n ## 0b);    \
    EX_TABLE(8 ## n ## 1b,9 ## n ## 0b);    \
    EX_TABLE(8 ## n ## 2b,9 ## n ## 0b);    \
    EX_TABLE(8 ## n ## 3b,9 ## n ## 0b);    \
    EX_TABLE(8 ## n ## 4b,9 ## n ## 1b);    \
    EX_TABLE(8 ## n ## 5b,9 ## n ## 1b);    \
    EX_TABLE(8 ## n ## 6b,9 ## n ## 1b);    \
    EX_TABLE(8 ## n ## 7b,9 ## n ## 1b)

    .text

CACHELINE_BYTES = L1_CACHE_BYTES
LG_CACHELINE_BYTES = L1_CACHE_SHIFT
CACHELINE_MASK = (L1_CACHE_BYTES-1)

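/*
 * memset16(p, v, n): fill n halfwords at p with the 16-bit value v.
 * v is replicated into both halves of r4 so that n/2 word stores do the
 * bulk of the work, with one trailing halfword store if n is odd.
 * With KASAN the generic C implementation is used instead.
 */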
#ifndef CONFIG_KASAN
_GLOBAL(memset16)
    rlwinm. r0, r5, 31, 1, 31   /* r0 = n >> 1, the number of full words */
    addi    r6, r3, -4
    beq-    2f
    rlwimi  r4, r4, 16, 0, 15
    mtctr   r0
1:  stwu    r4, 4(r6)
    bdnz    1b
2:  andi.   r0, r5, 1
    beqlr
    sth r4, 4(r6)
    blr
EXPORT_SYMBOL(memset16)
#endif

/*
 * Use dcbz on the complete cache lines in the destination
 * to set them to zero.  This requires that the destination
 * area is cacheable.  -- paulus
 *
 * During early init the caches may not be active yet, so dcbz cannot be
 * used.  We therefore skip the optimised block that uses dcbz.  The jump
 * is replaced by a nop once the caches are up; machine_init() does this.
 */
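/*
 * Roughly, in C (an illustrative sketch only, not part of the build):
 *
 *	memset(p, c, n):
 *		if n < 4: store n bytes and return;
 *		splat c into all four bytes of a word;
 *		word-store at p, then round p down to a word boundary;
 *		if c == 0 and the caches are up: dcbz whole cachelines;
 *		store the remaining words, then the trailing bytes;
 */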
_GLOBAL_KASAN(memset)
    cmplwi  0,r5,4
    blt 7f

    rlwimi  r4,r4,8,16,23
    rlwimi  r4,r4,16,0,15

    stw r4,0(r3)
    beqlr
    andi.   r0,r3,3
    add r5,r0,r5
    subf    r6,r0,r3
    cmplwi  0,r4,0
    /*
     * Skip the optimised block until the cache is enabled.  Replaced
     * by 'bne' during boot so the normal procedure is used whenever
     * r4 is not zero.
     */
5:  b   2f
    patch_site  5b, patch__memset_nocache

    clrlwi  r7,r6,32-LG_CACHELINE_BYTES
    add r8,r7,r5
    srwi    r9,r8,LG_CACHELINE_BYTES
    addic.  r9,r9,-1    /* total number of complete cachelines */
    ble 2f
    xori    r0,r7,CACHELINE_MASK & ~3
    srwi.   r0,r0,2
    beq 3f
    mtctr   r0
4:  stwu    r4,4(r6)
    bdnz    4b
3:  mtctr   r9
    li  r7,4
10: dcbz    r7,r6
    addi    r6,r6,CACHELINE_BYTES
    bdnz    10b
    clrlwi  r5,r8,32-LG_CACHELINE_BYTES
    addi    r5,r5,4

2:  srwi    r0,r5,2
    mtctr   r0
    bdz 6f
1:  stwu    r4,4(r6)
    bdnz    1b
6:  andi.   r5,r5,3
    beqlr
    mtctr   r5
    addi    r6,r6,3
8:  stbu    r4,1(r6)
    bdnz    8b
    blr

7:  cmpwi   0,r5,0
    beqlr
    mtctr   r5
    addi    r6,r3,-1
9:  stbu    r4,1(r6)
    bdnz    9b
    blr
EXPORT_SYMBOL(memset)
EXPORT_SYMBOL_KASAN(memset)

/*
 * This version uses dcbz on the complete cache lines in the
 * destination area to reduce memory traffic.  This requires that
 * the destination area is cacheable.
 * We only use this version if the source and dest don't overlap.
 * -- paulus.
 *
 * During early init the caches may not be active yet, so dcbz cannot be
 * used.  We therefore jump to generic_memcpy, which doesn't use dcbz.
 * The jump is replaced by a nop once the caches are up; machine_init()
 * does this.
 */
_GLOBAL_KASAN(memmove)
    cmplw   0,r3,r4
    bgt backwards_memcpy
    /* fall through */

_GLOBAL_KASAN(memcpy)
1:  b   generic_memcpy
    patch_site  1b, patch__memcpy_nocache

    add r7,r3,r5        /* test if the src & dst overlap */
    add r8,r4,r5
    cmplw   0,r4,r7
    cmplw   1,r3,r8
    crand   0,0,4           /* cr0.lt &= cr1.lt */
    blt generic_memcpy      /* if regions overlap */

    addi    r4,r4,-4
    addi    r6,r3,-4
    neg r0,r3
    andi.   r0,r0,CACHELINE_MASK    /* # bytes to start of cache line */
    beq 58f

    cmplw   0,r5,r0         /* is this more than total to do? */
    blt 63f         /* if not much to do */
    andi.   r8,r0,3         /* get it word-aligned first */
    subf    r5,r0,r5
    mtctr   r8
    beq+    61f
70: lbz r9,4(r4)        /* do some bytes */
    addi    r4,r4,1
    addi    r6,r6,1
    stb r9,3(r6)
    bdnz    70b
61: srwi.   r0,r0,2
    mtctr   r0
    beq 58f
72: lwzu    r9,4(r4)        /* do some words */
    stwu    r9,4(r6)
    bdnz    72b

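/*
 * dcbz establishes each destination line in the cache as zeroes without
 * reading it from memory, and the copies that follow overwrite it, so a
 * destination cacheline costs a write but never a read.
 */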
58: srwi.   r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
    clrlwi  r5,r5,32-LG_CACHELINE_BYTES
    li  r11,4
    mtctr   r0
    beq 63f
53:
    dcbz    r11,r6
    COPY_16_BYTES
#if L1_CACHE_BYTES >= 32
    COPY_16_BYTES
#if L1_CACHE_BYTES >= 64
    COPY_16_BYTES
    COPY_16_BYTES
#if L1_CACHE_BYTES >= 128
    COPY_16_BYTES
    COPY_16_BYTES
    COPY_16_BYTES
    COPY_16_BYTES
#endif
#endif
#endif
    bdnz    53b

63: srwi.   r0,r5,2
    mtctr   r0
    beq 64f
30: lwzu    r0,4(r4)
    stwu    r0,4(r6)
    bdnz    30b

64: andi.   r0,r5,3
    mtctr   r0
    beq+    65f
    addi    r4,r4,3
    addi    r6,r6,3
40: lbzu    r0,1(r4)
    stbu    r0,1(r6)
    bdnz    40b
65: blr
EXPORT_SYMBOL(memcpy)
EXPORT_SYMBOL(memmove)
EXPORT_SYMBOL_KASAN(memcpy)
EXPORT_SYMBOL_KASAN(memmove)

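/*
 * Plain forward copy, no dcbz: used before the caches are up, and as
 * the fallback when the source and destination regions overlap.
 */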
generic_memcpy:
    srwi.   r7,r5,3
    addi    r6,r3,-4
    addi    r4,r4,-4
    beq 2f          /* if less than 8 bytes to do */
    andi.   r0,r6,3         /* get dest word aligned */
    mtctr   r7
    bne 5f
1:  lwz r7,4(r4)
    lwzu    r8,8(r4)
    stw r7,4(r6)
    stwu    r8,8(r6)
    bdnz    1b
    andi.   r5,r5,7
2:  cmplwi  0,r5,4
    blt 3f
    lwzu    r0,4(r4)
    addi    r5,r5,-4
    stwu    r0,4(r6)
3:  cmpwi   0,r5,0
    beqlr
    mtctr   r5
    addi    r4,r4,3
    addi    r6,r6,3
4:  lbzu    r0,1(r4)
    stbu    r0,1(r6)
    bdnz    4b
    blr
5:  subfic  r0,r0,4
    mtctr   r0
6:  lbz r7,4(r4)
    addi    r4,r4,1
    stb r7,4(r6)
    addi    r6,r6,1
    bdnz    6b
    subf    r5,r0,r5
    rlwinm. r7,r5,32-3,3,31     /* r7 = r5 >> 3 */
    beq 2b
    mtctr   r7
    b   1b

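/*
 * As generic_memcpy, but copying from high addresses down, so that
 * overlapping regions with the destination above the source are handled
 * correctly.  Reached from memmove when dst > src.
 */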
_GLOBAL(backwards_memcpy)
    rlwinm. r7,r5,32-3,3,31     /* r7 = r5 >> 3 */
    add r6,r3,r5
    add r4,r4,r5
    beq 2f
    andi.   r0,r6,3
    mtctr   r7
    bne 5f
1:  lwz r7,-4(r4)
    lwzu    r8,-8(r4)
    stw r7,-4(r6)
    stwu    r8,-8(r6)
    bdnz    1b
    andi.   r5,r5,7
2:  cmplwi  0,r5,4
    blt 3f
    lwzu    r0,-4(r4)
    subi    r5,r5,4
    stwu    r0,-4(r6)
3:  cmpwi   0,r5,0
    beqlr
    mtctr   r5
4:  lbzu    r0,-1(r4)
    stbu    r0,-1(r6)
    bdnz    4b
    blr
5:  mtctr   r0
6:  lbzu    r7,-1(r4)
    stbu    r7,-1(r6)
    bdnz    6b
    subf    r5,r0,r5
    rlwinm. r7,r5,32-3,3,31
    beq 2b
    mtctr   r7
    b   1b

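/*
 * Roughly (an illustrative prototype; see the kernel's uaccess headers
 * for the real declaration):
 *
 *	unsigned long __copy_tofrom_user(void __user *to,
 *			const void __user *from, unsigned long size);
 *
 * Copy between user and kernel space.  Every access that can fault has
 * an exception-table entry; the fixup code further down computes how
 * many bytes were left to copy and returns that count in r3 (0 on
 * complete success).
 */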
_GLOBAL(__copy_tofrom_user)
    addi    r4,r4,-4
    addi    r6,r3,-4
    neg r0,r3
    andi.   r0,r0,CACHELINE_MASK    /* # bytes to start of cache line */
    beq 58f

    cmplw   0,r5,r0         /* is this more than total to do? */
    blt 63f         /* if not much to do */
    andi.   r8,r0,3         /* get it word-aligned first */
    mtctr   r8
    beq+    61f
70: lbz r9,4(r4)        /* do some bytes */
71: stb r9,4(r6)
    addi    r4,r4,1
    addi    r6,r6,1
    bdnz    70b
61: subf    r5,r0,r5
    srwi.   r0,r0,2
    mtctr   r0
    beq 58f
72: lwzu    r9,4(r4)        /* do some words */
73: stwu    r9,4(r6)
    bdnz    72b

    EX_TABLE(70b,100f)
    EX_TABLE(71b,101f)
    EX_TABLE(72b,102f)
    EX_TABLE(73b,103f)

58: srwi.   r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
    clrlwi  r5,r5,32-LG_CACHELINE_BYTES
    li  r11,4
    beq 63f

    /* Here we decide how far ahead to prefetch the source */
    li  r3,4
    cmpwi   r0,1
    li  r7,0
    ble 114f
    li  r7,1
#if MAX_COPY_PREFETCH > 1
    /* Heuristically, for large transfers we prefetch
       MAX_COPY_PREFETCH cachelines ahead.  For small transfers
       we prefetch 1 cacheline ahead. */
    cmpwi   r0,MAX_COPY_PREFETCH
    ble 112f
    li  r7,MAX_COPY_PREFETCH
112:    mtctr   r7
111:    dcbt    r3,r4
    addi    r3,r3,CACHELINE_BYTES
    bdnz    111b
#else
    dcbt    r3,r4
    addi    r3,r3,CACHELINE_BYTES
#endif /* MAX_COPY_PREFETCH > 1 */

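/*
 * Two passes: first copy r0 - r7 lines, prefetching r7 lines ahead of
 * the line currently being copied (r3 holds the prefetch offset); then
 * loop back once with r7 = 0 to copy the final r7 lines without
 * prefetching past the end of the source buffer.
 */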
114:    subf    r8,r7,r0
    mr  r0,r7
    mtctr   r8

53: dcbt    r3,r4
54: dcbz    r11,r6
    EX_TABLE(54b,105f)
/* the main body of the cacheline loop */
    COPY_16_BYTES_WITHEX(0)
#if L1_CACHE_BYTES >= 32
    COPY_16_BYTES_WITHEX(1)
#if L1_CACHE_BYTES >= 64
    COPY_16_BYTES_WITHEX(2)
    COPY_16_BYTES_WITHEX(3)
#if L1_CACHE_BYTES >= 128
    COPY_16_BYTES_WITHEX(4)
    COPY_16_BYTES_WITHEX(5)
    COPY_16_BYTES_WITHEX(6)
    COPY_16_BYTES_WITHEX(7)
#endif
#endif
#endif
    bdnz    53b
    cmpwi   r0,0
    li  r3,4
    li  r7,0
    bne 114b

63: srwi.   r0,r5,2
    mtctr   r0
    beq 64f
30: lwzu    r0,4(r4)
31: stwu    r0,4(r6)
    bdnz    30b

64: andi.   r0,r5,3
    mtctr   r0
    beq+    65f
40: lbz r0,4(r4)
41: stb r0,4(r6)
    addi    r4,r4,1
    addi    r6,r6,1
    bdnz    40b
65: li  r3,0
    blr

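/*
 * Fault fixups.  Each handler records the direction in r9 (0 = read
 * fault, 1 = write fault) and puts log2(bytes per remaining CTR
 * iteration) in r3, so the common code at 99:/106: below can compute
 * the number of bytes not copied as r5 + (ctr << r3).
 */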
/* read fault, initial single-byte copy */
100:    li  r9,0
    b   90f
/* write fault, initial single-byte copy */
101:    li  r9,1
90: subf    r5,r8,r5
    li  r3,0
    b   99f
/* read fault, initial word copy */
102:    li  r9,0
    b   91f
/* write fault, initial word copy */
103:    li  r9,1
91: li  r3,2
    b   99f

/*
 * These fixups handle faults in the cacheline loop and branch to either
 * 104f (if in the read part) or 105f (if in the write part), after
 * updating r5.
 */
    COPY_16_BYTES_EXCODE(0)
#if L1_CACHE_BYTES >= 32
    COPY_16_BYTES_EXCODE(1)
#if L1_CACHE_BYTES >= 64
    COPY_16_BYTES_EXCODE(2)
    COPY_16_BYTES_EXCODE(3)
#if L1_CACHE_BYTES >= 128
    COPY_16_BYTES_EXCODE(4)
    COPY_16_BYTES_EXCODE(5)
    COPY_16_BYTES_EXCODE(6)
    COPY_16_BYTES_EXCODE(7)
#endif
#endif
#endif

/* read fault in cacheline loop */
104:    li  r9,0
    b   92f
/* fault on dcbz (effectively a write fault) */
/* or write fault in cacheline loop */
105:    li  r9,1
92: li  r3,LG_CACHELINE_BYTES
    mfctr   r8
    add r0,r0,r8
    b   106f
/* read fault in final word loop */
108:    li  r9,0
    b   93f
/* write fault in final word loop */
109:    li  r9,1
93: andi.   r5,r5,3
    li  r3,2
    b   99f
/* read fault in final byte loop */
110:    li  r9,0
    b   94f
/* write fault in final byte loop */
111:    li  r9,1
94: li  r5,0
    li  r3,0
/*
 * At this stage the number of bytes not copied is
 * r5 + (ctr << r3), and r9 is 0 for read or 1 for write.
 */
99: mfctr   r0
106:    slw r3,r0,r3
    add.    r3,r3,r5
    beq 120f            /* shouldn't happen */
    cmpwi   0,r9,0
    bne 120f
/* for a read fault, first try to continue the copy one byte at a time */
    mtctr   r3
130:    lbz r0,4(r4)
131:    stb r0,4(r6)
    addi    r4,r4,1
    addi    r6,r6,1
    bdnz    130b
/* if the retry faults too, recover the count still to do into r3 */
132:    mfctr   r3
120:    blr

    EX_TABLE(30b,108b)
    EX_TABLE(31b,109b)
    EX_TABLE(40b,110b)
    EX_TABLE(41b,111b)
    EX_TABLE(130b,132b)
    EX_TABLE(131b,120b)

EXPORT_SYMBOL(__copy_tofrom_user)