/* (source cross-reference web-viewer navigation chrome removed) */
0001 /* SPDX-License-Identifier: GPL-2.0-or-later */
0002 /*
0003  * Copyright (C) 2002 Paul Mackerras, IBM Corp.
0004  */
0005 #include <asm/processor.h>
0006 #include <asm/ppc_asm.h>
0007 #include <asm/export.h>
0008 #include <asm/asm-compat.h>
0009 #include <asm/feature-fixups.h>
0010 
0011 #ifndef SELFTEST_CASE
0012 /* 0 == most CPUs, 1 == POWER6, 2 == Cell */
0013 #define SELFTEST_CASE   0
0014 #endif
0015 
0016 #ifdef __BIG_ENDIAN__
0017 #define sLd sld     /* Shift towards low-numbered address. */
0018 #define sHd srd     /* Shift towards high-numbered address. */
0019 #else
0020 #define sLd srd     /* Shift towards low-numbered address. */
0021 #define sHd sld     /* Shift towards high-numbered address. */
0022 #endif
0023 
0024 /*
0025  * These macros are used to generate exception table entries.
0026  * The exception handlers below use the original arguments
0027  * (stored on the stack) and the point where we're up to in
0028  * the destination buffer, i.e. the address of the first
0029  * unmodified byte.  Generally r3 points into the destination
0030  * buffer, but the first unmodified byte is at a variable
0031  * offset from r3.  In the code below, the symbol r3_offset
0032  * is set to indicate the current offset at each point in
0033  * the code.  This offset is then used as a negative offset
0034  * from the exception handler code, and those instructions
0035  * before the exception handlers are addi instructions that
0036  * adjust r3 to point to the correct place.
0037  */
0038     .macro  lex     /* exception handler for load */
0039 100:    EX_TABLE(100b, .Lld_exc - r3_offset)
0040     .endm
0041 
0042     .macro  stex        /* exception handler for store */
0043 100:    EX_TABLE(100b, .Lst_exc - r3_offset)
0044     .endm
0045 
0046     .align  7
0047 _GLOBAL_TOC(__copy_tofrom_user)
0048 #ifdef CONFIG_PPC_BOOK3S_64
0049 BEGIN_FTR_SECTION
0050     nop
0051 FTR_SECTION_ELSE
0052     b   __copy_tofrom_user_power7
0053 ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
0054 #endif
0055 _GLOBAL(__copy_tofrom_user_base)
0056     /* first check for a 4kB copy on a 4kB boundary */
0057     cmpldi  cr1,r5,16
0058     cmpdi   cr6,r5,4096
0059     or  r0,r3,r4
0060     neg r6,r3       /* LS 3 bits = # bytes to 8-byte dest bdry */
0061     andi.   r0,r0,4095
0062     std r3,-24(r1)
0063     crand   cr0*4+2,cr0*4+2,cr6*4+2
0064     std r4,-16(r1)
0065     std r5,-8(r1)
0066     dcbt    0,r4
0067     beq .Lcopy_page_4K
0068     andi.   r6,r6,7
0069     PPC_MTOCRF(0x01,r5)
0070     blt cr1,.Lshort_copy
0071 /* Below we want to nop out the bne if we're on a CPU that has the
0072  * CPU_FTR_UNALIGNED_LD_STD bit set and the CPU_FTR_CP_USE_DCBTZ bit
0073  * cleared.
0074  * At the time of writing the only CPU that has this combination of bits
0075  * set is Power6.
0076  */
0077 test_feature = (SELFTEST_CASE == 1)
0078 BEGIN_FTR_SECTION
0079     nop
0080 FTR_SECTION_ELSE
0081     bne .Ldst_unaligned
0082 ALT_FTR_SECTION_END(CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_CP_USE_DCBTZ, \
0083             CPU_FTR_UNALIGNED_LD_STD)
0084 .Ldst_aligned:
0085     addi    r3,r3,-16
0086 r3_offset = 16
0087 test_feature = (SELFTEST_CASE == 0)
0088 BEGIN_FTR_SECTION
0089     andi.   r0,r4,7
0090     bne .Lsrc_unaligned
0091 END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
0092     blt cr1,.Ldo_tail       /* if < 16 bytes to copy */
0093     srdi    r0,r5,5
0094     cmpdi   cr1,r0,0
0095 lex;    ld  r7,0(r4)
0096 lex;    ld  r6,8(r4)
0097     addi    r4,r4,16
0098     mtctr   r0
0099     andi.   r0,r5,0x10
0100     beq 22f
0101     addi    r3,r3,16
0102 r3_offset = 0
0103     addi    r4,r4,-16
0104     mr  r9,r7
0105     mr  r8,r6
0106     beq cr1,72f
0107 21:
0108 lex;    ld  r7,16(r4)
0109 lex;    ld  r6,24(r4)
0110     addi    r4,r4,32
0111 stex;   std r9,0(r3)
0112 r3_offset = 8
0113 stex;   std r8,8(r3)
0114 r3_offset = 16
0115 22:
0116 lex;    ld  r9,0(r4)
0117 lex;    ld  r8,8(r4)
0118 stex;   std r7,16(r3)
0119 r3_offset = 24
0120 stex;   std r6,24(r3)
0121     addi    r3,r3,32
0122 r3_offset = 0
0123     bdnz    21b
0124 72:
0125 stex;   std r9,0(r3)
0126 r3_offset = 8
0127 stex;   std r8,8(r3)
0128 r3_offset = 16
0129     andi.   r5,r5,0xf
0130     beq+    3f
0131     addi    r4,r4,16
0132 .Ldo_tail:
0133     addi    r3,r3,16
0134 r3_offset = 0
0135     bf  cr7*4+0,246f
0136 lex;    ld  r9,0(r4)
0137     addi    r4,r4,8
0138 stex;   std r9,0(r3)
0139     addi    r3,r3,8
0140 246:    bf  cr7*4+1,1f
0141 lex;    lwz r9,0(r4)
0142     addi    r4,r4,4
0143 stex;   stw r9,0(r3)
0144     addi    r3,r3,4
0145 1:  bf  cr7*4+2,2f
0146 lex;    lhz r9,0(r4)
0147     addi    r4,r4,2
0148 stex;   sth r9,0(r3)
0149     addi    r3,r3,2
0150 2:  bf  cr7*4+3,3f
0151 lex;    lbz r9,0(r4)
0152 stex;   stb r9,0(r3)
0153 3:  li  r3,0
0154     blr
0155 
0156 .Lsrc_unaligned:
0157 r3_offset = 16
0158     srdi    r6,r5,3
0159     addi    r5,r5,-16
0160     subf    r4,r0,r4
0161     srdi    r7,r5,4
0162     sldi    r10,r0,3
0163     cmpldi  cr6,r6,3
0164     andi.   r5,r5,7
0165     mtctr   r7
0166     subfic  r11,r10,64
0167     add r5,r5,r0
0168     bt  cr7*4+0,28f
0169 
0170 lex;    ld  r9,0(r4)    /* 3+2n loads, 2+2n stores */
0171 lex;    ld  r0,8(r4)
0172     sLd r6,r9,r10
0173 lex;    ldu r9,16(r4)
0174     sHd r7,r0,r11
0175     sLd r8,r0,r10
0176     or  r7,r7,r6
0177     blt cr6,79f
0178 lex;    ld  r0,8(r4)
0179     b   2f
0180 
0181 28:
0182 lex;    ld  r0,0(r4)    /* 4+2n loads, 3+2n stores */
0183 lex;    ldu r9,8(r4)
0184     sLd r8,r0,r10
0185     addi    r3,r3,-8
0186 r3_offset = 24
0187     blt cr6,5f
0188 lex;    ld  r0,8(r4)
0189     sHd r12,r9,r11
0190     sLd r6,r9,r10
0191 lex;    ldu r9,16(r4)
0192     or  r12,r8,r12
0193     sHd r7,r0,r11
0194     sLd r8,r0,r10
0195     addi    r3,r3,16
0196 r3_offset = 8
0197     beq cr6,78f
0198 
0199 1:  or  r7,r7,r6
0200 lex;    ld  r0,8(r4)
0201 stex;   std r12,8(r3)
0202 r3_offset = 16
0203 2:  sHd r12,r9,r11
0204     sLd r6,r9,r10
0205 lex;    ldu r9,16(r4)
0206     or  r12,r8,r12
0207 stex;   stdu    r7,16(r3)
0208 r3_offset = 8
0209     sHd r7,r0,r11
0210     sLd r8,r0,r10
0211     bdnz    1b
0212 
0213 78:
0214 stex;   std r12,8(r3)
0215 r3_offset = 16
0216     or  r7,r7,r6
0217 79:
0218 stex;   std r7,16(r3)
0219 r3_offset = 24
0220 5:  sHd r12,r9,r11
0221     or  r12,r8,r12
0222 stex;   std r12,24(r3)
0223 r3_offset = 32
0224     bne 6f
0225     li  r3,0
0226     blr
0227 6:  cmpwi   cr1,r5,8
0228     addi    r3,r3,32
0229 r3_offset = 0
0230     sLd r9,r9,r10
0231     ble cr1,7f
0232 lex;    ld  r0,8(r4)
0233     sHd r7,r0,r11
0234     or  r9,r7,r9
0235 7:
0236     bf  cr7*4+1,1f
0237 #ifdef __BIG_ENDIAN__
0238     rotldi  r9,r9,32
0239 #endif
0240 stex;   stw r9,0(r3)
0241 #ifdef __LITTLE_ENDIAN__
0242     rotrdi  r9,r9,32
0243 #endif
0244     addi    r3,r3,4
0245 1:  bf  cr7*4+2,2f
0246 #ifdef __BIG_ENDIAN__
0247     rotldi  r9,r9,16
0248 #endif
0249 stex;   sth r9,0(r3)
0250 #ifdef __LITTLE_ENDIAN__
0251     rotrdi  r9,r9,16
0252 #endif
0253     addi    r3,r3,2
0254 2:  bf  cr7*4+3,3f
0255 #ifdef __BIG_ENDIAN__
0256     rotldi  r9,r9,8
0257 #endif
0258 stex;   stb r9,0(r3)
0259 #ifdef __LITTLE_ENDIAN__
0260     rotrdi  r9,r9,8
0261 #endif
0262 3:  li  r3,0
0263     blr
0264 
0265 .Ldst_unaligned:
0266 r3_offset = 0
0267     PPC_MTOCRF(0x01,r6)     /* put #bytes to 8B bdry into cr7 */
0268     subf    r5,r6,r5
0269     li  r7,0
0270     cmpldi  cr1,r5,16
0271     bf  cr7*4+3,1f
0272 100:    EX_TABLE(100b, .Lld_exc_r7)
0273     lbz r0,0(r4)
0274 100:    EX_TABLE(100b, .Lst_exc_r7)
0275     stb r0,0(r3)
0276     addi    r7,r7,1
0277 1:  bf  cr7*4+2,2f
0278 100:    EX_TABLE(100b, .Lld_exc_r7)
0279     lhzx    r0,r7,r4
0280 100:    EX_TABLE(100b, .Lst_exc_r7)
0281     sthx    r0,r7,r3
0282     addi    r7,r7,2
0283 2:  bf  cr7*4+1,3f
0284 100:    EX_TABLE(100b, .Lld_exc_r7)
0285     lwzx    r0,r7,r4
0286 100:    EX_TABLE(100b, .Lst_exc_r7)
0287     stwx    r0,r7,r3
0288 3:  PPC_MTOCRF(0x01,r5)
0289     add r4,r6,r4
0290     add r3,r6,r3
0291     b   .Ldst_aligned
0292 
0293 .Lshort_copy:
0294 r3_offset = 0
0295     bf  cr7*4+0,1f
0296 lex;    lwz r0,0(r4)
0297 lex;    lwz r9,4(r4)
0298     addi    r4,r4,8
0299 stex;   stw r0,0(r3)
0300 stex;   stw r9,4(r3)
0301     addi    r3,r3,8
0302 1:  bf  cr7*4+1,2f
0303 lex;    lwz r0,0(r4)
0304     addi    r4,r4,4
0305 stex;   stw r0,0(r3)
0306     addi    r3,r3,4
0307 2:  bf  cr7*4+2,3f
0308 lex;    lhz r0,0(r4)
0309     addi    r4,r4,2
0310 stex;   sth r0,0(r3)
0311     addi    r3,r3,2
0312 3:  bf  cr7*4+3,4f
0313 lex;    lbz r0,0(r4)
0314 stex;   stb r0,0(r3)
0315 4:  li  r3,0
0316     blr
0317 
0318 /*
0319  * exception handlers follow
0320  * we have to return the number of bytes not copied
0321  * for an exception on a load, we set the rest of the destination to 0
0322  * Note that the number of bytes of instructions for adjusting r3 needs
0323  * to equal the amount of the adjustment, due to the trick of using
0324  * .Lld_exc - r3_offset as the handler address.
0325  */
0326 
0327 .Lld_exc_r7:
0328     add r3,r3,r7
0329     b   .Lld_exc
0330 
0331     /* adjust by 24 */
0332     addi    r3,r3,8
0333     nop
0334     /* adjust by 16 */
0335     addi    r3,r3,8
0336     nop
0337     /* adjust by 8 */
0338     addi    r3,r3,8
0339     nop
0340 
0341 /*
0342  * Here we have had a fault on a load and r3 points to the first
0343  * unmodified byte of the destination.  We use the original arguments
0344  * and r3 to work out how much wasn't copied.  Since we load some
0345  * distance ahead of the stores, we continue copying byte-by-byte until
0346  * we hit the load fault again in order to copy as much as possible.
0347  */
0348 .Lld_exc:
0349     ld  r6,-24(r1)
0350     ld  r4,-16(r1)
0351     ld  r5,-8(r1)
0352     subf    r6,r6,r3
0353     add r4,r4,r6
0354     subf    r5,r6,r5    /* #bytes left to go */
0355 
0356 /*
0357  * first see if we can copy any more bytes before hitting another exception
0358  */
0359     mtctr   r5
0360 r3_offset = 0
0361 100:    EX_TABLE(100b, .Ldone)
0362 43: lbz r0,0(r4)
0363     addi    r4,r4,1
0364 stex;   stb r0,0(r3)
0365     addi    r3,r3,1
0366     bdnz    43b
0367     li  r3,0        /* huh? all copied successfully this time? */
0368     blr
0369 
0370 /*
0371  * here we have trapped again, amount remaining is in ctr.
0372  */
0373 .Ldone:
0374     mfctr   r3
0375     blr
0376 
0377 /*
0378  * exception handlers for stores: we need to work out how many bytes
0379  * weren't copied, and we may need to copy some more.
0380  * Note that the number of bytes of instructions for adjusting r3 needs
0381  * to equal the amount of the adjustment, due to the trick of using
0382  * .Lst_exc - r3_offset as the handler address.
0383  */
0384 .Lst_exc_r7:
0385     add r3,r3,r7
0386     b   .Lst_exc
0387 
0388     /* adjust by 24 */
0389     addi    r3,r3,8
0390     nop
0391     /* adjust by 16 */
0392     addi    r3,r3,8
0393     nop
0394     /* adjust by 8 */
0395     addi    r3,r3,4
0396     /* adjust by 4 */
0397     addi    r3,r3,4
0398 .Lst_exc:
0399     ld  r6,-24(r1)  /* original destination pointer */
0400     ld  r4,-16(r1)  /* original source pointer */
0401     ld  r5,-8(r1)   /* original number of bytes */
0402     add r7,r6,r5
0403     /*
0404      * If the destination pointer isn't 8-byte aligned,
0405      * we may have got the exception as a result of a
0406      * store that overlapped a page boundary, so we may be
0407      * able to copy a few more bytes.
0408      */
0409 17: andi.   r0,r3,7
0410     beq 19f
0411     subf    r8,r6,r3    /* #bytes copied */
0412 100:    EX_TABLE(100b,19f)
0413     lbzx    r0,r8,r4
0414 100:    EX_TABLE(100b,19f)
0415     stb r0,0(r3)
0416     addi    r3,r3,1
0417     cmpld   r3,r7
0418     blt 17b
0419 19: subf    r3,r3,r7    /* #bytes not copied in r3 */
0420     blr
0421 
0422 /*
0423  * Routine to copy a whole page of data, optimized for POWER4.
0424  * On POWER4 it is more than 50% faster than the simple loop
0425  * above (following the .Ldst_aligned label).
0426  */
0427     .macro  exc
0428 100:    EX_TABLE(100b, .Labort)
0429     .endm
0430 .Lcopy_page_4K:
0431     std r31,-32(1)
0432     std r30,-40(1)
0433     std r29,-48(1)
0434     std r28,-56(1)
0435     std r27,-64(1)
0436     std r26,-72(1)
0437     std r25,-80(1)
0438     std r24,-88(1)
0439     std r23,-96(1)
0440     std r22,-104(1)
0441     std r21,-112(1)
0442     std r20,-120(1)
0443     li  r5,4096/32 - 1
0444     addi    r3,r3,-8
0445     li  r0,5
0446 0:  addi    r5,r5,-24
0447     mtctr   r0
0448 exc;    ld  r22,640(4)
0449 exc;    ld  r21,512(4)
0450 exc;    ld  r20,384(4)
0451 exc;    ld  r11,256(4)
0452 exc;    ld  r9,128(4)
0453 exc;    ld  r7,0(4)
0454 exc;    ld  r25,648(4)
0455 exc;    ld  r24,520(4)
0456 exc;    ld  r23,392(4)
0457 exc;    ld  r10,264(4)
0458 exc;    ld  r8,136(4)
0459 exc;    ldu r6,8(4)
0460     cmpwi   r5,24
0461 1:
0462 exc;    std r22,648(3)
0463 exc;    std r21,520(3)
0464 exc;    std r20,392(3)
0465 exc;    std r11,264(3)
0466 exc;    std r9,136(3)
0467 exc;    std r7,8(3)
0468 exc;    ld  r28,648(4)
0469 exc;    ld  r27,520(4)
0470 exc;    ld  r26,392(4)
0471 exc;    ld  r31,264(4)
0472 exc;    ld  r30,136(4)
0473 exc;    ld  r29,8(4)
0474 exc;    std r25,656(3)
0475 exc;    std r24,528(3)
0476 exc;    std r23,400(3)
0477 exc;    std r10,272(3)
0478 exc;    std r8,144(3)
0479 exc;    std r6,16(3)
0480 exc;    ld  r22,656(4)
0481 exc;    ld  r21,528(4)
0482 exc;    ld  r20,400(4)
0483 exc;    ld  r11,272(4)
0484 exc;    ld  r9,144(4)
0485 exc;    ld  r7,16(4)
0486 exc;    std r28,664(3)
0487 exc;    std r27,536(3)
0488 exc;    std r26,408(3)
0489 exc;    std r31,280(3)
0490 exc;    std r30,152(3)
0491 exc;    stdu    r29,24(3)
0492 exc;    ld  r25,664(4)
0493 exc;    ld  r24,536(4)
0494 exc;    ld  r23,408(4)
0495 exc;    ld  r10,280(4)
0496 exc;    ld  r8,152(4)
0497 exc;    ldu r6,24(4)
0498     bdnz    1b
0499 exc;    std r22,648(3)
0500 exc;    std r21,520(3)
0501 exc;    std r20,392(3)
0502 exc;    std r11,264(3)
0503 exc;    std r9,136(3)
0504 exc;    std r7,8(3)
0505     addi    r4,r4,640
0506     addi    r3,r3,648
0507     bge 0b
0508     mtctr   r5
0509 exc;    ld  r7,0(4)
0510 exc;    ld  r8,8(4)
0511 exc;    ldu r9,16(4)
0512 3:
0513 exc;    ld  r10,8(4)
0514 exc;    std r7,8(3)
0515 exc;    ld  r7,16(4)
0516 exc;    std r8,16(3)
0517 exc;    ld  r8,24(4)
0518 exc;    std r9,24(3)
0519 exc;    ldu r9,32(4)
0520 exc;    stdu    r10,32(3)
0521     bdnz    3b
0522 4:
0523 exc;    ld  r10,8(4)
0524 exc;    std r7,8(3)
0525 exc;    std r8,16(3)
0526 exc;    std r9,24(3)
0527 exc;    std r10,32(3)
0528 9:  ld  r20,-120(1)
0529     ld  r21,-112(1)
0530     ld  r22,-104(1)
0531     ld  r23,-96(1)
0532     ld  r24,-88(1)
0533     ld  r25,-80(1)
0534     ld  r26,-72(1)
0535     ld  r27,-64(1)
0536     ld  r28,-56(1)
0537     ld  r29,-48(1)
0538     ld  r30,-40(1)
0539     ld  r31,-32(1)
0540     li  r3,0
0541     blr
0542 
0543 /*
0544  * on an exception, reset to the beginning and jump back into the
0545  * standard __copy_tofrom_user
0546  */
0547 .Labort:
0548     ld  r20,-120(1)
0549     ld  r21,-112(1)
0550     ld  r22,-104(1)
0551     ld  r23,-96(1)
0552     ld  r24,-88(1)
0553     ld  r25,-80(1)
0554     ld  r26,-72(1)
0555     ld  r27,-64(1)
0556     ld  r28,-56(1)
0557     ld  r29,-48(1)
0558     ld  r30,-40(1)
0559     ld  r31,-32(1)
0560     ld  r3,-24(r1)
0561     ld  r4,-16(r1)
0562     li  r5,4096
0563     b   .Ldst_aligned
0564 EXPORT_SYMBOL(__copy_tofrom_user)