Back to home page

OSCL-LXR

 
 

    


0001 /* SPDX-License-Identifier: GPL-2.0 */
0002 /*
0003  * Copyright (C) IBM Corporation, 2011
0004  * Derived from copyuser_power7.s by Anton Blanchard <anton@au.ibm.com>
0005  * Author - Balbir Singh <bsingharora@gmail.com>
0006  */
0007 #include <asm/ppc_asm.h>
0008 #include <asm/errno.h>
0009 #include <asm/export.h>
0010 
/*
 * err1: drop local label 100 at the current location and hand it to
 * EX_TABLE together with .Ldo_err1.  It is written as "err1; <insn>", so
 * the label lands on <insn>.  This variant is used on instructions that
 * run while no extra stack frame is live (.Ldo_err1 does not touch r1).
 * EX_TABLE is not defined in this file; presumably it registers
 * .Ldo_err1 as the fault fixup for <insn> -- confirm against its
 * definition.
 */
0011     .macro err1
0012 100:
0013     EX_TABLE(100b,.Ldo_err1)
0014     .endm
0015 
/*
 * err2: same shape as err1, but with local label 200 and .Ldo_err2 as
 * the target.  .Ldo_err2 reloads r14-r22 and pops STACKFRAMESIZE from r1
 * before continuing, so this variant is used on the instructions that
 * run between the frame setup and the frame teardown in copy_mc_generic.
 * EX_TABLE is defined elsewhere; presumably it records the fault fixup
 * for the tagged instruction -- confirm against its definition.
 */
0016     .macro err2
0017 200:
0018     EX_TABLE(200b,.Ldo_err2)
0019     .endm
0020 
/*
 * err3: tags the following instruction with local label 300 and passes
 * it to EX_TABLE with .Ldone as the target.  It is only used inside the
 * byte-by-byte retry loop at .Ldo_err1; .Ldone returns the current CTR
 * value in r3.  EX_TABLE is defined elsewhere; presumably it records the
 * fault fixup for the tagged instruction -- confirm against its
 * definition.
 */
0021     .macro err3
0022 300:    EX_TABLE(300b,.Ldone)
0023     .endm
0024 
/*
 * Fixup path for instructions tagged with err2 / err1.
 *
 * .Ldo_err2 is the target used while the STACKFRAMESIZE frame is live:
 * it reloads r14-r22 from their STK_REG slots, pops the frame and then
 * falls through into .Ldo_err1.
 *
 * .Ldo_err1 retries the copy one byte at a time starting from the
 * current r4 (source) and r3 (destination), for r7 bytes.  In
 * copy_mc_generic r7 starts as the requested length and is decremented
 * only after a chunk has been fully stored, so it still includes the
 * chunk that was in progress.
 *
 * Result in r3: 0 if the byte loop runs to completion; otherwise the
 * err3-tagged accesses divert to .Ldone, which returns CTR.
 *
 * NOTE(review): copy_mc_generic advances r4 before issuing the stores
 * of each chunk.  If this path is entered from a tagged store rather
 * than a load, r4 is already past the chunk while r3 and r7 are not, so
 * the retry would read from a shifted source position -- confirm
 * whether stores are expected to fault here.
 */
0025 .Ldo_err2:
0026     ld  r22,STK_REG(R22)(r1)
0027     ld  r21,STK_REG(R21)(r1)
0028     ld  r20,STK_REG(R20)(r1)
0029     ld  r19,STK_REG(R19)(r1)
0030     ld  r18,STK_REG(R18)(r1)
0031     ld  r17,STK_REG(R17)(r1)
0032     ld  r16,STK_REG(R16)(r1)
0033     ld  r15,STK_REG(R15)(r1)
0034     ld  r14,STK_REG(R14)(r1)
         /* Drop the frame; the saved LR slot is not reloaded here. */
0035     addi    r1,r1,STACKFRAMESIZE
0036 .Ldo_err1:
0037     /* Do a byte by byte copy to get the exact remaining size */
         /* CTR = r7, the number of bytes not yet accounted as copied. */
0038     mtctr   r7
0039 46:
0040 err3;   lbz r0,0(r4)
0041     addi    r4,r4,1
0042 err3;   stb r0,0(r3)
0043     addi    r3,r3,1
         /* CTR is decremented only here, after the byte has been stored. */
0044     bdnz    46b
         /* Every remaining byte was copied: report 0. */
0045     li  r3,0
0046     blr
0047 
/*
 * .Ldone: target of the err3 tags in the byte-by-byte retry loop.
 * Returns the current CTR value in r3.  The loop loads CTR with r7 and
 * decrements it with bdnz only after a byte has been loaded and stored,
 * so the value returned still counts the byte being processed when the
 * loop was left.
 */
0048 .Ldone:
0049     mfctr   r3
0050     blr
0051 
0052 
/*
 * copy_mc_generic
 *
 * Copies r5 bytes from the address in r4 to the address in r3.
 * Presumably called from C as copy_mc_generic(dst, src, len) following
 * the usual r3/r4/r5 argument order -- the prototype is not visible in
 * this file.
 *
 * Register use, as read from the code below:
 *   r3      destination pointer, advanced after each chunk is stored
 *   r4      source pointer, advanced after each chunk is loaded
 *   r5      byte count; reduced as the stages below consume it
 *   r7      copy of the byte count, decremented after each chunk is
 *           stored; the fixup code at .Ldo_err1 uses it as its loop count
 *   r0, r6, r8-r12, r14-r22
 *           data registers for the unrolled copies (r14-r22 are saved
 *           in a STACKFRAMESIZE frame around the stages that use them)
 *   cr0     result of the "at least 128 bytes?" test
 *   cr7     size-selection bits loaded with mtocrf
 *
 * Stages: align the source to 8 bytes, copy 128-byte blocks in a CTR
 * loop, then copy 64/32/16/8/4/2/1-byte pieces selected by the bits of
 * the remaining count.
 *
 * Returns 0 in r3 when the main path runs to completion.  Every load
 * and store is tagged with err1 or err2; the tagged fixup paths
 * (.Ldo_err1 / .Ldo_err2) retry byte by byte and return either 0 or the
 * CTR value left by that retry loop.
 */
0053 _GLOBAL(copy_mc_generic)
         /* r7 tracks the bytes still to be copied for the fixup path. */
0054     mr  r7,r5
         /* Fewer than 16 bytes: skip alignment and the bulk stages. */
0055     cmpldi  r5,16
0056     blt .Lshort_copy
0057 
0058 .Lcopy:
0059     /* Get the source 8B aligned */
         /*
          * r6 = -src.  Its low four bits go to cr7, then r6 is masked
          * to (-src) & 7, the byte count needed to reach 8B alignment.
          */
0060     neg r6,r4
0061     mtocrf  0x01,r6
0062     clrldi  r6,r6,(64-3)
0063 
         /* Bit value 1 of -src set: copy one byte. */
0064     bf  cr7*4+3,1f
0065 err1;   lbz r0,0(r4)
0066     addi    r4,r4,1
0067 err1;   stb r0,0(r3)
0068     addi    r3,r3,1
0069     subi    r7,r7,1
0070 
         /* Bit value 2 of -src set: copy a halfword. */
0071 1:  bf  cr7*4+2,2f
0072 err1;   lhz r0,0(r4)
0073     addi    r4,r4,2
0074 err1;   sth r0,0(r3)
0075     addi    r3,r3,2
0076     subi    r7,r7,2
0077 
         /* Bit value 4 of -src set: copy a word. */
0078 2:  bf  cr7*4+1,3f
0079 err1;   lwz r0,0(r4)
0080     addi    r4,r4,4
0081 err1;   stw r0,0(r3)
0082     addi    r3,r3,4
0083     subi    r7,r7,4
0084 
         /*
          * Remove the alignment bytes from r5 and test for at least one
          * 128B block.  The cr0 result is consumed by the "blt 5f" after
          * the frame setup; nothing in between writes cr0.
          */
0085 3:  sub r5,r5,r6
0086     cmpldi  r5,128
0087 
         /*
          * Open a STACKFRAMESIZE frame and save r14-r22, which are used
          * as data registers below.  LR is stored at STACKFRAMESIZE+16
          * from the new r1, i.e. 16 bytes above the previous r1; it is
          * not reloaded anywhere in the visible code, and no calls are
          * made.
          */
0088     mflr    r0
0089     stdu    r1,-STACKFRAMESIZE(r1)
0090     std r14,STK_REG(R14)(r1)
0091     std r15,STK_REG(R15)(r1)
0092     std r16,STK_REG(R16)(r1)
0093     std r17,STK_REG(R17)(r1)
0094     std r18,STK_REG(R18)(r1)
0095     std r19,STK_REG(R19)(r1)
0096     std r20,STK_REG(R20)(r1)
0097     std r21,STK_REG(R21)(r1)
0098     std r22,STK_REG(R22)(r1)
0099     std r0,STACKFRAMESIZE+16(r1)
0100 
         /* Under 128 bytes left: skip the block loop. */
0101     blt 5f
         /* CTR = number of whole 128B blocks. */
0102     srdi    r6,r5,7
0103     mtctr   r6
0104 
0105     /* Now do cacheline (128B) sized loads and stores. */
0106     .align  5
0107 4:
0108 err2;   ld  r0,0(r4)
0109 err2;   ld  r6,8(r4)
0110 err2;   ld  r8,16(r4)
0111 err2;   ld  r9,24(r4)
0112 err2;   ld  r10,32(r4)
0113 err2;   ld  r11,40(r4)
0114 err2;   ld  r12,48(r4)
0115 err2;   ld  r14,56(r4)
0116 err2;   ld  r15,64(r4)
0117 err2;   ld  r16,72(r4)
0118 err2;   ld  r17,80(r4)
0119 err2;   ld  r18,88(r4)
0120 err2;   ld  r19,96(r4)
0121 err2;   ld  r20,104(r4)
0122 err2;   ld  r21,112(r4)
0123 err2;   ld  r22,120(r4)
0124     addi    r4,r4,128
0125 err2;   std r0,0(r3)
0126 err2;   std r6,8(r3)
0127 err2;   std r8,16(r3)
0128 err2;   std r9,24(r3)
0129 err2;   std r10,32(r3)
0130 err2;   std r11,40(r3)
0131 err2;   std r12,48(r3)
0132 err2;   std r14,56(r3)
0133 err2;   std r15,64(r3)
0134 err2;   std r16,72(r3)
0135 err2;   std r17,80(r3)
0136 err2;   std r18,88(r3)
0137 err2;   std r19,96(r3)
0138 err2;   std r20,104(r3)
0139 err2;   std r21,112(r3)
0140 err2;   std r22,120(r3)
0141     addi    r3,r3,128
         /* r7 is reduced only once the whole block has been stored. */
0142     subi    r7,r7,128
0143     bdnz    4b
0144 
         /* Keep only the low 7 bits of r5: the bytes left after the blocks. */
0145     clrldi  r5,r5,(64-7)
0146 
0147     /* Up to 127B to go */
         /*
          * cr7 = bits 4-7 of r5, so cr7*4+1 selects 64 bytes,
          * cr7*4+2 selects 32 bytes and cr7*4+3 selects 16 bytes.
          */
0148 5:  srdi    r6,r5,4
0149     mtocrf  0x01,r6
0150 
         /* 64B piece; still inside the frame, so err2 and r14 are used. */
0151 6:  bf  cr7*4+1,7f
0152 err2;   ld  r0,0(r4)
0153 err2;   ld  r6,8(r4)
0154 err2;   ld  r8,16(r4)
0155 err2;   ld  r9,24(r4)
0156 err2;   ld  r10,32(r4)
0157 err2;   ld  r11,40(r4)
0158 err2;   ld  r12,48(r4)
0159 err2;   ld  r14,56(r4)
0160     addi    r4,r4,64
0161 err2;   std r0,0(r3)
0162 err2;   std r6,8(r3)
0163 err2;   std r8,16(r3)
0164 err2;   std r9,24(r3)
0165 err2;   std r10,32(r3)
0166 err2;   std r11,40(r3)
0167 err2;   std r12,48(r3)
0168 err2;   std r14,56(r3)
0169     addi    r3,r3,64
0170     subi    r7,r7,64
0171 
         /*
          * Restore r14-r22 and pop the frame.  From here on only r0, r6,
          * r8 and r9 carry data, and accesses are tagged err1 because
          * there is no frame left for the fixup to unwind.
          */
0172 7:  ld  r14,STK_REG(R14)(r1)
0173     ld  r15,STK_REG(R15)(r1)
0174     ld  r16,STK_REG(R16)(r1)
0175     ld  r17,STK_REG(R17)(r1)
0176     ld  r18,STK_REG(R18)(r1)
0177     ld  r19,STK_REG(R19)(r1)
0178     ld  r20,STK_REG(R20)(r1)
0179     ld  r21,STK_REG(R21)(r1)
0180     ld  r22,STK_REG(R22)(r1)
0181     addi    r1,r1,STACKFRAMESIZE
0182 
0183     /* Up to 63B to go */
0184     bf  cr7*4+2,8f
0185 err1;   ld  r0,0(r4)
0186 err1;   ld  r6,8(r4)
0187 err1;   ld  r8,16(r4)
0188 err1;   ld  r9,24(r4)
0189     addi    r4,r4,32
0190 err1;   std r0,0(r3)
0191 err1;   std r6,8(r3)
0192 err1;   std r8,16(r3)
0193 err1;   std r9,24(r3)
0194     addi    r3,r3,32
0195     subi    r7,r7,32
0196 
0197     /* Up to 31B to go */
0198 8:  bf  cr7*4+3,9f
0199 err1;   ld  r0,0(r4)
0200 err1;   ld  r6,8(r4)
0201     addi    r4,r4,16
0202 err1;   std r0,0(r3)
0203 err1;   std r6,8(r3)
0204     addi    r3,r3,16
0205     subi    r7,r7,16
0206 
         /* Keep only the low 4 bits of r5: at most 15 bytes remain. */
0207 9:  clrldi  r5,r5,(64-4)
0208 
0209     /* Up to 15B to go */
0210 .Lshort_copy:
         /*
          * cr7 = low four bits of r5: cr7*4+0 selects 8 bytes,
          * cr7*4+1 selects 4, cr7*4+2 selects 2 and cr7*4+3 selects 1.
          * Lengths below 16 arrive here directly from the entry check.
          */
0211     mtocrf  0x01,r5
0212     bf  cr7*4+0,12f
0213 err1;   lwz r0,0(r4)    /* Less chance of a reject with word ops */
0214 err1;   lwz r6,4(r4)
0215     addi    r4,r4,8
0216 err1;   stw r0,0(r3)
0217 err1;   stw r6,4(r3)
0218     addi    r3,r3,8
0219     subi    r7,r7,8
0220 
0221 12: bf  cr7*4+1,13f
0222 err1;   lwz r0,0(r4)
0223     addi    r4,r4,4
0224 err1;   stw r0,0(r3)
0225     addi    r3,r3,4
0226     subi    r7,r7,4
0227 
0228 13: bf  cr7*4+2,14f
0229 err1;   lhz r0,0(r4)
0230     addi    r4,r4,2
0231 err1;   sth r0,0(r3)
0232     addi    r3,r3,2
0233     subi    r7,r7,2
0234 
         /* Final byte: r3, r4 and r7 are not updated as nothing follows. */
0235 14: bf  cr7*4+3,15f
0236 err1;   lbz r0,0(r4)
0237 err1;   stb r0,0(r3)
0238 
         /* Main path completed: return 0. */
0239 15: li  r3,0
0240     blr
0241 
0242 EXPORT_SYMBOL_GPL(copy_mc_generic);