/* (LXR web-viewer navigation chrome removed — not part of the source file) */
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Copyright (C) 2002 Paul Mackerras, IBM Corp.
 */
#include <asm/processor.h>
#include <asm/ppc_asm.h>
#include <asm/export.h>
#include <asm/asm-compat.h>
#include <asm/feature-fixups.h>
#include <asm/kasan.h>

#ifndef SELFTEST_CASE
/* For big-endian, 0 == most CPUs, 1 == POWER6, 2 == Cell */
#define SELFTEST_CASE   0
#endif

/*
 * void *memcpy(void *to, const void *from, size_t n)
 *
 * ABI (PPC64 ELF):
 *   In:  r3 = dest, r4 = src, r5 = byte count
 *   Out: r3 = original dest pointer
 *   Scratch: r0, r6-r12, ctr, cr1, cr6, cr7 (no stack frame is built;
 *   the dest pointer is parked in the caller's frame redzone slot)
 *
 * The BEGIN_FTR_SECTION/ALT_FTR_SECTION machinery patches this code at
 * boot according to CPU feature bits; on Book3S-64 CPUs with
 * CPU_FTR_VMX_COPY the whole routine is replaced by a branch to
 * memcpy_power7.
 */
    .align  7
_GLOBAL_TOC_KASAN(memcpy)
BEGIN_FTR_SECTION
#ifdef __LITTLE_ENDIAN__
    cmpdi   cr7,r5,0        /* LE stub: test for zero length (beqlr below) */
#else
    std r3,-STACKFRAMESIZE+STK_REG(R31)(r1) /* save destination pointer for return value */
#endif
FTR_SECTION_ELSE
#ifdef CONFIG_PPC_BOOK3S_64
    b   memcpy_power7
#endif
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
#ifdef __LITTLE_ENDIAN__
    /* dumb little-endian memcpy that will get replaced at runtime */
    addi r9,r3,-1           /* r9 = dest - 1 for stbu pre-increment; r3 kept as return value */
    addi r4,r4,-1
    beqlr cr7               /* n == 0: nothing to do */
    mtctr r5
1:  lbzu r10,1(r4)          /* byte-at-a-time copy, ctr = n */
    stbu r10,1(r9)
    bdnz 1b
    blr
#else
    PPC_MTOCRF(0x01,r5)     /* cr7 = low 4 bits of n: tail-size flags 8/4/2/1 */
    cmpldi  cr1,r5,16
    neg r6,r3       # LS 3 bits = # bytes to 8-byte dest bdry
    andi.   r6,r6,7
    dcbt    0,r4            /* touch first src cacheline */
    blt cr1,.Lshort_copy
/* Below we want to nop out the bne if we're on a CPU that has the
   CPU_FTR_UNALIGNED_LD_STD bit set and the CPU_FTR_CP_USE_DCBTZ bit
   cleared.
   At the time of writing the only CPU that has this combination of bits
   set is Power6. */
test_feature = (SELFTEST_CASE == 1)
BEGIN_FTR_SECTION
    nop
FTR_SECTION_ELSE
    bne .Ldst_unaligned
ALT_FTR_SECTION_END(CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_CP_USE_DCBTZ, \
                    CPU_FTR_UNALIGNED_LD_STD)
.Ldst_aligned:
    addi    r3,r3,-16       /* bias dest for the stdu/16 pipeline below */
test_feature = (SELFTEST_CASE == 0)
BEGIN_FTR_SECTION
    andi.   r0,r4,7         /* r0 = src misalignment within 8 bytes */
    bne .Lsrc_unaligned
END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
    /* Both src and dest 8-byte aligned: software-pipelined 16B/iter loop. */
    srdi    r7,r5,4         /* ctr = n / 16 */
    ld  r9,0(r4)
    addi    r4,r4,-8
    mtctr   r7
    andi.   r5,r5,7         /* r5 = tail bytes (< 8), tested at 3: below */
    bf  cr7*4+0,2f          /* no odd 8-byte chunk? enter loop at 2: */
    addi    r3,r3,8
    addi    r4,r4,8
    mr  r8,r9
    blt cr1,3f              /* n < 16: just store the one doubleword */
1:  ld  r9,8(r4)
    std r8,8(r3)
2:  ldu r8,16(r4)
    stdu    r9,16(r3)
    bdnz    1b
3:  std r8,8(r3)
    beq 3f                  /* no tail bytes left */
    addi    r3,r3,16
.Ldo_tail:
    /* Copy the final < 8 bytes; cr7 bits select 4-, 2- and 1-byte moves. */
    bf  cr7*4+1,1f
    lwz r9,8(r4)
    addi    r4,r4,4
    stw r9,0(r3)
    addi    r3,r3,4
1:  bf  cr7*4+2,2f
    lhz r9,8(r4)
    addi    r4,r4,2
    sth r9,0(r3)
    addi    r3,r3,2
2:  bf  cr7*4+3,3f
    lbz r9,8(r4)
    stb r9,0(r3)
3:  ld  r3,-STACKFRAMESIZE+STK_REG(R31)(r1) /* return dest pointer */
    blr

.Lsrc_unaligned:
    /*
     * Dest is 8-byte aligned but src is not: copy via aligned doubleword
     * loads combined with shift-and-merge.  r10 = src offset * 8 (left
     * shift in bits), r11 = 64 - r10 (right shift), r4 rounded down to
     * an 8-byte boundary.
     */
    srdi    r6,r5,3
    addi    r5,r5,-16
    subf    r4,r0,r4        /* align src down to 8 bytes */
    srdi    r7,r5,4
    sldi    r10,r0,3        /* left-shift count in bits */
    cmpdi   cr6,r6,3
    andi.   r5,r5,7
    mtctr   r7
    subfic  r11,r10,64      /* complementary right-shift count */
    add r5,r5,r0

    bt  cr7*4+0,0f          /* odd 8-byte chunk? use the 0: preamble */

    ld  r9,0(r4)    # 3+2n loads, 2+2n stores
    ld  r0,8(r4)
    sld r6,r9,r10
    ldu r9,16(r4)
    srd r7,r0,r11
    sld r8,r0,r10
    or  r7,r7,r6
    blt cr6,4f
    ld  r0,8(r4)
    # s1<< in r8, d0=(s0<<|s1>>) in r7, s3 in r0, s2 in r9, nix in r6 & r12
    b   2f

0:  ld  r0,0(r4)    # 4+2n loads, 3+2n stores
    ldu r9,8(r4)
    sld r8,r0,r10
    addi    r3,r3,-8
    blt cr6,5f
    ld  r0,8(r4)
    srd r12,r9,r11
    sld r6,r9,r10
    ldu r9,16(r4)
    or  r12,r8,r12
    srd r7,r0,r11
    sld r8,r0,r10
    addi    r3,r3,16
    beq cr6,3f

    # d0=(s0<<|s1>>) in r12, s1<< in r6, s2>> in r7, s2<< in r8, s3 in r9
1:  or  r7,r7,r6
    ld  r0,8(r4)
    std r12,8(r3)
2:  srd r12,r9,r11
    sld r6,r9,r10
    ldu r9,16(r4)
    or  r12,r8,r12
    stdu    r7,16(r3)
    srd r7,r0,r11
    sld r8,r0,r10
    bdnz    1b

3:  std r12,8(r3)
    or  r7,r7,r6
4:  std r7,16(r3)
5:  srd r12,r9,r11
    or  r12,r8,r12
    std r12,24(r3)
    beq 4f                  /* no tail bytes */
    cmpwi   cr1,r5,8
    addi    r3,r3,32
    sld r9,r9,r10           /* assemble the tail in the top of r9 */
    ble cr1,6f
    ld  r0,8(r4)            /* tail spans one more src doubleword */
    srd r7,r0,r11
    or  r9,r7,r9
6:
    /* Store the tail from the top of r9, rotating the next piece down. */
    bf  cr7*4+1,1f
    rotldi  r9,r9,32
    stw r9,0(r3)
    addi    r3,r3,4
1:  bf  cr7*4+2,2f
    rotldi  r9,r9,16
    sth r9,0(r3)
    addi    r3,r3,2
2:  bf  cr7*4+3,3f
    rotldi  r9,r9,8
    stb r9,0(r3)
3:  ld  r3,-STACKFRAMESIZE+STK_REG(R31)(r1) /* return dest pointer */
    blr

.Ldst_unaligned:
    /*
     * Copy 1/2/4 bytes (r6 total, r7 = running offset) until dest is
     * 8-byte aligned, then rejoin the aligned path.
     */
    PPC_MTOCRF(0x01,r6)     # put #bytes to 8B bdry into cr7
    subf    r5,r6,r5
    li  r7,0
    cmpldi  cr1,r5,16
    bf  cr7*4+3,1f
    lbz r0,0(r4)
    stb r0,0(r3)
    addi    r7,r7,1
1:  bf  cr7*4+2,2f
    lhzx    r0,r7,r4
    sthx    r0,r7,r3
    addi    r7,r7,2
2:  bf  cr7*4+1,3f
    lwzx    r0,r7,r4
    stwx    r0,r7,r3
3:  PPC_MTOCRF(0x01,r5)     /* refresh cr7 with the remaining length's low bits */
    add r4,r6,r4
    add r3,r6,r3
    b   .Ldst_aligned

.Lshort_copy:
    /* n < 16: cr7 bits select an 8-, 4-, 2- and 1-byte move. */
    bf  cr7*4+0,1f
    lwz r0,0(r4)            /* two word moves avoid alignment concerns */
    lwz r9,4(r4)
    addi    r4,r4,8
    stw r0,0(r3)
    stw r9,4(r3)
    addi    r3,r3,8
1:  bf  cr7*4+1,2f
    lwz r0,0(r4)
    addi    r4,r4,4
    stw r0,0(r3)
    addi    r3,r3,4
2:  bf  cr7*4+2,3f
    lhz r0,0(r4)
    addi    r4,r4,2
    sth r0,0(r3)
    addi    r3,r3,2
3:  bf  cr7*4+3,4f
    lbz r0,0(r4)
    stb r0,0(r3)
4:  ld  r3,-STACKFRAMESIZE+STK_REG(R31)(r1) /* return dest pointer */
    blr
#endif
EXPORT_SYMBOL(memcpy)
EXPORT_SYMBOL_KASAN(memcpy)