0001
0002
0003
0004
0005 #include <asm/processor.h>
0006 #include <asm/ppc_asm.h>
0007 #include <asm/export.h>
0008 #include <asm/asm-compat.h>
0009 #include <asm/feature-fixups.h>
0010 #include <asm/kasan.h>
0011
/*
 * SELFTEST_CASE defaults to 0 when the build does not define it.
 * Within this file the value is only read by the two
 * "test_feature = (SELFTEST_CASE == N)" assignments in the big-endian
 * path of memcpy (N == 1 before the destination-alignment dispatch,
 * N == 0 before the source-alignment check).
 * NOTE(review): presumably each value selects the CPU flavour that a
 * selftest build emulates - confirm against the selftest harness.
 */
0012 #ifndef SELFTEST_CASE
0013
0014 #define SELFTEST_CASE 0
0015 #endif
0016
/*
 * memcpy: copy r5 bytes from the buffer at r4 to the buffer at r3.
 *
 * Register roles, as used by the code below:
 *   r3 = destination pointer (also what is returned)
 *   r4 = source pointer
 *   r5 = byte count
 *
 * The preprocessor selects one of two bodies:
 *   - __LITTLE_ENDIAN__ : a byte-at-a-time loop that never modifies r3.
 *   - otherwise         : a doubleword copy with dedicated paths for
 *                         short counts (.Lshort_copy), a misaligned
 *                         destination (.Ldst_unaligned) and a misaligned
 *                         source (.Lsrc_unaligned).
 *
 * The first instruction lives in a feature-fixup alternative keyed on
 * CPU_FTR_VMX_COPY. Going by the _IFCLR end macro, the first arm is kept
 * when that feature is clear and the ELSE arm (a branch to memcpy_power7,
 * only emitted under CONFIG_PPC_BOOK3S_64) is used when it is set.
 */
0017 .align 7 /* PowerPC gas takes a power of two here: 2^7 = 128-byte alignment */
0018 _GLOBAL_TOC_KASAN(memcpy) /* entry symbol; the macro itself is defined outside this file */
0019 BEGIN_FTR_SECTION
0020 #ifdef __LITTLE_ENDIAN__
0021 cmpdi cr7,r5,0 /* cr7.eq = (count == 0); consumed by the beqlr below */
0022 #else
0023 std r3,-STACKFRAMESIZE+STK_REG(R31)(r1) /* stash dest; every big-endian exit reloads it as the return value */
0024 #endif
0025 FTR_SECTION_ELSE
0026 #ifdef CONFIG_PPC_BOOK3S_64
0027 b memcpy_power7 /* hand the whole copy to memcpy_power7 (defined elsewhere) */
0028 #endif
0029 ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
0030 #ifdef __LITTLE_ENDIAN__
0031 /* Little-endian body: simple byte loop; r3 is left untouched so it still holds dest at blr. */
0032 addi r9,r3,-1 /* r9 = dest - 1, pre-biased for the update-form store */
0033 addi r4,r4,-1 /* r4 = src - 1, pre-biased for the update-form load */
0034 beqlr cr7 /* count == 0: nothing to copy, return */
0035 mtctr r5 /* CTR = number of bytes to copy */
0036 1: lbzu r10,1(r4) /* load the byte at r4 + 1 and advance r4 */
0037 stbu r10,1(r9) /* store it at r9 + 1 and advance r9 */
0038 bdnz 1b /* decrement CTR, loop while non-zero */
0039 blr
0040 #else
0041 PPC_MTOCRF(0x01,r5) /* cr7 = low 4 bits of count; its bits later select 8/4/2/1-byte pieces */
0042 cmpldi cr1,r5,16 /* cr1 = unsigned compare of count against 16 */
0043 neg r6,r3 # LS 3 bits = # bytes to 8-byte dest bdry
0044 andi. r6,r6,7 /* r6 = bytes up to the next 8-byte dest boundary; cr0.eq if already aligned */
0045 dcbt 0,r4 /* cache-touch hint on the start of the source buffer */
0046 blt cr1,.Lshort_copy /* fewer than 16 bytes: take the piecewise short copy */
0047 /* Destination-alignment dispatch. The mask/value pair on the END macro */
0048 /* below keeps the nop arm when CPU_FTR_UNALIGNED_LD_STD is set and */
0049 /* CPU_FTR_CP_USE_DCBTZ is clear; in every other case the bne arm is used */
0050 /* and a misaligned destination (cr0 from the andi. above) is fixed up first. */
0051
0052 test_feature = (SELFTEST_CASE == 1) /* assembler symbol; its consumer is not visible in this file */
0053 BEGIN_FTR_SECTION
0054 nop /* no destination fix-up: fall straight into .Ldst_aligned */
0055 FTR_SECTION_ELSE
0056 bne .Ldst_unaligned /* r6 != 0: destination is not 8-byte aligned */
0057 ALT_FTR_SECTION_END(CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_CP_USE_DCBTZ, \
0058 CPU_FTR_UNALIGNED_LD_STD)
/*
 * .Ldst_aligned: main big-endian doubleword copy.
 *
 * Reached by fall-through from the prologue or by the branch at the end
 * of .Ldst_unaligned. On entry: r3 = dest, r4 = src, r5 = count,
 * cr7 = low 4 bits of count, cr1 = count compared with 16.
 *
 * The loop moves two doublewords per iteration using update-form
 * loads/stores, so both pointers are biased first. When the 8s bit of
 * count is set (odd number of doublewords) the preloaded doubleword is
 * turned into the pending store and the loop is entered at 1: instead
 * of 2:. The 0-7 trailing bytes are then copied piecewise from cr7.
 */
0059 .Ldst_aligned:
0060 addi r3,r3,-16 /* bias dest for the 8(r3) / stdu 16(r3) offsets used below */
0061 test_feature = (SELFTEST_CASE == 0) /* assembler symbol; its consumer is not visible in this file */
0062 BEGIN_FTR_SECTION
0063 andi. r0,r4,7 /* r0 = src misalignment within a doubleword */
0064 bne .Lsrc_unaligned /* misaligned source: use the shift-and-merge copy */
0065 END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
0066 srdi r7,r5,4 /* r7 = count / 16 = number of loop iterations */
0067 ld r9,0(r4) /* preload the first source doubleword */
0068 addi r4,r4,-8 /* bias src for the ldu 16(r4) in the loop */
0069 mtctr r7
0070 andi. r5,r5,7 /* r5 = trailing byte count; cr0.eq if there is none */
0071 bf cr7*4+0,2f /* 8s bit of count clear: even doubleword count, enter the loop at 2: */
0072 addi r3,r3,8 /* odd doubleword count: move both biases forward by 8 */
0073 addi r4,r4,8
0074 mr r8,r9 /* the preloaded doubleword becomes the pending store */
0075 blt cr1,3f /* count < 16 (only possible when arriving from .Ldst_unaligned): no loop iterations */
0076 1: ld r9,8(r4)
0077 std r8,8(r3)
0078 2: ldu r8,16(r4) /* load and advance src by 16 */
0079 stdu r9,16(r3) /* store and advance dest by 16 */
0080 bdnz 1b
0081 3: std r8,8(r3) /* store the final pending doubleword */
0082 beq 3f /* cr0 from the andi. above: no trailing bytes, go return */
0083 addi r3,r3,16 /* remove the bias: r3 = next unwritten dest byte */
0084 .Ldo_tail: /* trailing bytes are read at 8(r4) because r4 sits 8 bytes behind the next unread byte */
0085 bf cr7*4+1,1f /* 4s bit of count clear: skip the word */
0086 lwz r9,8(r4)
0087 addi r4,r4,4
0088 stw r9,0(r3)
0089 addi r3,r3,4
0090 1: bf cr7*4+2,2f /* 2s bit of count clear: skip the halfword */
0091 lhz r9,8(r4)
0092 addi r4,r4,2
0093 sth r9,0(r3)
0094 addi r3,r3,2
0095 2: bf cr7*4+3,3f /* 1s bit of count clear: skip the byte */
0096 lbz r9,8(r4)
0097 stb r9,0(r3)
0098 3: ld r3,-STACKFRAMESIZE+STK_REG(R31)(r1) /* reload the dest pointer saved at entry as the return value */
0099 blr
0100
/*
 * .Lsrc_unaligned: destination is 8-byte aligned, source is not.
 *
 * Entry (from .Ldst_aligned): r0 = src & 7 (non-zero), r3 = dest - 16,
 * r4 = src, r5 = count, cr7 = low 4 bits of count.
 *
 * The source is read with doubleword loads from src rounded down to an
 * 8-byte boundary, and every output doubleword is assembled from two
 * neighbouring source doublewords as (s[i] << r10) | (s[i+1] >> r11),
 * where r10 = 8 * r0 and r11 = 64 - r10. Two entry sequences handle an
 * even (fall-through) or odd (label 0:) number of output doublewords and
 * both join the same software-pipelined loop. Trailing bytes are taken
 * from the left-over part of the last loaded doubleword, plus one extra
 * load when that is not enough.
 */
0101 .Lsrc_unaligned:
0102 srdi r6,r5,3 /* r6 = count / 8 = whole doublewords to produce */
0103 addi r5,r5,-16
0104 subf r4,r0,r4 /* r4 = src rounded down to an 8-byte boundary */
0105 srdi r7,r5,4 /* r7 = (count - 16) / 16: CTR value, only used when the loop is entered */
0106 sldi r10,r0,3 /* r10 = left-shift amount in bits (8 * misalignment) */
0107 cmpdi cr6,r6,3 /* cr6 = doubleword count vs 3; decides whether the loop is skipped */
0108 andi. r5,r5,7 /* r5 = trailing byte count; cr0.eq if none (tested after label 5:) */
0109 mtctr r7
0110 subfic r11,r10,64 /* r11 = complementary right-shift amount (64 - r10) */
0111 add r5,r5,r0 /* r5 = trailing bytes + misalignment, compared with 8 after label 5: */
0112
0113 bt cr7*4+0,0f /* 8s bit of count set: odd doubleword count, start at 0: */
0114
0115 ld r9,0(r4) # 3+2n loads, 2+2n stores
0116 ld r0,8(r4)
0117 sld r6,r9,r10
0118 ldu r9,16(r4)
0119 srd r7,r0,r11
0120 sld r8,r0,r10
0121 or r7,r7,r6 /* r7 = first merged output doubleword */
0122 blt cr6,4f /* fewer than 3 doublewords: skip the loop */
0123 ld r0,8(r4)
0124 # s1<< in r8, d0=(s0<<|s1>>) in r7, s3 in r0, s2 in r9, nix in r6 & r12
0125 b 2f /* join the loop in the middle of an iteration */
0126
0127 0: ld r0,0(r4) # 4+2n loads, 3+2n stores
0128 ldu r9,8(r4)
0129 sld r8,r0,r10
0130 addi r3,r3,-8 /* r3 = dest - 24, so the 24(r3) store at 5: hits dest if the loop is skipped */
0131 blt cr6,5f /* fewer than 3 doublewords: only the merge and store at 5: remain */
0132 ld r0,8(r4)
0133 srd r12,r9,r11
0134 sld r6,r9,r10
0135 ldu r9,16(r4)
0136 or r12,r8,r12
0137 srd r7,r0,r11
0138 sld r8,r0,r10
0139 addi r3,r3,16
0140 beq cr6,3f /* exactly 3 doublewords: skip the loop */
0141
0142 # d0=(s0<<|s1>>) in r12, s1<< in r6, s2>> in r7, s2<< in r8, s3 in r9
0143 1: or r7,r7,r6
0144 ld r0,8(r4)
0145 std r12,8(r3)
0146 2: srd r12,r9,r11
0147 sld r6,r9,r10
0148 ldu r9,16(r4) /* load and advance src by 16 */
0149 or r12,r8,r12
0150 stdu r7,16(r3) /* store and advance dest by 16 */
0151 srd r7,r0,r11
0152 sld r8,r0,r10
0153 bdnz 1b
0154
0155 3: std r12,8(r3)
0156 or r7,r7,r6
0157 4: std r7,16(r3)
0158 5: srd r12,r9,r11
0159 or r12,r8,r12
0160 std r12,24(r3) /* last whole output doubleword */
0161 beq 4f /* cr0 from the andi. above: no trailing bytes; the next 4: is the return sequence in .Lshort_copy */
0162 cmpwi cr1,r5,8 /* do the trailing bytes reach past the last loaded source doubleword? */
0163 addi r3,r3,32 /* r3 = next unwritten dest byte */
0164 sld r9,r9,r10 /* left-justify the unused bytes of the last loaded doubleword */
0165 ble cr1,6f /* r9 already contains every trailing byte */
0166 ld r0,8(r4) /* otherwise fetch one more source doubleword */
0167 srd r7,r0,r11
0168 or r9,r7,r9 /* and merge its leading bytes in */
0169 6: /* r9 = trailing bytes, left-justified; each rotate brings the next piece into the low bits */
0170 bf cr7*4+1,1f /* 4s bit of count clear: skip the word */
0171 rotldi r9,r9,32
0172 stw r9,0(r3)
0173 addi r3,r3,4
0174 1: bf cr7*4+2,2f /* 2s bit of count clear: skip the halfword */
0175 rotldi r9,r9,16
0176 sth r9,0(r3)
0177 addi r3,r3,2
0178 2: bf cr7*4+3,3f /* 1s bit of count clear: skip the byte */
0179 rotldi r9,r9,8
0180 stb r9,0(r3)
0181 3: ld r3,-STACKFRAMESIZE+STK_REG(R31)(r1) /* reload the dest pointer saved at entry as the return value */
0182 blr
0183
/*
 * .Ldst_unaligned: copy the 1-7 leading bytes needed to bring dest up to
 * an 8-byte boundary, then rejoin .Ldst_aligned.
 *
 * Entry: r3 = dest, r4 = src, r5 = count (at least 16 on this path),
 * r6 = number of bytes to the next 8-byte dest boundary (non-zero).
 * The bits of r6 select a byte, a halfword and a word copy; r7 tracks
 * the offset already copied. cr1 and cr7 are recomputed from the reduced
 * count because .Ldst_aligned tests both.
 */
0184 .Ldst_unaligned:
0185 PPC_MTOCRF(0x01,r6) # put #bytes to 8B bdry into cr7
0186 subf r5,r6,r5 /* r5 = count left after the alignment bytes */
0187 li r7,0 /* r7 = running offset into src and dest */
0188 cmpldi cr1,r5,16 /* refresh cr1 for the reduced count */
0189 bf cr7*4+3,1f /* 1s bit of r6 clear: no single byte */
0190 lbz r0,0(r4)
0191 stb r0,0(r3)
0192 addi r7,r7,1
0193 1: bf cr7*4+2,2f /* 2s bit of r6 clear: no halfword */
0194 lhzx r0,r7,r4
0195 sthx r0,r7,r3
0196 addi r7,r7,2
0197 2: bf cr7*4+1,3f /* 4s bit of r6 clear: no word */
0198 lwzx r0,r7,r4
0199 stwx r0,r7,r3
0200 3: PPC_MTOCRF(0x01,r5) /* cr7 = low 4 bits of the remaining count */
0201 add r4,r6,r4 /* step src past the bytes just copied */
0202 add r3,r6,r3 /* step dest onto the 8-byte boundary */
0203 b .Ldst_aligned
0204
/*
 * .Lshort_copy: copies of fewer than 16 bytes.
 *
 * Entry: r3 = dest, r4 = src, cr7 = low 4 bits of count. Each cr7 bit
 * selects one fixed-size piece, handled from largest to smallest:
 * 8 bytes (moved as two words), 4 bytes, 2 bytes, 1 byte. r3 is advanced
 * while copying, so the saved dest pointer is reloaded before returning.
 * The return sequence at 4: is also the target of the "beq 4f" in
 * .Lsrc_unaligned.
 */
0205 .Lshort_copy:
0206 bf cr7*4+0,1f /* 8s bit of count clear: skip the 8-byte piece */
0207 lwz r0,0(r4)
0208 lwz r9,4(r4)
0209 addi r4,r4,8
0210 stw r0,0(r3)
0211 stw r9,4(r3)
0212 addi r3,r3,8
0213 1: bf cr7*4+1,2f /* 4s bit of count clear: skip the word */
0214 lwz r0,0(r4)
0215 addi r4,r4,4
0216 stw r0,0(r3)
0217 addi r3,r3,4
0218 2: bf cr7*4+2,3f /* 2s bit of count clear: skip the halfword */
0219 lhz r0,0(r4)
0220 addi r4,r4,2
0221 sth r0,0(r3)
0222 addi r3,r3,2
0223 3: bf cr7*4+3,4f /* 1s bit of count clear: skip the byte */
0224 lbz r0,0(r4)
0225 stb r0,0(r3)
0226 4: ld r3,-STACKFRAMESIZE+STK_REG(R31)(r1) /* reload the dest pointer saved at entry as the return value */
0227 blr
0228 #endif /* __LITTLE_ENDIAN__ */
/*
 * Both export macros are defined outside this file.
 * NOTE(review): presumably they publish memcpy (and a KASAN-specific
 * alias) to loadable modules - confirm against asm/export.h and asm/kasan.h.
 */
0229 EXPORT_SYMBOL(memcpy)
0230 EXPORT_SYMBOL_KASAN(memcpy)