/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Copyright (C) IBM Corporation, 2011
 *
 * Author: Anton Blanchard <anton@au.ibm.com>
 */
#include <asm/ppc_asm.h>

#ifndef SELFTEST_CASE
/* 0 == don't use VMX, 1 == use VMX */
#define SELFTEST_CASE	0
#endif

#ifdef __BIG_ENDIAN__
#define LVS(VRT,RA,RB)		lvsl	VRT,RA,RB
#define VPERM(VRT,VRA,VRB,VRC)	vperm	VRT,VRA,VRB,VRC
#else
#define LVS(VRT,RA,RB)		lvsr	VRT,RA,RB
#define VPERM(VRT,VRA,VRB,VRC)	vperm	VRT,VRB,VRA,VRC
#endif

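/*
 * Each errN macro tags the user-memory access that follows it with an
 * exception table entry, so a fault is redirected to the matching
 * .Ldo_errN fixup below: err1 is used before a stack frame is set up,
 * err2 inside the integer loop that has saved r14-r22, and err3/err4
 * inside the VMX paths.
 */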
	.macro err1
100:
	EX_TABLE(100b,.Ldo_err1)
	.endm

	.macro err2
200:
	EX_TABLE(200b,.Ldo_err2)
	.endm

#ifdef CONFIG_ALTIVEC
	.macro err3
300:
	EX_TABLE(300b,.Ldo_err3)
	.endm

	.macro err4
400:
	EX_TABLE(400b,.Ldo_err4)
	.endm


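/*
 * Fault fixups: .Ldo_err4 and .Ldo_err3 restore the callee-saved GPRs used
 * by the VMX loops and leave VMX via exit_vmx_usercopy; .Ldo_err2 restores
 * the GPRs used by the integer loop.  They all unwind the stack frame and
 * join .Ldo_err1, which reloads the original arguments and hands the copy
 * to __copy_tofrom_user_base.
 */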
.Ldo_err4:
	ld	r16,STK_REG(R16)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r14,STK_REG(R14)(r1)
.Ldo_err3:
	bl	exit_vmx_usercopy
	ld	r0,STACKFRAMESIZE+16(r1)
	mtlr	r0
	b	.Lexit
#endif /* CONFIG_ALTIVEC */

.Ldo_err2:
	ld	r22,STK_REG(R22)(r1)
	ld	r21,STK_REG(R21)(r1)
	ld	r20,STK_REG(R20)(r1)
	ld	r19,STK_REG(R19)(r1)
	ld	r18,STK_REG(R18)(r1)
	ld	r17,STK_REG(R17)(r1)
	ld	r16,STK_REG(R16)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r14,STK_REG(R14)(r1)
.Lexit:
	addi	r1,r1,STACKFRAMESIZE
.Ldo_err1:
	ld	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
	ld	r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
	ld	r5,-STACKFRAMESIZE+STK_REG(R29)(r1)
	b	__copy_tofrom_user_base


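/*
 * __copy_tofrom_user_power7(to, from, len)
 *
 *	r3 = destination, r4 = source, r5 = length in bytes.
 *	Returns 0 on success; on a fault the fixups above fall back to
 *	__copy_tofrom_user_base, which returns the number of bytes not copied.
 */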
_GLOBAL(__copy_tofrom_user_power7)
	cmpldi	r5,16
	cmpldi	cr1,r5,3328	/* threshold for using the VMX path */

	std	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
	std	r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
	std	r5,-STACKFRAMESIZE+STK_REG(R29)(r1)

	blt	.Lshort_copy

#ifdef CONFIG_ALTIVEC
test_feature = SELFTEST_CASE
BEGIN_FTR_SECTION
	bgt	cr1,.Lvmx_copy
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
#endif

.Lnonvmx_copy:
	/* Get the source 8B aligned */
	neg	r6,r4
	mtocrf	0x01,r6
	clrldi	r6,r6,(64-3)

	bf	cr7*4+3,1f
err1;	lbz	r0,0(r4)
	addi	r4,r4,1
err1;	stb	r0,0(r3)
	addi	r3,r3,1

1:	bf	cr7*4+2,2f
err1;	lhz	r0,0(r4)
	addi	r4,r4,2
err1;	sth	r0,0(r3)
	addi	r3,r3,2

2:	bf	cr7*4+1,3f
err1;	lwz	r0,0(r4)
	addi	r4,r4,4
err1;	stw	r0,0(r3)
	addi	r3,r3,4

3:	sub	r5,r5,r6
	cmpldi	r5,128
	blt	5f

	mflr	r0
	stdu	r1,-STACKFRAMESIZE(r1)
	std	r14,STK_REG(R14)(r1)
	std	r15,STK_REG(R15)(r1)
	std	r16,STK_REG(R16)(r1)
	std	r17,STK_REG(R17)(r1)
	std	r18,STK_REG(R18)(r1)
	std	r19,STK_REG(R19)(r1)
	std	r20,STK_REG(R20)(r1)
	std	r21,STK_REG(R21)(r1)
	std	r22,STK_REG(R22)(r1)
	std	r0,STACKFRAMESIZE+16(r1)

	srdi	r6,r5,7
	mtctr	r6

	/* Now do cacheline (128B) sized loads and stores. */
	.align	5
4:
err2;	ld	r0,0(r4)
err2;	ld	r6,8(r4)
err2;	ld	r7,16(r4)
err2;	ld	r8,24(r4)
err2;	ld	r9,32(r4)
err2;	ld	r10,40(r4)
err2;	ld	r11,48(r4)
err2;	ld	r12,56(r4)
err2;	ld	r14,64(r4)
err2;	ld	r15,72(r4)
err2;	ld	r16,80(r4)
err2;	ld	r17,88(r4)
err2;	ld	r18,96(r4)
err2;	ld	r19,104(r4)
err2;	ld	r20,112(r4)
err2;	ld	r21,120(r4)
	addi	r4,r4,128
err2;	std	r0,0(r3)
err2;	std	r6,8(r3)
err2;	std	r7,16(r3)
err2;	std	r8,24(r3)
err2;	std	r9,32(r3)
err2;	std	r10,40(r3)
err2;	std	r11,48(r3)
err2;	std	r12,56(r3)
err2;	std	r14,64(r3)
err2;	std	r15,72(r3)
err2;	std	r16,80(r3)
err2;	std	r17,88(r3)
err2;	std	r18,96(r3)
err2;	std	r19,104(r3)
err2;	std	r20,112(r3)
err2;	std	r21,120(r3)
	addi	r3,r3,128
	bdnz	4b

	clrldi	r5,r5,(64-7)

	ld	r14,STK_REG(R14)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r16,STK_REG(R16)(r1)
	ld	r17,STK_REG(R17)(r1)
	ld	r18,STK_REG(R18)(r1)
	ld	r19,STK_REG(R19)(r1)
	ld	r20,STK_REG(R20)(r1)
	ld	r21,STK_REG(R21)(r1)
	ld	r22,STK_REG(R22)(r1)
	addi	r1,r1,STACKFRAMESIZE

	/* Up to 127B to go */
5:	srdi	r6,r5,4
	mtocrf	0x01,r6

6:	bf	cr7*4+1,7f
err1;	ld	r0,0(r4)
err1;	ld	r6,8(r4)
err1;	ld	r7,16(r4)
err1;	ld	r8,24(r4)
err1;	ld	r9,32(r4)
err1;	ld	r10,40(r4)
err1;	ld	r11,48(r4)
err1;	ld	r12,56(r4)
	addi	r4,r4,64
err1;	std	r0,0(r3)
err1;	std	r6,8(r3)
err1;	std	r7,16(r3)
err1;	std	r8,24(r3)
err1;	std	r9,32(r3)
err1;	std	r10,40(r3)
err1;	std	r11,48(r3)
err1;	std	r12,56(r3)
	addi	r3,r3,64

	/* Up to 63B to go */
7:	bf	cr7*4+2,8f
err1;	ld	r0,0(r4)
err1;	ld	r6,8(r4)
err1;	ld	r7,16(r4)
err1;	ld	r8,24(r4)
	addi	r4,r4,32
err1;	std	r0,0(r3)
err1;	std	r6,8(r3)
err1;	std	r7,16(r3)
err1;	std	r8,24(r3)
	addi	r3,r3,32

	/* Up to 31B to go */
8:	bf	cr7*4+3,9f
err1;	ld	r0,0(r4)
err1;	ld	r6,8(r4)
	addi	r4,r4,16
err1;	std	r0,0(r3)
err1;	std	r6,8(r3)
	addi	r3,r3,16

9:	clrldi	r5,r5,(64-4)

	/* Up to 15B to go */
.Lshort_copy:
	mtocrf	0x01,r5
	bf	cr7*4+0,12f
err1;	lwz	r0,0(r4)	/* Less chance of a reject with word ops */
err1;	lwz	r6,4(r4)
	addi	r4,r4,8
err1;	stw	r0,0(r3)
err1;	stw	r6,4(r3)
	addi	r3,r3,8

12:	bf	cr7*4+1,13f
err1;	lwz	r0,0(r4)
	addi	r4,r4,4
err1;	stw	r0,0(r3)
	addi	r3,r3,4

13:	bf	cr7*4+2,14f
err1;	lhz	r0,0(r4)
	addi	r4,r4,2
err1;	sth	r0,0(r3)
	addi	r3,r3,2

14:	bf	cr7*4+3,15f
err1;	lbz	r0,0(r4)
err1;	stb	r0,0(r3)

15:	li	r3,0
	blr

.Lunwind_stack_nonvmx_copy:
	addi	r1,r1,STACKFRAMESIZE
	b	.Lnonvmx_copy

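/*
 * VMX path, used for copies larger than 3328 bytes when CPU_FTR_ALTIVEC is
 * set.  enter_vmx_usercopy() returns 0 if the vector unit cannot be used
 * here, in which case we unwind the frame and fall back to the integer loop.
 */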
.Lvmx_copy:
#ifdef CONFIG_ALTIVEC
	mflr	r0
	std	r0,16(r1)
	stdu	r1,-STACKFRAMESIZE(r1)
	bl	enter_vmx_usercopy
	cmpwi	cr1,r3,0
	ld	r0,STACKFRAMESIZE+16(r1)
	ld	r3,STK_REG(R31)(r1)
	ld	r4,STK_REG(R30)(r1)
	ld	r5,STK_REG(R29)(r1)
	mtlr	r0

	/*
	 * We prefetch both the source and destination using enhanced touch
	 * instructions.  We use a stream ID of 0 for the load side and
	 * 1 for the store side.
	 */
	clrrdi	r6,r4,7
	clrrdi	r9,r3,7
	ori	r9,r9,1		/* stream=1 */

	srdi	r7,r5,7		/* length in cachelines, capped at 0x3FF */
	cmpldi	r7,0x3FF
	ble	1f
	li	r7,0x3FF
1:	lis	r0,0x0E00	/* depth=7 */
	sldi	r7,r7,7
	or	r7,r7,r0
	ori	r10,r7,1	/* stream=1 */

	lis	r8,0x8000	/* GO=1 */
	clrldi	r8,r8,32

	/* setup read stream 0 */
	dcbt	0,r6,0b01000	/* addr from */
	dcbt	0,r7,0b01010	/* length and depth from */
	/* setup write stream 1 */
	dcbtst	0,r9,0b01000	/* addr to */
	dcbtst	0,r10,0b01010	/* length and depth to */
	eieio
	dcbt	0,r8,0b01010	/* all streams GO */

	beq	cr1,.Lunwind_stack_nonvmx_copy

	/*
	 * If source and destination are not relatively aligned we use a
	 * slower permute loop.
	 */
	xor	r6,r4,r3
	rldicl.	r6,r6,0,(64-4)
	bne	.Lvmx_unaligned_copy

	/* Get the destination 16B aligned */
	neg	r6,r3
	mtocrf	0x01,r6
	clrldi	r6,r6,(64-4)

	bf	cr7*4+3,1f
err3;	lbz	r0,0(r4)
	addi	r4,r4,1
err3;	stb	r0,0(r3)
	addi	r3,r3,1

1:	bf	cr7*4+2,2f
err3;	lhz	r0,0(r4)
	addi	r4,r4,2
err3;	sth	r0,0(r3)
	addi	r3,r3,2

2:	bf	cr7*4+1,3f
err3;	lwz	r0,0(r4)
	addi	r4,r4,4
err3;	stw	r0,0(r3)
	addi	r3,r3,4

3:	bf	cr7*4+0,4f
err3;	ld	r0,0(r4)
	addi	r4,r4,8
err3;	std	r0,0(r3)
	addi	r3,r3,8

4:	sub	r5,r5,r6

	/* Get the destination 128B aligned */
	neg	r6,r3
	srdi	r7,r6,4
	mtocrf	0x01,r7
	clrldi	r6,r6,(64-7)

	li	r9,16
	li	r10,32
	li	r11,48

	bf	cr7*4+3,5f
err3;	lvx	v1,0,r4
	addi	r4,r4,16
err3;	stvx	v1,0,r3
	addi	r3,r3,16

5:	bf	cr7*4+2,6f
err3;	lvx	v1,0,r4
err3;	lvx	v0,r4,r9
	addi	r4,r4,32
err3;	stvx	v1,0,r3
err3;	stvx	v0,r3,r9
	addi	r3,r3,32

6:	bf	cr7*4+1,7f
err3;	lvx	v3,0,r4
err3;	lvx	v2,r4,r9
err3;	lvx	v1,r4,r10
err3;	lvx	v0,r4,r11
	addi	r4,r4,64
err3;	stvx	v3,0,r3
err3;	stvx	v2,r3,r9
err3;	stvx	v1,r3,r10
err3;	stvx	v0,r3,r11
	addi	r3,r3,64

7:	sub	r5,r5,r6
	srdi	r6,r5,7

	std	r14,STK_REG(R14)(r1)
	std	r15,STK_REG(R15)(r1)
	std	r16,STK_REG(R16)(r1)

	li	r12,64
	li	r14,80
	li	r15,96
	li	r16,112

	mtctr	r6

	/*
	 * Now do cacheline sized loads and stores.  By this stage the
	 * cacheline stores are also cacheline aligned.
	 */
	.align	5
8:
err4;	lvx	v7,0,r4
err4;	lvx	v6,r4,r9
err4;	lvx	v5,r4,r10
err4;	lvx	v4,r4,r11
err4;	lvx	v3,r4,r12
err4;	lvx	v2,r4,r14
err4;	lvx	v1,r4,r15
err4;	lvx	v0,r4,r16
	addi	r4,r4,128
err4;	stvx	v7,0,r3
err4;	stvx	v6,r3,r9
err4;	stvx	v5,r3,r10
err4;	stvx	v4,r3,r11
err4;	stvx	v3,r3,r12
err4;	stvx	v2,r3,r14
err4;	stvx	v1,r3,r15
err4;	stvx	v0,r3,r16
	addi	r3,r3,128
	bdnz	8b

	ld	r14,STK_REG(R14)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r16,STK_REG(R16)(r1)

	/* Up to 127B to go */
	clrldi	r5,r5,(64-7)
	srdi	r6,r5,4
	mtocrf	0x01,r6

	bf	cr7*4+1,9f
err3;	lvx	v3,0,r4
err3;	lvx	v2,r4,r9
err3;	lvx	v1,r4,r10
err3;	lvx	v0,r4,r11
	addi	r4,r4,64
err3;	stvx	v3,0,r3
err3;	stvx	v2,r3,r9
err3;	stvx	v1,r3,r10
err3;	stvx	v0,r3,r11
	addi	r3,r3,64

9:	bf	cr7*4+2,10f
err3;	lvx	v1,0,r4
err3;	lvx	v0,r4,r9
	addi	r4,r4,32
err3;	stvx	v1,0,r3
err3;	stvx	v0,r3,r9
	addi	r3,r3,32

10:	bf	cr7*4+3,11f
err3;	lvx	v1,0,r4
	addi	r4,r4,16
err3;	stvx	v1,0,r3
	addi	r3,r3,16

	/* Up to 15B to go */
11:	clrldi	r5,r5,(64-4)
	mtocrf	0x01,r5
	bf	cr7*4+0,12f
err3;	ld	r0,0(r4)
	addi	r4,r4,8
err3;	std	r0,0(r3)
	addi	r3,r3,8

12:	bf	cr7*4+1,13f
err3;	lwz	r0,0(r4)
	addi	r4,r4,4
err3;	stw	r0,0(r3)
	addi	r3,r3,4

13:	bf	cr7*4+2,14f
err3;	lhz	r0,0(r4)
	addi	r4,r4,2
err3;	sth	r0,0(r3)
	addi	r3,r3,2

14:	bf	cr7*4+3,15f
err3;	lbz	r0,0(r4)
err3;	stb	r0,0(r3)

15:	addi	r1,r1,STACKFRAMESIZE
	b	exit_vmx_usercopy	/* tail call optimise */

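/*
 * Source and destination are not relatively 16B aligned.  The destination
 * is aligned and the source is read with aligned vector loads one vector
 * ahead; VPERM, with a control vector from lvsl/lvsr, combines each pair of
 * adjacent loads into the unaligned data to store.
 */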
.Lvmx_unaligned_copy:
	/* Get the destination 16B aligned */
	neg	r6,r3
	mtocrf	0x01,r6
	clrldi	r6,r6,(64-4)

	bf	cr7*4+3,1f
err3;	lbz	r0,0(r4)
	addi	r4,r4,1
err3;	stb	r0,0(r3)
	addi	r3,r3,1

1:	bf	cr7*4+2,2f
err3;	lhz	r0,0(r4)
	addi	r4,r4,2
err3;	sth	r0,0(r3)
	addi	r3,r3,2

2:	bf	cr7*4+1,3f
err3;	lwz	r0,0(r4)
	addi	r4,r4,4
err3;	stw	r0,0(r3)
	addi	r3,r3,4

3:	bf	cr7*4+0,4f
err3;	lwz	r0,0(r4)
err3;	lwz	r7,4(r4)
	addi	r4,r4,8
err3;	stw	r0,0(r3)
err3;	stw	r7,4(r3)
	addi	r3,r3,8

4:	sub	r5,r5,r6

	/* Get the destination 128B aligned */
	neg	r6,r3
	srdi	r7,r6,4
	mtocrf	0x01,r7
	clrldi	r6,r6,(64-7)

	li	r9,16
	li	r10,32
	li	r11,48

	LVS(v16,0,r4)		/* Setup permute control vector */
err3;	lvx	v0,0,r4
	addi	r4,r4,16

	bf	cr7*4+3,5f
err3;	lvx	v1,0,r4
	VPERM(v8,v0,v1,v16)
	addi	r4,r4,16
err3;	stvx	v8,0,r3
	addi	r3,r3,16
	vor	v0,v1,v1	/* keep the last vector for the next VPERM */

5:	bf	cr7*4+2,6f
err3;	lvx	v1,0,r4
	VPERM(v8,v0,v1,v16)
err3;	lvx	v0,r4,r9
	VPERM(v9,v1,v0,v16)
	addi	r4,r4,32
err3;	stvx	v8,0,r3
err3;	stvx	v9,r3,r9
	addi	r3,r3,32

6:	bf	cr7*4+1,7f
err3;	lvx	v3,0,r4
	VPERM(v8,v0,v3,v16)
err3;	lvx	v2,r4,r9
	VPERM(v9,v3,v2,v16)
err3;	lvx	v1,r4,r10
	VPERM(v10,v2,v1,v16)
err3;	lvx	v0,r4,r11
	VPERM(v11,v1,v0,v16)
	addi	r4,r4,64
err3;	stvx	v8,0,r3
err3;	stvx	v9,r3,r9
err3;	stvx	v10,r3,r10
err3;	stvx	v11,r3,r11
	addi	r3,r3,64

7:	sub	r5,r5,r6
	srdi	r6,r5,7

	std	r14,STK_REG(R14)(r1)
	std	r15,STK_REG(R15)(r1)
	std	r16,STK_REG(R16)(r1)

	li	r12,64
	li	r14,80
	li	r15,96
	li	r16,112

	mtctr	r6

	/*
	 * Now do cacheline sized loads and stores.  By this stage the
	 * cacheline stores are also cacheline aligned.
	 */
	.align	5
8:
err4;	lvx	v7,0,r4
	VPERM(v8,v0,v7,v16)
err4;	lvx	v6,r4,r9
	VPERM(v9,v7,v6,v16)
err4;	lvx	v5,r4,r10
	VPERM(v10,v6,v5,v16)
err4;	lvx	v4,r4,r11
	VPERM(v11,v5,v4,v16)
err4;	lvx	v3,r4,r12
	VPERM(v12,v4,v3,v16)
err4;	lvx	v2,r4,r14
	VPERM(v13,v3,v2,v16)
err4;	lvx	v1,r4,r15
	VPERM(v14,v2,v1,v16)
err4;	lvx	v0,r4,r16
	VPERM(v15,v1,v0,v16)
	addi	r4,r4,128
err4;	stvx	v8,0,r3
err4;	stvx	v9,r3,r9
err4;	stvx	v10,r3,r10
err4;	stvx	v11,r3,r11
err4;	stvx	v12,r3,r12
err4;	stvx	v13,r3,r14
err4;	stvx	v14,r3,r15
err4;	stvx	v15,r3,r16
	addi	r3,r3,128
	bdnz	8b

	ld	r14,STK_REG(R14)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r16,STK_REG(R16)(r1)

	/* Up to 127B to go */
	clrldi	r5,r5,(64-7)
	srdi	r6,r5,4
	mtocrf	0x01,r6

	bf	cr7*4+1,9f
err3;	lvx	v3,0,r4
	VPERM(v8,v0,v3,v16)
err3;	lvx	v2,r4,r9
	VPERM(v9,v3,v2,v16)
err3;	lvx	v1,r4,r10
	VPERM(v10,v2,v1,v16)
err3;	lvx	v0,r4,r11
	VPERM(v11,v1,v0,v16)
	addi	r4,r4,64
err3;	stvx	v8,0,r3
err3;	stvx	v9,r3,r9
err3;	stvx	v10,r3,r10
err3;	stvx	v11,r3,r11
	addi	r3,r3,64

9:	bf	cr7*4+2,10f
err3;	lvx	v1,0,r4
	VPERM(v8,v0,v1,v16)
err3;	lvx	v0,r4,r9
	VPERM(v9,v1,v0,v16)
	addi	r4,r4,32
err3;	stvx	v8,0,r3
err3;	stvx	v9,r3,r9
	addi	r3,r3,32

10:	bf	cr7*4+3,11f
err3;	lvx	v1,0,r4
	VPERM(v8,v0,v1,v16)
	addi	r4,r4,16
err3;	stvx	v8,0,r3
	addi	r3,r3,16

	/* Up to 15B to go */
11:	clrldi	r5,r5,(64-4)
	addi	r4,r4,-16	/* Unwind the +16 load offset */
	mtocrf	0x01,r5
	bf	cr7*4+0,12f
err3;	lwz	r0,0(r4)	/* Less chance of a reject with word ops */
err3;	lwz	r6,4(r4)
	addi	r4,r4,8
err3;	stw	r0,0(r3)
err3;	stw	r6,4(r3)
	addi	r3,r3,8

12:	bf	cr7*4+1,13f
err3;	lwz	r0,0(r4)
	addi	r4,r4,4
err3;	stw	r0,0(r3)
	addi	r3,r3,4

13:	bf	cr7*4+2,14f
err3;	lhz	r0,0(r4)
	addi	r4,r4,2
err3;	sth	r0,0(r3)
	addi	r3,r3,2

14:	bf	cr7*4+3,15f
err3;	lbz	r0,0(r4)
err3;	stb	r0,0(r3)

15:	addi	r1,r1,STACKFRAMESIZE
	b	exit_vmx_usercopy	/* tail call optimise */
#endif /* CONFIG_ALTIVEC */