0001
0002
0003
0004
0005
0006
0007
/*
 * Non-prefetching Alpha XOR routines.  Their bodies are provided by the
 * asm() statement further down in this file.
 *
 * Each routine takes the byte count first, then p1 (read and written)
 * and one to four read-only source buffers.  The assembly XORs the
 * buffers together 64 bytes per loop iteration and stores the result
 * back through p1.
 */
extern void xor_alpha_2(unsigned long bytes,
			unsigned long * __restrict p1,
			const unsigned long * __restrict p2);

extern void xor_alpha_3(unsigned long bytes,
			unsigned long * __restrict p1,
			const unsigned long * __restrict p2,
			const unsigned long * __restrict p3);

extern void xor_alpha_4(unsigned long bytes,
			unsigned long * __restrict p1,
			const unsigned long * __restrict p2,
			const unsigned long * __restrict p3,
			const unsigned long * __restrict p4);

extern void xor_alpha_5(unsigned long bytes,
			unsigned long * __restrict p1,
			const unsigned long * __restrict p2,
			const unsigned long * __restrict p3,
			const unsigned long * __restrict p4,
			const unsigned long * __restrict p5);
0026
/*
 * Prefetching variants of the Alpha XOR routines, also implemented in
 * the asm() statement further down in this file.
 *
 * Same argument layout as the plain routines: byte count, then p1
 * (read and written), then one to four read-only sources.  In addition
 * to the XOR work, the assembly issues loads into $31 ahead of the data
 * it is currently processing.
 */
extern void xor_alpha_prefetch_2(unsigned long bytes,
				 unsigned long * __restrict p1,
				 const unsigned long * __restrict p2);

extern void xor_alpha_prefetch_3(unsigned long bytes,
				 unsigned long * __restrict p1,
				 const unsigned long * __restrict p2,
				 const unsigned long * __restrict p3);

extern void xor_alpha_prefetch_4(unsigned long bytes,
				 unsigned long * __restrict p1,
				 const unsigned long * __restrict p2,
				 const unsigned long * __restrict p3,
				 const unsigned long * __restrict p4);

extern void xor_alpha_prefetch_5(unsigned long bytes,
				 unsigned long * __restrict p1,
				 const unsigned long * __restrict p2,
				 const unsigned long * __restrict p3,
				 const unsigned long * __restrict p4,
				 const unsigned long * __restrict p5);
0045
/*
 * Hand-scheduled Alpha assembly bodies for the eight routines declared
 * above: xor_alpha_{2,3,4,5} and xor_alpha_prefetch_{2,3,4,5}.
 *
 * Common shape of every routine:
 *   - "srl $16, 6, $16" turns the value in $16 into a count of 64-byte
 *     chunks (assumes the standard Alpha argument registers, i.e. $16 is
 *     `bytes` and $17..$21 are p1..p5 -- TODO confirm against the ABI).
 *   - Each loop iteration loads eight quadwords (offsets 0..56) from every
 *     pointer, XORs the values found at the same offset together, and
 *     stores the eight results through $17.
 *   - Every pointer register is then advanced by 64, $16 is decremented,
 *     and "bgt" repeats the loop while $16 is still positive.  The test
 *     sits at the bottom of the loop, so the body always runs at least
 *     once, even when $16 is zero after the shift.
 *
 * The *_prefetch_* routines additionally issue "ldq $31, ..." loads at
 * offsets 0, 64, 128 and 192 from each pointer before entering the loop,
 * and at offset 256 from each pointer inside every iteration.  The loaded
 * value is never used; these are presumably prefetch hints (loads whose
 * destination is the zero register $31) -- confirm against the Alpha
 * architecture manual.
 *
 * The "N cycles from $R load" remarks inside the string are the original
 * author's scheduling notes; instruction order is deliberate and should
 * not be rearranged casually.
 */
0046 asm(" \n\
0047 .text \n\
0048 .align 3 \n\
0049 .ent xor_alpha_2 \n\
0050 xor_alpha_2: \n\
0051 .prologue 0 \n\
0052 srl $16, 6, $16 \n\
0053 .align 4 \n\
0054 2: \n\
0055 ldq $0,0($17) \n\
0056 ldq $1,0($18) \n\
0057 ldq $2,8($17) \n\
0058 ldq $3,8($18) \n\
0059 \n\
0060 ldq $4,16($17) \n\
0061 ldq $5,16($18) \n\
0062 ldq $6,24($17) \n\
0063 ldq $7,24($18) \n\
0064 \n\
0065 ldq $19,32($17) \n\
0066 ldq $20,32($18) \n\
0067 ldq $21,40($17) \n\
0068 ldq $22,40($18) \n\
0069 \n\
0070 ldq $23,48($17) \n\
0071 ldq $24,48($18) \n\
0072 ldq $25,56($17) \n\
0073 xor $0,$1,$0 # 7 cycles from $1 load \n\
0074 \n\
0075 ldq $27,56($18) \n\
0076 xor $2,$3,$2 \n\
0077 stq $0,0($17) \n\
0078 xor $4,$5,$4 \n\
0079 \n\
0080 stq $2,8($17) \n\
0081 xor $6,$7,$6 \n\
0082 stq $4,16($17) \n\
0083 xor $19,$20,$19 \n\
0084 \n\
0085 stq $6,24($17) \n\
0086 xor $21,$22,$21 \n\
0087 stq $19,32($17) \n\
0088 xor $23,$24,$23 \n\
0089 \n\
0090 stq $21,40($17) \n\
0091 xor $25,$27,$25 \n\
0092 stq $23,48($17) \n\
0093 subq $16,1,$16 \n\
0094 \n\
0095 stq $25,56($17) \n\
0096 addq $17,64,$17 \n\
0097 addq $18,64,$18 \n\
0098 bgt $16,2b \n\
0099 \n\
0100 ret \n\
0101 .end xor_alpha_2 \n\
0102 \n\
0103 .align 3 \n\
0104 .ent xor_alpha_3 \n\
0105 xor_alpha_3: \n\
0106 .prologue 0 \n\
0107 srl $16, 6, $16 \n\
0108 .align 4 \n\
0109 3: \n\
0110 ldq $0,0($17) \n\
0111 ldq $1,0($18) \n\
0112 ldq $2,0($19) \n\
0113 ldq $3,8($17) \n\
0114 \n\
0115 ldq $4,8($18) \n\
0116 ldq $6,16($17) \n\
0117 ldq $7,16($18) \n\
0118 ldq $21,24($17) \n\
0119 \n\
0120 ldq $22,24($18) \n\
0121 ldq $24,32($17) \n\
0122 ldq $25,32($18) \n\
0123 ldq $5,8($19) \n\
0124 \n\
0125 ldq $20,16($19) \n\
0126 ldq $23,24($19) \n\
0127 ldq $27,32($19) \n\
0128 nop \n\
0129 \n\
0130 xor $0,$1,$1 # 8 cycles from $0 load \n\
0131 xor $3,$4,$4 # 6 cycles from $4 load \n\
0132 xor $6,$7,$7 # 6 cycles from $7 load \n\
0133 xor $21,$22,$22 # 5 cycles from $22 load \n\
0134 \n\
0135 xor $1,$2,$2 # 9 cycles from $2 load \n\
0136 xor $24,$25,$25 # 5 cycles from $25 load \n\
0137 stq $2,0($17) \n\
0138 xor $4,$5,$5 # 6 cycles from $5 load \n\
0139 \n\
0140 stq $5,8($17) \n\
0141 xor $7,$20,$20 # 7 cycles from $20 load \n\
0142 stq $20,16($17) \n\
0143 xor $22,$23,$23 # 7 cycles from $23 load \n\
0144 \n\
0145 stq $23,24($17) \n\
0146 xor $25,$27,$27 # 7 cycles from $27 load \n\
0147 stq $27,32($17) \n\
0148 nop \n\
0149 \n\
0150 ldq $0,40($17) \n\
0151 ldq $1,40($18) \n\
0152 ldq $3,48($17) \n\
0153 ldq $4,48($18) \n\
0154 \n\
0155 ldq $6,56($17) \n\
0156 ldq $7,56($18) \n\
0157 ldq $2,40($19) \n\
0158 ldq $5,48($19) \n\
0159 \n\
0160 ldq $20,56($19) \n\
0161 xor $0,$1,$1 # 4 cycles from $1 load \n\
0162 xor $3,$4,$4 # 5 cycles from $4 load \n\
0163 xor $6,$7,$7 # 5 cycles from $7 load \n\
0164 \n\
0165 xor $1,$2,$2 # 4 cycles from $2 load \n\
0166 xor $4,$5,$5 # 5 cycles from $5 load \n\
0167 stq $2,40($17) \n\
0168 xor $7,$20,$20 # 4 cycles from $20 load \n\
0169 \n\
0170 stq $5,48($17) \n\
0171 subq $16,1,$16 \n\
0172 stq $20,56($17) \n\
0173 addq $19,64,$19 \n\
0174 \n\
0175 addq $18,64,$18 \n\
0176 addq $17,64,$17 \n\
0177 bgt $16,3b \n\
0178 ret \n\
0179 .end xor_alpha_3 \n\
0180 \n\
0181 .align 3 \n\
0182 .ent xor_alpha_4 \n\
0183 xor_alpha_4: \n\
0184 .prologue 0 \n\
0185 srl $16, 6, $16 \n\
0186 .align 4 \n\
0187 4: \n\
0188 ldq $0,0($17) \n\
0189 ldq $1,0($18) \n\
0190 ldq $2,0($19) \n\
0191 ldq $3,0($20) \n\
0192 \n\
0193 ldq $4,8($17) \n\
0194 ldq $5,8($18) \n\
0195 ldq $6,8($19) \n\
0196 ldq $7,8($20) \n\
0197 \n\
0198 ldq $21,16($17) \n\
0199 ldq $22,16($18) \n\
0200 ldq $23,16($19) \n\
0201 ldq $24,16($20) \n\
0202 \n\
0203 ldq $25,24($17) \n\
0204 xor $0,$1,$1 # 6 cycles from $1 load \n\
0205 ldq $27,24($18) \n\
0206 xor $2,$3,$3 # 6 cycles from $3 load \n\
0207 \n\
0208 ldq $0,24($19) \n\
0209 xor $1,$3,$3 \n\
0210 ldq $1,24($20) \n\
0211 xor $4,$5,$5 # 7 cycles from $5 load \n\
0212 \n\
0213 stq $3,0($17) \n\
0214 xor $6,$7,$7 \n\
0215 xor $21,$22,$22 # 7 cycles from $22 load \n\
0216 xor $5,$7,$7 \n\
0217 \n\
0218 stq $7,8($17) \n\
0219 xor $23,$24,$24 # 7 cycles from $24 load \n\
0220 ldq $2,32($17) \n\
0221 xor $22,$24,$24 \n\
0222 \n\
0223 ldq $3,32($18) \n\
0224 ldq $4,32($19) \n\
0225 ldq $5,32($20) \n\
0226 xor $25,$27,$27 # 8 cycles from $27 load \n\
0227 \n\
0228 ldq $6,40($17) \n\
0229 ldq $7,40($18) \n\
0230 ldq $21,40($19) \n\
0231 ldq $22,40($20) \n\
0232 \n\
0233 stq $24,16($17) \n\
0234 xor $0,$1,$1 # 9 cycles from $1 load \n\
0235 xor $2,$3,$3 # 5 cycles from $3 load \n\
0236 xor $27,$1,$1 \n\
0237 \n\
0238 stq $1,24($17) \n\
0239 xor $4,$5,$5 # 5 cycles from $5 load \n\
0240 ldq $23,48($17) \n\
0241 ldq $24,48($18) \n\
0242 \n\
0243 ldq $25,48($19) \n\
0244 xor $3,$5,$5 \n\
0245 ldq $27,48($20) \n\
0246 ldq $0,56($17) \n\
0247 \n\
0248 ldq $1,56($18) \n\
0249 ldq $2,56($19) \n\
0250 xor $6,$7,$7 # 8 cycles from $6 load \n\
0251 ldq $3,56($20) \n\
0252 \n\
0253 stq $5,32($17) \n\
0254 xor $21,$22,$22 # 8 cycles from $22 load \n\
0255 xor $7,$22,$22 \n\
0256 xor $23,$24,$24 # 5 cycles from $24 load \n\
0257 \n\
0258 stq $22,40($17) \n\
0259 xor $25,$27,$27 # 5 cycles from $27 load \n\
0260 xor $24,$27,$27 \n\
0261 xor $0,$1,$1 # 5 cycles from $1 load \n\
0262 \n\
0263 stq $27,48($17) \n\
0264 xor $2,$3,$3 # 4 cycles from $3 load \n\
0265 xor $1,$3,$3 \n\
0266 subq $16,1,$16 \n\
0267 \n\
0268 stq $3,56($17) \n\
0269 addq $20,64,$20 \n\
0270 addq $19,64,$19 \n\
0271 addq $18,64,$18 \n\
0272 \n\
0273 addq $17,64,$17 \n\
0274 bgt $16,4b \n\
0275 ret \n\
0276 .end xor_alpha_4 \n\
0277 \n\
0278 .align 3 \n\
0279 .ent xor_alpha_5 \n\
0280 xor_alpha_5: \n\
0281 .prologue 0 \n\
0282 srl $16, 6, $16 \n\
0283 .align 4 \n\
0284 5: \n\
0285 ldq $0,0($17) \n\
0286 ldq $1,0($18) \n\
0287 ldq $2,0($19) \n\
0288 ldq $3,0($20) \n\
0289 \n\
0290 ldq $4,0($21) \n\
0291 ldq $5,8($17) \n\
0292 ldq $6,8($18) \n\
0293 ldq $7,8($19) \n\
0294 \n\
0295 ldq $22,8($20) \n\
0296 ldq $23,8($21) \n\
0297 ldq $24,16($17) \n\
0298 ldq $25,16($18) \n\
0299 \n\
0300 ldq $27,16($19) \n\
0301 xor $0,$1,$1 # 6 cycles from $1 load \n\
0302 ldq $28,16($20) \n\
0303 xor $2,$3,$3 # 6 cycles from $3 load \n\
0304 \n\
0305 ldq $0,16($21) \n\
0306 xor $1,$3,$3 \n\
0307 ldq $1,24($17) \n\
0308 xor $3,$4,$4 # 7 cycles from $4 load \n\
0309 \n\
0310 stq $4,0($17) \n\
0311 xor $5,$6,$6 # 7 cycles from $6 load \n\
0312 xor $7,$22,$22 # 7 cycles from $22 load \n\
0313 xor $6,$23,$23 # 7 cycles from $23 load \n\
0314 \n\
0315 ldq $2,24($18) \n\
0316 xor $22,$23,$23 \n\
0317 ldq $3,24($19) \n\
0318 xor $24,$25,$25 # 8 cycles from $25 load \n\
0319 \n\
0320 stq $23,8($17) \n\
0321 xor $25,$27,$27 # 8 cycles from $27 load \n\
0322 ldq $4,24($20) \n\
0323 xor $28,$0,$0 # 7 cycles from $0 load \n\
0324 \n\
0325 ldq $5,24($21) \n\
0326 xor $27,$0,$0 \n\
0327 ldq $6,32($17) \n\
0328 ldq $7,32($18) \n\
0329 \n\
0330 stq $0,16($17) \n\
0331 xor $1,$2,$2 # 6 cycles from $2 load \n\
0332 ldq $22,32($19) \n\
0333 xor $3,$4,$4 # 4 cycles from $4 load \n\
0334 \n\
0335 ldq $23,32($20) \n\
0336 xor $2,$4,$4 \n\
0337 ldq $24,32($21) \n\
0338 ldq $25,40($17) \n\
0339 \n\
0340 ldq $27,40($18) \n\
0341 ldq $28,40($19) \n\
0342 ldq $0,40($20) \n\
0343 xor $4,$5,$5 # 7 cycles from $5 load \n\
0344 \n\
0345 stq $5,24($17) \n\
0346 xor $6,$7,$7 # 7 cycles from $7 load \n\
0347 ldq $1,40($21) \n\
0348 ldq $2,48($17) \n\
0349 \n\
0350 ldq $3,48($18) \n\
0351 xor $7,$22,$22 # 7 cycles from $22 load \n\
0352 ldq $4,48($19) \n\
0353 xor $23,$24,$24 # 6 cycles from $24 load \n\
0354 \n\
0355 ldq $5,48($20) \n\
0356 xor $22,$24,$24 \n\
0357 ldq $6,48($21) \n\
0358 xor $25,$27,$27 # 7 cycles from $27 load \n\
0359 \n\
0360 stq $24,32($17) \n\
0361 xor $27,$28,$28 # 8 cycles from $28 load \n\
0362 ldq $7,56($17) \n\
0363 xor $0,$1,$1 # 6 cycles from $1 load \n\
0364 \n\
0365 ldq $22,56($18) \n\
0366 ldq $23,56($19) \n\
0367 ldq $24,56($20) \n\
0368 ldq $25,56($21) \n\
0369 \n\
0370 xor $28,$1,$1 \n\
0371 xor $2,$3,$3 # 9 cycles from $3 load \n\
0372 xor $3,$4,$4 # 9 cycles from $4 load \n\
0373 xor $5,$6,$6 # 8 cycles from $6 load \n\
0374 \n\
0375 stq $1,40($17) \n\
0376 xor $4,$6,$6 \n\
0377 xor $7,$22,$22 # 7 cycles from $22 load \n\
0378 xor $23,$24,$24 # 6 cycles from $24 load \n\
0379 \n\
0380 stq $6,48($17) \n\
0381 xor $22,$24,$24 \n\
0382 subq $16,1,$16 \n\
0383 xor $24,$25,$25 # 8 cycles from $25 load \n\
0384 \n\
0385 stq $25,56($17) \n\
0386 addq $21,64,$21 \n\
0387 addq $20,64,$20 \n\
0388 addq $19,64,$19 \n\
0389 \n\
0390 addq $18,64,$18 \n\
0391 addq $17,64,$17 \n\
0392 bgt $16,5b \n\
0393 ret \n\
0394 .end xor_alpha_5 \n\
0395 \n\
0396 .align 3 \n\
0397 .ent xor_alpha_prefetch_2 \n\
0398 xor_alpha_prefetch_2: \n\
0399 .prologue 0 \n\
0400 srl $16, 6, $16 \n\
0401 \n\
0402 ldq $31, 0($17) \n\
0403 ldq $31, 0($18) \n\
0404 \n\
0405 ldq $31, 64($17) \n\
0406 ldq $31, 64($18) \n\
0407 \n\
0408 ldq $31, 128($17) \n\
0409 ldq $31, 128($18) \n\
0410 \n\
0411 ldq $31, 192($17) \n\
0412 ldq $31, 192($18) \n\
0413 .align 4 \n\
0414 2: \n\
0415 ldq $0,0($17) \n\
0416 ldq $1,0($18) \n\
0417 ldq $2,8($17) \n\
0418 ldq $3,8($18) \n\
0419 \n\
0420 ldq $4,16($17) \n\
0421 ldq $5,16($18) \n\
0422 ldq $6,24($17) \n\
0423 ldq $7,24($18) \n\
0424 \n\
0425 ldq $19,32($17) \n\
0426 ldq $20,32($18) \n\
0427 ldq $21,40($17) \n\
0428 ldq $22,40($18) \n\
0429 \n\
0430 ldq $23,48($17) \n\
0431 ldq $24,48($18) \n\
0432 ldq $25,56($17) \n\
0433 ldq $27,56($18) \n\
0434 \n\
0435 ldq $31,256($17) \n\
0436 xor $0,$1,$0 # 8 cycles from $1 load \n\
0437 ldq $31,256($18) \n\
0438 xor $2,$3,$2 \n\
0439 \n\
0440 stq $0,0($17) \n\
0441 xor $4,$5,$4 \n\
0442 stq $2,8($17) \n\
0443 xor $6,$7,$6 \n\
0444 \n\
0445 stq $4,16($17) \n\
0446 xor $19,$20,$19 \n\
0447 stq $6,24($17) \n\
0448 xor $21,$22,$21 \n\
0449 \n\
0450 stq $19,32($17) \n\
0451 xor $23,$24,$23 \n\
0452 stq $21,40($17) \n\
0453 xor $25,$27,$25 \n\
0454 \n\
0455 stq $23,48($17) \n\
0456 subq $16,1,$16 \n\
0457 stq $25,56($17) \n\
0458 addq $17,64,$17 \n\
0459 \n\
0460 addq $18,64,$18 \n\
0461 bgt $16,2b \n\
0462 ret \n\
0463 .end xor_alpha_prefetch_2 \n\
0464 \n\
0465 .align 3 \n\
0466 .ent xor_alpha_prefetch_3 \n\
0467 xor_alpha_prefetch_3: \n\
0468 .prologue 0 \n\
0469 srl $16, 6, $16 \n\
0470 \n\
0471 ldq $31, 0($17) \n\
0472 ldq $31, 0($18) \n\
0473 ldq $31, 0($19) \n\
0474 \n\
0475 ldq $31, 64($17) \n\
0476 ldq $31, 64($18) \n\
0477 ldq $31, 64($19) \n\
0478 \n\
0479 ldq $31, 128($17) \n\
0480 ldq $31, 128($18) \n\
0481 ldq $31, 128($19) \n\
0482 \n\
0483 ldq $31, 192($17) \n\
0484 ldq $31, 192($18) \n\
0485 ldq $31, 192($19) \n\
0486 .align 4 \n\
0487 3: \n\
0488 ldq $0,0($17) \n\
0489 ldq $1,0($18) \n\
0490 ldq $2,0($19) \n\
0491 ldq $3,8($17) \n\
0492 \n\
0493 ldq $4,8($18) \n\
0494 ldq $6,16($17) \n\
0495 ldq $7,16($18) \n\
0496 ldq $21,24($17) \n\
0497 \n\
0498 ldq $22,24($18) \n\
0499 ldq $24,32($17) \n\
0500 ldq $25,32($18) \n\
0501 ldq $5,8($19) \n\
0502 \n\
0503 ldq $20,16($19) \n\
0504 ldq $23,24($19) \n\
0505 ldq $27,32($19) \n\
0506 nop \n\
0507 \n\
0508 xor $0,$1,$1 # 8 cycles from $0 load \n\
0509 xor $3,$4,$4 # 7 cycles from $4 load \n\
0510 xor $6,$7,$7 # 6 cycles from $7 load \n\
0511 xor $21,$22,$22 # 5 cycles from $22 load \n\
0512 \n\
0513 xor $1,$2,$2 # 9 cycles from $2 load \n\
0514 xor $24,$25,$25 # 5 cycles from $25 load \n\
0515 stq $2,0($17) \n\
0516 xor $4,$5,$5 # 6 cycles from $5 load \n\
0517 \n\
0518 stq $5,8($17) \n\
0519 xor $7,$20,$20 # 7 cycles from $20 load \n\
0520 stq $20,16($17) \n\
0521 xor $22,$23,$23 # 7 cycles from $23 load \n\
0522 \n\
0523 stq $23,24($17) \n\
0524 xor $25,$27,$27 # 7 cycles from $27 load \n\
0525 stq $27,32($17) \n\
0526 nop \n\
0527 \n\
0528 ldq $0,40($17) \n\
0529 ldq $1,40($18) \n\
0530 ldq $3,48($17) \n\
0531 ldq $4,48($18) \n\
0532 \n\
0533 ldq $6,56($17) \n\
0534 ldq $7,56($18) \n\
0535 ldq $2,40($19) \n\
0536 ldq $5,48($19) \n\
0537 \n\
0538 ldq $20,56($19) \n\
0539 ldq $31,256($17) \n\
0540 ldq $31,256($18) \n\
0541 ldq $31,256($19) \n\
0542 \n\
0543 xor $0,$1,$1 # 6 cycles from $1 load \n\
0544 xor $3,$4,$4 # 5 cycles from $4 load \n\
0545 xor $6,$7,$7 # 5 cycles from $7 load \n\
0546 xor $1,$2,$2 # 4 cycles from $2 load \n\
0547 \n\
0548 xor $4,$5,$5 # 5 cycles from $5 load \n\
0549 xor $7,$20,$20 # 4 cycles from $20 load \n\
0550 stq $2,40($17) \n\
0551 subq $16,1,$16 \n\
0552 \n\
0553 stq $5,48($17) \n\
0554 addq $19,64,$19 \n\
0555 stq $20,56($17) \n\
0556 addq $18,64,$18 \n\
0557 \n\
0558 addq $17,64,$17 \n\
0559 bgt $16,3b \n\
0560 ret \n\
0561 .end xor_alpha_prefetch_3 \n\
0562 \n\
0563 .align 3 \n\
0564 .ent xor_alpha_prefetch_4 \n\
0565 xor_alpha_prefetch_4: \n\
0566 .prologue 0 \n\
0567 srl $16, 6, $16 \n\
0568 \n\
0569 ldq $31, 0($17) \n\
0570 ldq $31, 0($18) \n\
0571 ldq $31, 0($19) \n\
0572 ldq $31, 0($20) \n\
0573 \n\
0574 ldq $31, 64($17) \n\
0575 ldq $31, 64($18) \n\
0576 ldq $31, 64($19) \n\
0577 ldq $31, 64($20) \n\
0578 \n\
0579 ldq $31, 128($17) \n\
0580 ldq $31, 128($18) \n\
0581 ldq $31, 128($19) \n\
0582 ldq $31, 128($20) \n\
0583 \n\
0584 ldq $31, 192($17) \n\
0585 ldq $31, 192($18) \n\
0586 ldq $31, 192($19) \n\
0587 ldq $31, 192($20) \n\
0588 .align 4 \n\
0589 4: \n\
0590 ldq $0,0($17) \n\
0591 ldq $1,0($18) \n\
0592 ldq $2,0($19) \n\
0593 ldq $3,0($20) \n\
0594 \n\
0595 ldq $4,8($17) \n\
0596 ldq $5,8($18) \n\
0597 ldq $6,8($19) \n\
0598 ldq $7,8($20) \n\
0599 \n\
0600 ldq $21,16($17) \n\
0601 ldq $22,16($18) \n\
0602 ldq $23,16($19) \n\
0603 ldq $24,16($20) \n\
0604 \n\
0605 ldq $25,24($17) \n\
0606 xor $0,$1,$1 # 6 cycles from $1 load \n\
0607 ldq $27,24($18) \n\
0608 xor $2,$3,$3 # 6 cycles from $3 load \n\
0609 \n\
0610 ldq $0,24($19) \n\
0611 xor $1,$3,$3 \n\
0612 ldq $1,24($20) \n\
0613 xor $4,$5,$5 # 7 cycles from $5 load \n\
0614 \n\
0615 stq $3,0($17) \n\
0616 xor $6,$7,$7 \n\
0617 xor $21,$22,$22 # 7 cycles from $22 load \n\
0618 xor $5,$7,$7 \n\
0619 \n\
0620 stq $7,8($17) \n\
0621 xor $23,$24,$24 # 7 cycles from $24 load \n\
0622 ldq $2,32($17) \n\
0623 xor $22,$24,$24 \n\
0624 \n\
0625 ldq $3,32($18) \n\
0626 ldq $4,32($19) \n\
0627 ldq $5,32($20) \n\
0628 xor $25,$27,$27 # 8 cycles from $27 load \n\
0629 \n\
0630 ldq $6,40($17) \n\
0631 ldq $7,40($18) \n\
0632 ldq $21,40($19) \n\
0633 ldq $22,40($20) \n\
0634 \n\
0635 stq $24,16($17) \n\
0636 xor $0,$1,$1 # 9 cycles from $1 load \n\
0637 xor $2,$3,$3 # 5 cycles from $3 load \n\
0638 xor $27,$1,$1 \n\
0639 \n\
0640 stq $1,24($17) \n\
0641 xor $4,$5,$5 # 5 cycles from $5 load \n\
0642 ldq $23,48($17) \n\
0643 xor $3,$5,$5 \n\
0644 \n\
0645 ldq $24,48($18) \n\
0646 ldq $25,48($19) \n\
0647 ldq $27,48($20) \n\
0648 ldq $0,56($17) \n\
0649 \n\
0650 ldq $1,56($18) \n\
0651 ldq $2,56($19) \n\
0652 ldq $3,56($20) \n\
0653 xor $6,$7,$7 # 8 cycles from $6 load \n\
0654 \n\
0655 ldq $31,256($17) \n\
0656 xor $21,$22,$22 # 8 cycles from $22 load \n\
0657 ldq $31,256($18) \n\
0658 xor $7,$22,$22 \n\
0659 \n\
0660 ldq $31,256($19) \n\
0661 xor $23,$24,$24 # 6 cycles from $24 load \n\
0662 ldq $31,256($20) \n\
0663 xor $25,$27,$27 # 6 cycles from $27 load \n\
0664 \n\
0665 stq $5,32($17) \n\
0666 xor $24,$27,$27 \n\
0667 xor $0,$1,$1 # 7 cycles from $1 load \n\
0668 xor $2,$3,$3 # 6 cycles from $3 load \n\
0669 \n\
0670 stq $22,40($17) \n\
0671 xor $1,$3,$3 \n\
0672 stq $27,48($17) \n\
0673 subq $16,1,$16 \n\
0674 \n\
0675 stq $3,56($17) \n\
0676 addq $20,64,$20 \n\
0677 addq $19,64,$19 \n\
0678 addq $18,64,$18 \n\
0679 \n\
0680 addq $17,64,$17 \n\
0681 bgt $16,4b \n\
0682 ret \n\
0683 .end xor_alpha_prefetch_4 \n\
0684 \n\
0685 .align 3 \n\
0686 .ent xor_alpha_prefetch_5 \n\
0687 xor_alpha_prefetch_5: \n\
0688 .prologue 0 \n\
0689 srl $16, 6, $16 \n\
0690 \n\
0691 ldq $31, 0($17) \n\
0692 ldq $31, 0($18) \n\
0693 ldq $31, 0($19) \n\
0694 ldq $31, 0($20) \n\
0695 ldq $31, 0($21) \n\
0696 \n\
0697 ldq $31, 64($17) \n\
0698 ldq $31, 64($18) \n\
0699 ldq $31, 64($19) \n\
0700 ldq $31, 64($20) \n\
0701 ldq $31, 64($21) \n\
0702 \n\
0703 ldq $31, 128($17) \n\
0704 ldq $31, 128($18) \n\
0705 ldq $31, 128($19) \n\
0706 ldq $31, 128($20) \n\
0707 ldq $31, 128($21) \n\
0708 \n\
0709 ldq $31, 192($17) \n\
0710 ldq $31, 192($18) \n\
0711 ldq $31, 192($19) \n\
0712 ldq $31, 192($20) \n\
0713 ldq $31, 192($21) \n\
0714 .align 4 \n\
0715 5: \n\
0716 ldq $0,0($17) \n\
0717 ldq $1,0($18) \n\
0718 ldq $2,0($19) \n\
0719 ldq $3,0($20) \n\
0720 \n\
0721 ldq $4,0($21) \n\
0722 ldq $5,8($17) \n\
0723 ldq $6,8($18) \n\
0724 ldq $7,8($19) \n\
0725 \n\
0726 ldq $22,8($20) \n\
0727 ldq $23,8($21) \n\
0728 ldq $24,16($17) \n\
0729 ldq $25,16($18) \n\
0730 \n\
0731 ldq $27,16($19) \n\
0732 xor $0,$1,$1 # 6 cycles from $1 load \n\
0733 ldq $28,16($20) \n\
0734 xor $2,$3,$3 # 6 cycles from $3 load \n\
0735 \n\
0736 ldq $0,16($21) \n\
0737 xor $1,$3,$3 \n\
0738 ldq $1,24($17) \n\
0739 xor $3,$4,$4 # 7 cycles from $4 load \n\
0740 \n\
0741 stq $4,0($17) \n\
0742 xor $5,$6,$6 # 7 cycles from $6 load \n\
0743 xor $7,$22,$22 # 7 cycles from $22 load \n\
0744 xor $6,$23,$23 # 7 cycles from $23 load \n\
0745 \n\
0746 ldq $2,24($18) \n\
0747 xor $22,$23,$23 \n\
0748 ldq $3,24($19) \n\
0749 xor $24,$25,$25 # 8 cycles from $25 load \n\
0750 \n\
0751 stq $23,8($17) \n\
0752 xor $25,$27,$27 # 8 cycles from $27 load \n\
0753 ldq $4,24($20) \n\
0754 xor $28,$0,$0 # 7 cycles from $0 load \n\
0755 \n\
0756 ldq $5,24($21) \n\
0757 xor $27,$0,$0 \n\
0758 ldq $6,32($17) \n\
0759 ldq $7,32($18) \n\
0760 \n\
0761 stq $0,16($17) \n\
0762 xor $1,$2,$2 # 6 cycles from $2 load \n\
0763 ldq $22,32($19) \n\
0764 xor $3,$4,$4 # 4 cycles from $4 load \n\
0765 \n\
0766 ldq $23,32($20) \n\
0767 xor $2,$4,$4 \n\
0768 ldq $24,32($21) \n\
0769 ldq $25,40($17) \n\
0770 \n\
0771 ldq $27,40($18) \n\
0772 ldq $28,40($19) \n\
0773 ldq $0,40($20) \n\
0774 xor $4,$5,$5 # 7 cycles from $5 load \n\
0775 \n\
0776 stq $5,24($17) \n\
0777 xor $6,$7,$7 # 7 cycles from $7 load \n\
0778 ldq $1,40($21) \n\
0779 ldq $2,48($17) \n\
0780 \n\
0781 ldq $3,48($18) \n\
0782 xor $7,$22,$22 # 7 cycles from $22 load \n\
0783 ldq $4,48($19) \n\
0784 xor $23,$24,$24 # 6 cycles from $24 load \n\
0785 \n\
0786 ldq $5,48($20) \n\
0787 xor $22,$24,$24 \n\
0788 ldq $6,48($21) \n\
0789 xor $25,$27,$27 # 7 cycles from $27 load \n\
0790 \n\
0791 stq $24,32($17) \n\
0792 xor $27,$28,$28 # 8 cycles from $28 load \n\
0793 ldq $7,56($17) \n\
0794 xor $0,$1,$1 # 6 cycles from $1 load \n\
0795 \n\
0796 ldq $22,56($18) \n\
0797 ldq $23,56($19) \n\
0798 ldq $24,56($20) \n\
0799 ldq $25,56($21) \n\
0800 \n\
0801 ldq $31,256($17) \n\
0802 xor $28,$1,$1 \n\
0803 ldq $31,256($18) \n\
0804 xor $2,$3,$3 # 9 cycles from $3 load \n\
0805 \n\
0806 ldq $31,256($19) \n\
0807 xor $3,$4,$4 # 9 cycles from $4 load \n\
0808 ldq $31,256($20) \n\
0809 xor $5,$6,$6 # 8 cycles from $6 load \n\
0810 \n\
0811 stq $1,40($17) \n\
0812 xor $4,$6,$6 \n\
0813 xor $7,$22,$22 # 7 cycles from $22 load \n\
0814 xor $23,$24,$24 # 6 cycles from $24 load \n\
0815 \n\
0816 stq $6,48($17) \n\
0817 xor $22,$24,$24 \n\
0818 ldq $31,256($21) \n\
0819 xor $24,$25,$25 # 8 cycles from $25 load \n\
0820 \n\
0821 stq $25,56($17) \n\
0822 subq $16,1,$16 \n\
0823 addq $21,64,$21 \n\
0824 addq $20,64,$20 \n\
0825 \n\
0826 addq $19,64,$19 \n\
0827 addq $18,64,$18 \n\
0828 addq $17,64,$17 \n\
0829 bgt $16,5b \n\
0830 \n\
0831 ret \n\
0832 .end xor_alpha_prefetch_5 \n\
0833 ");
0834
0835 static struct xor_block_template xor_block_alpha = {
0836 .name = "alpha",
0837 .do_2 = xor_alpha_2,
0838 .do_3 = xor_alpha_3,
0839 .do_4 = xor_alpha_4,
0840 .do_5 = xor_alpha_5,
0841 };
0842
0843 static struct xor_block_template xor_block_alpha_prefetch = {
0844 .name = "alpha prefetch",
0845 .do_2 = xor_alpha_prefetch_2,
0846 .do_3 = xor_alpha_prefetch_3,
0847 .do_4 = xor_alpha_prefetch_4,
0848 .do_5 = xor_alpha_prefetch_5,
0849 };
0850
0851
0852 #include <asm-generic/xor.h>
0853
/*
 * Replace any earlier XOR_TRY_TEMPLATES definition with the list of
 * templates to hand to xor_speed() on Alpha, in this order.
 * xor_block_8regs and xor_block_32regs are not defined in this file;
 * presumably they come from <asm-generic/xor.h> -- confirm.
 */
#undef XOR_TRY_TEMPLATES
#define XOR_TRY_TEMPLATES						\
	do {								\
		xor_speed(&xor_block_8regs);	/* external template */	\
		xor_speed(&xor_block_32regs);	/* external template */	\
		xor_speed(&xor_block_alpha);	/* plain asm */		\
		xor_speed(&xor_block_alpha_prefetch); /* asm + prefetch */ \
	} while (0)
0862
0863
0864
/*
 * Final template choice.  When implver() reports IMPLVER_EV6 the
 * prefetching template is used no matter what FASTEST is; on every other
 * implementation FASTEST is returned unchanged.
 *
 * NOTE(review): the EV6 override presumably exists because the speed
 * measurement does not reflect real-world (cold cache) behaviour there --
 * confirm with the original author/changelog.
 *
 * FASTEST is parenthesized so that an argument containing a
 * low-precedence operator (assignment, comma) still parses as a single
 * operand of the conditional expression.
 */
#define XOR_SELECT_TEMPLATE(FASTEST) \
	(implver() == IMPLVER_EV6 ? &xor_block_alpha_prefetch : (FASTEST))