0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024
0025
0026
0027
0028
0029
0030
0031
0032
0033
0034
0035
0036
0037
0038
0039
# Symbolic ABI names for the MIPS integer register file.  Everything
# downstream interpolates these into the generated assembly, so each
# value is the literal register token (e.g. '$12') that the assembler
# expects.  Note the deliberate mapping of $t0..$t2 onto $2/$24/$25.
($zero,$at,$t0,$t1,$t2) = ('$0',  '$1',  '$2',  '$24', '$25');
($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)
                        = ('$4',  '$5',  '$6',  '$7',
                           '$8',  '$9',  '$10', '$11');
($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8,$s9,$s10,$s11)
                        = map { '$' . $_ } 12 .. 23;
($gp,$tp,$sp,$fp,$ra)   = ('$3',  '$28', '$29', '$30', '$31');
0044
0045
0046
0047
0048
0049
0050
0051
0052
0053
0054
0055
0056
0057
0058
0059
0060
0061
0062
0063
0064
0065
# First command-line argument selects the output flavour ("64" by
# default); it is matched later against /64|n32/ and /nubi/.
$flavour = shift;
$flavour = "64" unless $flavour;

# Result register used by the generated code: $a0 under a nubi flavour,
# $t0 otherwise.  NOTE(review): presumably the ABI return-value register
# -- confirm against the target calling convention.
if ($flavour =~ /nubi/i) {
    $v0 = $a0;
}
else {
    $v0 = $t0;
}
0069
# 64-bit (and n32) flavour: the 130-bit Poly1305 accumulator is kept in
# 64-bit registers.  The alternative 32-bit flavour is generated by the
# else-branch near the bottom of this file.
if ($flavour =~ /64|n32/i) {{{

# poly1305_init(ctx, key): zero the 24-byte hash state at ctx+0/8/16,
# then (if the key pointer is non-NULL) load the 16-byte r-half of the
# key, clamp it, and store r0/r1 at ctx+24/32 plus the precomputed
# r1 + (r1>>2) at ctx+40.
my ($ctx,$inp,$len,$padbit) = ($a0,$a1,$a2,$a3);
my ($in0,$in1,$tmp0,$tmp1,$tmp2,$tmp3,$tmp4) = ($a4,$a5,$a6,$a7,$at,$t0,$t1);

# NOTE(review): the heredoc below opens with dangling fragments
# ("defined(_MIPS_ARCH_MIPS64R6)) \" ...) of what were evidently C
# preprocessor #if lines, and it emits mutually redundant sequences
# back to back: both the shift-based unaligned-load path ending at
# .Laligned_key AND the ldl/ldr pair after it, and both the dsbh/dshd
# byte swap AND the generic mask-and-shift swap.  The #if/#else/#endif
# guards selecting between these architecture variants appear to have
# been stripped from this copy -- restore them before assembling the
# output.
$code.=<<___;

defined(_MIPS_ARCH_MIPS64R6)) \\
&& !defined(_MIPS_ARCH_MIPS64R2)
































.text
.set noat
.set noreorder

.align 5
.globl poly1305_init
.ent poly1305_init
poly1305_init:
.frame $sp,0,$ra
.set reorder

sd $zero,0($ctx)
sd $zero,8($ctx)
sd $zero,16($ctx)

beqz $inp,.Lno_key


andi $tmp0,$inp,7
dsubu $inp,$inp,$tmp0
sll $tmp0,$tmp0,3
ld $in0,0($inp)
ld $in1,8($inp)
beqz $tmp0,.Laligned_key
ld $tmp2,16($inp)

subu $tmp1,$zero,$tmp0

dsllv $in0,$in0,$tmp0
dsrlv $tmp3,$in1,$tmp1
dsllv $in1,$in1,$tmp0
dsrlv $tmp2,$tmp2,$tmp1

dsrlv $in0,$in0,$tmp0
dsllv $tmp3,$in1,$tmp1
dsrlv $in1,$in1,$tmp0
dsllv $tmp2,$tmp2,$tmp1

or $in0,$in0,$tmp3
or $in1,$in1,$tmp2
.Laligned_key:

ldl $in0,0+MSB($inp)
ldl $in1,8+MSB($inp)
ldr $in0,0+LSB($inp)
ldr $in1,8+LSB($inp)



dsbh $in0,$in0
dsbh $in1,$in1
dshd $in0,$in0
dshd $in1,$in1

ori $tmp0,$zero,0xFF
dsll $tmp2,$tmp0,32
or $tmp0,$tmp2

and $tmp1,$in0,$tmp0
and $tmp3,$in1,$tmp0
dsrl $tmp2,$in0,24
dsrl $tmp4,$in1,24
dsll $tmp1,24
dsll $tmp3,24
and $tmp2,$tmp0
and $tmp4,$tmp0
dsll $tmp0,8
or $tmp1,$tmp2
or $tmp3,$tmp4
and $tmp2,$in0,$tmp0
and $tmp4,$in1,$tmp0
dsrl $in0,8
dsrl $in1,8
dsll $tmp2,8
dsll $tmp4,8
and $in0,$tmp0
and $in1,$tmp0
or $tmp1,$tmp2
or $tmp3,$tmp4
or $in0,$tmp1
or $in1,$tmp3
dsrl $tmp1,$in0,32
dsrl $tmp3,$in1,32
dsll $in0,32
dsll $in1,32
or $in0,$tmp1
or $in1,$tmp3


li $tmp0,1
dsll $tmp0,32
daddiu $tmp0,-63
dsll $tmp0,28
daddiu $tmp0,-1

and $in0,$tmp0
daddiu $tmp0,-3
and $in1,$tmp0

sd $in0,24($ctx)
dsrl $tmp0,$in1,2
sd $in1,32($ctx)
daddu $tmp0,$in1
sd $tmp0,40($ctx)

.Lno_key:
li $v0,0
jr $ra
.end poly1305_init
___
0223 {
0224 my $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? "0x0003f000" : "0x00030000";
0225
0226 my ($h0,$h1,$h2,$r0,$r1,$rs1,$d0,$d1,$d2) =
0227 ($s0,$s1,$s2,$s3,$s4,$s5,$in0,$in1,$t2);
0228 my ($shr,$shl) = ($s6,$s7);
0229
0230 $code.=<<___;
0231 .align 5
0232 .globl poly1305_blocks
0233 .ent poly1305_blocks
0234 poly1305_blocks:
0235 .set noreorder
0236 dsrl $len,4
0237 bnez $len,poly1305_blocks_internal
0238 nop
0239 jr $ra
0240 nop
0241 .end poly1305_blocks
0242
0243 .align 5
0244 .ent poly1305_blocks_internal
0245 poly1305_blocks_internal:
0246 .set noreorder
0247
0248 .frame $sp,8*8,$ra
0249 .mask $SAVED_REGS_MASK|0x000c0000,-8
0250 dsubu $sp,8*8
0251 sd $s7,56($sp)
0252 sd $s6,48($sp)
0253
0254 .frame $sp,6*8,$ra
0255 .mask $SAVED_REGS_MASK,-8
0256 dsubu $sp,6*8
0257
0258 sd $s5,40($sp)
0259 sd $s4,32($sp)
0260 ___
0261 $code.=<<___ if ($flavour =~ /nubi/i);
0262 sd $s3,24($sp)
0263 sd $s2,16($sp)
0264 sd $s1,8($sp)
0265 sd $s0,0($sp)
0266 ___
0267 $code.=<<___;
0268 .set reorder
0269
0270
0271 andi $shr,$inp,7
0272 dsubu $inp,$inp,$shr
0273 sll $shr,$shr,3
0274 subu $shl,$zero,$shr
0275
0276
0277 ld $h0,0($ctx)
0278 ld $h1,8($ctx)
0279 ld $h2,16($ctx)
0280
0281 ld $r0,24($ctx)
0282 ld $r1,32($ctx)
0283 ld $rs1,40($ctx)
0284
0285 dsll $len,4
0286 daddu $len,$inp
0287 b .Loop
0288
0289 .align 4
0290 .Loop:
0291
0292 ld $in0,0($inp)
0293 ld $in1,8($inp)
0294 beqz $shr,.Laligned_inp
0295
0296 ld $tmp2,16($inp)
0297
0298 dsllv $in0,$in0,$shr
0299 dsrlv $tmp3,$in1,$shl
0300 dsllv $in1,$in1,$shr
0301 dsrlv $tmp2,$tmp2,$shl
0302
0303 dsrlv $in0,$in0,$shr
0304 dsllv $tmp3,$in1,$shl
0305 dsrlv $in1,$in1,$shr
0306 dsllv $tmp2,$tmp2,$shl
0307
0308 or $in0,$in0,$tmp3
0309 or $in1,$in1,$tmp2
0310 .Laligned_inp:
0311
0312 ldl $in0,0+MSB($inp)
0313 ldl $in1,8+MSB($inp)
0314 ldr $in0,0+LSB($inp)
0315 ldr $in1,8+LSB($inp)
0316
0317 daddiu $inp,16
0318
0319
0320 dsbh $in0,$in0
0321 dsbh $in1,$in1
0322 dshd $in0,$in0
0323 dshd $in1,$in1
0324
0325 ori $tmp0,$zero,0xFF
0326 dsll $tmp2,$tmp0,32
0327 or $tmp0,$tmp2
0328
0329 and $tmp1,$in0,$tmp0
0330 and $tmp3,$in1,$tmp0
0331 dsrl $tmp2,$in0,24
0332 dsrl $tmp4,$in1,24
0333 dsll $tmp1,24
0334 dsll $tmp3,24
0335 and $tmp2,$tmp0
0336 and $tmp4,$tmp0
0337 dsll $tmp0,8
0338 or $tmp1,$tmp2
0339 or $tmp3,$tmp4
0340 and $tmp2,$in0,$tmp0
0341 and $tmp4,$in1,$tmp0
0342 dsrl $in0,8
0343 dsrl $in1,8
0344 dsll $tmp2,8
0345 dsll $tmp4,8
0346 and $in0,$tmp0
0347 and $in1,$tmp0
0348 or $tmp1,$tmp2
0349 or $tmp3,$tmp4
0350 or $in0,$tmp1
0351 or $in1,$tmp3
0352 dsrl $tmp1,$in0,32
0353 dsrl $tmp3,$in1,32
0354 dsll $in0,32
0355 dsll $in1,32
0356 or $in0,$tmp1
0357 or $in1,$tmp3
0358
0359
0360 dsrl $tmp1,$h2,2
0361 andi $h2,$h2,3
0362 dsll $tmp0,$tmp1,2
0363
0364 daddu $d0,$h0,$in0
0365 daddu $tmp1,$tmp0
0366 sltu $tmp0,$d0,$h0
0367 daddu $d0,$d0,$tmp1
0368 sltu $tmp1,$d0,$tmp1
0369 daddu $d1,$h1,$in1
0370 daddu $tmp0,$tmp1
0371 sltu $tmp1,$d1,$h1
0372 daddu $d1,$tmp0
0373
0374 dmultu ($r0,$d0)
0375 daddu $d2,$h2,$padbit
0376 sltu $tmp0,$d1,$tmp0
0377 mflo ($h0,$r0,$d0)
0378 mfhi ($h1,$r0,$d0)
0379
0380 dmultu ($rs1,$d1)
0381 daddu $d2,$tmp1
0382 daddu $d2,$tmp0
0383 mflo ($tmp0,$rs1,$d1)
0384 mfhi ($tmp1,$rs1,$d1)
0385
0386 dmultu ($r1,$d0)
0387 mflo ($tmp2,$r1,$d0)
0388 mfhi ($h2,$r1,$d0)
0389 daddu $h0,$tmp0
0390 daddu $h1,$tmp1
0391 sltu $tmp0,$h0,$tmp0
0392
0393 dmultu ($r0,$d1)
0394 daddu $h1,$tmp0
0395 daddu $h1,$tmp2
0396 mflo ($tmp0,$r0,$d1)
0397 mfhi ($tmp1,$r0,$d1)
0398
0399 dmultu ($rs1,$d2)
0400 sltu $tmp2,$h1,$tmp2
0401 daddu $h2,$tmp2
0402 mflo ($tmp2,$rs1,$d2)
0403
0404 dmultu ($r0,$d2)
0405 daddu $h1,$tmp0
0406 daddu $h2,$tmp1
0407 mflo ($tmp3,$r0,$d2)
0408 sltu $tmp0,$h1,$tmp0
0409 daddu $h2,$tmp0
0410
0411 daddu $h1,$tmp2
0412 sltu $tmp2,$h1,$tmp2
0413 daddu $h2,$tmp2
0414 daddu $h2,$tmp3
0415
0416 bne $inp,$len,.Loop
0417
0418 sd $h0,0($ctx)
0419 sd $h1,8($ctx)
0420 sd $h2,16($ctx)
0421
0422 .set noreorder
0423
0424 ld $s7,56($sp)
0425 ld $s6,48($sp)
0426
0427 ld $s5,40($sp)
0428 ld $s4,32($sp)
0429 ___
0430 $code.=<<___ if ($flavour =~ /nubi/i);
0431 ld $s3,24($sp)
0432 ld $s2,16($sp)
0433 ld $s1,8($sp)
0434 ld $s0,0($sp)
0435 ___
0436 $code.=<<___;
0437 jr $ra
0438
0439 daddu $sp,8*8
0440
0441 daddu $sp,6*8
0442
0443 .end poly1305_blocks_internal
0444 ___
0445 }
0446 {
0447 my ($ctx,$mac,$nonce) = ($a0,$a1,$a2);
0448
0449 $code.=<<___;
0450 .align 5
0451 .globl poly1305_emit
0452 .ent poly1305_emit
0453 poly1305_emit:
0454 .frame $sp,0,$ra
0455 .set reorder
0456
0457 ld $tmp2,16($ctx)
0458 ld $tmp0,0($ctx)
0459 ld $tmp1,8($ctx)
0460
0461 li $in0,-4
0462 dsrl $in1,$tmp2,2
0463 and $in0,$tmp2
0464 andi $tmp2,$tmp2,3
0465 daddu $in0,$in1
0466
0467 daddu $tmp0,$tmp0,$in0
0468 sltu $in1,$tmp0,$in0
0469 daddiu $in0,$tmp0,5
0470 daddu $tmp1,$tmp1,$in1
0471 sltiu $tmp3,$in0,5
0472 sltu $tmp4,$tmp1,$in1
0473 daddu $in1,$tmp1,$tmp3
0474 daddu $tmp2,$tmp2,$tmp4
0475 sltu $tmp3,$in1,$tmp3
0476 daddu $tmp2,$tmp2,$tmp3
0477
0478 dsrl $tmp2,2
0479 dsubu $tmp2,$zero,$tmp2
0480
0481 xor $in0,$tmp0
0482 xor $in1,$tmp1
0483 and $in0,$tmp2
0484 and $in1,$tmp2
0485 xor $in0,$tmp0
0486 xor $in1,$tmp1
0487
0488 lwu $tmp0,0($nonce)
0489 lwu $tmp1,4($nonce)
0490 lwu $tmp2,8($nonce)
0491 lwu $tmp3,12($nonce)
0492 dsll $tmp1,32
0493 dsll $tmp3,32
0494 or $tmp0,$tmp1
0495 or $tmp2,$tmp3
0496
0497 daddu $in0,$tmp0
0498 daddu $in1,$tmp2
0499 sltu $tmp0,$in0,$tmp0
0500 daddu $in1,$tmp0
0501
0502 dsrl $tmp0,$in0,8
0503 dsrl $tmp1,$in0,16
0504 dsrl $tmp2,$in0,24
0505 sb $in0,0($mac)
0506 dsrl $tmp3,$in0,32
0507 sb $tmp0,1($mac)
0508 dsrl $tmp0,$in0,40
0509 sb $tmp1,2($mac)
0510 dsrl $tmp1,$in0,48
0511 sb $tmp2,3($mac)
0512 dsrl $tmp2,$in0,56
0513 sb $tmp3,4($mac)
0514 dsrl $tmp3,$in1,8
0515 sb $tmp0,5($mac)
0516 dsrl $tmp0,$in1,16
0517 sb $tmp1,6($mac)
0518 dsrl $tmp1,$in1,24
0519 sb $tmp2,7($mac)
0520
0521 sb $in1,8($mac)
0522 dsrl $tmp2,$in1,32
0523 sb $tmp3,9($mac)
0524 dsrl $tmp3,$in1,40
0525 sb $tmp0,10($mac)
0526 dsrl $tmp0,$in1,48
0527 sb $tmp1,11($mac)
0528 dsrl $tmp1,$in1,56
0529 sb $tmp2,12($mac)
0530 sb $tmp3,13($mac)
0531 sb $tmp0,14($mac)
0532 sb $tmp1,15($mac)
0533
0534 jr $ra
0535 .end poly1305_emit
0536 .rdata
0537 .asciiz "Poly1305 for MIPS64, CRYPTOGAMS by \@dot-asm"
0538 .align 2
0539 ___
0540 }
# 32-bit flavour: the accumulator is carried in five 32-bit limbs
# (h0..h4) instead of the 64-bit path's two-and-a-bit limbs.
}}} else {{{

# poly1305_init(ctx, key), 32-bit variant: zero the 20-byte hash state
# at ctx+0..16, then load/clamp the four 32-bit r-words and store them
# at ctx+20..32 along with r_i + (r_i>>2) at ctx+36..44.
my ($ctx,$inp,$len,$padbit) = ($a0,$a1,$a2,$a3);
my ($in0,$in1,$in2,$in3,$tmp0,$tmp1,$tmp2,$tmp3) =
   ($a4,$a5,$a6,$a7,$at,$t0,$t1,$t2);

# NOTE(review): same stripped-preprocessor issue as the 64-bit branch --
# the dangling "defined(_MIPS_ARCH_MIPS32R6)) \" fragment below and the
# back-to-back alternatives (shift-based unaligned load vs lwl/lwr,
# wsbh/rotr vs generic byte swap) show the #if/#else guards are missing
# from this copy.  Restore them before assembling.
$code.=<<___;

defined(_MIPS_ARCH_MIPS32R6)) \\
&& !defined(_MIPS_ARCH_MIPS32R2)
































.text
.set noat
.set noreorder

.align 5
.globl poly1305_init
.ent poly1305_init
poly1305_init:
.frame $sp,0,$ra
.set reorder

sw $zero,0($ctx)
sw $zero,4($ctx)
sw $zero,8($ctx)
sw $zero,12($ctx)
sw $zero,16($ctx)

beqz $inp,.Lno_key


andi $tmp0,$inp,3
subu $inp,$inp,$tmp0
sll $tmp0,$tmp0,3
lw $in0,0($inp)
lw $in1,4($inp)
lw $in2,8($inp)
lw $in3,12($inp)
beqz $tmp0,.Laligned_key

lw $tmp2,16($inp)
subu $tmp1,$zero,$tmp0

sllv $in0,$in0,$tmp0
srlv $tmp3,$in1,$tmp1
sllv $in1,$in1,$tmp0
or $in0,$in0,$tmp3
srlv $tmp3,$in2,$tmp1
sllv $in2,$in2,$tmp0
or $in1,$in1,$tmp3
srlv $tmp3,$in3,$tmp1
sllv $in3,$in3,$tmp0
or $in2,$in2,$tmp3
srlv $tmp2,$tmp2,$tmp1
or $in3,$in3,$tmp2

srlv $in0,$in0,$tmp0
sllv $tmp3,$in1,$tmp1
srlv $in1,$in1,$tmp0
or $in0,$in0,$tmp3
sllv $tmp3,$in2,$tmp1
srlv $in2,$in2,$tmp0
or $in1,$in1,$tmp3
sllv $tmp3,$in3,$tmp1
srlv $in3,$in3,$tmp0
or $in2,$in2,$tmp3
sllv $tmp2,$tmp2,$tmp1
or $in3,$in3,$tmp2

.Laligned_key:

lwl $in0,0+MSB($inp)
lwl $in1,4+MSB($inp)
lwl $in2,8+MSB($inp)
lwl $in3,12+MSB($inp)
lwr $in0,0+LSB($inp)
lwr $in1,4+LSB($inp)
lwr $in2,8+LSB($inp)
lwr $in3,12+LSB($inp)



wsbh $in0,$in0
wsbh $in1,$in1
wsbh $in2,$in2
wsbh $in3,$in3
rotr $in0,$in0,16
rotr $in1,$in1,16
rotr $in2,$in2,16
rotr $in3,$in3,16

srl $tmp0,$in0,24
srl $tmp1,$in0,8
andi $tmp2,$in0,0xFF00
sll $in0,$in0,24
andi $tmp1,0xFF00
sll $tmp2,$tmp2,8
or $in0,$tmp0
srl $tmp0,$in1,24
or $tmp1,$tmp2
srl $tmp2,$in1,8
or $in0,$tmp1
andi $tmp1,$in1,0xFF00
sll $in1,$in1,24
andi $tmp2,0xFF00
sll $tmp1,$tmp1,8
or $in1,$tmp0
srl $tmp0,$in2,24
or $tmp2,$tmp1
srl $tmp1,$in2,8
or $in1,$tmp2
andi $tmp2,$in2,0xFF00
sll $in2,$in2,24
andi $tmp1,0xFF00
sll $tmp2,$tmp2,8
or $in2,$tmp0
srl $tmp0,$in3,24
or $tmp1,$tmp2
srl $tmp2,$in3,8
or $in2,$tmp1
andi $tmp1,$in3,0xFF00
sll $in3,$in3,24
andi $tmp2,0xFF00
sll $tmp1,$tmp1,8
or $in3,$tmp0
or $tmp2,$tmp1
or $in3,$tmp2


lui $tmp0,0x0fff
ori $tmp0,0xffff
and $in0,$in0,$tmp0
subu $tmp0,3
and $in1,$in1,$tmp0
and $in2,$in2,$tmp0
and $in3,$in3,$tmp0

sw $in0,20($ctx)
sw $in1,24($ctx)
sw $in2,28($ctx)
sw $in3,32($ctx)

srl $tmp1,$in1,2
srl $tmp2,$in2,2
srl $tmp3,$in3,2
addu $in1,$in1,$tmp1
addu $in2,$in2,$tmp2
addu $in3,$in3,$tmp3
sw $in1,36($ctx)
sw $in2,40($ctx)
sw $in3,44($ctx)
.Lno_key:
li $v0,0
jr $ra
.end poly1305_init
___
0730 {
0731 my $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? "0x00fff000" : "0x00ff0000";
0732
0733 my ($h0,$h1,$h2,$h3,$h4, $r0,$r1,$r2,$r3, $rs1,$rs2,$rs3) =
0734 ($s0,$s1,$s2,$s3,$s4, $s5,$s6,$s7,$s8, $s9,$s10,$s11);
0735 my ($d0,$d1,$d2,$d3) =
0736 ($a4,$a5,$a6,$a7);
0737 my $shr = $t2;
0738 my $one = $t2;
0739
0740 $code.=<<___;
0741 .globl poly1305_blocks
0742 .align 5
0743 .ent poly1305_blocks
0744 poly1305_blocks:
0745 .frame $sp,16*4,$ra
0746 .mask $SAVED_REGS_MASK,-4
0747 .set noreorder
0748 subu $sp, $sp,4*12
0749 sw $s11,4*11($sp)
0750 sw $s10,4*10($sp)
0751 sw $s9, 4*9($sp)
0752 sw $s8, 4*8($sp)
0753 sw $s7, 4*7($sp)
0754 sw $s6, 4*6($sp)
0755 sw $s5, 4*5($sp)
0756 sw $s4, 4*4($sp)
0757 ___
0758 $code.=<<___ if ($flavour =~ /nubi/i);
0759 sw $s3, 4*3($sp)
0760 sw $s2, 4*2($sp)
0761 sw $s1, 4*1($sp)
0762 sw $s0, 4*0($sp)
0763 ___
0764 $code.=<<___;
0765 .set reorder
0766
0767 srl $len,4
0768 li $one,1
0769 beqz $len,.Labort
0770
0771
0772 andi $shr,$inp,3
0773 subu $inp,$inp,$shr
0774 sll $shr,$shr,3
0775
0776
0777 lw $h0,0($ctx)
0778 lw $h1,4($ctx)
0779 lw $h2,8($ctx)
0780 lw $h3,12($ctx)
0781 lw $h4,16($ctx)
0782
0783 lw $r0,20($ctx)
0784 lw $r1,24($ctx)
0785 lw $r2,28($ctx)
0786 lw $r3,32($ctx)
0787 lw $rs1,36($ctx)
0788 lw $rs2,40($ctx)
0789 lw $rs3,44($ctx)
0790
0791 sll $len,4
0792 addu $len,$len,$inp
0793 b .Loop
0794
0795 .align 4
0796 .Loop:
0797
0798 lw $d0,0($inp)
0799 lw $d1,4($inp)
0800 lw $d2,8($inp)
0801 lw $d3,12($inp)
0802 beqz $shr,.Laligned_inp
0803
0804 lw $t0,16($inp)
0805 subu $t1,$zero,$shr
0806
0807 sllv $d0,$d0,$shr
0808 srlv $at,$d1,$t1
0809 sllv $d1,$d1,$shr
0810 or $d0,$d0,$at
0811 srlv $at,$d2,$t1
0812 sllv $d2,$d2,$shr
0813 or $d1,$d1,$at
0814 srlv $at,$d3,$t1
0815 sllv $d3,$d3,$shr
0816 or $d2,$d2,$at
0817 srlv $t0,$t0,$t1
0818 or $d3,$d3,$t0
0819
0820 srlv $d0,$d0,$shr
0821 sllv $at,$d1,$t1
0822 srlv $d1,$d1,$shr
0823 or $d0,$d0,$at
0824 sllv $at,$d2,$t1
0825 srlv $d2,$d2,$shr
0826 or $d1,$d1,$at
0827 sllv $at,$d3,$t1
0828 srlv $d3,$d3,$shr
0829 or $d2,$d2,$at
0830 sllv $t0,$t0,$t1
0831 or $d3,$d3,$t0
0832
0833 .Laligned_inp:
0834
0835 lwl $d0,0+MSB($inp)
0836 lwl $d1,4+MSB($inp)
0837 lwl $d2,8+MSB($inp)
0838 lwl $d3,12+MSB($inp)
0839 lwr $d0,0+LSB($inp)
0840 lwr $d1,4+LSB($inp)
0841 lwr $d2,8+LSB($inp)
0842 lwr $d3,12+LSB($inp)
0843
0844
0845
0846 wsbh $d0,$d0
0847 wsbh $d1,$d1
0848 wsbh $d2,$d2
0849 wsbh $d3,$d3
0850 rotr $d0,$d0,16
0851 rotr $d1,$d1,16
0852 rotr $d2,$d2,16
0853 rotr $d3,$d3,16
0854
0855 srl $at,$d0,24
0856 srl $t0,$d0,8
0857 andi $t1,$d0,0xFF00
0858 sll $d0,$d0,24
0859 andi $t0,0xFF00
0860 sll $t1,$t1,8
0861 or $d0,$at
0862 srl $at,$d1,24
0863 or $t0,$t1
0864 srl $t1,$d1,8
0865 or $d0,$t0
0866 andi $t0,$d1,0xFF00
0867 sll $d1,$d1,24
0868 andi $t1,0xFF00
0869 sll $t0,$t0,8
0870 or $d1,$at
0871 srl $at,$d2,24
0872 or $t1,$t0
0873 srl $t0,$d2,8
0874 or $d1,$t1
0875 andi $t1,$d2,0xFF00
0876 sll $d2,$d2,24
0877 andi $t0,0xFF00
0878 sll $t1,$t1,8
0879 or $d2,$at
0880 srl $at,$d3,24
0881 or $t0,$t1
0882 srl $t1,$d3,8
0883 or $d2,$t0
0884 andi $t0,$d3,0xFF00
0885 sll $d3,$d3,24
0886 andi $t1,0xFF00
0887 sll $t0,$t0,8
0888 or $d3,$at
0889 or $t1,$t0
0890 or $d3,$t1
0891
0892
0893 srl $t0,$h4,2
0894 andi $h4,$h4,3
0895 sll $at,$t0,2
0896
0897 addu $d0,$d0,$h0
0898 addu $t0,$t0,$at
0899 sltu $h0,$d0,$h0
0900 addu $d0,$d0,$t0
0901 sltu $at,$d0,$t0
0902
0903 addu $d1,$d1,$h1
0904 addu $h0,$h0,$at
0905 sltu $h1,$d1,$h1
0906 addu $d1,$d1,$h0
0907 sltu $h0,$d1,$h0
0908
0909 addu $d2,$d2,$h2
0910 addu $h1,$h1,$h0
0911 sltu $h2,$d2,$h2
0912 addu $d2,$d2,$h1
0913 sltu $h1,$d2,$h1
0914
0915 addu $d3,$d3,$h3
0916 addu $h2,$h2,$h1
0917 sltu $h3,$d3,$h3
0918 addu $d3,$d3,$h2
0919
0920
0921 multu $r0,$d0
0922 sltu $h2,$d3,$h2
0923 maddu $rs3,$d1
0924 addu $h3,$h3,$h2
0925 maddu $rs2,$d2
0926 addu $h4,$h4,$padbit
0927 maddu $rs1,$d3
0928 addu $h4,$h4,$h3
0929 mfhi $at
0930 mflo $h0
0931
0932 multu $r1,$d0
0933 maddu $r0,$d1
0934 maddu $rs3,$d2
0935 maddu $rs2,$d3
0936 maddu $rs1,$h4
0937 maddu $at,$one
0938 mfhi $at
0939 mflo $h1
0940
0941 multu $r2,$d0
0942 maddu $r1,$d1
0943 maddu $r0,$d2
0944 maddu $rs3,$d3
0945 maddu $rs2,$h4
0946 maddu $at,$one
0947 mfhi $at
0948 mflo $h2
0949
0950 mul $t0,$r0,$h4
0951
0952 multu $r3,$d0
0953 maddu $r2,$d1
0954 maddu $r1,$d2
0955 maddu $r0,$d3
0956 maddu $rs3,$h4
0957 maddu $at,$one
0958 mfhi $at
0959 mflo $h3
0960
0961 addiu $inp,$inp,16
0962
0963 addu $h4,$t0,$at
0964
0965 multu ($r0,$d0)
0966 mflo ($h0,$r0,$d0)
0967 mfhi ($h1,$r0,$d0)
0968
0969 sltu $h2,$d3,$h2
0970 addu $h3,$h3,$h2
0971
0972 multu ($rs3,$d1)
0973 mflo ($at,$rs3,$d1)
0974 mfhi ($t0,$rs3,$d1)
0975
0976 addu $h4,$h4,$padbit
0977 addiu $inp,$inp,16
0978 addu $h4,$h4,$h3
0979
0980 multu ($rs2,$d2)
0981 mflo ($a3,$rs2,$d2)
0982 mfhi ($t1,$rs2,$d2)
0983 addu $h0,$h0,$at
0984 addu $h1,$h1,$t0
0985 multu ($rs1,$d3)
0986 sltu $at,$h0,$at
0987 addu $h1,$h1,$at
0988
0989 mflo ($at,$rs1,$d3)
0990 mfhi ($t0,$rs1,$d3)
0991 addu $h0,$h0,$a3
0992 addu $h1,$h1,$t1
0993 multu ($r1,$d0)
0994 sltu $a3,$h0,$a3
0995 addu $h1,$h1,$a3
0996
0997
0998 mflo ($a3,$r1,$d0)
0999 mfhi ($h2,$r1,$d0)
1000 addu $h0,$h0,$at
1001 addu $h1,$h1,$t0
1002 multu ($r0,$d1)
1003 sltu $at,$h0,$at
1004 addu $h1,$h1,$at
1005
1006 mflo ($at,$r0,$d1)
1007 mfhi ($t0,$r0,$d1)
1008 addu $h1,$h1,$a3
1009 sltu $a3,$h1,$a3
1010 multu ($rs3,$d2)
1011 addu $h2,$h2,$a3
1012
1013 mflo ($a3,$rs3,$d2)
1014 mfhi ($t1,$rs3,$d2)
1015 addu $h1,$h1,$at
1016 addu $h2,$h2,$t0
1017 multu ($rs2,$d3)
1018 sltu $at,$h1,$at
1019 addu $h2,$h2,$at
1020
1021 mflo ($at,$rs2,$d3)
1022 mfhi ($t0,$rs2,$d3)
1023 addu $h1,$h1,$a3
1024 addu $h2,$h2,$t1
1025 multu ($rs1,$h4)
1026 sltu $a3,$h1,$a3
1027 addu $h2,$h2,$a3
1028
1029 mflo ($a3,$rs1,$h4)
1030 addu $h1,$h1,$at
1031 addu $h2,$h2,$t0
1032 multu ($r2,$d0)
1033 sltu $at,$h1,$at
1034 addu $h2,$h2,$at
1035
1036
1037 mflo ($at,$r2,$d0)
1038 mfhi ($h3,$r2,$d0)
1039 addu $h1,$h1,$a3
1040 sltu $a3,$h1,$a3
1041 multu ($r1,$d1)
1042 addu $h2,$h2,$a3
1043
1044 mflo ($a3,$r1,$d1)
1045 mfhi ($t1,$r1,$d1)
1046 addu $h2,$h2,$at
1047 sltu $at,$h2,$at
1048 multu ($r0,$d2)
1049 addu $h3,$h3,$at
1050
1051 mflo ($at,$r0,$d2)
1052 mfhi ($t0,$r0,$d2)
1053 addu $h2,$h2,$a3
1054 addu $h3,$h3,$t1
1055 multu ($rs3,$d3)
1056 sltu $a3,$h2,$a3
1057 addu $h3,$h3,$a3
1058
1059 mflo ($a3,$rs3,$d3)
1060 mfhi ($t1,$rs3,$d3)
1061 addu $h2,$h2,$at
1062 addu $h3,$h3,$t0
1063 multu ($rs2,$h4)
1064 sltu $at,$h2,$at
1065 addu $h3,$h3,$at
1066
1067 mflo ($at,$rs2,$h4)
1068 addu $h2,$h2,$a3
1069 addu $h3,$h3,$t1
1070 multu ($r3,$d0)
1071 sltu $a3,$h2,$a3
1072 addu $h3,$h3,$a3
1073
1074
1075 mflo ($a3,$r3,$d0)
1076 mfhi ($t1,$r3,$d0)
1077 addu $h2,$h2,$at
1078 sltu $at,$h2,$at
1079 multu ($r2,$d1)
1080 addu $h3,$h3,$at
1081
1082 mflo ($at,$r2,$d1)
1083 mfhi ($t0,$r2,$d1)
1084 addu $h3,$h3,$a3
1085 sltu $a3,$h3,$a3
1086 multu ($r0,$d3)
1087 addu $t1,$t1,$a3
1088
1089 mflo ($a3,$r0,$d3)
1090 mfhi ($d3,$r0,$d3)
1091 addu $h3,$h3,$at
1092 addu $t1,$t1,$t0
1093 multu ($r1,$d2)
1094 sltu $at,$h3,$at
1095 addu $t1,$t1,$at
1096
1097 mflo ($at,$r1,$d2)
1098 mfhi ($t0,$r1,$d2)
1099 addu $h3,$h3,$a3
1100 addu $t1,$t1,$d3
1101 multu ($rs3,$h4)
1102 sltu $a3,$h3,$a3
1103 addu $t1,$t1,$a3
1104
1105 mflo ($a3,$rs3,$h4)
1106 addu $h3,$h3,$at
1107 addu $t1,$t1,$t0
1108 multu ($r0,$h4)
1109 sltu $at,$h3,$at
1110 addu $t1,$t1,$at
1111
1112
1113 mflo ($h4,$r0,$h4)
1114 addu $h3,$h3,$a3
1115 sltu $a3,$h3,$a3
1116 addu $t1,$t1,$a3
1117 addu $h4,$h4,$t1
1118
1119 li $padbit,1
1120
1121 bne $inp,$len,.Loop
1122
1123 sw $h0,0($ctx)
1124 sw $h1,4($ctx)
1125 sw $h2,8($ctx)
1126 sw $h3,12($ctx)
1127 sw $h4,16($ctx)
1128
1129 .set noreorder
1130 .Labort:
1131 lw $s11,4*11($sp)
1132 lw $s10,4*10($sp)
1133 lw $s9, 4*9($sp)
1134 lw $s8, 4*8($sp)
1135 lw $s7, 4*7($sp)
1136 lw $s6, 4*6($sp)
1137 lw $s5, 4*5($sp)
1138 lw $s4, 4*4($sp)
1139 ___
1140 $code.=<<___ if ($flavour =~ /nubi/i);
1141 lw $s3, 4*3($sp)
1142 lw $s2, 4*2($sp)
1143 lw $s1, 4*1($sp)
1144 lw $s0, 4*0($sp)
1145 ___
1146 $code.=<<___;
1147 jr $ra
1148 addu $sp,$sp,4*12
1149 .end poly1305_blocks
1150 ___
1151 }
1152 {
1153 my ($ctx,$mac,$nonce,$tmp4) = ($a0,$a1,$a2,$a3);
1154
1155 $code.=<<___;
1156 .align 5
1157 .globl poly1305_emit
1158 .ent poly1305_emit
1159 poly1305_emit:
1160 .frame $sp,0,$ra
1161 .set reorder
1162
1163 lw $tmp4,16($ctx)
1164 lw $tmp0,0($ctx)
1165 lw $tmp1,4($ctx)
1166 lw $tmp2,8($ctx)
1167 lw $tmp3,12($ctx)
1168
1169 li $in0,-4
1170 srl $ctx,$tmp4,2
1171 and $in0,$in0,$tmp4
1172 andi $tmp4,$tmp4,3
1173 addu $ctx,$ctx,$in0
1174
1175 addu $tmp0,$tmp0,$ctx
1176 sltu $ctx,$tmp0,$ctx
1177 addiu $in0,$tmp0,5
1178 addu $tmp1,$tmp1,$ctx
1179 sltiu $in1,$in0,5
1180 sltu $ctx,$tmp1,$ctx
1181 addu $in1,$in1,$tmp1
1182 addu $tmp2,$tmp2,$ctx
1183 sltu $in2,$in1,$tmp1
1184 sltu $ctx,$tmp2,$ctx
1185 addu $in2,$in2,$tmp2
1186 addu $tmp3,$tmp3,$ctx
1187 sltu $in3,$in2,$tmp2
1188 sltu $ctx,$tmp3,$ctx
1189 addu $in3,$in3,$tmp3
1190 addu $tmp4,$tmp4,$ctx
1191 sltu $ctx,$in3,$tmp3
1192 addu $ctx,$tmp4
1193
1194 srl $ctx,2
1195 subu $ctx,$zero,$ctx
1196
1197 xor $in0,$tmp0
1198 xor $in1,$tmp1
1199 xor $in2,$tmp2
1200 xor $in3,$tmp3
1201 and $in0,$ctx
1202 and $in1,$ctx
1203 and $in2,$ctx
1204 and $in3,$ctx
1205 xor $in0,$tmp0
1206 xor $in1,$tmp1
1207 xor $in2,$tmp2
1208 xor $in3,$tmp3
1209
1210 lw $tmp0,0($nonce)
1211 lw $tmp1,4($nonce)
1212 lw $tmp2,8($nonce)
1213 lw $tmp3,12($nonce)
1214
1215 addu $in0,$tmp0
1216 sltu $ctx,$in0,$tmp0
1217
1218 addu $in1,$tmp1
1219 sltu $tmp1,$in1,$tmp1
1220 addu $in1,$ctx
1221 sltu $ctx,$in1,$ctx
1222 addu $ctx,$tmp1
1223
1224 addu $in2,$tmp2
1225 sltu $tmp2,$in2,$tmp2
1226 addu $in2,$ctx
1227 sltu $ctx,$in2,$ctx
1228 addu $ctx,$tmp2
1229
1230 addu $in3,$tmp3
1231 addu $in3,$ctx
1232
1233 srl $tmp0,$in0,8
1234 srl $tmp1,$in0,16
1235 srl $tmp2,$in0,24
1236 sb $in0, 0($mac)
1237 sb $tmp0,1($mac)
1238 srl $tmp0,$in1,8
1239 sb $tmp1,2($mac)
1240 srl $tmp1,$in1,16
1241 sb $tmp2,3($mac)
1242 srl $tmp2,$in1,24
1243 sb $in1, 4($mac)
1244 sb $tmp0,5($mac)
1245 srl $tmp0,$in2,8
1246 sb $tmp1,6($mac)
1247 srl $tmp1,$in2,16
1248 sb $tmp2,7($mac)
1249 srl $tmp2,$in2,24
1250 sb $in2, 8($mac)
1251 sb $tmp0,9($mac)
1252 srl $tmp0,$in3,8
1253 sb $tmp1,10($mac)
1254 srl $tmp1,$in3,16
1255 sb $tmp2,11($mac)
1256 srl $tmp2,$in3,24
1257 sb $in3, 12($mac)
1258 sb $tmp0,13($mac)
1259 sb $tmp1,14($mac)
1260 sb $tmp2,15($mac)
1261
1262 jr $ra
1263 .end poly1305_emit
1264 .rdata
1265 .asciiz "Poly1305 for MIPS32, CRYPTOGAMS by \@dot-asm"
1266 .align 2
1267 ___
1268 }
1269 }}}
1270
# Emit the accumulated assembly.  An optional trailing command-line
# argument names the output file; without one, write to the inherited
# STDOUT.  Use three-argument open (the two-argument ">$output" form is
# vulnerable to mode injection via the filename) and check both open
# and close, since buffered write errors only surface at close time.
if (my $output = pop @ARGV) {
    open STDOUT, '>', $output or die "can't open $output: $!";
}
print $code;
close STDOUT or die "error closing STDOUT: $!";