#!/usr/bin/env perl
# SPDX-License-Identifier: GPL-2.0

# This code is taken from the OpenSSL project but the author (Andy Polyakov)
# has relicensed it under the GPLv2. Therefore this program is free software;
# you can redistribute it and/or modify it under the terms of the GNU General
# Public License version 2 as published by the Free Software Foundation.
#
# The original headers, including the original license headers, are
# included below for completeness.

# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================

# SHA256 block procedure for ARMv4. May 2007.

# Performance is ~2x better than gcc 3.4 generated code and in "abso-
# lute" terms is ~2250 cycles per 64-byte block or ~35 cycles per
# byte [on single-issue Xscale PXA250 core].

# July 2010.
#
# Rescheduling for dual-issue pipeline resulted in 22% improvement on
# Cortex A8 core and ~20 cycles per processed byte.

# February 2011.
#
# Profiler-assisted and platform-specific optimization resulted in 16%
# improvement on Cortex A8 core and ~15.4 cycles per processed byte.

# September 2013.
#
# Add NEON implementation. On Cortex A8 it was measured to process one
# byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon
# S4 does it in 12.5 cycles too, but it's 50% faster than integer-only
# code (meaning that pure NEON performance is at least 65% better).

# May 2014.
#
# Add ARMv8 code path performing at 2.0 cpb on Apple A7.
while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
open STDOUT,">$output";

$ctx="r0";      $t0="r0";
$inp="r1";      $t4="r1";
$len="r2";      $t1="r2";
$T1="r3";       $t3="r3";
$A="r4";
$B="r5";
$C="r6";
$D="r7";
$E="r8";
$F="r9";
$G="r10";
$H="r11";
@V=($A,$B,$C,$D,$E,$F,$G,$H);
$t2="r12";
$Ktbl="r14";

@Sigma0=( 2,13,22);
@Sigma1=( 6,11,25);
@sigma0=( 7,18, 3);
@sigma1=(17,19,10);
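
# The four arrays above are the rotate/shift amounts of the FIPS 180-4
# SHA-256 functions; the last element of @sigma0/@sigma1 is a logical
# shift rather than a rotate. As an illustrative sketch (not used by
# the generator), the functions they encode are:
#
#       sub ror32  { my ($x,$n)=@_; (($x>>$n)|($x<<(32-$n))) & 0xffffffff }
#       sub Sigma0 { my $x=shift; ror32($x,2)^ror32($x,13)^ror32($x,22)  }
#       sub Sigma1 { my $x=shift; ror32($x,6)^ror32($x,11)^ror32($x,25)  }
#       sub sigma0 { my $x=shift; ror32($x,7)^ror32($x,18)^($x>>3)       }
#       sub sigma1 { my $x=shift; ror32($x,17)^ror32($x,19)^($x>>10)     }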

sub BODY_00_15 {
my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_;

$code.=<<___ if ($i<16);
#if __ARM_ARCH__>=7
	@ ldr	$t1,[$inp],#4			@ $i
# if $i==15
	str	$inp,[sp,#17*4]			@ make room for $t4
# endif
	eor	$t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]`
	add	$a,$a,$t2			@ h+=Maj(a,b,c) from the past
	eor	$t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]`	@ Sigma1(e)
# ifndef __ARMEB__
	rev	$t1,$t1
# endif
#else
	@ ldrb	$t1,[$inp,#3]			@ $i
	add	$a,$a,$t2			@ h+=Maj(a,b,c) from the past
	ldrb	$t2,[$inp,#2]
	ldrb	$t0,[$inp,#1]
	orr	$t1,$t1,$t2,lsl#8
	ldrb	$t2,[$inp],#4
	orr	$t1,$t1,$t0,lsl#16
# if $i==15
	str	$inp,[sp,#17*4]			@ make room for $t4
# endif
	eor	$t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]`
	orr	$t1,$t1,$t2,lsl#24
	eor	$t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]`	@ Sigma1(e)
#endif
___
$code.=<<___;
	ldr	$t2,[$Ktbl],#4			@ *K256++
	add	$h,$h,$t1			@ h+=X[i]
	str	$t1,[sp,#`$i%16`*4]
	eor	$t1,$f,$g
	add	$h,$h,$t0,ror#$Sigma1[0]	@ h+=Sigma1(e)
	and	$t1,$t1,$e
	add	$h,$h,$t2			@ h+=K256[i]
	eor	$t1,$t1,$g			@ Ch(e,f,g)
	eor	$t0,$a,$a,ror#`$Sigma0[1]-$Sigma0[0]`
	add	$h,$h,$t1			@ h+=Ch(e,f,g)
#if $i==31
	and	$t2,$t2,#0xff
	cmp	$t2,#0xf2			@ done?
#endif
#if $i<15
# if __ARM_ARCH__>=7
	ldr	$t1,[$inp],#4			@ prefetch
# else
	ldrb	$t1,[$inp,#3]
# endif
	eor	$t2,$a,$b			@ a^b, b^c in next round
#else
	ldr	$t1,[sp,#`($i+2)%16`*4]		@ from future BODY_16_xx
	eor	$t2,$a,$b			@ a^b, b^c in next round
	ldr	$t4,[sp,#`($i+15)%16`*4]	@ from future BODY_16_xx
#endif
	eor	$t0,$t0,$a,ror#`$Sigma0[2]-$Sigma0[0]`	@ Sigma0(a)
	and	$t3,$t3,$t2			@ (b^c)&=(a^b)
	add	$d,$d,$h			@ d+=h
	eor	$t3,$t3,$b			@ Maj(a,b,c)
	add	$h,$h,$t0,ror#$Sigma0[0]	@ h+=Sigma0(a)
	@ add	$h,$h,$t3			@ h+=Maj(a,b,c)
___
	($t2,$t3)=($t3,$t2);
}
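
# BODY_00_15 defers the Maj(a,b,c) term: each round leaves Maj in a
# scratch register ($t2/$t3, swapped at the end of each body) and the
# *next* round folds it in with "add $a,$a,$t2", hiding the result
# latency. Carrying b^c across rounds makes Maj one and+eor per round:
#	Maj(a,b,c) = ((a^b) & (b^c)) ^ b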

sub BODY_16_XX {
my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_;

$code.=<<___;
	@ ldr	$t1,[sp,#`($i+1)%16`*4]		@ $i
	@ ldr	$t4,[sp,#`($i+14)%16`*4]
	mov	$t0,$t1,ror#$sigma0[0]
	add	$a,$a,$t2			@ h+=Maj(a,b,c) from the past
	mov	$t2,$t4,ror#$sigma1[0]
	eor	$t0,$t0,$t1,ror#$sigma0[1]
	eor	$t2,$t2,$t4,ror#$sigma1[1]
	eor	$t0,$t0,$t1,lsr#$sigma0[2]	@ sigma0(X[i+1])
	ldr	$t1,[sp,#`($i+0)%16`*4]
	eor	$t2,$t2,$t4,lsr#$sigma1[2]	@ sigma1(X[i+14])
	ldr	$t4,[sp,#`($i+9)%16`*4]

	add	$t2,$t2,$t0
	eor	$t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]`	@ from BODY_00_15
	add	$t1,$t1,$t2
	eor	$t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]`	@ Sigma1(e)
	add	$t1,$t1,$t4			@ X[i]
___
	&BODY_00_15(@_);
}
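
# The schedule recurrence computed above, in W-notation:
#	W[i] = sigma1(W[i-2]) + W[i-7] + sigma0(W[i-15]) + W[i-16]
# with the 16-entry window kept on the stack; indices are taken mod 16.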

$code=<<___;
#ifndef __KERNEL__
# include "arm_arch.h"
#else
# define __ARM_ARCH__ __LINUX_ARM_ARCH__
# define __ARM_MAX_ARCH__ 7
#endif

.text
#if __ARM_ARCH__<7
.code	32
#else
.syntax unified
# ifdef __thumb2__
.thumb
# else
.code	32
# endif
#endif

.type	K256,%object
.align	5
K256:
.word	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
.word	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
.word	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
.word	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
.word	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
.word	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
.word	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
.word	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
.word	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
.word	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
.word	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
.word	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
.word	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
.word	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
.word	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
.word	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
.size	K256,.-K256
.word	0				@ terminator
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
.LOPENSSL_armcap:
.word	OPENSSL_armcap_P-sha256_block_data_order
#endif
.align	5

.global	sha256_block_data_order
.type	sha256_block_data_order,%function
sha256_block_data_order:
.Lsha256_block_data_order:
#if __ARM_ARCH__<7
	sub	r3,pc,#8		@ sha256_block_data_order
#else
	adr	r3,.Lsha256_block_data_order
#endif
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
	ldr	r12,.LOPENSSL_armcap
	ldr	r12,[r3,r12]		@ OPENSSL_armcap_P
	tst	r12,#ARMV8_SHA256
	bne	.LARMv8
	tst	r12,#ARMV7_NEON
	bne	.LNEON
#endif
	add	$len,$inp,$len,lsl#6	@ len to point at the end of inp
	stmdb	sp!,{$ctx,$inp,$len,r4-r11,lr}
	ldmia	$ctx,{$A,$B,$C,$D,$E,$F,$G,$H}
	sub	$Ktbl,r3,#256+32	@ K256
	sub	sp,sp,#16*4		@ alloca(X[16])
.Loop:
# if __ARM_ARCH__>=7
	ldr	$t1,[$inp],#4
# else
	ldrb	$t1,[$inp,#3]
# endif
	eor	$t3,$B,$C		@ magic
	eor	$t2,$t2,$t2
___
for($i=0;$i<16;$i++)	{ &BODY_00_15($i,@V); unshift(@V,pop(@V)); }
$code.=".Lrounds_16_xx:\n";
for (;$i<32;$i++)	{ &BODY_16_XX($i,@V); unshift(@V,pop(@V)); }
$code.=<<___;
#if __ARM_ARCH__>=7
	ite	eq			@ Thumb2 thing, sanity check in ARM
#endif
	ldreq	$t3,[sp,#16*4]		@ pull ctx
	bne	.Lrounds_16_xx

	add	$A,$A,$t2		@ h+=Maj(a,b,c) from the past
	ldr	$t0,[$t3,#0]
	ldr	$t1,[$t3,#4]
	ldr	$t2,[$t3,#8]
	add	$A,$A,$t0
	ldr	$t0,[$t3,#12]
	add	$B,$B,$t1
	ldr	$t1,[$t3,#16]
	add	$C,$C,$t2
	ldr	$t2,[$t3,#20]
	add	$D,$D,$t0
	ldr	$t0,[$t3,#24]
	add	$E,$E,$t1
	ldr	$t1,[$t3,#28]
	add	$F,$F,$t2
	ldr	$inp,[sp,#17*4]		@ pull inp
	ldr	$t2,[sp,#18*4]		@ pull inp+len
	add	$G,$G,$t0
	add	$H,$H,$t1
	stmia	$t3,{$A,$B,$C,$D,$E,$F,$G,$H}
	cmp	$inp,$t2
	sub	$Ktbl,$Ktbl,#256	@ rewind Ktbl
	bne	.Loop

	add	sp,sp,#`16+3`*4		@ destroy frame
#if __ARM_ARCH__>=5
	ldmia	sp!,{r4-r11,pc}
#else
	ldmia	sp!,{r4-r11,lr}
	tst	lr,#1
	moveq	pc,lr			@ be binary compatible with V4, yet
	bx	lr			@ interoperable with Thumb ISA:-)
#endif
.size	sha256_block_data_order,.-sha256_block_data_order
___
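
# Rounds 16..63 re-enter the sixteen unrolled bodies at .Lrounds_16_xx
# three times. There is no round counter: at $i==31 BODY_00_15 compares
# the low byte of the K256 word just fetched against 0xf2, which only
# matches the final constant 0xc67178f2 (K[31] and K[47] end in 0x67
# and 0x85 respectively).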
######################################################################
# NEON stuff
#
{{{
my @X=map("q$_",(0..3));
my ($T0,$T1,$T2,$T3,$T4,$T5)=("q8","q9","q10","q11","d24","d25");
my $Xfer=$t4;
my $j=0;

sub Dlo()   { shift=~m|q([1]?[0-9])|?"d".($1*2):"";     }
sub Dhi()   { shift=~m|q([1]?[0-9])|?"d".($1*2+1):"";   }

sub AUTOLOAD()		# thunk [simplified] x86-style perlasm
{ my $opcode = $AUTOLOAD; $opcode =~ s/.*:://; $opcode =~ s/_/\./;
  my $arg = pop;
    $arg = "#$arg" if ($arg*1 eq $arg);
    $code .= "\t$opcode\t".join(',',@_,$arg)."\n";
}
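
# AUTOLOAD turns any otherwise-undefined sub call into an instruction
# string: the first underscore becomes a dot and a numeric last
# argument gains a '#'. E.g. &vshr_u32($T2,$T0,7) appends
# "\tvshr.u32\tq10,q8,#7\n" to $code.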

sub Xupdate()
{ use integer;
  my $body = shift;
  my @insns = (&$body,&$body,&$body,&$body);
  my ($a,$b,$c,$d,$e,$f,$g,$h);

	&vext_8		($T0,@X[0],@X[1],4);	# X[1..4]
	 eval(shift(@insns));
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vext_8		($T1,@X[2],@X[3],4);	# X[9..12]
	 eval(shift(@insns));
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vshr_u32	($T2,$T0,$sigma0[0]);
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vadd_i32	(@X[0],@X[0],$T1);	# X[0..3] += X[9..12]
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vshr_u32	($T1,$T0,$sigma0[2]);
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vsli_32	($T2,$T0,32-$sigma0[0]);
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vshr_u32	($T3,$T0,$sigma0[1]);
	 eval(shift(@insns));
	 eval(shift(@insns));
	&veor		($T1,$T1,$T2);
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vsli_32	($T3,$T0,32-$sigma0[1]);
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vshr_u32	($T4,&Dhi(@X[3]),$sigma1[0]);
	 eval(shift(@insns));
	 eval(shift(@insns));
	&veor		($T1,$T1,$T3);		# sigma0(X[1..4])
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vsli_32	($T4,&Dhi(@X[3]),32-$sigma1[0]);
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vshr_u32	($T5,&Dhi(@X[3]),$sigma1[2]);
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vadd_i32	(@X[0],@X[0],$T1);	# X[0..3] += sigma0(X[1..4])
	 eval(shift(@insns));
	 eval(shift(@insns));
	&veor		($T5,$T5,$T4);
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vshr_u32	($T4,&Dhi(@X[3]),$sigma1[1]);
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vsli_32	($T4,&Dhi(@X[3]),32-$sigma1[1]);
	 eval(shift(@insns));
	 eval(shift(@insns));
	&veor		($T5,$T5,$T4);		# sigma1(X[14..15])
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vadd_i32	(&Dlo(@X[0]),&Dlo(@X[0]),$T5);	# X[0..1] += sigma1(X[14..15])
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vshr_u32	($T4,&Dlo(@X[0]),$sigma1[0]);
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vsli_32	($T4,&Dlo(@X[0]),32-$sigma1[0]);
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vshr_u32	($T5,&Dlo(@X[0]),$sigma1[2]);
	 eval(shift(@insns));
	 eval(shift(@insns));
	&veor		($T5,$T5,$T4);
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vshr_u32	($T4,&Dlo(@X[0]),$sigma1[1]);
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vld1_32	("{$T0}","[$Ktbl,:128]!");
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vsli_32	($T4,&Dlo(@X[0]),32-$sigma1[1]);
	 eval(shift(@insns));
	 eval(shift(@insns));
	&veor		($T5,$T5,$T4);		# sigma1(X[16..17])
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vadd_i32	(&Dhi(@X[0]),&Dhi(@X[0]),$T5);	# X[2..3] += sigma1(X[16..17])
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vadd_i32	($T0,$T0,@X[0]);
	 while($#insns>=2) { eval(shift(@insns)); }
	&vst1_32	("{$T0}","[$Xfer,:128]!");
	 eval(shift(@insns));
	 eval(shift(@insns));

	push(@X,shift(@X));		# "rotate" X[]
}
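
# Xupdate emits one quad (four rounds' worth) of NEON message-schedule
# work, interleaving each vector instruction with scalar round
# fragments drawn from $body so the integer and NEON pipes run in
# parallel; the next K256 quad is loaded, pre-added to X[0..3], and the
# sum parked at *$Xfer for the scalar rounds to consume.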

sub Xpreload()
{ use integer;
  my $body = shift;
  my @insns = (&$body,&$body,&$body,&$body);
  my ($a,$b,$c,$d,$e,$f,$g,$h);

	 eval(shift(@insns));
	 eval(shift(@insns));
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vld1_32	("{$T0}","[$Ktbl,:128]!");
	 eval(shift(@insns));
	 eval(shift(@insns));
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vrev32_8	(@X[0],@X[0]);
	 eval(shift(@insns));
	 eval(shift(@insns));
	 eval(shift(@insns));
	 eval(shift(@insns));
	&vadd_i32	($T0,$T0,@X[0]);
	 foreach (@insns) { eval; }	# remaining instructions
	&vst1_32	("{$T0}","[$Xfer,:128]!");

	push(@X,shift(@X));		# "rotate" X[]
}

sub body_00_15 () {
	(
	'($a,$b,$c,$d,$e,$f,$g,$h)=@V;'.
	'&add	($h,$h,$t1)',			# h+=X[i]+K[i]
	'&eor	($t1,$f,$g)',
	'&eor	($t0,$e,$e,"ror#".($Sigma1[1]-$Sigma1[0]))',
	'&add	($a,$a,$t2)',			# h+=Maj(a,b,c) from the past
	'&and	($t1,$t1,$e)',
	'&eor	($t2,$t0,$e,"ror#".($Sigma1[2]-$Sigma1[0]))',	# Sigma1(e)
	'&eor	($t0,$a,$a,"ror#".($Sigma0[1]-$Sigma0[0]))',
	'&eor	($t1,$t1,$g)',			# Ch(e,f,g)
	'&add	($h,$h,$t2,"ror#$Sigma1[0]")',	# h+=Sigma1(e)
	'&eor	($t2,$a,$b)',			# a^b, b^c in next round
	'&eor	($t0,$t0,$a,"ror#".($Sigma0[2]-$Sigma0[0]))',	# Sigma0(a)
	'&add	($h,$h,$t1)',			# h+=Ch(e,f,g)
	'&ldr	($t1,sprintf "[sp,#%d]",4*(($j+1)&15))	if (($j&15)!=15);'.
	'&ldr	($t1,"[$Ktbl]")				if ($j==15);'.
	'&ldr	($t1,"[sp,#64]")			if ($j==31)',
	'&and	($t3,$t3,$t2)',			# (b^c)&=(a^b)
	'&add	($d,$d,$h)',			# d+=h
	'&add	($h,$h,$t0,"ror#$Sigma0[0]");'.	# h+=Sigma0(a)
	'&eor	($t3,$t3,$b)',			# Maj(a,b,c)
	'$j++;	unshift(@V,pop(@V)); ($t2,$t3)=($t3,$t2);'
	)
}
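
# body_00_15 returns one scalar round as a list of short Perl fragments
# (roughly one instruction each); Xupdate/Xpreload eval them between
# NEON instructions so the two issue streams interleave. Rotating @V
# instead of moving data keeps the eight working variables in fixed
# registers, while $j tracks the round to pick where X[i] comes from.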

$code.=<<___;
#if __ARM_MAX_ARCH__>=7
.arch	armv7-a
.fpu	neon

.global	sha256_block_data_order_neon
.type	sha256_block_data_order_neon,%function
.align	4
sha256_block_data_order_neon:
.LNEON:
	stmdb	sp!,{r4-r12,lr}

	sub	$H,sp,#16*4+16
	adr	$Ktbl,.Lsha256_block_data_order
	sub	$Ktbl,$Ktbl,#.Lsha256_block_data_order-K256
	bic	$H,$H,#15		@ align for 128-bit stores
	mov	$t2,sp
	mov	sp,$H			@ alloca
	add	$len,$inp,$len,lsl#6	@ len to point at the end of inp

	vld1.8		{@X[0]},[$inp]!
	vld1.8		{@X[1]},[$inp]!
	vld1.8		{@X[2]},[$inp]!
	vld1.8		{@X[3]},[$inp]!
	vld1.32		{$T0},[$Ktbl,:128]!
	vld1.32		{$T1},[$Ktbl,:128]!
	vld1.32		{$T2},[$Ktbl,:128]!
	vld1.32		{$T3},[$Ktbl,:128]!
	vrev32.8	@X[0],@X[0]		@ yes, even on
	str		$ctx,[sp,#64]
	vrev32.8	@X[1],@X[1]		@ big-endian
	str		$inp,[sp,#68]
	mov		$Xfer,sp
	vrev32.8	@X[2],@X[2]
	str		$len,[sp,#72]
	vrev32.8	@X[3],@X[3]
	str		$t2,[sp,#76]		@ save original sp
	vadd.i32	$T0,$T0,@X[0]
	vadd.i32	$T1,$T1,@X[1]
	vst1.32		{$T0},[$Xfer,:128]!
	vadd.i32	$T2,$T2,@X[2]
	vst1.32		{$T1},[$Xfer,:128]!
	vadd.i32	$T3,$T3,@X[3]
	vst1.32		{$T2},[$Xfer,:128]!
	vst1.32		{$T3},[$Xfer,:128]!

	ldmia		$ctx,{$A-$H}
	sub		$Xfer,$Xfer,#64
	ldr		$t1,[sp,#0]
	eor		$t2,$t2,$t2
	eor		$t3,$B,$C
	b		.L_00_48

.align	4
.L_00_48:
___
	&Xupdate(\&body_00_15);
	&Xupdate(\&body_00_15);
	&Xupdate(\&body_00_15);
	&Xupdate(\&body_00_15);
$code.=<<___;
	teq	$t1,#0				@ check for K256 terminator
	ldr	$t1,[sp,#0]
	sub	$Xfer,$Xfer,#64
	bne	.L_00_48

	ldr		$inp,[sp,#68]
	ldr		$t0,[sp,#72]
	sub		$Ktbl,$Ktbl,#256	@ rewind $Ktbl
	teq		$inp,$t0
	it		eq
	subeq		$inp,$inp,#64		@ avoid SEGVs
	vld1.8		{@X[0]},[$inp]!		@ load next input block
	vld1.8		{@X[1]},[$inp]!
	vld1.8		{@X[2]},[$inp]!
	vld1.8		{@X[3]},[$inp]!
	it		ne
	strne		$inp,[sp,#68]
	mov		$Xfer,sp
___
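
# The four Xpreload calls below run the final 16 scalar rounds while
# the NEON unit byte-swaps the freshly loaded block and pre-adds its
# first K256 quads, overlapping the tail of one block with the head of
# the next.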
	&Xpreload(\&body_00_15);
	&Xpreload(\&body_00_15);
	&Xpreload(\&body_00_15);
	&Xpreload(\&body_00_15);
$code.=<<___;
	ldr	$t0,[$t1,#0]
	add	$A,$A,$t2			@ h+=Maj(a,b,c) from the past
	ldr	$t2,[$t1,#4]
	ldr	$t3,[$t1,#8]
	ldr	$t4,[$t1,#12]
	add	$A,$A,$t0			@ accumulate
	ldr	$t0,[$t1,#16]
	add	$B,$B,$t2
	ldr	$t2,[$t1,#20]
	add	$C,$C,$t3
	ldr	$t3,[$t1,#24]
	add	$D,$D,$t4
	ldr	$t4,[$t1,#28]
	add	$E,$E,$t0
	str	$A,[$t1],#4
	add	$F,$F,$t2
	str	$B,[$t1],#4
	add	$G,$G,$t3
	str	$C,[$t1],#4
	add	$H,$H,$t4
	str	$D,[$t1],#4
	stmia	$t1,{$E-$H}

	ittte	ne
	movne	$Xfer,sp
	ldrne	$t1,[sp,#0]
	eorne	$t2,$t2,$t2
	ldreq	sp,[sp,#76]			@ restore original sp
	itt	ne
	eorne	$t3,$B,$C
	bne	.L_00_48

	ldmia	sp!,{r4-r12,pc}
.size	sha256_block_data_order_neon,.-sha256_block_data_order_neon
#endif
___
}}}
######################################################################
# ARMv8 stuff
#
{{{
my ($ABCD,$EFGH,$abcd)=map("q$_",(0..2));
my @MSG=map("q$_",(8..11));
my ($W0,$W1,$ABCD_SAVE,$EFGH_SAVE)=map("q$_",(12..15));
my $Ktbl="r3";

$code.=<<___;
#if __ARM_MAX_ARCH__>=7

# ifdef __thumb2__
#  define INST(a,b,c,d)	.byte	c,d|0xc,a,b
# else
#  define INST(a,b,c,d)	.byte	a,b,c,d
# endif

.type	sha256_block_data_order_armv8,%function
.align	5
sha256_block_data_order_armv8:
.LARMv8:
	vld1.32	{$ABCD,$EFGH},[$ctx]
# ifdef __thumb2__
	adr	$Ktbl,.LARMv8
	sub	$Ktbl,$Ktbl,#.LARMv8-K256
# else
	adrl	$Ktbl,K256
# endif
	add	$len,$inp,$len,lsl#6	@ len to point at the end of inp

.Loop_v8:
	vld1.8		{@MSG[0]-@MSG[1]},[$inp]!
	vld1.8		{@MSG[2]-@MSG[3]},[$inp]!
	vld1.32		{$W0},[$Ktbl]!
	vrev32.8	@MSG[0],@MSG[0]
	vrev32.8	@MSG[1],@MSG[1]
	vrev32.8	@MSG[2],@MSG[2]
	vrev32.8	@MSG[3],@MSG[3]
	vmov		$ABCD_SAVE,$ABCD	@ offload
	vmov		$EFGH_SAVE,$EFGH
	teq		$inp,$len
___
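
# Each iteration of the loop below retires four rounds: sha256h and
# sha256h2 update the two halves of the state with W+K held in $W0,
# while sha256su0/sha256su1 extend the message schedule one quad ahead.
# Twelve iterations plus the four explicit groups after the loop cover
# all 64 rounds.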
for($i=0;$i<12;$i++) {
$code.=<<___;
	vld1.32		{$W1},[$Ktbl]!
	vadd.i32	$W0,$W0,@MSG[0]
	sha256su0	@MSG[0],@MSG[1]
	vmov		$abcd,$ABCD
	sha256h		$ABCD,$EFGH,$W0
	sha256h2	$EFGH,$abcd,$W0
	sha256su1	@MSG[0],@MSG[2],@MSG[3]
___
	($W0,$W1)=($W1,$W0);	push(@MSG,shift(@MSG));
}
$code.=<<___;
	vld1.32		{$W1},[$Ktbl]!
	vadd.i32	$W0,$W0,@MSG[0]
	vmov		$abcd,$ABCD
	sha256h		$ABCD,$EFGH,$W0
	sha256h2	$EFGH,$abcd,$W0

	vld1.32		{$W0},[$Ktbl]!
	vadd.i32	$W1,$W1,@MSG[1]
	vmov		$abcd,$ABCD
	sha256h		$ABCD,$EFGH,$W1
	sha256h2	$EFGH,$abcd,$W1

	vld1.32		{$W1},[$Ktbl]
	vadd.i32	$W0,$W0,@MSG[2]
	sub		$Ktbl,$Ktbl,#256-16	@ rewind
	vmov		$abcd,$ABCD
	sha256h		$ABCD,$EFGH,$W0
	sha256h2	$EFGH,$abcd,$W0

	vadd.i32	$W1,$W1,@MSG[3]
	vmov		$abcd,$ABCD
	sha256h		$ABCD,$EFGH,$W1
	sha256h2	$EFGH,$abcd,$W1

	vadd.i32	$ABCD,$ABCD,$ABCD_SAVE
	vadd.i32	$EFGH,$EFGH,$EFGH_SAVE
	it		ne
	bne		.Loop_v8

	vst1.32		{$ABCD,$EFGH},[$ctx]

	ret		@ bx lr
.size	sha256_block_data_order_armv8,.-sha256_block_data_order_armv8
#endif
___
}}}
$code.=<<___;
.asciz	"SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
.align	2
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
.comm	OPENSSL_armcap_P,4,4
#endif
___

open SELF,$0;
while(<SELF>) {
	next if (/^#!/);
	last if (!s/^#/@/ and !/^$/);
	print;
}
close SELF;
{   my  %opcode = (
	"sha256h"	=> 0xf3000c40,	"sha256h2"	=> 0xf3100c40,
	"sha256su0"	=> 0xf3ba03c0,	"sha256su1"	=> 0xf3200c40	);

    sub unsha256 {
	my ($mnemonic,$arg)=@_;

	if ($arg =~ m/q([0-9]+)(?:,\s*q([0-9]+))?,\s*q([0-9]+)/o) {
	    my $word = $opcode{$mnemonic}|(($1&7)<<13)|(($1&8)<<19)
					 |(($2&7)<<17)|(($2&8)<<4)
					 |(($3&7)<<1) |(($3&8)<<2);
	    # since ARMv7 instructions are always encoded little-endian.
	    # correct solution is to use .inst directive, but older
	    # assemblers don't implement it:-(
	    sprintf "INST(0x%02x,0x%02x,0x%02x,0x%02x)\t@ %s %s",
			$word&0xff,($word>>8)&0xff,
			($word>>16)&0xff,($word>>24)&0xff,
			$mnemonic,$arg;
	}
    }
}
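
# Emitting the opcode as INST(byte,byte,byte,byte) keeps the output
# assembleable with toolchains that predate the sha256* mnemonics; the
# INST macro defined earlier reorders the bytes to match the Thumb-2
# versus ARM encoding of the same instruction.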

foreach (split($/,$code)) {

	s/\`([^\`]*)\`/eval $1/geo;

	s/\b(sha256\w+)\s+(q.*)/unsha256($1,$2)/geo;

	s/\bret\b/bx	lr/go		or
	s/\bbx\s+lr\b/.word\t0xe12fff1e/go;	# make it possible to compile with -march=armv4

	print $_,"\n";
}

close STDOUT;
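
# Usage sketch (the file name is illustrative; this script is commonly
# shipped as sha256-armv4.pl):
#	perl sha256-armv4.pl sha256-core.S
# The first argument that looks like a file name becomes the output
# (see `open STDOUT,">$output"` near the top); the generated source
# then assembles for ARMv4 through ARMv8 depending on __ARM_ARCH__,
# __ARM_MAX_ARCH__ and __KERNEL__.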