0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024
0025
0026
0027
0028
0029
0030
0031
0032
0033
0034
0035
0036
0037
0038
0039
0040
0041
0042
0043
0044
0045
0046
0047
0048
0049
0050
0051
0052
0053
0054
0055
0056
0057
0058
0059
0060
0061
0062
# Command line is "<flavour> <output>"; both are taken from the tail of @ARGV.
$output=pop;
$flavour=pop;

if ($flavour && $flavour ne "void") {
    # Capture the directory part of the script path (including the trailing
    # separator) so the translator can be located relative to this script.
    $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
    ( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
    ( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
    die "can't locate arm-xlate.pl";

    # Everything printed to STDOUT from here on is piped through arm-xlate.pl.
    open OUT,"| \"$^X\" $xlate $flavour $output"
        or die "can't call $xlate: $!";
    *STDOUT=*OUT;
} else {
    # No flavour (or "void"): write the generated text directly to the file.
    open STDOUT,">$output" or die "can't open $output: $!";
}
0077
# Algorithm parameters are chosen from the output file name: a name that
# contains "512" selects SHA-512, anything else selects SHA-256.
my $want512 = ($output =~ /512/) ? 1 : 0;

# Digest width, word size in bytes, round count and register-name prefix.
($BITS, $SZ, $rounds, $reg_t) = $want512 ? (512, 8, 80, "x")
                                         : (256, 4, 64, "w");

# Rotate/shift amounts used by the round function and the message schedule.
@Sigma0 = $want512 ? (28,34,39) : ( 2,13,22);
@Sigma1 = $want512 ? (14,18,41) : ( 6,11,25);
@sigma0 = $want512 ? ( 1, 8, 7) : ( 7,18, 3);
@sigma1 = $want512 ? (19,61, 6) : (17,19,10);

$func = "sha" . $BITS . "_block_data_order";

# Argument registers x0..x2, plus x30 as the constant-table pointer.
($ctx, $inp, $num) = map { "x$_" } 0 .. 2;
$Ktbl = "x30";

# 16-entry message window, eight working variables and four temporaries,
# all named with the width-specific register prefix.
@X = map { $reg_t . $_ } 3 .. 15, 0 .. 2;
($A,$B,$C,$D,$E,$F,$G,$H) = map { $reg_t . $_ } 20 .. 27;
@V = ($A,$B,$C,$D,$E,$F,$G,$H);
($t0,$t1,$t2,$t3) = map { $reg_t . $_ } 16, 17, 19, 28;
0105
# Append the assembly for one scalar round to $code.
#
#   $i        round index; rounds 0..15 consume input words, rounds >= 15
#             also extend the message schedule for word ($i+1)&15
#   $a .. $h  register names currently holding the eight working variables
#
# Uses the file-level @X window, temporaries $t0..$t3, $inp, $Ktbl, $SZ and
# the @Sigma0/@Sigma1/@sigma0/@sigma1 amounts.  $t2 and $t3 are swapped on
# exit, so the a^b value computed here serves as b^c in the next round.
#
# NOTE(review): the "#..." operands below were missing from the listing and
# are reconstructed from the Sigma/sigma definitions and the four-slot
# spill area; verify against a pristine copy before use.
sub BODY_00_xx {
my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_;
my $j=($i+1)&15;
my ($T0,$T1,$T2)=(@X[($i-8)&15],@X[($i-9)&15],@X[($i-10)&15]);
   $T0=@X[$i+3] if ($i<11);

$code.=<<___	if ($i<16);
#ifndef	__AARCH64EB__
	rev	@X[$i],@X[$i]			// $i
#endif
___
$code.=<<___	if ($i<13 && ($i&1));
	ldp	@X[$i+1],@X[$i+2],[$inp],#2*$SZ
___
$code.=<<___	if ($i==13);
	ldp	@X[14],@X[15],[$inp]
___
$code.=<<___	if ($i>=14);
	ldr	@X[($i-11)&15],[sp,#`$SZ*(($i-11)%4)`]
___
$code.=<<___	if ($i>0 && $i<16);
	add	$a,$a,$t1			// h+=Sigma0(a)
___
$code.=<<___	if ($i>=11);
	str	@X[($i-8)&15],[sp,#`$SZ*(($i-8)%4)`]
___
# Rounds 0..14: Sigma1/Sigma0 are built with merged rotate-and-eor forms,
# using a free window register ($T0) as scratch.
$code.=<<___	if ($i<15);
	ror	$t0,$e,#$Sigma1[0]
	add	$h,$h,$t2			// h+=K[i]
	eor	$T0,$e,$e,ror#`$Sigma1[2]-$Sigma1[1]`
	and	$t1,$f,$e
	bic	$t2,$g,$e
	add	$h,$h,@X[$i&15]			// h+=X[i]
	orr	$t1,$t1,$t2			// Ch(e,f,g)
	eor	$t2,$a,$b			// a^b, b^c in next round
	eor	$t0,$t0,$T0,ror#$Sigma1[1]	// Sigma1(e)
	ror	$T0,$a,#$Sigma0[0]
	add	$h,$h,$t1			// h+=Ch(e,f,g)
	eor	$t1,$a,$a,ror#`$Sigma0[2]-$Sigma0[1]`
	add	$h,$h,$t0			// h+=Sigma1(e)
	and	$t3,$t3,$t2			// (b^c)&=(a^b)
	add	$d,$d,$h			// d+=h
	eor	$t3,$t3,$b			// Maj(a,b,c)
	eor	$t1,$T0,$t1,ror#$Sigma0[1]	// Sigma0(a)
	add	$h,$h,$t3			// h+=Maj(a,b,c)
	ldr	$t3,[$Ktbl],#$SZ		// *K++, $t2 in next round
	//add	$h,$h,$t1			// h+=Sigma0(a)
___
# Rounds >= 15: same round function, interleaved with the sigma0/sigma1
# message-schedule update of X[$j].
$code.=<<___	if ($i>=15);
	ror	$t0,$e,#$Sigma1[0]
	add	$h,$h,$t2			// h+=K[i]
	ror	$T1,@X[($j+1)&15],#$sigma0[0]
	and	$t1,$f,$e
	ror	$T2,@X[($j+14)&15],#$sigma1[0]
	bic	$t2,$g,$e
	ror	$T0,$a,#$Sigma0[0]
	add	$h,$h,@X[$i&15]			// h+=X[i]
	eor	$t0,$t0,$e,ror#$Sigma1[1]
	eor	$T1,$T1,@X[($j+1)&15],ror#$sigma0[1]
	orr	$t1,$t1,$t2			// Ch(e,f,g)
	eor	$t2,$a,$b			// a^b, b^c in next round
	eor	$t0,$t0,$e,ror#$Sigma1[2]	// Sigma1(e)
	eor	$T0,$T0,$a,ror#$Sigma0[1]
	add	$h,$h,$t1			// h+=Ch(e,f,g)
	and	$t3,$t3,$t2			// (b^c)&=(a^b)
	eor	$T2,$T2,@X[($j+14)&15],ror#$sigma1[1]
	eor	$T1,$T1,@X[($j+1)&15],lsr#$sigma0[2]	// sigma0(X[i+1])
	add	$h,$h,$t0			// h+=Sigma1(e)
	eor	$t3,$t3,$b			// Maj(a,b,c)
	eor	$t1,$T0,$a,ror#$Sigma0[2]	// Sigma0(a)
	eor	$T2,$T2,@X[($j+14)&15],lsr#$sigma1[2]	// sigma1(X[i+14])
	add	@X[$j],@X[$j],@X[($j+9)&15]
	add	$d,$d,$h			// d+=h
	add	$h,$h,$t3			// h+=Maj(a,b,c)
	ldr	$t3,[$Ktbl],#$SZ		// *K++, $t2 in next round
	add	@X[$j],@X[$j],$T1
	add	$h,$h,$t1			// h+=Sigma0(a)
	add	@X[$j],@X[$j],$T2
___
	($t2,$t3)=($t3,$t2);
}
0194
# Scalar implementation of ${func}: prologue, unrolled rounds, context
# update and epilogue, followed by the label that opens the K table.
#
# NOTE(review): the preprocessor guards and "#..." operands in these
# heredocs were missing from the listing and are reconstructed (128-byte
# frame: x19..x28 at 16..80, ctx/end pointer at 96, input pointer at 112);
# verify against a pristine copy before use.
$code.=<<___;
#ifndef	__KERNEL__
# include "arm_arch.h"
#endif

.text

.extern	OPENSSL_armcap_P
.globl	$func
.type	$func,%function
.align	6
$func:
___
# SHA-256 only: dispatch to the crypto-extension or NEON entry points
# according to the capability word.
$code.=<<___	if ($SZ==4);
#ifndef	__KERNEL__
# ifdef	__ILP32__
	ldrsw	x16,.LOPENSSL_armcap_P
# else
	ldr	x16,.LOPENSSL_armcap_P
# endif
	adr	x17,.LOPENSSL_armcap_P
	add	x16,x16,x17
	ldr	w16,[x16]
	tst	w16,#ARMV8_SHA256
	b.ne	.Lv8_entry
	tst	w16,#ARMV7_NEON
	b.ne	.Lneon_entry
#endif
___
$code.=<<___;
	stp	x29,x30,[sp,#-128]!
	add	x29,sp,#0

	stp	x19,x20,[sp,#16]
	stp	x21,x22,[sp,#32]
	stp	x23,x24,[sp,#48]
	stp	x25,x26,[sp,#64]
	stp	x27,x28,[sp,#80]
	sub	sp,sp,#4*$SZ

	ldp	$A,$B,[$ctx]				// load context
	ldp	$C,$D,[$ctx,#2*$SZ]
	ldp	$E,$F,[$ctx,#4*$SZ]
	add	$num,$inp,$num,lsl#`log(16*$SZ)/log(2)`	// end of input
	ldp	$G,$H,[$ctx,#6*$SZ]
	adr	$Ktbl,.LK$BITS
	stp	$ctx,$num,[x29,#96]

.Loop:
	ldp	@X[0],@X[1],[$inp],#2*$SZ
	ldr	$t2,[$Ktbl],#$SZ			// *K++
	eor	$t3,$B,$C				// magic seed
	str	$inp,[x29,#112]
___
# Rounds 0..15 are emitted once; rounds 16..31 form the body of a loop that
# repeats until the zero terminator of the K table is loaded into $t2.
# @V is rotated after every round so each round sees renamed registers.
for ($i=0;$i<16;$i++)	{ &BODY_00_xx($i,@V); unshift(@V,pop(@V)); }
$code.=".Loop_16_xx:\n";
for (;$i<32;$i++)	{ &BODY_00_xx($i,@V); unshift(@V,pop(@V)); }
$code.=<<___;
	cbnz	$t2,.Loop_16_xx

	ldp	$ctx,$num,[x29,#96]
	ldr	$inp,[x29,#112]
	sub	$Ktbl,$Ktbl,#`$SZ*($rounds+1)`		// rewind

	ldp	@X[0],@X[1],[$ctx]
	ldp	@X[2],@X[3],[$ctx,#2*$SZ]
	add	$inp,$inp,#14*$SZ			// advance input pointer
	ldp	@X[4],@X[5],[$ctx,#4*$SZ]
	add	$A,$A,@X[0]
	ldp	@X[6],@X[7],[$ctx,#6*$SZ]
	add	$B,$B,@X[1]
	add	$C,$C,@X[2]
	add	$D,$D,@X[3]
	stp	$A,$B,[$ctx]
	add	$E,$E,@X[4]
	add	$F,$F,@X[5]
	stp	$C,$D,[$ctx,#2*$SZ]
	add	$G,$G,@X[6]
	add	$H,$H,@X[7]
	cmp	$inp,$num
	stp	$E,$F,[$ctx,#4*$SZ]
	stp	$G,$H,[$ctx,#6*$SZ]
	b.ne	.Loop

	ldp	x19,x20,[x29,#16]
	add	sp,sp,#4*$SZ
	ldp	x21,x22,[x29,#32]
	ldp	x23,x24,[x29,#48]
	ldp	x25,x26,[x29,#64]
	ldp	x27,x28,[x29,#80]
	ldp	x29,x30,[sp],#128
	ret
.size	$func,.-$func

.align	6
.type	.LK$BITS,%object
.LK$BITS:
___
# Round-constant table emitted for the 64-bit-word variant ($SZ==8): eighty
# .quad constants followed by a zero entry that the round loop uses as its
# end-of-table marker.
$code.=<<___ if ($SZ==8);
 .quad 0x428a2f98d728ae22,0x7137449123ef65cd
 .quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
 .quad 0x3956c25bf348b538,0x59f111f1b605d019
 .quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118
 .quad 0xd807aa98a3030242,0x12835b0145706fbe
 .quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
 .quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1
 .quad 0x9bdc06a725c71235,0xc19bf174cf692694
 .quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3
 .quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
 .quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483
 .quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5
 .quad 0x983e5152ee66dfab,0xa831c66d2db43210
 .quad 0xb00327c898fb213f,0xbf597fc7beef0ee4
 .quad 0xc6e00bf33da88fc2,0xd5a79147930aa725
 .quad 0x06ca6351e003826f,0x142929670a0e6e70
 .quad 0x27b70a8546d22ffc,0x2e1b21385c26c926
 .quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df
 .quad 0x650a73548baf63de,0x766a0abb3c77b2a8
 .quad 0x81c2c92e47edaee6,0x92722c851482353b
 .quad 0xa2bfe8a14cf10364,0xa81a664bbc423001
 .quad 0xc24b8b70d0f89791,0xc76c51a30654be30
 .quad 0xd192e819d6ef5218,0xd69906245565a910
 .quad 0xf40e35855771202a,0x106aa07032bbd1b8
 .quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53
 .quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
 .quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
 .quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
 .quad 0x748f82ee5defb2fc,0x78a5636f43172f60
 .quad 0x84c87814a1f0ab72,0x8cc702081a6439ec
 .quad 0x90befffa23631e28,0xa4506cebde82bde9
 .quad 0xbef9a3f7b2c67915,0xc67178f2e372532b
 .quad 0xca273eceea26619c,0xd186b8c721c0c207
 .quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
 .quad 0x06f067aa72176fba,0x0a637dc5a2c898a6
 .quad 0x113f9804bef90dae,0x1b710b35131c471b
 .quad 0x28db77f523047d84,0x32caab7b40c72493
 .quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
 .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a
 .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817
 .quad 0 // terminator
___
# Round-constant table emitted for the 32-bit-word variant ($SZ==4):
# sixty-four .long constants followed by a zero entry that the round loops
# use as their end-of-table marker.
$code.=<<___ if ($SZ==4);
 .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
 .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
 .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
 .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
 .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
 .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
 .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
 .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
 .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
 .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
 .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
 .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
 .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
 .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
 .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
 .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
 .long 0 //terminator
___
# Close the K table and emit the PC-relative offset to OPENSSL_armcap_P that
# the SHA-256 entry point loads (32-bit under ILP32, 64-bit otherwise),
# then the identification string.
#
# NOTE(review): the preprocessor guards were missing from the listing and
# are reconstructed; without them both .long and .quad would be emitted.
$code.=<<___;
.size	.LK$BITS,.-.LK$BITS
#ifndef	__KERNEL__
.align	3
.LOPENSSL_armcap_P:
# ifdef	__ILP32__
	.long	OPENSSL_armcap_P-.
# else
	.quad	OPENSSL_armcap_P-.
# endif
#endif
.asciz	"SHA$BITS block transform for ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
.align	2
___
0369
# SHA-256 only: implementation built on the sha256h/sha256h2/sha256su0/
# sha256su1 instructions.  $num counts 64-byte blocks here.
#
# NOTE(review): the "#..." operands and the __KERNEL__ guard were missing
# from the listing and are reconstructed (16 bytes of K per load, fifteen
# post-incremented loads per block); verify against a pristine copy.
if ($SZ==4) {
my $Ktbl="x3";

my ($ABCD,$EFGH,$abcd)=map("v$_.16b",(0..2));
my @MSG=map("v$_.16b",(4..7));
my ($W0,$W1)=("v16.4s","v17.4s");
my ($ABCD_SAVE,$EFGH_SAVE)=("v18.16b","v19.16b");

$code.=<<___;
#ifndef	__KERNEL__
.type	sha256_block_armv8,%function
.align	6
sha256_block_armv8:
.Lv8_entry:
	stp		x29,x30,[sp,#-16]!
	add		x29,sp,#0

	ld1.32		{$ABCD,$EFGH},[$ctx]
	adr		$Ktbl,.LK256

.Loop_hw:
	ld1		{@MSG[0]-@MSG[3]},[$inp],#64
	sub		$num,$num,#1
	ld1.32		{$W0},[$Ktbl],#16
	rev32		@MSG[0],@MSG[0]
	rev32		@MSG[1],@MSG[1]
	rev32		@MSG[2],@MSG[2]
	rev32		@MSG[3],@MSG[3]
	orr		$ABCD_SAVE,$ABCD,$ABCD		// offload
	orr		$EFGH_SAVE,$EFGH,$EFGH
___
# Twelve groups of four rounds that also extend the message schedule; the
# two K registers and the four message registers rotate after each group.
for($i=0;$i<12;$i++) {
$code.=<<___;
	ld1.32		{$W1},[$Ktbl],#16
	add.i32		$W0,$W0,@MSG[0]
	sha256su0	@MSG[0],@MSG[1]
	orr		$abcd,$ABCD,$ABCD
	sha256h		$ABCD,$EFGH,$W0
	sha256h2	$EFGH,$abcd,$W0
	sha256su1	@MSG[0],@MSG[2],@MSG[3]
___
	($W0,$W1)=($W1,$W0);	push(@MSG,shift(@MSG));
}
# Last four groups need no further schedule update.
$code.=<<___;
	ld1.32		{$W1},[$Ktbl],#16
	add.i32		$W0,$W0,@MSG[0]
	orr		$abcd,$ABCD,$ABCD
	sha256h		$ABCD,$EFGH,$W0
	sha256h2	$EFGH,$abcd,$W0

	ld1.32		{$W0},[$Ktbl],#16
	add.i32		$W1,$W1,@MSG[1]
	orr		$abcd,$ABCD,$ABCD
	sha256h		$ABCD,$EFGH,$W1
	sha256h2	$EFGH,$abcd,$W1

	ld1.32		{$W1},[$Ktbl]
	add.i32		$W0,$W0,@MSG[2]
	sub		$Ktbl,$Ktbl,#$rounds*$SZ-16	// rewind
	orr		$abcd,$ABCD,$ABCD
	sha256h		$ABCD,$EFGH,$W0
	sha256h2	$EFGH,$abcd,$W0

	add.i32		$W1,$W1,@MSG[3]
	orr		$abcd,$ABCD,$ABCD
	sha256h		$ABCD,$EFGH,$W1
	sha256h2	$EFGH,$abcd,$W1

	add.i32		$ABCD,$ABCD,$ABCD_SAVE
	add.i32		$EFGH,$EFGH,$EFGH_SAVE

	cbnz		$num,.Loop_hw

	st1.32		{$ABCD,$EFGH},[$ctx]

	ldr		x29,[sp],#16
	ret
.size	sha256_block_armv8,.-sha256_block_armv8
#endif
___
}
0451
0452 if ($SZ==4) {
0453
0454
0455
0456
0457
# Register allocation for the NEON code path.
# Working variables live in w3..w10; note that the inner assignment also
# overwrites the file-level $A..$H (they are not declared with "my" here).
my @V = ($A,$B,$C,$D,$E,$F,$G,$H) = map("w$_",(3..10));
# Scalar temporaries w11..w15.
my ($t0,$t1,$t2,$t3,$t4) = map("w$_",(11..15));
# Pointer into the K256 table.
my $Ktbl="x16";
# Pointer into the stack area that receives the values stored by
# Xupdate/Xpreload (T0 = K + X).
my $Xfer="x17";
# Four vector registers holding the message words, plus eight vector
# temporaries; "q" names are rewritten to "v" names by the output loop.
my @X = map("q$_",(0..3));
my ($T0,$T1,$T2,$T3,$T4,$T5,$T6,$T7) = map("q$_",(4..7,16..19));
# Counter advanced by the code strings returned from body_00_15.
my $j=0;
0465
# Catch-all for undefined subs: a call such as &add_32(...) is turned into
# one line of assembly appended to $code.  The mnemonic is the called name
# with its package qualifier removed and its first "_" replaced by ".";
# a purely numeric last argument is emitted as an immediate ("#n").
sub AUTOLOAD()
{
    (my $mnemonic = $AUTOLOAD) =~ s/.*:://;
    $mnemonic =~ s/_/\./;

    my $last = pop;
    $last = "#$last" if ($last*1 eq $last);

    $code .= "\t" . $mnemonic . "\t" . join(',', @_, $last) . "\n";
}
0472
# Map a vector register name ("qN" or "vN...") to a 64-bit view of the same
# register: the scalar dN, the low lane vN.d[0] or the high lane vN.d[1].
# Each helper returns "" when the argument is not a q/v register name.
sub Dscalar { shift =~ m|[qv]([0-9]+)|?"d$1":""; }
sub Dlo     { shift =~ m|[qv]([0-9]+)|?"v$1.d[0]":""; }
sub Dhi     { shift =~ m|[qv]([0-9]+)|?"v$1.d[1]":""; }
0476
# Emit the vector instructions that update the message register @X[0]
# from @X[0..3], interleaved with scalar round code.
#
#   $body  code ref returning a list of Perl source strings; it is called
#          four times and the strings are eval'ed one at a time between
#          the vector instructions below.
#
# Vector instructions are produced through AUTOLOAD (e.g. &ushr_32 becomes
# "ushr.32").  At the end K (loaded from [$Ktbl]) plus the new @X[0] is
# stored through $Xfer, and @X is rotated by one register.
# The exact order of the statements is the instruction schedule; do not
# reorder.  The eval'ed strings are expected to assign $a..$h declared here.
sub Xupdate()
{ use integer;
my $body = shift;
my @insns = (&$body,&$body,&$body,&$body);
my ($a,$b,$c,$d,$e,$f,$g,$h);

&ext_8 ($T0,@X[0],@X[1],4);
eval(shift(@insns));
eval(shift(@insns));
eval(shift(@insns));
&ext_8 ($T3,@X[2],@X[3],4);
eval(shift(@insns));
eval(shift(@insns));
# Copy the high 64 bits of @X[3] into the d-register view of $T7.
&mov (&Dscalar($T7),&Dhi(@X[3]));
eval(shift(@insns));
eval(shift(@insns));
# ushr by n followed by sli by 32-n on the same destination forms a
# 32-bit rotate; a lone ushr is a plain shift.  $T1 accumulates the
# sigma0 terms of $T0.
&ushr_32 ($T2,$T0,$sigma0[0]);
eval(shift(@insns));
&ushr_32 ($T1,$T0,$sigma0[2]);
eval(shift(@insns));
&add_32 (@X[0],@X[0],$T3);
eval(shift(@insns));
&sli_32 ($T2,$T0,32-$sigma0[0]);
eval(shift(@insns));
eval(shift(@insns));
&ushr_32 ($T3,$T0,$sigma0[1]);
eval(shift(@insns));
eval(shift(@insns));
&eor_8 ($T1,$T1,$T2);
eval(shift(@insns));
eval(shift(@insns));
&sli_32 ($T3,$T0,32-$sigma0[1]);
eval(shift(@insns));
eval(shift(@insns));
# $T5 accumulates the sigma1 terms of $T7.
&ushr_32 ($T4,$T7,$sigma1[0]);
eval(shift(@insns));
eval(shift(@insns));
&eor_8 ($T1,$T1,$T3);
eval(shift(@insns));
eval(shift(@insns));
&sli_32 ($T4,$T7,32-$sigma1[0]);
eval(shift(@insns));
eval(shift(@insns));
&ushr_32 ($T5,$T7,$sigma1[2]);
eval(shift(@insns));
eval(shift(@insns));
&ushr_32 ($T3,$T7,$sigma1[1]);
eval(shift(@insns));
eval(shift(@insns));
&add_32 (@X[0],@X[0],$T1);
eval(shift(@insns));
eval(shift(@insns));
&sli_u32 ($T3,$T7,32-$sigma1[1]);
eval(shift(@insns));
eval(shift(@insns));
&eor_8 ($T5,$T5,$T4);
eval(shift(@insns));
eval(shift(@insns));
eval(shift(@insns));
&eor_8 ($T5,$T5,$T3);
eval(shift(@insns));
eval(shift(@insns));
eval(shift(@insns));
&add_32 (@X[0],@X[0],$T5);
eval(shift(@insns));
eval(shift(@insns));
eval(shift(@insns));
# Second sigma1 pass, this time over the partially updated @X[0];
# $T7 accumulates the terms.
&ushr_32 ($T6,@X[0],$sigma1[0]);
eval(shift(@insns));
&ushr_32 ($T7,@X[0],$sigma1[2]);
eval(shift(@insns));
eval(shift(@insns));
&sli_32 ($T6,@X[0],32-$sigma1[0]);
eval(shift(@insns));
&ushr_32 ($T5,@X[0],$sigma1[1]);
eval(shift(@insns));
eval(shift(@insns));
&eor_8 ($T7,$T7,$T6);
eval(shift(@insns));
eval(shift(@insns));
&sli_32 ($T5,@X[0],32-$sigma1[1]);
eval(shift(@insns));
eval(shift(@insns));
# Fetch the next four K constants into $T0 and advance $Ktbl.
&ld1_32 ("{$T0}","[$Ktbl], #16");
eval(shift(@insns));
&eor_8 ($T7,$T7,$T5);
eval(shift(@insns));
eval(shift(@insns));
# Clear $T5, then move the low half of $T7 into its high half so only the
# upper two words of @X[0] receive this contribution.
&eor_8 ($T5,$T5,$T5);
eval(shift(@insns));
eval(shift(@insns));
&mov (&Dhi($T5), &Dlo($T7));
eval(shift(@insns));
eval(shift(@insns));
eval(shift(@insns));
&add_32 (@X[0],@X[0],$T5);
eval(shift(@insns));
eval(shift(@insns));
eval(shift(@insns));
&add_32 ($T0,$T0,@X[0]);
# Drain all but the last remaining scalar string before the store.
while($#insns>=1) { eval(shift(@insns)); }
&st1_32 ("{$T0}","[$Xfer], #16");
eval(shift(@insns));

# Rotate the message registers for the next call.
push(@X,shift(@X));
}
0583
# Emit the vector instructions that load the next 16 input bytes into
# @X[0], byte-swap them, add four K constants and store the sum through
# $Xfer, interleaved with scalar round code.
#
#   $body  code ref returning a list of Perl source strings; it is called
#          four times and the strings are eval'ed between the vector
#          instructions below.
#
# @X is rotated by one register on exit.  The eval'ed strings are expected
# to assign $a..$h declared here.
sub Xpreload()
{ use integer;
my $body = shift;
my @insns = (&$body,&$body,&$body,&$body);
my ($a,$b,$c,$d,$e,$f,$g,$h);

eval(shift(@insns));
eval(shift(@insns));
&ld1_8 ("{@X[0]}","[$inp],#16");
eval(shift(@insns));
eval(shift(@insns));
&ld1_32 ("{$T0}","[$Ktbl],#16");
eval(shift(@insns));
eval(shift(@insns));
eval(shift(@insns));
eval(shift(@insns));
&rev32 (@X[0],@X[0]);
eval(shift(@insns));
eval(shift(@insns));
eval(shift(@insns));
eval(shift(@insns));
&add_32 ($T0,$T0,@X[0]);
# Evaluate every remaining scalar string (via $_) before the store.
foreach (@insns) { eval; }
&st1_32 ("{$T0}","[$Xfer], #16");

push(@X,shift(@X));
}
0611
# Return one scalar round as a list of Perl source strings.  Xupdate and
# Xpreload eval the strings one by one; each string emits instructions
# through AUTOLOAD.  The first string binds $a..$h to the current @V, and
# the last one advances $j, rotates @V and swaps $t2/$t3.
# $t1 is reloaded from the stack slot for the next round, except when
# ($j&15)==15; when $j is exactly 15 it is loaded from [$Ktbl] instead.
sub body_00_15 () {
(
'($a,$b,$c,$d,$e,$f,$g,$h)=@V;'.
'&add ($h,$h,$t1)',
'&add ($a,$a,$t4);'.
'&and ($t1,$f,$e)',
'&bic ($t4,$g,$e)',
'&eor ($t0,$e,$e,"ror#".($Sigma1[1]-$Sigma1[0]))',
'&add ($a,$a,$t2)',
'&orr ($t1,$t1,$t4)',
'&eor ($t0,$t0,$e,"ror#".($Sigma1[2]-$Sigma1[0]))',
'&eor ($t4,$a,$a,"ror#".($Sigma0[1]-$Sigma0[0]))',
'&add ($h,$h,$t1)',
'&ror ($t0,$t0,"#$Sigma1[0]")',
'&eor ($t2,$a,$b)',
'&eor ($t4,$t4,$a,"ror#".($Sigma0[2]-$Sigma0[0]))',
'&add ($h,$h,$t0)',
'&ldr ($t1,sprintf "[sp,#%d]",4*(($j+1)&15)) if (($j&15)!=15);'.
'&ldr ($t1,"[$Ktbl]") if ($j==15);'.
'&and ($t3,$t3,$t2)',
'&ror ($t4,$t4,"#$Sigma0[0]")',
'&add ($d,$d,$h)',
'&eor ($t3,$t3,$b)',
'$j++; unshift(@V,pop(@V)); ($t2,$t3)=($t3,$t2);'
)
}
0638
# NEON implementation of the SHA-256 block function: prologue and first
# block preload, the .L_00_48 loop (four Xupdate groups per pass), the last
# sixteen rounds overlapped with preloading the next block (four Xpreload
# groups), then the context update and epilogue.
#
# NOTE(review): the "#..." operands and the __KERNEL__ guard were missing
# from the listing and are reconstructed (64-byte transfer area on the
# stack, 16-byte vector loads, 32-bit context words at offsets 0..24);
# verify against a pristine copy before use.
$code.=<<___;
#ifdef	__KERNEL__
.globl	sha256_block_neon
#endif
.type	sha256_block_neon,%function
.align	4
sha256_block_neon:
.Lneon_entry:
	stp	x29, x30, [sp, #-16]!
	mov	x29, sp
	sub	sp,sp,#16*4

	adr	$Ktbl,.LK256
	add	$num,$inp,$num,lsl#6	// len to point at the end of inp

	ld1.8	{@X[0]},[$inp], #16
	ld1.8	{@X[1]},[$inp], #16
	ld1.8	{@X[2]},[$inp], #16
	ld1.8	{@X[3]},[$inp], #16
	ld1.32	{$T0},[$Ktbl], #16
	ld1.32	{$T1},[$Ktbl], #16
	ld1.32	{$T2},[$Ktbl], #16
	ld1.32	{$T3},[$Ktbl], #16
	rev32	@X[0],@X[0]		// yes, even on
	rev32	@X[1],@X[1]		// big-endian
	rev32	@X[2],@X[2]
	rev32	@X[3],@X[3]
	mov	$Xfer,sp
	add.32	$T0,$T0,@X[0]
	add.32	$T1,$T1,@X[1]
	add.32	$T2,$T2,@X[2]
	st1.32	{$T0-$T1},[$Xfer], #32
	add.32	$T3,$T3,@X[3]
	st1.32	{$T2-$T3},[$Xfer]
	sub	$Xfer,$Xfer,#32

	ldp	$A,$B,[$ctx]
	ldp	$C,$D,[$ctx,#8]
	ldp	$E,$F,[$ctx,#16]
	ldp	$G,$H,[$ctx,#24]
	ldr	$t1,[sp,#0]
	mov	$t2,wzr
	eor	$t3,$B,$C
	mov	$t4,wzr
	b	.L_00_48

.align	4
.L_00_48:
___
	&Xupdate(\&body_00_15);
	&Xupdate(\&body_00_15);
	&Xupdate(\&body_00_15);
	&Xupdate(\&body_00_15);
$code.=<<___;
	cmp	$t1,#0				// check for K256 terminator
	ldr	$t1,[sp,#0]
	sub	$Xfer,$Xfer,#64
	bne	.L_00_48

	sub	$Ktbl,$Ktbl,#256		// rewind $Ktbl
	cmp	$inp,$num
	mov	$Xfer, #64
	csel	$Xfer, $Xfer, xzr, eq
	sub	$inp,$inp,$Xfer			// avoid SEGV
	mov	$Xfer,sp
___
	&Xpreload(\&body_00_15);
	&Xpreload(\&body_00_15);
	&Xpreload(\&body_00_15);
	&Xpreload(\&body_00_15);
$code.=<<___;
	add	$A,$A,$t4			// h+=Sigma0(a) from the past
	ldp	$t0,$t1,[$ctx,#0]
	add	$A,$A,$t2			// h+=Maj(a,b,c) from the past
	ldp	$t2,$t3,[$ctx,#8]
	add	$A,$A,$t0			// accumulate
	add	$B,$B,$t1
	ldp	$t0,$t1,[$ctx,#16]
	add	$C,$C,$t2
	add	$D,$D,$t3
	ldp	$t2,$t3,[$ctx,#24]
	add	$E,$E,$t0
	add	$F,$F,$t1
	ldr	$t1,[sp,#0]
	stp	$A,$B,[$ctx,#0]
	add	$G,$G,$t2
	mov	$t2,wzr
	stp	$C,$D,[$ctx,#8]
	add	$H,$H,$t3
	stp	$E,$F,[$ctx,#16]
	eor	$t3,$B,$C
	stp	$G,$H,[$ctx,#24]
	mov	$t4,wzr
	mov	$Xfer,sp
	b.ne	.L_00_48

	ldr	x29,[x29]
	add	sp,sp,#16*4+16
	ret
.size	sha256_block_neon,.-sha256_block_neon
___
0740 }
0741
# Common-symbol definition of the capability word, emitted only for
# non-kernel builds.
# NOTE(review): the preprocessor guard was missing from the listing and is
# reconstructed to match the other __KERNEL__-guarded uses of this symbol.
$code.=<<___;
#ifndef	__KERNEL__
.comm	OPENSSL_armcap_P,4,4
#endif
___
0747
# Hand-assemble the SHA-256 crypto-extension instructions so the output
# does not depend on assembler support for them.
{   my %opcode = (
	"sha256h"	=> 0x5e004000,	"sha256h2"	=> 0x5e005000,
	"sha256su0"	=> 0x5e282800,	"sha256su1"	=> 0x5e006000	);

    # unsha256($mnemonic, $arg)
    #   $mnemonic  one of the keys of %opcode
    #   $arg       operand text: two or three q/v registers, comma-separated
    # Returns an ".inst" directive encoding the instruction, with the
    # original text kept as a trailing comment.  The first register number
    # goes into bits 0-4, the second into bits 5-9 and the optional third
    # into bits 16-20.  Returns false if $arg does not parse.
    sub unsha256 {
	my ($mnemonic,$arg)=@_;

	$arg =~ m/[qv]([0-9]+)[^,]*,\s*[qv]([0-9]+)[^,]*(?:,\s*[qv]([0-9]+))?/o
	&&
	sprintf ".inst\t0x%08x\t//%s %s",
			$opcode{$mnemonic}|$1|($2<<5)|($3<<16),
			$mnemonic,$arg;
    }
}
0762
# Copy this script's leading comment block to the output as "//" comments:
# the "#!" line is skipped, and copying stops at the first line that is
# neither a "#" comment nor empty.
open SELF,$0;
while(<SELF>) {
	next if (/^#!/);
	last if (!s/^#/\/\// and !/^$/);
	print;
}
close SELF;

# Post-process the generated text line by line and print it.
foreach(split("\n",$code)) {

	# Evaluate `...` expressions embedded in the templates.
	s/\`([^\`]*)\`/eval($1)/ge;

	# Replace SHA-256 extension mnemonics with .inst encodings.
	s/\b(sha256\w+)\s+([qv].*)/unsha256($1,$2)/ge;

	# Old-style qN register names become vN.16b.
	s/\bq([0-9]+)\b/v$1.16b/g;

	# Drop ".8"/".u8"/".i8" suffixes; a ".32"-style suffix is dropped too
	# and switches the operands to the .4s arrangement, or to .s when the
	# line is a single-lane ld1/st1 ("[0]").
	s/\.[ui]?8(\s)/$1/;
	s/\.\w?32\b//		and s/\.16b/\.4s/g;
	m/(ld|st)1[^\[]+\[0\]/	and s/\.4s/\.s/g;

	print $_,"\n";
}

close STDOUT;