0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024
0025
0026
0027
0028
0029
0030
0031
0032
0033
0034
0035
0036
0037
0038
0039
0040
0041
0042
0043
0044
0045
0046
0047
0048
0049
0050
0051
0052
0053
0054
0055
# Suffix strings for the two halves of a 64-bit quantity.
# NOTE(review): nothing in the visible text of this script references them;
# they were presumably spliced into operands that are cut off in this copy -
# confirm against a pristine source.
$hi="HI";
$lo="LO";

# Select the output file: consume command-line arguments until one looks
# like a plain file name (word characters and dashes, a dot, an extension).
# If one is found, STDOUT is redirected to it so that the final
# "print $code" lands in that file.  If none is found, STDOUT is left
# untouched instead of being reopened on an empty name, and a failed open
# aborts the run instead of silently discarding the generated code.
while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
if (defined($output) && $output ne "") {
    open(STDOUT,'>',$output) or die "can't open $output: $!";
}

# Core registers used by the integer-only code path.
$ctx="r0";	# base register for the loads/stores of the hash state
$inp="r1";	# input pointer, advanced by the post-indexed loads
$len="r2";	# rewritten from $inp and $len on entry, then compared with $inp

$Tlo="r3";	# lo/hi halves of the running sum "T"
$Thi="r4";
$Alo="r5";	# lo/hi halves of working variable a
$Ahi="r6";
$Elo="r7";	# lo/hi halves of working variable e
$Ehi="r8";
$t0="r9";	# scratch registers
$t1="r10";
$t2="r11";
$t3="r12";

# Pointer used for the K[i] loads; r14 is lr, which the generated prologue
# pushes together with r4-r12.
$Ktbl="r14";

# Byte offsets in steps of 8, one per working variable a..h, plus $Xoff.
# NOTE(review): the operands that used them are not visible in this copy;
# presumably they index the stack frame - confirm.
$Aoff=8*0;
$Boff=8*1;
$Coff=8*2;
$Doff=8*3;
$Eoff=8*4;
$Foff=8*5;
$Goff=8*6;
$Hoff=8*7;
$Xoff=8*8;
0090
# BODY_00_15 - append the assembly text of one SHA-512 round (integer-only
# code path) to the global $code.
#
# The emitted text works on 64-bit values held as lo/hi register pairs.
# According to its own "@" annotations it accumulates T += Sigma1(e),
# T += h, T += Ch(e,f,g) and T += K[i], performs d += T, adds Sigma0(a),
# combines Maj(a,b,c) and ends with the step annotated "h += T".  The
# sequence finishes with "tst $Ktbl," followed by an "add" on $Ktbl; the
# conditional instructions emitted right after the second call site
# (ittt eq / ldreq / beq) follow directly on that tst.
#
# Argument: $magic - a per-call constant (0x94 and 0x17 at the two call
#   sites).  NOTE(review): it is not referenced in the text visible here.
#   Many operands end abruptly after a comma, so this copy looks truncated
#   and $magic was presumably part of a lost operand - confirm.
# The return value is not used by the callers; the effect is the append to
# $code.
#
# The empty "()" prototype is bypassed because both callers use the
# &BODY_00_15(...) call form.
0091 sub BODY_00_15() {
0092 my $magic = shift;
0093 $code.=<<___;
0094 @ Sigma1(x) (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41))
0095 @ LO lo>>14^hi<<18 ^ lo>>18^hi<<14 ^ hi>>9^lo<<23
0096 @ HI hi>>14^lo<<18 ^ hi>>18^lo<<14 ^ lo>>9^hi<<23
0097 mov $t0,$Elo,lsr
0098 str $Tlo,[sp,
0099 mov $t1,$Ehi,lsr
0100 str $Thi,[sp,
0101 eor $t0,$t0,$Ehi,lsl
0102 ldr $t2,[sp,
0103 eor $t1,$t1,$Elo,lsl
0104 ldr $t3,[sp,
0105 eor $t0,$t0,$Elo,lsr
0106 eor $t1,$t1,$Ehi,lsr
0107 eor $t0,$t0,$Ehi,lsl
0108 eor $t1,$t1,$Elo,lsl
0109 eor $t0,$t0,$Ehi,lsr
0110 eor $t1,$t1,$Elo,lsr
0111 eor $t0,$t0,$Elo,lsl
0112 eor $t1,$t1,$Ehi,lsl
0113 adds $Tlo,$Tlo,$t0
0114 ldr $t0,[sp,
0115 adc $Thi,$Thi,$t1 @ T += Sigma1(e)
0116 ldr $t1,[sp,
0117 adds $Tlo,$Tlo,$t2
0118 ldr $t2,[sp,
0119 adc $Thi,$Thi,$t3 @ T += h
0120 ldr $t3,[sp,
0121
0122 eor $t0,$t0,$t2
0123 str $Elo,[sp,
0124 eor $t1,$t1,$t3
0125 str $Ehi,[sp,
0126 and $t0,$t0,$Elo
0127 str $Alo,[sp,
0128 and $t1,$t1,$Ehi
0129 str $Ahi,[sp,
0130 eor $t0,$t0,$t2
0131 ldr $t2,[$Ktbl,
0132 eor $t1,$t1,$t3 @ Ch(e,f,g)
0133 ldr $t3,[$Ktbl,
0134
0135 adds $Tlo,$Tlo,$t0
0136 ldr $Elo,[sp,
0137 adc $Thi,$Thi,$t1 @ T += Ch(e,f,g)
0138 ldr $Ehi,[sp,
0139 adds $Tlo,$Tlo,$t2
0140 and $t0,$t2,
0141 adc $Thi,$Thi,$t3 @ T += K[i]
0142 adds $Elo,$Elo,$Tlo
0143 ldr $t2,[sp,
0144 adc $Ehi,$Ehi,$Thi @ d += T
0145 teq $t0,
0146
0147 ldr $t3,[sp,
0148
0149 it eq @ Thumb2 thing, sanity check in ARM
0150
0151 orreq $Ktbl,$Ktbl,
0152 @ Sigma0(x) (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
0153 @ LO lo>>28^hi<<4 ^ hi>>2^lo<<30 ^ hi>>7^lo<<25
0154 @ HI hi>>28^lo<<4 ^ lo>>2^hi<<30 ^ lo>>7^hi<<25
0155 mov $t0,$Alo,lsr
0156 mov $t1,$Ahi,lsr
0157 eor $t0,$t0,$Ahi,lsl
0158 eor $t1,$t1,$Alo,lsl
0159 eor $t0,$t0,$Ahi,lsr
0160 eor $t1,$t1,$Alo,lsr
0161 eor $t0,$t0,$Alo,lsl
0162 eor $t1,$t1,$Ahi,lsl
0163 eor $t0,$t0,$Ahi,lsr
0164 eor $t1,$t1,$Alo,lsr
0165 eor $t0,$t0,$Alo,lsl
0166 eor $t1,$t1,$Ahi,lsl
0167 adds $Tlo,$Tlo,$t0
0168 and $t0,$Alo,$t2
0169 adc $Thi,$Thi,$t1 @ T += Sigma0(a)
0170
0171 ldr $t1,[sp,
0172 orr $Alo,$Alo,$t2
0173 ldr $t2,[sp,
0174 and $Alo,$Alo,$t3
0175 and $t3,$Ahi,$t1
0176 orr $Ahi,$Ahi,$t1
0177 orr $Alo,$Alo,$t0 @ Maj(a,b,c).lo
0178 and $Ahi,$Ahi,$t2
0179 adds $Alo,$Alo,$Tlo
0180 orr $Ahi,$Ahi,$t3 @ Maj(a,b,c).hi
0181 sub sp,sp,
0182 adc $Ahi,$Ahi,$Thi @ h += T
0183 tst $Ktbl,
0184 add $Ktbl,$Ktbl,
0185 ___
0186 }
# Start of the generated file.  This heredoc initialises $code with:
#   - section and instruction-set directives;
#   - the K512 object: 40 WORD64 rows of four 32-bit words each.  WORD64 is
#     not defined in the visible text; presumably a preprocessor macro that
#     emits two 64-bit constants per row - confirm;
#   - .LOPENSSL_armcap, the distance from sha512_block_data_order to
#     OPENSSL_armcap_P;
#   - the entry of sha512_block_data_order: the capability word is loaded
#     relative to r3 and tested, with a branch to .LNEON when the tested
#     bit is set; otherwise $len is recomputed from $inp and $len,
#     r4-r12 and lr are pushed, $Ktbl is derived from r3, stack space is
#     reserved and the state is loaded from $ctx;
#   - .Loop, which copies state words to the stack, and .L00_15, which
#     fetches the next input word into $Tlo/$Thi.
# NOTE(review): several mutually exclusive variants appear back to back
# (".code 32"/".thumb", "sub r3,pc,"/"adr r3", byte-wise ldrb/orr loading
# versus ldr+rev).  They were presumably separated by preprocessor
# conditionals that are missing from this copy, as are the immediates after
# trailing commas - confirm against a pristine source.
0187 $code=<<___;
0188
0189
0190
0191
0192
0193
0194
0195
0196
0197
0198
0199
0200
0201
0202
0203
0204
0205
0206
0207
0208
0209 .text
0210
0211 .code 32
0212
0213 .syntax unified
0214
0215 .thumb
0216
0217 .code 32
0218
0219
0220
0221 .type K512,%object
0222 .align 5
0223 K512:
0224 WORD64(0x428a2f98,0xd728ae22, 0x71374491,0x23ef65cd)
0225 WORD64(0xb5c0fbcf,0xec4d3b2f, 0xe9b5dba5,0x8189dbbc)
0226 WORD64(0x3956c25b,0xf348b538, 0x59f111f1,0xb605d019)
0227 WORD64(0x923f82a4,0xaf194f9b, 0xab1c5ed5,0xda6d8118)
0228 WORD64(0xd807aa98,0xa3030242, 0x12835b01,0x45706fbe)
0229 WORD64(0x243185be,0x4ee4b28c, 0x550c7dc3,0xd5ffb4e2)
0230 WORD64(0x72be5d74,0xf27b896f, 0x80deb1fe,0x3b1696b1)
0231 WORD64(0x9bdc06a7,0x25c71235, 0xc19bf174,0xcf692694)
0232 WORD64(0xe49b69c1,0x9ef14ad2, 0xefbe4786,0x384f25e3)
0233 WORD64(0x0fc19dc6,0x8b8cd5b5, 0x240ca1cc,0x77ac9c65)
0234 WORD64(0x2de92c6f,0x592b0275, 0x4a7484aa,0x6ea6e483)
0235 WORD64(0x5cb0a9dc,0xbd41fbd4, 0x76f988da,0x831153b5)
0236 WORD64(0x983e5152,0xee66dfab, 0xa831c66d,0x2db43210)
0237 WORD64(0xb00327c8,0x98fb213f, 0xbf597fc7,0xbeef0ee4)
0238 WORD64(0xc6e00bf3,0x3da88fc2, 0xd5a79147,0x930aa725)
0239 WORD64(0x06ca6351,0xe003826f, 0x14292967,0x0a0e6e70)
0240 WORD64(0x27b70a85,0x46d22ffc, 0x2e1b2138,0x5c26c926)
0241 WORD64(0x4d2c6dfc,0x5ac42aed, 0x53380d13,0x9d95b3df)
0242 WORD64(0x650a7354,0x8baf63de, 0x766a0abb,0x3c77b2a8)
0243 WORD64(0x81c2c92e,0x47edaee6, 0x92722c85,0x1482353b)
0244 WORD64(0xa2bfe8a1,0x4cf10364, 0xa81a664b,0xbc423001)
0245 WORD64(0xc24b8b70,0xd0f89791, 0xc76c51a3,0x0654be30)
0246 WORD64(0xd192e819,0xd6ef5218, 0xd6990624,0x5565a910)
0247 WORD64(0xf40e3585,0x5771202a, 0x106aa070,0x32bbd1b8)
0248 WORD64(0x19a4c116,0xb8d2d0c8, 0x1e376c08,0x5141ab53)
0249 WORD64(0x2748774c,0xdf8eeb99, 0x34b0bcb5,0xe19b48a8)
0250 WORD64(0x391c0cb3,0xc5c95a63, 0x4ed8aa4a,0xe3418acb)
0251 WORD64(0x5b9cca4f,0x7763e373, 0x682e6ff3,0xd6b2b8a3)
0252 WORD64(0x748f82ee,0x5defb2fc, 0x78a5636f,0x43172f60)
0253 WORD64(0x84c87814,0xa1f0ab72, 0x8cc70208,0x1a6439ec)
0254 WORD64(0x90befffa,0x23631e28, 0xa4506ceb,0xde82bde9)
0255 WORD64(0xbef9a3f7,0xb2c67915, 0xc67178f2,0xe372532b)
0256 WORD64(0xca273ece,0xea26619c, 0xd186b8c7,0x21c0c207)
0257 WORD64(0xeada7dd6,0xcde0eb1e, 0xf57d4f7f,0xee6ed178)
0258 WORD64(0x06f067aa,0x72176fba, 0x0a637dc5,0xa2c898a6)
0259 WORD64(0x113f9804,0xbef90dae, 0x1b710b35,0x131c471b)
0260 WORD64(0x28db77f5,0x23047d84, 0x32caab7b,0x40c72493)
0261 WORD64(0x3c9ebe0a,0x15c9bebc, 0x431d67c4,0x9c100d4c)
0262 WORD64(0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc657e2a)
0263 WORD64(0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817)
0264 .size K512,.-K512
0265
0266 .LOPENSSL_armcap:
0267 .word OPENSSL_armcap_P-sha512_block_data_order
0268 .skip 32-4
0269
0270 .skip 32
0271
0272
0273 .global sha512_block_data_order
0274 .type sha512_block_data_order,%function
0275 sha512_block_data_order:
0276 .Lsha512_block_data_order:
0277
0278 sub r3,pc,
0279
0280 adr r3,.Lsha512_block_data_order
0281
0282
0283 ldr r12,.LOPENSSL_armcap
0284 ldr r12,[r3,r12] @ OPENSSL_armcap_P
0285 tst r12,
0286 bne .LNEON
0287
0288 add $len,$inp,$len,lsl
0289 stmdb sp!,{r4-r12,lr}
0290 sub $Ktbl,r3,
0291 sub sp,sp,
0292
0293 ldr $Elo,[$ctx,
0294 ldr $Ehi,[$ctx,
0295 ldr $t0, [$ctx,
0296 ldr $t1, [$ctx,
0297 ldr $t2, [$ctx,
0298 ldr $t3, [$ctx,
0299 .Loop:
0300 str $t0, [sp,
0301 str $t1, [sp,
0302 str $t2, [sp,
0303 str $t3, [sp,
0304 ldr $Alo,[$ctx,
0305 ldr $Ahi,[$ctx,
0306 ldr $Tlo,[$ctx,
0307 ldr $Thi,[$ctx,
0308 ldr $t0, [$ctx,
0309 ldr $t1, [$ctx,
0310 ldr $t2, [$ctx,
0311 ldr $t3, [$ctx,
0312 str $Tlo,[sp,
0313 str $Thi,[sp,
0314 str $t0, [sp,
0315 str $t1, [sp,
0316 str $t2, [sp,
0317 str $t3, [sp,
0318 ldr $Tlo,[$ctx,
0319 ldr $Thi,[$ctx,
0320 str $Tlo,[sp,
0321 str $Thi,[sp,
0322
0323 .L00_15:
0324
0325 ldrb $Tlo,[$inp,
0326 ldrb $t0, [$inp,
0327 ldrb $t1, [$inp,
0328 ldrb $t2, [$inp,
0329 ldrb $Thi,[$inp,
0330 ldrb $t3, [$inp,
0331 orr $Tlo,$Tlo,$t0,lsl
0332 ldrb $t0, [$inp,
0333 orr $Tlo,$Tlo,$t1,lsl
0334 ldrb $t1, [$inp],
0335 orr $Tlo,$Tlo,$t2,lsl
0336 orr $Thi,$Thi,$t3,lsl
0337 orr $Thi,$Thi,$t0,lsl
0338 orr $Thi,$Thi,$t1,lsl
0339
0340 ldr $Tlo,[$inp,
0341 ldr $Thi,[$inp],
0342
0343 rev $Tlo,$Tlo
0344 rev $Thi,$Thi
0345
0346
0347 ___
# Emit the round body for the .L00_15 loop (called with 0x94), then the
# loop-back: "tst $Ktbl," / "beq .L00_15" repeats the fetch-and-round
# sequence while the test yields zero.  After the loop, two words are
# reloaded from the stack into $t0/$t1, "bic" clears bits in $Ktbl, and the
# .L16_79 label opens the message-schedule code: per its "@" annotations it
# computes sigma0(x) into $Tlo/$Thi and sigma1(x) into $t0/$t1, then sums
# these with two further 64-bit words loaded from the stack to form the
# next input for the round body.
0348 &BODY_00_15(0x94);
0349 $code.=<<___;
0350 tst $Ktbl,
0351 beq .L00_15
0352 ldr $t0,[sp,
0353 ldr $t1,[sp,
0354 bic $Ktbl,$Ktbl,
0355 .L16_79:
0356 @ sigma0(x) (ROTR((x),1) ^ ROTR((x),8) ^ ((x)>>7))
0357 @ LO lo>>1^hi<<31 ^ lo>>8^hi<<24 ^ lo>>7^hi<<25
0358 @ HI hi>>1^lo<<31 ^ hi>>8^lo<<24 ^ hi>>7
0359 mov $Tlo,$t0,lsr
0360 ldr $t2,[sp,
0361 mov $Thi,$t1,lsr
0362 ldr $t3,[sp,
0363 eor $Tlo,$Tlo,$t1,lsl
0364 eor $Thi,$Thi,$t0,lsl
0365 eor $Tlo,$Tlo,$t0,lsr
0366 eor $Thi,$Thi,$t1,lsr
0367 eor $Tlo,$Tlo,$t1,lsl
0368 eor $Thi,$Thi,$t0,lsl
0369 eor $Tlo,$Tlo,$t0,lsr
0370 eor $Thi,$Thi,$t1,lsr
0371 eor $Tlo,$Tlo,$t1,lsl
0372
0373 @ sigma1(x) (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))
0374 @ LO lo>>19^hi<<13 ^ hi>>29^lo<<3 ^ lo>>6^hi<<26
0375 @ HI hi>>19^lo<<13 ^ lo>>29^hi<<3 ^ hi>>6
0376 mov $t0,$t2,lsr
0377 mov $t1,$t3,lsr
0378 eor $t0,$t0,$t3,lsl
0379 eor $t1,$t1,$t2,lsl
0380 eor $t0,$t0,$t3,lsr
0381 eor $t1,$t1,$t2,lsr
0382 eor $t0,$t0,$t2,lsl
0383 eor $t1,$t1,$t3,lsl
0384 eor $t0,$t0,$t2,lsr
0385 eor $t1,$t1,$t3,lsr
0386 ldr $t2,[sp,
0387 eor $t0,$t0,$t3,lsl
0388
0389 ldr $t3,[sp,
0390 adds $Tlo,$Tlo,$t0
0391 ldr $t0,[sp,
0392 adc $Thi,$Thi,$t1
0393
0394 ldr $t1,[sp,
0395 adds $Tlo,$Tlo,$t2
0396 adc $Thi,$Thi,$t3
0397 adds $Tlo,$Tlo,$t0
0398 adc $Thi,$Thi,$t1
0399 ___
# Emit the round body for the .L16_79 loop (called with 0x17), then the
# rest of the integer-only function:
#   - while the "eq" condition holds, two words are reloaded and control
#     returns to .L16_79; afterwards "bic" clears bits in $Ktbl;
#   - four groups of loads from the stack and from $ctx, each added with
#     adds/adc (64-bit add as a lo/hi pair) and stored back to $ctx, fold
#     the working variables into the context;
#   - sp and $Ktbl are adjusted, and "teq $inp,$len" / "bne .Loop" repeats
#     the whole block loop until $inp reaches $len;
#   - the stack frame is released and the function returns.
# NOTE(review): two return sequences appear back to back ("ldmia ...,pc"
# and "ldmia ...,lr" + tst/moveq/bx).  They were presumably alternatives
# selected by preprocessor conditionals that are missing from this copy -
# confirm.
0400 &BODY_00_15(0x17);
0401 $code.=<<___;
0402
0403 ittt eq @ Thumb2 thing, sanity check in ARM
0404
0405 ldreq $t0,[sp,
0406 ldreq $t1,[sp,
0407 beq .L16_79
0408 bic $Ktbl,$Ktbl,
0409
0410 ldr $Tlo,[sp,
0411 ldr $Thi,[sp,
0412 ldr $t0, [$ctx,
0413 ldr $t1, [$ctx,
0414 ldr $t2, [$ctx,
0415 ldr $t3, [$ctx,
0416 adds $t0,$Alo,$t0
0417 str $t0, [$ctx,
0418 adc $t1,$Ahi,$t1
0419 str $t1, [$ctx,
0420 adds $t2,$Tlo,$t2
0421 str $t2, [$ctx,
0422 adc $t3,$Thi,$t3
0423 str $t3, [$ctx,
0424
0425 ldr $Alo,[sp,
0426 ldr $Ahi,[sp,
0427 ldr $Tlo,[sp,
0428 ldr $Thi,[sp,
0429 ldr $t0, [$ctx,
0430 ldr $t1, [$ctx,
0431 ldr $t2, [$ctx,
0432 ldr $t3, [$ctx,
0433 adds $t0,$Alo,$t0
0434 str $t0, [$ctx,
0435 adc $t1,$Ahi,$t1
0436 str $t1, [$ctx,
0437 adds $t2,$Tlo,$t2
0438 str $t2, [$ctx,
0439 adc $t3,$Thi,$t3
0440 str $t3, [$ctx,
0441
0442 ldr $Tlo,[sp,
0443 ldr $Thi,[sp,
0444 ldr $t0, [$ctx,
0445 ldr $t1, [$ctx,
0446 ldr $t2, [$ctx,
0447 ldr $t3, [$ctx,
0448 adds $Elo,$Elo,$t0
0449 str $Elo,[$ctx,
0450 adc $Ehi,$Ehi,$t1
0451 str $Ehi,[$ctx,
0452 adds $t2,$Tlo,$t2
0453 str $t2, [$ctx,
0454 adc $t3,$Thi,$t3
0455 str $t3, [$ctx,
0456
0457 ldr $Alo,[sp,
0458 ldr $Ahi,[sp,
0459 ldr $Tlo,[sp,
0460 ldr $Thi,[sp,
0461 ldr $t0, [$ctx,
0462 ldr $t1, [$ctx,
0463 ldr $t2, [$ctx,
0464 ldr $t3, [$ctx,
0465 adds $t0,$Alo,$t0
0466 str $t0, [$ctx,
0467 adc $t1,$Ahi,$t1
0468 str $t1, [$ctx,
0469 adds $t2,$Tlo,$t2
0470 str $t2, [$ctx,
0471 adc $t3,$Thi,$t3
0472 str $t3, [$ctx,
0473
0474 add sp,sp,
0475 sub $Ktbl,$Ktbl,
0476
0477 teq $inp,$len
0478 bne .Loop
0479
0480 add sp,sp,
0481
0482 ldmia sp!,{r4-r12,pc}
0483
0484 ldmia sp!,{r4-r12,lr}
0485 tst lr,
0486 moveq pc,lr @ be binary compatible with V4, yet
0487 bx lr @ interoperable with Thumb ISA:-)
0488
0489 .size sha512_block_data_order,.-sha512_block_data_order
0490 ___
0491
0492 {
# Rotate/shift amounts of the four SHA-512 functions; the values agree with
# the "@ Sigma0(x)", "@ Sigma1(x)", "@ sigma0(x)" and "@ sigma1(x)"
# annotations in the integer code.  NOTE(review): the visible text never
# reads these arrays; presumably they fed shift-count operands that are cut
# off in this copy - confirm.
0493 my @Sigma0=(28,34,39);
0494 my @Sigma1=(14,18,41);
0495 my @sigma0=(1, 8, 7);
0496 my @sigma1=(19,61,6);
0497
# Lexical register names for the NEON path; this $Ktbl shadows the global
# one (r14) for the remainder of the enclosing block.
0498 my $Ktbl="r3";
0499 my $cnt="r12";
0500
# @X: d0..d15 hold the sixteen message words.  @V: d16..d23 hold the working
# variables; the same assignment also sets the package variables $A..$H.
0501 my @X=map("d$_",(0..15));
0502 my @V=($A,$B,$C,$D,$E,$F,$G,$H)=map("d$_",(16..23));
0503
# NEON_00_15 - append the NEON text of one round to $code.
#
# Arguments: $i, the round index, followed by eight register names that
#   play the roles a..h in this round (the caller rotates the list between
#   rounds).
# Temporaries: $t0,$t1,$t2,$T1,$K,$Ch,$Maj are d24..d30 (the map yields
#   d24..d31; the last value is unused).  These lexicals shadow the globals
#   of the same name.
#
# The first heredoc is emitted only when $i<16 or $i is odd; for even
# $i>=16 NEON_16_79 has already emitted the three vshr into d24..d26 and the
# "h+=Maj from the past" add.  The second heredoc is always emitted: it
# loads K[i], forms Sigma1(e), Ch(e,f,g), Sigma0(a) and Maj(a,b,c) per its
# "@" annotations, adds T1 to $d, and leaves Maj+T1 in $Maj.  Adding that
# sum to $h is deferred (the final vadd is commented out in the assembly)
# and happens at the start of the following round.
#
# NOTE(review): for $i>=16, "@X[$i]" in the vrev64.8 line indexes past the
# 16-entry @X and interpolates as empty text.  That line and the vld1.64 /
# vadd.i64 lines were presumably guarded by preprocessor conditionals that
# are missing from this copy, as are the shift counts after trailing
# commas - confirm against a pristine source.
#
# The empty "()" prototype is bypassed by the &NEON_00_15(...) call form
# used at every call site.
0504 sub NEON_00_15() {
0505 my $i=shift;
0506 my ($a,$b,$c,$d,$e,$f,$g,$h)=@_;
0507 my ($t0,$t1,$t2,$T1,$K,$Ch,$Maj)=map("d$_",(24..31));
0508
0509 $code.=<<___ if ($i<16 || $i&1);
0510 vshr.u64 $t0,$e,
0511
0512 vld1.64 {@X[$i%16]},[$inp]! @ handles unaligned
0513
0514 vshr.u64 $t1,$e,
0515
0516 vadd.i64 $a,$Maj @ h+=Maj from the past
0517
0518 vshr.u64 $t2,$e,
0519 ___
0520 $code.=<<___;
0521 vld1.64 {$K},[$Ktbl,:64]! @ K[i++]
0522 vsli.64 $t0,$e,
0523 vsli.64 $t1,$e,
0524 vmov $Ch,$e
0525 vsli.64 $t2,$e,
0526
0527 vrev64.8 @X[$i],@X[$i]
0528
0529 veor $t1,$t0
0530 vbsl $Ch,$f,$g @ Ch(e,f,g)
0531 vshr.u64 $t0,$a,
0532 veor $t2,$t1 @ Sigma1(e)
0533 vadd.i64 $T1,$Ch,$h
0534 vshr.u64 $t1,$a,
0535 vsli.64 $t0,$a,
0536 vadd.i64 $T1,$t2
0537 vshr.u64 $t2,$a,
0538 vadd.i64 $K,@X[$i%16]
0539 vsli.64 $t1,$a,
0540 veor $Maj,$a,$b
0541 vsli.64 $t2,$a,
0542 veor $h,$t0,$t1
0543 vadd.i64 $T1,$K
0544 vbsl $Maj,$c,$b @ Maj(a,b,c)
0545 veor $h,$t2 @ Sigma0(a)
0546 vadd.i64 $d,$T1
0547 vadd.i64 $Maj,$T1
0548 @ vadd.i64 $h,$Maj
0549 ___
0550 }
0551
# NEON_16_79 - append the NEON text for one round of the .L16_79_neon loop.
#
# Arguments: same as NEON_00_15 - $i followed by the eight a..h register
#   names.
#
# Odd $i: nothing extra is needed, the call is forwarded to NEON_00_15.
# Even $i: the message schedule is updated first, on q registers (q0..q7
# alias the sixteen d-register message words in pairs, so two words are
# processed per step).  Per the "@" annotations the code adds
# sigma1(X[i+14]), a vext-extracted pair of earlier words and
# sigma0(X[i+1]) to X[i].  Interleaved with that are the "h+=Maj from the
# past" add and three vshr of $e into d24..d26, which is why NEON_00_15
# skips its first heredoc for even $i>=16.  Finally $i, halved for the
# q-register indexing, is doubled again and the regular round is emitted.
#
# "@_[4]" is a one-element slice and yields the same value as $_[4] (the
# register playing the role of e).
0552 sub NEON_16_79() {
0553 my $i=shift;
0554
0555 if ($i&1) { &NEON_00_15($i,@_); return; }
0556
0557
0558 my @X=map("q$_",(0..7));
0559 my ($t0,$t1,$s0,$s1) = map("q$_",(12..15));
0560 my ($d0,$d1,$d2) = map("d$_",(24..26));
0561 my $e=@_[4];
0562 $i /= 2;
0563 $code.=<<___;
0564 vshr.u64 $t0,@X[($i+7)%8],
0565 vshr.u64 $t1,@X[($i+7)%8],
0566 vadd.i64 @_[0],d30 @ h+=Maj from the past
0567 vshr.u64 $s1,@X[($i+7)%8],
0568 vsli.64 $t0,@X[($i+7)%8],
0569 vext.8 $s0,@X[$i%8],@X[($i+1)%8],
0570 vsli.64 $t1,@X[($i+7)%8],
0571 veor $s1,$t0
0572 vshr.u64 $t0,$s0,
0573 veor $s1,$t1 @ sigma1(X[i+14])
0574 vshr.u64 $t1,$s0,
0575 vadd.i64 @X[$i%8],$s1
0576 vshr.u64 $s1,$s0,
0577 vsli.64 $t0,$s0,
0578 vsli.64 $t1,$s0,
0579 vext.8 $s0,@X[($i+4)%8],@X[($i+5)%8],
0580 veor $s1,$t0
0581 vshr.u64 $d0,$e,
0582 vadd.i64 @X[$i%8],$s0
0583 vshr.u64 $d1,$e,
0584 veor $s1,$t1 @ sigma0(X[i+1])
0585 vshr.u64 $d2,$e,
0586 vadd.i64 @X[$i%8],$s1
0587 ___
0588 &NEON_00_15(2*$i,@_);
0589 }
0590
# Emit sha512_block_data_order_neon.  The prologue recomputes $len from
# $inp and $len, invokes VFP_ABI_PUSH (not defined in the visible text;
# presumably a macro saving the callee-saved VFP registers - confirm),
# points $Ktbl (r3 here) at K512 and loads the state from $ctx into $A..$H.
0591 $code.=<<___;
0592
0593 .arch armv7-a
0594 .fpu neon
0595
0596 .global sha512_block_data_order_neon
0597 .type sha512_block_data_order_neon,%function
0598 .align 4
0599 sha512_block_data_order_neon:
0600 .LNEON:
0601 dmb @ errata
0602 add $len,$inp,$len,lsl
0603 VFP_ABI_PUSH
0604 adr $Ktbl,.Lsha512_block_data_order
0605 sub $Ktbl,$Ktbl,.Lsha512_block_data_order-K512
0606 vldmia $ctx,{$A-$H} @ load context
0607 .Loop_neon:
0608 ___
# Rounds 0..15, fully unrolled.  After each round the register list is
# rotated right by one, so the roles a..h move to the next register instead
# of the data being copied.
0609 for($i=0;$i<16;$i++) { &NEON_00_15($i,@V); unshift(@V,pop(@V)); }
# Counted loop around the remaining rounds: $cnt is initialised, then
# decremented at the top of each pass.
0610 $code.=<<___;
0611 mov $cnt,
0612 .L16_79_neon:
0613 subs $cnt,
0614 ___
# Loop body: sixteen more rounds ($i continues from 16 to 31), emitted once
# and executed repeatedly under the $cnt loop.
0615 for(;$i<32;$i++) { &NEON_16_79($i,@V); unshift(@V,pop(@V)); }
# Loop back while $cnt is non-zero, apply the last deferred Maj add, add the
# saved context (reloaded into d24-d31) to the working variables, store the
# result, and repeat from .Loop_neon until $inp reaches $len.  "ret" is
# rewritten to "bx lr" by the substitutions at the end of the script.
0616 $code.=<<___;
0617 bne .L16_79_neon
0618
0619 vadd.i64 $A,d30 @ h+=Maj from the past
0620 vldmia $ctx,{d24-d31} @ load context to temp
0621 vadd.i64 q8,q12 @ vectorized accumulate
0622 vadd.i64 q9,q13
0623 vadd.i64 q10,q14
0624 vadd.i64 q11,q15
0625 vstmia $ctx,{$A-$H} @ save context
0626 teq $inp,$len
0627 sub $Ktbl,
0628 bne .Loop_neon
0629
0630 VFP_ABI_POP
0631 ret @ bx lr
0632 .size sha512_block_data_order_neon,.-sha512_block_data_order_neon
0633
0634 ___
0635 }
# Trailer of the generated file: an identification string (the "\@" keeps
# Perl from interpolating an array inside the heredoc) and the common-symbol
# declaration of OPENSSL_armcap_P, 4 bytes with 4-byte alignment.
0636 $code.=<<___;
0637 .asciz "SHA512 block transform for ARMv4/NEON, CRYPTOGAMS by <appro\@openssl.org>"
0638 .align 2
0639
0640 .comm OPENSSL_armcap_P,4,4
0641
0642 ___
0643
# Post-process the generated text.  The order of these substitutions
# matters.
# 1. Replace every `...` span with the result of evaluating its contents as
#    Perl, so that constant expressions can be written inline.
0644 $code =~ s/\`([^\`]*)\`/eval $1/gem;
# 2. Replace each "bx lr" with the literal word 0xe12fff1e (the ARM encoding
#    of that instruction), so the assembler never sees the mnemonic.
0645 $code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm;
# 3. Only now turn "ret" into a real "bx lr"; done after step 2 so that this
#    one is left as a mnemonic.
0646 $code =~ s/\bret\b/bx lr/gm;
0647
# Copy the leading comment block of this script ($0) to the output: each
# line read has a leading "#" rewritten to "@", the loop stops at the first
# line that neither started with "#" nor is empty, and every line before
# that point is printed.
# NOTE(review): the "next if (/^" line below ends in the middle of a regex
# in this copy; presumably it skipped a line that must not be copied (such
# as the interpreter line) - restore it from a pristine source.
# NOTE(review): the result of the open is not checked.
0648 open SELF,$0;
0649 while(<SELF>) {
0650 next if (/^
0651 last if (!s/^#/@/ and !/^$/);
0652 print;
0653 }
0654 close SELF;
0655
# Write the generated assembly to STDOUT (redirected to the output file when
# one was named on the command line).  Buffered write errors such as a full
# disk only surface when the handle is closed, so the close is checked and a
# failure aborts the run instead of leaving a truncated file behind
# unnoticed.
print $code;
close STDOUT or die "error closing STDOUT: $!";