#define __ARM_ARCH__ __LINUX_ARM_ARCH__
@ SPDX-License-Identifier: GPL-2.0

@ This code is taken from the OpenSSL project but the author (Andy Polyakov)
@ has relicensed it under the GPLv2. Therefore this program is free software;
@ you can redistribute it and/or modify it under the terms of the GNU General
@ Public License version 2 as published by the Free Software Foundation.
@
@ The original headers, including the original license headers, are
@ included below for completeness.

@ ====================================================================
@ Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
@ project. The module is, however, dual licensed under OpenSSL and
@ CRYPTOGAMS licenses depending on where you obtain it. For further
@ details see https://www.openssl.org/~appro/cryptogams/.
@ ====================================================================

@ sha1_block procedure for ARMv4.
@
@ January 2007.
@ Size/performance trade-off
@ ====================================================================
@ impl		size in bytes	comp cycles[*]	measured performance
@ ====================================================================
@ thumb		304		3212		4420
@ armv4-small	392/+29%	1958/+64%	2250/+96%
@ armv4-compact	740/+89%	1552/+26%	1840/+22%
@ armv4-large	1420/+92%	1307/+19%	1370/+34%[***]
@ full unroll	~5100/+260%	~1260/+4%	~1300/+5%
@ ====================================================================
@ thumb		= same as 'small' but in Thumb instructions[**] and
@		  with recurring code in two private functions;
@ small		= detached Xload/update, loops are folded;
@ compact	= detached Xload/update, 5x unroll;
@ large		= interleaved Xload/update, 5x unroll;
@ full unroll	= interleaved Xload/update, full unroll, estimated[!];
@
@ [*]	Manually counted instructions in "grand" loop body. Measured
@	performance is affected by prologue and epilogue overhead,
@	i-cache availability, branch penalties, etc.
@ [**]	While each Thumb instruction is half the size of an ARM one,
@	the Thumb set is not as diverse: e.g., there are only two
@	three-operand arithmetic instructions, no rotate by a fixed
@	amount, and addressing modes are limited. As a result it takes
@	more instructions to do the same job in Thumb, so the code is
@	never half the size and is always slower.
@ [***]	which is also ~35% better than compiler-generated code. A dual-
@	issue Cortex A8 core was measured to process an input block in
@	~990 cycles.

@ August 2010.
@
@ Rescheduling for dual-issue pipeline resulted in 13% improvement on
@ Cortex A8 core and in absolute terms ~870 cycles per input block
@ [or 13.6 cycles per byte].

@ February 2011.
@
@ Profiler-assisted and platform-specific optimization resulted in 10%
@ improvement on Cortex A8 core and 12.2 cycles per byte.

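@ Register usage, inferred from the code below. The glue code is
@ expected to call this as, roughly (names here are illustrative,
@ not the kernel's exact prototype):
@
@	void sha1_block_data_order(u32 *state, const u8 *data, int blocks);
@
@	r0	- pointer to the 5-word SHA-1 state (A..E)
@	r1	- pointer to the input data
@	r2	- number of 64-byte input blocks
@	r3-r7	- working variables A,B,C,D,E
@	r8	- round constant K_xx_xx
@	r9-r12	- scratch
@	r14	- pointer into the 16-word X[] frame on the stack
@		  (lr is saved on entry and repurposed)
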
#include <linux/linkage.h>

.text

.align	2
ENTRY(sha1_block_data_order)
	stmdb	sp!,{r4-r12,lr}
	add	r2,r1,r2,lsl#6		@ r2 to point at the end of r1
	ldmia	r0,{r3,r4,r5,r6,r7}
.Lloop:
	ldr	r8,.LK_00_19
	mov	r14,sp
	sub	sp,sp,#15*4
	mov	r5,r5,ror#30
	mov	r6,r6,ror#30
	mov	r7,r7,ror#30		@ [6]
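
@ Note: C, D and E enter the loop rotated left by 2 (the ror#30 above),
@ so that each round's B = ROL(B,30) step can be folded into the ror#2
@ operand shifts below instead of costing a separate instruction; the
@ epilogue at .L_done undoes this with ror#2. Rounds 0-19 compute the
@ round function via the identity
@
@	F_00_19(B,C,D) = (B & (C ^ D)) ^ D	( == (B & C) | (~B & D) )
@
@ Each of the first 16 rounds also loads one big-endian word of input:
@ cores below ARMv7 are not relied upon for unaligned ldr, so the word
@ is assembled from four ldrb's; the ARMv7 path uses a plain (possibly
@ unaligned) ldr, byte-swapped with rev on little-endian.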
.L_00_15:
#if __ARM_ARCH__<7
	ldrb	r10,[r1,#2]
	ldrb	r9,[r1,#3]
	ldrb	r11,[r1,#1]
	add	r7,r8,r7,ror#2		@ E+=K_00_19
	ldrb	r12,[r1],#4
	orr	r9,r9,r10,lsl#8
	eor	r10,r5,r6		@ F_xx_xx
	orr	r9,r9,r11,lsl#16
	add	r7,r7,r3,ror#27		@ E+=ROR(A,27)
	orr	r9,r9,r12,lsl#24
#else
	ldr	r9,[r1],#4		@ handles unaligned
	add	r7,r8,r7,ror#2		@ E+=K_00_19
	eor	r10,r5,r6		@ F_xx_xx
	add	r7,r7,r3,ror#27		@ E+=ROR(A,27)
#ifdef __ARMEL__
	rev	r9,r9			@ byte swap
#endif
#endif
	and	r10,r4,r10,ror#2
	add	r7,r7,r9		@ E+=X[i]
	eor	r10,r10,r6,ror#2	@ F_00_19(B,C,D)
	str	r9,[r14,#-4]!
	add	r7,r7,r10		@ E+=F_00_19(B,C,D)
#if __ARM_ARCH__<7
	ldrb	r10,[r1,#2]
	ldrb	r9,[r1,#3]
	ldrb	r11,[r1,#1]
	add	r6,r8,r6,ror#2		@ E+=K_00_19
	ldrb	r12,[r1],#4
	orr	r9,r9,r10,lsl#8
	eor	r10,r4,r5		@ F_xx_xx
	orr	r9,r9,r11,lsl#16
	add	r6,r6,r7,ror#27		@ E+=ROR(A,27)
	orr	r9,r9,r12,lsl#24
#else
	ldr	r9,[r1],#4		@ handles unaligned
	add	r6,r8,r6,ror#2		@ E+=K_00_19
	eor	r10,r4,r5		@ F_xx_xx
	add	r6,r6,r7,ror#27		@ E+=ROR(A,27)
#ifdef __ARMEL__
	rev	r9,r9			@ byte swap
#endif
#endif
	and	r10,r3,r10,ror#2
	add	r6,r6,r9		@ E+=X[i]
	eor	r10,r10,r5,ror#2	@ F_00_19(B,C,D)
	str	r9,[r14,#-4]!
	add	r6,r6,r10		@ E+=F_00_19(B,C,D)
#if __ARM_ARCH__<7
	ldrb	r10,[r1,#2]
	ldrb	r9,[r1,#3]
	ldrb	r11,[r1,#1]
	add	r5,r8,r5,ror#2		@ E+=K_00_19
	ldrb	r12,[r1],#4
	orr	r9,r9,r10,lsl#8
	eor	r10,r3,r4		@ F_xx_xx
	orr	r9,r9,r11,lsl#16
	add	r5,r5,r6,ror#27		@ E+=ROR(A,27)
	orr	r9,r9,r12,lsl#24
#else
	ldr	r9,[r1],#4		@ handles unaligned
	add	r5,r8,r5,ror#2		@ E+=K_00_19
	eor	r10,r3,r4		@ F_xx_xx
	add	r5,r5,r6,ror#27		@ E+=ROR(A,27)
#ifdef __ARMEL__
	rev	r9,r9			@ byte swap
#endif
#endif
	and	r10,r7,r10,ror#2
	add	r5,r5,r9		@ E+=X[i]
	eor	r10,r10,r4,ror#2	@ F_00_19(B,C,D)
	str	r9,[r14,#-4]!
	add	r5,r5,r10		@ E+=F_00_19(B,C,D)
#if __ARM_ARCH__<7
	ldrb	r10,[r1,#2]
	ldrb	r9,[r1,#3]
	ldrb	r11,[r1,#1]
	add	r4,r8,r4,ror#2		@ E+=K_00_19
	ldrb	r12,[r1],#4
	orr	r9,r9,r10,lsl#8
	eor	r10,r7,r3		@ F_xx_xx
	orr	r9,r9,r11,lsl#16
	add	r4,r4,r5,ror#27		@ E+=ROR(A,27)
	orr	r9,r9,r12,lsl#24
#else
	ldr	r9,[r1],#4		@ handles unaligned
	add	r4,r8,r4,ror#2		@ E+=K_00_19
	eor	r10,r7,r3		@ F_xx_xx
	add	r4,r4,r5,ror#27		@ E+=ROR(A,27)
#ifdef __ARMEL__
	rev	r9,r9			@ byte swap
#endif
#endif
	and	r10,r6,r10,ror#2
	add	r4,r4,r9		@ E+=X[i]
	eor	r10,r10,r3,ror#2	@ F_00_19(B,C,D)
	str	r9,[r14,#-4]!
	add	r4,r4,r10		@ E+=F_00_19(B,C,D)
#if __ARM_ARCH__<7
	ldrb	r10,[r1,#2]
	ldrb	r9,[r1,#3]
	ldrb	r11,[r1,#1]
	add	r3,r8,r3,ror#2		@ E+=K_00_19
	ldrb	r12,[r1],#4
	orr	r9,r9,r10,lsl#8
	eor	r10,r6,r7		@ F_xx_xx
	orr	r9,r9,r11,lsl#16
	add	r3,r3,r4,ror#27		@ E+=ROR(A,27)
	orr	r9,r9,r12,lsl#24
#else
	ldr	r9,[r1],#4		@ handles unaligned
	add	r3,r8,r3,ror#2		@ E+=K_00_19
	eor	r10,r6,r7		@ F_xx_xx
	add	r3,r3,r4,ror#27		@ E+=ROR(A,27)
#ifdef __ARMEL__
	rev	r9,r9			@ byte swap
#endif
#endif
	and	r10,r5,r10,ror#2
	add	r3,r3,r9		@ E+=X[i]
	eor	r10,r10,r7,ror#2	@ F_00_19(B,C,D)
	str	r9,[r14,#-4]!
	add	r3,r3,r10		@ E+=F_00_19(B,C,D)
	cmp	r14,sp
	bne	.L_00_15		@ [((11+4)*5+2)*3]
	sub	sp,sp,#25*4
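
@ Round 15 below still loads input; rounds 16-19 then switch to the
@ X[] schedule update. The 25-word drop of sp above makes room for the
@ 5 remaining stores of this pass (rounds 15-19) plus the 20 stores of
@ the following 20-round pass, so r14 meets sp again at each loop-end
@ comparison.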
#if __ARM_ARCH__<7
	ldrb	r10,[r1,#2]
	ldrb	r9,[r1,#3]
	ldrb	r11,[r1,#1]
	add	r7,r8,r7,ror#2		@ E+=K_00_19
	ldrb	r12,[r1],#4
	orr	r9,r9,r10,lsl#8
	eor	r10,r5,r6		@ F_xx_xx
	orr	r9,r9,r11,lsl#16
	add	r7,r7,r3,ror#27		@ E+=ROR(A,27)
	orr	r9,r9,r12,lsl#24
#else
	ldr	r9,[r1],#4		@ handles unaligned
	add	r7,r8,r7,ror#2		@ E+=K_00_19
	eor	r10,r5,r6		@ F_xx_xx
	add	r7,r7,r3,ror#27		@ E+=ROR(A,27)
#ifdef __ARMEL__
	rev	r9,r9			@ byte swap
#endif
#endif
	and	r10,r4,r10,ror#2
	add	r7,r7,r9		@ E+=X[i]
	eor	r10,r10,r6,ror#2	@ F_00_19(B,C,D)
	str	r9,[r14,#-4]!
	add	r7,r7,r10		@ E+=F_00_19(B,C,D)
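
@ From round 16 onward each round first computes the next schedule word
@
@	X[i] = ROL(X[i-3] ^ X[i-8] ^ X[i-14] ^ X[i-16], 1)
@
@ With r14 walking down one word per store, the loads at #15*4, #13*4,
@ #7*4 and #2*4 pick up X[i-16], X[i-14], X[i-8] and X[i-3], and the
@ two ror#31 operand shifts together implement the rotate left by 1.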
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r6,r8,r6,ror#2		@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12		@ 1 cycle stall
	eor	r10,r4,r5		@ F_xx_xx
	mov	r9,r9,ror#31
	add	r6,r6,r7,ror#27		@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	and	r10,r3,r10,ror#2	@ F_xx_xx
					@ F_xx_xx
	add	r6,r6,r9		@ E+=X[i]
	eor	r10,r10,r5,ror#2	@ F_00_19(B,C,D)
	add	r6,r6,r10		@ E+=F_00_19(B,C,D)
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r5,r8,r5,ror#2		@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12		@ 1 cycle stall
	eor	r10,r3,r4		@ F_xx_xx
	mov	r9,r9,ror#31
	add	r5,r5,r6,ror#27		@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	and	r10,r7,r10,ror#2	@ F_xx_xx
					@ F_xx_xx
	add	r5,r5,r9		@ E+=X[i]
	eor	r10,r10,r4,ror#2	@ F_00_19(B,C,D)
	add	r5,r5,r10		@ E+=F_00_19(B,C,D)
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r4,r8,r4,ror#2		@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12		@ 1 cycle stall
	eor	r10,r7,r3		@ F_xx_xx
	mov	r9,r9,ror#31
	add	r4,r4,r5,ror#27		@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	and	r10,r6,r10,ror#2	@ F_xx_xx
					@ F_xx_xx
	add	r4,r4,r9		@ E+=X[i]
	eor	r10,r10,r3,ror#2	@ F_00_19(B,C,D)
	add	r4,r4,r10		@ E+=F_00_19(B,C,D)
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r3,r8,r3,ror#2		@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12		@ 1 cycle stall
	eor	r10,r6,r7		@ F_xx_xx
	mov	r9,r9,ror#31
	add	r3,r3,r4,ror#27		@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	and	r10,r5,r10,ror#2	@ F_xx_xx
					@ F_xx_xx
	add	r3,r3,r9		@ E+=X[i]
	eor	r10,r10,r7,ror#2	@ F_00_19(B,C,D)
	add	r3,r3,r10		@ E+=F_00_19(B,C,D)

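@ Rounds 20-39 and 60-79 share one body (F = B^C^D, different K), so
@ the loop below is entered twice: cmn sp,#0 clears the carry flag for
@ the 20-39 pass, and cmp sp,#0 (after the 40-59 rounds) sets it for
@ the 60-79 pass; bcs then falls through to .L_done only on the second
@ pass. teq is used for the loop-end comparison because, unlike cmp,
@ it leaves the carry flag untouched here.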
	ldr	r8,.LK_20_39		@ [+15+16*4]
	cmn	sp,#0			@ [+3], clear carry to denote 20_39
.L_20_39_or_60_79:
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r7,r8,r7,ror#2		@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12		@ 1 cycle stall
	eor	r10,r5,r6		@ F_xx_xx
	mov	r9,r9,ror#31
	add	r7,r7,r3,ror#27		@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	eor	r10,r4,r10,ror#2	@ F_xx_xx
					@ F_xx_xx
	add	r7,r7,r9		@ E+=X[i]
	add	r7,r7,r10		@ E+=F_20_39(B,C,D)
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r6,r8,r6,ror#2		@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12		@ 1 cycle stall
	eor	r10,r4,r5		@ F_xx_xx
	mov	r9,r9,ror#31
	add	r6,r6,r7,ror#27		@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	eor	r10,r3,r10,ror#2	@ F_xx_xx
					@ F_xx_xx
	add	r6,r6,r9		@ E+=X[i]
	add	r6,r6,r10		@ E+=F_20_39(B,C,D)
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r5,r8,r5,ror#2		@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12		@ 1 cycle stall
	eor	r10,r3,r4		@ F_xx_xx
	mov	r9,r9,ror#31
	add	r5,r5,r6,ror#27		@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	eor	r10,r7,r10,ror#2	@ F_xx_xx
					@ F_xx_xx
	add	r5,r5,r9		@ E+=X[i]
	add	r5,r5,r10		@ E+=F_20_39(B,C,D)
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r4,r8,r4,ror#2		@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12		@ 1 cycle stall
	eor	r10,r7,r3		@ F_xx_xx
	mov	r9,r9,ror#31
	add	r4,r4,r5,ror#27		@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	eor	r10,r6,r10,ror#2	@ F_xx_xx
					@ F_xx_xx
	add	r4,r4,r9		@ E+=X[i]
	add	r4,r4,r10		@ E+=F_20_39(B,C,D)
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r3,r8,r3,ror#2		@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12		@ 1 cycle stall
	eor	r10,r6,r7		@ F_xx_xx
	mov	r9,r9,ror#31
	add	r3,r3,r4,ror#27		@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	eor	r10,r5,r10,ror#2	@ F_xx_xx
					@ F_xx_xx
	add	r3,r3,r9		@ E+=X[i]
	add	r3,r3,r10		@ E+=F_20_39(B,C,D)
 ARM(	teq	r14,sp		)	@ preserve carry
 THUMB(	mov	r11,sp		)
 THUMB(	teq	r14,r11		)	@ preserve carry
	bne	.L_20_39_or_60_79	@ [+((12+3)*5+2)*4]
	bcs	.L_done			@ [+((12+3)*5+2)*4], spare 300 bytes

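@ Rounds 40-59 use the majority function, computed here as
@
@	F_40_59(B,C,D) = (B & (C ^ D)) + (C & D)
@
@ which equals (B&C)|(B&D)|(C&D): the two terms are bitwise disjoint
@ (a set bit of C^D implies the same bit of C&D is clear), so adding
@ them produces no carries and the extra add per round below can stand
@ in for the or.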
	ldr	r8,.LK_40_59
	sub	sp,sp,#20*4		@ [+2]
.L_40_59:
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r7,r8,r7,ror#2		@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12		@ 1 cycle stall
	eor	r10,r5,r6		@ F_xx_xx
	mov	r9,r9,ror#31
	add	r7,r7,r3,ror#27		@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	and	r10,r4,r10,ror#2	@ F_xx_xx
	and	r11,r5,r6		@ F_xx_xx
	add	r7,r7,r9		@ E+=X[i]
	add	r7,r7,r10		@ E+=F_40_59(B,C,D)
	add	r7,r7,r11,ror#2
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r6,r8,r6,ror#2		@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12		@ 1 cycle stall
	eor	r10,r4,r5		@ F_xx_xx
	mov	r9,r9,ror#31
	add	r6,r6,r7,ror#27		@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	and	r10,r3,r10,ror#2	@ F_xx_xx
	and	r11,r4,r5		@ F_xx_xx
	add	r6,r6,r9		@ E+=X[i]
	add	r6,r6,r10		@ E+=F_40_59(B,C,D)
	add	r6,r6,r11,ror#2
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r5,r8,r5,ror#2		@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12		@ 1 cycle stall
	eor	r10,r3,r4		@ F_xx_xx
	mov	r9,r9,ror#31
	add	r5,r5,r6,ror#27		@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	and	r10,r7,r10,ror#2	@ F_xx_xx
	and	r11,r3,r4		@ F_xx_xx
	add	r5,r5,r9		@ E+=X[i]
	add	r5,r5,r10		@ E+=F_40_59(B,C,D)
	add	r5,r5,r11,ror#2
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r4,r8,r4,ror#2		@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12		@ 1 cycle stall
	eor	r10,r7,r3		@ F_xx_xx
	mov	r9,r9,ror#31
	add	r4,r4,r5,ror#27		@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	and	r10,r6,r10,ror#2	@ F_xx_xx
	and	r11,r7,r3		@ F_xx_xx
	add	r4,r4,r9		@ E+=X[i]
	add	r4,r4,r10		@ E+=F_40_59(B,C,D)
	add	r4,r4,r11,ror#2
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r3,r8,r3,ror#2		@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12		@ 1 cycle stall
	eor	r10,r6,r7		@ F_xx_xx
	mov	r9,r9,ror#31
	add	r3,r3,r4,ror#27		@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	and	r10,r5,r10,ror#2	@ F_xx_xx
	and	r11,r6,r7		@ F_xx_xx
	add	r3,r3,r9		@ E+=X[i]
	add	r3,r3,r10		@ E+=F_40_59(B,C,D)
	add	r3,r3,r11,ror#2
	cmp	r14,sp
	bne	.L_40_59		@ [+((12+5)*5+2)*4]

	ldr	r8,.LK_60_79
	sub	sp,sp,#20*4
	cmp	sp,#0			@ set carry to denote 60_79
	b	.L_20_39_or_60_79	@ [+4], spare 300 bytes
.L_done:
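@ Epilogue: C, D and E are still in their rotated-left-by-2
@ representation, hence the ror#2 corrections before the working
@ variables are added back into the state; then loop until r1 reaches
@ the end-of-input pointer computed into r2 at entry.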
	add	sp,sp,#80*4		@ "deallocate" stack frame
	ldmia	r0,{r8,r9,r10,r11,r12}
	add	r3,r8,r3
	add	r4,r9,r4
	add	r5,r10,r5,ror#2
	add	r6,r11,r6,ror#2
	add	r7,r12,r7,ror#2
	stmia	r0,{r3,r4,r5,r6,r7}
	teq	r1,r2
	bne	.Lloop			@ [+18], total 1307

	ldmia	sp!,{r4-r12,pc}
.align	2
.LK_00_19:	.word	0x5a827999
.LK_20_39:	.word	0x6ed9eba1
.LK_40_59:	.word	0x8f1bbcdc
.LK_60_79:	.word	0xca62c1d6
ENDPROC(sha1_block_data_order)
.asciz	"SHA1 block transform for ARMv4, CRYPTOGAMS by <appro@openssl.org>"
.align	2