Back to home page

LXR

 
 

    


0001 #define __ARM_ARCH__ __LINUX_ARM_ARCH__
0002 @ ====================================================================
0003 @ Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
0004 @ project. The module is, however, dual licensed under OpenSSL and
0005 @ CRYPTOGAMS licenses depending on where you obtain it. For further
0006 @ details see http://www.openssl.org/~appro/cryptogams/.
0007 @ ====================================================================
0008 
0009 @ AES for ARMv4
0010 
0011 @ January 2007.
0012 @
0013 @ Code uses a single 1K S-box and is >2 times faster than code generated
0014 @ by gcc-3.4.1. This is thanks to a unique feature of the ARMv4 ISA, which
0015 @ allows merging a logical or arithmetic operation with a shift or rotate
0016 @ in one instruction and emitting the combined result every cycle. The
0017 @ module is endian-neutral. The performance is ~42 cycles/byte for a
0018 @ 128-bit key [on single-issue Xscale PXA250 core].
0019 
0020 @ May 2007.
0021 @
0022 @ AES_set_[en|de]crypt_key is added.
0023 
0024 @ July 2010.
0025 @
0026 @ Rescheduling for dual-issue pipeline resulted in 12% improvement on
0027 @ Cortex A8 core and ~25 cycles per byte processed with 128-bit key.
0028 
0029 @ February 2011.
0030 @
0031 @ Profiler-assisted and platform-specific optimization resulted in 16%
0032 @ improvement on Cortex A8 core and ~21.5 cycles per byte.
0033 
0034 @ A little glue here to select the correct code below for the ARM CPU
0035 @ that is being targeted.
0036 
0037 #include <linux/linkage.h>
0038 #include <asm/assembler.h>
0039 
0040 .text
0041 
0042 .type   AES_Te,%object
     @ AES_Te: lookup data for encryption and key expansion, laid out
     @ back to back:
     @   +0     256 32-bit words. The two middle bytes of every word are
     @          equal to the matching Te4 byte, so the byte at offset 2 of
     @          a word is the same in either byte order; the last
     @          encryption round reads the table that way.
     @   +1024  Te4: 256 single bytes, used by the key schedule.
     @   +1280  rcon: ten round constants, zero-padded to 16 words.
     @ .align 5 requests a 32-byte boundary (power-of-two form on ARM).
0043 .align  5
0044 AES_Te:
0045 .word   0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d
0046 .word   0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554
0047 .word   0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d
0048 .word   0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a
0049 .word   0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87
0050 .word   0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b
0051 .word   0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea
0052 .word   0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b
0053 .word   0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a
0054 .word   0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f
0055 .word   0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108
0056 .word   0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f
0057 .word   0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e
0058 .word   0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5
0059 .word   0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d
0060 .word   0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f
0061 .word   0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e
0062 .word   0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb
0063 .word   0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce
0064 .word   0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497
0065 .word   0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c
0066 .word   0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed
0067 .word   0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b
0068 .word   0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a
0069 .word   0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16
0070 .word   0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594
0071 .word   0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81
0072 .word   0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3
0073 .word   0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a
0074 .word   0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504
0075 .word   0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163
0076 .word   0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d
0077 .word   0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f
0078 .word   0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739
0079 .word   0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47
0080 .word   0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395
0081 .word   0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f
0082 .word   0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883
0083 .word   0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c
0084 .word   0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76
0085 .word   0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e
0086 .word   0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4
0087 .word   0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6
0088 .word   0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b
0089 .word   0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7
0090 .word   0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0
0091 .word   0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25
0092 .word   0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818
0093 .word   0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72
0094 .word   0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651
0095 .word   0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21
0096 .word   0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85
0097 .word   0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa
0098 .word   0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12
0099 .word   0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0
0100 .word   0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9
0101 .word   0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133
0102 .word   0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7
0103 .word   0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920
0104 .word   0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a
0105 .word   0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17
0106 .word   0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8
0107 .word   0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11
0108 .word   0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a
0109 @ Te4[256]
0110 .byte   0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
0111 .byte   0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
0112 .byte   0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
0113 .byte   0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
0114 .byte   0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
0115 .byte   0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
0116 .byte   0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
0117 .byte   0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
0118 .byte   0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
0119 .byte   0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
0120 .byte   0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
0121 .byte   0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
0122 .byte   0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
0123 .byte   0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
0124 .byte   0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
0125 .byte   0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
0126 .byte   0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
0127 .byte   0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
0128 .byte   0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
0129 .byte   0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
0130 .byte   0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
0131 .byte   0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
0132 .byte   0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
0133 .byte   0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
0134 .byte   0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
0135 .byte   0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
0136 .byte   0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
0137 .byte   0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
0138 .byte   0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
0139 .byte   0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
0140 .byte   0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
0141 .byte   0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
0142 @ rcon[]
0143 .word   0x01000000, 0x02000000, 0x04000000, 0x08000000
0144 .word   0x10000000, 0x20000000, 0x40000000, 0x80000000
0145 .word   0x1B000000, 0x36000000, 0, 0, 0, 0, 0, 0
0146 .size   AES_Te,.-AES_Te
0147 
0148 @ void AES_encrypt(const unsigned char *in, unsigned char *out,
0149 @        const AES_KEY *key) {
     @
     @ Encrypts one 16-byte block.
     @ In:   r0 = in, r1 = out, r2 = key (round keys; the core reads the
     @       round count from offset 240)
     @ Out:  16 bytes written to out; r4-r12 are restored from the stack
     @ Flow: load the block into r0-r3 as big-endian words, call
     @       _armv4_AES_encrypt with r10 = AES_Te and r11 = key, then
     @       store r0-r3 back to out in the same byte order.
0150 .align  5
0151 ENTRY(AES_encrypt)
0152     adr r3,AES_encrypt      @ r3 = run-time address of this entry point
0153     stmdb   sp!,{r1,r4-r12,lr}  @ r1 (out) ends up at the new sp
0154     mov r12,r0      @ inp
0155     mov r11,r2      @ key pointer for the core
0156     sub r10,r3,#AES_encrypt-AES_Te  @ Te
0157 #if __ARM_ARCH__<7
0158     ldrb    r0,[r12,#3] @ load input data in endian-neutral
0159     ldrb    r4,[r12,#2] @ manner...
0160     ldrb    r5,[r12,#1]
0161     ldrb    r6,[r12,#0]
0162     orr r0,r0,r4,lsl#8
0163     ldrb    r1,[r12,#7]
0164     orr r0,r0,r5,lsl#16
0165     ldrb    r4,[r12,#6]
0166     orr r0,r0,r6,lsl#24
0167     ldrb    r5,[r12,#5]
0168     ldrb    r6,[r12,#4]
0169     orr r1,r1,r4,lsl#8
0170     ldrb    r2,[r12,#11]
0171     orr r1,r1,r5,lsl#16
0172     ldrb    r4,[r12,#10]
0173     orr r1,r1,r6,lsl#24
0174     ldrb    r5,[r12,#9]
0175     ldrb    r6,[r12,#8]
0176     orr r2,r2,r4,lsl#8
0177     ldrb    r3,[r12,#15]
0178     orr r2,r2,r5,lsl#16
0179     ldrb    r4,[r12,#14]
0180     orr r2,r2,r6,lsl#24
0181     ldrb    r5,[r12,#13]
0182     ldrb    r6,[r12,#12]
0183     orr r3,r3,r4,lsl#8
0184     orr r3,r3,r5,lsl#16
0185     orr r3,r3,r6,lsl#24
0186 #else
     @ ARMv7 and later: whole-word loads, byte-reversed on little-endian
     @ builds so r0-r3 hold the same big-endian words as the path above.
     @ NOTE(review): in is a byte pointer, so these ldr presumably rely
     @ on unaligned word access being permitted - confirm.
0187     ldr r0,[r12,#0]
0188     ldr r1,[r12,#4]
0189     ldr r2,[r12,#8]
0190     ldr r3,[r12,#12]
0191 #ifdef __ARMEL__
0192     rev r0,r0
0193     rev r1,r1
0194     rev r2,r2
0195     rev r3,r3
0196 #endif
0197 #endif
0198     bl  _armv4_AES_encrypt  @ transforms the state in r0-r3
0199 
0200     ldr r12,[sp],#4     @ pop out
0201 #if __ARM_ARCH__>=7
0202 #ifdef __ARMEL__
0203     rev r0,r0
0204     rev r1,r1
0205     rev r2,r2
0206     rev r3,r3
0207 #endif
0208     str r0,[r12,#0]
0209     str r1,[r12,#4]
0210     str r2,[r12,#8]
0211     str r3,[r12,#12]
0212 #else
0213     mov r4,r0,lsr#24        @ write output in endian-neutral
0214     mov r5,r0,lsr#16        @ manner...
0215     mov r6,r0,lsr#8
0216     strb    r4,[r12,#0]
0217     strb    r5,[r12,#1]
0218     mov r4,r1,lsr#24
0219     strb    r6,[r12,#2]
0220     mov r5,r1,lsr#16
0221     strb    r0,[r12,#3]
0222     mov r6,r1,lsr#8
0223     strb    r4,[r12,#4]
0224     strb    r5,[r12,#5]
0225     mov r4,r2,lsr#24
0226     strb    r6,[r12,#6]
0227     mov r5,r2,lsr#16
0228     strb    r1,[r12,#7]
0229     mov r6,r2,lsr#8
0230     strb    r4,[r12,#8]
0231     strb    r5,[r12,#9]
0232     mov r4,r3,lsr#24
0233     strb    r6,[r12,#10]
0234     mov r5,r3,lsr#16
0235     strb    r2,[r12,#11]
0236     mov r6,r3,lsr#8
0237     strb    r4,[r12,#12]
0238     strb    r5,[r12,#13]
0239     strb    r6,[r12,#14]
0240     strb    r3,[r12,#15]
0241 #endif
0242     ldmia   sp!,{r4-r12,pc} @ restore r4-r12; saved lr is loaded into pc
0243 ENDPROC(AES_encrypt)
0244 
0245 .type   _armv4_AES_encrypt,%function
     @ Encryption core, reached with bl from AES_encrypt.
     @ In:   r0-r3 = state words, r10 = AES_Te, r11 = key schedule
     @ Out:  r0-r3 = transformed state
     @ Uses: r4-r9 and r12 as scratch; lr holds the 0xff byte mask while
     @       the return address sits on the stack. r11 is left pointing
     @       at the last round key; r10 is restored before returning.
     @ Each pass of .Lenc_loop is one round built from word lookups in
     @ the single table at r10. The Te1/Te2/Te3 values named in the
     @ comments are the loaded word combined with ror #8/#16/#24.
0246 .align  2
0247 _armv4_AES_encrypt:
0248     str lr,[sp,#-4]!        @ push lr
0249     ldmia   r11!,{r4-r7}    @ first round key; r11 += 16
0250     eor r0,r0,r4
0251     ldr r12,[r11,#240-16]   @ round count at key+240 (r11 is key+16 here)
0252     eor r1,r1,r5
0253     eor r2,r2,r6
0254     eor r3,r3,r7
0255     sub r12,r12,#1      @ loop runs rounds-1 times; last round is below
0256     mov lr,#255         @ byte mask
0257 
     @ split s0 into table indices for the first loop pass
0258     and r7,lr,r0
0259     and r8,lr,r0,lsr#8
0260     and r9,lr,r0,lsr#16
0261     mov r0,r0,lsr#24
0262 .Lenc_loop:
0263     ldr r4,[r10,r7,lsl#2]   @ Te3[s0>>0]
0264     and r7,lr,r1,lsr#16 @ i0
0265     ldr r5,[r10,r8,lsl#2]   @ Te2[s0>>8]
0266     and r8,lr,r1
0267     ldr r6,[r10,r9,lsl#2]   @ Te1[s0>>16]
0268     and r9,lr,r1,lsr#8
0269     ldr r0,[r10,r0,lsl#2]   @ Te0[s0>>24]
0270     mov r1,r1,lsr#24
0271 
0272     ldr r7,[r10,r7,lsl#2]   @ Te1[s1>>16]
0273     ldr r8,[r10,r8,lsl#2]   @ Te3[s1>>0]
0274     ldr r9,[r10,r9,lsl#2]   @ Te2[s1>>8]
0275     eor r0,r0,r7,ror#8
0276     ldr r1,[r10,r1,lsl#2]   @ Te0[s1>>24]
0277     and r7,lr,r2,lsr#8  @ i0
0278     eor r5,r5,r8,ror#8
0279     and r8,lr,r2,lsr#16 @ i1
0280     eor r6,r6,r9,ror#8
0281     and r9,lr,r2
0282     ldr r7,[r10,r7,lsl#2]   @ Te2[s2>>8]
0283     eor r1,r1,r4,ror#24
0284     ldr r8,[r10,r8,lsl#2]   @ Te1[s2>>16]
0285     mov r2,r2,lsr#24
0286 
0287     ldr r9,[r10,r9,lsl#2]   @ Te3[s2>>0]
0288     eor r0,r0,r7,ror#16
0289     ldr r2,[r10,r2,lsl#2]   @ Te0[s2>>24]
0290     and r7,lr,r3        @ i0
0291     eor r1,r1,r8,ror#8
0292     and r8,lr,r3,lsr#8  @ i1
0293     eor r6,r6,r9,ror#16
0294     and r9,lr,r3,lsr#16 @ i2
0295     ldr r7,[r10,r7,lsl#2]   @ Te3[s3>>0]
0296     eor r2,r2,r5,ror#16
0297     ldr r8,[r10,r8,lsl#2]   @ Te2[s3>>8]
0298     mov r3,r3,lsr#24
0299 
0300     ldr r9,[r10,r9,lsl#2]   @ Te1[s3>>16]
0301     eor r0,r0,r7,ror#24
0302     ldr r7,[r11],#16        @ next round key word 0; r11 += 16
0303     eor r1,r1,r8,ror#16
0304     ldr r3,[r10,r3,lsl#2]   @ Te0[s3>>24]
0305     eor r2,r2,r9,ror#8
0306     ldr r4,[r11,#-12]       @ round key word 1
0307     eor r3,r3,r6,ror#8
0308 
0309     ldr r5,[r11,#-8]        @ round key word 2
0310     eor r0,r0,r7
0311     ldr r6,[r11,#-4]        @ round key word 3
0312     and r7,lr,r0            @ indices of the new s0 for the next pass
0313     eor r1,r1,r4
0314     and r8,lr,r0,lsr#8
0315     eor r2,r2,r5
0316     and r9,lr,r0,lsr#16
0317     eor r3,r3,r6
0318     mov r0,r0,lsr#24
0319 
0320     subs    r12,r12,#1
0321     bne .Lenc_loop
0322 
     @ Last round: byte loads only. With r10 advanced by 2, an index
     @ scaled by 4 picks the byte at offset 2 of each table word; the
     @ bytes are put back together with shifts instead of rotates.
0323     add r10,r10,#2
0324 
0325     ldrb    r4,[r10,r7,lsl#2]   @ Te4[s0>>0]
0326     and r7,lr,r1,lsr#16 @ i0
0327     ldrb    r5,[r10,r8,lsl#2]   @ Te4[s0>>8]
0328     and r8,lr,r1
0329     ldrb    r6,[r10,r9,lsl#2]   @ Te4[s0>>16]
0330     and r9,lr,r1,lsr#8
0331     ldrb    r0,[r10,r0,lsl#2]   @ Te4[s0>>24]
0332     mov r1,r1,lsr#24
0333 
0334     ldrb    r7,[r10,r7,lsl#2]   @ Te4[s1>>16]
0335     ldrb    r8,[r10,r8,lsl#2]   @ Te4[s1>>0]
0336     ldrb    r9,[r10,r9,lsl#2]   @ Te4[s1>>8]
0337     eor r0,r7,r0,lsl#8
0338     ldrb    r1,[r10,r1,lsl#2]   @ Te4[s1>>24]
0339     and r7,lr,r2,lsr#8  @ i0
0340     eor r5,r8,r5,lsl#8
0341     and r8,lr,r2,lsr#16 @ i1
0342     eor r6,r9,r6,lsl#8
0343     and r9,lr,r2
0344     ldrb    r7,[r10,r7,lsl#2]   @ Te4[s2>>8]
0345     eor r1,r4,r1,lsl#24
0346     ldrb    r8,[r10,r8,lsl#2]   @ Te4[s2>>16]
0347     mov r2,r2,lsr#24
0348 
0349     ldrb    r9,[r10,r9,lsl#2]   @ Te4[s2>>0]
0350     eor r0,r7,r0,lsl#8
0351     ldrb    r2,[r10,r2,lsl#2]   @ Te4[s2>>24]
0352     and r7,lr,r3        @ i0
0353     eor r1,r1,r8,lsl#16
0354     and r8,lr,r3,lsr#8  @ i1
0355     eor r6,r9,r6,lsl#8
0356     and r9,lr,r3,lsr#16 @ i2
0357     ldrb    r7,[r10,r7,lsl#2]   @ Te4[s3>>0]
0358     eor r2,r5,r2,lsl#24
0359     ldrb    r8,[r10,r8,lsl#2]   @ Te4[s3>>8]
0360     mov r3,r3,lsr#24
0361 
0362     ldrb    r9,[r10,r9,lsl#2]   @ Te4[s3>>16]
0363     eor r0,r7,r0,lsl#8
0364     ldr r7,[r11,#0]         @ last round key; r11 is not advanced
0365     ldrb    r3,[r10,r3,lsl#2]   @ Te4[s3>>24]
0366     eor r1,r1,r8,lsl#8
0367     ldr r4,[r11,#4]
0368     eor r2,r2,r9,lsl#16
0369     ldr r5,[r11,#8]
0370     eor r3,r6,r3,lsl#24
0371     ldr r6,[r11,#12]
0372 
0373     eor r0,r0,r7
0374     eor r1,r1,r4
0375     eor r2,r2,r5
0376     eor r3,r3,r6
0377 
0378     sub r10,r10,#2      @ undo the +2 so r10 is the table base again
0379     ldr pc,[sp],#4      @ pop and return
0380 .size   _armv4_AES_encrypt,.-_armv4_AES_encrypt
0381 
0382 .align  5
     @ private_AES_set_encrypt_key: expand a raw AES key into round keys.
     @ In:   r0 = key bytes (inp), r1 = key length in bits, r2 = key
     @       schedule to fill in
     @ Out:  r0 = 0 on success, or -1 when r0 or r2 is zero or bits is
     @       not 128, 192 or 256 (nothing is written in that case).
     @       On success r2 again holds the schedule pointer.
     @ Schedule layout written here: round-key words from offset 0 and
     @ the round count (10, 12 or 14) at offset 240.
     @ Register roles after .Lok: r10 = Te4, r6 = rcon cursor,
     @ r11 = write cursor into the schedule, r12 = inp, then loop counter,
     @ lr = bits, then the 0xff byte mask.
     @ _armv4_AES_set_encrypt_key is a second label on the same address.
0383 ENTRY(private_AES_set_encrypt_key)
0384 _armv4_AES_set_encrypt_key:
0385     adr r3,_armv4_AES_set_encrypt_key   @ own address, used to find AES_Te
0386     teq r0,#0           @ reject a zero inp pointer
0387     moveq   r0,#-1
0388     beq .Labrt
0389     teq r2,#0           @ reject a zero schedule pointer
0390     moveq   r0,#-1
0391     beq .Labrt
0392 
0393     teq r1,#128
0394     beq .Lok
0395     teq r1,#192
0396     beq .Lok
0397     teq r1,#256
0398     movne   r0,#-1      @ any other bit length is an error
0399     bne .Labrt
0400 
0401 .Lok:   stmdb   sp!,{r4-r12,lr}
0402     sub r10,r3,#_armv4_AES_set_encrypt_key-AES_Te-1024  @ Te4
0403 
0404     mov r12,r0      @ inp
0405     mov lr,r1           @ bits
0406     mov r11,r2          @ key
0407 
     @ key bytes 0..15 -> rk[0..3] as big-endian words, kept in r0-r3 and
     @ stored to the schedule (r11 advances by 16)
0408 #if __ARM_ARCH__<7
0409     ldrb    r0,[r12,#3] @ load input data in endian-neutral
0410     ldrb    r4,[r12,#2] @ manner...
0411     ldrb    r5,[r12,#1]
0412     ldrb    r6,[r12,#0]
0413     orr r0,r0,r4,lsl#8
0414     ldrb    r1,[r12,#7]
0415     orr r0,r0,r5,lsl#16
0416     ldrb    r4,[r12,#6]
0417     orr r0,r0,r6,lsl#24
0418     ldrb    r5,[r12,#5]
0419     ldrb    r6,[r12,#4]
0420     orr r1,r1,r4,lsl#8
0421     ldrb    r2,[r12,#11]
0422     orr r1,r1,r5,lsl#16
0423     ldrb    r4,[r12,#10]
0424     orr r1,r1,r6,lsl#24
0425     ldrb    r5,[r12,#9]
0426     ldrb    r6,[r12,#8]
0427     orr r2,r2,r4,lsl#8
0428     ldrb    r3,[r12,#15]
0429     orr r2,r2,r5,lsl#16
0430     ldrb    r4,[r12,#14]
0431     orr r2,r2,r6,lsl#24
0432     ldrb    r5,[r12,#13]
0433     ldrb    r6,[r12,#12]
0434     orr r3,r3,r4,lsl#8
0435     str r0,[r11],#16
0436     orr r3,r3,r5,lsl#16
0437     str r1,[r11,#-12]
0438     orr r3,r3,r6,lsl#24
0439     str r2,[r11,#-8]
0440     str r3,[r11,#-4]
0441 #else
0442     ldr r0,[r12,#0]
0443     ldr r1,[r12,#4]
0444     ldr r2,[r12,#8]
0445     ldr r3,[r12,#12]
0446 #ifdef __ARMEL__
0447     rev r0,r0
0448     rev r1,r1
0449     rev r2,r2
0450     rev r3,r3
0451 #endif
0452     str r0,[r11],#16
0453     str r1,[r11,#-12]
0454     str r2,[r11,#-8]
0455     str r3,[r11,#-4]
0456 #endif
0457 
0458     teq lr,#128
0459     bne .Lnot128
0460     mov r12,#10         @ 10 rounds, also the loop count
0461     str r12,[r11,#240-16]   @ round count at key+240 (r11 is key+16)
0462     add r6,r10,#256         @ rcon
0463     mov lr,#255         @ byte mask
0464 
     @ 128-bit loop, 4 new words per pass. r5 = Te4 applied to each byte
     @ of rk[3] after rotating the word left by one byte, XOR rcon.
0465 .L128_loop:
0466     and r5,lr,r3,lsr#24
0467     and r7,lr,r3,lsr#16
0468     ldrb    r5,[r10,r5]
0469     and r8,lr,r3,lsr#8
0470     ldrb    r7,[r10,r7]
0471     and r9,lr,r3
0472     ldrb    r8,[r10,r8]
0473     orr r5,r5,r7,lsl#24
0474     ldrb    r9,[r10,r9]
0475     orr r5,r5,r8,lsl#16
0476     ldr r4,[r6],#4          @ rcon[i++]
0477     orr r5,r5,r9,lsl#8
0478     eor r5,r5,r4
0479     eor r0,r0,r5            @ rk[4]=rk[0]^...
0480     eor r1,r1,r0            @ rk[5]=rk[1]^rk[4]
0481     str r0,[r11],#16
0482     eor r2,r2,r1            @ rk[6]=rk[2]^rk[5]
0483     str r1,[r11,#-12]
0484     eor r3,r3,r2            @ rk[7]=rk[3]^rk[6]
0485     str r2,[r11,#-8]
0486     subs    r12,r12,#1
0487     str r3,[r11,#-4]
0488     bne .L128_loop
0489     sub r2,r11,#176     @ r2 = key again (16 + 10*16 bytes were written)
0490     b   .Ldone
0491 
     @ 192- and 256-bit keys: key bytes 16..23 -> rk[4..5] (r11 += 8)
0492 .Lnot128:
0493 #if __ARM_ARCH__<7
0494     ldrb    r8,[r12,#19]
0495     ldrb    r4,[r12,#18]
0496     ldrb    r5,[r12,#17]
0497     ldrb    r6,[r12,#16]
0498     orr r8,r8,r4,lsl#8
0499     ldrb    r9,[r12,#23]
0500     orr r8,r8,r5,lsl#16
0501     ldrb    r4,[r12,#22]
0502     orr r8,r8,r6,lsl#24
0503     ldrb    r5,[r12,#21]
0504     ldrb    r6,[r12,#20]
0505     orr r9,r9,r4,lsl#8
0506     orr r9,r9,r5,lsl#16
0507     str r8,[r11],#8
0508     orr r9,r9,r6,lsl#24
0509     str r9,[r11,#-4]
0510 #else
0511     ldr r8,[r12,#16]
0512     ldr r9,[r12,#20]
0513 #ifdef __ARMEL__
0514     rev r8,r8
0515     rev r9,r9
0516 #endif
0517     str r8,[r11],#8
0518     str r9,[r11,#-4]
0519 #endif
0520 
0521     teq lr,#192
0522     bne .Lnot192
0523     mov r12,#12
0524     str r12,[r11,#240-24]   @ round count at key+240 (r11 is key+24)
0525     add r6,r10,#256         @ rcon
0526     mov lr,#255         @ byte mask
0527     mov r12,#8          @ loop count
0528 
     @ 192-bit loop, 6 new words per pass (only 4 on the last pass).
     @ r9 enters holding the last word produced so far (rk[5] at first)
     @ and gets the same rotate / Te4 / rcon treatment as in the 128 loop.
0529 .L192_loop:
0530     and r5,lr,r9,lsr#24
0531     and r7,lr,r9,lsr#16
0532     ldrb    r5,[r10,r5]
0533     and r8,lr,r9,lsr#8
0534     ldrb    r7,[r10,r7]
0535     and r9,lr,r9
0536     ldrb    r8,[r10,r8]
0537     orr r5,r5,r7,lsl#24
0538     ldrb    r9,[r10,r9]
0539     orr r5,r5,r8,lsl#16
0540     ldr r4,[r6],#4          @ rcon[i++]
0541     orr r5,r5,r9,lsl#8
0542     eor r9,r5,r4
0543     eor r0,r0,r9            @ rk[6]=rk[0]^...
0544     eor r1,r1,r0            @ rk[7]=rk[1]^rk[6]
0545     str r0,[r11],#24
0546     eor r2,r2,r1            @ rk[8]=rk[2]^rk[7]
0547     str r1,[r11,#-20]
0548     eor r3,r3,r2            @ rk[9]=rk[3]^rk[8]
0549     str r2,[r11,#-16]
0550     subs    r12,r12,#1
0551     str r3,[r11,#-12]
0552     subeq   r2,r11,#216     @ last pass: r2 = key again (24 + 8*24)
0553     beq .Ldone
0554 
0555     ldr r7,[r11,#-32]       @ words 4 and 5 of the previous 6-word group
0556     ldr r8,[r11,#-28]
0557     eor r7,r7,r3            @ rk[10]=rk[4]^rk[9]
0558     eor r9,r8,r7            @ rk[11]=rk[5]^rk[10]
0559     str r7,[r11,#-8]
0560     str r9,[r11,#-4]
0561     b   .L192_loop
0562 
     @ 256-bit keys: key bytes 24..31 -> rk[6..7] (r11 += 8, now key+32)
0563 .Lnot192:
0564 #if __ARM_ARCH__<7
0565     ldrb    r8,[r12,#27]
0566     ldrb    r4,[r12,#26]
0567     ldrb    r5,[r12,#25]
0568     ldrb    r6,[r12,#24]
0569     orr r8,r8,r4,lsl#8
0570     ldrb    r9,[r12,#31]
0571     orr r8,r8,r5,lsl#16
0572     ldrb    r4,[r12,#30]
0573     orr r8,r8,r6,lsl#24
0574     ldrb    r5,[r12,#29]
0575     ldrb    r6,[r12,#28]
0576     orr r9,r9,r4,lsl#8
0577     orr r9,r9,r5,lsl#16
0578     str r8,[r11],#8
0579     orr r9,r9,r6,lsl#24
0580     str r9,[r11,#-4]
0581 #else
0582     ldr r8,[r12,#24]
0583     ldr r9,[r12,#28]
0584 #ifdef __ARMEL__
0585     rev r8,r8
0586     rev r9,r9
0587 #endif
0588     str r8,[r11],#8
0589     str r9,[r11,#-4]
0590 #endif
0591 
0592     mov r12,#14
0593     str r12,[r11,#240-32]   @ round count at key+240 (r11 is key+32)
0594     add r6,r10,#256         @ rcon
0595     mov lr,#255         @ byte mask
0596     mov r12,#7          @ loop count
0597 
     @ 256-bit loop, 8 new words per pass (only 4 on the last pass).
     @ First half: rotate / Te4 / rcon on r9 (rk[7] at first).
0598 .L256_loop:
0599     and r5,lr,r9,lsr#24
0600     and r7,lr,r9,lsr#16
0601     ldrb    r5,[r10,r5]
0602     and r8,lr,r9,lsr#8
0603     ldrb    r7,[r10,r7]
0604     and r9,lr,r9
0605     ldrb    r8,[r10,r8]
0606     orr r5,r5,r7,lsl#24
0607     ldrb    r9,[r10,r9]
0608     orr r5,r5,r8,lsl#16
0609     ldr r4,[r6],#4          @ rcon[i++]
0610     orr r5,r5,r9,lsl#8
0611     eor r9,r5,r4
0612     eor r0,r0,r9            @ rk[8]=rk[0]^...
0613     eor r1,r1,r0            @ rk[9]=rk[1]^rk[8]
0614     str r0,[r11],#32
0615     eor r2,r2,r1            @ rk[10]=rk[2]^rk[9]
0616     str r1,[r11,#-28]
0617     eor r3,r3,r2            @ rk[11]=rk[3]^rk[10]
0618     str r2,[r11,#-24]
0619     subs    r12,r12,#1
0620     str r3,[r11,#-20]
0621     subeq   r2,r11,#256     @ last pass: r2 = key again (32 + 7*32)
0622     beq .Ldone
0623 
     @ Second half: Te4 on each byte of r3 (the word just stored), with
     @ the bytes kept in place - no rotation and no rcon here.
0624     and r5,lr,r3
0625     and r7,lr,r3,lsr#8
0626     ldrb    r5,[r10,r5]
0627     and r8,lr,r3,lsr#16
0628     ldrb    r7,[r10,r7]
0629     and r9,lr,r3,lsr#24
0630     ldrb    r8,[r10,r8]
0631     orr r5,r5,r7,lsl#8
0632     ldrb    r9,[r10,r9]
0633     orr r5,r5,r8,lsl#16
0634     ldr r4,[r11,#-48]       @ words 4..7 of the previous 8-word group
0635     orr r5,r5,r9,lsl#24
0636 
0637     ldr r7,[r11,#-44]
0638     ldr r8,[r11,#-40]
0639     eor r4,r4,r5            @ rk[12]=rk[4]^...
0640     ldr r9,[r11,#-36]
0641     eor r7,r7,r4            @ rk[13]=rk[5]^rk[12]
0642     str r4,[r11,#-16]
0643     eor r8,r8,r7            @ rk[14]=rk[6]^rk[13]
0644     str r7,[r11,#-12]
0645     eor r9,r9,r8            @ rk[15]=rk[7]^rk[14]
0646     str r8,[r11,#-8]
0647     str r9,[r11,#-4]
0648     b   .L256_loop
0649 
0650 .Ldone: mov r0,#0           @ success
0651     ldmia   sp!,{r4-r12,lr}
     @ Shared exit: the abort paths arrive here with r0 = -1 and nothing
     @ pushed. NOTE(review): ret is presumably the return macro from the
     @ included asm/assembler.h - confirm.
0652 .Labrt: ret lr
0653 ENDPROC(private_AES_set_encrypt_key)
0654 
0655 .align  5
     @ private_AES_set_decrypt_key: convert an encryption key schedule
     @ into a decryption schedule, in place.
     @ In:   r2 = key schedule that already holds the encryption round
     @       keys and the round count at offset 240 (the call that would
     @       build it here is compiled out, see the #if 0 below).
     @       r0 and r1 are overwritten without being read.
     @ Out:  r0 = 0
     @ Steps: (1) swap the 16-byte round keys end for end;
     @        (2) rewrite every word of round keys 1..rounds-1 with the
     @            tp2/tp4/tp8 combination in .Lmix.
0656 ENTRY(private_AES_set_decrypt_key)
0657     str lr,[sp,#-4]!            @ push lr
0658 #if 0
0659     @ kernel does both of these in setkey so optimise this bit out by
0660     @ expecting the key to already have the enc_key work done (see aes_glue.c)
0661     bl  _armv4_AES_set_encrypt_key
0662 #else
0663     mov r0,#0           @ stands in for a successful return value
0664 #endif
0665     teq r0,#0           @ with the #else path above this is always equal
0666     ldrne   lr,[sp],#4              @ pop lr
0667     bne .Labrt
0668 
0669     stmdb   sp!,{r4-r12}
0670 
0671     ldr r12,[r2,#240]   @ AES_set_encrypt_key preserves r2,
0672     mov r11,r2          @ which is AES_KEY *key
0673     mov r7,r2           @ r7 = first round key
0674     add r8,r2,r12,lsl#4 @ r8 = last round key (key + rounds*16)
0675 
     @ Swap round keys from both ends, r7 moving up and r8 moving down by
     @ 16 per pass. The loop ends when both meet on the middle key, which
     @ stays where it is; this only happens for an even round count.
0676 .Linv:  ldr r0,[r7]
0677     ldr r1,[r7,#4]
0678     ldr r2,[r7,#8]
0679     ldr r3,[r7,#12]
0680     ldr r4,[r8]
0681     ldr r5,[r8,#4]
0682     ldr r6,[r8,#8]
0683     ldr r9,[r8,#12]
0684     str r0,[r8],#-16
0685     str r1,[r8,#16+4]
0686     str r2,[r8,#16+8]
0687     str r3,[r8,#16+12]
0688     str r4,[r7],#16
0689     str r5,[r7,#-12]
0690     str r6,[r7,#-8]
0691     str r9,[r7,#-4]
0692     teq r7,r8
0693     bne .Linv
0694     ldr r0,[r11,#16]!       @ prefetch tp1
0695     mov r7,#0x80
0696     mov r8,#0x1b
0697     orr r7,r7,#0x8000
0698     orr r8,r8,#0x1b00
0699     orr r7,r7,r7,lsl#16     @ r7 = 0x80808080, top bit of every byte
0700     orr r8,r8,r8,lsl#16     @ r8 = 0x1b1b1b1b
0701     sub r12,r12,#1
0702     mvn r9,r7               @ r9 = 0x7f7f7f7f, low 7 bits of every byte
0703     mov r12,r12,lsl#2   @ (rounds-1)*4
0704 
     @ One word per pass, r11 = cursor. Each tp step doubles the four
     @ bytes of a word independently: the low 7 bits of every byte are
     @ shifted left by one, and 0x1b is XORed into each byte whose top bit
     @ was set (the sub turns 0x80 into 0x7f as a per-byte mask for r8).
     @ The result combines tpe with byte-rotated tpb, tpd and tp9, which
     @ matches the AES InvMixColumns transform.
0705 .Lmix:  and r4,r0,r7
0706     and r1,r0,r9
0707     sub r4,r4,r4,lsr#7
0708     and r4,r4,r8
0709     eor r1,r4,r1,lsl#1  @ tp2
0710 
0711     and r4,r1,r7
0712     and r2,r1,r9
0713     sub r4,r4,r4,lsr#7
0714     and r4,r4,r8
0715     eor r2,r4,r2,lsl#1  @ tp4
0716 
0717     and r4,r2,r7
0718     and r3,r2,r9
0719     sub r4,r4,r4,lsr#7
0720     and r4,r4,r8
0721     eor r3,r4,r3,lsl#1  @ tp8
0722 
0723     eor r4,r1,r2
0724     eor r5,r0,r3        @ tp9
0725     eor r4,r4,r3        @ tpe
0726     eor r4,r4,r1,ror#24
0727     eor r4,r4,r5,ror#24 @ ^= ROTATE(tpb=tp9^tp2,8)
0728     eor r4,r4,r2,ror#16
0729     eor r4,r4,r5,ror#16 @ ^= ROTATE(tpd=tp9^tp4,16)
0730     eor r4,r4,r5,ror#8  @ ^= ROTATE(tp9,24)
0731 
0732     ldr r0,[r11,#4]     @ prefetch tp1
0733     str r4,[r11],#4     @ overwrite the current word; r11 += 4
0734     subs    r12,r12,#1
0735     bne .Lmix
0736 
0737     mov r0,#0
0738     ldmia   sp!,{r4-r12,pc} @ pc receives the lr pushed at entry
0739 ENDPROC(private_AES_set_decrypt_key)
0740 
0741 .type   AES_Td,%object
     @ AES_Td: lookup data for decryption: 256 32-bit words (1 KB, indexed
     @ by byte value with a scale of 4 in _armv4_AES_decrypt), followed
     @ directly by the 256-byte table labelled Td4 below.
     @ .align 5 requests a 32-byte boundary (power-of-two form on ARM).
0742 .align  5
0743 AES_Td:
0744 .word   0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96
0745 .word   0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393
0746 .word   0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25
0747 .word   0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f
0748 .word   0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1
0749 .word   0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6
0750 .word   0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da
0751 .word   0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844
0752 .word   0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd
0753 .word   0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4
0754 .word   0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45
0755 .word   0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94
0756 .word   0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7
0757 .word   0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a
0758 .word   0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5
0759 .word   0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c
0760 .word   0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1
0761 .word   0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a
0762 .word   0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75
0763 .word   0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051
0764 .word   0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46
0765 .word   0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff
0766 .word   0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77
0767 .word   0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb
0768 .word   0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000
0769 .word   0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e
0770 .word   0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927
0771 .word   0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a
0772 .word   0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e
0773 .word   0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16
0774 .word   0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d
0775 .word   0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8
0776 .word   0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd
0777 .word   0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34
0778 .word   0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163
0779 .word   0xd731dcca, 0x42638510, 0x13972240, 0x84c61120
0780 .word   0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d
0781 .word   0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0
0782 .word   0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422
0783 .word   0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef
0784 .word   0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36
0785 .word   0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4
0786 .word   0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662
0787 .word   0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5
0788 .word   0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3
0789 .word   0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b
0790 .word   0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8
0791 .word   0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6
0792 .word   0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6
0793 .word   0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0
0794 .word   0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815
0795 .word   0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f
0796 .word   0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df
0797 .word   0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f
0798 .word   0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e
0799 .word   0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713
0800 .word   0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89
0801 .word   0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c
0802 .word   0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf
0803 .word   0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86
0804 .word   0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f
0805 .word   0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541
0806 .word   0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190
0807 .word   0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742
0808 @ Td4[256]
0809 .byte   0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
0810 .byte   0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
0811 .byte   0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
0812 .byte   0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
0813 .byte   0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
0814 .byte   0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
0815 .byte   0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
0816 .byte   0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
0817 .byte   0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
0818 .byte   0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
0819 .byte   0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
0820 .byte   0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
0821 .byte   0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
0822 .byte   0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
0823 .byte   0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
0824 .byte   0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
0825 .byte   0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
0826 .byte   0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
0827 .byte   0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
0828 .byte   0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
0829 .byte   0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
0830 .byte   0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
0831 .byte   0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
0832 .byte   0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
0833 .byte   0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
0834 .byte   0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
0835 .byte   0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
0836 .byte   0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
0837 .byte   0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
0838 .byte   0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
0839 .byte   0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
0840 .byte   0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
0841 .size   AES_Td,.-AES_Td
0842 
0843 @ void AES_decrypt(const unsigned char *in, unsigned char *out,
0844 @        const AES_KEY *key) {
     @
     @ Decrypts one 16-byte block.
     @ In:   r0 = in, r1 = out, r2 = key (round keys; the core reads the
     @       round count from offset 240)
     @ Out:  16 bytes written to out; r4-r12 are restored from the stack
     @ Flow: load the block into r0-r3 as big-endian words, call
     @       _armv4_AES_decrypt with r10 = AES_Td and r11 = key, then
     @       store r0-r3 back to out in the same byte order.
0845 .align  5
0846 ENTRY(AES_decrypt)
0847     adr r3,AES_decrypt      @ r3 = run-time address of this entry point
0848     stmdb   sp!,{r1,r4-r12,lr}  @ r1 (out) ends up at the new sp
0849     mov r12,r0      @ inp
0850     mov r11,r2      @ key pointer for the core
0851     sub r10,r3,#AES_decrypt-AES_Td      @ Td
0852 #if __ARM_ARCH__<7
0853     ldrb    r0,[r12,#3] @ load input data in endian-neutral
0854     ldrb    r4,[r12,#2] @ manner...
0855     ldrb    r5,[r12,#1]
0856     ldrb    r6,[r12,#0]
0857     orr r0,r0,r4,lsl#8
0858     ldrb    r1,[r12,#7]
0859     orr r0,r0,r5,lsl#16
0860     ldrb    r4,[r12,#6]
0861     orr r0,r0,r6,lsl#24
0862     ldrb    r5,[r12,#5]
0863     ldrb    r6,[r12,#4]
0864     orr r1,r1,r4,lsl#8
0865     ldrb    r2,[r12,#11]
0866     orr r1,r1,r5,lsl#16
0867     ldrb    r4,[r12,#10]
0868     orr r1,r1,r6,lsl#24
0869     ldrb    r5,[r12,#9]
0870     ldrb    r6,[r12,#8]
0871     orr r2,r2,r4,lsl#8
0872     ldrb    r3,[r12,#15]
0873     orr r2,r2,r5,lsl#16
0874     ldrb    r4,[r12,#14]
0875     orr r2,r2,r6,lsl#24
0876     ldrb    r5,[r12,#13]
0877     ldrb    r6,[r12,#12]
0878     orr r3,r3,r4,lsl#8
0879     orr r3,r3,r5,lsl#16
0880     orr r3,r3,r6,lsl#24
0881 #else
     @ ARMv7 and later: whole-word loads, byte-reversed on little-endian
     @ builds so r0-r3 hold the same big-endian words as the path above.
     @ NOTE(review): in is a byte pointer, so these ldr presumably rely
     @ on unaligned word access being permitted - confirm.
0882     ldr r0,[r12,#0]
0883     ldr r1,[r12,#4]
0884     ldr r2,[r12,#8]
0885     ldr r3,[r12,#12]
0886 #ifdef __ARMEL__
0887     rev r0,r0
0888     rev r1,r1
0889     rev r2,r2
0890     rev r3,r3
0891 #endif
0892 #endif
0893     bl  _armv4_AES_decrypt  @ transforms the state in r0-r3
0894 
0895     ldr r12,[sp],#4     @ pop out
0896 #if __ARM_ARCH__>=7
0897 #ifdef __ARMEL__
0898     rev r0,r0
0899     rev r1,r1
0900     rev r2,r2
0901     rev r3,r3
0902 #endif
0903     str r0,[r12,#0]
0904     str r1,[r12,#4]
0905     str r2,[r12,#8]
0906     str r3,[r12,#12]
0907 #else
0908     mov r4,r0,lsr#24        @ write output in endian-neutral
0909     mov r5,r0,lsr#16        @ manner...
0910     mov r6,r0,lsr#8
0911     strb    r4,[r12,#0]
0912     strb    r5,[r12,#1]
0913     mov r4,r1,lsr#24
0914     strb    r6,[r12,#2]
0915     mov r5,r1,lsr#16
0916     strb    r0,[r12,#3]
0917     mov r6,r1,lsr#8
0918     strb    r4,[r12,#4]
0919     strb    r5,[r12,#5]
0920     mov r4,r2,lsr#24
0921     strb    r6,[r12,#6]
0922     mov r5,r2,lsr#16
0923     strb    r1,[r12,#7]
0924     mov r6,r2,lsr#8
0925     strb    r4,[r12,#8]
0926     strb    r5,[r12,#9]
0927     mov r4,r3,lsr#24
0928     strb    r6,[r12,#10]
0929     mov r5,r3,lsr#16
0930     strb    r2,[r12,#11]
0931     mov r6,r3,lsr#8
0932     strb    r4,[r12,#12]
0933     strb    r5,[r12,#13]
0934     strb    r6,[r12,#14]
0935     strb    r3,[r12,#15]
0936 #endif
0937     ldmia   sp!,{r4-r12,pc} @ restore r4-r12; saved lr is loaded into pc
0938 ENDPROC(AES_decrypt)
0939 
@ _armv4_AES_decrypt: internal round function called by AES_decrypt.
@
@ In:   r0-r3 = state words s0-s3
@       r10   = base of the lookup tables (AES_decrypt passes AES_Td):
@               word table at r10, byte table at r10+1024
@       r11   = pointer to the round-key words
@ Out:  r0-r3 = resulting state words
@ Clobbers: r4-r9, r12 and lr. lr is pushed on entry and the function
@       returns by popping it straight into pc. r11 is left advanced
@       past the round keys consumed by the loop; r10 is restored.
@
@ All word lookups use the single table at r10. The Td0..Td3 named in
@ the per-line comments are that one table with the result rotated by
@ 0, 8, 16 or 24 bits when it is folded in (the ror on the eor lines).
0940 .type   _armv4_AES_decrypt,%function
0941 .align  2
0942 _armv4_AES_decrypt:
0943     str lr,[sp,#-4]!        @ push lr
@ Initial key addition: xor the first four key words into the state.
@ The ldmia writeback moves r11 on by 16, so #240-16 addresses the word
@ at byte offset 240 from the incoming key pointer.
@ NOTE(review): presumably the rounds field of AES_KEY; confirm against
@ the structure definition, which is not visible in this chunk.
0944     ldmia   r11!,{r4-r7}
0945     eor r0,r0,r4
0946     ldr r12,[r11,#240-16]
0947     eor r1,r1,r5
0948     eor r2,r2,r6
0949     eor r3,r3,r7
@ r12 = loaded count minus one = number of passes through .Ldec_loop;
@ the last round is handled separately after the loop.
0950     sub r12,r12,#1
0951     mov lr,#255
0952 
@ lr is the byte mask from here on. Pre-split s0 into table indices.
@ Loop invariant at .Ldec_loop: r0 = s0>>24, r7/r8/r9 = bytes 2/1/0 of
@ s0, and r1-r3 still hold s1-s3 unsplit.
0953     and r7,lr,r0,lsr#16
0954     and r8,lr,r0,lsr#8
0955     and r9,lr,r0
0956     mov r0,r0,lsr#24
0957 .Ldec_loop:
@ Lookups for s0, interleaved with extracting the indices of s1.
0958     ldr r4,[r10,r7,lsl#2]   @ Td1[s0>>16]
0959     and r7,lr,r1        @ i0
0960     ldr r5,[r10,r8,lsl#2]   @ Td2[s0>>8]
0961     and r8,lr,r1,lsr#16
0962     ldr r6,[r10,r9,lsl#2]   @ Td3[s0>>0]
0963     and r9,lr,r1,lsr#8
0964     ldr r0,[r10,r0,lsl#2]   @ Td0[s0>>24]
0965     mov r1,r1,lsr#24
0966 
@ Lookups for s1; r4-r6 carry partial sums that are rotated by a further
@ 8 bits each time they are folded into the next word.
0967     ldr r7,[r10,r7,lsl#2]   @ Td3[s1>>0]
0968     ldr r8,[r10,r8,lsl#2]   @ Td1[s1>>16]
0969     ldr r9,[r10,r9,lsl#2]   @ Td2[s1>>8]
0970     eor r0,r0,r7,ror#24
0971     ldr r1,[r10,r1,lsl#2]   @ Td0[s1>>24]
0972     and r7,lr,r2,lsr#8  @ i0
0973     eor r5,r8,r5,ror#8
0974     and r8,lr,r2        @ i1
0975     eor r6,r9,r6,ror#8
0976     and r9,lr,r2,lsr#16
0977     ldr r7,[r10,r7,lsl#2]   @ Td2[s2>>8]
0978     eor r1,r1,r4,ror#8
0979     ldr r8,[r10,r8,lsl#2]   @ Td3[s2>>0]
0980     mov r2,r2,lsr#24
0981 
@ Lookups for s2, interleaved with extracting the indices of s3.
0982     ldr r9,[r10,r9,lsl#2]   @ Td1[s2>>16]
0983     eor r0,r0,r7,ror#16
0984     ldr r2,[r10,r2,lsl#2]   @ Td0[s2>>24]
0985     and r7,lr,r3,lsr#16 @ i0
0986     eor r1,r1,r8,ror#24
0987     and r8,lr,r3,lsr#8  @ i1
0988     eor r6,r9,r6,ror#8
0989     and r9,lr,r3        @ i2
0990     ldr r7,[r10,r7,lsl#2]   @ Td1[s3>>16]
0991     eor r2,r2,r5,ror#8
0992     ldr r8,[r10,r8,lsl#2]   @ Td2[s3>>8]
0993     mov r3,r3,lsr#24
0994 
@ Lookups for s3. The post-indexed ldr fetches word 0 of the next round
@ key and advances r11 by 16; the other three key words are then read
@ at negative offsets from the advanced pointer.
0995     ldr r9,[r10,r9,lsl#2]   @ Td3[s3>>0]
0996     eor r0,r0,r7,ror#8
0997     ldr r7,[r11],#16
0998     eor r1,r1,r8,ror#16
0999     ldr r3,[r10,r3,lsl#2]   @ Td0[s3>>24]
1000     eor r2,r2,r9,ror#24
1001 
@ Add the round key and, in the same stretch, re-split the new s0 so
@ that the loop invariant holds again.
1002     ldr r4,[r11,#-12]
1003     eor r0,r0,r7
1004     ldr r5,[r11,#-8]
1005     eor r3,r3,r6,ror#8
1006     ldr r6,[r11,#-4]
1007     and r7,lr,r0,lsr#16
1008     eor r1,r1,r4
1009     and r8,lr,r0,lsr#8
1010     eor r2,r2,r5
1011     and r9,lr,r0
1012     eor r3,r3,r6
1013     mov r0,r0,lsr#24
1014 
1015     subs    r12,r12,#1
1016     bne .Ldec_loop
1017 
@ Last round: point r10 at the byte table that sits 1024 bytes after the
@ word table (called Td4 in the comments below).
1018     add r10,r10,#1024
1019 
@ Touch the 256-byte table at a 32-byte stride. The loaded values are
@ never used: r4-r6 are overwritten by the ldrb lookups that follow.
1020     ldr r5,[r10,#0]     @ prefetch Td4
1021     ldr r6,[r10,#32]
1022     ldr r4,[r10,#64]
1023     ldr r5,[r10,#96]
1024     ldr r6,[r10,#128]
1025     ldr r4,[r10,#160]
1026     ldr r5,[r10,#192]
1027     ldr r6,[r10,#224]
1028 
@ Byte lookups; the output words are built up with lsl, not ror.
1029     ldrb    r0,[r10,r0]     @ Td4[s0>>24]
1030     ldrb    r4,[r10,r7]     @ Td4[s0>>16]
1031     and r7,lr,r1        @ i0
1032     ldrb    r5,[r10,r8]     @ Td4[s0>>8]
1033     and r8,lr,r1,lsr#16
1034     ldrb    r6,[r10,r9]     @ Td4[s0>>0]
1035     and r9,lr,r1,lsr#8
1036 
@ The top-byte lookup exists in two forms: one ldrb with a shifted
@ register offset, or an add followed by a plain ldrb. Which one is
@ assembled is decided by the ARM and THUMB wrapper macros, which are
@ defined outside this chunk (presumably in asm/assembler.h).
1037     ldrb    r7,[r10,r7]     @ Td4[s1>>0]
1038  ARM(   ldrb    r1,[r10,r1,lsr#24]  )   @ Td4[s1>>24]
1039  THUMB( add r1,r10,r1,lsr#24    )   @ Td4[s1>>24]
1040  THUMB( ldrb    r1,[r1]         )
1041     ldrb    r8,[r10,r8]     @ Td4[s1>>16]
1042     eor r0,r7,r0,lsl#24
1043     ldrb    r9,[r10,r9]     @ Td4[s1>>8]
1044     eor r1,r4,r1,lsl#8
1045     and r7,lr,r2,lsr#8  @ i0
1046     eor r5,r5,r8,lsl#8
1047     and r8,lr,r2        @ i1
1048     ldrb    r7,[r10,r7]     @ Td4[s2>>8]
1049     eor r6,r6,r9,lsl#8
1050     ldrb    r8,[r10,r8]     @ Td4[s2>>0]
1051     and r9,lr,r2,lsr#16
1052 
1053  ARM(   ldrb    r2,[r10,r2,lsr#24]  )   @ Td4[s2>>24]
1054  THUMB( add r2,r10,r2,lsr#24    )   @ Td4[s2>>24]
1055  THUMB( ldrb    r2,[r2]         )
1056     eor r0,r0,r7,lsl#8
1057     ldrb    r9,[r10,r9]     @ Td4[s2>>16]
1058     eor r1,r8,r1,lsl#16
1059     and r7,lr,r3,lsr#16 @ i0
1060     eor r2,r5,r2,lsl#16
1061     and r8,lr,r3,lsr#8  @ i1
1062     ldrb    r7,[r10,r7]     @ Td4[s3>>16]
1063     eor r6,r6,r9,lsl#16
1064     ldrb    r8,[r10,r8]     @ Td4[s3>>8]
1065     and r9,lr,r3        @ i2
1066 
@ Final lookups for s3, interleaved with loading the last round key:
@ four words at r11, which is not advanced this time.
1067     ldrb    r9,[r10,r9]     @ Td4[s3>>0]
1068  ARM(   ldrb    r3,[r10,r3,lsr#24]  )   @ Td4[s3>>24]
1069  THUMB( add r3,r10,r3,lsr#24    )   @ Td4[s3>>24]
1070  THUMB( ldrb    r3,[r3]         )
1071     eor r0,r0,r7,lsl#16
1072     ldr r7,[r11,#0]
1073     eor r1,r1,r8,lsl#8
1074     ldr r4,[r11,#4]
1075     eor r2,r9,r2,lsl#8
1076     ldr r5,[r11,#8]
1077     eor r3,r6,r3,lsl#24
1078     ldr r6,[r11,#12]
1079 
1080     eor r0,r0,r7
1081     eor r1,r1,r4
1082     eor r2,r2,r5
1083     eor r3,r3,r6
1084 
@ Undo the +1024 so r10 again points at the word table for the caller.
1085     sub r10,r10,#1024
1086     ldr pc,[sp],#4      @ pop and return
1087 .size   _armv4_AES_decrypt,.-_armv4_AES_decrypt
@ Identification string, emitted NUL-terminated into the current section;
@ the .align 2 that follows pads to the next 4-byte boundary.
1088 .asciz  "AES for ARMv4, CRYPTOGAMS by <appro@openssl.org>"
1089 .align  2