0001
0002
0003
0004
0005
0006
/* Record the logical source file name, then assemble what follows into .text. */
0007 .file "twofish-i586-asm.S"
0008 .text
0009
0010 #include <linux/linkage.h>
0011 #include <asm/asm-offsets.h>
0012
0013
0014
/*
 * Byte offsets of the three stack arguments, relative to %esp at function
 * entry (the return address occupies offset 0).  Both entry points push
 * four registers (16 bytes) first, which is why they read these as
 * NAME+16(%esp).
 *
 *   in_blk   address the 16-byte block is loaded from
 *   out_blk  address the 16-byte result is stored to
 *   ctx      address of the context; loaded into %ebp
 */
0015 #define in_blk 12
0016 #define out_blk 8
0017 #define ctx 4
0018
/*
 * Byte offsets of the four 32-bit words a, b, c, d inside a block.  The
 * whitening macros reuse them to select the matching whitening word.
 */
0019 #define a_offset 0
0020 #define b_offset 4
0021 #define c_offset 8
0022 #define d_offset 12
0023
0024
0025
/*
 * Byte offsets into the context that %ebp points to.
 *
 *   s0..s3  Four lookup tables spaced 1024 bytes apart.  The round macros
 *           index them with a zero-extended byte scaled by 4, i.e. as
 *           256 entries of 32 bits each.  The table at offset 0 is
 *           addressed without a displacement, so the name s0 is not
 *           referenced by the code shown here.
 *   w       Whitening words: input_whitening reads w+0..w+12 and
 *           output_whitening reads w+16..w+28.
 *   k       Per-round words: each round reads two 32-bit values, at
 *           k+round and k+4+round, with round advancing in steps of 8.
 *
 * NOTE(review): these offsets presumably mirror the C-side context
 * structure - confirm against its definition if that layout changes.
 */
0026 #define s0 0
0027 #define s1 1024
0028 #define s2 2048
0029 #define s3 3072
0030 #define w 4096
0031 #define k 4128
0032
0033
0034
/*
 * Register aliases for macro substitution.  The round macros take a base
 * name (R0..R3) and paste a suffix onto it with the preprocessor:
 *
 *   D  the full 32-bit register
 *   B  its low byte        (bits 0-7)
 *   H  its second byte     (bits 8-15)
 *
 * Only %eax, %ebx, %ecx and %edx are used because they are the registers
 * that expose both byte forms.
 */
0035 #define R0D %eax
0036 #define R0B %al
0037 #define R0H %ah
0038
0039 #define R1D %ebx
0040 #define R1B %bl
0041 #define R1H %bh
0042
0043 #define R2D %ecx
0044 #define R2B %cl
0045 #define R2H %ch
0046
0047 #define R3D %edx
0048 #define R3B %dl
0049 #define R3H %dh
0050
0051
0052
/*
 * input_whitening(src, context, offset)
 *
 * XOR the 32-bit register src with the whitening word at w+offset in the
 * context (first group of four whitening words).  Modifies src and flags.
 * Used on the loaded block by the encrypt path and on the result by the
 * decrypt path.
 */
0053 #define input_whitening(src,context,offset)\
0054 xor w+offset(context), src;
0055
0056
/*
 * output_whitening(src, context, offset)
 *
 * XOR the 32-bit register src with the whitening word at w+16+offset in
 * the context (second group of four whitening words).  Modifies src and
 * flags.  Used on the result by the encrypt path and on the loaded block
 * by the decrypt path.
 */
0057 #define output_whitening(src,context,offset)\
0058 xor w+16+offset(context), src;
0059
0060
0061
0062
0063
0064
0065
0066
/*
 * encrypt_round(a, b, c, d, round)
 *
 * One encryption round.  a, b, c, d are register base names (R0..R3);
 * round is the byte offset of this round's pair of words within k.
 * %ebp must hold the context address.
 *
 * Expected on entry:
 *   a  rotated by 16: its two low bytes index s2 and s3 first, and only
 *      after the in-macro "ror $16" do its low bytes index the table at
 *      offset 0 and s1.
 *   b  not rotated: its bytes index s1, s2, then (after ror $16) s3 and
 *      the table at offset 0.
 *   d  is XORed into the result as-is, so any rotation of d must already
 *      have been applied by the caller.
 *
 * Computation (all arithmetic is 32-bit, wrapping):
 *   ta = XOR of the four table lookups driven by a     (built in %esi)
 *   tb = XOR of the four table lookups driven by b     (built in d's reg)
 *   c  = rol15(c ^ (ta + tb + k[round]))
 *   d  = d_entry ^ (ta + 2*tb + k[round + 4])
 *
 * State on exit:
 *   a  rotated right by 16 in total
 *   b  rotated right by 31 in total (16 + 15), i.e. left by 1
 *   c, d as above
 * so the registers can be fed to the next round with the (a,b) and (c,d)
 * pairs exchanged.
 *
 * The lookups for a and b are interleaved.  d's entry value is parked on
 * the stack (push/pop is balanced) so that its register can serve as the
 * tb accumulator.  Clobbers %esi, %edi and flags.
 */
0067 #define encrypt_round(a,b,c,d,round)\
0068 push d ## D;\
0069 movzx b ## B, %edi;\
0070 mov s1(%ebp,%edi,4),d ## D;\
0071 movzx a ## B, %edi;\
0072 mov s2(%ebp,%edi,4),%esi;\
0073 movzx b ## H, %edi;\
0074 ror $16, b ## D;\
0075 xor s2(%ebp,%edi,4),d ## D;\
0076 movzx a ## H, %edi;\
0077 ror $16, a ## D;\
0078 xor s3(%ebp,%edi,4),%esi;\
0079 movzx b ## B, %edi;\
0080 xor s3(%ebp,%edi,4),d ## D;\
0081 movzx a ## B, %edi;\
0082 xor (%ebp,%edi,4), %esi;\
0083 movzx b ## H, %edi;\
0084 ror $15, b ## D;\
0085 xor (%ebp,%edi,4), d ## D;\
0086 movzx a ## H, %edi;\
0087 xor s1(%ebp,%edi,4),%esi;\
0088 pop %edi;\
0089 add d ## D, %esi;\
0090 add %esi, d ## D;\
0091 add k+round(%ebp), %esi;\
0092 xor %esi, c ## D;\
0093 rol $15, c ## D;\
0094 add k+4+round(%ebp),d ## D;\
0095 xor %edi, d ## D;
0096
0097
0098
0099
0100
0101
0102
0103
0104
/*
 * encrypt_last_round(a, b, c, d, round)
 *
 * Final encryption round.  Same inputs, table lookups, key additions and
 * clobbers (%esi, %edi, flags; balanced push/pop) as encrypt_round; only
 * two rotations differ, because no further round consumes the registers:
 *
 *   - b is rotated by 16 twice (net 32), so it leaves with its entry
 *     value instead of being rotated left by 1;
 *   - c is rotated right by 1 after the XOR instead of left by 15.
 *
 * Result:
 *   ta = XOR of the four table lookups driven by a
 *   tb = XOR of the four table lookups driven by b
 *   c  = ror1(c ^ (ta + tb + k[round]))
 *   d  = d_entry ^ (ta + 2*tb + k[round + 4])
 *   a  rotated right by 16 in total; b unchanged.
 */
0105 #define encrypt_last_round(a,b,c,d,round)\
0106 push d ## D;\
0107 movzx b ## B, %edi;\
0108 mov s1(%ebp,%edi,4),d ## D;\
0109 movzx a ## B, %edi;\
0110 mov s2(%ebp,%edi,4),%esi;\
0111 movzx b ## H, %edi;\
0112 ror $16, b ## D;\
0113 xor s2(%ebp,%edi,4),d ## D;\
0114 movzx a ## H, %edi;\
0115 ror $16, a ## D;\
0116 xor s3(%ebp,%edi,4),%esi;\
0117 movzx b ## B, %edi;\
0118 xor s3(%ebp,%edi,4),d ## D;\
0119 movzx a ## B, %edi;\
0120 xor (%ebp,%edi,4), %esi;\
0121 movzx b ## H, %edi;\
0122 ror $16, b ## D;\
0123 xor (%ebp,%edi,4), d ## D;\
0124 movzx a ## H, %edi;\
0125 xor s1(%ebp,%edi,4),%esi;\
0126 pop %edi;\
0127 add d ## D, %esi;\
0128 add %esi, d ## D;\
0129 add k+round(%ebp), %esi;\
0130 xor %esi, c ## D;\
0131 ror $1, c ## D;\
0132 add k+4+round(%ebp),d ## D;\
0133 xor %edi, d ## D;
0134
0135
0136
0137
0138
0139
0140
0141
/*
 * decrypt_round(a, b, c, d, round)
 *
 * One decryption round.  a, b, c, d are register base names (R0..R3);
 * round is the byte offset of this round's pair of words within k.
 * %ebp must hold the context address.
 *
 * Expected on entry:
 *   a  not rotated: its bytes index the table at offset 0, s1, then
 *      (after ror $16) s2 and s3.
 *   b  rotated by 16: its two low bytes index s3 and the table at
 *      offset 0 first, and after the in-macro "ror $16" its low bytes
 *      index s1 and s2.
 *   c  is XORed into the result as-is, so any rotation of c must already
 *      have been applied by the caller.
 *
 * Computation (all arithmetic is 32-bit, wrapping):
 *   ta = XOR of the four table lookups driven by a     (built in c's reg)
 *   tb = XOR of the four table lookups driven by b     (built in %esi)
 *   c  = c_entry ^ (ta + tb + k[round])
 *   d  = rol15(d ^ (ta + 2*tb + k[round + 4]))
 *
 * State on exit:
 *   a  rotated right by 31 in total (16 + 15), i.e. left by 1
 *   b  rotated right by 16 in total
 *   c, d as above
 * so the registers can be fed to the next round with the (a,b) and (c,d)
 * pairs exchanged.
 *
 * The lookups for a and b are interleaved.  c's entry value is parked on
 * the stack (push/pop is balanced) so that its register can serve as the
 * ta accumulator.  Clobbers %esi, %edi and flags.
 */
0142 #define decrypt_round(a,b,c,d,round)\
0143 push c ## D;\
0144 movzx a ## B, %edi;\
0145 mov (%ebp,%edi,4), c ## D;\
0146 movzx b ## B, %edi;\
0147 mov s3(%ebp,%edi,4),%esi;\
0148 movzx a ## H, %edi;\
0149 ror $16, a ## D;\
0150 xor s1(%ebp,%edi,4),c ## D;\
0151 movzx b ## H, %edi;\
0152 ror $16, b ## D;\
0153 xor (%ebp,%edi,4), %esi;\
0154 movzx a ## B, %edi;\
0155 xor s2(%ebp,%edi,4),c ## D;\
0156 movzx b ## B, %edi;\
0157 xor s1(%ebp,%edi,4),%esi;\
0158 movzx a ## H, %edi;\
0159 ror $15, a ## D;\
0160 xor s3(%ebp,%edi,4),c ## D;\
0161 movzx b ## H, %edi;\
0162 xor s2(%ebp,%edi,4),%esi;\
0163 pop %edi;\
0164 add %esi, c ## D;\
0165 add c ## D, %esi;\
0166 add k+round(%ebp), c ## D;\
0167 xor %edi, c ## D;\
0168 add k+4+round(%ebp),%esi;\
0169 xor %esi, d ## D;\
0170 rol $15, d ## D;
0171
0172
0173
0174
0175
0176
0177
0178
0179
/*
 * decrypt_last_round(a, b, c, d, round)
 *
 * Final decryption round.  Same inputs, table lookups, key additions and
 * clobbers (%esi, %edi, flags; balanced push/pop) as decrypt_round; only
 * two rotations differ, because no further round consumes the registers:
 *
 *   - a is rotated by 16 twice (net 32), so it leaves with its entry
 *     value instead of being rotated left by 1;
 *   - d is rotated right by 1 after the XOR instead of left by 15.
 *
 * Result:
 *   ta = XOR of the four table lookups driven by a
 *   tb = XOR of the four table lookups driven by b
 *   c  = c_entry ^ (ta + tb + k[round])
 *   d  = ror1(d ^ (ta + 2*tb + k[round + 4]))
 *   a  unchanged; b rotated right by 16 in total.
 */
0180 #define decrypt_last_round(a,b,c,d,round)\
0181 push c ## D;\
0182 movzx a ## B, %edi;\
0183 mov (%ebp,%edi,4), c ## D;\
0184 movzx b ## B, %edi;\
0185 mov s3(%ebp,%edi,4),%esi;\
0186 movzx a ## H, %edi;\
0187 ror $16, a ## D;\
0188 xor s1(%ebp,%edi,4),c ## D;\
0189 movzx b ## H, %edi;\
0190 ror $16, b ## D;\
0191 xor (%ebp,%edi,4), %esi;\
0192 movzx a ## B, %edi;\
0193 xor s2(%ebp,%edi,4),c ## D;\
0194 movzx b ## B, %edi;\
0195 xor s1(%ebp,%edi,4),%esi;\
0196 movzx a ## H, %edi;\
0197 ror $16, a ## D;\
0198 xor s3(%ebp,%edi,4),c ## D;\
0199 movzx b ## H, %edi;\
0200 xor s2(%ebp,%edi,4),%esi;\
0201 pop %edi;\
0202 add %esi, c ## D;\
0203 add c ## D, %esi;\
0204 add k+round(%ebp), c ## D;\
0205 xor %edi, c ## D;\
0206 add k+4+round(%ebp),%esi;\
0207 xor %esi, d ## D;\
0208 ror $1, d ## D;
0209
/*
 * twofish_enc_blk - run the 16 encrypt rounds over one 16-byte block.
 *
 * Arguments are read from the stack (offsets from %esp at entry):
 *   4   ctx      context address (tables, whitening words, round words)
 *   8   out_blk  address the 16-byte result is written to
 *   12  in_blk   address the 16-byte input is read from
 *
 * Returns 1 in %eax.  %ebp, %ebx, %esi and %edi are saved and restored;
 * %ecx, %edx and flags are clobbered.
 *
 * NOTE(review): the C prototype is not visible here - presumably
 * (ctx, out, in) with stack-passed arguments; confirm against the caller.
 * SYM_FUNC_START/SYM_FUNC_END and RET are macros, presumably supplied by
 * <linux/linkage.h>.
 */
0210 SYM_FUNC_START(twofish_enc_blk)
/* Save the registers that are restored before returning. */
0211 push %ebp
0212 push %ebx
0213 push %esi
0214 push %edi
0215
/* %ebp is used as the context base by every macro below (four pushes => +16). */
0216 mov ctx + 16(%esp), %ebp
0217
/* %edi = input block address; the rounds later reuse %edi as scratch. */
0218 mov in_blk+16(%esp),%edi
0219
/* Load the block: a=%eax, b=%ebx, c=%ecx, d=%edx, then apply the first whitening group. */
0220 mov (%edi), %eax
0221 mov b_offset(%edi), %ebx
0222 mov c_offset(%edi), %ecx
0223 mov d_offset(%edi), %edx
0224 input_whitening(%eax,%ebp,a_offset)
/* encrypt_round indexes s2/s3 with a's low two bytes before rotating it, so pre-rotate a by 16. */
0225 ror $16, %eax
0226 input_whitening(%ebx,%ebp,b_offset)
0227 input_whitening(%ecx,%ebp,c_offset)
0228 input_whitening(%edx,%ebp,d_offset)
/* encrypt_round XORs d in without rotating it, so the rotate-left-by-1 is applied up front. */
0229 rol $1, %edx
0230
/*
 * 16 unrolled rounds.  The (a,b) and (c,d) register pairs swap roles on
 * every round, and the round offset advances by 8 bytes (two 32-bit
 * words of k) each time.
 */
0231 encrypt_round(R0,R1,R2,R3,0);
0232 encrypt_round(R2,R3,R0,R1,8);
0233 encrypt_round(R0,R1,R2,R3,2*8);
0234 encrypt_round(R2,R3,R0,R1,3*8);
0235 encrypt_round(R0,R1,R2,R3,4*8);
0236 encrypt_round(R2,R3,R0,R1,5*8);
0237 encrypt_round(R0,R1,R2,R3,6*8);
0238 encrypt_round(R2,R3,R0,R1,7*8);
0239 encrypt_round(R0,R1,R2,R3,8*8);
0240 encrypt_round(R2,R3,R0,R1,9*8);
0241 encrypt_round(R0,R1,R2,R3,10*8);
0242 encrypt_round(R2,R3,R0,R1,11*8);
0243 encrypt_round(R0,R1,R2,R3,12*8);
0244 encrypt_round(R2,R3,R0,R1,13*8);
0245 encrypt_round(R0,R1,R2,R3,14*8);
/* The last round uses different rotations because no further round follows. */
0246 encrypt_last_round(R2,R3,R0,R1,15*8);
0247
/*
 * Second whitening group and store.  The register pairs are crossed:
 * %eax/%ebx use the c/d whitening words and land at c_offset/d_offset,
 * while %ecx/%edx use the a/b words and land at offsets 0/b_offset.
 */
0248 output_whitening(%eax,%ebp,c_offset)
0249 output_whitening(%ebx,%ebp,d_offset)
0250 output_whitening(%ecx,%ebp,a_offset)
0251 output_whitening(%edx,%ebp,b_offset)
/* The four saved registers are still on the stack, hence the +16 again. */
0252 mov out_blk+16(%esp),%edi;
0253 mov %eax, c_offset(%edi)
0254 mov %ebx, d_offset(%edi)
0255 mov %ecx, (%edi)
0256 mov %edx, b_offset(%edi)
0257
/* Restore the saved registers in reverse push order and return 1. */
0258 pop %edi
0259 pop %esi
0260 pop %ebx
0261 pop %ebp
0262 mov $1, %eax
0263 RET
0264 SYM_FUNC_END(twofish_enc_blk)
0265
/*
 * twofish_dec_blk - run the 16 decrypt rounds over one 16-byte block.
 *
 * Arguments are read from the stack (offsets from %esp at entry):
 *   4   ctx      context address (tables, whitening words, round words)
 *   8   out_blk  address the 16-byte result is written to
 *   12  in_blk   address the 16-byte input is read from
 *
 * Returns 1 in %eax.  %ebp, %ebx, %esi and %edi are saved and restored;
 * %ecx, %edx and flags are clobbered.
 *
 * NOTE(review): the C prototype is not visible here - presumably
 * (ctx, out, in) with stack-passed arguments; confirm against the caller.
 * SYM_FUNC_START/SYM_FUNC_END and RET are macros, presumably supplied by
 * <linux/linkage.h>.
 */
0266 SYM_FUNC_START(twofish_dec_blk)
/* Save the registers that are restored before returning. */
0267 push %ebp
0268 push %ebx
0269 push %esi
0270 push %edi
0271
0272
/* %ebp is used as the context base by every macro below (four pushes => +16). */
0273 mov ctx + 16(%esp), %ebp
0274
/* %edi = input block address; the rounds later reuse %edi as scratch. */
0275 mov in_blk+16(%esp),%edi
0276
/*
 * Load the block: a=%eax, b=%ebx, c=%ecx, d=%edx.  The second whitening
 * group (the one the encrypt path applies last) is applied first here.
 */
0277 mov (%edi), %eax
0278 mov b_offset(%edi), %ebx
0279 mov c_offset(%edi), %ecx
0280 mov d_offset(%edi), %edx
0281 output_whitening(%eax,%ebp,a_offset)
0282 output_whitening(%ebx,%ebp,b_offset)
/* decrypt_round indexes s3 and the offset-0 table with b's low two bytes before rotating it, so pre-rotate b by 16. */
0283 ror $16, %ebx
0284 output_whitening(%ecx,%ebp,c_offset)
0285 output_whitening(%edx,%ebp,d_offset)
/* decrypt_round XORs c in without rotating it, so the rotate-left-by-1 is applied up front. */
0286 rol $1, %ecx
0287
/*
 * 16 unrolled rounds, walking the round words backwards from offset 15*8
 * down to 0.  The (a,b) and (c,d) register pairs swap roles on every
 * round.
 */
0288 decrypt_round(R0,R1,R2,R3,15*8);
0289 decrypt_round(R2,R3,R0,R1,14*8);
0290 decrypt_round(R0,R1,R2,R3,13*8);
0291 decrypt_round(R2,R3,R0,R1,12*8);
0292 decrypt_round(R0,R1,R2,R3,11*8);
0293 decrypt_round(R2,R3,R0,R1,10*8);
0294 decrypt_round(R0,R1,R2,R3,9*8);
0295 decrypt_round(R2,R3,R0,R1,8*8);
0296 decrypt_round(R0,R1,R2,R3,7*8);
0297 decrypt_round(R2,R3,R0,R1,6*8);
0298 decrypt_round(R0,R1,R2,R3,5*8);
0299 decrypt_round(R2,R3,R0,R1,4*8);
0300 decrypt_round(R0,R1,R2,R3,3*8);
0301 decrypt_round(R2,R3,R0,R1,2*8);
0302 decrypt_round(R0,R1,R2,R3,1*8);
/* The last round uses different rotations because no further round follows. */
0303 decrypt_last_round(R2,R3,R0,R1,0);
0304
/*
 * First whitening group and store.  The register pairs are crossed:
 * %eax/%ebx use the c/d whitening words and land at c_offset/d_offset,
 * while %ecx/%edx use the a/b words and land at offsets 0/b_offset.
 */
0305 input_whitening(%eax,%ebp,c_offset)
0306 input_whitening(%ebx,%ebp,d_offset)
0307 input_whitening(%ecx,%ebp,a_offset)
0308 input_whitening(%edx,%ebp,b_offset)
/* The four saved registers are still on the stack, hence the +16 again. */
0309 mov out_blk+16(%esp),%edi;
0310 mov %eax, c_offset(%edi)
0311 mov %ebx, d_offset(%edi)
0312 mov %ecx, (%edi)
0313 mov %edx, b_offset(%edi)
0314
/* Restore the saved registers in reverse push order and return 1. */
0315 pop %edi
0316 pop %esi
0317 pop %ebx
0318 pop %ebp
0319 mov $1, %eax
0320 RET
0321 SYM_FUNC_END(twofish_dec_blk)