Back to home page

OSCL-LXR

 
 

    


0001 /* SPDX-License-Identifier: GPL-2.0-or-later */
0002 /***************************************************************************
0003 *   Copyright (C) 2006 by Joachim Fritschi, <jfritschi@freenet.de>        *
0004 *                                                                         *
0005 ***************************************************************************/
0006 
0007 .file "twofish-i586-asm.S"
0008 .text
0009 
0010 #include <linux/linkage.h>
0011 #include <asm/asm-offsets.h>
0012 
0013 /* stack offsets of the arguments relative to %esp at function entry; the return address is at 0 */
0014 
0015 #define in_blk    12  /* input byte array address parameter */
0016 #define out_blk   8  /* output byte array address parameter */
0017 #define ctx       4  /* address of the Twofish context structure */
0018 
0019 #define a_offset    0 /* byte offset of word a within a block or within a group of four whitening keys */
0020 #define b_offset    4 /* same for word b */
0021 #define c_offset    8 /* same for word c */
0022 #define d_offset    12 /* same for word d */
0023 
0024 /* Byte offsets of the members of the crypto context struct (addressed through %ebp in this file) */
0025 
0026 #define s0  0   /* S0 array; each S array is 256 words (1024 bytes) */
0027 #define s1  1024    /* S1 array */
0028 #define s2  2048    /* S2 array */
0029 #define s3  3072    /* S3 array */
0030 #define w   4096    /* 8 whitening keys (words): w+0..w+12 for input_whitening, w+16..w+28 for output_whitening */
0031 #define k   4128    /* round subkeys (words); a round reads the two words at k+round and k+4+round */
0032 
0033 /* register aliases for macro substitution: the round macros paste D (32-bit register), B (bits 0-7) or H (bits 8-15) onto Rn */
0034 
0035 #define R0D    %eax
0036 #define R0B    %al
0037 #define R0H    %ah
0038 
0039 #define R1D    %ebx
0040 #define R1B    %bl
0041 #define R1H    %bh
0042 
0043 #define R2D    %ecx
0044 #define R2B    %cl
0045 #define R2H    %ch
0046 
0047 #define R3D    %edx
0048 #define R3B    %dl
0049 #define R3H    %dh
0050 
0051 
0052 /* input whitening: xor the whitening key word at w+offset(context) into src */
0053 #define input_whitening(src,context,offset)\
0054     xor w+offset(context),  src;
0055 
0056 /* output whitening: xor the whitening key word at w+16+offset(context) into src */
0057 #define output_whitening(src,context,offset)\
0058     xor w+16+offset(context),   src;
0059 
0060 /* One encryption round. Needs %ebp = ctx; clobbers %esi and %edi; temporarily pushes d on the stack.
0061  * a input register containing a (rotated 16), left un-rotated (ror $16) on exit
0062  * b input register containing b, left as rol $1 (ror $16 then ror $15) on exit
0063  * c input register containing c, becomes (c ^ (T0 + T1 + word at k+round)) rol $15
0064  * d input register containing d (already rol $1), becomes d ^ (T0 + 2*T1 + word at k+4+round)
0065  * operations on a and b are interleaved to increase performance
0066  * T0 / T1: xor of the four S array words indexed by the bytes of a / b; round: byte offset into k */
0067 #define encrypt_round(a,b,c,d,round)\
0068     push    d ## D;\
0069     movzx   b ## B,     %edi;\
0070     mov s1(%ebp,%edi,4),d ## D;\
0071     movzx   a ## B,     %edi;\
0072     mov s2(%ebp,%edi,4),%esi;\
0073     movzx   b ## H,     %edi;\
0074     ror $16,        b ## D;\
0075     xor s2(%ebp,%edi,4),d ## D;\
0076     movzx   a ## H,     %edi;\
0077     ror $16,        a ## D;\
0078     xor s3(%ebp,%edi,4),%esi;\
0079     movzx   b ## B,     %edi;\
0080     xor s3(%ebp,%edi,4),d ## D;\
0081     movzx   a ## B,     %edi;\
0082     xor (%ebp,%edi,4),  %esi;\
0083     movzx   b ## H,     %edi;\
0084     ror $15,        b ## D;\
0085     xor (%ebp,%edi,4),  d ## D;\
0086     movzx   a ## H,     %edi;\
0087     xor s1(%ebp,%edi,4),%esi;\
0088     pop %edi;\
0089     add d ## D,     %esi;\
0090     add %esi,       d ## D;\
0091     add k+round(%ebp),  %esi;\
0092     xor %esi,       c ## D;\
0093     rol $15,        c ## D;\
0094     add k+4+round(%ebp),d ## D;\
0095     xor %edi,       d ## D;
0096 
0097 /* Final encryption round. Needs %ebp = ctx; clobbers %esi and %edi; temporarily pushes d on the stack.
0098  * a input register containing a (rotated 16), left un-rotated (ror $16) on exit
0099  * b input register containing b, unchanged on exit (ror $16 twice)
0100  * c input register containing c, becomes (c ^ (T0 + T1 + word at k+round)) ror $1
0101  * d input register containing d (already rol $1), becomes d ^ (T0 + 2*T1 + word at k+4+round)
0102  * operations on a and b are interleaved to increase performance
0103  * last round has different rotations for the output preparation
0104  * T0 / T1: xor of the four S array words indexed by the bytes of a / b; round: byte offset into k */
0105 #define encrypt_last_round(a,b,c,d,round)\
0106     push    d ## D;\
0107     movzx   b ## B,     %edi;\
0108     mov s1(%ebp,%edi,4),d ## D;\
0109     movzx   a ## B,     %edi;\
0110     mov s2(%ebp,%edi,4),%esi;\
0111     movzx   b ## H,     %edi;\
0112     ror $16,        b ## D;\
0113     xor s2(%ebp,%edi,4),d ## D;\
0114     movzx   a ## H,     %edi;\
0115     ror $16,        a ## D;\
0116     xor s3(%ebp,%edi,4),%esi;\
0117     movzx   b ## B,     %edi;\
0118     xor s3(%ebp,%edi,4),d ## D;\
0119     movzx   a ## B,     %edi;\
0120     xor (%ebp,%edi,4),  %esi;\
0121     movzx   b ## H,     %edi;\
0122     ror $16,        b ## D;\
0123     xor (%ebp,%edi,4),  d ## D;\
0124     movzx   a ## H,     %edi;\
0125     xor s1(%ebp,%edi,4),%esi;\
0126     pop %edi;\
0127     add d ## D,     %esi;\
0128     add %esi,       d ## D;\
0129     add k+round(%ebp),  %esi;\
0130     xor %esi,       c ## D;\
0131     ror $1,     c ## D;\
0132     add k+4+round(%ebp),d ## D;\
0133     xor %edi,       d ## D;
0134 
0135 /* One decryption round. Needs %ebp = ctx; clobbers %esi and %edi; temporarily pushes c on the stack.
0136  * a input register containing a, left as rol $1 (ror $16 then ror $15) on exit
0137  * b input register containing b (rotated 16), left un-rotated (ror $16) on exit
0138  * c input register containing c (already rol $1), becomes c ^ (T0 + T1 + word at k+round)
0139  * d input register containing d, becomes (d ^ (T0 + 2*T1 + word at k+4+round)) rol $15
0140  * operations on a and b are interleaved to increase performance
0141  * T0 / T1: xor of the four S array words indexed by the bytes of a / b; round: byte offset into k */
0142 #define decrypt_round(a,b,c,d,round)\
0143     push    c ## D;\
0144     movzx   a ## B,     %edi;\
0145     mov (%ebp,%edi,4),  c ## D;\
0146     movzx   b ## B,     %edi;\
0147     mov s3(%ebp,%edi,4),%esi;\
0148     movzx   a ## H,     %edi;\
0149     ror $16,        a ## D;\
0150     xor s1(%ebp,%edi,4),c ## D;\
0151     movzx   b ## H,     %edi;\
0152     ror $16,        b ## D;\
0153     xor (%ebp,%edi,4),  %esi;\
0154     movzx   a ## B,     %edi;\
0155     xor s2(%ebp,%edi,4),c ## D;\
0156     movzx   b ## B,     %edi;\
0157     xor s1(%ebp,%edi,4),%esi;\
0158     movzx   a ## H,     %edi;\
0159     ror $15,        a ## D;\
0160     xor s3(%ebp,%edi,4),c ## D;\
0161     movzx   b ## H,     %edi;\
0162     xor s2(%ebp,%edi,4),%esi;\
0163     pop %edi;\
0164     add %esi,       c ## D;\
0165     add c ## D,     %esi;\
0166     add k+round(%ebp),  c ## D;\
0167     xor %edi,       c ## D;\
0168     add k+4+round(%ebp),%esi;\
0169     xor %esi,       d ## D;\
0170     rol $15,        d ## D;
0171 
0172 /* Final decryption round. Needs %ebp = ctx; clobbers %esi and %edi; temporarily pushes c on the stack.
0173  * a input register containing a, unchanged on exit (ror $16 twice)
0174  * b input register containing b (rotated 16), left un-rotated (ror $16) on exit
0175  * c input register containing c (already rol $1), becomes c ^ (T0 + T1 + word at k+round)
0176  * d input register containing d, becomes (d ^ (T0 + 2*T1 + word at k+4+round)) ror $1
0177  * operations on a and b are interleaved to increase performance
0178  * last round has different rotations for the output preparation
0179  * T0 / T1: xor of the four S array words indexed by the bytes of a / b; round: byte offset into k */
0180 #define decrypt_last_round(a,b,c,d,round)\
0181     push    c ## D;\
0182     movzx   a ## B,     %edi;\
0183     mov (%ebp,%edi,4),  c ## D;\
0184     movzx   b ## B,     %edi;\
0185     mov s3(%ebp,%edi,4),%esi;\
0186     movzx   a ## H,     %edi;\
0187     ror $16,        a ## D;\
0188     xor s1(%ebp,%edi,4),c ## D;\
0189     movzx   b ## H,     %edi;\
0190     ror $16,        b ## D;\
0191     xor (%ebp,%edi,4),  %esi;\
0192     movzx   a ## B,     %edi;\
0193     xor s2(%ebp,%edi,4),c ## D;\
0194     movzx   b ## B,     %edi;\
0195     xor s1(%ebp,%edi,4),%esi;\
0196     movzx   a ## H,     %edi;\
0197     ror $16,        a ## D;\
0198     xor s3(%ebp,%edi,4),c ## D;\
0199     movzx   b ## H,     %edi;\
0200     xor s2(%ebp,%edi,4),%esi;\
0201     pop %edi;\
0202     add %esi,       c ## D;\
0203     add c ## D,     %esi;\
0204     add k+round(%ebp),  c ## D;\
0205     xor %edi,       c ## D;\
0206     add k+4+round(%ebp),%esi;\
0207     xor %esi,       d ## D;\
0208     ror $1,     d ## D;
0209 
0210 SYM_FUNC_START(twofish_enc_blk) /* encrypt the 16-byte block at in_blk using the context at ctx; result is stored at out_blk */
0211     push    %ebp            /* save registers according to calling convention*/
0212     push    %ebx
0213     push    %esi
0214     push    %edi
0215     /* the four pushes moved the stack arguments 16 bytes further away from %esp */
0216     mov ctx + 16(%esp), %ebp    /* abuse the base pointer: set new base
0217                      * pointer to the ctx address */
0218     mov     in_blk+16(%esp),%edi    /* input address in edi */
0219     /* load the four input words a, b, c, d and xor them with whitening keys w+0..w+12 */
0220     mov (%edi),     %eax
0221     mov b_offset(%edi), %ebx
0222     mov c_offset(%edi), %ecx
0223     mov d_offset(%edi), %edx
0224     input_whitening(%eax,%ebp,a_offset)
0225     ror $16,    %eax /* encrypt_round expects a rotated by 16 */
0226     input_whitening(%ebx,%ebp,b_offset)
0227     input_whitening(%ecx,%ebp,c_offset)
0228     input_whitening(%edx,%ebp,d_offset)
0229     rol $1, %edx /* encrypt_round expects d already rol $1 */
0230     /* 16 rounds; the (a,b) and (c,d) register pairs swap roles every round, subkey offset advances by 8 */
0231     encrypt_round(R0,R1,R2,R3,0);
0232     encrypt_round(R2,R3,R0,R1,8);
0233     encrypt_round(R0,R1,R2,R3,2*8);
0234     encrypt_round(R2,R3,R0,R1,3*8);
0235     encrypt_round(R0,R1,R2,R3,4*8);
0236     encrypt_round(R2,R3,R0,R1,5*8);
0237     encrypt_round(R0,R1,R2,R3,6*8);
0238     encrypt_round(R2,R3,R0,R1,7*8);
0239     encrypt_round(R0,R1,R2,R3,8*8);
0240     encrypt_round(R2,R3,R0,R1,9*8);
0241     encrypt_round(R0,R1,R2,R3,10*8);
0242     encrypt_round(R2,R3,R0,R1,11*8);
0243     encrypt_round(R0,R1,R2,R3,12*8);
0244     encrypt_round(R2,R3,R0,R1,13*8);
0245     encrypt_round(R0,R1,R2,R3,14*8);
0246     encrypt_last_round(R2,R3,R0,R1,15*8);
0247     /* %eax/%ebx are whitened and stored as output words c/d, %ecx/%edx as output words a/b */
0248     output_whitening(%eax,%ebp,c_offset)
0249     output_whitening(%ebx,%ebp,d_offset)
0250     output_whitening(%ecx,%ebp,a_offset)
0251     output_whitening(%edx,%ebp,b_offset)
0252     mov out_blk+16(%esp),%edi; /* output address in edi */
0253     mov %eax,       c_offset(%edi)
0254     mov %ebx,       d_offset(%edi)
0255     mov %ecx,       (%edi)
0256     mov %edx,       b_offset(%edi)
0257     /* restore the saved registers in reverse push order */
0258     pop %edi
0259     pop %esi
0260     pop %ebx
0261     pop %ebp
0262     mov $1, %eax /* %eax holds 1 on return */
0263     RET
0264 SYM_FUNC_END(twofish_enc_blk)
0265 
0266 SYM_FUNC_START(twofish_dec_blk) /* decrypt the 16-byte block at in_blk using the context at ctx; result is stored at out_blk */
0267     push    %ebp            /* save registers according to calling convention*/
0268     push    %ebx
0269     push    %esi
0270     push    %edi
0271 
0272     /* the four pushes moved the stack arguments 16 bytes further away from %esp */
0273     mov ctx + 16(%esp), %ebp    /* abuse the base pointer: set new base
0274                      * pointer to the ctx address */
0275     mov     in_blk+16(%esp),%edi    /* input address in edi */
0276     /* load the four input words and xor them with whitening keys w+16..w+28 (the ones applied last when encrypting) */
0277     mov (%edi),     %eax
0278     mov b_offset(%edi), %ebx
0279     mov c_offset(%edi), %ecx
0280     mov d_offset(%edi), %edx
0281     output_whitening(%eax,%ebp,a_offset)
0282     output_whitening(%ebx,%ebp,b_offset)
0283     ror $16,    %ebx /* decrypt_round expects b rotated by 16 */
0284     output_whitening(%ecx,%ebp,c_offset)
0285     output_whitening(%edx,%ebp,d_offset)
0286     rol $1, %ecx /* decrypt_round expects c already rol $1 */
0287     /* 16 rounds with the subkey offset going from 15*8 down to 0; the register pairs swap roles every round */
0288     decrypt_round(R0,R1,R2,R3,15*8);
0289     decrypt_round(R2,R3,R0,R1,14*8);
0290     decrypt_round(R0,R1,R2,R3,13*8);
0291     decrypt_round(R2,R3,R0,R1,12*8);
0292     decrypt_round(R0,R1,R2,R3,11*8);
0293     decrypt_round(R2,R3,R0,R1,10*8);
0294     decrypt_round(R0,R1,R2,R3,9*8);
0295     decrypt_round(R2,R3,R0,R1,8*8);
0296     decrypt_round(R0,R1,R2,R3,7*8);
0297     decrypt_round(R2,R3,R0,R1,6*8);
0298     decrypt_round(R0,R1,R2,R3,5*8);
0299     decrypt_round(R2,R3,R0,R1,4*8);
0300     decrypt_round(R0,R1,R2,R3,3*8);
0301     decrypt_round(R2,R3,R0,R1,2*8);
0302     decrypt_round(R0,R1,R2,R3,1*8);
0303     decrypt_last_round(R2,R3,R0,R1,0);
0304     /* %eax/%ebx are whitened (keys w+0..w+12) and stored as output words c/d, %ecx/%edx as output words a/b */
0305     input_whitening(%eax,%ebp,c_offset)
0306     input_whitening(%ebx,%ebp,d_offset)
0307     input_whitening(%ecx,%ebp,a_offset)
0308     input_whitening(%edx,%ebp,b_offset)
0309     mov out_blk+16(%esp),%edi; /* output address in edi */
0310     mov %eax,       c_offset(%edi)
0311     mov %ebx,       d_offset(%edi)
0312     mov %ecx,       (%edi)
0313     mov %edx,       b_offset(%edi)
0314     /* restore the saved registers in reverse push order */
0315     pop %edi
0316     pop %esi
0317     pop %ebx
0318     pop %ebp
0319     mov $1, %eax /* %eax holds 1 on return */
0320     RET
0321 SYM_FUNC_END(twofish_dec_blk)