Back to home page

OSCL-LXR

 
 

    


0001 /* SPDX-License-Identifier: GPL-2.0-or-later */
0002 /*
0003  * Twofish Cipher 3-way parallel algorithm (x86_64)
0004  *
0005  * Copyright (C) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
0006  */
0007 
0008 #include <linux/linkage.h>
0009 
0010 .file "twofish-x86_64-asm-3way.S"
0011 .text
0012 
0013 /* structure of crypto context */
/*
 * Byte offsets from CTX used by the macros in this file:
 *   s0..s3  four lookup tables; each is addressed as table(CTX, byte, 4),
 *           i.e. 256 entries of 4 bytes = 1024 bytes apiece.
 *   w       words XORed into the data on input and output (inpack3 and
 *           outunpack3 read 8 bytes at w+4*m for m = 0, 2, 4, 6).
 *   k       words added in every round (enc_round_end / dec_round_end read
 *           the 32-bit words at k+4*(2*n) and k+4*(2*n+1)).
 * NOTE(review): presumably mirrors the C-side Twofish context layout --
 * confirm against its definition before changing any offset.
 */
0014 #define s0  0 /* first table */
0015 #define s1  1024 /* second table */
0016 #define s2  2048 /* third table */
0017 #define s3  3072 /* fourth table */
0018 #define w   4096 /* input/output XOR words (32 bytes are accessed) */
0019 #define k   4128 /* per-round additive words */
0020 
0021 /**********************************************************************
0022   3-way twofish
0023  **********************************************************************/
/*
 * Register allocation. Several names deliberately share one register:
 *   - RIO and RT0 are both %rdx: the I/O pointer is overwritten as soon as
 *     the rounds use RT0 as scratch, so both entry points push dst and pop
 *     it back into RIO after the rounds.
 *   - RCD0..2 and RX0..2 are both %r8..%r10: push_cd() spills the CD values
 *     to the stack (CD0..CD2 below) before the rounds reuse them as RX.
 *   - RT1 is %rsi (dst on entry) and RAB2 is %rcx (the xor flag on entry of
 *     the encrypt routine); both are pushed before they are overwritten.
 * The "d", "bh" and "bl" suffixed names are the 32-bit, bits 8..15 and
 * bits 0..7 views of the same register; macros build them by token pasting.
 */
0024 #define CTX %rdi /* context pointer, never modified in this file */
0025 #define RIO %rdx /* source pointer on entry, destination pointer for output */
0026 
0027 #define RAB0 %rax /* 64-bit working value, block 0 */
0028 #define RAB1 %rbx /* 64-bit working value, block 1 (callee-saved, pushed on entry) */
0029 #define RAB2 %rcx /* 64-bit working value, block 2 */
0030 
0031 #define RAB0d %eax
0032 #define RAB1d %ebx
0033 #define RAB2d %ecx
0034 
0035 #define RAB0bh %ah
0036 #define RAB1bh %bh
0037 #define RAB2bh %ch
0038 
0039 #define RAB0bl %al
0040 #define RAB1bl %bl
0041 #define RAB2bl %cl
0042 
0043 #define CD0 0x0(%rsp) /* stack slots filled by push_cd(): RCD0 is pushed last */
0044 #define CD1 0x8(%rsp)
0045 #define CD2 0x10(%rsp)
0046 
0047 # used only before/after all rounds
0048 #define RCD0 %r8
0049 #define RCD1 %r9
0050 #define RCD2 %r10
0051 
0052 # used only during rounds
0053 #define RX0 %r8
0054 #define RX1 %r9
0055 #define RX2 %r10
0056 
0057 #define RX0d %r8d
0058 #define RX1d %r9d
0059 #define RX2d %r10d
0060 
0061 #define RY0 %r11
0062 #define RY1 %r12 /* callee-saved, pushed on entry */
0063 #define RY2 %r13 /* callee-saved, pushed on entry */
0064 
0065 #define RY0d %r11d
0066 #define RY1d %r12d
0067 #define RY2d %r13d
0068 
0069 #define RT0 %rdx /* scratch; same register as RIO */
0070 #define RT1 %rsi /* scratch; holds dst on function entry */
0071 
0072 #define RT0d %edx
0073 #define RT1d %esi
0074 
0075 #define RT1bl %sil
0076 
/*
 * do16bit_ror(rot, op1, op2, T0, T1, tmp1, tmp2, ab, dst)
 *
 * Use the two lowest bytes of the 64-bit register `ab` as table indices and
 * then rotate `ab` so that the next byte pair becomes the lowest one:
 *   tmp2 = ab bits 0..7,   tmp1 = ab bits 8..15
 *   ab   = ab rotated right by `rot` bits
 *   dst  = dst <op1> T0[tmp2];   dst = dst <op2> T1[tmp1]     (32-bit ops)
 *
 * T0/T1 are byte offsets of tables inside CTX with 4-byte entries. op1/op2
 * are mnemonic stems (callers pass mov or xor); the "l" suffix is pasted on
 * here. `ab`, tmp1, tmp2 and dst are alias names: the bl/bh/d suffixes are
 * pasted on to select the sub-register. When op1 is mov, tmp2 may be the
 * same register as dst, because the index is consumed by the address of the
 * first load before dst is overwritten.
 */
0077 #define do16bit_ror(rot, op1, op2, T0, T1, tmp1, tmp2, ab, dst) \
0078     movzbl ab ## bl,        tmp2 ## d; /* index for T0, zero-extended */ \
0079     movzbl ab ## bh,        tmp1 ## d; /* index for T1, zero-extended */ \
0080     rorq $(rot),            ab; /* expose the next byte pair */ \
0081     op1##l T0(CTX, tmp2, 4),    dst ## d; \
0082     op2##l T1(CTX, tmp1, 4),    dst ## d;
0083 
/*
 * swap_ab_with_cd(ab, cd, tmp): exchange the 64-bit values of `ab` and `cd`
 * through the scratch register `tmp`, which is left holding the old `cd`.
 * In this file `ab` is a register and `cd` one of the CD0..CD2 stack slots.
 */
0084 #define swap_ab_with_cd(ab, cd, tmp)    \
0085     movq cd, tmp;           \
0086     movq ab, cd;            \
0087     movq tmp, ab;
0088 
0089 /*
0090  * Combined G1 & G2 function. Reordered with help of rotates to have moves
0091  * at beginning.
0092  */
/*
 * g1g2_3(ab, cd, Tx0..Tx3, Ty0..Ty3, x, y), for each of the three blocks
 * i = 0, 1, 2 (names are built as ab##i, cd##i, x##i, y##i):
 *
 *   x##i = Tx0[byte0] ^ Tx1[byte1] ^ Tx2[byte2] ^ Tx3[byte3]
 *   y##i = Ty1[byte4] ^ Ty2[byte5] ^ Ty3[byte6] ^ Ty0[byte7]
 *
 * where byte0..byte7 are the bytes of ab##i, least significant first. The
 * bytes are reached by rotating ab##i by 32, 48, 32 and 16 bits in turn;
 * the total is 128 bits, so ab##i is back to its original value when it is
 * finally exchanged with cd##i.
 *
 * The first pass uses mov as the first operation, so x/y need no clearing
 * and can double as the index register. The second pass accumulates with
 * xor and therefore indexes through RT0/RT1 instead.
 *
 * On exit: ab##i holds the previous cd##i, cd##i holds the previous ab##i.
 * Clobbers RT0 and RT1.
 */
0093 #define g1g2_3(ab, cd, Tx0, Tx1, Tx2, Tx3, Ty0, Ty1, Ty2, Ty3, x, y) \
0094     /* G1,1 && G2,1 */ \
0095     do16bit_ror(32, mov, xor, Tx0, Tx1, RT0, x ## 0, ab ## 0, x ## 0); \
0096     do16bit_ror(48, mov, xor, Ty1, Ty2, RT0, y ## 0, ab ## 0, y ## 0); \
0097     \
0098     do16bit_ror(32, mov, xor, Tx0, Tx1, RT0, x ## 1, ab ## 1, x ## 1); \
0099     do16bit_ror(48, mov, xor, Ty1, Ty2, RT0, y ## 1, ab ## 1, y ## 1); \
0100     \
0101     do16bit_ror(32, mov, xor, Tx0, Tx1, RT0, x ## 2, ab ## 2, x ## 2); \
0102     do16bit_ror(48, mov, xor, Ty1, Ty2, RT0, y ## 2, ab ## 2, y ## 2); \
0103     \
0104     /* G1,2 && G2,2 */ \
0105     do16bit_ror(32, xor, xor, Tx2, Tx3, RT0, RT1, ab ## 0, x ## 0); \
0106     do16bit_ror(16, xor, xor, Ty3, Ty0, RT0, RT1, ab ## 0, y ## 0); \
0107     swap_ab_with_cd(ab ## 0, cd ## 0, RT0); \
0108     \
0109     do16bit_ror(32, xor, xor, Tx2, Tx3, RT0, RT1, ab ## 1, x ## 1); \
0110     do16bit_ror(16, xor, xor, Ty3, Ty0, RT0, RT1, ab ## 1, y ## 1); \
0111     swap_ab_with_cd(ab ## 1, cd ## 1, RT0); \
0112     \
0113     do16bit_ror(32, xor, xor, Tx2, Tx3, RT0, RT1, ab ## 2, x ## 2); \
0114     do16bit_ror(16, xor, xor, Ty3, Ty0, RT0, RT1, ab ## 2, y ## 2); \
0115     swap_ab_with_cd(ab ## 2, cd ## 2, RT0);
0116 
/*
 * enc_round_end(ab, x, y, n): fold the round-n results x and y into the
 * 64-bit register `ab`. With lo/hi the low/high dwords of `ab` on entry and
 * all arithmetic modulo 2^32:
 *
 *   new lo = ror32((x + y   + K[2n])   ^ lo, 1)
 *   new hi = rol32(hi, 1) ^ (x + 2*y + K[2n+1])
 *
 * K[i] is the 32-bit word at k + 4*i in CTX. x and y are clobbered. The
 * 32-bit operations leave the upper half of x zero, so the final orq only
 * affects the low dword of `ab`.
 */
0117 #define enc_round_end(ab, x, y, n) \
0118     addl y ## d,            x ## d; /* x += y */ \
0119     addl x ## d,            y ## d; /* y += x, i.e. x + 2*y of the inputs */ \
0120     addl k+4*(2*(n))(CTX),      x ## d; /* x += K[2n] */ \
0121     xorl ab ## d,           x ## d; /* x ^= lo */ \
0122     addl k+4*(2*(n)+1)(CTX),    y ## d; /* y += K[2n+1] */ \
0123     shrq $32,           ab; /* bring hi down to the low dword */ \
0124     roll $1,            ab ## d; /* rol32(hi, 1) */ \
0125     xorl y ## d,            ab ## d; /* ... ^ y */ \
0126     shlq $32,           ab; /* back to the high dword; low dword is now 0 */ \
0127     rorl $1,            x ## d; /* ror32(x, 1) */ \
0128     orq x,              ab; /* merge the new low dword */
0129 
/*
 * dec_round_end(ba, x, y, n): decryption counterpart of enc_round_end. With
 * lo/hi the low/high dwords of `ba` on entry and all arithmetic modulo 2^32:
 *
 *   new lo = ror32((x + 2*y + K[2n+1]) ^ lo, 1)
 *   new hi = rol32(hi, 1) ^ (x + y + K[2n])
 *
 * K[i] is the 32-bit word at k + 4*i in CTX. x and y are clobbered. Compared
 * with enc_round_end the roles of x and y in the final merge are exchanged.
 */
0130 #define dec_round_end(ba, x, y, n) \
0131     addl y ## d,            x ## d; /* x += y */ \
0132     addl x ## d,            y ## d; /* y += x, i.e. x + 2*y of the inputs */ \
0133     addl k+4*(2*(n))(CTX),      x ## d; /* x += K[2n] */ \
0134     addl k+4*(2*(n)+1)(CTX),    y ## d; /* y += K[2n+1] */ \
0135     xorl ba ## d,           y ## d; /* y ^= lo */ \
0136     shrq $32,           ba; /* bring hi down to the low dword */ \
0137     roll $1,            ba ## d; /* rol32(hi, 1) */ \
0138     xorl x ## d,            ba ## d; /* ... ^ x */ \
0139     shlq $32,           ba; /* back to the high dword; low dword is now 0 */ \
0140     rorl $1,            y ## d; /* ror32(y, 1) */ \
0141     orq y,              ba; /* merge the new low dword */
0142 
/*
 * encrypt_round3(ab, cd, n): one encryption round, number n, on all three
 * blocks. g1g2_3 computes RX from the low dword of ab (tables s0,s1,s2,s3 on
 * bytes 0..3) and RY from the high dword (tables s1,s2,s3,s0 on bytes 4..7),
 * then exchanges ab##i with cd##i. enc_round_end therefore updates the
 * value that was in cd##i before the round, which now sits in ab##i.
 */
0143 #define encrypt_round3(ab, cd, n) \
0144     g1g2_3(ab, cd, s0, s1, s2, s3, s0, s1, s2, s3, RX, RY); \
0145     \
0146     enc_round_end(ab ## 0, RX0, RY0, n); \
0147     enc_round_end(ab ## 1, RX1, RY1, n); \
0148     enc_round_end(ab ## 2, RX2, RY2, n);
0149 
/*
 * decrypt_round3(ba, dc, n): one decryption round, number n, on all three
 * blocks. The table arguments are rotated and RY/RX are passed in swapped
 * positions, so g1g2_3 computes RY from the low dword of ba (tables
 * s1,s2,s3,s0 on bytes 0..3) and RX from the high dword (tables s0,s1,s2,s3
 * on bytes 4..7) -- the dword halves are exchanged relative to
 * encrypt_round3. g1g2_3 then exchanges ba##i with dc##i, and dec_round_end
 * updates the value now held in ba##i.
 */
0150 #define decrypt_round3(ba, dc, n) \
0151     g1g2_3(ba, dc, s1, s2, s3, s0, s3, s0, s1, s2, RY, RX); \
0152     \
0153     dec_round_end(ba ## 0, RX0, RY0, n); \
0154     dec_round_end(ba ## 1, RX1, RY1, n); \
0155     dec_round_end(ba ## 2, RX2, RY2, n);
0156 
/*
 * encrypt_cycle3(ab, cd, n): two consecutive encryption rounds, numbers
 * 2*n and 2*n+1, on all three blocks. Each round exchanges ab##i with
 * cd##i, so after a full cycle every value is back in the place it
 * started from.
 *
 * The argument n is parenthesized so that an expression argument (for
 * example 1+1) still yields the intended round numbers.
 */
0157 #define encrypt_cycle3(ab, cd, n) \
0158     encrypt_round3(ab, cd, (n)*2); \
0159     encrypt_round3(ab, cd, ((n)*2)+1);
0160 
/*
 * decrypt_cycle3(ba, dc, n): two consecutive decryption rounds on all three
 * blocks, in the reverse order of encrypt_cycle3: round 2*n+1 first, then
 * round 2*n. Each round exchanges ba##i with dc##i, so after a full cycle
 * every value is back in the place it started from.
 *
 * The argument n is parenthesized so that an expression argument (for
 * example 1+1) still yields the intended round numbers.
 */
0161 #define decrypt_cycle3(ba, dc, n) \
0162     decrypt_round3(ba, dc, ((n)*2)+1); \
0163     decrypt_round3(ba, dc, ((n)*2));
0164 
/*
 * push_cd(): spill RCD0..RCD2 to the stack. RCD0 is pushed last, so the
 * values end up at 0(%rsp), 8(%rsp) and 16(%rsp), which is exactly what the
 * CD0, CD1 and CD2 aliases address. Frees %r8..%r10 for use as RX0..RX2.
 */
0165 #define push_cd()   \
0166     pushq RCD2; \
0167     pushq RCD1; \
0168     pushq RCD0;
0169 
/*
 * pop_cd(): inverse of push_cd(). Reloads RCD0..RCD2 from the CD0..CD2
 * stack slots (lowest address first) and releases the 24 bytes of stack.
 */
0170 #define pop_cd()    \
0171     popq RCD0;  \
0172     popq RCD1;  \
0173     popq RCD2;
0174 
/*
 * inpack3(in, n, xy, m): load one 64-bit half of each of three consecutive
 * 16-byte blocks and XOR it with 64 bits of the context.
 *
 *   in  register holding the input pointer
 *   n   offset of the half inside a block, in 32-bit words (callers use
 *       0 or 2)
 *   xy  name stem of the destination registers; results go to xy##0,
 *       xy##1 and xy##2 for the blocks at byte offsets 0, 16 and 32
 *   m   index, in 32-bit words, of the context words at w that are XORed in
 *
 * Both n and m are parenthesized in the address expressions so that an
 * expression argument cannot change the computed offset.
 */
0175 #define inpack3(in, n, xy, m) \
0176     movq 4*(n)(in),         xy ## 0; \
0177     xorq w+4*(m)(CTX),      xy ## 0; \
0178     \
0179     movq 4*(4+(n))(in),     xy ## 1; \
0180     xorq w+4*(m)(CTX),      xy ## 1; \
0181     \
0182     movq 4*(8+(n))(in),     xy ## 2; \
0183     xorq w+4*(m)(CTX),      xy ## 2;
0184 
/*
 * outunpack3(op, out, n, xy, m): XOR 64 bits of the context into one 64-bit
 * half of each of the three blocks and write it out.
 *
 *   op   mnemonic stem for the store, with "q" pasted on: mov overwrites
 *        the destination, xor combines the result with what is already
 *        there
 *   out  register holding the output pointer
 *   n    offset of the half inside a block, in 32-bit words (callers use
 *        0 or 2)
 *   xy   name stem of the source registers xy##0, xy##1, xy##2, written to
 *        the blocks at byte offsets 0, 16 and 32; the registers are
 *        modified by the XOR
 *   m    index, in 32-bit words, of the context words at w that are XORed in
 *
 * Both n and m are parenthesized in the address expressions so that an
 * expression argument cannot change the computed offset.
 */
0185 #define outunpack3(op, out, n, xy, m) \
0186     xorq w+4*(m)(CTX),      xy ## 0; \
0187     op ## q xy ## 0,        4*(n)(out); \
0188     \
0189     xorq w+4*(m)(CTX),      xy ## 1; \
0190     op ## q xy ## 1,        4*(4+(n))(out); \
0191     \
0192     xorq w+4*(m)(CTX),      xy ## 2; \
0193     op ## q xy ## 2,        4*(8+(n))(out);
0194 
/*
 * inpack_enc3(): load the three input blocks for encryption from RIO.
 * RAB0..2 receive bytes 0..7 of each block XORed with the context words at
 * w+0; RCD0..2 receive bytes 8..15 XORed with the context words at w+8.
 */
0195 #define inpack_enc3() \
0196     inpack3(RIO, 0, RAB, 0); \
0197     inpack3(RIO, 2, RCD, 2);
0198 
/*
 * outunpack_enc3(op): write the three encrypted blocks to RIO using
 * op##q (mov to store, xor to combine with the existing destination data).
 * The halves are stored exchanged relative to how inpack_enc3 loaded them:
 * RAB0..2 ^ (context words at w+24) go to bytes 8..15 of each block and
 * RCD0..2 ^ (context words at w+16) go to bytes 0..7.
 */
0199 #define outunpack_enc3(op) \
0200     outunpack3(op, RIO, 2, RAB, 6); \
0201     outunpack3(op, RIO, 0, RCD, 4);
0202 
/*
 * inpack_dec3(): load the three input blocks for decryption from RIO.
 * RAB0..2 receive bytes 0..7 of each block XORed with the context words at
 * w+16; RCD0..2 receive bytes 8..15 XORed with the context words at w+24.
 * Every register is then rotated by 32 bits, i.e. its two dwords are
 * exchanged, which is the layout decrypt_round3 works on.
 */
0203 #define inpack_dec3() \
0204     inpack3(RIO, 0, RAB, 4); \
0205     rorq $32,           RAB0; \
0206     rorq $32,           RAB1; \
0207     rorq $32,           RAB2; \
0208     inpack3(RIO, 2, RCD, 6); \
0209     rorq $32,           RCD0; \
0210     rorq $32,           RCD1; \
0211     rorq $32,           RCD2;
0212 
/*
 * outunpack_dec3(): write the three decrypted blocks to RIO. The dword
 * exchange applied by inpack_dec3 is undone first (rotate by 32 bits); then
 * RCD0..2 ^ (context words at w+0) are stored to bytes 0..7 of each block
 * and RAB0..2 ^ (context words at w+8) to bytes 8..15. The store is always
 * a plain mov; there is no xor-into-destination variant for decryption.
 */
0213 #define outunpack_dec3() \
0214     rorq $32,           RCD0; \
0215     rorq $32,           RCD1; \
0216     rorq $32,           RCD2; \
0217     outunpack3(mov, RIO, 0, RCD, 0); \
0218     rorq $32,           RAB0; \
0219     rorq $32,           RAB1; \
0220     rorq $32,           RAB2; \
0221     outunpack3(mov, RIO, 2, RAB, 2);
0222 
/*
 * __twofish_enc_blk_3way: encrypt three consecutive 16-byte blocks (48
 * bytes read from src, 48 bytes written to dst) with the context in CTX.
 *
 * Flow: save the callee-saved registers this file uses (%r13, %r12, %rbx),
 * save the xor flag and dst because %rcx and %rsi are reused as RAB2 and
 * RT1, load and pre-XOR the input, spill the CD values to the stack, run
 * eight cycles (16 rounds), reload CD, recover dst and the flag, and then
 * either store the result (flag zero) or XOR it into dst (flag non-zero).
 * Only the low byte of the flag is tested.
 *
 * Pushes and pops are balanced on both exit paths. %rax, %rcx, %rdx, %rsi
 * and %r8..%r11 are overwritten; %rdi is not modified.
 */
0223 SYM_FUNC_START(__twofish_enc_blk_3way)
0224     /* input:
0225      *  %rdi: ctx, CTX
0226      *  %rsi: dst
0227      *  %rdx: src, RIO
0228      *  %rcx: bool, if true: xor output
0229      */
0230     pushq %r13; /* callee-saved, used as RY2 */
0231     pushq %r12; /* callee-saved, used as RY1 */
0232     pushq %rbx; /* callee-saved, used as RAB1 */
0233 
0234     pushq %rcx; /* bool xor */
0235     pushq %rsi; /* dst */
0236 
0237     inpack_enc3(); /* reads src through RIO; overwrites %rcx (RAB2) */
0238 
0239     push_cd(); /* CD0..CD2 now live at 0..16(%rsp); %r8..%r10 become RX */
0240     encrypt_cycle3(RAB, CD, 0);
0241     encrypt_cycle3(RAB, CD, 1);
0242     encrypt_cycle3(RAB, CD, 2);
0243     encrypt_cycle3(RAB, CD, 3);
0244     encrypt_cycle3(RAB, CD, 4);
0245     encrypt_cycle3(RAB, CD, 5);
0246     encrypt_cycle3(RAB, CD, 6);
0247     encrypt_cycle3(RAB, CD, 7);
0248     pop_cd(); /* CD values back in RCD0..RCD2 */
0249 
0250     popq RIO; /* dst */
0251     popq RT1; /* bool xor */
0252 
0253     testb RT1bl, RT1bl; /* flag == 0 ? */
0254     jnz .L__enc_xor3; /* non-zero: XOR the result into dst */
0255 
0256     outunpack_enc3(mov); /* plain store to dst */
0257 
0258     popq %rbx;
0259     popq %r12;
0260     popq %r13;
0261     RET;
0262 
0263 .L__enc_xor3:
0264     outunpack_enc3(xor); /* dst ^= result */
0265 
0266     popq %rbx;
0267     popq %r12;
0268     popq %r13;
0269     RET;
0270 SYM_FUNC_END(__twofish_enc_blk_3way)
0271 
/*
 * twofish_dec_blk_3way: decrypt three consecutive 16-byte blocks (48 bytes
 * read from src, 48 bytes written to dst) with the context in CTX.
 *
 * Flow: save the callee-saved registers this file uses (%r13, %r12, %rbx),
 * save dst because %rsi is reused as RT1, load and pre-XOR the input, spill
 * the CD values to the stack, run the eight cycles from 7 down to 0 (rounds
 * 15..0), reload CD, recover dst into RIO and store the result. Unlike the
 * encrypt routine there is no xor-output mode.
 *
 * Pushes and pops are balanced. %rax, %rcx, %rdx, %rsi and %r8..%r11 are
 * overwritten; %rdi is not modified.
 */
0272 SYM_FUNC_START(twofish_dec_blk_3way)
0273     /* input:
0274      *  %rdi: ctx, CTX
0275      *  %rsi: dst
0276      *  %rdx: src, RIO
0277      */
0278     pushq %r13; /* callee-saved, used as RY2 */
0279     pushq %r12; /* callee-saved, used as RY1 */
0280     pushq %rbx; /* callee-saved, used as RAB1 */
0281 
0282     pushq %rsi; /* dst */
0283 
0284     inpack_dec3(); /* reads src through RIO */
0285 
0286     push_cd(); /* CD0..CD2 now live at 0..16(%rsp); %r8..%r10 become RX */
0287     decrypt_cycle3(RAB, CD, 7);
0288     decrypt_cycle3(RAB, CD, 6);
0289     decrypt_cycle3(RAB, CD, 5);
0290     decrypt_cycle3(RAB, CD, 4);
0291     decrypt_cycle3(RAB, CD, 3);
0292     decrypt_cycle3(RAB, CD, 2);
0293     decrypt_cycle3(RAB, CD, 1);
0294     decrypt_cycle3(RAB, CD, 0);
0295     pop_cd(); /* CD values back in RCD0..RCD2 */
0296 
0297     popq RIO; /* dst */
0298 
0299     outunpack_dec3(); /* plain store to dst */
0300 
0301     popq %rbx;
0302     popq %r12;
0303     popq %r13;
0304     RET;
0305 SYM_FUNC_END(twofish_dec_blk_3way)