Back to home page

OSCL-LXR

 
 

    


0001 /* SPDX-License-Identifier: GPL-2.0-or-later */
0002 /*
0003  * Camellia Cipher Algorithm (x86_64)
0004  *
0005  * Copyright (C) 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
0006  */
0007 
0008 #include <linux/linkage.h>
0009 
0010 .file "camellia-x86_64-asm_64.S"
0011 .text
0012 
0013 .extern camellia_sp10011110;
0014 .extern camellia_sp22000222;
0015 .extern camellia_sp03303033;
0016 .extern camellia_sp00444404;
0017 .extern camellia_sp02220222;
0018 .extern camellia_sp30333033;
0019 .extern camellia_sp44044404;
0020 .extern camellia_sp11101110;
0021 
/*
 * Short local names for the eight lookup tables declared .extern in this
 * file, listed in the pairs in which the round macros consume them.
 */
#define sp00444404  camellia_sp00444404
#define sp03303033  camellia_sp03303033

#define sp22000222  camellia_sp22000222
#define sp10011110  camellia_sp10011110

#define sp11101110  camellia_sp11101110
#define sp44044404  camellia_sp44044404

#define sp30333033  camellia_sp30333033
#define sp02220222  camellia_sp02220222
0030 
/* Size in bytes of the subkey table at the start of the context. */
#define CAMELLIA_TABLE_BYTE_LEN 272

/* struct camellia_ctx, as byte offsets from CTX: */
#define key_table   0                           /* 8-byte subkey entries */
#define key_length  CAMELLIA_TABLE_BYTE_LEN     /* located right after the table */
0036 
/*
 * register macros
 *
 * Each 64-bit name is followed by the narrower views of the same register
 * that the macros below build by token pasting (d = low 32 bits, bl = low
 * byte, bh = second byte).
 */

/* context pointer and current I/O pointer */
#define CTX     %rdi
#define RIO     %rsi
#define RIOd    %esi

/* block 0 state: the two 64-bit halves loaded by the *_inpack macros */
#define RAB0    %rax
#define RAB0d   %eax
#define RAB0bl  %al
#define RAB0bh  %ah

#define RCD0    %rcx
#define RCD0d   %ecx
#define RCD0bl  %cl
#define RCD0bh  %ch

/* block 1 state, only used by the 2-way code */
#define RAB1    %rbx
#define RAB1d   %ebx
#define RAB1bl  %bl
#define RAB1bh  %bh

#define RCD1    %rdx
#define RCD1d   %edx
#define RCD1bl  %dl
#define RCD1bh  %dh

/*
 * Scratch registers.  RT0 is the same register as RIO, so RIO is only
 * meaningful before the first and after the last round/fls macro.  RT1 is
 * %r12; the entry points copy it to RR12 and restore it before returning.
 */
#define RT0     %rsi
#define RT0d    %esi

#define RT1     %r12
#define RT1d    %r12d

#define RT2     %r8
#define RT2d    %r8d
#define RT2bl   %r8b

/* values kept alive across the rounds */
#define RXOR    %r9     /* xor flag (encrypt) / temporary or saved %rbx (decrypt) */
#define RXORd   %r9d
#define RXORbl  %r9b

#define RR12    %r10    /* saved %r12 */
#define RDST    %r11    /* saved dst pointer */
0078 
/*
 * xor2ror16(T0, T1, tmp1, tmp2, ab, dst)
 *
 * dst ^= T0[lowest byte of ab] ^ T1[second byte of ab], with 8-byte table
 * entries; ab is then rotated right by 16 bits so the next invocation
 * consumes the following two bytes.
 *
 * The table base is loaded RIP-relatively and then indexed, instead of
 * using an absolute symbol(, index, 8) operand.  RIP-relative addressing
 * cannot be combined with an index register, hence the separate leaq.
 * This keeps the code position independent and free of absolute
 * relocations.
 *
 * Clobbers tmp1, tmp2 and the flags.  tmp1 is written from the high-byte
 * register ab ## bh, so it must be a register encodable without a REX
 * prefix (callers pass RT0); tmp2 may be any register (callers pass RT1).
 */
#define xor2ror16(T0, T1, tmp1, tmp2, ab, dst) \
    leaq T0(%rip),          tmp1; \
    movzbl ab ## bl,        tmp2 ## d; \
    xorq (tmp1, tmp2, 8),   dst; \
    leaq T1(%rip),          tmp2; \
    movzbl ab ## bh,        tmp1 ## d; \
    rorq $16,               ab; \
    xorq (tmp2, tmp1, 8),   dst;
0085 
0086 /**********************************************************************
0087   1-way camellia
0088  **********************************************************************/
/*
 * roundsm(ab, subkey, cd): one round on block 0.
 *
 * cd0 ^= key_table[subkey] ^ (eight table lookups, one per byte of ab0).
 * The lookups are accumulated alternately in cd0 and in RT2 (which starts
 * out holding the subkey) and merged at the end.  Each xor2ror16 rotates
 * ab0 by 16 bits, so after four of them ab0 is back to its original value.
 * Clobbers RT0, RT1, RT2 and the flags.
 */
#define roundsm(ab, subkey, cd) \
    movq (key_table + (subkey) * 8)(CTX),       RT2; \
    \
    xor2ror16(sp00444404, sp03303033, RT0, RT1, ab ## 0, cd ## 0); \
    xor2ror16(sp22000222, sp10011110, RT0, RT1, ab ## 0, RT2); \
    xor2ror16(sp11101110, sp44044404, RT0, RT1, ab ## 0, cd ## 0); \
    xor2ror16(sp30333033, sp02220222, RT0, RT1, ab ## 0, RT2); \
    \
    xorq RT2,                                   cd ## 0;
0098 
/*
 * fls(l, r, kl, kr): key-dependent mixing layer on block 0, applied to
 * l0 with key_table[kl] and to r0 with key_table[kr].  In order:
 *
 *   high32(l0) ^= rol32(low32(l0) & low32(kl), 1)
 *   low32(r0)  ^= high32(r0 | kr)
 *   low32(l0)  ^= high32(l0 | kl)
 *   high32(r0) ^= rol32(low32(r0) & low32(kr), 1)
 *
 * The last two steps use the values already updated by the first two.
 * Clobbers RT0, RT1, RT2 and the flags.
 */
#define fls(l, r, kl, kr) \
    movl (key_table + (kl) * 8)(CTX),       RT0d; \
    andl l ## 0d,                           RT0d; \
    roll $1,                                RT0d; \
    shlq $32,                               RT0; \
    xorq RT0,                               l ## 0; \
    \
    movq (key_table + (kr) * 8)(CTX),       RT1; \
    orq r ## 0,                             RT1; \
    shrq $32,                               RT1; \
    xorq RT1,                               r ## 0; \
    \
    movq (key_table + (kl) * 8)(CTX),       RT2; \
    orq l ## 0,                             RT2; \
    shrq $32,                               RT2; \
    xorq RT2,                               l ## 0; \
    \
    movl (key_table + (kr) * 8)(CTX),       RT0d; \
    andl r ## 0d,                           RT0d; \
    roll $1,                                RT0d; \
    shlq $32,                               RT0; \
    xorq RT0,                               r ## 0;
0119 
/*
 * enc_rounds(i): six rounds on block 0 using subkeys i+2 .. i+7 in
 * ascending order, alternating which half is the round input.
 */
#define enc_rounds(i) \
    roundsm(RAB, (i) + 2, RCD); \
    roundsm(RCD, (i) + 3, RAB); \
    roundsm(RAB, (i) + 4, RCD); \
    roundsm(RCD, (i) + 5, RAB); \
    roundsm(RAB, (i) + 6, RCD); \
    roundsm(RCD, (i) + 7, RAB);
0127 
/* enc_fls(i): fls on block 0 with subkey i for RAB and subkey i+1 for RCD. */
#define enc_fls(i) \
    fls(RAB, RCD, (i), (i) + 1);
0130 
/*
 * enc_inpack(): load one 16-byte block from RIO.  Each 8-byte half is
 * byte-swapped and rotated by 32 bits; the first subkey (key_table[0]) is
 * then XORed into RAB0.
 */
#define enc_inpack() \
    movq (RIO),                 RAB0; \
    bswapq                      RAB0; \
    rolq $32,                   RAB0; \
    \
    movq 8(RIO),                RCD0; \
    bswapq                      RCD0; \
    rorq $32,                   RCD0; \
    \
    xorq key_table(CTX),        RAB0;
0139 
/*
 * enc_outunpack(op, max): write block 0 back through RIO.
 *
 * max is a register holding a key_table entry index; that entry is XORed
 * into RCD0.  Both halves are then rotated by 32 bits and byte-swapped
 * (the reverse of the input packing) and written with RCD0 at offset 0 and
 * RAB0 at offset 8.  op is pasted with "q": "mov" stores the result, "xor"
 * XORs it into what is already at the destination.
 */
#define enc_outunpack(op, max) \
    xorq key_table(CTX, max, 8),    RCD0; \
    \
    rorq $32,                       RCD0; \
    bswapq                          RCD0; \
    op ## q RCD0,                   (RIO); \
    \
    rolq $32,                       RAB0; \
    bswapq                          RAB0; \
    op ## q RAB0,                   8(RIO);
0148 
/*
 * dec_rounds(i): six rounds on block 0 using subkeys i+7 .. i+2 in
 * descending order, i.e. the subkey order of enc_rounds(i) reversed.
 */
#define dec_rounds(i) \
    roundsm(RAB, (i) + 7, RCD); \
    roundsm(RCD, (i) + 6, RAB); \
    roundsm(RAB, (i) + 5, RCD); \
    roundsm(RCD, (i) + 4, RAB); \
    roundsm(RAB, (i) + 3, RCD); \
    roundsm(RCD, (i) + 2, RAB);
0156 
/* dec_fls(i): fls on block 0 with subkey i+1 for RAB and subkey i for RCD. */
#define dec_fls(i) \
    fls(RAB, RCD, (i) + 1, (i));
0159 
/*
 * dec_inpack(max): load one 16-byte block from RIO, packed exactly like
 * enc_inpack(), but XOR key_table[max] into RAB0.  max is a register
 * holding the entry index.
 */
#define dec_inpack(max) \
    movq (RIO),                     RAB0; \
    bswapq                          RAB0; \
    rolq $32,                       RAB0; \
    \
    movq 8(RIO),                    RCD0; \
    bswapq                          RCD0; \
    rorq $32,                       RCD0; \
    \
    xorq key_table(CTX, max, 8),    RAB0;
0168 
/*
 * dec_outunpack(): XOR key_table[0] into RCD0, reverse the input packing
 * on both halves and store RCD0 at offset 0 and RAB0 at offset 8 of RIO.
 */
#define dec_outunpack() \
    xorq key_table(CTX),        RCD0; \
    \
    rorq $32,                   RCD0; \
    bswapq                      RCD0; \
    movq RCD0,                  (RIO); \
    \
    rolq $32,                   RAB0; \
    bswapq                      RAB0; \
    movq RAB0,                  8(RIO);
0177 
SYM_FUNC_START(__camellia_enc_blk)
    /*
     * Encrypt one 16-byte block.
     *
     * input:
     *  %rdi: ctx, CTX
     *  %rsi: dst
     *  %rdx: src
     *  %rcx: bool xor (only the low byte is tested; non-zero means the
     *        result is XORed into dst instead of being stored)
     *
     * No stack is used: %r12 (RT1) is parked in RR12 and restored on
     * both exit paths.
     */
    movq %r12, RR12;

    /* %rcx and %rsi are reused as RCD0 and RT0, so move the arguments out. */
    movq %rcx, RXOR;
    movq %rsi, RDST;
    movq %rdx, RIO;

    enc_inpack();

    enc_rounds(0);
    enc_fls(8);
    enc_rounds(8);
    enc_fls(16);
    enc_rounds(16);
    movl $24, RT1d; /* max */

    /*
     * Compare the full 32-bit field, as the decrypt entry points do; a
     * byte compare would look at the low byte only.
     */
    cmpl $16, key_length(CTX);
    je .L__enc_done;

    /* key_length != 16: one more fls + six rounds, final subkey index 32 */
    enc_fls(24);
    enc_rounds(24);
    movl $32, RT1d; /* max */

.L__enc_done:
    testb RXORbl, RXORbl;
    movq RDST, RIO;         /* mov leaves the flags from testb intact */

    jnz .L__enc_xor;

    enc_outunpack(mov, RT1);

    movq RR12, %r12;
    RET;

.L__enc_xor:
    enc_outunpack(xor, RT1);

    movq RR12, %r12;
    RET;
SYM_FUNC_END(__camellia_enc_blk)
0224 
SYM_FUNC_START(camellia_dec_blk)
    /*
     * Decrypt one 16-byte block.
     *
     * input:
     *  %rdi: ctx, CTX
     *  %rsi: dst
     *  %rdx: src
     */

    /* %r12 is RT1 and %rsi is RT0: stash both before they get clobbered. */
    movq %r12, RR12;
    movq %rsi, RDST;
    movq %rdx, RIO;

    /* max = (key_length == 16) ? 24 : 32; RXOR is just a temporary here */
    movl $24, RXORd;
    movl $32, RT2d;
    cmpl $16, key_length(CTX);
    cmovel RXORd, RT2d; /* max */

    dec_inpack(RT2);

    /* RT2 is clobbered by the rounds, so test max right away. */
    cmpb $24, RT2bl;
    je .L__dec_short_key;

    dec_rounds(24);
    dec_fls(24);

.L__dec_short_key:
    dec_rounds(16);
    dec_fls(16);
    dec_rounds(8);
    dec_fls(8);
    dec_rounds(0);

    movq RDST, RIO;

    dec_outunpack();

    movq RR12, %r12;
    RET;
SYM_FUNC_END(camellia_dec_blk)
0262 
0263 /**********************************************************************
0264   2-way camellia
0265  **********************************************************************/
/*
 * roundsm2(ab, subkey, cd): one round on both blocks.
 *
 * Block 0 is handled as in the 1-way round: lookups are accumulated
 * alternately in cd0 and RT2 (preloaded with the subkey) and merged with
 * a final xorq.  For block 1 the subkey is XORed into cd1 up front and
 * all eight lookups go straight into cd1; the block 0 merge is issued
 * after the first block 1 lookup pair.
 * Clobbers RT0, RT1, RT2 and the flags.
 */
#define roundsm2(ab, subkey, cd) \
    movq (key_table + (subkey) * 8)(CTX),       RT2; \
    xorq RT2,                                   cd ## 1; \
    \
    xor2ror16(sp00444404, sp03303033, RT0, RT1, ab ## 0, cd ## 0); \
    xor2ror16(sp22000222, sp10011110, RT0, RT1, ab ## 0, RT2); \
    xor2ror16(sp11101110, sp44044404, RT0, RT1, ab ## 0, cd ## 0); \
    xor2ror16(sp30333033, sp02220222, RT0, RT1, ab ## 0, RT2); \
    \
        xor2ror16(sp00444404, sp03303033, RT0, RT1, ab ## 1, cd ## 1); \
        xorq RT2,                                   cd ## 0; \
        xor2ror16(sp22000222, sp10011110, RT0, RT1, ab ## 1, cd ## 1); \
        xor2ror16(sp11101110, sp44044404, RT0, RT1, ab ## 1, cd ## 1); \
        xor2ror16(sp30333033, sp02220222, RT0, RT1, ab ## 1, cd ## 1);
0280 
/*
 * fls2(l, r, kl, kr): the fls mixing layer applied to both blocks.
 *
 * For each block n (0 and 1), with key_table[kl] and key_table[kr]:
 *
 *   high32(l_n) ^= rol32(low32(l_n) & low32(kl), 1)
 *   low32(r_n)  ^= high32(r_n | kr)
 *   low32(l_n)  ^= high32(l_n | kl)
 *   high32(r_n) ^= rol32(low32(r_n) & low32(kr), 1)
 *
 * The first two steps are done for block 0, then block 1 (indented), then
 * the last two steps for block 0 and block 1 again, rotating through
 * RT0/RT1/RT2 as temporaries.  Clobbers RT0, RT1, RT2 and the flags.
 */
#define fls2(l, r, kl, kr) \
    movl (key_table + (kl) * 8)(CTX),       RT0d; \
    andl l ## 0d,                           RT0d; \
    roll $1,                                RT0d; \
    shlq $32,                               RT0; \
    xorq RT0,                               l ## 0; \
    movq (key_table + (kr) * 8)(CTX),       RT1; \
    orq r ## 0,                             RT1; \
    shrq $32,                               RT1; \
    xorq RT1,                               r ## 0; \
    \
        movl (key_table + (kl) * 8)(CTX),       RT2d; \
        andl l ## 1d,                           RT2d; \
        roll $1,                                RT2d; \
        shlq $32,                               RT2; \
        xorq RT2,                               l ## 1; \
        movq (key_table + (kr) * 8)(CTX),       RT0; \
        orq r ## 1,                             RT0; \
        shrq $32,                               RT0; \
        xorq RT0,                               r ## 1; \
    \
    movq (key_table + (kl) * 8)(CTX),       RT1; \
    orq l ## 0,                             RT1; \
    shrq $32,                               RT1; \
    xorq RT1,                               l ## 0; \
    movl (key_table + (kr) * 8)(CTX),       RT2d; \
    andl r ## 0d,                           RT2d; \
    roll $1,                                RT2d; \
    shlq $32,                               RT2; \
    xorq RT2,                               r ## 0; \
    \
        movq (key_table + (kl) * 8)(CTX),       RT0; \
        orq l ## 1,                             RT0; \
        shrq $32,                               RT0; \
        xorq RT0,                               l ## 1; \
        movl (key_table + (kr) * 8)(CTX),       RT1d; \
        andl r ## 1d,                           RT1d; \
        roll $1,                                RT1d; \
        shlq $32,                               RT1; \
        xorq RT1,                               r ## 1;
0321 
/*
 * enc_rounds2(i): six 2-way rounds using subkeys i+2 .. i+7 in ascending
 * order, alternating which half is the round input.
 */
#define enc_rounds2(i) \
    roundsm2(RAB, (i) + 2, RCD); \
    roundsm2(RCD, (i) + 3, RAB); \
    roundsm2(RAB, (i) + 4, RCD); \
    roundsm2(RCD, (i) + 5, RAB); \
    roundsm2(RAB, (i) + 6, RCD); \
    roundsm2(RCD, (i) + 7, RAB);
0329 
/* enc_fls2(i): fls2 with subkey i for the RAB halves and i+1 for the RCD halves. */
#define enc_fls2(i) \
    fls2(RAB, RCD, (i), (i) + 1);
0332 
/*
 * enc_inpack2(): load two consecutive 16-byte blocks from RIO (offsets
 * 0/8 into RAB0/RCD0, 16/24 into RAB1/RCD1).  Each half is byte-swapped
 * and rotated by 32 bits, then key_table[0] is XORed into RAB0 and RAB1.
 */
#define enc_inpack2() \
    movq (RIO),                 RAB0; \
    bswapq                      RAB0; \
    rorq $32,                   RAB0; \
    movq 8(RIO),                RCD0; \
    bswapq                      RCD0; \
    rolq $32,                   RCD0; \
    xorq key_table(CTX),        RAB0; \
    \
        movq 16(RIO),               RAB1; \
        bswapq                      RAB1; \
        rorq $32,                   RAB1; \
        movq 24(RIO),               RCD1; \
        bswapq                      RCD1; \
        rolq $32,                   RCD1; \
        xorq key_table(CTX),        RAB1;
0349 
/*
 * enc_outunpack2(op, max): write both blocks back through RIO.
 *
 * key_table[max] (max is a register holding the entry index) is XORed
 * into RCD0 and RCD1.  Every half is rotated by 32 bits and byte-swapped,
 * then written in the order RCD0, RAB0, RCD1, RAB1 at offsets 0, 8, 16
 * and 24.  op is "mov" to store or "xor" to XOR into the destination.
 */
#define enc_outunpack2(op, max) \
    xorq key_table(CTX, max, 8),    RCD0; \
    rolq $32,                       RCD0; \
    bswapq                          RCD0; \
    op ## q RCD0,                   (RIO); \
    rorq $32,                       RAB0; \
    bswapq                          RAB0; \
    op ## q RAB0,                   8(RIO); \
    \
        xorq key_table(CTX, max, 8),    RCD1; \
        rolq $32,                       RCD1; \
        bswapq                          RCD1; \
        op ## q RCD1,                   16(RIO); \
        rorq $32,                       RAB1; \
        bswapq                          RAB1; \
        op ## q RAB1,                   24(RIO);
0366 
/*
 * dec_rounds2(i): six 2-way rounds using subkeys i+7 .. i+2 in descending
 * order, i.e. the subkey order of enc_rounds2(i) reversed.
 */
#define dec_rounds2(i) \
    roundsm2(RAB, (i) + 7, RCD); \
    roundsm2(RCD, (i) + 6, RAB); \
    roundsm2(RAB, (i) + 5, RCD); \
    roundsm2(RCD, (i) + 4, RAB); \
    roundsm2(RAB, (i) + 3, RCD); \
    roundsm2(RCD, (i) + 2, RAB);
0374 
/* dec_fls2(i): fls2 with subkey i+1 for the RAB halves and i for the RCD halves. */
#define dec_fls2(i) \
    fls2(RAB, RCD, (i) + 1, (i));
0377 
/*
 * dec_inpack2(max): load two consecutive 16-byte blocks from RIO, packed
 * exactly like enc_inpack2(), but XOR key_table[max] into RAB0 and RAB1.
 * max is a register holding the entry index.
 */
#define dec_inpack2(max) \
    movq (RIO),                     RAB0; \
    bswapq                          RAB0; \
    rorq $32,                       RAB0; \
    movq 8(RIO),                    RCD0; \
    bswapq                          RCD0; \
    rolq $32,                       RCD0; \
    xorq key_table(CTX, max, 8),    RAB0; \
    \
        movq 16(RIO),                   RAB1; \
        bswapq                          RAB1; \
        rorq $32,                       RAB1; \
        movq 24(RIO),                   RCD1; \
        bswapq                          RCD1; \
        rolq $32,                       RCD1; \
        xorq key_table(CTX, max, 8),    RAB1;
0394 
/*
 * dec_outunpack2(): XOR key_table[0] into RCD0 and RCD1, rotate every half
 * by 32 bits and byte-swap it, then store RCD0, RAB0, RCD1, RAB1 at
 * offsets 0, 8, 16 and 24 of RIO.
 */
#define dec_outunpack2() \
    xorq key_table(CTX),        RCD0; \
    rolq $32,                   RCD0; \
    bswapq                      RCD0; \
    movq RCD0,                  (RIO); \
    rorq $32,                   RAB0; \
    bswapq                      RAB0; \
    movq RAB0,                  8(RIO); \
    \
        xorq key_table(CTX),        RCD1; \
        rolq $32,                   RCD1; \
        bswapq                      RCD1; \
        movq RCD1,                  16(RIO); \
        rorq $32,                   RAB1; \
        bswapq                      RAB1; \
        movq RAB1,                  24(RIO);
0411 
SYM_FUNC_START(__camellia_enc_blk_2way)
    /*
     * Encrypt two consecutive 16-byte blocks (32 bytes at src).
     *
     * input:
     *  %rdi: ctx, CTX
     *  %rsi: dst
     *  %rdx: src
     *  %rcx: bool xor (only the low byte is tested; non-zero means the
     *        result is XORed into dst instead of being stored)
     *
     * %rbx (RAB1) is saved on the stack; %r12 (RT1) is parked in RR12.
     * Both are restored on each exit path.
     */
    pushq %rbx;

    movq %r12, RR12;
    /* %rcx, %rsi and %rdx are reused as RCD0, RT0 and RCD1. */
    movq %rcx, RXOR;
    movq %rsi, RDST;
    movq %rdx, RIO;

    enc_inpack2();

    enc_rounds2(0);
    enc_fls2(8);
    enc_rounds2(8);
    enc_fls2(16);
    enc_rounds2(16);
    movl $24, RT2d; /* max */

    /*
     * Compare the full 32-bit field, as the decrypt entry points do; a
     * byte compare would look at the low byte only.
     */
    cmpl $16, key_length(CTX);
    je .L__enc2_done;

    /* key_length != 16: one more fls + six rounds, final subkey index 32 */
    enc_fls2(24);
    enc_rounds2(24);
    movl $32, RT2d; /* max */

.L__enc2_done:
    testb RXORbl, RXORbl;   /* explicit size suffix, as in the 1-way path */
    movq RDST, RIO;         /* mov leaves the flags from testb intact */
    jnz .L__enc2_xor;

    enc_outunpack2(mov, RT2);

    movq RR12, %r12;
    popq %rbx;
    RET;

.L__enc2_xor:
    enc_outunpack2(xor, RT2);

    movq RR12, %r12;
    popq %rbx;
    RET;
SYM_FUNC_END(__camellia_enc_blk_2way)
0460 
SYM_FUNC_START(camellia_dec_blk_2way)
    /*
     * Decrypt two consecutive 16-byte blocks (32 bytes at src).
     *
     * input:
     *  %rdi: ctx, CTX
     *  %rsi: dst
     *  %rdx: src
     */

    /* max = (key_length == 16) ? 24 : 32; RXOR is a temporary for now */
    movl $24, RXORd;
    movl $32, RT2d;
    cmpl $16, key_length(CTX);
    cmovel RXORd, RT2d; /* max */

    /*
     * From here on RXOR holds the caller's %rbx (RAB1) and RR12 the
     * caller's %r12 (RT1); no stack is used.
     */
    movq %rbx, RXOR;
    movq %r12, RR12;
    movq %rsi, RDST;
    movq %rdx, RIO;

    dec_inpack2(RT2);

    /* RT2 is clobbered by the rounds, so test max right away. */
    cmpb $24, RT2bl;
    je .L__dec2_short_key;

    dec_rounds2(24);
    dec_fls2(24);

.L__dec2_short_key:
    dec_rounds2(16);
    dec_fls2(16);
    dec_rounds2(8);
    dec_fls2(8);
    dec_rounds2(0);

    movq RDST, RIO;

    dec_outunpack2();

    movq RXOR, %rbx;
    movq RR12, %r12;
    RET;
SYM_FUNC_END(camellia_dec_blk_2way)