Back to home page

OSCL-LXR

 
 

    


0001 /* SPDX-License-Identifier: GPL-2.0-only */
0002 /*
0003  * Accelerated GHASH implementation with Intel PCLMULQDQ-NI
0004  * instructions. This file contains accelerated part of ghash
0005  * implementation. More information about PCLMULQDQ can be found at:
0006  *
0007  * http://software.intel.com/en-us/articles/carry-less-multiplication-and-its-usage-for-computing-the-gcm-mode/
0008  *
0009  * Copyright (c) 2009 Intel Corp.
0010  *   Author: Huang Ying <ying.huang@intel.com>
0011  *       Vinodh Gopal
0012  *       Erdinc Ozturk
0013  *       Deniz Karakoyunlu
0014  */
0015 
0016 #include <linux/linkage.h>
0017 #include <asm/frame.h>
0018 
/*
 * pshufb control mask that reverses the byte order of a 16-byte register:
 * destination byte i is taken from source byte 15 - i.
 */
.section	.rodata.cst16.bswap_mask, "aM", @progbits, 16
.p2align 4
.Lbswap_mask:
	.byte 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08
	.byte 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00

/* XMM register roles used by the routines in this file. */
#define DATA	%xmm0	/* multiply operand 1 and result (the running hash) */
#define SHASH	%xmm1	/* multiply operand 2: hash_key << 1 mod poly */
#define T1	%xmm2	/* scratch, clobbered by __clmul_gf128mul_ble */
#define T2	%xmm3	/* scratch, clobbered by __clmul_gf128mul_ble */
#define T3	%xmm4	/* scratch, clobbered by __clmul_gf128mul_ble */
#define BSWAP	%xmm5	/* holds the .Lbswap_mask shuffle control */
#define IN1	%xmm6	/* input block loaded by clmul_ghash_update */

.text
0033 
/*
 * __clmul_gf128mul_ble - carry-less 128x128 multiply with reduction
 *
 * Internal ABI (not callable from C):
 *   in:	DATA	operand1
 *		SHASH	operand2, hash_key << 1 mod poly
 *   out:	DATA	operand1 * operand2 mod poly
 *   clobbers:	T1, T2, T3
 *
 * Notation: a1:a0 are the high:low quadwords of DATA on entry, b1:b0 those
 * of SHASH.  The 256-bit product is assembled in <T1:DATA> from three
 * 64x64 carry-less multiplications (Karatsuba) and then folded back to
 * 128 bits in two reduction phases.  Only XMM registers are touched.
 */
SYM_FUNC_START_LOCAL(__clmul_gf128mul_ble)
	/* Build (a1 + a0) and (b1 + b0) for the middle term. */
	pshufd		$0x4e, DATA, T2		# T2 = a0:a1 (quadwords swapped)
	pxor		DATA, T2		# T2 = a1 + a0
	pshufd		$0x4e, SHASH, T3	# T3 = b0:b1 (quadwords swapped)
	pxor		SHASH, T3		# T3 = b1 + b0
	movaps		DATA, T1		# keep a1:a0 before DATA is overwritten

	/* Three 64x64 carry-less products. */
	pclmulqdq	$0x11, SHASH, T1	# T1 = a1 * b1
	pclmulqdq	$0x00, SHASH, DATA	# DATA = a0 * b0
	pclmulqdq	$0x00, T3, T2		# T2 = (a1 + a0) * (b1 + b0)
	pxor		T1, T2
	pxor		DATA, T2		# T2 = a0 * b1 + a1 * b0

	/* Add the middle term, split at the quadword boundary, into T1:DATA. */
	movaps		T2, T3
	psrldq		$8, T2			# upper quadword of the middle term
	pslldq		$8, T3			# lower quadword, moved up by 64 bits
	pxor		T2, T1
	pxor		T3, DATA		# <T1:DATA> is result of
						# carry-less multiplication

	/* first phase of the reduction */
	movaps		DATA, T3
	psllq		$1, T3
	pxor		DATA, T3
	psllq		$5, T3
	pxor		DATA, T3
	psllq		$57, T3			# per quadword: x<<63 ^ x<<62 ^ x<<57
	movaps		T3, T2
	psrldq		$8, T3
	pslldq		$8, T2
	pxor		T3, T1
	pxor		T2, DATA

	/* second phase of the reduction */
	movaps		DATA, T2
	psrlq		$5, T2
	pxor		DATA, T2
	psrlq		$1, T2
	pxor		DATA, T2
	psrlq		$1, T2			# per quadword: x>>1 ^ x>>2 ^ x>>7
	pxor		T2, T1
	pxor		T1, DATA		# DATA = reduced product
	RET
SYM_FUNC_END(__clmul_gf128mul_ble)
0090 
/*
 * void clmul_ghash_mul(char *dst, const u128 *shash)
 *
 * Multiply the 16-byte value at dst by the key at shash and write the
 * product back to dst.
 *
 * in:		%rdi = dst, %rsi = shash
 * clobbers:	DATA, SHASH, BSWAP, plus T1-T3 via __clmul_gf128mul_ble
 *
 * Both pointers are accessed with unaligned loads/stores (movups).
 */
SYM_FUNC_START(clmul_ghash_mul)
	FRAME_BEGIN
	movups (%rdi), DATA
	movups (%rsi), SHASH
	/* RIP-relative load: no absolute relocation, shorter encoding. */
	movaps .Lbswap_mask(%rip), BSWAP
	pshufb BSWAP, DATA		# byte-reverse dst before multiplying
	call __clmul_gf128mul_ble	# DATA = DATA * SHASH mod poly
	pshufb BSWAP, DATA		# restore the in-memory byte order
	movups DATA, (%rdi)
	FRAME_END
	RET
SYM_FUNC_END(clmul_ghash_mul)
0104 
/*
 * void clmul_ghash_update(char *dst, const char *src, unsigned int srclen,
 *			   const u128 *shash);
 *
 * For every complete 16-byte block of src, in order:
 *	dst = (dst ^ block) * shash mod poly
 * A trailing partial block (srclen % 16 bytes) is not consumed.  If
 * srclen < 16 nothing is read or written.
 *
 * in:		%rdi = dst, %rsi = src, %edx = srclen, %rcx = shash
 * clobbers:	%rsi, %rdx, flags, DATA, SHASH, BSWAP, IN1, plus T1-T3 via
 *		__clmul_gf128mul_ble
 */
SYM_FUNC_START(clmul_ghash_update)
	FRAME_BEGIN
	/*
	 * srclen is a 32-bit argument, so only %edx is meaningful; the upper
	 * half of %rdx is not guaranteed to be zero by the calling convention.
	 */
	cmp $16, %edx
	jb .Lupdate_just_ret	# check length
	/* RIP-relative load: no absolute relocation, shorter encoding. */
	movaps .Lbswap_mask(%rip), BSWAP
	movups (%rdi), DATA
	movups (%rcx), SHASH
	pshufb BSWAP, DATA
.align 4
.Lupdate_loop:
	movups (%rsi), IN1
	pshufb BSWAP, IN1
	pxor IN1, DATA			# fold the next block into the hash
	call __clmul_gf128mul_ble	# DATA = DATA * SHASH mod poly
	sub $16, %edx
	add $16, %rsi
	cmp $16, %edx
	jae .Lupdate_loop	# unsigned compare, same as the entry check
	pshufb BSWAP, DATA
	movups DATA, (%rdi)
.Lupdate_just_ret:
	FRAME_END
	RET
SYM_FUNC_END(clmul_ghash_update)