Back to home page

OSCL-LXR

 
 

    


0001 /*
0002  * Intel SHA Extensions optimized implementation of a SHA-1 update function
0003  *
0004  * This file is provided under a dual BSD/GPLv2 license.  When using or
0005  * redistributing this file, you may do so under either license.
0006  *
0007  * GPL LICENSE SUMMARY
0008  *
0009  * Copyright(c) 2015 Intel Corporation.
0010  *
0011  * This program is free software; you can redistribute it and/or modify
0012  * it under the terms of version 2 of the GNU General Public License as
0013  * published by the Free Software Foundation.
0014  *
0015  * This program is distributed in the hope that it will be useful, but
0016  * WITHOUT ANY WARRANTY; without even the implied warranty of
0017  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
0018  * General Public License for more details.
0019  *
0020  * Contact Information:
0021  *  Sean Gulley <sean.m.gulley@intel.com>
0022  *  Tim Chen <tim.c.chen@linux.intel.com>
0023  *
0024  * BSD LICENSE
0025  *
0026  * Copyright(c) 2015 Intel Corporation.
0027  *
0028  * Redistribution and use in source and binary forms, with or without
0029  * modification, are permitted provided that the following conditions
0030  * are met:
0031  *
0032  *  * Redistributions of source code must retain the above copyright
0033  *    notice, this list of conditions and the following disclaimer.
0034  *  * Redistributions in binary form must reproduce the above copyright
0035  *    notice, this list of conditions and the following disclaimer in
0036  *    the documentation and/or other materials provided with the
0037  *    distribution.
0038  *  * Neither the name of Intel Corporation nor the names of its
0039  *    contributors may be used to endorse or promote products derived
0040  *    from this software without specific prior written permission.
0041  *
0042  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
0043  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
0044  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
0045  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
0046  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
0047  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
0048  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
0049  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
0050  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
0051  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
0052  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
0053  *
0054  */
0055 
0056 #include <linux/linkage.h>
0057 
0058 #define DIGEST_PTR  %rdi    /* 1st arg: digest buffer, read as 16 bytes + one dword at offset 16 */
0059 #define DATA_PTR    %rsi    /* 2nd arg: input cursor, advanced by 64 after every block */
0060 #define NUM_BLKS    %rdx    /* 3rd arg: block count on entry, rewritten into the end-of-data pointer */
0061 
0062 /* gcc conversion */
0063 #define FRAME_SIZE  32  /* space for 2x16 bytes: saved E0 at 0(%rsp), saved ABCD at 16(%rsp) */
0064 
0065 #define ABCD        %xmm0   /* first four digest words, held in reversed dword order */
0066 #define E0      %xmm1   /* Need two E's b/c they ping pong */
0067 #define E1      %xmm2   /* alternates with E0 as the E operand of successive sha1rnds4 groups */
0068 #define MSG0        %xmm3   /* MSG0-MSG3: four message dwords each, recycled by the message schedule */
0069 #define MSG1        %xmm4
0070 #define MSG2        %xmm5
0071 #define MSG3        %xmm6
0072 #define SHUF_MASK   %xmm7   /* pshufb control loaded from PSHUFFLE_BYTE_FLIP_MASK */
0073 
0074 
0075 /*
0076  * Intel SHA Extensions optimized implementation of a SHA-1 update function
0077  *
0078  * The function takes a pointer to the current hash values, a pointer to the
0079  * input data, and a number of 64 byte blocks to process.  Once all blocks have
0080  * been processed, the digest buffer is updated with the resulting hash value.
0081  * The function only processes complete blocks, there is no functionality to
0082  * store partial blocks. All message padding and hash value initialization must
0083  * be done outside the update function.
0084  *
0085  * The indented lines in the loop are instructions related to rounds processing.
0086  * The non-indented lines are instructions related to the message schedule.
0087  * Modifies %rsi, %rdx, %xmm0-%xmm7 and flags; uses a 16-byte-aligned 32-byte stack frame.
0088  * void sha1_ni_transform(uint32_t *digest, const void *data,
0089  *                        uint32_t numBlocks)
0090  * digest : pointer to digest (five 32-bit words, read on entry, rewritten on exit)
0091  * data: pointer to input data (read 64 bytes per block with unaligned loads)
0092  * numBlocks: Number of blocks to process (zero returns without touching the digest)
0093  */
0094 .text
0095 .align 32
0096 SYM_FUNC_START(sha1_ni_transform)
0097     push        %rbp                /* save the caller's %rbp */
0098     mov     %rsp, %rbp              /* remember %rsp so the epilogue can undo the realignment */
0099     sub     $FRAME_SIZE, %rsp       /* room for the two 16-byte save slots */
0100     and     $~0xF, %rsp             /* round down to 16 bytes: slots are used by movdqa and as SSE memory operands */
0101     /* NOTE(review): header documents numBlocks as uint32_t, yet all 64 bits of %rdx are shifted below - confirm callers guarantee bits 63:32 are zero */
0102     shl     $6, NUM_BLKS        /* convert to bytes (x64); sets ZF when the result is zero */
0103     jz      .Ldone_hash         /* nothing to hash: skip the loop and the digest store */
0104     add     DATA_PTR, NUM_BLKS  /* pointer to end of data */
0105 
0106     /* load initial hash values */
0107     pinsrd      $3, 1*16(DIGEST_PTR), E0    /* dword at digest+16 -> top dword of E0; lower dwords still stale */
0108     movdqu      0*16(DIGEST_PTR), ABCD      /* first 16 digest bytes, unaligned load */
0109     pand        UPPER_WORD_MASK(%rip), E0   /* clear the stale lower 96 bits of E0 */
0110     pshufd      $0x1B, ABCD, ABCD           /* reverse dword order: digest word 0 moves to the top dword */
0111 
0112     movdqa      PSHUFFLE_BYTE_FLIP_MASK(%rip), SHUF_MASK    /* loop-invariant: full 16-byte reversal control */
0113 
0114 .Lloop0:
0115     /* Save hash values for addition after rounds */
0116     movdqa      E0, (0*16)(%rsp)
0117     movdqa      ABCD, (1*16)(%rsp)
0118 
0119     /* Rounds 0-3 */
0120     movdqu      0*16(DATA_PTR), MSG0
0121     pshufb      SHUF_MASK, MSG0             /* reverse all 16 bytes: byte-swaps each dword, first word lands in the top dword */
0122         paddd       MSG0, E0                /* first group only: plain add, E0's lower dwords are zero here */
0123         movdqa      ABCD, E1                /* keep the pre-round state; the next sha1nexte rotates its top dword into E */
0124         sha1rnds4   $0, E0, ABCD            /* four rounds; the immediate picks the round function/constant (0 through round 19) */
0125 
0126     /* Rounds 4-7 */
0127     movdqu      1*16(DATA_PTR), MSG1
0128     pshufb      SHUF_MASK, MSG1
0129         sha1nexte   MSG1, E1                /* E1 = MSG1 with rol(top dword of E1, 30) added into its top dword */
0130         movdqa      ABCD, E0
0131         sha1rnds4   $0, E1, ABCD
0132     sha1msg1    MSG1, MSG0                  /* begin turning MSG0 into words 16-19 (finished by pxor + sha1msg2 below) */
0133 
0134     /* Rounds 8-11 */
0135     movdqu      2*16(DATA_PTR), MSG2
0136     pshufb      SHUF_MASK, MSG2
0137         sha1nexte   MSG2, E0
0138         movdqa      ABCD, E1
0139         sha1rnds4   $0, E0, ABCD
0140     sha1msg1    MSG2, MSG1
0141     pxor        MSG2, MSG0
0142 
0143     /* Rounds 12-15 */
0144     movdqu      3*16(DATA_PTR), MSG3
0145     pshufb      SHUF_MASK, MSG3
0146         sha1nexte   MSG3, E1
0147         movdqa      ABCD, E0
0148     sha1msg2    MSG3, MSG0                  /* MSG0 now holds schedule words 16-19 */
0149         sha1rnds4   $0, E1, ABCD
0150     sha1msg1    MSG3, MSG2
0151     pxor        MSG3, MSG1
0152 
0153     /* Rounds 16-19 (from here on every group repeats: sha1nexte, copy ABCD, sha1msg2, sha1rnds4, sha1msg1, pxor) */
0154         sha1nexte   MSG0, E0
0155         movdqa      ABCD, E1
0156     sha1msg2    MSG0, MSG1
0157         sha1rnds4   $0, E0, ABCD
0158     sha1msg1    MSG0, MSG3
0159     pxor        MSG0, MSG2
0160 
0161     /* Rounds 20-23 (sha1rnds4 immediate becomes 1 for rounds 20-39) */
0162         sha1nexte   MSG1, E1
0163         movdqa      ABCD, E0
0164     sha1msg2    MSG1, MSG2
0165         sha1rnds4   $1, E1, ABCD
0166     sha1msg1    MSG1, MSG0
0167     pxor        MSG1, MSG3
0168 
0169     /* Rounds 24-27 */
0170         sha1nexte   MSG2, E0
0171         movdqa      ABCD, E1
0172     sha1msg2    MSG2, MSG3
0173         sha1rnds4   $1, E0, ABCD
0174     sha1msg1    MSG2, MSG1
0175     pxor        MSG2, MSG0
0176 
0177     /* Rounds 28-31 */
0178         sha1nexte   MSG3, E1
0179         movdqa      ABCD, E0
0180     sha1msg2    MSG3, MSG0
0181         sha1rnds4   $1, E1, ABCD
0182     sha1msg1    MSG3, MSG2
0183     pxor        MSG3, MSG1
0184 
0185     /* Rounds 32-35 */
0186         sha1nexte   MSG0, E0
0187         movdqa      ABCD, E1
0188     sha1msg2    MSG0, MSG1
0189         sha1rnds4   $1, E0, ABCD
0190     sha1msg1    MSG0, MSG3
0191     pxor        MSG0, MSG2
0192 
0193     /* Rounds 36-39 */
0194         sha1nexte   MSG1, E1
0195         movdqa      ABCD, E0
0196     sha1msg2    MSG1, MSG2
0197         sha1rnds4   $1, E1, ABCD
0198     sha1msg1    MSG1, MSG0
0199     pxor        MSG1, MSG3
0200 
0201     /* Rounds 40-43 (sha1rnds4 immediate becomes 2 for rounds 40-59) */
0202         sha1nexte   MSG2, E0
0203         movdqa      ABCD, E1
0204     sha1msg2    MSG2, MSG3
0205         sha1rnds4   $2, E0, ABCD
0206     sha1msg1    MSG2, MSG1
0207     pxor        MSG2, MSG0
0208 
0209     /* Rounds 44-47 */
0210         sha1nexte   MSG3, E1
0211         movdqa      ABCD, E0
0212     sha1msg2    MSG3, MSG0
0213         sha1rnds4   $2, E1, ABCD
0214     sha1msg1    MSG3, MSG2
0215     pxor        MSG3, MSG1
0216 
0217     /* Rounds 48-51 */
0218         sha1nexte   MSG0, E0
0219         movdqa      ABCD, E1
0220     sha1msg2    MSG0, MSG1
0221         sha1rnds4   $2, E0, ABCD
0222     sha1msg1    MSG0, MSG3
0223     pxor        MSG0, MSG2
0224 
0225     /* Rounds 52-55 */
0226         sha1nexte   MSG1, E1
0227         movdqa      ABCD, E0
0228     sha1msg2    MSG1, MSG2
0229         sha1rnds4   $2, E1, ABCD
0230     sha1msg1    MSG1, MSG0
0231     pxor        MSG1, MSG3
0232 
0233     /* Rounds 56-59 */
0234         sha1nexte   MSG2, E0
0235         movdqa      ABCD, E1
0236     sha1msg2    MSG2, MSG3
0237         sha1rnds4   $2, E0, ABCD
0238     sha1msg1    MSG2, MSG1
0239     pxor        MSG2, MSG0
0240 
0241     /* Rounds 60-63 (sha1rnds4 immediate becomes 3 for rounds 60-79) */
0242         sha1nexte   MSG3, E1
0243         movdqa      ABCD, E0
0244     sha1msg2    MSG3, MSG0
0245         sha1rnds4   $3, E1, ABCD
0246     sha1msg1    MSG3, MSG2
0247     pxor        MSG3, MSG1
0248 
0249     /* Rounds 64-67 */
0250         sha1nexte   MSG0, E0
0251         movdqa      ABCD, E1
0252     sha1msg2    MSG0, MSG1
0253         sha1rnds4   $3, E0, ABCD
0254     sha1msg1    MSG0, MSG3
0255     pxor        MSG0, MSG2
0256 
0257     /* Rounds 68-71 (no sha1msg1: no schedule words past 79 are needed) */
0258         sha1nexte   MSG1, E1
0259         movdqa      ABCD, E0
0260     sha1msg2    MSG1, MSG2
0261         sha1rnds4   $3, E1, ABCD
0262     pxor        MSG1, MSG3
0263 
0264     /* Rounds 72-75 (only the sha1msg2 that completes MSG3 for the final group remains) */
0265         sha1nexte   MSG2, E0
0266         movdqa      ABCD, E1
0267     sha1msg2    MSG2, MSG3
0268         sha1rnds4   $3, E0, ABCD
0269 
0270     /* Rounds 76-79 (schedule is finished; rounds only) */
0271         sha1nexte   MSG3, E1
0272         movdqa      ABCD, E0
0273         sha1rnds4   $3, E1, ABCD
0274 
0275     /* Add current hash values with previously saved */
0276     sha1nexte   (0*16)(%rsp), E0            /* E0 = saved E0 with rol(top dword of E0, 30) added into its top dword */
0277     paddd       (1*16)(%rsp), ABCD
0278 
0279     /* Increment data pointer and loop if more to process */
0280     add     $64, DATA_PTR
0281     cmp     NUM_BLKS, DATA_PTR              /* NUM_BLKS holds the end-of-data pointer computed before the loop */
0282     jne     .Lloop0
0283 
0284     /* Write hash values back in the correct order */
0285     pshufd      $0x1B, ABCD, ABCD           /* undo the dword reversal applied at load time */
0286     movdqu      ABCD, 0*16(DIGEST_PTR)
0287     pextrd      $3, E0, 1*16(DIGEST_PTR)    /* top dword of E0 -> dword at digest+16 */
0288 
0289 .Ldone_hash:
0290     mov     %rbp, %rsp                      /* drop the aligned frame */
0291     pop     %rbp
0292 
0293     RET                                     /* RET is a macro that is not defined in this file */
0294 SYM_FUNC_END(sha1_ni_transform)
0295 
0296 .section    .rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16
0297 .align 16   /* 16-byte aligned: read with movdqa in sha1_ni_transform */
0298 PSHUFFLE_BYTE_FLIP_MASK:
0299     .octa 0x000102030405060708090a0b0c0d0e0f    /* pshufb control: destination byte i takes source byte 15-i */
0300 
0301 .section    .rodata.cst16.UPPER_WORD_MASK, "aM", @progbits, 16
0302 .align 16   /* 16-byte aligned: used directly as the memory operand of pand */
0303 UPPER_WORD_MASK:
0304     .octa 0xFFFFFFFF000000000000000000000000    /* keeps bits 127:96, clears bits 95:0 */