Back to home page

OSCL-LXR

 
 

    


0001 /*
0002  * Intel SHA Extensions optimized implementation of a SHA-256 update function
0003  *
0004  * This file is provided under a dual BSD/GPLv2 license.  When using or
0005  * redistributing this file, you may do so under either license.
0006  *
0007  * GPL LICENSE SUMMARY
0008  *
0009  * Copyright(c) 2015 Intel Corporation.
0010  *
0011  * This program is free software; you can redistribute it and/or modify
0012  * it under the terms of version 2 of the GNU General Public License as
0013  * published by the Free Software Foundation.
0014  *
0015  * This program is distributed in the hope that it will be useful, but
0016  * WITHOUT ANY WARRANTY; without even the implied warranty of
0017  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
0018  * General Public License for more details.
0019  *
0020  * Contact Information:
0021  *  Sean Gulley <sean.m.gulley@intel.com>
0022  *  Tim Chen <tim.c.chen@linux.intel.com>
0023  *
0024  * BSD LICENSE
0025  *
0026  * Copyright(c) 2015 Intel Corporation.
0027  *
0028  * Redistribution and use in source and binary forms, with or without
0029  * modification, are permitted provided that the following conditions
0030  * are met:
0031  *
0032  *  * Redistributions of source code must retain the above copyright
0033  *    notice, this list of conditions and the following disclaimer.
0034  *  * Redistributions in binary form must reproduce the above copyright
0035  *    notice, this list of conditions and the following disclaimer in
0036  *    the documentation and/or other materials provided with the
0037  *    distribution.
0038  *  * Neither the name of Intel Corporation nor the names of its
0039  *    contributors may be used to endorse or promote products derived
0040  *    from this software without specific prior written permission.
0041  *
0042  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
0043  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
0044  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
0045  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
0046  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
0047  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
0048  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
0049  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
0050  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
0051  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
0052  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
0053  *
0054  */
0055 
0056 #include <linux/linkage.h>
0057 
0058 #define DIGEST_PTR  %rdi    /* 1st arg: hash state, read on entry and written back on exit */
0059 #define DATA_PTR    %rsi    /* 2nd arg: input cursor, advanced by 64 after each block */
0060 #define NUM_BLKS    %rdx    /* 3rd arg: block count on entry; turned into the end-of-data pointer */
0061 
0062 #define SHA256CONSTANTS %rax    /* address of the K256 round-constant table */
0063 
0064 #define MSG     %xmm0           /* message words + round constants; sha256rnds2 reads %xmm0 implicitly */
0065 #define STATE0      %xmm1       /* working state words A, B, E, F */
0066 #define STATE1      %xmm2       /* working state words C, D, G, H */
0067 #define MSGTMP0     %xmm3       /* MSGTMP0-3: rolling window of four-word message schedule groups */
0068 #define MSGTMP1     %xmm4
0069 #define MSGTMP2     %xmm5
0070 #define MSGTMP3     %xmm6
0071 #define MSGTMP4     %xmm7       /* scratch: state reordering and palignr results */
0072 
0073 #define SHUF_MASK   %xmm8       /* PSHUFFLE_BYTE_FLIP_MASK, loaded once before the block loop */
0074 
0075 #define ABEF_SAVE   %xmm9       /* copy of STATE0 taken at the start of each block */
0076 #define CDGH_SAVE   %xmm10      /* copy of STATE1 taken at the start of each block */
0077 
0078 /*
0079  * Intel SHA Extensions optimized implementation of a SHA-256 update function
0080  *
0081  * The function takes a pointer to the current hash values, a pointer to the
0082  * input data, and a number of 64 byte blocks to process.  Once all blocks have
0083  * been processed, the digest buffer is updated with the resulting hash value.
0084  * The function only processes complete blocks; there is no functionality to
0085  * store partial blocks.  All message padding and hash value initialization must
0086  * be done outside the update function.
0087  *
0088  * The indented lines in the loop are instructions related to rounds processing.
0089  * The non-indented lines are instructions related to the message schedule.
0090  *
0091  * void sha256_ni_transform(uint32_t *digest, const void *data,
0092         uint32_t numBlocks);
0093  * digest : pointer to digest
0094  * data: pointer to input data
0095  * numBlocks: Number of blocks to process
0096  */
0097 
0098 .text
0099 .align 32
0100 SYM_FUNC_START(sha256_ni_transform)
         /*
          * In:    DIGEST_PTR = eight 32-bit hash words, updated in place
          *        DATA_PTR   = input, a whole number of 64-byte blocks
          *        NUM_BLKS   = number of blocks (32-bit value)
          * Out:   digest written back; nothing is stored when the count is 0
          * Clobb: %rax, %rdx, %rsi, %xmm0-%xmm10, flags
          *
          * The count is documented as a 32-bit argument, and the calling
          * convention leaves bits 63:32 of its register undefined.  Writing
          * %edx (the low half of NUM_BLKS) clears them, so the 64-bit shift
          * and the zero test below only ever see the real count.
          */
0101     mov     %edx, %edx          /* zero-extend the 32-bit block count */
0102     shl     $6, NUM_BLKS        /* convert to bytes */
0103     jz      .Ldone_hash
0104     add     DATA_PTR, NUM_BLKS  /* pointer to end of data */
0105 
0106     /*
0107      * load initial hash values
0108      * Need to reorder these appropriately
0109      * DCBA, HGFE -> ABEF, CDGH
0110      */
0111     movdqu      0*16(DIGEST_PTR), STATE0
0112     movdqu      1*16(DIGEST_PTR), STATE1
0113 
0114     pshufd      $0xB1, STATE0,  STATE0      /* CDAB */
0115     pshufd      $0x1B, STATE1,  STATE1      /* EFGH */
0116     movdqa      STATE0, MSGTMP4
0117     palignr     $8, STATE1,  STATE0     /* ABEF */
0118     pblendw     $0xF0, MSGTMP4, STATE1      /* CDGH */
0119 
0120     movdqa      PSHUFFLE_BYTE_FLIP_MASK(%rip), SHUF_MASK
0121     lea     K256(%rip), SHA256CONSTANTS
0122 
         /*
          * Per-block loop.  Every group of four rounds follows one pattern:
          *   paddd         adds four round constants to four message words,
          *                 leaving the sums in MSG (%xmm0)
          *   sha256rnds2   runs two rounds on the low two dwords of %xmm0,
          *                 which it reads as an implicit operand
          *   pshufd $0x0E  moves the high two dwords of MSG down
          *   sha256rnds2   runs the remaining two rounds
          * The two sha256rnds2 alternate destinations, so STATE0/STATE1 hold
          * ABEF/CDGH again after each group.  sha256msg1, palignr + paddd and
          * sha256msg2 build the message words consumed by later groups.
          */
0123 .Lloop0:
0124     /* Save hash values for addition after rounds */
0125     movdqa      STATE0, ABEF_SAVE
0126     movdqa      STATE1, CDGH_SAVE
0127 
0128     /* Rounds 0-3 */
0129     movdqu      0*16(DATA_PTR), MSG
0130     pshufb      SHUF_MASK, MSG              /* byte-swap each 32-bit input word */
0131     movdqa      MSG, MSGTMP0
0132         paddd       0*16(SHA256CONSTANTS), MSG
0133         sha256rnds2 STATE0, STATE1
0134         pshufd      $0x0E, MSG, MSG
0135         sha256rnds2 STATE1, STATE0
0136 
0137     /* Rounds 4-7 */
0138     movdqu      1*16(DATA_PTR), MSG
0139     pshufb      SHUF_MASK, MSG
0140     movdqa      MSG, MSGTMP1
0141         paddd       1*16(SHA256CONSTANTS), MSG
0142         sha256rnds2 STATE0, STATE1
0143         pshufd      $0x0E, MSG, MSG
0144         sha256rnds2 STATE1, STATE0
0145     sha256msg1  MSGTMP1, MSGTMP0
0146 
0147     /* Rounds 8-11 */
0148     movdqu      2*16(DATA_PTR), MSG
0149     pshufb      SHUF_MASK, MSG
0150     movdqa      MSG, MSGTMP2
0151         paddd       2*16(SHA256CONSTANTS), MSG
0152         sha256rnds2 STATE0, STATE1
0153         pshufd      $0x0E, MSG, MSG
0154         sha256rnds2 STATE1, STATE0
0155     sha256msg1  MSGTMP2, MSGTMP1
0156 
0157     /* Rounds 12-15 */
0158     movdqu      3*16(DATA_PTR), MSG
0159     pshufb      SHUF_MASK, MSG
0160     movdqa      MSG, MSGTMP3
0161         paddd       3*16(SHA256CONSTANTS), MSG
0162         sha256rnds2 STATE0, STATE1
0163     movdqa      MSGTMP3, MSGTMP4
0164     palignr     $4, MSGTMP2, MSGTMP4
0165     paddd       MSGTMP4, MSGTMP0
0166     sha256msg2  MSGTMP3, MSGTMP0            /* MSGTMP0 now holds the words for rounds 16-19 */
0167         pshufd      $0x0E, MSG, MSG
0168         sha256rnds2 STATE1, STATE0
0169     sha256msg1  MSGTMP3, MSGTMP2
0170 
0171     /* Rounds 16-19 */
0172     movdqa      MSGTMP0, MSG
0173         paddd       4*16(SHA256CONSTANTS), MSG
0174         sha256rnds2 STATE0, STATE1
0175     movdqa      MSGTMP0, MSGTMP4
0176     palignr     $4, MSGTMP3, MSGTMP4
0177     paddd       MSGTMP4, MSGTMP1
0178     sha256msg2  MSGTMP0, MSGTMP1
0179         pshufd      $0x0E, MSG, MSG
0180         sha256rnds2 STATE1, STATE0
0181     sha256msg1  MSGTMP0, MSGTMP3
0182 
0183     /* Rounds 20-23 */
0184     movdqa      MSGTMP1, MSG
0185         paddd       5*16(SHA256CONSTANTS), MSG
0186         sha256rnds2 STATE0, STATE1
0187     movdqa      MSGTMP1, MSGTMP4
0188     palignr     $4, MSGTMP0, MSGTMP4
0189     paddd       MSGTMP4, MSGTMP2
0190     sha256msg2  MSGTMP1, MSGTMP2
0191         pshufd      $0x0E, MSG, MSG
0192         sha256rnds2 STATE1, STATE0
0193     sha256msg1  MSGTMP1, MSGTMP0
0194 
0195     /* Rounds 24-27 */
0196     movdqa      MSGTMP2, MSG
0197         paddd       6*16(SHA256CONSTANTS), MSG
0198         sha256rnds2 STATE0, STATE1
0199     movdqa      MSGTMP2, MSGTMP4
0200     palignr     $4, MSGTMP1, MSGTMP4
0201     paddd       MSGTMP4, MSGTMP3
0202     sha256msg2  MSGTMP2, MSGTMP3
0203         pshufd      $0x0E, MSG, MSG
0204         sha256rnds2 STATE1, STATE0
0205     sha256msg1  MSGTMP2, MSGTMP1
0206 
0207     /* Rounds 28-31 */
0208     movdqa      MSGTMP3, MSG
0209         paddd       7*16(SHA256CONSTANTS), MSG
0210         sha256rnds2 STATE0, STATE1
0211     movdqa      MSGTMP3, MSGTMP4
0212     palignr     $4, MSGTMP2, MSGTMP4
0213     paddd       MSGTMP4, MSGTMP0
0214     sha256msg2  MSGTMP3, MSGTMP0
0215         pshufd      $0x0E, MSG, MSG
0216         sha256rnds2 STATE1, STATE0
0217     sha256msg1  MSGTMP3, MSGTMP2
0218 
0219     /* Rounds 32-35 */
0220     movdqa      MSGTMP0, MSG
0221         paddd       8*16(SHA256CONSTANTS), MSG
0222         sha256rnds2 STATE0, STATE1
0223     movdqa      MSGTMP0, MSGTMP4
0224     palignr     $4, MSGTMP3, MSGTMP4
0225     paddd       MSGTMP4, MSGTMP1
0226     sha256msg2  MSGTMP0, MSGTMP1
0227         pshufd      $0x0E, MSG, MSG
0228         sha256rnds2 STATE1, STATE0
0229     sha256msg1  MSGTMP0, MSGTMP3
0230 
0231     /* Rounds 36-39 */
0232     movdqa      MSGTMP1, MSG
0233         paddd       9*16(SHA256CONSTANTS), MSG
0234         sha256rnds2 STATE0, STATE1
0235     movdqa      MSGTMP1, MSGTMP4
0236     palignr     $4, MSGTMP0, MSGTMP4
0237     paddd       MSGTMP4, MSGTMP2
0238     sha256msg2  MSGTMP1, MSGTMP2
0239         pshufd      $0x0E, MSG, MSG
0240         sha256rnds2 STATE1, STATE0
0241     sha256msg1  MSGTMP1, MSGTMP0
0242 
0243     /* Rounds 40-43 */
0244     movdqa      MSGTMP2, MSG
0245         paddd       10*16(SHA256CONSTANTS), MSG
0246         sha256rnds2 STATE0, STATE1
0247     movdqa      MSGTMP2, MSGTMP4
0248     palignr     $4, MSGTMP1, MSGTMP4
0249     paddd       MSGTMP4, MSGTMP3
0250     sha256msg2  MSGTMP2, MSGTMP3
0251         pshufd      $0x0E, MSG, MSG
0252         sha256rnds2 STATE1, STATE0
0253     sha256msg1  MSGTMP2, MSGTMP1
0254 
0255     /* Rounds 44-47 */
0256     movdqa      MSGTMP3, MSG
0257         paddd       11*16(SHA256CONSTANTS), MSG
0258         sha256rnds2 STATE0, STATE1
0259     movdqa      MSGTMP3, MSGTMP4
0260     palignr     $4, MSGTMP2, MSGTMP4
0261     paddd       MSGTMP4, MSGTMP0
0262     sha256msg2  MSGTMP3, MSGTMP0
0263         pshufd      $0x0E, MSG, MSG
0264         sha256rnds2 STATE1, STATE0
0265     sha256msg1  MSGTMP3, MSGTMP2
0266 
0267     /* Rounds 48-51 */
0268     movdqa      MSGTMP0, MSG
0269         paddd       12*16(SHA256CONSTANTS), MSG
0270         sha256rnds2 STATE0, STATE1
0271     movdqa      MSGTMP0, MSGTMP4
0272     palignr     $4, MSGTMP3, MSGTMP4
0273     paddd       MSGTMP4, MSGTMP1
0274     sha256msg2  MSGTMP0, MSGTMP1
0275         pshufd      $0x0E, MSG, MSG
0276         sha256rnds2 STATE1, STATE0
0277     sha256msg1  MSGTMP0, MSGTMP3
0278 
0279     /* Rounds 52-55 */
0280     movdqa      MSGTMP1, MSG
0281         paddd       13*16(SHA256CONSTANTS), MSG
0282         sha256rnds2 STATE0, STATE1
0283     movdqa      MSGTMP1, MSGTMP4
0284     palignr     $4, MSGTMP0, MSGTMP4
0285     paddd       MSGTMP4, MSGTMP2
0286     sha256msg2  MSGTMP1, MSGTMP2
0287         pshufd      $0x0E, MSG, MSG
0288         sha256rnds2 STATE1, STATE0
0289 
0290     /* Rounds 56-59 */
0291     movdqa      MSGTMP2, MSG
0292         paddd       14*16(SHA256CONSTANTS), MSG
0293         sha256rnds2 STATE0, STATE1
0294     movdqa      MSGTMP2, MSGTMP4
0295     palignr     $4, MSGTMP1, MSGTMP4
0296     paddd       MSGTMP4, MSGTMP3
0297     sha256msg2  MSGTMP2, MSGTMP3
0298         pshufd      $0x0E, MSG, MSG
0299         sha256rnds2 STATE1, STATE0
0300 
0301     /* Rounds 60-63 */
0302     movdqa      MSGTMP3, MSG
0303         paddd       15*16(SHA256CONSTANTS), MSG
0304         sha256rnds2 STATE0, STATE1
0305         pshufd      $0x0E, MSG, MSG
0306         sha256rnds2 STATE1, STATE0
0307 
0308     /* Add current hash values with previously saved */
0309     paddd       ABEF_SAVE, STATE0
0310     paddd       CDGH_SAVE, STATE1
0311 
0312     /* Increment data pointer and loop if more to process */
0313     add     $64, DATA_PTR
0314     cmp     NUM_BLKS, DATA_PTR          /* NUM_BLKS holds the end-of-data pointer here */
0315     jne     .Lloop0
0316 
0317     /* Write hash values back in the correct order */
0318     pshufd      $0x1B, STATE0,  STATE0      /* FEBA */
0319     pshufd      $0xB1, STATE1,  STATE1      /* DCHG */
0320     movdqa      STATE0, MSGTMP4
0321     pblendw     $0xF0, STATE1,  STATE0      /* DCBA */
0322     palignr     $8, MSGTMP4, STATE1     /* HGFE */
0323 
0324     movdqu      STATE0, 0*16(DIGEST_PTR)
0325     movdqu      STATE1, 1*16(DIGEST_PTR)
0326 
0327 .Ldone_hash:                            /* also the target of the zero-block early exit */
0328 
0329     RET
0330 SYM_FUNC_END(sha256_ni_transform)
0331 
0332 .section    .rodata.cst256.K256, "aM", @progbits, 256
0333 .align 64       /* each 16-byte row is used directly as a paddd memory operand */
0334 K256:           /* the 64 SHA-256 round constants, four per row; row N is read at N*16(SHA256CONSTANTS) */
0335     .long   0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5    /* rounds 0-3 */
0336     .long   0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5    /* rounds 4-7 */
0337     .long   0xd807aa98,0x12835b01,0x243185be,0x550c7dc3    /* rounds 8-11 */
0338     .long   0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174    /* rounds 12-15 */
0339     .long   0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc    /* rounds 16-19 */
0340     .long   0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da    /* rounds 20-23 */
0341     .long   0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7    /* rounds 24-27 */
0342     .long   0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967    /* rounds 28-31 */
0343     .long   0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13    /* rounds 32-35 */
0344     .long   0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85    /* rounds 36-39 */
0345     .long   0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3    /* rounds 40-43 */
0346     .long   0xd192e819,0xd6990624,0xf40e3585,0x106aa070    /* rounds 44-47 */
0347     .long   0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5    /* rounds 48-51 */
0348     .long   0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3    /* rounds 52-55 */
0349     .long   0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208    /* rounds 56-59 */
0350     .long   0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2    /* rounds 60-63 */
0351 
0352 .section    .rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16
0353 .align 16       /* the function loads this with movdqa, which needs a 16-byte aligned address */
0354 PSHUFFLE_BYTE_FLIP_MASK:    /* pshufb control that reverses the four bytes inside each 32-bit lane */
0355     .octa 0x0c0d0e0f08090a0b0405060700010203