Back to home page

OSCL-LXR

 
 

    


0001 /* SPDX-License-Identifier: GPL-2.0-or-later */
0002 /*
0003  * Fast SHA-1 implementation for SPE instruction set (PPC)
0004  *
0005  * This code makes use of the SPE SIMD instruction set as defined in
0006  * http://cache.freescale.com/files/32bit/doc/ref_manual/SPEPIM.pdf
0007  * Implementation is based on optimization guide notes from
0008  * http://cache.freescale.com/files/32bit/doc/app_note/AN2665.pdf
0009  *
0010  * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
0011  */
0012 
0013 #include <asm/ppc_asm.h>
0014 #include <asm/asm-offsets.h>
0015 /* Register aliases used by the macros and by ppc_spe_sha1_transform below. */
0016 #define rHP r3  /* pointer to hash value (5 words, read and written) */
0017 #define rWP r4  /* pointer to input             */
0018 #define rKP r5  /* pointer to constants; r5 is first copied to CTR as the loop count */
0019 
0020 #define rW0 r14 /* 64 bit round words, two schedule words per register */
0021 #define rW1 r15
0022 #define rW2 r16
0023 #define rW3 r17
0024 #define rW4 r18
0025 #define rW5 r19
0026 #define rW6 r20
0027 #define rW7 r21
0028 
0029 #define rH0 r6  /* 32 bit hash values (working variables) */
0030 #define rH1 r7
0031 #define rH2 r8
0032 #define rH3 r9
0033 #define rH4 r10
0034 
0035 #define rT0 r22 /* 64 bit temporary             */
0036 #define rT1 r0  /* 32 bit temporaries               */
0037 #define rT2 r11
0038 #define rT3 r12
0039 
0040 #define rK  r23 /* 64 bit constant; r23 is saved by INITIALIZE and restored by FINALIZE */
0041 /* LOAD_K<k>1: k = 0 expands to nothing (keep rK); k = 1..4 load one word of the table at rKP into both halves of rK. */
0042 #define LOAD_K01
0043 
0044 #define LOAD_K11 \
0045     evlwwsplat  rK,0(rKP);  /* table word 0, used from round 0 */
0046 
0047 #define LOAD_K21 \
0048     evlwwsplat  rK,4(rKP);  /* table word 1, used from round 20 */
0049 
0050 #define LOAD_K31 \
0051     evlwwsplat  rK,8(rKP);  /* table word 2, used from round 40 */
0052 
0053 #define LOAD_K41 \
0054     evlwwsplat  rK,12(rKP); /* table word 3, used from round 60 */
0055 /* INITIALIZE: open a 128 byte stack frame and store r14-r23 as 64 bit values at 8(r1)..87(r1). */
0056 #define INITIALIZE \
0057     stwu        r1,-128(r1);    /* create stack frame       */ \
0058     evstdw      r14,8(r1);  /* We must save non volatile    */ \
0059     evstdw      r15,16(r1); /* registers. Take the chance   */ \
0060     evstdw      r16,24(r1); /* and save the SPE part too    */ \
0061     evstdw      r17,32(r1);                    \
0062     evstdw      r18,40(r1);                    \
0063     evstdw      r19,48(r1);                    \
0064     evstdw      r20,56(r1);                    \
0065     evstdw      r21,64(r1);                    \
0066     evstdw      r22,72(r1);                    \
0067     evstdw      r23,80(r1);
0068 
0069 /* FINALIZE: reload r14-r23 from the frame, zero the first word of each 8 byte save slot, then release the frame. Clobbers r0. */
0070 #define FINALIZE \
0071     evldw       r14,8(r1);  /* restore SPE registers    */ \
0072     evldw       r15,16(r1);                    \
0073     evldw       r16,24(r1);                    \
0074     evldw       r17,32(r1);                    \
0075     evldw       r18,40(r1);                    \
0076     evldw       r19,48(r1);                    \
0077     evldw       r20,56(r1);                    \
0078     evldw       r21,64(r1);                    \
0079     evldw       r22,72(r1);                    \
0080     evldw       r23,80(r1);                    \
0081     xor     r0,r0,r0;   /* r0 = 0           */ \
0082     stw     r0,8(r1);   /* Delete sensitive data    */ \
0083     stw     r0,16(r1);  /* that we might have pushed    */ \
0084     stw     r0,24(r1);  /* from other context that runs */ \
0085     stw     r0,32(r1);  /* the same code. Assume that   */ \
0086     stw     r0,40(r1);  /* the lower part of the GPRs   */ \
0087     stw     r0,48(r1);  /* were already overwritten on  */ \
0088     stw     r0,56(r1);  /* the way down to here     */ \
0089     stw     r0,64(r1);                     \
0090     stw     r0,72(r1);                     \
0091     stw     r0,80(r1);                     \
0092     addi        r1,r1,128;  /* cleanup stack frame      */
0093 /* LOAD_DATA reads one message word as big-endian; NEXT_BLOCK advances rWP by a block when LOAD_DATA itself does not move it. */
0094 #ifdef __BIG_ENDIAN__
0095 #define LOAD_DATA(reg, off) \
0096     lwz     reg,off(rWP);   /* load data at offset off  */
0097 #define NEXT_BLOCK \
0098     addi        rWP,rWP,64; /* increment per block      */
0099 #else
0100 #define LOAD_DATA(reg, off) \
0101     lwbrx       reg,0,rWP;  /* byte-reversed load; off is unused */ \
0102     addi        rWP,rWP,4;  /* increment per word       */
0103 #define NEXT_BLOCK          /* nothing to do        */
0104 #endif
0105 /* R_00_15: two rounds with F = (B and C) or (~B and D). Loads two message words (w0 is scratch), leaves both merged in w1; k picks LOAD_K<k>1, off is passed to LOAD_DATA. */
0106 #define R_00_15(a, b, c, d, e, w0, w1, k, off) \
0107     LOAD_DATA(w0, off)      /* 1: W             */ \
0108     and     rT2,b,c;    /* 1: F' = B and C      */ \
0109     LOAD_K##k##1                               \
0110     andc        rT1,d,b;    /* 1: F" = ~B and D         */ \
0111     rotrwi      rT0,a,27;   /* 1: A' = A rotl 5     */ \
0112     or      rT2,rT2,rT1;    /* 1: F = F' or F"      */ \
0113     add     e,e,rT0;    /* 1: E = E + A'        */ \
0114     rotrwi      b,b,2;      /* 1: B = B rotl 30     */ \
0115     add     e,e,w0;     /* 1: E = E + W         */ \
0116     LOAD_DATA(w1, off+4)        /* 2: W             */ \
0117     add     e,e,rT2;    /* 1: E = E + F         */ \
0118     and     rT1,a,b;    /* 2: F' = B and C      */ \
0119     add     e,e,rK;     /* 1: E = E + K         */ \
0120     andc        rT2,c,a;    /* 2: F" = ~B and D         */ \
0121     add     d,d,rK;     /* 2: E = E + K         */ \
0122     or      rT2,rT2,rT1;    /* 2: F = F' or F"      */ \
0123     rotrwi      rT0,e,27;   /* 2: A' = A rotl 5     */ \
0124     add     d,d,w1;     /* 2: E = E + W         */ \
0125     rotrwi      a,a,2;      /* 2: B = B rotl 30     */ \
0126     add     d,d,rT0;    /* 2: E = E + A'        */ \
0127     evmergelo   w1,w1,w0;   /*    w1 = 2nd W : 1st W (hi:lo) */ \
0128     add     d,d,rT2     /* 2: E = E + F         */
0129 /* R_16_19: two rounds with F = (B and C) or (~B and D). w0 (the W[-16] pair) is overwritten with the two new schedule words; LOAD_K<k>1 runs after W + K is formed. */
0130 #define R_16_19(a, b, c, d, e, w0, w1, w4, w6, w7, k) \
0131     and     rT2,b,c;    /* 1: F' = B and C      */ \
0132     evmergelohi rT0,w7,w6;  /*    W[-3]         */ \
0133     andc        rT1,d,b;    /* 1: F" = ~B and D         */ \
0134     evxor       w0,w0,rT0;  /*    W = W[-16] xor W[-3]  */ \
0135     or      rT1,rT1,rT2;    /* 1: F = F' or F"      */ \
0136     evxor       w0,w0,w4;   /*    W = W xor W[-8]       */ \
0137     add     e,e,rT1;    /* 1: E = E + F         */ \
0138     evxor       w0,w0,w1;   /*    W = W xor W[-14]      */ \
0139     rotrwi      rT2,a,27;   /* 1: A' = A rotl 5     */ \
0140     evrlwi      w0,w0,1;    /*    W = W rotl 1      */ \
0141     add     e,e,rT2;    /* 1: E = E + A'        */ \
0142     evaddw      rT0,w0,rK;  /*    WK = W + K        */ \
0143     rotrwi      b,b,2;      /* 1: B = B rotl 30     */ \
0144     LOAD_K##k##1                               \
0145     evmergehi   rT1,rT1,rT0;    /*    WK1/WK2: rT1 low = WK high */ \
0146     add     e,e,rT0;    /* 1: E = E + WK        */ \
0147     add     d,d,rT1;    /* 2: E = E + WK        */ \
0148     and     rT2,a,b;    /* 2: F' = B and C      */ \
0149     andc        rT1,c,a;    /* 2: F" = ~B and D         */ \
0150     rotrwi      rT0,e,27;   /* 2: A' = A rotl 5     */ \
0151     or      rT1,rT1,rT2;    /* 2: F = F' or F"      */ \
0152     add     d,d,rT0;    /* 2: E = E + A'        */ \
0153     rotrwi      a,a,2;      /* 2: B = B rotl 30     */ \
0154     add     d,d,rT1     /* 2: E = E + F         */
0155 /* R_20_39: two rounds with F = B xor C xor D. w0 (the W[-16] pair) is overwritten with the two new schedule words; LOAD_K<k>1 runs after W + K is formed. */
0156 #define R_20_39(a, b, c, d, e, w0, w1, w4, w6, w7, k) \
0157     evmergelohi rT0,w7,w6;  /*    W[-3]         */ \
0158     xor     rT2,b,c;    /* 1: F' = B xor C      */ \
0159     evxor       w0,w0,rT0;  /*    W = W[-16] xor W[-3]  */ \
0160     xor     rT2,rT2,d;  /* 1: F = F' xor D      */ \
0161     evxor       w0,w0,w4;   /*    W = W xor W[-8]       */ \
0162     add     e,e,rT2;    /* 1: E = E + F         */ \
0163     evxor       w0,w0,w1;   /*    W = W xor W[-14]      */ \
0164     rotrwi      rT2,a,27;   /* 1: A' = A rotl 5     */ \
0165     evrlwi      w0,w0,1;    /*    W = W rotl 1      */ \
0166     add     e,e,rT2;    /* 1: E = E + A'        */ \
0167     evaddw      rT0,w0,rK;  /*    WK = W + K        */ \
0168     rotrwi      b,b,2;      /* 1: B = B rotl 30     */ \
0169     LOAD_K##k##1                               \
0170     evmergehi   rT1,rT1,rT0;    /*    WK1/WK2: rT1 low = WK high */ \
0171     add     e,e,rT0;    /* 1: E = E + WK        */ \
0172     xor     rT2,a,b;    /* 2: F' = B xor C      */ \
0173     add     d,d,rT1;    /* 2: E = E + WK        */ \
0174     xor     rT2,rT2,c;  /* 2: F = F' xor D      */ \
0175     rotrwi      rT0,e,27;   /* 2: A' = A rotl 5     */ \
0176     add     d,d,rT2;    /* 2: E = E + F         */ \
0177     rotrwi      a,a,2;      /* 2: B = B rotl 30     */ \
0178     add     d,d,rT0     /* 2: E = E + A'        */
0179 /* R_40_59: two rounds with F = (B and C) or ((B or C) and D). w0 (the W[-16] pair) is overwritten with the two new schedule words; rT0 is reused as a 32 bit temporary once WK is consumed. */
0180 #define R_40_59(a, b, c, d, e, w0, w1, w4, w6, w7, k) \
0181     and     rT2,b,c;    /* 1: F' = B and C      */ \
0182     evmergelohi rT0,w7,w6;  /*    W[-3]         */ \
0183     or      rT1,b,c;    /* 1: F" = B or C       */ \
0184     evxor       w0,w0,rT0;  /*    W = W[-16] xor W[-3]  */ \
0185     and     rT1,d,rT1;  /* 1: F" = F" and D     */ \
0186     evxor       w0,w0,w4;   /*    W = W xor W[-8]       */ \
0187     or      rT2,rT2,rT1;    /* 1: F = F' or F"      */ \
0188     evxor       w0,w0,w1;   /*    W = W xor W[-14]      */ \
0189     add     e,e,rT2;    /* 1: E = E + F         */ \
0190     evrlwi      w0,w0,1;    /*    W = W rotl 1      */ \
0191     rotrwi      rT2,a,27;   /* 1: A' = A rotl 5     */ \
0192     evaddw      rT0,w0,rK;  /*    WK = W + K        */ \
0193     add     e,e,rT2;    /* 1: E = E + A'        */ \
0194     LOAD_K##k##1                               \
0195     evmergehi   rT1,rT1,rT0;    /*    WK1/WK2: rT1 low = WK high */ \
0196     rotrwi      b,b,2;      /* 1: B = B rotl 30     */ \
0197     add     e,e,rT0;    /* 1: E = E + WK        */ \
0198     and     rT2,a,b;    /* 2: F' = B and C      */ \
0199     or      rT0,a,b;    /* 2: F" = B or C       */ \
0200     add     d,d,rT1;    /* 2: E = E + WK        */ \
0201     and     rT0,c,rT0;  /* 2: F" = F" and D     */ \
0202     rotrwi      a,a,2;      /* 2: B = B rotl 30     */ \
0203     or      rT2,rT2,rT0;    /* 2: F = F' or F"      */ \
0204     rotrwi      rT0,e,27;   /* 2: A' = A rotl 5     */ \
0205     add     d,d,rT2;    /* 2: E = E + F         */ \
0206     add     d,d,rT0     /* 2: E = E + A'        */
0207 /* R_60_79: expands to R_20_39 with the same arguments (same F = B xor C xor D); only the constant held in rK differs. */
0208 #define R_60_79(a, b, c, d, e, w0, w1, w4, w6, w7, k) \
0209     R_20_39(a, b, c, d, e, w0, w1, w4, w6, w7, k)
0210 /* ppc_spe_sha1_transform: r3 = hash state (5 words, updated in place), r4 = input, r5 = number of 64 byte blocks. One block is always processed before CTR is tested, so the count is presumably never 0 - verify against callers. */
0211 _GLOBAL(ppc_spe_sha1_transform)
0212     INITIALIZE
0213 
0214     lwz     rH0,0(rHP)      /* load the five state words */
0215     lwz     rH1,4(rHP)
0216     mtctr       r5          /* CTR = loop count, taken before r5 is reused as rKP */
0217     lwz     rH2,8(rHP)
0218     lis     rKP,PPC_SPE_SHA1_K@h    /* rKP = address of PPC_SPE_SHA1_K (high half) */
0219     lwz     rH3,12(rHP)
0220     ori     rKP,rKP,PPC_SPE_SHA1_K@l    /* rKP low half */
0221     lwz     rH4,16(rHP)
0222 /* One pass per 64 byte block: 40 macro invocations of two rounds each. Rounds 0-15 first; rT3 serves as the scratch w0 in the last two. */
0223 ppc_spe_sha1_main:
0224     R_00_15(rH0, rH1, rH2, rH3, rH4, rW1, rW0, 1, 0)
0225     R_00_15(rH3, rH4, rH0, rH1, rH2, rW2, rW1, 0, 8)
0226     R_00_15(rH1, rH2, rH3, rH4, rH0, rW3, rW2, 0, 16)
0227     R_00_15(rH4, rH0, rH1, rH2, rH3, rW4, rW3, 0, 24)
0228     R_00_15(rH2, rH3, rH4, rH0, rH1, rW5, rW4, 0, 32)
0229     R_00_15(rH0, rH1, rH2, rH3, rH4, rW6, rW5, 0, 40)
0230     R_00_15(rH3, rH4, rH0, rH1, rH2, rT3, rW6, 0, 48)
0231     R_00_15(rH1, rH2, rH3, rH4, rH0, rT3, rW7, 0, 56)
0232 /* Rounds 16-19; the second invocation reloads rK via LOAD_K21. */
0233     R_16_19(rH4, rH0, rH1, rH2, rH3, rW0, rW1, rW4, rW6, rW7, 0)
0234     R_16_19(rH2, rH3, rH4, rH0, rH1, rW1, rW2, rW5, rW7, rW0, 2)
0235 /* Rounds 20-39; the last invocation reloads rK via LOAD_K31. */
0236     R_20_39(rH0, rH1, rH2, rH3, rH4, rW2, rW3, rW6, rW0, rW1, 0)
0237     R_20_39(rH3, rH4, rH0, rH1, rH2, rW3, rW4, rW7, rW1, rW2, 0)
0238     R_20_39(rH1, rH2, rH3, rH4, rH0, rW4, rW5, rW0, rW2, rW3, 0)
0239     R_20_39(rH4, rH0, rH1, rH2, rH3, rW5, rW6, rW1, rW3, rW4, 0)
0240     R_20_39(rH2, rH3, rH4, rH0, rH1, rW6, rW7, rW2, rW4, rW5, 0)
0241     R_20_39(rH0, rH1, rH2, rH3, rH4, rW7, rW0, rW3, rW5, rW6, 0)
0242     R_20_39(rH3, rH4, rH0, rH1, rH2, rW0, rW1, rW4, rW6, rW7, 0)
0243     R_20_39(rH1, rH2, rH3, rH4, rH0, rW1, rW2, rW5, rW7, rW0, 0)
0244     R_20_39(rH4, rH0, rH1, rH2, rH3, rW2, rW3, rW6, rW0, rW1, 0)
0245     R_20_39(rH2, rH3, rH4, rH0, rH1, rW3, rW4, rW7, rW1, rW2, 3)
0246 /* Rounds 40-59; the last invocation reloads rK via LOAD_K41. */
0247     R_40_59(rH0, rH1, rH2, rH3, rH4, rW4, rW5, rW0, rW2, rW3, 0)
0248     R_40_59(rH3, rH4, rH0, rH1, rH2, rW5, rW6, rW1, rW3, rW4, 0)
0249     R_40_59(rH1, rH2, rH3, rH4, rH0, rW6, rW7, rW2, rW4, rW5, 0)
0250     R_40_59(rH4, rH0, rH1, rH2, rH3, rW7, rW0, rW3, rW5, rW6, 0)
0251     R_40_59(rH2, rH3, rH4, rH0, rH1, rW0, rW1, rW4, rW6, rW7, 0)
0252     R_40_59(rH0, rH1, rH2, rH3, rH4, rW1, rW2, rW5, rW7, rW0, 0)
0253     R_40_59(rH3, rH4, rH0, rH1, rH2, rW2, rW3, rW6, rW0, rW1, 0)
0254     R_40_59(rH1, rH2, rH3, rH4, rH0, rW3, rW4, rW7, rW1, rW2, 0)
0255     R_40_59(rH4, rH0, rH1, rH2, rH3, rW4, rW5, rW0, rW2, rW3, 0)
0256     R_40_59(rH2, rH3, rH4, rH0, rH1, rW5, rW6, rW1, rW3, rW4, 4)
0257 /* Rounds 60-79; the old state words are reloaded in between into registers the remaining rounds no longer read. */
0258     R_60_79(rH0, rH1, rH2, rH3, rH4, rW6, rW7, rW2, rW4, rW5, 0)
0259     R_60_79(rH3, rH4, rH0, rH1, rH2, rW7, rW0, rW3, rW5, rW6, 0)
0260     R_60_79(rH1, rH2, rH3, rH4, rH0, rW0, rW1, rW4, rW6, rW7, 0)
0261     R_60_79(rH4, rH0, rH1, rH2, rH3, rW1, rW2, rW5, rW7, rW0, 0)
0262     R_60_79(rH2, rH3, rH4, rH0, rH1, rW2, rW3, rW6, rW0, rW1, 0)
0263     R_60_79(rH0, rH1, rH2, rH3, rH4, rW3, rW4, rW7, rW1, rW2, 0)
0264     R_60_79(rH3, rH4, rH0, rH1, rH2, rW4, rW5, rW0, rW2, rW3, 0)
0265     lwz     rT3,0(rHP)      /* old state word 0 */
0266     R_60_79(rH1, rH2, rH3, rH4, rH0, rW5, rW6, rW1, rW3, rW4, 0)
0267     lwz     rW1,4(rHP)      /* old state word 1 */
0268     R_60_79(rH4, rH0, rH1, rH2, rH3, rW6, rW7, rW2, rW4, rW5, 0)
0269     lwz     rW2,8(rHP)      /* old state word 2 */
0270     R_60_79(rH2, rH3, rH4, rH0, rH1, rW7, rW0, rW3, rW5, rW6, 0)
0271     lwz     rW3,12(rHP)     /* old state word 3 */
0272     NEXT_BLOCK
0273     lwz     rW4,16(rHP)     /* old state word 4 */
0274 /* Add the old state to the working variables and store the result back as the new state. */
0275     add     rH0,rH0,rT3
0276     stw     rH0,0(rHP)
0277     add     rH1,rH1,rW1
0278     stw     rH1,4(rHP)
0279     add     rH2,rH2,rW2
0280     stw     rH2,8(rHP)
0281     add     rH3,rH3,rW3
0282     stw     rH3,12(rHP)
0283     add     rH4,rH4,rW4
0284     stw     rH4,16(rHP)
0285 
0286     bdnz        ppc_spe_sha1_main   /* decrement CTR, loop while it is nonzero */
0287 
0288     FINALIZE
0289     blr
0290 /* The four SHA-1 round constants, loaded by LOAD_K11..LOAD_K41 at offsets 0/4/8/12. They are only ever read, so they live in .rodata rather than writable .data. */
0291 .section .rodata
0292 .align 4
0293 PPC_SPE_SHA1_K:
0294     .long 0x5A827999,0x6ED9EBA1,0x8F1BBCDC,0xCA62C1D6