0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013 #include <asm/ppc_asm.h>
0014 #include <asm/asm-offsets.h>
0015
/*
 * Register roles used throughout this file. The ev* (SPE) instructions
 * treat a register as a pair of 32 bit words, the plain integer
 * instructions used here work on the low word.
 */
0016 #define rHP r3 /* base address of the five 32 bit hash state words */
0017 #define rWP r4 /* read pointer into the input data */
0018 #define rKP r5 /* block count on entry (moved to CTR), afterwards base of the K table */
0019
0020 #define rW0 r14 /* rW0..rW7: message schedule, two 32 bit words per register */
0021 #define rW1 r15
0022 #define rW2 r16
0023 #define rW3 r17
0024 #define rW4 r18
0025 #define rW5 r19
0026 #define rW6 r20
0027 #define rW7 r21
0028
0029 #define rH0 r6 /* rH0..rH4: the five working variables of the hash */
0030 #define rH1 r7
0031 #define rH2 r8
0032 #define rH3 r9
0033 #define rH4 r10
0034
0035 #define rT0 r22 /* scratch, holds word pairs (target of evmergelohi and evaddw) */
0036 #define rT1 r0 /* scratch */
0037 #define rT2 r11 /* scratch */
0038 #define rT3 r12 /* scratch, also holds old state word 0 at the end of a block */
0039
0040 #define rK r23 /* current round constant, same word in both halves */
0041
/*
 * Round constant loaders. The round macros paste their k argument into
 * the name LOAD_K##k##1:
 *   k = 0     expands to nothing, rK keeps its current value
 *   k = 1..4  reloads rK from byte offset 0, 4, 8 or 12 of the table
 *             that rKP points to. evlwwsplat puts the loaded word into
 *             both halves of rK.
 */
0042 #define LOAD_K01
0043
0044 #define LOAD_K11 \
0045 evlwwsplat rK,0(rKP);
0046
0047 #define LOAD_K21 \
0048 evlwwsplat rK,4(rKP);
0049
0050 #define LOAD_K31 \
0051 evlwwsplat rK,8(rKP);
0052
0053 #define LOAD_K41 \
0054 evlwwsplat rK,12(rKP);
0055
/*
 * INITIALIZE - function prologue.
 * Opens a 128 byte stack frame and saves r14..r23 with 64 bit evstdw
 * stores, one 8 byte slot per register at offsets 8..80. These are the
 * registers behind rW0..rW7, rT0 and rK. FINALIZE is the counterpart.
 */
0056 #define INITIALIZE \
0057 stwu r1,-128(r1); /* push frame, old r1 is stored at the new 0(r1) */ \
0058 evstdw r14,8(r1); /* rW0 */ \
0059 evstdw r15,16(r1); /* rW1 */ \
0060 evstdw r16,24(r1); /* rW2 */ \
0061 evstdw r17,32(r1); /* rW3 */ \
0062 evstdw r18,40(r1); /* rW4 */ \
0063 evstdw r19,48(r1); /* rW5 */ \
0064 evstdw r20,56(r1); /* rW6 */ \
0065 evstdw r21,64(r1); /* rW7 */ \
0066 evstdw r22,72(r1); /* rT0 */ \
0067 evstdw r23,80(r1); /* rK */
0068
0069
/*
 * FINALIZE - function epilogue, counterpart of INITIALIZE.
 * Reloads all 64 bits of r14..r23 from their save slots, then stores a
 * zero word at the base offset of every slot and releases the frame.
 * NOTE(review): only the first 4 bytes of each 8 byte slot are
 * overwritten - presumably the other word is considered harmless,
 * confirm that this is intended.
 * Clobbers r0.
 */
0070 #define FINALIZE \
0071 evldw r14,8(r1); /* restore saved registers, 64 bit each */ \
0072 evldw r15,16(r1); \
0073 evldw r16,24(r1); \
0074 evldw r17,32(r1); \
0075 evldw r18,40(r1); \
0076 evldw r19,48(r1); \
0077 evldw r20,56(r1); \
0078 evldw r21,64(r1); \
0079 evldw r22,72(r1); \
0080 evldw r23,80(r1); \
0081 xor r0,r0,r0; /* r0 = 0 */ \
0082 stw r0,8(r1); /* wipe the first word of every save slot */ \
0083 stw r0,16(r1); \
0084 stw r0,24(r1); \
0085 stw r0,32(r1); \
0086 stw r0,40(r1); \
0087 stw r0,48(r1); \
0088 stw r0,56(r1); \
0089 stw r0,64(r1); \
0090 stw r0,72(r1); \
0091 stw r0,80(r1); \
0092 addi r1,r1,128; /* pop the 128 byte frame */
0093
/*
 * LOAD_DATA(reg, off) fetches one 32 bit input word into reg.
 * NEXT_BLOCK is expanded once per 64 byte block by the main loop.
 *
 * With __BIG_ENDIAN__ defined the word is read at byte offset off from
 * rWP and rWP only moves in NEXT_BLOCK. Otherwise the word is read
 * byte reversed at rWP itself, rWP advances after every word and off
 * is ignored.
 */
0094 #ifdef __BIG_ENDIAN__
0095 #define LOAD_DATA(reg, off) \
0096 lwz reg,off(rWP); /* plain load at byte offset off */
0097 #define NEXT_BLOCK \
0098 addi rWP,rWP,64; /* step rWP over the block just processed */
0099 #else
0100 #define LOAD_DATA(reg, off) \
0101 lwbrx reg,0,rWP; /* byte reversed load at rWP */ \
0102 addi rWP,rWP,4; /* advance rWP by one word */
0103 #define NEXT_BLOCK /* empty, LOAD_DATA already moved rWP */
0104 #endif
0105
/*
 * R_00_15 - two SHA-1 rounds whose message words come straight from
 * the input data.
 *
 *   a, b, c, d, e   registers holding the five working variables
 *   w0, w1          receive the first and second input word. On exit
 *                   w1 holds both: its own low word in the high half
 *                   and the low word of w0 in the low half
 *   k               selects LOAD_K##k##1, expanded before rK is used
 *                   here, so a reload already applies to these rounds
 *   off             byte offset of the first word, see LOAD_DATA
 *
 * Round 1:  e += rotl(a,5) + ((b and c) or (d and not b)) + w0 + K
 *           b  = rotl(b,30)
 * Round 2:  d += rotl(e,5) + ((a and b) or (c and not a)) + w1 + K
 *           a  = rotl(a,30)
 * Round 2 uses the e and b produced by round 1. The instructions of
 * both rounds are interleaved, the tags 1: and 2: below tell them
 * apart. Clobbers rT0, rT1 and rT2.
 */
0106 #define R_00_15(a, b, c, d, e, w0, w1, k, off) \
0107 LOAD_DATA(w0, off) /* 1: fetch input word */ \
0108 and rT2,b,c; /* 1: b and c */ \
0109 LOAD_K##k##1 /* optional reload of rK */ \
0110 andc rT1,d,b; /* 1: d and not b */ \
0111 rotrwi rT0,a,27; /* 1: rotl(a,5) */ \
0112 or rT2,rT2,rT1; /* 1: F */ \
0113 add e,e,rT0; /* 1: e += rotl(a,5) */ \
0114 rotrwi b,b,2; /* 1: b = rotl(b,30) */ \
0115 add e,e,w0; /* 1: e += W */ \
0116 LOAD_DATA(w1, off+4) /* 2: fetch input word */ \
0117 add e,e,rT2; /* 1: e += F */ \
0118 and rT1,a,b; /* 2: a and b (b already rotated) */ \
0119 add e,e,rK; /* 1: e += K, round 1 complete */ \
0120 andc rT2,c,a; /* 2: c and not a */ \
0121 add d,d,rK; /* 2: d += K */ \
0122 or rT2,rT2,rT1; /* 2: F */ \
0123 rotrwi rT0,e,27; /* 2: rotl(e,5) */ \
0124 add d,d,w1; /* 2: d += W */ \
0125 rotrwi a,a,2; /* 2: a = rotl(a,30) */ \
0126 add d,d,rT0; /* 2: d += rotl(e,5) */ \
0127 evmergelo w1,w1,w0; /* pack both words into w1 */ \
0128 add d,d,rT2 /* 2: d += F */
0129
/*
 * R_16_19 - two SHA-1 rounds with the same F function as R_00_15, but
 * with both message words computed from earlier schedule words.
 *
 *   a, b, c, d, e   registers holding the five working variables
 *   w0              word pair 16 positions back, replaced by the new pair
 *   w1              word pair 14 positions back
 *   w4              word pair 8 positions back
 *   w6, w7          the two pairs that straddle the words 3 positions
 *                   back (high word of w6, low word of w7)
 *   k               selects LOAD_K##k##1. It is expanded after evaddw
 *                   has consumed rK, so a reload only affects the
 *                   rounds that follow this macro
 *
 * Schedule, both halves at once:
 *   w0 = rotl(w0 xor W[-3] xor w4 xor w1, 1)
 * The low word of the pair feeds round 1, the high word round 2.
 *
 * Round 1:  e += rotl(a,5) + ((b and c) or (d and not b)) + W + K
 *           b  = rotl(b,30)
 * Round 2:  d += rotl(e,5) + ((a and b) or (c and not a)) + W + K
 *           a  = rotl(a,30)
 * Clobbers rT0, rT1 and rT2.
 */
0130 #define R_16_19(a, b, c, d, e, w0, w1, w4, w6, w7, k) \
0131 and rT2,b,c; /* 1: b and c */ \
0132 evmergelohi rT0,w7,w6; /* rT0 = low word of w7 : high word of w6 */ \
0133 andc rT1,d,b; /* 1: d and not b */ \
0134 evxor w0,w0,rT0; /* W = W[-16] xor W[-3] */ \
0135 or rT1,rT1,rT2; /* 1: F */ \
0136 evxor w0,w0,w4; /* W = W xor W[-8] */ \
0137 add e,e,rT1; /* 1: e += F */ \
0138 evxor w0,w0,w1; /* W = W xor W[-14] */ \
0139 rotrwi rT2,a,27; /* 1: rotl(a,5) */ \
0140 evrlwi w0,w0,1; /* W = rotl(W,1) in both halves */ \
0141 add e,e,rT2; /* 1: e += rotl(a,5) */ \
0142 evaddw rT0,w0,rK; /* rT0 = W + K in both halves */ \
0143 rotrwi b,b,2; /* 1: b = rotl(b,30) */ \
0144 LOAD_K##k##1 /* optional reload of rK for later rounds */ \
0145 evmergehi rT1,rT1,rT0; /* low word of rT1 = high word of rT0 */ \
0146 add e,e,rT0; /* 1: e += W + K, round 1 complete */ \
0147 add d,d,rT1; /* 2: d += W + K */ \
0148 and rT2,a,b; /* 2: a and b (b already rotated) */ \
0149 andc rT1,c,a; /* 2: c and not a */ \
0150 rotrwi rT0,e,27; /* 2: rotl(e,5) */ \
0151 or rT1,rT1,rT2; /* 2: F */ \
0152 add d,d,rT0; /* 2: d += rotl(e,5) */ \
0153 rotrwi a,a,2; /* 2: a = rotl(a,30) */ \
0154 add d,d,rT1 /* 2: d += F */
0155
/*
 * R_20_39 - two SHA-1 rounds with the three-way xor as F function.
 *
 *   a, b, c, d, e   registers holding the five working variables
 *   w0              word pair 16 positions back, replaced by the new pair
 *   w1              word pair 14 positions back
 *   w4              word pair 8 positions back
 *   w6, w7          the two pairs that straddle the words 3 positions
 *                   back (high word of w6, low word of w7)
 *   k               selects LOAD_K##k##1. It is expanded after evaddw
 *                   has consumed rK, so a reload only affects the
 *                   rounds that follow this macro
 *
 * Schedule, both halves at once:
 *   w0 = rotl(w0 xor W[-3] xor w4 xor w1, 1)
 * The low word of the pair feeds round 1, the high word round 2.
 *
 * Round 1:  e += rotl(a,5) + (b xor c xor d) + W + K,  b = rotl(b,30)
 * Round 2:  d += rotl(e,5) + (a xor b xor c) + W + K,  a = rotl(a,30)
 * Round 2 uses the e and b produced by round 1.
 * Clobbers rT0, rT1 and rT2.
 */
0156 #define R_20_39(a, b, c, d, e, w0, w1, w4, w6, w7, k) \
0157 evmergelohi rT0,w7,w6; /* rT0 = low word of w7 : high word of w6 */ \
0158 xor rT2,b,c; /* 1: b xor c */ \
0159 evxor w0,w0,rT0; /* W = W[-16] xor W[-3] */ \
0160 xor rT2,rT2,d; /* 1: F */ \
0161 evxor w0,w0,w4; /* W = W xor W[-8] */ \
0162 add e,e,rT2; /* 1: e += F */ \
0163 evxor w0,w0,w1; /* W = W xor W[-14] */ \
0164 rotrwi rT2,a,27; /* 1: rotl(a,5) */ \
0165 evrlwi w0,w0,1; /* W = rotl(W,1) in both halves */ \
0166 add e,e,rT2; /* 1: e += rotl(a,5) */ \
0167 evaddw rT0,w0,rK; /* rT0 = W + K in both halves */ \
0168 rotrwi b,b,2; /* 1: b = rotl(b,30) */ \
0169 LOAD_K##k##1 /* optional reload of rK for later rounds */ \
0170 evmergehi rT1,rT1,rT0; /* low word of rT1 = high word of rT0 */ \
0171 add e,e,rT0; /* 1: e += W + K, round 1 complete */ \
0172 xor rT2,a,b; /* 2: a xor b (b already rotated) */ \
0173 add d,d,rT1; /* 2: d += W + K */ \
0174 xor rT2,rT2,c; /* 2: F */ \
0175 rotrwi rT0,e,27; /* 2: rotl(e,5) */ \
0176 add d,d,rT2; /* 2: d += F */ \
0177 rotrwi a,a,2; /* 2: a = rotl(a,30) */ \
0178 add d,d,rT0 /* 2: d += rotl(e,5) */
0179
/*
 * R_40_59 - two SHA-1 rounds with the majority function as F, computed
 * as (x and y) or (z and (x or y)).
 *
 *   a, b, c, d, e   registers holding the five working variables
 *   w0              word pair 16 positions back, replaced by the new pair
 *   w1              word pair 14 positions back
 *   w4              word pair 8 positions back
 *   w6, w7          the two pairs that straddle the words 3 positions
 *                   back (high word of w6, low word of w7)
 *   k               selects LOAD_K##k##1. It is expanded after evaddw
 *                   has consumed rK, so a reload only affects the
 *                   rounds that follow this macro
 *
 * Schedule, both halves at once:
 *   w0 = rotl(w0 xor W[-3] xor w4 xor w1, 1)
 * The low word of the pair feeds round 1, the high word round 2.
 *
 * Round 1:  e += rotl(a,5) + maj(b,c,d) + W + K,  b = rotl(b,30)
 * Round 2:  d += rotl(e,5) + maj(a,b,c) + W + K,  a = rotl(a,30)
 * Round 2 uses the e and b produced by round 1. rT0 is reused as a
 * plain scratch register once its W + K value has been consumed.
 * Clobbers rT0, rT1 and rT2.
 */
0180 #define R_40_59(a, b, c, d, e, w0, w1, w4, w6, w7, k) \
0181 and rT2,b,c; /* 1: b and c */ \
0182 evmergelohi rT0,w7,w6; /* rT0 = low word of w7 : high word of w6 */ \
0183 or rT1,b,c; /* 1: b or c */ \
0184 evxor w0,w0,rT0; /* W = W[-16] xor W[-3] */ \
0185 and rT1,d,rT1; /* 1: d and (b or c) */ \
0186 evxor w0,w0,w4; /* W = W xor W[-8] */ \
0187 or rT2,rT2,rT1; /* 1: F */ \
0188 evxor w0,w0,w1; /* W = W xor W[-14] */ \
0189 add e,e,rT2; /* 1: e += F */ \
0190 evrlwi w0,w0,1; /* W = rotl(W,1) in both halves */ \
0191 rotrwi rT2,a,27; /* 1: rotl(a,5) */ \
0192 evaddw rT0,w0,rK; /* rT0 = W + K in both halves */ \
0193 add e,e,rT2; /* 1: e += rotl(a,5) */ \
0194 LOAD_K##k##1 /* optional reload of rK for later rounds */ \
0195 evmergehi rT1,rT1,rT0; /* low word of rT1 = high word of rT0 */ \
0196 rotrwi b,b,2; /* 1: b = rotl(b,30) */ \
0197 add e,e,rT0; /* 1: e += W + K, round 1 complete */ \
0198 and rT2,a,b; /* 2: a and b (b already rotated) */ \
0199 or rT0,a,b; /* 2: a or b, rT0 is free from here on */ \
0200 add d,d,rT1; /* 2: d += W + K */ \
0201 and rT0,c,rT0; /* 2: c and (a or b) */ \
0202 rotrwi a,a,2; /* 2: a = rotl(a,30) */ \
0203 or rT2,rT2,rT0; /* 2: F */ \
0204 rotrwi rT0,e,27; /* 2: rotl(e,5) */ \
0205 add d,d,rT2; /* 2: d += F */ \
0206 add d,d,rT0 /* 2: d += rotl(e,5) */
0207
/*
 * R_60_79 - plain alias for R_20_39 with identical arguments. The
 * round constant that is active while it runs is whatever rK holds at
 * that point.
 */
0208 #define R_60_79(a, b, c, d, e, w0, w1, w4, w6, w7, k) \
0209 R_20_39(a, b, c, d, e, w0, w1, w4, w6, w7, k)
0210
/*
 * ppc_spe_sha1_transform - run the 80 SHA-1 rounds over consecutive
 * 64 byte input blocks and fold the result into the hash state.
 *
 * In:   r3 (rHP)  address of the five 32 bit state words, read on
 *                 entry and rewritten after every block
 *       r4 (rWP)  address of the input data, advanced by 64 per block
 *       r5        number of blocks, moved to CTR. r5 is then reused
 *                 as rKP
 *
 * The block count must be at least 1. The loop body runs before bdnz
 * tests CTR, so a count of 0 wraps CTR instead of skipping the loop.
 *
 * Every round macro performs two rounds and the argument lists rotate
 * the roles of rH0..rH4, so no register moves are needed. After the
 * forty macro expansions each value is back in its own register.
 *
 * Uses r0, r6..r12 and CTR without saving them. r14..r23 are saved by
 * INITIALIZE and restored by FINALIZE.
 */
0211 _GLOBAL(ppc_spe_sha1_transform)
0212 INITIALIZE
0213
0214 lwz rH0,0(rHP) /* load the five state words */
0215 lwz rH1,4(rHP)
0216 mtctr r5 /* CTR = number of blocks */
0217 lwz rH2,8(rHP)
0218 lis rKP,PPC_SPE_SHA1_K@h /* r5 is free now: rKP = address of the K table */
0219 lwz rH3,12(rHP)
0220 ori rKP,rKP,PPC_SPE_SHA1_K@l
0221 lwz rH4,16(rHP)
0222
/* One iteration per 64 byte block. */
0223 ppc_spe_sha1_main:
/* Rounds 0..15: words come from the input, k = 1 loads the first constant. */
0224 R_00_15(rH0, rH1, rH2, rH3, rH4, rW1, rW0, 1, 0)
0225 R_00_15(rH3, rH4, rH0, rH1, rH2, rW2, rW1, 0, 8)
0226 R_00_15(rH1, rH2, rH3, rH4, rH0, rW3, rW2, 0, 16)
0227 R_00_15(rH4, rH0, rH1, rH2, rH3, rW4, rW3, 0, 24)
0228 R_00_15(rH2, rH3, rH4, rH0, rH1, rW5, rW4, 0, 32)
0229 R_00_15(rH0, rH1, rH2, rH3, rH4, rW6, rW5, 0, 40)
0230 R_00_15(rH3, rH4, rH0, rH1, rH2, rT3, rW6, 0, 48) /* rT3 only receives the first word, the pair ends up in rW6 */
0231 R_00_15(rH1, rH2, rH3, rH4, rH0, rT3, rW7, 0, 56) /* same, pair ends up in rW7 */
0232
/* Rounds 16..19. k = 2 switches rK to table offset 4 for the next group. */
0233 R_16_19(rH4, rH0, rH1, rH2, rH3, rW0, rW1, rW4, rW6, rW7, 0)
0234 R_16_19(rH2, rH3, rH4, rH0, rH1, rW1, rW2, rW5, rW7, rW0, 2)
0235
/* Rounds 20..39. k = 3 on the last line switches rK to table offset 8. */
0236 R_20_39(rH0, rH1, rH2, rH3, rH4, rW2, rW3, rW6, rW0, rW1, 0)
0237 R_20_39(rH3, rH4, rH0, rH1, rH2, rW3, rW4, rW7, rW1, rW2, 0)
0238 R_20_39(rH1, rH2, rH3, rH4, rH0, rW4, rW5, rW0, rW2, rW3, 0)
0239 R_20_39(rH4, rH0, rH1, rH2, rH3, rW5, rW6, rW1, rW3, rW4, 0)
0240 R_20_39(rH2, rH3, rH4, rH0, rH1, rW6, rW7, rW2, rW4, rW5, 0)
0241 R_20_39(rH0, rH1, rH2, rH3, rH4, rW7, rW0, rW3, rW5, rW6, 0)
0242 R_20_39(rH3, rH4, rH0, rH1, rH2, rW0, rW1, rW4, rW6, rW7, 0)
0243 R_20_39(rH1, rH2, rH3, rH4, rH0, rW1, rW2, rW5, rW7, rW0, 0)
0244 R_20_39(rH4, rH0, rH1, rH2, rH3, rW2, rW3, rW6, rW0, rW1, 0)
0245 R_20_39(rH2, rH3, rH4, rH0, rH1, rW3, rW4, rW7, rW1, rW2, 3)
0246
/* Rounds 40..59. k = 4 on the last line switches rK to table offset 12. */
0247 R_40_59(rH0, rH1, rH2, rH3, rH4, rW4, rW5, rW0, rW2, rW3, 0)
0248 R_40_59(rH3, rH4, rH0, rH1, rH2, rW5, rW6, rW1, rW3, rW4, 0)
0249 R_40_59(rH1, rH2, rH3, rH4, rH0, rW6, rW7, rW2, rW4, rW5, 0)
0250 R_40_59(rH4, rH0, rH1, rH2, rH3, rW7, rW0, rW3, rW5, rW6, 0)
0251 R_40_59(rH2, rH3, rH4, rH0, rH1, rW0, rW1, rW4, rW6, rW7, 0)
0252 R_40_59(rH0, rH1, rH2, rH3, rH4, rW1, rW2, rW5, rW7, rW0, 0)
0253 R_40_59(rH3, rH4, rH0, rH1, rH2, rW2, rW3, rW6, rW0, rW1, 0)
0254 R_40_59(rH1, rH2, rH3, rH4, rH0, rW3, rW4, rW7, rW1, rW2, 0)
0255 R_40_59(rH4, rH0, rH1, rH2, rH3, rW4, rW5, rW0, rW2, rW3, 0)
0256 R_40_59(rH2, rH3, rH4, rH0, rH1, rW5, rW6, rW1, rW3, rW4, 4)
0257
/*
 * Rounds 60..79. The previous state words are reloaded in between the
 * last expansions, each into a register that the remaining rounds no
 * longer reference.
 */
0258 R_60_79(rH0, rH1, rH2, rH3, rH4, rW6, rW7, rW2, rW4, rW5, 0)
0259 R_60_79(rH3, rH4, rH0, rH1, rH2, rW7, rW0, rW3, rW5, rW6, 0)
0260 R_60_79(rH1, rH2, rH3, rH4, rH0, rW0, rW1, rW4, rW6, rW7, 0)
0261 R_60_79(rH4, rH0, rH1, rH2, rH3, rW1, rW2, rW5, rW7, rW0, 0)
0262 R_60_79(rH2, rH3, rH4, rH0, rH1, rW2, rW3, rW6, rW0, rW1, 0)
0263 R_60_79(rH0, rH1, rH2, rH3, rH4, rW3, rW4, rW7, rW1, rW2, 0)
0264 R_60_79(rH3, rH4, rH0, rH1, rH2, rW4, rW5, rW0, rW2, rW3, 0)
0265 lwz rT3,0(rHP) /* old state word 0 */
0266 R_60_79(rH1, rH2, rH3, rH4, rH0, rW5, rW6, rW1, rW3, rW4, 0)
0267 lwz rW1,4(rHP) /* old state word 1 */
0268 R_60_79(rH4, rH0, rH1, rH2, rH3, rW6, rW7, rW2, rW4, rW5, 0)
0269 lwz rW2,8(rHP) /* old state word 2 */
0270 R_60_79(rH2, rH3, rH4, rH0, rH1, rW7, rW0, rW3, rW5, rW6, 0)
0271 lwz rW3,12(rHP) /* old state word 3 */
0272 NEXT_BLOCK
0273 lwz rW4,16(rHP) /* old state word 4 */
0274
/* New state = old state + working variables, written back right away. */
0275 add rH0,rH0,rT3
0276 stw rH0,0(rHP)
0277 add rH1,rH1,rW1
0278 stw rH1,4(rHP)
0279 add rH2,rH2,rW2
0280 stw rH2,8(rHP)
0281 add rH3,rH3,rW3
0282 stw rH3,12(rHP)
0283 add rH4,rH4,rW4
0284 stw rH4,16(rHP)
0285
0286 bdnz ppc_spe_sha1_main /* decrement CTR, next block while it is not zero */
0287
0288 FINALIZE
0289 blr
0290
/*
 * Round constant table, read through rKP by LOAD_K11..LOAD_K41 at byte
 * offsets 0, 4, 8 and 12. The main loop switches to the next entry
 * after every twenty rounds.
 * NOTE(review): nothing in this file writes to the table, so a read
 * only section may be an option - confirm against the build setup.
 */
0291 .data
0292 .align 4
0293 PPC_SPE_SHA1_K:
0294 .long 0x5A827999,0x6ED9EBA1,0x8F1BBCDC,0xCA62C1D6