0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013 #include <asm/ppc_asm.h>
0014 #include <asm/asm-offsets.h>
0015
/* Pointer registers. rHP and rWP are used exactly as they arrive in r3/r4 on entry. */
0016 #define rHP r3 /* base address of the eight 32-bit hash state words */
0017 #define rKP r24 /* cursor into the PPC_SPE_SHA256_K round constant table */
0018 #define rWP r4 /* message input pointer, read through LOAD_DATA */
0019
/*
 * The eight 32-bit working hash values, one per register.
 * rH0 shares r5 with the incoming block count; the entry code copies r5 to
 * CTR before it loads rH0, so the count is not lost.
 */
0020 #define rH0 r5
0021 #define rH1 r6
0022 #define rH2 r7
0023 #define rH3 r8
0024 #define rH4 r9
0025 #define rH5 r10
0026 #define rH6 r11
0027 #define rH7 r12
0028
/*
 * Message schedule registers, used as 64-bit SPE registers: each one holds
 * two consecutive 32-bit schedule words (16 words in 8 registers).
 * After the rounds they are reused as plain 32-bit scratch for the old state.
 * r14-r21 are saved and restored as 64-bit values by INITIALIZE/FINALIZE.
 */
0029 #define rW0 r14
0030 #define rW1 r15
0031 #define rW2 r16
0032 #define rW3 r17
0033 #define rW4 r18
0034 #define rW5 r19
0035 #define rW6 r20
0036 #define rW7 r21
0037
/*
 * Temporaries. rT0/rT1 are also targets of 64-bit ev* instructions in
 * R_CALC_W; rT2/rT3 are only ever used by 32-bit integer instructions.
 */
0038 #define rT0 r22 /* 64-bit capable temporary */
0039 #define rT1 r23 /* 64-bit capable temporary; also tested by CMP_KC_LOOP */
0040 #define rT2 r0 /* 32-bit temporary */
0041 #define rT3 r25 /* 32-bit temporary */
0042
/*
 * Selected inside R_CALC_W through CMP_K##k##_LOOP.
 *   k = N: expands to nothing.
 *   k = C: compares the low word of rT1 with 0 (signed) and leaves the
 *          result in cr0. At the point of use rT1 holds the constant pair
 *          just loaded from the table, so cr0 reflects the sign of the second
 *          constant of that pair. The "bt gt" after the 16-round group
 *          consumes this result.
 */
0043 #define CMP_KN_LOOP /* no compare in this round pair */
0044 #define CMP_KC_LOOP \
0045 cmpwi rT1,0; /* cr0 = signed compare of low word of rT1 against 0 */
0046
/*
 * Function prologue: allocate a 128-byte stack frame and save r14-r25.
 * r14-r23 (rW0-rW7, rT0, rT1) are stored with evstdw as full 64-bit values
 * in 8-byte slots at 8..80(r1), because the ev* instructions in the round
 * macros overwrite their upper halves as well. r24/r25 (rKP, rT3) are only
 * used as 32-bit registers and are stored as words at 88 and 92.
 * Must stay in sync with FINALIZE.
 */
0047 #define INITIALIZE \
0048 stwu r1,-128(r1); /* create the stack frame */ \
0049 evstdw r14,8(r1); /* save 64-bit r14..r23 */ \
0050 evstdw r15,16(r1); \
0051 evstdw r16,24(r1); \
0052 evstdw r17,32(r1); \
0053 evstdw r18,40(r1); \
0054 evstdw r19,48(r1); \
0055 evstdw r20,56(r1); \
0056 evstdw r21,64(r1); \
0057 evstdw r22,72(r1); \
0058 evstdw r23,80(r1); \
0059 stw r24,88(r1); /* save 32-bit r24, r25 */ \
0060 stw r25,92(r1);
0061
0062
/*
 * Function epilogue, counterpart of INITIALIZE: reload r14-r25 from the
 * frame, then overwrite the first word of each 8-byte save slot (offsets
 * 8..80) with zero and release the 128-byte frame.
 * The second word of each slot and the words at 88/92 are left as they are.
 * NOTE(review): the zeroing presumably scrubs saved upper register halves
 * that may carry data from another user of SPE code - confirm intent.
 * r0 is zero when the macro completes.
 */
0063 #define FINALIZE \
0064 evldw r14,8(r1); /* restore 64-bit r14..r23 */ \
0065 evldw r15,16(r1); \
0066 evldw r16,24(r1); \
0067 evldw r17,32(r1); \
0068 evldw r18,40(r1); \
0069 evldw r19,48(r1); \
0070 evldw r20,56(r1); \
0071 evldw r21,64(r1); \
0072 evldw r22,72(r1); \
0073 evldw r23,80(r1); \
0074 lwz r24,88(r1); /* restore 32-bit r24, r25 */ \
0075 lwz r25,92(r1); \
0076 xor r0,r0,r0; /* r0 = 0 */ \
0077 stw r0,8(r1); /* wipe the first word of every 64-bit save slot */ \
0078 stw r0,16(r1); \
0079 stw r0,24(r1); \
0080 stw r0,32(r1); \
0081 stw r0,40(r1); \
0082 stw r0,48(r1); \
0083 stw r0,56(r1); \
0084 stw r0,64(r1); \
0085 stw r0,72(r1); \
0086 stw r0,80(r1); \
0087 addi r1,r1,128; /* drop the stack frame */
0088
/*
 * LOAD_DATA(reg, off): fetch one 32-bit message word into reg.
 * NEXT_BLOCK: advance rWP to the next 64-byte block where still required.
 *
 * Big endian: plain lwz at displacement off from rWP; rWP is left untouched
 * during a block and NEXT_BLOCK adds 64 afterwards.
 * Otherwise: byte-reversed load (lwbrx) at rWP followed by rWP += 4. The off
 * argument is ignored, so words must be requested in ascending order; after
 * the 16 loads of a block rWP has already advanced by 64 and NEXT_BLOCK is
 * empty.
 */
0089 #ifdef __BIG_ENDIAN__
0090 #define LOAD_DATA(reg, off) \
0091 lwz reg,off(rWP); /* load word at rWP + off */
0092 #define NEXT_BLOCK \
0093 addi rWP,rWP,64; /* step to the next 64-byte block */
0094 #else
0095 #define LOAD_DATA(reg, off) \
0096 lwbrx reg,0,rWP; /* byte-reversed load of the word at rWP */ \
0097 addi rWP,rWP,4; /* step to the next word */
0098 #define NEXT_BLOCK /* rWP was already advanced word by word */
0099 #endif
0100
/*
 * R_LOAD_W(a..h, w, off): two SHA-256 rounds that take their message words
 * straight from the input (used for the first 16 rounds of a block).
 *
 *   a..h  registers holding the working variables for the FIRST round.
 *         For the second round the roles are shifted by one:
 *         (a,b,c,d,e,f,g,h) <- (h,a,b,c,d,e,f,g). After the macro the
 *         caller rotates its argument list by two registers.
 *   w     schedule register that receives both message words: the first
 *         word is copied into the high half by evmergelo before the second
 *         word is loaded into the low half.
 *   off   byte offset of the first word / first constant; off+4 is used for
 *         the second. Constants are read from rKP.
 *
 * Comment prefix "1:" / "2:" marks the round an instruction belongs to; the
 * two rounds are interleaved on purpose, do not reorder.
 * Clobbers rT0-rT3. No instruction here writes the condition register.
 */
0101 #define R_LOAD_W(a, b, c, d, e, f, g, h, w, off) \
0102 LOAD_DATA(w, off) /* 1: w = first message word */ \
0103 rotrwi rT0,e,6; /* 1: S1 = e rotr 6 */ \
0104 rotrwi rT1,e,11; /* 1: t = e rotr 11 */ \
0105 rotrwi rT2,e,25; /* 1: u = e rotr 25 */ \
0106 xor rT0,rT0,rT1; /* 1: S1 = S1 xor t */ \
0107 and rT3,e,f; /* 1: ch = e and f */ \
0108 xor rT0,rT0,rT2; /* 1: S1 = S1 xor u */ \
0109 andc rT1,g,e; /* 1: t = g and not e */ \
0110 lwz rT2,off(rKP); /* 1: K = round constant at off */ \
0111 xor rT3,rT3,rT1; /* 1: ch = ch xor t */ \
0112 add h,h,rT0; /* 1: temp1 = h + S1 */ \
0113 add rT3,rT3,w; /* 1: t = ch + w */ \
0114 rotrwi rT0,a,2; /* 1: S0 = a rotr 2 */ \
0115 add h,h,rT3; /* 1: temp1 = temp1 + t */ \
0116 rotrwi rT1,a,13; /* 1: t = a rotr 13 */ \
0117 add h,h,rT2; /* 1: temp1 = temp1 + K */ \
0118 rotrwi rT3,a,22; /* 1: u = a rotr 22 */ \
0119 xor rT0,rT0,rT1; /* 1: S0 = S0 xor t */ \
0120 add d,d,h; /* 1: d = d + temp1 */ \
0121 xor rT3,rT0,rT3; /* 1: S0 = S0 xor u */ \
0122 evmergelo w,w,w; /* copy the first word into the high half of w */ \
0123 or rT2,a,b; /* 1: maj = a or b */ \
0124 and rT1,a,b; /* 1: t = a and b */ \
0125 and rT2,rT2,c; /* 1: maj = maj and c */ \
0126 LOAD_DATA(w, off+4) /* 2: low half of w = second message word */ \
0127 or rT2,rT1,rT2; /* 1: maj = maj or t */ \
0128 rotrwi rT0,d,6; /* 2: S1 = d rotr 6 (d is e of round 2) */ \
0129 add rT3,rT3,rT2; /* 1: temp2 = S0 + maj */ \
0130 rotrwi rT1,d,11; /* 2: t = d rotr 11 */ \
0131 add h,h,rT3; /* 1: h = temp1 + temp2 (a of round 2) */ \
0132 rotrwi rT2,d,25; /* 2: u = d rotr 25 */ \
0133 xor rT0,rT0,rT1; /* 2: S1 = S1 xor t */ \
0134 and rT3,d,e; /* 2: ch = d and e */ \
0135 xor rT0,rT0,rT2; /* 2: S1 = S1 xor u */ \
0136 andc rT1,f,d; /* 2: t = f and not d */ \
0137 lwz rT2,off+4(rKP); /* 2: K = round constant at off+4 */ \
0138 xor rT3,rT3,rT1; /* 2: ch = ch xor t */ \
0139 add g,g,rT0; /* 2: temp1 = g + S1 (g is h of round 2) */ \
0140 add rT3,rT3,w; /* 2: t = ch + w */ \
0141 rotrwi rT0,h,2; /* 2: S0 = h rotr 2 (h is a of round 2) */ \
0142 add g,g,rT3; /* 2: temp1 = temp1 + t */ \
0143 rotrwi rT1,h,13; /* 2: t = h rotr 13 */ \
0144 add g,g,rT2; /* 2: temp1 = temp1 + K */ \
0145 rotrwi rT3,h,22; /* 2: u = h rotr 22 */ \
0146 xor rT0,rT0,rT1; /* 2: S0 = S0 xor t */ \
0147 or rT2,h,a; /* 2: maj = h or a */ \
0148 xor rT3,rT0,rT3; /* 2: S0 = S0 xor u */ \
0149 and rT1,h,a; /* 2: t = h and a */ \
0150 and rT2,rT2,b; /* 2: maj = maj and b */ \
0151 add c,c,g; /* 2: c = c + temp1 (c is d of round 2) */ \
0152 or rT2,rT1,rT2; /* 2: maj = maj or t */ \
0153 add rT3,rT3,rT2; /* 2: temp2 = S0 + maj */ \
0154 add g,g,rT3 /* 2: g = temp1 + temp2 */
0155
/*
 * R_CALC_W(a..h, w0, w1, w4, w5, w7, k, off): two SHA-256 rounds that first
 * extend the message schedule. Both new schedule words are computed at once
 * in the two 32-bit lanes of the SPE registers, interleaved with the scalar
 * round arithmetic.
 *
 *   a..h  working variables for the FIRST round; the second round uses them
 *         shifted by one, (a,b,c,d,e,f,g,h) <- (h,a,b,c,d,e,f,g).
 *   w0    holds {W[t-16], W[t-15]} on entry and {W[t], W[t+1]} on exit.
 *   w1    next pair; its high word supplies W[t-14].
 *   w4,w5 low word of w4 and high word of w5 supply {W[t-7], W[t-6]}.
 *   w7    holds {W[t-2], W[t-1]}.
 *   k     N or C; pasted into CMP_K##k##_LOOP. With C, cr0 is set from the
 *         low word of the constant pair loaded at off and stays valid to the
 *         end of the macro (nothing after it writes the condition register).
 *   off   byte offset of the constant pair relative to rKP (loaded by evldw).
 *
 * Lane rotations are expressed as left rotates: rotl 25 = rotr 7, a further
 * rotl 21 gives rotr 18 in total, rotl 15 = rotr 17, rotl 13 = rotr 19.
 * Lines marked "1:" / "2:" belong to the first / second round, unmarked
 * lines to the schedule update. Do not reorder.
 * Clobbers rT0-rT3.
 */
0156 #define R_CALC_W(a, b, c, d, e, f, g, h, w0, w1, w4, w5, w7, k, off) \
0157 rotrwi rT2,e,6; /* 1: S1 = e rotr 6 */ \
0158 evmergelohi rT0,w0,w1; /* x = {W[t-15], W[t-14]} */ \
0159 rotrwi rT3,e,11; /* 1: t = e rotr 11 */ \
0160 evsrwiu rT1,rT0,3; /* s0 = x >> 3 */ \
0161 xor rT2,rT2,rT3; /* 1: S1 = S1 xor t */ \
0162 evrlwi rT0,rT0,25; /* x = x rotr 7 */ \
0163 rotrwi rT3,e,25; /* 1: t = e rotr 25 */ \
0164 evxor rT1,rT1,rT0; /* s0 = s0 xor x */ \
0165 xor rT2,rT2,rT3; /* 1: S1 = S1 xor t */ \
0166 evrlwi rT0,rT0,21; /* x = original x rotr 18 */ \
0167 add h,h,rT2; /* 1: temp1 = h + S1 */ \
0168 evxor rT0,rT0,rT1; /* s0 = s0 xor x */ \
0169 and rT2,e,f; /* 1: ch = e and f */ \
0170 evaddw w0,w0,rT0; /* w = W[t-16] + s0 */ \
0171 andc rT3,g,e; /* 1: t = g and not e */ \
0172 evsrwiu rT0,w7,10; /* s1 = W[t-2] >> 10 */ \
0173 xor rT2,rT2,rT3; /* 1: ch = ch xor t */ \
0174 evrlwi rT1,w7,15; /* y = W[t-2] rotr 17 */ \
0175 add h,h,rT2; /* 1: temp1 = temp1 + ch */ \
0176 evxor rT0,rT0,rT1; /* s1 = s1 xor y */ \
0177 rotrwi rT2,a,2; /* 1: S0 = a rotr 2 */ \
0178 evrlwi rT1,w7,13; /* y = W[t-2] rotr 19 */ \
0179 rotrwi rT3,a,13; /* 1: t = a rotr 13 */ \
0180 evxor rT0,rT0,rT1; /* s1 = s1 xor y */ \
0181 xor rT2,rT2,rT3; /* 1: S0 = S0 xor t */ \
0182 evldw rT1,off(rKP); /* k = constant pair at off */ \
0183 rotrwi rT3,a,22; /* 1: t = a rotr 22 */ \
0184 evaddw w0,w0,rT0; /* w = w + s1 */ \
0185 xor rT2,rT2,rT3; /* 1: S0 = S0 xor t */ \
0186 evmergelohi rT0,w4,w5; /* z = {W[t-7], W[t-6]} */ \
0187 and rT3,a,b; /* 1: maj = a and b */ \
0188 evaddw w0,w0,rT0; /* w = w + z, new schedule pair complete */ \
0189 CMP_K##k##_LOOP /* optional: cr0 from low word of k */ \
0190 add rT2,rT2,rT3; /* 1: temp2 = S0 + maj */ \
0191 evaddw rT1,rT1,w0; /* wk = w + k, both lanes */ \
0192 xor rT3,a,b; /* 1: t = a xor b */ \
0193 evmergehi rT0,rT1,rT1; /* low word of rT0 = wk of round 1 */ \
0194 and rT3,rT3,c; /* 1: t = t and c */ \
0195 add h,h,rT0; /* 1: temp1 = temp1 + wk */ \
0196 add rT2,rT2,rT3; /* 1: temp2 = temp2 + t */ \
0197 add g,g,rT1; /* 2: temp1 = g + wk of round 2 */ \
0198 add d,d,h; /* 1: d = d + temp1 */ \
0199 rotrwi rT0,d,6; /* 2: S1 = d rotr 6 (d is e of round 2) */ \
0200 add h,h,rT2; /* 1: h = temp1 + temp2 (a of round 2) */ \
0201 rotrwi rT1,d,11; /* 2: t = d rotr 11 */ \
0202 rotrwi rT2,d,25; /* 2: u = d rotr 25 */ \
0203 xor rT0,rT0,rT1; /* 2: S1 = S1 xor t */ \
0204 and rT3,d,e; /* 2: ch = d and e */ \
0205 xor rT0,rT0,rT2; /* 2: S1 = S1 xor u */ \
0206 andc rT1,f,d; /* 2: t = f and not d */ \
0207 add g,g,rT0; /* 2: temp1 = temp1 + S1 */ \
0208 xor rT3,rT3,rT1; /* 2: ch = ch xor t */ \
0209 rotrwi rT0,h,2; /* 2: S0 = h rotr 2 (h is a of round 2) */ \
0210 add g,g,rT3; /* 2: temp1 = temp1 + ch */ \
0211 rotrwi rT1,h,13; /* 2: t = h rotr 13 */ \
0212 rotrwi rT3,h,22; /* 2: u = h rotr 22 */ \
0213 xor rT0,rT0,rT1; /* 2: S0 = S0 xor t */ \
0214 or rT2,h,a; /* 2: maj = h or a */ \
0215 and rT1,h,a; /* 2: t = h and a */ \
0216 and rT2,rT2,b; /* 2: maj = maj and b */ \
0217 xor rT3,rT0,rT3; /* 2: S0 = S0 xor u */ \
0218 or rT2,rT1,rT2; /* 2: maj = maj or t */ \
0219 add c,c,g; /* 2: c = c + temp1 (c is d of round 2) */ \
0220 add rT3,rT3,rT2; /* 2: temp2 = S0 + maj */ \
0221 add g,g,rT3 /* 2: g = temp1 + temp2 */
0222
/*
 * ppc_spe_sha256_transform
 *
 * Runs the SHA-256 compression function over consecutive 64-byte blocks and
 * folds the result into the hash state.
 *
 * In:   r3 (rHP) = address of the eight 32-bit state words (read and
 *                  rewritten once per block)
 *       r4 (rWP) = address of the message data, 64 bytes consumed per block
 *       r5       = number of blocks; copied to CTR before r5 is reused as rH0
 * Out:  the state words at 0..28(r3) are updated.
 * Clobbers: r0, r4-r12, CTR, cr0. r14-r25 are saved by INITIALIZE and
 *           restored by FINALIZE.
 *
 * A block count of 0 returns immediately without touching the stack, the
 * state or the input. Without this check the loop would still process one
 * block and bdnz would then decrement CTR from 0 to 0xffffffff, running far
 * past the end of the input buffer.
 */
0223 _GLOBAL(ppc_spe_sha256_transform)
cmplwi r5,0 /* nothing to hash? */
beqlr /* yes: return before the frame is created */
0224 INITIALIZE
0225
0226 mtctr r5 /* CTR = number of blocks (r5 becomes rH0 next) */
0227 lwz rH0,0(rHP) /* load the current hash state */
0228 lwz rH1,4(rHP)
0229 lwz rH2,8(rHP)
0230 lwz rH3,12(rHP)
0231 lwz rH4,16(rHP)
0232 lwz rH5,20(rHP)
0233 lwz rH6,24(rHP)
0234 lwz rH7,28(rHP)
0235
0236 ppc_spe_sha256_main: /* once per 64-byte block */
0237 lis rKP,PPC_SPE_SHA256_K@ha /* rKP = start of the constant table */
0238 addi rKP,rKP,PPC_SPE_SHA256_K@l
0239
/* Rounds 0-15: message words come directly from the input, two per macro. */
0240 R_LOAD_W(rH0, rH1, rH2, rH3, rH4, rH5, rH6, rH7, rW0, 0)
0241 R_LOAD_W(rH6, rH7, rH0, rH1, rH2, rH3, rH4, rH5, rW1, 8)
0242 R_LOAD_W(rH4, rH5, rH6, rH7, rH0, rH1, rH2, rH3, rW2, 16)
0243 R_LOAD_W(rH2, rH3, rH4, rH5, rH6, rH7, rH0, rH1, rW3, 24)
0244 R_LOAD_W(rH0, rH1, rH2, rH3, rH4, rH5, rH6, rH7, rW4, 32)
0245 R_LOAD_W(rH6, rH7, rH0, rH1, rH2, rH3, rH4, rH5, rW5, 40)
0246 R_LOAD_W(rH4, rH5, rH6, rH7, rH0, rH1, rH2, rH3, rW6, 48)
0247 R_LOAD_W(rH2, rH3, rH4, rH5, rH6, rH7, rH0, rH1, rW7, 56)
/*
 * Remaining rounds in groups of 16. The last R_CALC_W of a group (k = C)
 * sets cr0 from the sign of the final constant of that group; only the very
 * last table entry is negative, so the loop runs three times.
 */
0248 ppc_spe_sha256_16_rounds:
0249 addi rKP,rKP,64 /* next 16 constants */
0250 R_CALC_W(rH0, rH1, rH2, rH3, rH4, rH5, rH6, rH7,
0251 rW0, rW1, rW4, rW5, rW7, N, 0)
0252 R_CALC_W(rH6, rH7, rH0, rH1, rH2, rH3, rH4, rH5,
0253 rW1, rW2, rW5, rW6, rW0, N, 8)
0254 R_CALC_W(rH4, rH5, rH6, rH7, rH0, rH1, rH2, rH3,
0255 rW2, rW3, rW6, rW7, rW1, N, 16)
0256 R_CALC_W(rH2, rH3, rH4, rH5, rH6, rH7, rH0, rH1,
0257 rW3, rW4, rW7, rW0, rW2, N, 24)
0258 R_CALC_W(rH0, rH1, rH2, rH3, rH4, rH5, rH6, rH7,
0259 rW4, rW5, rW0, rW1, rW3, N, 32)
0260 R_CALC_W(rH6, rH7, rH0, rH1, rH2, rH3, rH4, rH5,
0261 rW5, rW6, rW1, rW2, rW4, N, 40)
0262 R_CALC_W(rH4, rH5, rH6, rH7, rH0, rH1, rH2, rH3,
0263 rW6, rW7, rW2, rW3, rW5, N, 48)
0264 R_CALC_W(rH2, rH3, rH4, rH5, rH6, rH7, rH0, rH1,
0265 rW7, rW0, rW3, rW4, rW6, C, 56)
0266 bt gt,ppc_spe_sha256_16_rounds /* repeat while that constant was > 0 */
0267
/* Add the previous state (reloaded into rW0-rW7) and store the new state. */
0268 lwz rW0,0(rHP)
0269 NEXT_BLOCK
0270 lwz rW1,4(rHP)
0271 lwz rW2,8(rHP)
0272 lwz rW3,12(rHP)
0273 lwz rW4,16(rHP)
0274 lwz rW5,20(rHP)
0275 lwz rW6,24(rHP)
0276 lwz rW7,28(rHP)
0277
0278 add rH0,rH0,rW0
0279 stw rH0,0(rHP)
0280 add rH1,rH1,rW1
0281 stw rH1,4(rHP)
0282 add rH2,rH2,rW2
0283 stw rH2,8(rHP)
0284 add rH3,rH3,rW3
0285 stw rH3,12(rHP)
0286 add rH4,rH4,rW4
0287 stw rH4,16(rHP)
0288 add rH5,rH5,rW5
0289 stw rH5,20(rHP)
0290 add rH6,rH6,rW6
0291 stw rH6,24(rHP)
0292 add rH7,rH7,rW7
0293 stw rH7,28(rHP)
0294
0295 bdnz ppc_spe_sha256_main /* CTR = CTR - 1; next block while CTR != 0 */
0296
0297 FINALIZE
0298 blr
0299
/*
 * The 64 SHA-256 round constants, in round order.
 * R_LOAD_W reads them one word at a time; R_CALC_W reads them as 8-byte
 * pairs with evldw at offsets that are multiples of 8 from the table start.
 * The order also drives loop control: the round loop in
 * ppc_spe_sha256_transform tests the sign of the last constant of each
 * 16-entry group and stops at the first negative one, which is the final
 * entry (0xc67178f2). Do not reorder or pad this table.
 * NOTE(review): the table is only read by the code in this file, yet it is
 * placed in the writable .data section - check whether a read-only section
 * would be acceptable for the kernel link.
 */
0300 .data
0301 .align 5 /* presumably 2^5 = 32-byte alignment for this target - confirm */
0302 PPC_SPE_SHA256_K:
0303 .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 /* K[0..3] */
0304 .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 /* K[4..7] */
0305 .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 /* K[8..11] */
0306 .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 /* K[12..15] */
0307 .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc /* K[16..19] */
0308 .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da /* K[20..23] */
0309 .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 /* K[24..27] */
0310 .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 /* K[28..31], last one positive */
0311 .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 /* K[32..35] */
0312 .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 /* K[36..39] */
0313 .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 /* K[40..43] */
0314 .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 /* K[44..47], last one positive */
0315 .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 /* K[48..51] */
0316 .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 /* K[52..55] */
0317 .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 /* K[56..59] */
0318 .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 /* K[60..63], last one negative: ends the round loop */