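/*
 * Constant-time Curve25519 (X25519) scalar multiplication for 32-bit
 * machines. Field elements of GF(2^255 - 19) are held in ten unsigned
 * 32-bit limbs in radix 2^25.5 (alternating 26- and 25-bit limbs).
 */
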
#include <asm/unaligned.h>
#include <crypto/curve25519.h>
#include <linux/string.h>

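/*
 * fe: a tightly reduced field element. Limb i holds roughly 26 bits for
 * even i and 25 bits for odd i, so the value represented is
 * v[0] + 2^26*v[1] + 2^51*v[2] + ... + 2^230*v[9].
 */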
typedef struct fe { u32 v[10]; } fe;

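/*
 * fe_loose: same limb layout, but with looser bounds. fe_add() and
 * fe_sub() produce fe_loose values without carrying; they are only ever
 * consumed by the multiplication and squaring routines, which carry-reduce.
 */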
typedef struct fe_loose { u32 v[10]; } fe_loose;

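/* Unpack 32 little-endian bytes into ten 26/25-bit limbs, ignoring the top bit of s. */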
static __always_inline void fe_frombytes_impl(u32 h[10], const u8 *s)
{
	u32 a0 = get_unaligned_le32(s);
	u32 a1 = get_unaligned_le32(s+4);
	u32 a2 = get_unaligned_le32(s+8);
	u32 a3 = get_unaligned_le32(s+12);
	u32 a4 = get_unaligned_le32(s+16);
	u32 a5 = get_unaligned_le32(s+20);
	u32 a6 = get_unaligned_le32(s+24);
	u32 a7 = get_unaligned_le32(s+28);

	h[0] = a0&((1<<26)-1);
	h[1] = (a0>>26) | ((a1&((1<<19)-1))<< 6);
	h[2] = (a1>>19) | ((a2&((1<<13)-1))<<13);
	h[3] = (a2>>13) | ((a3&((1<< 6)-1))<<19);
	h[4] = (a3>> 6);
	h[5] = a4&((1<<25)-1);
	h[6] = (a4>>25) | ((a5&((1<<19)-1))<< 7);
	h[7] = (a5>>19) | ((a6&((1<<12)-1))<<13);
	h[8] = (a6>>12) | ((a7&((1<< 6)-1))<<20);
	h[9] = (a7>> 6)&((1<<25)-1);
}

static __always_inline void fe_frombytes(fe *h, const u8 *s)
{
	fe_frombytes_impl(h->v, s);
}

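/*
 * Add/subtract with carry/borrow on 25- or 26-bit limbs. Each helper
 * stores the low bits of the result in *low and returns the carry
 * (or borrow) bit; 32-bit intermediates are sufficient here.
 */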
static __always_inline u8 addcarryx_u25(u8 c, u32 a, u32 b, u32 *low)
{
	u32 x = a + b + c;

	*low = x & ((1 << 25) - 1);
	return (x >> 25) & 1;
}

static __always_inline u8 addcarryx_u26(u8 c, u32 a, u32 b, u32 *low)
{
	u32 x = a + b + c;

	*low = x & ((1 << 26) - 1);
	return (x >> 26) & 1;
}

static __always_inline u8 subborrow_u25(u8 c, u32 a, u32 b, u32 *low)
{
	u32 x = a - b - c;

	*low = x & ((1 << 25) - 1);
	return x >> 31;
}

static __always_inline u8 subborrow_u26(u8 c, u32 a, u32 b, u32 *low)
{
	u32 x = a - b - c;

	*low = x & ((1 << 26) - 1);
	return x >> 31;
}

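/* Constant-time select: returns nz when t is nonzero, z otherwise. */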
static __always_inline u32 cmovznz32(u32 t, u32 z, u32 nz)
{
	t = -!!t;
	return (t&nz) | ((~t)&z);
}

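/*
 * Fully reduce a field element to its canonical representative modulo
 * p = 2^255 - 19 by conditionally subtracting p, in constant time.
 */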
static __always_inline void fe_freeze(u32 out[10], const u32 in1[10])
{
	{ const u32 x17 = in1[9];
	{ const u32 x18 = in1[8];
	{ const u32 x16 = in1[7];
	{ const u32 x14 = in1[6];
	{ const u32 x12 = in1[5];
	{ const u32 x10 = in1[4];
	{ const u32 x8 = in1[3];
	{ const u32 x6 = in1[2];
	{ const u32 x4 = in1[1];
	{ const u32 x2 = in1[0];
	{ u32 x20; u8 x21 = subborrow_u26(0x0, x2, 0x3ffffed, &x20);
	{ u32 x23; u8 x24 = subborrow_u25(x21, x4, 0x1ffffff, &x23);
	{ u32 x26; u8 x27 = subborrow_u26(x24, x6, 0x3ffffff, &x26);
	{ u32 x29; u8 x30 = subborrow_u25(x27, x8, 0x1ffffff, &x29);
	{ u32 x32; u8 x33 = subborrow_u26(x30, x10, 0x3ffffff, &x32);
	{ u32 x35; u8 x36 = subborrow_u25(x33, x12, 0x1ffffff, &x35);
	{ u32 x38; u8 x39 = subborrow_u26(x36, x14, 0x3ffffff, &x38);
	{ u32 x41; u8 x42 = subborrow_u25(x39, x16, 0x1ffffff, &x41);
	{ u32 x44; u8 x45 = subborrow_u26(x42, x18, 0x3ffffff, &x44);
	{ u32 x47; u8 x48 = subborrow_u25(x45, x17, 0x1ffffff, &x47);
	{ u32 x49 = cmovznz32(x48, 0x0, 0xffffffff);
	{ u32 x50 = (x49 & 0x3ffffed);
	{ u32 x52; u8 x53 = addcarryx_u26(0x0, x20, x50, &x52);
	{ u32 x54 = (x49 & 0x1ffffff);
	{ u32 x56; u8 x57 = addcarryx_u25(x53, x23, x54, &x56);
	{ u32 x58 = (x49 & 0x3ffffff);
	{ u32 x60; u8 x61 = addcarryx_u26(x57, x26, x58, &x60);
	{ u32 x62 = (x49 & 0x1ffffff);
	{ u32 x64; u8 x65 = addcarryx_u25(x61, x29, x62, &x64);
	{ u32 x66 = (x49 & 0x3ffffff);
	{ u32 x68; u8 x69 = addcarryx_u26(x65, x32, x66, &x68);
	{ u32 x70 = (x49 & 0x1ffffff);
	{ u32 x72; u8 x73 = addcarryx_u25(x69, x35, x70, &x72);
	{ u32 x74 = (x49 & 0x3ffffff);
	{ u32 x76; u8 x77 = addcarryx_u26(x73, x38, x74, &x76);
	{ u32 x78 = (x49 & 0x1ffffff);
	{ u32 x80; u8 x81 = addcarryx_u25(x77, x41, x78, &x80);
	{ u32 x82 = (x49 & 0x3ffffff);
	{ u32 x84; u8 x85 = addcarryx_u26(x81, x44, x82, &x84);
	{ u32 x86 = (x49 & 0x1ffffff);
	{ u32 x88; addcarryx_u25(x85, x47, x86, &x88);
	out[0] = x52;
	out[1] = x56;
	out[2] = x60;
	out[3] = x64;
	out[4] = x68;
	out[5] = x72;
	out[6] = x76;
	out[7] = x80;
	out[8] = x84;
	out[9] = x88;
	}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
}

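/* Serialize a field element as 32 little-endian bytes (canonical form). */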
static __always_inline void fe_tobytes(u8 s[32], const fe *f)
{
	u32 h[10];

	fe_freeze(h, f->v);
	s[0] = h[0] >> 0;
	s[1] = h[0] >> 8;
	s[2] = h[0] >> 16;
	s[3] = (h[0] >> 24) | (h[1] << 2);
	s[4] = h[1] >> 6;
	s[5] = h[1] >> 14;
	s[6] = (h[1] >> 22) | (h[2] << 3);
	s[7] = h[2] >> 5;
	s[8] = h[2] >> 13;
	s[9] = (h[2] >> 21) | (h[3] << 5);
	s[10] = h[3] >> 3;
	s[11] = h[3] >> 11;
	s[12] = (h[3] >> 19) | (h[4] << 6);
	s[13] = h[4] >> 2;
	s[14] = h[4] >> 10;
	s[15] = h[4] >> 18;
	s[16] = h[5] >> 0;
	s[17] = h[5] >> 8;
	s[18] = h[5] >> 16;
	s[19] = (h[5] >> 24) | (h[6] << 1);
	s[20] = h[6] >> 7;
	s[21] = h[6] >> 15;
	s[22] = (h[6] >> 23) | (h[7] << 3);
	s[23] = h[7] >> 5;
	s[24] = h[7] >> 13;
	s[25] = (h[7] >> 21) | (h[8] << 4);
	s[26] = h[8] >> 4;
	s[27] = h[8] >> 12;
	s[28] = (h[8] >> 20) | (h[9] << 6);
	s[29] = h[9] >> 2;
	s[30] = h[9] >> 10;
	s[31] = h[9] >> 18;
}

static __always_inline void fe_copy(fe *h, const fe *f)
{
	memmove(h, f, sizeof(u32) * 10);
}

static __always_inline void fe_copy_lt(fe_loose *h, const fe *f)
{
	memmove(h, f, sizeof(u32) * 10);
}

static __always_inline void fe_0(fe *h)
{
	memset(h, 0, sizeof(u32) * 10);
}

static __always_inline void fe_1(fe *h)
{
	memset(h, 0, sizeof(u32) * 10);
	h->v[0] = 1;
}

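/* h = f + g. No carries are performed, so the result is an fe_loose. */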
static noinline void fe_add_impl(u32 out[10], const u32 in1[10], const u32 in2[10])
{
	{ const u32 x20 = in1[9];
	{ const u32 x21 = in1[8];
	{ const u32 x19 = in1[7];
	{ const u32 x17 = in1[6];
	{ const u32 x15 = in1[5];
	{ const u32 x13 = in1[4];
	{ const u32 x11 = in1[3];
	{ const u32 x9 = in1[2];
	{ const u32 x7 = in1[1];
	{ const u32 x5 = in1[0];
	{ const u32 x38 = in2[9];
	{ const u32 x39 = in2[8];
	{ const u32 x37 = in2[7];
	{ const u32 x35 = in2[6];
	{ const u32 x33 = in2[5];
	{ const u32 x31 = in2[4];
	{ const u32 x29 = in2[3];
	{ const u32 x27 = in2[2];
	{ const u32 x25 = in2[1];
	{ const u32 x23 = in2[0];
	out[0] = (x5 + x23);
	out[1] = (x7 + x25);
	out[2] = (x9 + x27);
	out[3] = (x11 + x29);
	out[4] = (x13 + x31);
	out[5] = (x15 + x33);
	out[6] = (x17 + x35);
	out[7] = (x19 + x37);
	out[8] = (x21 + x39);
	out[9] = (x20 + x38);
	}}}}}}}}}}}}}}}}}}}}
}

static __always_inline void fe_add(fe_loose *h, const fe *f, const fe *g)
{
	fe_add_impl(h->v, f->v, g->v);
}

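/*
 * h = f - g. A multiple of 2*p is added limbwise before subtracting so
 * the result stays nonnegative; no carries are performed, so the result
 * is an fe_loose.
 */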
static noinline void fe_sub_impl(u32 out[10], const u32 in1[10], const u32 in2[10])
{
	{ const u32 x20 = in1[9];
	{ const u32 x21 = in1[8];
	{ const u32 x19 = in1[7];
	{ const u32 x17 = in1[6];
	{ const u32 x15 = in1[5];
	{ const u32 x13 = in1[4];
	{ const u32 x11 = in1[3];
	{ const u32 x9 = in1[2];
	{ const u32 x7 = in1[1];
	{ const u32 x5 = in1[0];
	{ const u32 x38 = in2[9];
	{ const u32 x39 = in2[8];
	{ const u32 x37 = in2[7];
	{ const u32 x35 = in2[6];
	{ const u32 x33 = in2[5];
	{ const u32 x31 = in2[4];
	{ const u32 x29 = in2[3];
	{ const u32 x27 = in2[2];
	{ const u32 x25 = in2[1];
	{ const u32 x23 = in2[0];
	out[0] = ((0x7ffffda + x5) - x23);
	out[1] = ((0x3fffffe + x7) - x25);
	out[2] = ((0x7fffffe + x9) - x27);
	out[3] = ((0x3fffffe + x11) - x29);
	out[4] = ((0x7fffffe + x13) - x31);
	out[5] = ((0x3fffffe + x15) - x33);
	out[6] = ((0x7fffffe + x17) - x35);
	out[7] = ((0x3fffffe + x19) - x37);
	out[8] = ((0x7fffffe + x21) - x39);
	out[9] = ((0x3fffffe + x20) - x38);
	}}}}}}}}}}}}}}}}}}}}
}

static __always_inline void fe_sub(fe_loose *h, const fe *f, const fe *g)
{
	fe_sub_impl(h->v, f->v, g->v);
}

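/*
 * h = f * g: schoolbook multiplication of the ten-limb operands, folding
 * the high half back in via 2^255 = 19 (mod p), followed by a carry chain
 * that brings the limbs back within tight bounds.
 */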
static noinline void fe_mul_impl(u32 out[10], const u32 in1[10], const u32 in2[10])
{
	{ const u32 x20 = in1[9];
	{ const u32 x21 = in1[8];
	{ const u32 x19 = in1[7];
	{ const u32 x17 = in1[6];
	{ const u32 x15 = in1[5];
	{ const u32 x13 = in1[4];
	{ const u32 x11 = in1[3];
	{ const u32 x9 = in1[2];
	{ const u32 x7 = in1[1];
	{ const u32 x5 = in1[0];
	{ const u32 x38 = in2[9];
	{ const u32 x39 = in2[8];
	{ const u32 x37 = in2[7];
	{ const u32 x35 = in2[6];
	{ const u32 x33 = in2[5];
	{ const u32 x31 = in2[4];
	{ const u32 x29 = in2[3];
	{ const u32 x27 = in2[2];
	{ const u32 x25 = in2[1];
	{ const u32 x23 = in2[0];
	{ u64 x40 = ((u64)x23 * x5);
	{ u64 x41 = (((u64)x23 * x7) + ((u64)x25 * x5));
	{ u64 x42 = ((((u64)(0x2 * x25) * x7) + ((u64)x23 * x9)) + ((u64)x27 * x5));
	{ u64 x43 = (((((u64)x25 * x9) + ((u64)x27 * x7)) + ((u64)x23 * x11)) + ((u64)x29 * x5));
	{ u64 x44 = (((((u64)x27 * x9) + (0x2 * (((u64)x25 * x11) + ((u64)x29 * x7)))) + ((u64)x23 * x13)) + ((u64)x31 * x5));
	{ u64 x45 = (((((((u64)x27 * x11) + ((u64)x29 * x9)) + ((u64)x25 * x13)) + ((u64)x31 * x7)) + ((u64)x23 * x15)) + ((u64)x33 * x5));
	{ u64 x46 = (((((0x2 * ((((u64)x29 * x11) + ((u64)x25 * x15)) + ((u64)x33 * x7))) + ((u64)x27 * x13)) + ((u64)x31 * x9)) + ((u64)x23 * x17)) + ((u64)x35 * x5));
	{ u64 x47 = (((((((((u64)x29 * x13) + ((u64)x31 * x11)) + ((u64)x27 * x15)) + ((u64)x33 * x9)) + ((u64)x25 * x17)) + ((u64)x35 * x7)) + ((u64)x23 * x19)) + ((u64)x37 * x5));
	{ u64 x48 = (((((((u64)x31 * x13) + (0x2 * (((((u64)x29 * x15) + ((u64)x33 * x11)) + ((u64)x25 * x19)) + ((u64)x37 * x7)))) + ((u64)x27 * x17)) + ((u64)x35 * x9)) + ((u64)x23 * x21)) + ((u64)x39 * x5));
	{ u64 x49 = (((((((((((u64)x31 * x15) + ((u64)x33 * x13)) + ((u64)x29 * x17)) + ((u64)x35 * x11)) + ((u64)x27 * x19)) + ((u64)x37 * x9)) + ((u64)x25 * x21)) + ((u64)x39 * x7)) + ((u64)x23 * x20)) + ((u64)x38 * x5));
	{ u64 x50 = (((((0x2 * ((((((u64)x33 * x15) + ((u64)x29 * x19)) + ((u64)x37 * x11)) + ((u64)x25 * x20)) + ((u64)x38 * x7))) + ((u64)x31 * x17)) + ((u64)x35 * x13)) + ((u64)x27 * x21)) + ((u64)x39 * x9));
	{ u64 x51 = (((((((((u64)x33 * x17) + ((u64)x35 * x15)) + ((u64)x31 * x19)) + ((u64)x37 * x13)) + ((u64)x29 * x21)) + ((u64)x39 * x11)) + ((u64)x27 * x20)) + ((u64)x38 * x9));
	{ u64 x52 = (((((u64)x35 * x17) + (0x2 * (((((u64)x33 * x19) + ((u64)x37 * x15)) + ((u64)x29 * x20)) + ((u64)x38 * x11)))) + ((u64)x31 * x21)) + ((u64)x39 * x13));
	{ u64 x53 = (((((((u64)x35 * x19) + ((u64)x37 * x17)) + ((u64)x33 * x21)) + ((u64)x39 * x15)) + ((u64)x31 * x20)) + ((u64)x38 * x13));
	{ u64 x54 = (((0x2 * ((((u64)x37 * x19) + ((u64)x33 * x20)) + ((u64)x38 * x15))) + ((u64)x35 * x21)) + ((u64)x39 * x17));
	{ u64 x55 = (((((u64)x37 * x21) + ((u64)x39 * x19)) + ((u64)x35 * x20)) + ((u64)x38 * x17));
	{ u64 x56 = (((u64)x39 * x21) + (0x2 * (((u64)x37 * x20) + ((u64)x38 * x19))));
	{ u64 x57 = (((u64)x39 * x20) + ((u64)x38 * x21));
	{ u64 x58 = ((u64)(0x2 * x38) * x20);
	{ u64 x59 = (x48 + (x58 << 0x4));
	{ u64 x60 = (x59 + (x58 << 0x1));
	{ u64 x61 = (x60 + x58);
	{ u64 x62 = (x47 + (x57 << 0x4));
	{ u64 x63 = (x62 + (x57 << 0x1));
	{ u64 x64 = (x63 + x57);
	{ u64 x65 = (x46 + (x56 << 0x4));
	{ u64 x66 = (x65 + (x56 << 0x1));
	{ u64 x67 = (x66 + x56);
	{ u64 x68 = (x45 + (x55 << 0x4));
	{ u64 x69 = (x68 + (x55 << 0x1));
	{ u64 x70 = (x69 + x55);
	{ u64 x71 = (x44 + (x54 << 0x4));
	{ u64 x72 = (x71 + (x54 << 0x1));
	{ u64 x73 = (x72 + x54);
	{ u64 x74 = (x43 + (x53 << 0x4));
	{ u64 x75 = (x74 + (x53 << 0x1));
	{ u64 x76 = (x75 + x53);
	{ u64 x77 = (x42 + (x52 << 0x4));
	{ u64 x78 = (x77 + (x52 << 0x1));
	{ u64 x79 = (x78 + x52);
	{ u64 x80 = (x41 + (x51 << 0x4));
	{ u64 x81 = (x80 + (x51 << 0x1));
	{ u64 x82 = (x81 + x51);
	{ u64 x83 = (x40 + (x50 << 0x4));
	{ u64 x84 = (x83 + (x50 << 0x1));
	{ u64 x85 = (x84 + x50);
	{ u64 x86 = (x85 >> 0x1a);
	{ u32 x87 = ((u32)x85 & 0x3ffffff);
	{ u64 x88 = (x86 + x82);
	{ u64 x89 = (x88 >> 0x19);
	{ u32 x90 = ((u32)x88 & 0x1ffffff);
	{ u64 x91 = (x89 + x79);
	{ u64 x92 = (x91 >> 0x1a);
	{ u32 x93 = ((u32)x91 & 0x3ffffff);
	{ u64 x94 = (x92 + x76);
	{ u64 x95 = (x94 >> 0x19);
	{ u32 x96 = ((u32)x94 & 0x1ffffff);
	{ u64 x97 = (x95 + x73);
	{ u64 x98 = (x97 >> 0x1a);
	{ u32 x99 = ((u32)x97 & 0x3ffffff);
	{ u64 x100 = (x98 + x70);
	{ u64 x101 = (x100 >> 0x19);
	{ u32 x102 = ((u32)x100 & 0x1ffffff);
	{ u64 x103 = (x101 + x67);
	{ u64 x104 = (x103 >> 0x1a);
	{ u32 x105 = ((u32)x103 & 0x3ffffff);
	{ u64 x106 = (x104 + x64);
	{ u64 x107 = (x106 >> 0x19);
	{ u32 x108 = ((u32)x106 & 0x1ffffff);
	{ u64 x109 = (x107 + x61);
	{ u64 x110 = (x109 >> 0x1a);
	{ u32 x111 = ((u32)x109 & 0x3ffffff);
	{ u64 x112 = (x110 + x49);
	{ u64 x113 = (x112 >> 0x19);
	{ u32 x114 = ((u32)x112 & 0x1ffffff);
	{ u64 x115 = (x87 + (0x13 * x113));
	{ u32 x116 = (u32) (x115 >> 0x1a);
	{ u32 x117 = ((u32)x115 & 0x3ffffff);
	{ u32 x118 = (x116 + x90);
	{ u32 x119 = (x118 >> 0x19);
	{ u32 x120 = (x118 & 0x1ffffff);
	out[0] = x117;
	out[1] = x120;
	out[2] = (x119 + x93);
	out[3] = x96;
	out[4] = x99;
	out[5] = x102;
	out[6] = x105;
	out[7] = x108;
	out[8] = x111;
	out[9] = x114;
	}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
}

static __always_inline void fe_mul_ttt(fe *h, const fe *f, const fe *g)
{
	fe_mul_impl(h->v, f->v, g->v);
}

static __always_inline void fe_mul_tlt(fe *h, const fe_loose *f, const fe *g)
{
	fe_mul_impl(h->v, f->v, g->v);
}

static __always_inline void
fe_mul_tll(fe *h, const fe_loose *f, const fe_loose *g)
{
	fe_mul_impl(h->v, f->v, g->v);
}

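/*
 * h = f * f: specialized squaring with the same fold-and-carry structure
 * as fe_mul_impl but roughly half the partial products.
 */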
static noinline void fe_sqr_impl(u32 out[10], const u32 in1[10])
{
	{ const u32 x17 = in1[9];
	{ const u32 x18 = in1[8];
	{ const u32 x16 = in1[7];
	{ const u32 x14 = in1[6];
	{ const u32 x12 = in1[5];
	{ const u32 x10 = in1[4];
	{ const u32 x8 = in1[3];
	{ const u32 x6 = in1[2];
	{ const u32 x4 = in1[1];
	{ const u32 x2 = in1[0];
	{ u64 x19 = ((u64)x2 * x2);
	{ u64 x20 = ((u64)(0x2 * x2) * x4);
	{ u64 x21 = (0x2 * (((u64)x4 * x4) + ((u64)x2 * x6)));
	{ u64 x22 = (0x2 * (((u64)x4 * x6) + ((u64)x2 * x8)));
	{ u64 x23 = ((((u64)x6 * x6) + ((u64)(0x4 * x4) * x8)) + ((u64)(0x2 * x2) * x10));
	{ u64 x24 = (0x2 * ((((u64)x6 * x8) + ((u64)x4 * x10)) + ((u64)x2 * x12)));
	{ u64 x25 = (0x2 * (((((u64)x8 * x8) + ((u64)x6 * x10)) + ((u64)x2 * x14)) + ((u64)(0x2 * x4) * x12)));
	{ u64 x26 = (0x2 * (((((u64)x8 * x10) + ((u64)x6 * x12)) + ((u64)x4 * x14)) + ((u64)x2 * x16)));
	{ u64 x27 = (((u64)x10 * x10) + (0x2 * ((((u64)x6 * x14) + ((u64)x2 * x18)) + (0x2 * (((u64)x4 * x16) + ((u64)x8 * x12))))));
	{ u64 x28 = (0x2 * ((((((u64)x10 * x12) + ((u64)x8 * x14)) + ((u64)x6 * x16)) + ((u64)x4 * x18)) + ((u64)x2 * x17)));
	{ u64 x29 = (0x2 * (((((u64)x12 * x12) + ((u64)x10 * x14)) + ((u64)x6 * x18)) + (0x2 * (((u64)x8 * x16) + ((u64)x4 * x17)))));
	{ u64 x30 = (0x2 * (((((u64)x12 * x14) + ((u64)x10 * x16)) + ((u64)x8 * x18)) + ((u64)x6 * x17)));
	{ u64 x31 = (((u64)x14 * x14) + (0x2 * (((u64)x10 * x18) + (0x2 * (((u64)x12 * x16) + ((u64)x8 * x17))))));
	{ u64 x32 = (0x2 * ((((u64)x14 * x16) + ((u64)x12 * x18)) + ((u64)x10 * x17)));
	{ u64 x33 = (0x2 * ((((u64)x16 * x16) + ((u64)x14 * x18)) + ((u64)(0x2 * x12) * x17)));
	{ u64 x34 = (0x2 * (((u64)x16 * x18) + ((u64)x14 * x17)));
	{ u64 x35 = (((u64)x18 * x18) + ((u64)(0x4 * x16) * x17));
	{ u64 x36 = ((u64)(0x2 * x18) * x17);
	{ u64 x37 = ((u64)(0x2 * x17) * x17);
	{ u64 x38 = (x27 + (x37 << 0x4));
	{ u64 x39 = (x38 + (x37 << 0x1));
	{ u64 x40 = (x39 + x37);
	{ u64 x41 = (x26 + (x36 << 0x4));
	{ u64 x42 = (x41 + (x36 << 0x1));
	{ u64 x43 = (x42 + x36);
	{ u64 x44 = (x25 + (x35 << 0x4));
	{ u64 x45 = (x44 + (x35 << 0x1));
	{ u64 x46 = (x45 + x35);
	{ u64 x47 = (x24 + (x34 << 0x4));
	{ u64 x48 = (x47 + (x34 << 0x1));
	{ u64 x49 = (x48 + x34);
	{ u64 x50 = (x23 + (x33 << 0x4));
	{ u64 x51 = (x50 + (x33 << 0x1));
	{ u64 x52 = (x51 + x33);
	{ u64 x53 = (x22 + (x32 << 0x4));
	{ u64 x54 = (x53 + (x32 << 0x1));
	{ u64 x55 = (x54 + x32);
	{ u64 x56 = (x21 + (x31 << 0x4));
	{ u64 x57 = (x56 + (x31 << 0x1));
	{ u64 x58 = (x57 + x31);
	{ u64 x59 = (x20 + (x30 << 0x4));
	{ u64 x60 = (x59 + (x30 << 0x1));
	{ u64 x61 = (x60 + x30);
	{ u64 x62 = (x19 + (x29 << 0x4));
	{ u64 x63 = (x62 + (x29 << 0x1));
	{ u64 x64 = (x63 + x29);
	{ u64 x65 = (x64 >> 0x1a);
	{ u32 x66 = ((u32)x64 & 0x3ffffff);
	{ u64 x67 = (x65 + x61);
	{ u64 x68 = (x67 >> 0x19);
	{ u32 x69 = ((u32)x67 & 0x1ffffff);
	{ u64 x70 = (x68 + x58);
	{ u64 x71 = (x70 >> 0x1a);
	{ u32 x72 = ((u32)x70 & 0x3ffffff);
	{ u64 x73 = (x71 + x55);
	{ u64 x74 = (x73 >> 0x19);
	{ u32 x75 = ((u32)x73 & 0x1ffffff);
	{ u64 x76 = (x74 + x52);
	{ u64 x77 = (x76 >> 0x1a);
	{ u32 x78 = ((u32)x76 & 0x3ffffff);
	{ u64 x79 = (x77 + x49);
	{ u64 x80 = (x79 >> 0x19);
	{ u32 x81 = ((u32)x79 & 0x1ffffff);
	{ u64 x82 = (x80 + x46);
	{ u64 x83 = (x82 >> 0x1a);
	{ u32 x84 = ((u32)x82 & 0x3ffffff);
	{ u64 x85 = (x83 + x43);
	{ u64 x86 = (x85 >> 0x19);
	{ u32 x87 = ((u32)x85 & 0x1ffffff);
	{ u64 x88 = (x86 + x40);
	{ u64 x89 = (x88 >> 0x1a);
	{ u32 x90 = ((u32)x88 & 0x3ffffff);
	{ u64 x91 = (x89 + x28);
	{ u64 x92 = (x91 >> 0x19);
	{ u32 x93 = ((u32)x91 & 0x1ffffff);
	{ u64 x94 = (x66 + (0x13 * x92));
	{ u32 x95 = (u32) (x94 >> 0x1a);
	{ u32 x96 = ((u32)x94 & 0x3ffffff);
	{ u32 x97 = (x95 + x69);
	{ u32 x98 = (x97 >> 0x19);
	{ u32 x99 = (x97 & 0x1ffffff);
	out[0] = x96;
	out[1] = x99;
	out[2] = (x98 + x72);
	out[3] = x75;
	out[4] = x78;
	out[5] = x81;
	out[6] = x84;
	out[7] = x87;
	out[8] = x90;
	out[9] = x93;
	}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
}

static __always_inline void fe_sq_tl(fe *h, const fe_loose *f)
{
	fe_sqr_impl(h->v, f->v);
}

static __always_inline void fe_sq_tt(fe *h, const fe *f)
{
	fe_sqr_impl(h->v, f->v);
}

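/*
 * out = z^-1 = z^(p-2) (mod p), computed with a fixed square-and-multiply
 * chain so that it runs in constant time.
 */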
static __always_inline void fe_loose_invert(fe *out, const fe_loose *z)
{
	fe t0;
	fe t1;
	fe t2;
	fe t3;
	int i;

	fe_sq_tl(&t0, z);
	fe_sq_tt(&t1, &t0);
	for (i = 1; i < 2; ++i)
		fe_sq_tt(&t1, &t1);
	fe_mul_tlt(&t1, z, &t1);
	fe_mul_ttt(&t0, &t0, &t1);
	fe_sq_tt(&t2, &t0);
	fe_mul_ttt(&t1, &t1, &t2);
	fe_sq_tt(&t2, &t1);
	for (i = 1; i < 5; ++i)
		fe_sq_tt(&t2, &t2);
	fe_mul_ttt(&t1, &t2, &t1);
	fe_sq_tt(&t2, &t1);
	for (i = 1; i < 10; ++i)
		fe_sq_tt(&t2, &t2);
	fe_mul_ttt(&t2, &t2, &t1);
	fe_sq_tt(&t3, &t2);
	for (i = 1; i < 20; ++i)
		fe_sq_tt(&t3, &t3);
	fe_mul_ttt(&t2, &t3, &t2);
	fe_sq_tt(&t2, &t2);
	for (i = 1; i < 10; ++i)
		fe_sq_tt(&t2, &t2);
	fe_mul_ttt(&t1, &t2, &t1);
	fe_sq_tt(&t2, &t1);
	for (i = 1; i < 50; ++i)
		fe_sq_tt(&t2, &t2);
	fe_mul_ttt(&t2, &t2, &t1);
	fe_sq_tt(&t3, &t2);
	for (i = 1; i < 100; ++i)
		fe_sq_tt(&t3, &t3);
	fe_mul_ttt(&t2, &t3, &t2);
	fe_sq_tt(&t2, &t2);
	for (i = 1; i < 50; ++i)
		fe_sq_tt(&t2, &t2);
	fe_mul_ttt(&t1, &t2, &t1);
	fe_sq_tt(&t1, &t1);
	for (i = 1; i < 5; ++i)
		fe_sq_tt(&t1, &t1);
	fe_mul_ttt(out, &t1, &t0);
}

static __always_inline void fe_invert(fe *out, const fe *z)
{
	fe_loose l;

	fe_copy_lt(&l, z);
	fe_loose_invert(out, &l);
}

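/*
 * Replace (f, g) with (g, f) if b == 1, leave them unchanged if b == 0,
 * without branching on b (constant time). b must be 0 or 1.
 */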
static noinline void fe_cswap(fe *f, fe *g, unsigned int b)
{
	unsigned i;

	b = 0 - b;
	for (i = 0; i < 10; i++) {
		u32 x = f->v[i] ^ g->v[i];

		x &= b;
		f->v[i] ^= x;
		g->v[i] ^= x;
	}
}

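/*
 * Multiply by the curve constant 121666 = (A + 2)/4 for A = 486662.
 * This is fe_mul_impl with the second operand fixed to (121666, 0, ..., 0);
 * the zero terms are folded away at compile time.
 */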
static __always_inline void fe_mul_121666_impl(u32 out[10], const u32 in1[10])
{
	{ const u32 x20 = in1[9];
	{ const u32 x21 = in1[8];
	{ const u32 x19 = in1[7];
	{ const u32 x17 = in1[6];
	{ const u32 x15 = in1[5];
	{ const u32 x13 = in1[4];
	{ const u32 x11 = in1[3];
	{ const u32 x9 = in1[2];
	{ const u32 x7 = in1[1];
	{ const u32 x5 = in1[0];
	{ const u32 x38 = 0;
	{ const u32 x39 = 0;
	{ const u32 x37 = 0;
	{ const u32 x35 = 0;
	{ const u32 x33 = 0;
	{ const u32 x31 = 0;
	{ const u32 x29 = 0;
	{ const u32 x27 = 0;
	{ const u32 x25 = 0;
	{ const u32 x23 = 121666;
	{ u64 x40 = ((u64)x23 * x5);
	{ u64 x41 = (((u64)x23 * x7) + ((u64)x25 * x5));
	{ u64 x42 = ((((u64)(0x2 * x25) * x7) + ((u64)x23 * x9)) + ((u64)x27 * x5));
	{ u64 x43 = (((((u64)x25 * x9) + ((u64)x27 * x7)) + ((u64)x23 * x11)) + ((u64)x29 * x5));
	{ u64 x44 = (((((u64)x27 * x9) + (0x2 * (((u64)x25 * x11) + ((u64)x29 * x7)))) + ((u64)x23 * x13)) + ((u64)x31 * x5));
	{ u64 x45 = (((((((u64)x27 * x11) + ((u64)x29 * x9)) + ((u64)x25 * x13)) + ((u64)x31 * x7)) + ((u64)x23 * x15)) + ((u64)x33 * x5));
	{ u64 x46 = (((((0x2 * ((((u64)x29 * x11) + ((u64)x25 * x15)) + ((u64)x33 * x7))) + ((u64)x27 * x13)) + ((u64)x31 * x9)) + ((u64)x23 * x17)) + ((u64)x35 * x5));
	{ u64 x47 = (((((((((u64)x29 * x13) + ((u64)x31 * x11)) + ((u64)x27 * x15)) + ((u64)x33 * x9)) + ((u64)x25 * x17)) + ((u64)x35 * x7)) + ((u64)x23 * x19)) + ((u64)x37 * x5));
	{ u64 x48 = (((((((u64)x31 * x13) + (0x2 * (((((u64)x29 * x15) + ((u64)x33 * x11)) + ((u64)x25 * x19)) + ((u64)x37 * x7)))) + ((u64)x27 * x17)) + ((u64)x35 * x9)) + ((u64)x23 * x21)) + ((u64)x39 * x5));
	{ u64 x49 = (((((((((((u64)x31 * x15) + ((u64)x33 * x13)) + ((u64)x29 * x17)) + ((u64)x35 * x11)) + ((u64)x27 * x19)) + ((u64)x37 * x9)) + ((u64)x25 * x21)) + ((u64)x39 * x7)) + ((u64)x23 * x20)) + ((u64)x38 * x5));
	{ u64 x50 = (((((0x2 * ((((((u64)x33 * x15) + ((u64)x29 * x19)) + ((u64)x37 * x11)) + ((u64)x25 * x20)) + ((u64)x38 * x7))) + ((u64)x31 * x17)) + ((u64)x35 * x13)) + ((u64)x27 * x21)) + ((u64)x39 * x9));
	{ u64 x51 = (((((((((u64)x33 * x17) + ((u64)x35 * x15)) + ((u64)x31 * x19)) + ((u64)x37 * x13)) + ((u64)x29 * x21)) + ((u64)x39 * x11)) + ((u64)x27 * x20)) + ((u64)x38 * x9));
	{ u64 x52 = (((((u64)x35 * x17) + (0x2 * (((((u64)x33 * x19) + ((u64)x37 * x15)) + ((u64)x29 * x20)) + ((u64)x38 * x11)))) + ((u64)x31 * x21)) + ((u64)x39 * x13));
	{ u64 x53 = (((((((u64)x35 * x19) + ((u64)x37 * x17)) + ((u64)x33 * x21)) + ((u64)x39 * x15)) + ((u64)x31 * x20)) + ((u64)x38 * x13));
	{ u64 x54 = (((0x2 * ((((u64)x37 * x19) + ((u64)x33 * x20)) + ((u64)x38 * x15))) + ((u64)x35 * x21)) + ((u64)x39 * x17));
	{ u64 x55 = (((((u64)x37 * x21) + ((u64)x39 * x19)) + ((u64)x35 * x20)) + ((u64)x38 * x17));
	{ u64 x56 = (((u64)x39 * x21) + (0x2 * (((u64)x37 * x20) + ((u64)x38 * x19))));
	{ u64 x57 = (((u64)x39 * x20) + ((u64)x38 * x21));
	{ u64 x58 = ((u64)(0x2 * x38) * x20);
	{ u64 x59 = (x48 + (x58 << 0x4));
	{ u64 x60 = (x59 + (x58 << 0x1));
	{ u64 x61 = (x60 + x58);
	{ u64 x62 = (x47 + (x57 << 0x4));
	{ u64 x63 = (x62 + (x57 << 0x1));
	{ u64 x64 = (x63 + x57);
	{ u64 x65 = (x46 + (x56 << 0x4));
	{ u64 x66 = (x65 + (x56 << 0x1));
	{ u64 x67 = (x66 + x56);
	{ u64 x68 = (x45 + (x55 << 0x4));
	{ u64 x69 = (x68 + (x55 << 0x1));
	{ u64 x70 = (x69 + x55);
	{ u64 x71 = (x44 + (x54 << 0x4));
	{ u64 x72 = (x71 + (x54 << 0x1));
	{ u64 x73 = (x72 + x54);
	{ u64 x74 = (x43 + (x53 << 0x4));
	{ u64 x75 = (x74 + (x53 << 0x1));
	{ u64 x76 = (x75 + x53);
	{ u64 x77 = (x42 + (x52 << 0x4));
	{ u64 x78 = (x77 + (x52 << 0x1));
	{ u64 x79 = (x78 + x52);
	{ u64 x80 = (x41 + (x51 << 0x4));
	{ u64 x81 = (x80 + (x51 << 0x1));
	{ u64 x82 = (x81 + x51);
	{ u64 x83 = (x40 + (x50 << 0x4));
	{ u64 x84 = (x83 + (x50 << 0x1));
	{ u64 x85 = (x84 + x50);
	{ u64 x86 = (x85 >> 0x1a);
	{ u32 x87 = ((u32)x85 & 0x3ffffff);
	{ u64 x88 = (x86 + x82);
	{ u64 x89 = (x88 >> 0x19);
	{ u32 x90 = ((u32)x88 & 0x1ffffff);
	{ u64 x91 = (x89 + x79);
	{ u64 x92 = (x91 >> 0x1a);
	{ u32 x93 = ((u32)x91 & 0x3ffffff);
	{ u64 x94 = (x92 + x76);
	{ u64 x95 = (x94 >> 0x19);
	{ u32 x96 = ((u32)x94 & 0x1ffffff);
	{ u64 x97 = (x95 + x73);
	{ u64 x98 = (x97 >> 0x1a);
	{ u32 x99 = ((u32)x97 & 0x3ffffff);
	{ u64 x100 = (x98 + x70);
	{ u64 x101 = (x100 >> 0x19);
	{ u32 x102 = ((u32)x100 & 0x1ffffff);
	{ u64 x103 = (x101 + x67);
	{ u64 x104 = (x103 >> 0x1a);
	{ u32 x105 = ((u32)x103 & 0x3ffffff);
	{ u64 x106 = (x104 + x64);
	{ u64 x107 = (x106 >> 0x19);
	{ u32 x108 = ((u32)x106 & 0x1ffffff);
	{ u64 x109 = (x107 + x61);
	{ u64 x110 = (x109 >> 0x1a);
	{ u32 x111 = ((u32)x109 & 0x3ffffff);
	{ u64 x112 = (x110 + x49);
	{ u64 x113 = (x112 >> 0x19);
	{ u32 x114 = ((u32)x112 & 0x1ffffff);
	{ u64 x115 = (x87 + (0x13 * x113));
	{ u32 x116 = (u32) (x115 >> 0x1a);
	{ u32 x117 = ((u32)x115 & 0x3ffffff);
	{ u32 x118 = (x116 + x90);
	{ u32 x119 = (x118 >> 0x19);
	{ u32 x120 = (x118 & 0x1ffffff);
	out[0] = x117;
	out[1] = x120;
	out[2] = (x119 + x93);
	out[3] = x96;
	out[4] = x99;
	out[5] = x102;
	out[6] = x105;
	out[7] = x108;
	out[8] = x111;
	out[9] = x114;
	}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
}

static __always_inline void fe_mul121666(fe *h, const fe_loose *f)
{
	fe_mul_121666_impl(h->v, f->v);
}

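/*
 * X25519 scalar multiplication: out = x-coordinate of scalar * point,
 * computed with a Montgomery ladder over the clamped scalar.
 */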
void curve25519_generic(u8 out[CURVE25519_KEY_SIZE],
			const u8 scalar[CURVE25519_KEY_SIZE],
			const u8 point[CURVE25519_KEY_SIZE])
{
	fe x1, x2, z2, x3, z3;
	fe_loose x2l, z2l, x3l;
	unsigned swap = 0;
	int pos;
	u8 e[32];

	memcpy(e, scalar, 32);
	curve25519_clamp_secret(e);

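	/*
	 * Montgomery ladder: x1 is the input x-coordinate; (x2, z2) and
	 * (x3, z3) are projective x-coordinates of the two running points,
	 * processed one scalar bit at a time from bit 254 down to bit 0.
	 */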
	fe_frombytes(&x1, point);
	fe_1(&x2);
	fe_0(&z2);
	fe_copy(&x3, &x1);
	fe_1(&z3);

	for (pos = 254; pos >= 0; --pos) {
		fe tmp0, tmp1;
		fe_loose tmp0l, tmp1l;

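		/*
		 * Extract scalar bit `pos` and swap the working points only
		 * when it differs from the previous bit, using the
		 * constant-time fe_cswap rather than a data-dependent branch.
		 */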
		unsigned b = 1 & (e[pos / 8] >> (pos & 7));

		swap ^= b;
		fe_cswap(&x2, &x3, swap);
		fe_cswap(&z2, &z3, swap);
		swap = b;

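		/*
		 * One ladder step: a combined differential addition of the
		 * two points and a doubling, using only x/z coordinates.
		 */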
		fe_sub(&tmp0l, &x3, &z3);
		fe_sub(&tmp1l, &x2, &z2);
		fe_add(&x2l, &x2, &z2);
		fe_add(&z2l, &x3, &z3);
		fe_mul_tll(&z3, &tmp0l, &x2l);
		fe_mul_tll(&z2, &z2l, &tmp1l);
		fe_sq_tl(&tmp0, &tmp1l);
		fe_sq_tl(&tmp1, &x2l);
		fe_add(&x3l, &z3, &z2);
		fe_sub(&z2l, &z3, &z2);
		fe_mul_ttt(&x2, &tmp1, &tmp0);
		fe_sub(&tmp1l, &tmp1, &tmp0);
		fe_sq_tl(&z2, &z2l);
		fe_mul121666(&z3, &tmp1l);
		fe_sq_tl(&x3, &x3l);
		fe_add(&tmp0l, &tmp0, &z3);
		fe_mul_ttt(&z3, &x1, &z2);
		fe_mul_tll(&z2, &tmp1l, &tmp0l);
	}

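	/*
	 * Undo the last conditional swap, then convert the result to affine
	 * form: x = x2/z2, serialized as 32 little-endian bytes.
	 */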
	fe_cswap(&x2, &x3, swap);
	fe_cswap(&z2, &z3, swap);

	fe_invert(&z2, &z2);
	fe_mul_ttt(&x2, &x2, &z2);
	fe_tobytes(out, &x2);

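	/* Wipe intermediate values that depend on the secret scalar. */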
	memzero_explicit(&x1, sizeof(x1));
	memzero_explicit(&x2, sizeof(x2));
	memzero_explicit(&z2, sizeof(z2));
	memzero_explicit(&x3, sizeof(x3));
	memzero_explicit(&z3, sizeof(z3));
	memzero_explicit(&x2l, sizeof(x2l));
	memzero_explicit(&z2l, sizeof(z2l));
	memzero_explicit(&x3l, sizeof(x3l));
	memzero_explicit(&e, sizeof(e));
}