lib/crypto/curve25519-fiat32.c

0001 // SPDX-License-Identifier: GPL-2.0 OR MIT
0002 /*
0003  * Copyright (C) 2015-2016 The fiat-crypto Authors.
0004  * Copyright (C) 2018-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
0005  *
0006  * This is a machine-generated formally verified implementation of Curve25519
0007  * ECDH from: <https://github.com/mit-plv/fiat-crypto>. Though originally
0008  * machine generated, it has been tweaked to be suitable for use in the kernel.
0009  * It is optimized for 32-bit machines and machines that cannot work efficiently
0010  * with 128-bit integer types.
0011  */
0012
0013 #include <asm/unaligned.h>
0014 #include <crypto/curve25519.h>
0015 #include <linux/string.h>
0016
0017 /* fe means field element. Here the field is \Z/(2^255-19). An element t,
0018  * entries t[0]...t[9], represents the integer t[0]+2^26 t[1]+2^51 t[2]+2^77
0019  * t[3]+2^102 t[4]+...+2^230 t[9].
0020  * fe limbs are bounded by 1.125*2^26,1.125*2^25,1.125*2^26,1.125*2^25,etc.
0021  * Multiplication and carrying produce fe from fe_loose.
0022  */
0023 typedef struct fe { u32 v[10]; } fe;
0024
0025 /* fe_loose limbs are bounded by 3.375*2^26,3.375*2^25,3.375*2^26,3.375*2^25,etc
0026  * Addition and subtraction produce fe_loose from (fe, fe).
0027  */
0028 typedef struct fe_loose { u32 v[10]; } fe_loose;
0029
0030 static __always_inline void fe_frombytes_impl(u32 h[10], const u8 *s)
0031 {
0032     /* Ignores top bit of s. */
0033     u32 a0 = get_unaligned_le32(s);
0034     u32 a1 = get_unaligned_le32(s+4);
0035     u32 a2 = get_unaligned_le32(s+8);
0036     u32 a3 = get_unaligned_le32(s+12);
0037     u32 a4 = get_unaligned_le32(s+16);
0038     u32 a5 = get_unaligned_le32(s+20);
0039     u32 a6 = get_unaligned_le32(s+24);
0040     u32 a7 = get_unaligned_le32(s+28);
0041     h[0] = a0&((1<<26)-1);                    /* 26 used, 32-26 left.   26 */
0042     h[1] = (a0>>26) | ((a1&((1<<19)-1))<< 6); /* (32-26) + 19 =  6+19 = 25 */
0043     h[2] = (a1>>19) | ((a2&((1<<13)-1))<<13); /* (32-19) + 13 = 13+13 = 26 */
0044     h[3] = (a2>>13) | ((a3&((1<< 6)-1))<<19); /* (32-13) +  6 = 19+ 6 = 25 */
0045     h[4] = (a3>> 6);                          /* (32- 6)              = 26 */
0046     h[5] = a4&((1<<25)-1);                    /*                        25 */
0047     h[6] = (a4>>25) | ((a5&((1<<19)-1))<< 7); /* (32-25) + 19 =  7+19 = 26 */
0048     h[7] = (a5>>19) | ((a6&((1<<12)-1))<<13); /* (32-19) + 12 = 13+12 = 25 */
0049     h[8] = (a6>>12) | ((a7&((1<< 6)-1))<<20); /* (32-12) +  6 = 20+ 6 = 26 */
0050     h[9] = (a7>> 6)&((1<<25)-1); /*                                     25 */
0051 }
0052
0053 static __always_inline void fe_frombytes(fe *h, const u8 *s)
0054 {
0055     fe_frombytes_impl(h->v, s);
0056 }
0057
0058 static __always_inline u8 /*bool*/
0059 addcarryx_u25(u8 /*bool*/ c, u32 a, u32 b, u32 *low)
0060 {
0061     /* This function extracts 25 bits of result and 1 bit of carry
0062      * (26 total), so a 32-bit intermediate is sufficient.
0063      */
0064     u32 x = a + b + c;
0065     *low = x & ((1 << 25) - 1);
0066     return (x >> 25) & 1;
0067 }
0068
0069 static __always_inline u8 /*bool*/
0070 addcarryx_u26(u8 /*bool*/ c, u32 a, u32 b, u32 *low)
0071 {
0072     /* This function extracts 26 bits of result and 1 bit of carry
0073      * (27 total), so a 32-bit intermediate is sufficient.
0074      */
0075     u32 x = a + b + c;
0076     *low = x & ((1 << 26) - 1);
0077     return (x >> 26) & 1;
0078 }
0079
0080 static __always_inline u8 /*bool*/
0081 subborrow_u25(u8 /*bool*/ c, u32 a, u32 b, u32 *low)
0082 {
0083     /* This function extracts 25 bits of result and 1 bit of borrow
0084      * (26 total), so a 32-bit intermediate is sufficient.
0085      */
0086     u32 x = a - b - c;
0087     *low = x & ((1 << 25) - 1);
0088     return x >> 31;
0089 }
0090
0091 static __always_inline u8 /*bool*/
0092 subborrow_u26(u8 /*bool*/ c, u32 a, u32 b, u32 *low)
0093 {
0094     /* This function extracts 26 bits of result and 1 bit of borrow
0095      *(27 total), so a 32-bit intermediate is sufficient.
0096      */
0097     u32 x = a - b - c;
0098     *low = x & ((1 << 26) - 1);
0099     return x >> 31;
0100 }
0101
0102 static __always_inline u32 cmovznz32(u32 t, u32 z, u32 nz)
0103 {
0104     t = -!!t; /* all set if nonzero, 0 if 0 */
0105     return (t&nz) | ((~t)&z);
0106 }
0107
0108 static __always_inline void fe_freeze(u32 out[10], const u32 in1[10])
0109 {
0110     { const u32 x17 = in1[9];
0111     { const u32 x18 = in1[8];
0112     { const u32 x16 = in1[7];
0113     { const u32 x14 = in1[6];
0114     { const u32 x12 = in1[5];
0115     { const u32 x10 = in1[4];
0116     { const u32 x8 = in1[3];
0117     { const u32 x6 = in1[2];
0118     { const u32 x4 = in1[1];
0119     { const u32 x2 = in1[0];
0120     { u32 x20; u8/*bool*/ x21 = subborrow_u26(0x0, x2, 0x3ffffed, &x20);
0121     { u32 x23; u8/*bool*/ x24 = subborrow_u25(x21, x4, 0x1ffffff, &x23);
0122     { u32 x26; u8/*bool*/ x27 = subborrow_u26(x24, x6, 0x3ffffff, &x26);
0123     { u32 x29; u8/*bool*/ x30 = subborrow_u25(x27, x8, 0x1ffffff, &x29);
0124     { u32 x32; u8/*bool*/ x33 = subborrow_u26(x30, x10, 0x3ffffff, &x32);
0125     { u32 x35; u8/*bool*/ x36 = subborrow_u25(x33, x12, 0x1ffffff, &x35);
0126     { u32 x38; u8/*bool*/ x39 = subborrow_u26(x36, x14, 0x3ffffff, &x38);
0127     { u32 x41; u8/*bool*/ x42 = subborrow_u25(x39, x16, 0x1ffffff, &x41);
0128     { u32 x44; u8/*bool*/ x45 = subborrow_u26(x42, x18, 0x3ffffff, &x44);
0129     { u32 x47; u8/*bool*/ x48 = subborrow_u25(x45, x17, 0x1ffffff, &x47);
0130     { u32 x49 = cmovznz32(x48, 0x0, 0xffffffff);
0131     { u32 x50 = (x49 & 0x3ffffed);
0132     { u32 x52; u8/*bool*/ x53 = addcarryx_u26(0x0, x20, x50, &x52);
0133     { u32 x54 = (x49 & 0x1ffffff);
0134     { u32 x56; u8/*bool*/ x57 = addcarryx_u25(x53, x23, x54, &x56);
0135     { u32 x58 = (x49 & 0x3ffffff);
0136     { u32 x60; u8/*bool*/ x61 = addcarryx_u26(x57, x26, x58, &x60);
0137     { u32 x62 = (x49 & 0x1ffffff);
0138     { u32 x64; u8/*bool*/ x65 = addcarryx_u25(x61, x29, x62, &x64);
0139     { u32 x66 = (x49 & 0x3ffffff);
0140     { u32 x68; u8/*bool*/ x69 = addcarryx_u26(x65, x32, x66, &x68);
0141     { u32 x70 = (x49 & 0x1ffffff);
0142     { u32 x72; u8/*bool*/ x73 = addcarryx_u25(x69, x35, x70, &x72);
0143     { u32 x74 = (x49 & 0x3ffffff);
0144     { u32 x76; u8/*bool*/ x77 = addcarryx_u26(x73, x38, x74, &x76);
0145     { u32 x78 = (x49 & 0x1ffffff);
0146     { u32 x80; u8/*bool*/ x81 = addcarryx_u25(x77, x41, x78, &x80);
0147     { u32 x82 = (x49 & 0x3ffffff);
0148     { u32 x84; u8/*bool*/ x85 = addcarryx_u26(x81, x44, x82, &x84);
0149     { u32 x86 = (x49 & 0x1ffffff);
0150     { u32 x88; addcarryx_u25(x85, x47, x86, &x88);
0151     out[0] = x52;
0152     out[1] = x56;
0153     out[2] = x60;
0154     out[3] = x64;
0155     out[4] = x68;
0156     out[5] = x72;
0157     out[6] = x76;
0158     out[7] = x80;
0159     out[8] = x84;
0160     out[9] = x88;
0161     }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
0162 }
0163
0164 static __always_inline void fe_tobytes(u8 s[32], const fe *f)
0165 {
0166     u32 h[10];
0167     fe_freeze(h, f->v);
0168     s[0] = h[0] >> 0;
0169     s[1] = h[0] >> 8;
0170     s[2] = h[0] >> 16;
0171     s[3] = (h[0] >> 24) | (h[1] << 2);
0172     s[4] = h[1] >> 6;
0173     s[5] = h[1] >> 14;
0174     s[6] = (h[1] >> 22) | (h[2] << 3);
0175     s[7] = h[2] >> 5;
0176     s[8] = h[2] >> 13;
0177     s[9] = (h[2] >> 21) | (h[3] << 5);
0178     s[10] = h[3] >> 3;
0179     s[11] = h[3] >> 11;
0180     s[12] = (h[3] >> 19) | (h[4] << 6);
0181     s[13] = h[4] >> 2;
0182     s[14] = h[4] >> 10;
0183     s[15] = h[4] >> 18;
0184     s[16] = h[5] >> 0;
0185     s[17] = h[5] >> 8;
0186     s[18] = h[5] >> 16;
0187     s[19] = (h[5] >> 24) | (h[6] << 1);
0188     s[20] = h[6] >> 7;
0189     s[21] = h[6] >> 15;
0190     s[22] = (h[6] >> 23) | (h[7] << 3);
0191     s[23] = h[7] >> 5;
0192     s[24] = h[7] >> 13;
0193     s[25] = (h[7] >> 21) | (h[8] << 4);
0194     s[26] = h[8] >> 4;
0195     s[27] = h[8] >> 12;
0196     s[28] = (h[8] >> 20) | (h[9] << 6);
0197     s[29] = h[9] >> 2;
0198     s[30] = h[9] >> 10;
0199     s[31] = h[9] >> 18;
0200 }
0201
0202 /* h = f */
0203 static __always_inline void fe_copy(fe *h, const fe *f)
0204 {
0205     memmove(h, f, sizeof(u32) * 10);
0206 }
0207
0208 static __always_inline void fe_copy_lt(fe_loose *h, const fe *f)
0209 {
0210     memmove(h, f, sizeof(u32) * 10);
0211 }
0212
0213 /* h = 0 */
0214 static __always_inline void fe_0(fe *h)
0215 {
0216     memset(h, 0, sizeof(u32) * 10);
0217 }
0218
0219 /* h = 1 */
0220 static __always_inline void fe_1(fe *h)
0221 {
0222     memset(h, 0, sizeof(u32) * 10);
0223     h->v[0] = 1;
0224 }
0225
0226 static noinline void fe_add_impl(u32 out[10], const u32 in1[10], const u32 in2[10])
0227 {
0228     { const u32 x20 = in1[9];
0229     { const u32 x21 = in1[8];
0230     { const u32 x19 = in1[7];
0231     { const u32 x17 = in1[6];
0232     { const u32 x15 = in1[5];
0233     { const u32 x13 = in1[4];
0234     { const u32 x11 = in1[3];
0235     { const u32 x9 = in1[2];
0236     { const u32 x7 = in1[1];
0237     { const u32 x5 = in1[0];
0238     { const u32 x38 = in2[9];
0239     { const u32 x39 = in2[8];
0240     { const u32 x37 = in2[7];
0241     { const u32 x35 = in2[6];
0242     { const u32 x33 = in2[5];
0243     { const u32 x31 = in2[4];
0244     { const u32 x29 = in2[3];
0245     { const u32 x27 = in2[2];
0246     { const u32 x25 = in2[1];
0247     { const u32 x23 = in2[0];
0248     out[0] = (x5 + x23);
0249     out[1] = (x7 + x25);
0250     out[2] = (x9 + x27);
0251     out[3] = (x11 + x29);
0252     out[4] = (x13 + x31);
0253     out[5] = (x15 + x33);
0254     out[6] = (x17 + x35);
0255     out[7] = (x19 + x37);
0256     out[8] = (x21 + x39);
0257     out[9] = (x20 + x38);
0258     }}}}}}}}}}}}}}}}}}}}
0259 }
0260
0261 /* h = f + g
0262  * Can overlap h with f or g.
0263  */
0264 static __always_inline void fe_add(fe_loose *h, const fe *f, const fe *g)
0265 {
0266     fe_add_impl(h->v, f->v, g->v);
0267 }
0268
0269 static noinline void fe_sub_impl(u32 out[10], const u32 in1[10], const u32 in2[10])
0270 {
0271     { const u32 x20 = in1[9];
0272     { const u32 x21 = in1[8];
0273     { const u32 x19 = in1[7];
0274     { const u32 x17 = in1[6];
0275     { const u32 x15 = in1[5];
0276     { const u32 x13 = in1[4];
0277     { const u32 x11 = in1[3];
0278     { const u32 x9 = in1[2];
0279     { const u32 x7 = in1[1];
0280     { const u32 x5 = in1[0];
0281     { const u32 x38 = in2[9];
0282     { const u32 x39 = in2[8];
0283     { const u32 x37 = in2[7];
0284     { const u32 x35 = in2[6];
0285     { const u32 x33 = in2[5];
0286     { const u32 x31 = in2[4];
0287     { const u32 x29 = in2[3];
0288     { const u32 x27 = in2[2];
0289     { const u32 x25 = in2[1];
0290     { const u32 x23 = in2[0];
0291     out[0] = ((0x7ffffda + x5) - x23);
0292     out[1] = ((0x3fffffe + x7) - x25);
0293     out[2] = ((0x7fffffe + x9) - x27);
0294     out[3] = ((0x3fffffe + x11) - x29);
0295     out[4] = ((0x7fffffe + x13) - x31);
0296     out[5] = ((0x3fffffe + x15) - x33);
0297     out[6] = ((0x7fffffe + x17) - x35);
0298     out[7] = ((0x3fffffe + x19) - x37);
0299     out[8] = ((0x7fffffe + x21) - x39);
0300     out[9] = ((0x3fffffe + x20) - x38);
0301     }}}}}}}}}}}}}}}}}}}}
0302 }
0303
0304 /* h = f - g
0305  * Can overlap h with f or g.
0306  */
0307 static __always_inline void fe_sub(fe_loose *h, const fe *f, const fe *g)
0308 {
0309     fe_sub_impl(h->v, f->v, g->v);
0310 }
0311
/* out = in1 * in2 in the field, on ten-limb operands with alternating
 * 26/25-bit limb weights.
 *
 * Structure of the generated code:
 *   - all twenty input limbs are loaded into locals before out is written;
 *   - x40..x58 are the nineteen 64-bit column sums of the schoolbook product
 *     (terms pairing two odd-indexed limbs are doubled);
 *   - x59..x85 fold columns x50..x58 into x40..x48 multiplied by 19,
 *     computed as (v << 4) + (v << 1) + v;
 *   - x86..x114 run a carry chain that splits each column into a 26- or
 *     25-bit limb;
 *   - x115..x120 multiply the final carry by 19 (0x13), add it to limb 0 and
 *     carry once more through limbs 1 and 2.
 */
0312 static noinline void fe_mul_impl(u32 out[10], const u32 in1[10], const u32 in2[10])
0313 {
    /* Load the limbs of in1 (x5..x21) and in2 (x23..x39). */
0314     { const u32 x20 = in1[9];
0315     { const u32 x21 = in1[8];
0316     { const u32 x19 = in1[7];
0317     { const u32 x17 = in1[6];
0318     { const u32 x15 = in1[5];
0319     { const u32 x13 = in1[4];
0320     { const u32 x11 = in1[3];
0321     { const u32 x9 = in1[2];
0322     { const u32 x7 = in1[1];
0323     { const u32 x5 = in1[0];
0324     { const u32 x38 = in2[9];
0325     { const u32 x39 = in2[8];
0326     { const u32 x37 = in2[7];
0327     { const u32 x35 = in2[6];
0328     { const u32 x33 = in2[5];
0329     { const u32 x31 = in2[4];
0330     { const u32 x29 = in2[3];
0331     { const u32 x27 = in2[2];
0332     { const u32 x25 = in2[1];
0333     { const u32 x23 = in2[0];
    /* Column sums: x40 is column 0, x58 is column 18. */
0334     { u64 x40 = ((u64)x23 * x5);
0335     { u64 x41 = (((u64)x23 * x7) + ((u64)x25 * x5));
0336     { u64 x42 = ((((u64)(0x2 * x25) * x7) + ((u64)x23 * x9)) + ((u64)x27 * x5));
0337     { u64 x43 = (((((u64)x25 * x9) + ((u64)x27 * x7)) + ((u64)x23 * x11)) + ((u64)x29 * x5));
0338     { u64 x44 = (((((u64)x27 * x9) + (0x2 * (((u64)x25 * x11) + ((u64)x29 * x7)))) + ((u64)x23 * x13)) + ((u64)x31 * x5));
0339     { u64 x45 = (((((((u64)x27 * x11) + ((u64)x29 * x9)) + ((u64)x25 * x13)) + ((u64)x31 * x7)) + ((u64)x23 * x15)) + ((u64)x33 * x5));
0340     { u64 x46 = (((((0x2 * ((((u64)x29 * x11) + ((u64)x25 * x15)) + ((u64)x33 * x7))) + ((u64)x27 * x13)) + ((u64)x31 * x9)) + ((u64)x23 * x17)) + ((u64)x35 * x5));
0341     { u64 x47 = (((((((((u64)x29 * x13) + ((u64)x31 * x11)) + ((u64)x27 * x15)) + ((u64)x33 * x9)) + ((u64)x25 * x17)) + ((u64)x35 * x7)) + ((u64)x23 * x19)) + ((u64)x37 * x5));
0342     { u64 x48 = (((((((u64)x31 * x13) + (0x2 * (((((u64)x29 * x15) + ((u64)x33 * x11)) + ((u64)x25 * x19)) + ((u64)x37 * x7)))) + ((u64)x27 * x17)) + ((u64)x35 * x9)) + ((u64)x23 * x21)) + ((u64)x39 * x5));
0343     { u64 x49 = (((((((((((u64)x31 * x15) + ((u64)x33 * x13)) + ((u64)x29 * x17)) + ((u64)x35 * x11)) + ((u64)x27 * x19)) + ((u64)x37 * x9)) + ((u64)x25 * x21)) + ((u64)x39 * x7)) + ((u64)x23 * x20)) + ((u64)x38 * x5));
0344     { u64 x50 = (((((0x2 * ((((((u64)x33 * x15) + ((u64)x29 * x19)) + ((u64)x37 * x11)) + ((u64)x25 * x20)) + ((u64)x38 * x7))) + ((u64)x31 * x17)) + ((u64)x35 * x13)) + ((u64)x27 * x21)) + ((u64)x39 * x9));
0345     { u64 x51 = (((((((((u64)x33 * x17) + ((u64)x35 * x15)) + ((u64)x31 * x19)) + ((u64)x37 * x13)) + ((u64)x29 * x21)) + ((u64)x39 * x11)) + ((u64)x27 * x20)) + ((u64)x38 * x9));
0346     { u64 x52 = (((((u64)x35 * x17) + (0x2 * (((((u64)x33 * x19) + ((u64)x37 * x15)) + ((u64)x29 * x20)) + ((u64)x38 * x11)))) + ((u64)x31 * x21)) + ((u64)x39 * x13));
0347     { u64 x53 = (((((((u64)x35 * x19) + ((u64)x37 * x17)) + ((u64)x33 * x21)) + ((u64)x39 * x15)) + ((u64)x31 * x20)) + ((u64)x38 * x13));
0348     { u64 x54 = (((0x2 * ((((u64)x37 * x19) + ((u64)x33 * x20)) + ((u64)x38 * x15))) + ((u64)x35 * x21)) + ((u64)x39 * x17));
0349     { u64 x55 = (((((u64)x37 * x21) + ((u64)x39 * x19)) + ((u64)x35 * x20)) + ((u64)x38 * x17));
0350     { u64 x56 = (((u64)x39 * x21) + (0x2 * (((u64)x37 * x20) + ((u64)x38 * x19))));
0351     { u64 x57 = (((u64)x39 * x20) + ((u64)x38 * x21));
0352     { u64 x58 = ((u64)(0x2 * x38) * x20);
    /* Fold columns 10..18 into columns 0..8: low += 19 * high, where
     * 19 * v is built as (v << 4) + (v << 1) + v. Column 9 (x49) has no
     * partner and is used unchanged.
     */
0353     { u64 x59 = (x48 + (x58 << 0x4));
0354     { u64 x60 = (x59 + (x58 << 0x1));
0355     { u64 x61 = (x60 + x58);
0356     { u64 x62 = (x47 + (x57 << 0x4));
0357     { u64 x63 = (x62 + (x57 << 0x1));
0358     { u64 x64 = (x63 + x57);
0359     { u64 x65 = (x46 + (x56 << 0x4));
0360     { u64 x66 = (x65 + (x56 << 0x1));
0361     { u64 x67 = (x66 + x56);
0362     { u64 x68 = (x45 + (x55 << 0x4));
0363     { u64 x69 = (x68 + (x55 << 0x1));
0364     { u64 x70 = (x69 + x55);
0365     { u64 x71 = (x44 + (x54 << 0x4));
0366     { u64 x72 = (x71 + (x54 << 0x1));
0367     { u64 x73 = (x72 + x54);
0368     { u64 x74 = (x43 + (x53 << 0x4));
0369     { u64 x75 = (x74 + (x53 << 0x1));
0370     { u64 x76 = (x75 + x53);
0371     { u64 x77 = (x42 + (x52 << 0x4));
0372     { u64 x78 = (x77 + (x52 << 0x1));
0373     { u64 x79 = (x78 + x52);
0374     { u64 x80 = (x41 + (x51 << 0x4));
0375     { u64 x81 = (x80 + (x51 << 0x1));
0376     { u64 x82 = (x81 + x51);
0377     { u64 x83 = (x40 + (x50 << 0x4));
0378     { u64 x84 = (x83 + (x50 << 0x1));
0379     { u64 x85 = (x84 + x50);
    /* Carry chain: keep the low 26 (0x1a) or 25 (0x19) bits of each column
     * as a limb and add the remaining high bits to the next column.
     */
0380     { u64 x86 = (x85 >> 0x1a);
0381     { u32 x87 = ((u32)x85 & 0x3ffffff);
0382     { u64 x88 = (x86 + x82);
0383     { u64 x89 = (x88 >> 0x19);
0384     { u32 x90 = ((u32)x88 & 0x1ffffff);
0385     { u64 x91 = (x89 + x79);
0386     { u64 x92 = (x91 >> 0x1a);
0387     { u32 x93 = ((u32)x91 & 0x3ffffff);
0388     { u64 x94 = (x92 + x76);
0389     { u64 x95 = (x94 >> 0x19);
0390     { u32 x96 = ((u32)x94 & 0x1ffffff);
0391     { u64 x97 = (x95 + x73);
0392     { u64 x98 = (x97 >> 0x1a);
0393     { u32 x99 = ((u32)x97 & 0x3ffffff);
0394     { u64 x100 = (x98 + x70);
0395     { u64 x101 = (x100 >> 0x19);
0396     { u32 x102 = ((u32)x100 & 0x1ffffff);
0397     { u64 x103 = (x101 + x67);
0398     { u64 x104 = (x103 >> 0x1a);
0399     { u32 x105 = ((u32)x103 & 0x3ffffff);
0400     { u64 x106 = (x104 + x64);
0401     { u64 x107 = (x106 >> 0x19);
0402     { u32 x108 = ((u32)x106 & 0x1ffffff);
0403     { u64 x109 = (x107 + x61);
0404     { u64 x110 = (x109 >> 0x1a);
0405     { u32 x111 = ((u32)x109 & 0x3ffffff);
0406     { u64 x112 = (x110 + x49);
0407     { u64 x113 = (x112 >> 0x19);
0408     { u32 x114 = ((u32)x112 & 0x1ffffff);
    /* The carry out of the top limb wraps around to limb 0 multiplied by
     * 19 (0x13); one more short carry runs through limbs 1 and 2.
     */
0409     { u64 x115 = (x87 + (0x13 * x113));
0410     { u32 x116 = (u32) (x115 >> 0x1a);
0411     { u32 x117 = ((u32)x115 & 0x3ffffff);
0412     { u32 x118 = (x116 + x90);
0413     { u32 x119 = (x118 >> 0x19);
0414     { u32 x120 = (x118 & 0x1ffffff);
    /* Store the result; limb 2 absorbs the last carry without masking. */
0415     out[0] = x117;
0416     out[1] = x120;
0417     out[2] = (x119 + x93);
0418     out[3] = x96;
0419     out[4] = x99;
0420     out[5] = x102;
0421     out[6] = x105;
0422     out[7] = x108;
0423     out[8] = x111;
0424     out[9] = x114;
0425     }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
0426 }
0427
0428 static __always_inline void fe_mul_ttt(fe *h, const fe *f, const fe *g)
0429 {
0430     fe_mul_impl(h->v, f->v, g->v);
0431 }
0432
0433 static __always_inline void fe_mul_tlt(fe *h, const fe_loose *f, const fe *g)
0434 {
0435     fe_mul_impl(h->v, f->v, g->v);
0436 }
0437
0438 static __always_inline void
0439 fe_mul_tll(fe *h, const fe_loose *f, const fe_loose *g)
0440 {
0441     fe_mul_impl(h->v, f->v, g->v);
0442 }
0443
/* out = in1 * in1 in the field, on a ten-limb operand with alternating
 * 26/25-bit limb weights.
 *
 * Structure of the generated code:
 *   - all ten input limbs are loaded into locals before out is written;
 *   - x19..x37 are the nineteen 64-bit column sums of the square, with each
 *     cross product counted twice (and doubled again where both limbs have
 *     odd index);
 *   - x38..x64 fold columns x29..x37 into x19..x27 multiplied by 19,
 *     computed as (v << 4) + (v << 1) + v;
 *   - x65..x93 run a carry chain that splits each column into a 26- or
 *     25-bit limb;
 *   - x94..x99 multiply the final carry by 19 (0x13), add it to limb 0 and
 *     carry once more through limbs 1 and 2.
 */
0444 static noinline void fe_sqr_impl(u32 out[10], const u32 in1[10])
0445 {
    /* Load the limbs of in1 (x2..x18, with x17 = in1[9]). */
0446     { const u32 x17 = in1[9];
0447     { const u32 x18 = in1[8];
0448     { const u32 x16 = in1[7];
0449     { const u32 x14 = in1[6];
0450     { const u32 x12 = in1[5];
0451     { const u32 x10 = in1[4];
0452     { const u32 x8 = in1[3];
0453     { const u32 x6 = in1[2];
0454     { const u32 x4 = in1[1];
0455     { const u32 x2 = in1[0];
    /* Column sums: x19 is column 0, x37 is column 18. */
0456     { u64 x19 = ((u64)x2 * x2);
0457     { u64 x20 = ((u64)(0x2 * x2) * x4);
0458     { u64 x21 = (0x2 * (((u64)x4 * x4) + ((u64)x2 * x6)));
0459     { u64 x22 = (0x2 * (((u64)x4 * x6) + ((u64)x2 * x8)));
0460     { u64 x23 = ((((u64)x6 * x6) + ((u64)(0x4 * x4) * x8)) + ((u64)(0x2 * x2) * x10));
0461     { u64 x24 = (0x2 * ((((u64)x6 * x8) + ((u64)x4 * x10)) + ((u64)x2 * x12)));
0462     { u64 x25 = (0x2 * (((((u64)x8 * x8) + ((u64)x6 * x10)) + ((u64)x2 * x14)) + ((u64)(0x2 * x4) * x12)));
0463     { u64 x26 = (0x2 * (((((u64)x8 * x10) + ((u64)x6 * x12)) + ((u64)x4 * x14)) + ((u64)x2 * x16)));
0464     { u64 x27 = (((u64)x10 * x10) + (0x2 * ((((u64)x6 * x14) + ((u64)x2 * x18)) + (0x2 * (((u64)x4 * x16) + ((u64)x8 * x12))))));
0465     { u64 x28 = (0x2 * ((((((u64)x10 * x12) + ((u64)x8 * x14)) + ((u64)x6 * x16)) + ((u64)x4 * x18)) + ((u64)x2 * x17)));
0466     { u64 x29 = (0x2 * (((((u64)x12 * x12) + ((u64)x10 * x14)) + ((u64)x6 * x18)) + (0x2 * (((u64)x8 * x16) + ((u64)x4 * x17)))));
0467     { u64 x30 = (0x2 * (((((u64)x12 * x14) + ((u64)x10 * x16)) + ((u64)x8 * x18)) + ((u64)x6 * x17)));
0468     { u64 x31 = (((u64)x14 * x14) + (0x2 * (((u64)x10 * x18) + (0x2 * (((u64)x12 * x16) + ((u64)x8 * x17))))));
0469     { u64 x32 = (0x2 * ((((u64)x14 * x16) + ((u64)x12 * x18)) + ((u64)x10 * x17)));
0470     { u64 x33 = (0x2 * ((((u64)x16 * x16) + ((u64)x14 * x18)) + ((u64)(0x2 * x12) * x17)));
0471     { u64 x34 = (0x2 * (((u64)x16 * x18) + ((u64)x14 * x17)));
0472     { u64 x35 = (((u64)x18 * x18) + ((u64)(0x4 * x16) * x17));
0473     { u64 x36 = ((u64)(0x2 * x18) * x17);
0474     { u64 x37 = ((u64)(0x2 * x17) * x17);
    /* Fold columns 10..18 into columns 0..8: low += 19 * high, where
     * 19 * v is built as (v << 4) + (v << 1) + v. Column 9 (x28) has no
     * partner and is used unchanged.
     */
0475     { u64 x38 = (x27 + (x37 << 0x4));
0476     { u64 x39 = (x38 + (x37 << 0x1));
0477     { u64 x40 = (x39 + x37);
0478     { u64 x41 = (x26 + (x36 << 0x4));
0479     { u64 x42 = (x41 + (x36 << 0x1));
0480     { u64 x43 = (x42 + x36);
0481     { u64 x44 = (x25 + (x35 << 0x4));
0482     { u64 x45 = (x44 + (x35 << 0x1));
0483     { u64 x46 = (x45 + x35);
0484     { u64 x47 = (x24 + (x34 << 0x4));
0485     { u64 x48 = (x47 + (x34 << 0x1));
0486     { u64 x49 = (x48 + x34);
0487     { u64 x50 = (x23 + (x33 << 0x4));
0488     { u64 x51 = (x50 + (x33 << 0x1));
0489     { u64 x52 = (x51 + x33);
0490     { u64 x53 = (x22 + (x32 << 0x4));
0491     { u64 x54 = (x53 + (x32 << 0x1));
0492     { u64 x55 = (x54 + x32);
0493     { u64 x56 = (x21 + (x31 << 0x4));
0494     { u64 x57 = (x56 + (x31 << 0x1));
0495     { u64 x58 = (x57 + x31);
0496     { u64 x59 = (x20 + (x30 << 0x4));
0497     { u64 x60 = (x59 + (x30 << 0x1));
0498     { u64 x61 = (x60 + x30);
0499     { u64 x62 = (x19 + (x29 << 0x4));
0500     { u64 x63 = (x62 + (x29 << 0x1));
0501     { u64 x64 = (x63 + x29);
    /* Carry chain: keep the low 26 (0x1a) or 25 (0x19) bits of each column
     * as a limb and add the remaining high bits to the next column.
     */
0502     { u64 x65 = (x64 >> 0x1a);
0503     { u32 x66 = ((u32)x64 & 0x3ffffff);
0504     { u64 x67 = (x65 + x61);
0505     { u64 x68 = (x67 >> 0x19);
0506     { u32 x69 = ((u32)x67 & 0x1ffffff);
0507     { u64 x70 = (x68 + x58);
0508     { u64 x71 = (x70 >> 0x1a);
0509     { u32 x72 = ((u32)x70 & 0x3ffffff);
0510     { u64 x73 = (x71 + x55);
0511     { u64 x74 = (x73 >> 0x19);
0512     { u32 x75 = ((u32)x73 & 0x1ffffff);
0513     { u64 x76 = (x74 + x52);
0514     { u64 x77 = (x76 >> 0x1a);
0515     { u32 x78 = ((u32)x76 & 0x3ffffff);
0516     { u64 x79 = (x77 + x49);
0517     { u64 x80 = (x79 >> 0x19);
0518     { u32 x81 = ((u32)x79 & 0x1ffffff);
0519     { u64 x82 = (x80 + x46);
0520     { u64 x83 = (x82 >> 0x1a);
0521     { u32 x84 = ((u32)x82 & 0x3ffffff);
0522     { u64 x85 = (x83 + x43);
0523     { u64 x86 = (x85 >> 0x19);
0524     { u32 x87 = ((u32)x85 & 0x1ffffff);
0525     { u64 x88 = (x86 + x40);
0526     { u64 x89 = (x88 >> 0x1a);
0527     { u32 x90 = ((u32)x88 & 0x3ffffff);
0528     { u64 x91 = (x89 + x28);
0529     { u64 x92 = (x91 >> 0x19);
0530     { u32 x93 = ((u32)x91 & 0x1ffffff);
    /* The carry out of the top limb wraps around to limb 0 multiplied by
     * 19 (0x13); one more short carry runs through limbs 1 and 2.
     */
0531     { u64 x94 = (x66 + (0x13 * x92));
0532     { u32 x95 = (u32) (x94 >> 0x1a);
0533     { u32 x96 = ((u32)x94 & 0x3ffffff);
0534     { u32 x97 = (x95 + x69);
0535     { u32 x98 = (x97 >> 0x19);
0536     { u32 x99 = (x97 & 0x1ffffff);
    /* Store the result; limb 2 absorbs the last carry without masking. */
0537     out[0] = x96;
0538     out[1] = x99;
0539     out[2] = (x98 + x72);
0540     out[3] = x75;
0541     out[4] = x78;
0542     out[5] = x81;
0543     out[6] = x84;
0544     out[7] = x87;
0545     out[8] = x90;
0546     out[9] = x93;
0547     }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
0548 }
0549
0550 static __always_inline void fe_sq_tl(fe *h, const fe_loose *f)
0551 {
0552     fe_sqr_impl(h->v, f->v);
0553 }
0554
0555 static __always_inline void fe_sq_tt(fe *h, const fe *f)
0556 {
0557     fe_sqr_impl(h->v, f->v);
0558 }
0559
0560 static __always_inline void fe_loose_invert(fe *out, const fe_loose *z)
0561 {
0562     fe t0;
0563     fe t1;
0564     fe t2;
0565     fe t3;
0566     int i;
0567
0568     fe_sq_tl(&t0, z);
0569     fe_sq_tt(&t1, &t0);
0570     for (i = 1; i < 2; ++i)
0571         fe_sq_tt(&t1, &t1);
0572     fe_mul_tlt(&t1, z, &t1);
0573     fe_mul_ttt(&t0, &t0, &t1);
0574     fe_sq_tt(&t2, &t0);
0575     fe_mul_ttt(&t1, &t1, &t2);
0576     fe_sq_tt(&t2, &t1);
0577     for (i = 1; i < 5; ++i)
0578         fe_sq_tt(&t2, &t2);
0579     fe_mul_ttt(&t1, &t2, &t1);
0580     fe_sq_tt(&t2, &t1);
0581     for (i = 1; i < 10; ++i)
0582         fe_sq_tt(&t2, &t2);
0583     fe_mul_ttt(&t2, &t2, &t1);
0584     fe_sq_tt(&t3, &t2);
0585     for (i = 1; i < 20; ++i)
0586         fe_sq_tt(&t3, &t3);
0587     fe_mul_ttt(&t2, &t3, &t2);
0588     fe_sq_tt(&t2, &t2);
0589     for (i = 1; i < 10; ++i)
0590         fe_sq_tt(&t2, &t2);
0591     fe_mul_ttt(&t1, &t2, &t1);
0592     fe_sq_tt(&t2, &t1);
0593     for (i = 1; i < 50; ++i)
0594         fe_sq_tt(&t2, &t2);
0595     fe_mul_ttt(&t2, &t2, &t1);
0596     fe_sq_tt(&t3, &t2);
0597     for (i = 1; i < 100; ++i)
0598         fe_sq_tt(&t3, &t3);
0599     fe_mul_ttt(&t2, &t3, &t2);
0600     fe_sq_tt(&t2, &t2);
0601     for (i = 1; i < 50; ++i)
0602         fe_sq_tt(&t2, &t2);
0603     fe_mul_ttt(&t1, &t2, &t1);
0604     fe_sq_tt(&t1, &t1);
0605     for (i = 1; i < 5; ++i)
0606         fe_sq_tt(&t1, &t1);
0607     fe_mul_ttt(out, &t1, &t0);
0608 }
0609
0610 static __always_inline void fe_invert(fe *out, const fe *z)
0611 {
0612     fe_loose l;
0613     fe_copy_lt(&l, z);
0614     fe_loose_invert(out, &l);
0615 }
0616
0617 /* Replace (f,g) with (g,f) if b == 1;
0618  * replace (f,g) with (f,g) if b == 0.
0619  *
0620  * Preconditions: b in {0,1}
0621  */
0622 static noinline void fe_cswap(fe *f, fe *g, unsigned int b)
0623 {
0624     unsigned i;
0625     b = 0 - b;
0626     for (i = 0; i < 10; i++) {
0627         u32 x = f->v[i] ^ g->v[i];
0628         x &= b;
0629         f->v[i] ^= x;
0630         g->v[i] ^= x;
0631     }
0632 }
0633
0634 /* NOTE: based on fiat-crypto fe_mul, edited for in2=121666, 0, 0.*/
0635 static __always_inline void fe_mul_121666_impl(u32 out[10], const u32 in1[10])
0636 {
0637     { const u32 x20 = in1[9];
0638     { const u32 x21 = in1[8];
0639     { const u32 x19 = in1[7];
0640     { const u32 x17 = in1[6];
0641     { const u32 x15 = in1[5];
0642     { const u32 x13 = in1[4];
0643     { const u32 x11 = in1[3];
0644     { const u32 x9 = in1[2];
0645     { const u32 x7 = in1[1];
0646     { const u32 x5 = in1[0];
0647     { const u32 x38 = 0;
0648     { const u32 x39 = 0;
0649     { const u32 x37 = 0;
0650     { const u32 x35 = 0;
0651     { const u32 x33 = 0;
0652     { const u32 x31 = 0;
0653     { const u32 x29 = 0;
0654     { const u32 x27 = 0;
0655     { const u32 x25 = 0;
0656     { const u32 x23 = 121666;
0657     { u64 x40 = ((u64)x23 * x5);
0658     { u64 x41 = (((u64)x23 * x7) + ((u64)x25 * x5));
0659     { u64 x42 = ((((u64)(0x2 * x25) * x7) + ((u64)x23 * x9)) + ((u64)x27 * x5));
0660     { u64 x43 = (((((u64)x25 * x9) + ((u64)x27 * x7)) + ((u64)x23 * x11)) + ((u64)x29 * x5));
0661     { u64 x44 = (((((u64)x27 * x9) + (0x2 * (((u64)x25 * x11) + ((u64)x29 * x7)))) + ((u64)x23 * x13)) + ((u64)x31 * x5));
0662     { u64 x45 = (((((((u64)x27 * x11) + ((u64)x29 * x9)) + ((u64)x25 * x13)) + ((u64)x31 * x7)) + ((u64)x23 * x15)) + ((u64)x33 * x5));
0663     { u64 x46 = (((((0x2 * ((((u64)x29 * x11) + ((u64)x25 * x15)) + ((u64)x33 * x7))) + ((u64)x27 * x13)) + ((u64)x31 * x9)) + ((u64)x23 * x17)) + ((u64)x35 * x5));
0664     { u64 x47 = (((((((((u64)x29 * x13) + ((u64)x31 * x11)) + ((u64)x27 * x15)) + ((u64)x33 * x9)) + ((u64)x25 * x17)) + ((u64)x35 * x7)) + ((u64)x23 * x19)) + ((u64)x37 * x5));
0665     { u64 x48 = (((((((u64)x31 * x13) + (0x2 * (((((u64)x29 * x15) + ((u64)x33 * x11)) + ((u64)x25 * x19)) + ((u64)x37 * x7)))) + ((u64)x27 * x17)) + ((u64)x35 * x9)) + ((u64)x23 * x21)) + ((u64)x39 * x5));
0666     { u64 x49 = (((((((((((u64)x31 * x15) + ((u64)x33 * x13)) + ((u64)x29 * x17)) + ((u64)x35 * x11)) + ((u64)x27 * x19)) + ((u64)x37 * x9)) + ((u64)x25 * x21)) + ((u64)x39 * x7)) + ((u64)x23 * x20)) + ((u64)x38 * x5));
0667     { u64 x50 = (((((0x2 * ((((((u64)x33 * x15) + ((u64)x29 * x19)) + ((u64)x37 * x11)) + ((u64)x25 * x20)) + ((u64)x38 * x7))) + ((u64)x31 * x17)) + ((u64)x35 * x13)) + ((u64)x27 * x21)) + ((u64)x39 * x9));
0668     { u64 x51 = (((((((((u64)x33 * x17) + ((u64)x35 * x15)) + ((u64)x31 * x19)) + ((u64)x37 * x13)) + ((u64)x29 * x21)) + ((u64)x39 * x11)) + ((u64)x27 * x20)) + ((u64)x38 * x9));
0669     { u64 x52 = (((((u64)x35 * x17) + (0x2 * (((((u64)x33 * x19) + ((u64)x37 * x15)) + ((u64)x29 * x20)) + ((u64)x38 * x11)))) + ((u64)x31 * x21)) + ((u64)x39 * x13));
0670     { u64 x53 = (((((((u64)x35 * x19) + ((u64)x37 * x17)) + ((u64)x33 * x21)) + ((u64)x39 * x15)) + ((u64)x31 * x20)) + ((u64)x38 * x13));
0671     { u64 x54 = (((0x2 * ((((u64)x37 * x19) + ((u64)x33 * x20)) + ((u64)x38 * x15))) + ((u64)x35 * x21)) + ((u64)x39 * x17));
0672     { u64 x55 = (((((u64)x37 * x21) + ((u64)x39 * x19)) + ((u64)x35 * x20)) + ((u64)x38 * x17));
0673     { u64 x56 = (((u64)x39 * x21) + (0x2 * (((u64)x37 * x20) + ((u64)x38 * x19))));
0674     { u64 x57 = (((u64)x39 * x20) + ((u64)x38 * x21));
0675     { u64 x58 = ((u64)(0x2 * x38) * x20);
0676     { u64 x59 = (x48 + (x58 << 0x4));
0677     { u64 x60 = (x59 + (x58 << 0x1));
0678     { u64 x61 = (x60 + x58);
0679     { u64 x62 = (x47 + (x57 << 0x4));
0680     { u64 x63 = (x62 + (x57 << 0x1));
0681     { u64 x64 = (x63 + x57);
0682     { u64 x65 = (x46 + (x56 << 0x4));
0683     { u64 x66 = (x65 + (x56 << 0x1));
0684     { u64 x67 = (x66 + x56);
0685     { u64 x68 = (x45 + (x55 << 0x4));
0686     { u64 x69 = (x68 + (x55 << 0x1));
0687     { u64 x70 = (x69 + x55);
0688     { u64 x71 = (x44 + (x54 << 0x4));
0689     { u64 x72 = (x71 + (x54 << 0x1));
0690     { u64 x73 = (x72 + x54);
0691     { u64 x74 = (x43 + (x53 << 0x4));
0692     { u64 x75 = (x74 + (x53 << 0x1));
0693     { u64 x76 = (x75 + x53);
0694     { u64 x77 = (x42 + (x52 << 0x4));
0695     { u64 x78 = (x77 + (x52 << 0x1));
0696     { u64 x79 = (x78 + x52);
0697     { u64 x80 = (x41 + (x51 << 0x4));
0698     { u64 x81 = (x80 + (x51 << 0x1));
0699     { u64 x82 = (x81 + x51);
0700     { u64 x83 = (x40 + (x50 << 0x4));
0701     { u64 x84 = (x83 + (x50 << 0x1));
0702     { u64 x85 = (x84 + x50);
0703     { u64 x86 = (x85 >> 0x1a);
0704     { u32 x87 = ((u32)x85 & 0x3ffffff);
0705     { u64 x88 = (x86 + x82);
0706     { u64 x89 = (x88 >> 0x19);
0707     { u32 x90 = ((u32)x88 & 0x1ffffff);
0708     { u64 x91 = (x89 + x79);
0709     { u64 x92 = (x91 >> 0x1a);
0710     { u32 x93 = ((u32)x91 & 0x3ffffff);
0711     { u64 x94 = (x92 + x76);
0712     { u64 x95 = (x94 >> 0x19);
0713     { u32 x96 = ((u32)x94 & 0x1ffffff);
0714     { u64 x97 = (x95 + x73);
0715     { u64 x98 = (x97 >> 0x1a);
0716     { u32 x99 = ((u32)x97 & 0x3ffffff);
0717     { u64 x100 = (x98 + x70);
0718     { u64 x101 = (x100 >> 0x19);
0719     { u32 x102 = ((u32)x100 & 0x1ffffff);
0720     { u64 x103 = (x101 + x67);
0721     { u64 x104 = (x103 >> 0x1a);
0722     { u32 x105 = ((u32)x103 & 0x3ffffff);
0723     { u64 x106 = (x104 + x64);
0724     { u64 x107 = (x106 >> 0x19);
0725     { u32 x108 = ((u32)x106 & 0x1ffffff);
0726     { u64 x109 = (x107 + x61);
0727     { u64 x110 = (x109 >> 0x1a);
0728     { u32 x111 = ((u32)x109 & 0x3ffffff);
0729     { u64 x112 = (x110 + x49);
0730     { u64 x113 = (x112 >> 0x19);
0731     { u32 x114 = ((u32)x112 & 0x1ffffff);
0732     { u64 x115 = (x87 + (0x13 * x113));
0733     { u32 x116 = (u32) (x115 >> 0x1a);
0734     { u32 x117 = ((u32)x115 & 0x3ffffff);
0735     { u32 x118 = (x116 + x90);
0736     { u32 x119 = (x118 >> 0x19);
0737     { u32 x120 = (x118 & 0x1ffffff);
0738     out[0] = x117;
0739     out[1] = x120;
0740     out[2] = (x119 + x93);
0741     out[3] = x96;
0742     out[4] = x99;
0743     out[5] = x102;
0744     out[6] = x105;
0745     out[7] = x108;
0746     out[8] = x111;
0747     out[9] = x114;
0748     }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
0749 }
0750
0751 static __always_inline void fe_mul121666(fe *h, const fe_loose *f)
0752 {
0753     fe_mul_121666_impl(h->v, f->v);
0754 }
0755
0756 void curve25519_generic(u8 out[CURVE25519_KEY_SIZE],
0757             const u8 scalar[CURVE25519_KEY_SIZE],
0758             const u8 point[CURVE25519_KEY_SIZE])
0759 {
0760     fe x1, x2, z2, x3, z3;
0761     fe_loose x2l, z2l, x3l;
0762     unsigned swap = 0;
0763     int pos;
0764     u8 e[32];
0765
0766     memcpy(e, scalar, 32);
0767     curve25519_clamp_secret(e);
0768
0769     /* The following implementation was transcribed to Coq and proven to
0770      * correspond to unary scalar multiplication in affine coordinates given
0771      * that x1 != 0 is the x coordinate of some point on the curve. It was
0772      * also checked in Coq that doing a ladderstep with x1 = x3 = 0 gives
0773      * z2' = z3' = 0, and z2 = z3 = 0 gives z2' = z3' = 0. The statement was
0774      * quantified over the underlying field, so it applies to Curve25519
0775      * itself and the quadratic twist of Curve25519. It was not proven in
0776      * Coq that prime-field arithmetic correctly simulates extension-field
0777      * arithmetic on prime-field values. The decoding of the byte array
0778      * representation of e was not considered.
0779      *
0780      * Specification of Montgomery curves in affine coordinates:
0781      * <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Spec/MontgomeryCurve.v#L27>
0782      *
0783      * Proof that these form a group that is isomorphic to a Weierstrass
0784      * curve:
0785      * <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/AffineProofs.v#L35>
0786      *
0787      * Coq transcription and correctness proof of the loop
0788      * (where scalarbits=255):
0789      * <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZ.v#L118>
0790      * <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZProofs.v#L278>
0791      * preconditions: 0 <= e < 2^255 (not necessarily e < order),
0792      * fe_invert(0) = 0
0793      */
0794     fe_frombytes(&x1, point);
0795     fe_1(&x2);
0796     fe_0(&z2);
0797     fe_copy(&x3, &x1);
0798     fe_1(&z3);
0799
0800     for (pos = 254; pos >= 0; --pos) {
0801         fe tmp0, tmp1;
0802         fe_loose tmp0l, tmp1l;
0803         /* loop invariant as of right before the test, for the case
0804          * where x1 != 0:
0805          *   pos >= -1; if z2 = 0 then x2 is nonzero; if z3 = 0 then x3
0806          *   is nonzero
0807          *   let r := e >> (pos+1) in the following equalities of
0808          *   projective points:
0809          *   to_xz (r*P)     === if swap then (x3, z3) else (x2, z2)
0810          *   to_xz ((r+1)*P) === if swap then (x2, z2) else (x3, z3)
0811          *   x1 is the nonzero x coordinate of the nonzero
0812          *   point (r*P-(r+1)*P)
0813          */
0814         unsigned b = 1 & (e[pos / 8] >> (pos & 7));
0815         swap ^= b;
0816         fe_cswap(&x2, &x3, swap);
0817         fe_cswap(&z2, &z3, swap);
0818         swap = b;
0819         /* Coq transcription of ladderstep formula (called from
0820          * transcribed loop):
0821          * <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZ.v#L89>
0822          * <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZProofs.v#L131>
0823          * x1 != 0 <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZProofs.v#L217>
0824          * x1  = 0 <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZProofs.v#L147>
0825          */
0826         fe_sub(&tmp0l, &x3, &z3);
0827         fe_sub(&tmp1l, &x2, &z2);
0828         fe_add(&x2l, &x2, &z2);
0829         fe_add(&z2l, &x3, &z3);
0830         fe_mul_tll(&z3, &tmp0l, &x2l);
0831         fe_mul_tll(&z2, &z2l, &tmp1l);
0832         fe_sq_tl(&tmp0, &tmp1l);
0833         fe_sq_tl(&tmp1, &x2l);
0834         fe_add(&x3l, &z3, &z2);
0835         fe_sub(&z2l, &z3, &z2);
0836         fe_mul_ttt(&x2, &tmp1, &tmp0);
0837         fe_sub(&tmp1l, &tmp1, &tmp0);
0838         fe_sq_tl(&z2, &z2l);
0839         fe_mul121666(&z3, &tmp1l);
0840         fe_sq_tl(&x3, &x3l);
0841         fe_add(&tmp0l, &tmp0, &z3);
0842         fe_mul_ttt(&z3, &x1, &z2);
0843         fe_mul_tll(&z2, &tmp1l, &tmp0l);
0844     }
0845     /* here pos=-1, so r=e, so to_xz (e*P) === if swap then (x3, z3)
0846      * else (x2, z2)
0847      */
0848     fe_cswap(&x2, &x3, swap);
0849     fe_cswap(&z2, &z3, swap);
0850
0851     fe_invert(&z2, &z2);
0852     fe_mul_ttt(&x2, &x2, &z2);
0853     fe_tobytes(out, &x2);
0854
0855     memzero_explicit(&x1, sizeof(x1));
0856     memzero_explicit(&x2, sizeof(x2));
0857     memzero_explicit(&z2, sizeof(z2));
0858     memzero_explicit(&x3, sizeof(x3));
0859     memzero_explicit(&z3, sizeof(z3));
0860     memzero_explicit(&x2l, sizeof(x2l));
0861     memzero_explicit(&z2l, sizeof(z2l));
0862     memzero_explicit(&x3l, sizeof(x3l));
0863     memzero_explicit(&e, sizeof(e));
0864 }