0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017 #include <linux/string.h>
0018 #include "mpi-internal.h"
0019 #include "longlong.h"
0020
0021 #define MPN_MUL_N_RECURSE(prodp, up, vp, size, tspace) \
0022 do { \
0023 if ((size) < KARATSUBA_THRESHOLD) \
0024 mul_n_basecase(prodp, up, vp, size); \
0025 else \
0026 mul_n(prodp, up, vp, size, tspace); \
0027 } while (0);
0028
0029 #define MPN_SQR_N_RECURSE(prodp, up, size, tspace) \
0030 do { \
0031 if ((size) < KARATSUBA_THRESHOLD) \
0032 mpih_sqr_n_basecase(prodp, up, size); \
0033 else \
0034 mpih_sqr_n(prodp, up, size, tspace); \
0035 } while (0);
0036
0037
0038
0039
0040
0041
0042
0043
0044
0045
0046
0047
0048
0049
0050
0051
0052
0053
0054 static mpi_limb_t
0055 mul_n_basecase(mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t size)
0056 {
0057 mpi_size_t i;
0058 mpi_limb_t cy;
0059 mpi_limb_t v_limb;
0060
0061
0062
0063 v_limb = vp[0];
0064 if (v_limb <= 1) {
0065 if (v_limb == 1)
0066 MPN_COPY(prodp, up, size);
0067 else
0068 MPN_ZERO(prodp, size);
0069 cy = 0;
0070 } else
0071 cy = mpihelp_mul_1(prodp, up, size, v_limb);
0072
0073 prodp[size] = cy;
0074 prodp++;
0075
0076
0077
0078 for (i = 1; i < size; i++) {
0079 v_limb = vp[i];
0080 if (v_limb <= 1) {
0081 cy = 0;
0082 if (v_limb == 1)
0083 cy = mpihelp_add_n(prodp, prodp, up, size);
0084 } else
0085 cy = mpihelp_addmul_1(prodp, up, size, v_limb);
0086
0087 prodp[size] = cy;
0088 prodp++;
0089 }
0090
0091 return cy;
0092 }
0093
0094 static void
0095 mul_n(mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp,
0096 mpi_size_t size, mpi_ptr_t tspace)
0097 {
0098 if (size & 1) {
0099
0100
0101
0102
0103
0104
0105
0106
0107
0108
0109 mpi_size_t esize = size - 1;
0110 mpi_limb_t cy_limb;
0111
0112 MPN_MUL_N_RECURSE(prodp, up, vp, esize, tspace);
0113 cy_limb = mpihelp_addmul_1(prodp + esize, up, esize, vp[esize]);
0114 prodp[esize + esize] = cy_limb;
0115 cy_limb = mpihelp_addmul_1(prodp + esize, vp, size, up[esize]);
0116 prodp[esize + size] = cy_limb;
0117 } else {
0118
0119
0120
0121
0122
0123
0124
0125
0126
0127
0128
0129
0130
0131
0132
0133 mpi_size_t hsize = size >> 1;
0134 mpi_limb_t cy;
0135 int negflg;
0136
0137
0138
0139
0140
0141
0142 MPN_MUL_N_RECURSE(prodp + size, up + hsize, vp + hsize, hsize,
0143 tspace);
0144
0145
0146
0147
0148 if (mpihelp_cmp(up + hsize, up, hsize) >= 0) {
0149 mpihelp_sub_n(prodp, up + hsize, up, hsize);
0150 negflg = 0;
0151 } else {
0152 mpihelp_sub_n(prodp, up, up + hsize, hsize);
0153 negflg = 1;
0154 }
0155 if (mpihelp_cmp(vp + hsize, vp, hsize) >= 0) {
0156 mpihelp_sub_n(prodp + hsize, vp + hsize, vp, hsize);
0157 negflg ^= 1;
0158 } else {
0159 mpihelp_sub_n(prodp + hsize, vp, vp + hsize, hsize);
0160
0161 }
0162
0163
0164
0165
0166 MPN_MUL_N_RECURSE(tspace, prodp, prodp + hsize, hsize,
0167 tspace + size);
0168
0169
0170 MPN_COPY(prodp + hsize, prodp + size, hsize);
0171 cy = mpihelp_add_n(prodp + size, prodp + size,
0172 prodp + size + hsize, hsize);
0173
0174
0175 if (negflg)
0176 cy -=
0177 mpihelp_sub_n(prodp + hsize, prodp + hsize, tspace,
0178 size);
0179 else
0180 cy +=
0181 mpihelp_add_n(prodp + hsize, prodp + hsize, tspace,
0182 size);
0183
0184
0185
0186
0187
0188
0189
0190 MPN_MUL_N_RECURSE(tspace, up, vp, hsize, tspace + size);
0191
0192
0193
0194 cy += mpihelp_add_n(prodp + hsize, prodp + hsize, tspace, size);
0195 if (cy)
0196 mpihelp_add_1(prodp + hsize + size,
0197 prodp + hsize + size, hsize, cy);
0198
0199 MPN_COPY(prodp, tspace, hsize);
0200 cy = mpihelp_add_n(prodp + hsize, prodp + hsize, tspace + hsize,
0201 hsize);
0202 if (cy)
0203 mpihelp_add_1(prodp + size, prodp + size, size, 1);
0204 }
0205 }
0206
0207 void mpih_sqr_n_basecase(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size)
0208 {
0209 mpi_size_t i;
0210 mpi_limb_t cy_limb;
0211 mpi_limb_t v_limb;
0212
0213
0214
0215 v_limb = up[0];
0216 if (v_limb <= 1) {
0217 if (v_limb == 1)
0218 MPN_COPY(prodp, up, size);
0219 else
0220 MPN_ZERO(prodp, size);
0221 cy_limb = 0;
0222 } else
0223 cy_limb = mpihelp_mul_1(prodp, up, size, v_limb);
0224
0225 prodp[size] = cy_limb;
0226 prodp++;
0227
0228
0229
0230 for (i = 1; i < size; i++) {
0231 v_limb = up[i];
0232 if (v_limb <= 1) {
0233 cy_limb = 0;
0234 if (v_limb == 1)
0235 cy_limb = mpihelp_add_n(prodp, prodp, up, size);
0236 } else
0237 cy_limb = mpihelp_addmul_1(prodp, up, size, v_limb);
0238
0239 prodp[size] = cy_limb;
0240 prodp++;
0241 }
0242 }
0243
0244 void
0245 mpih_sqr_n(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size, mpi_ptr_t tspace)
0246 {
0247 if (size & 1) {
0248
0249
0250
0251
0252
0253
0254
0255
0256
0257
0258 mpi_size_t esize = size - 1;
0259 mpi_limb_t cy_limb;
0260
0261 MPN_SQR_N_RECURSE(prodp, up, esize, tspace);
0262 cy_limb = mpihelp_addmul_1(prodp + esize, up, esize, up[esize]);
0263 prodp[esize + esize] = cy_limb;
0264 cy_limb = mpihelp_addmul_1(prodp + esize, up, size, up[esize]);
0265
0266 prodp[esize + size] = cy_limb;
0267 } else {
0268 mpi_size_t hsize = size >> 1;
0269 mpi_limb_t cy;
0270
0271
0272
0273
0274
0275
0276 MPN_SQR_N_RECURSE(prodp + size, up + hsize, hsize, tspace);
0277
0278
0279
0280
0281 if (mpihelp_cmp(up + hsize, up, hsize) >= 0)
0282 mpihelp_sub_n(prodp, up + hsize, up, hsize);
0283 else
0284 mpihelp_sub_n(prodp, up, up + hsize, hsize);
0285
0286
0287
0288
0289 MPN_SQR_N_RECURSE(tspace, prodp, hsize, tspace + size);
0290
0291
0292 MPN_COPY(prodp + hsize, prodp + size, hsize);
0293 cy = mpihelp_add_n(prodp + size, prodp + size,
0294 prodp + size + hsize, hsize);
0295
0296
0297 cy -= mpihelp_sub_n(prodp + hsize, prodp + hsize, tspace, size);
0298
0299
0300
0301
0302
0303
0304 MPN_SQR_N_RECURSE(tspace, up, hsize, tspace + size);
0305
0306
0307 cy += mpihelp_add_n(prodp + hsize, prodp + hsize, tspace, size);
0308 if (cy)
0309 mpihelp_add_1(prodp + hsize + size,
0310 prodp + hsize + size, hsize, cy);
0311
0312 MPN_COPY(prodp, tspace, hsize);
0313 cy = mpihelp_add_n(prodp + hsize, prodp + hsize, tspace + hsize,
0314 hsize);
0315 if (cy)
0316 mpihelp_add_1(prodp + size, prodp + size, size, 1);
0317 }
0318 }
0319
0320
0321 void mpihelp_mul_n(mpi_ptr_t prodp,
0322 mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t size)
0323 {
0324 if (up == vp) {
0325 if (size < KARATSUBA_THRESHOLD)
0326 mpih_sqr_n_basecase(prodp, up, size);
0327 else {
0328 mpi_ptr_t tspace;
0329 tspace = mpi_alloc_limb_space(2 * size);
0330 mpih_sqr_n(prodp, up, size, tspace);
0331 mpi_free_limb_space(tspace);
0332 }
0333 } else {
0334 if (size < KARATSUBA_THRESHOLD)
0335 mul_n_basecase(prodp, up, vp, size);
0336 else {
0337 mpi_ptr_t tspace;
0338 tspace = mpi_alloc_limb_space(2 * size);
0339 mul_n(prodp, up, vp, size, tspace);
0340 mpi_free_limb_space(tspace);
0341 }
0342 }
0343 }
0344
0345 int
0346 mpihelp_mul_karatsuba_case(mpi_ptr_t prodp,
0347 mpi_ptr_t up, mpi_size_t usize,
0348 mpi_ptr_t vp, mpi_size_t vsize,
0349 struct karatsuba_ctx *ctx)
0350 {
0351 mpi_limb_t cy;
0352
0353 if (!ctx->tspace || ctx->tspace_size < vsize) {
0354 if (ctx->tspace)
0355 mpi_free_limb_space(ctx->tspace);
0356 ctx->tspace = mpi_alloc_limb_space(2 * vsize);
0357 if (!ctx->tspace)
0358 return -ENOMEM;
0359 ctx->tspace_size = vsize;
0360 }
0361
0362 MPN_MUL_N_RECURSE(prodp, up, vp, vsize, ctx->tspace);
0363
0364 prodp += vsize;
0365 up += vsize;
0366 usize -= vsize;
0367 if (usize >= vsize) {
0368 if (!ctx->tp || ctx->tp_size < vsize) {
0369 if (ctx->tp)
0370 mpi_free_limb_space(ctx->tp);
0371 ctx->tp = mpi_alloc_limb_space(2 * vsize);
0372 if (!ctx->tp) {
0373 if (ctx->tspace)
0374 mpi_free_limb_space(ctx->tspace);
0375 ctx->tspace = NULL;
0376 return -ENOMEM;
0377 }
0378 ctx->tp_size = vsize;
0379 }
0380
0381 do {
0382 MPN_MUL_N_RECURSE(ctx->tp, up, vp, vsize, ctx->tspace);
0383 cy = mpihelp_add_n(prodp, prodp, ctx->tp, vsize);
0384 mpihelp_add_1(prodp + vsize, ctx->tp + vsize, vsize,
0385 cy);
0386 prodp += vsize;
0387 up += vsize;
0388 usize -= vsize;
0389 } while (usize >= vsize);
0390 }
0391
0392 if (usize) {
0393 if (usize < KARATSUBA_THRESHOLD) {
0394 mpi_limb_t tmp;
0395 if (mpihelp_mul(ctx->tspace, vp, vsize, up, usize, &tmp)
0396 < 0)
0397 return -ENOMEM;
0398 } else {
0399 if (!ctx->next) {
0400 ctx->next = kzalloc(sizeof *ctx, GFP_KERNEL);
0401 if (!ctx->next)
0402 return -ENOMEM;
0403 }
0404 if (mpihelp_mul_karatsuba_case(ctx->tspace,
0405 vp, vsize,
0406 up, usize,
0407 ctx->next) < 0)
0408 return -ENOMEM;
0409 }
0410
0411 cy = mpihelp_add_n(prodp, prodp, ctx->tspace, vsize);
0412 mpihelp_add_1(prodp + vsize, ctx->tspace + vsize, usize, cy);
0413 }
0414
0415 return 0;
0416 }
0417
0418 void mpihelp_release_karatsuba_ctx(struct karatsuba_ctx *ctx)
0419 {
0420 struct karatsuba_ctx *ctx2;
0421
0422 if (ctx->tp)
0423 mpi_free_limb_space(ctx->tp);
0424 if (ctx->tspace)
0425 mpi_free_limb_space(ctx->tspace);
0426 for (ctx = ctx->next; ctx; ctx = ctx2) {
0427 ctx2 = ctx->next;
0428 if (ctx->tp)
0429 mpi_free_limb_space(ctx->tp);
0430 if (ctx->tspace)
0431 mpi_free_limb_space(ctx->tspace);
0432 kfree(ctx);
0433 }
0434 }
0435
0436
0437
0438
0439
0440
0441
0442
0443
0444
0445
0446
0447
0448
0449
0450
0451 int
0452 mpihelp_mul(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t usize,
0453 mpi_ptr_t vp, mpi_size_t vsize, mpi_limb_t *_result)
0454 {
0455 mpi_ptr_t prod_endp = prodp + usize + vsize - 1;
0456 mpi_limb_t cy;
0457 struct karatsuba_ctx ctx;
0458
0459 if (vsize < KARATSUBA_THRESHOLD) {
0460 mpi_size_t i;
0461 mpi_limb_t v_limb;
0462
0463 if (!vsize) {
0464 *_result = 0;
0465 return 0;
0466 }
0467
0468
0469
0470 v_limb = vp[0];
0471 if (v_limb <= 1) {
0472 if (v_limb == 1)
0473 MPN_COPY(prodp, up, usize);
0474 else
0475 MPN_ZERO(prodp, usize);
0476 cy = 0;
0477 } else
0478 cy = mpihelp_mul_1(prodp, up, usize, v_limb);
0479
0480 prodp[usize] = cy;
0481 prodp++;
0482
0483
0484
0485 for (i = 1; i < vsize; i++) {
0486 v_limb = vp[i];
0487 if (v_limb <= 1) {
0488 cy = 0;
0489 if (v_limb == 1)
0490 cy = mpihelp_add_n(prodp, prodp, up,
0491 usize);
0492 } else
0493 cy = mpihelp_addmul_1(prodp, up, usize, v_limb);
0494
0495 prodp[usize] = cy;
0496 prodp++;
0497 }
0498
0499 *_result = cy;
0500 return 0;
0501 }
0502
0503 memset(&ctx, 0, sizeof ctx);
0504 if (mpihelp_mul_karatsuba_case(prodp, up, usize, vp, vsize, &ctx) < 0)
0505 return -ENOMEM;
0506 mpihelp_release_karatsuba_ctx(&ctx);
0507 *_result = *prod_endp;
0508 return 0;
0509 }