0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017 #include "mpi-internal.h"
0018 #include "longlong.h"
0019
0020 #ifndef UMUL_TIME
0021 #define UMUL_TIME 1
0022 #endif
0023 #ifndef UDIV_TIME
0024 #define UDIV_TIME UMUL_TIME
0025 #endif
0026
0027
0028 mpi_limb_t
0029 mpihelp_mod_1(mpi_ptr_t dividend_ptr, mpi_size_t dividend_size,
0030 mpi_limb_t divisor_limb)
0031 {
0032 mpi_size_t i;
0033 mpi_limb_t n1, n0, r;
0034 mpi_limb_t dummy __maybe_unused;
0035
0036
0037 if (!dividend_size)
0038 return 0;
0039
0040
0041
0042
0043
0044
0045
0046
0047
0048 if (UDIV_TIME > (2 * UMUL_TIME + 6)
0049 && (UDIV_TIME - (2 * UMUL_TIME + 6)) * dividend_size > UDIV_TIME) {
0050 int normalization_steps;
0051
0052 normalization_steps = count_leading_zeros(divisor_limb);
0053 if (normalization_steps) {
0054 mpi_limb_t divisor_limb_inverted;
0055
0056 divisor_limb <<= normalization_steps;
0057
0058
0059
0060
0061
0062
0063
0064 if (!(divisor_limb << 1))
0065 divisor_limb_inverted = ~(mpi_limb_t)0;
0066 else
0067 udiv_qrnnd(divisor_limb_inverted, dummy,
0068 -divisor_limb, 0, divisor_limb);
0069
0070 n1 = dividend_ptr[dividend_size - 1];
0071 r = n1 >> (BITS_PER_MPI_LIMB - normalization_steps);
0072
0073
0074
0075
0076
0077
0078
0079 for (i = dividend_size - 2; i >= 0; i--) {
0080 n0 = dividend_ptr[i];
0081 UDIV_QRNND_PREINV(dummy, r, r,
0082 ((n1 << normalization_steps)
0083 | (n0 >> (BITS_PER_MPI_LIMB - normalization_steps))),
0084 divisor_limb, divisor_limb_inverted);
0085 n1 = n0;
0086 }
0087 UDIV_QRNND_PREINV(dummy, r, r,
0088 n1 << normalization_steps,
0089 divisor_limb, divisor_limb_inverted);
0090 return r >> normalization_steps;
0091 } else {
0092 mpi_limb_t divisor_limb_inverted;
0093
0094
0095
0096
0097
0098
0099
0100 if (!(divisor_limb << 1))
0101 divisor_limb_inverted = ~(mpi_limb_t)0;
0102 else
0103 udiv_qrnnd(divisor_limb_inverted, dummy,
0104 -divisor_limb, 0, divisor_limb);
0105
0106 i = dividend_size - 1;
0107 r = dividend_ptr[i];
0108
0109 if (r >= divisor_limb)
0110 r = 0;
0111 else
0112 i--;
0113
0114 for ( ; i >= 0; i--) {
0115 n0 = dividend_ptr[i];
0116 UDIV_QRNND_PREINV(dummy, r, r,
0117 n0, divisor_limb, divisor_limb_inverted);
0118 }
0119 return r;
0120 }
0121 } else {
0122 if (UDIV_NEEDS_NORMALIZATION) {
0123 int normalization_steps;
0124
0125 normalization_steps = count_leading_zeros(divisor_limb);
0126 if (normalization_steps) {
0127 divisor_limb <<= normalization_steps;
0128
0129 n1 = dividend_ptr[dividend_size - 1];
0130 r = n1 >> (BITS_PER_MPI_LIMB - normalization_steps);
0131
0132
0133
0134
0135
0136
0137
0138 for (i = dividend_size - 2; i >= 0; i--) {
0139 n0 = dividend_ptr[i];
0140 udiv_qrnnd(dummy, r, r,
0141 ((n1 << normalization_steps)
0142 | (n0 >> (BITS_PER_MPI_LIMB - normalization_steps))),
0143 divisor_limb);
0144 n1 = n0;
0145 }
0146 udiv_qrnnd(dummy, r, r,
0147 n1 << normalization_steps,
0148 divisor_limb);
0149 return r >> normalization_steps;
0150 }
0151 }
0152
0153
0154
0155 i = dividend_size - 1;
0156 r = dividend_ptr[i];
0157
0158 if (r >= divisor_limb)
0159 r = 0;
0160 else
0161 i--;
0162
0163 for (; i >= 0; i--) {
0164 n0 = dividend_ptr[i];
0165 udiv_qrnnd(dummy, r, r, n0, divisor_limb);
0166 }
0167 return r;
0168 }
0169 }
0170
0171
0172
0173
0174
0175
0176
0177
0178
0179
0180
0181
0182
0183
0184
0185
0186
0187
0188 mpi_limb_t
0189 mpihelp_divrem(mpi_ptr_t qp, mpi_size_t qextra_limbs,
0190 mpi_ptr_t np, mpi_size_t nsize, mpi_ptr_t dp, mpi_size_t dsize)
0191 {
0192 mpi_limb_t most_significant_q_limb = 0;
0193
0194 switch (dsize) {
0195 case 0:
0196
0197
0198
0199
0200
0201
0202 return 1 / dsize;
0203
0204 case 1:
0205 {
0206 mpi_size_t i;
0207 mpi_limb_t n1;
0208 mpi_limb_t d;
0209
0210 d = dp[0];
0211 n1 = np[nsize - 1];
0212
0213 if (n1 >= d) {
0214 n1 -= d;
0215 most_significant_q_limb = 1;
0216 }
0217
0218 qp += qextra_limbs;
0219 for (i = nsize - 2; i >= 0; i--)
0220 udiv_qrnnd(qp[i], n1, n1, np[i], d);
0221 qp -= qextra_limbs;
0222
0223 for (i = qextra_limbs - 1; i >= 0; i--)
0224 udiv_qrnnd(qp[i], n1, n1, 0, d);
0225
0226 np[0] = n1;
0227 }
0228 break;
0229
0230 case 2:
0231 {
0232 mpi_size_t i;
0233 mpi_limb_t n1, n0, n2;
0234 mpi_limb_t d1, d0;
0235
0236 np += nsize - 2;
0237 d1 = dp[1];
0238 d0 = dp[0];
0239 n1 = np[1];
0240 n0 = np[0];
0241
0242 if (n1 >= d1 && (n1 > d1 || n0 >= d0)) {
0243 sub_ddmmss(n1, n0, n1, n0, d1, d0);
0244 most_significant_q_limb = 1;
0245 }
0246
0247 for (i = qextra_limbs + nsize - 2 - 1; i >= 0; i--) {
0248 mpi_limb_t q;
0249 mpi_limb_t r;
0250
0251 if (i >= qextra_limbs)
0252 np--;
0253 else
0254 np[0] = 0;
0255
0256 if (n1 == d1) {
0257
0258
0259
0260 q = ~(mpi_limb_t) 0;
0261
0262 r = n0 + d1;
0263 if (r < d1) {
0264 add_ssaaaa(n1, n0, r - d0,
0265 np[0], 0, d0);
0266 qp[i] = q;
0267 continue;
0268 }
0269 n1 = d0 - (d0 != 0 ? 1 : 0);
0270 n0 = -d0;
0271 } else {
0272 udiv_qrnnd(q, r, n1, n0, d1);
0273 umul_ppmm(n1, n0, d0, q);
0274 }
0275
0276 n2 = np[0];
0277 q_test:
0278 if (n1 > r || (n1 == r && n0 > n2)) {
0279
0280 q--;
0281 sub_ddmmss(n1, n0, n1, n0, 0, d0);
0282 r += d1;
0283 if (r >= d1)
0284 goto q_test;
0285 }
0286
0287 qp[i] = q;
0288 sub_ddmmss(n1, n0, r, n2, n1, n0);
0289 }
0290 np[1] = n1;
0291 np[0] = n0;
0292 }
0293 break;
0294
0295 default:
0296 {
0297 mpi_size_t i;
0298 mpi_limb_t dX, d1, n0;
0299
0300 np += nsize - dsize;
0301 dX = dp[dsize - 1];
0302 d1 = dp[dsize - 2];
0303 n0 = np[dsize - 1];
0304
0305 if (n0 >= dX) {
0306 if (n0 > dX
0307 || mpihelp_cmp(np, dp, dsize - 1) >= 0) {
0308 mpihelp_sub_n(np, np, dp, dsize);
0309 n0 = np[dsize - 1];
0310 most_significant_q_limb = 1;
0311 }
0312 }
0313
0314 for (i = qextra_limbs + nsize - dsize - 1; i >= 0; i--) {
0315 mpi_limb_t q;
0316 mpi_limb_t n1, n2;
0317 mpi_limb_t cy_limb;
0318
0319 if (i >= qextra_limbs) {
0320 np--;
0321 n2 = np[dsize];
0322 } else {
0323 n2 = np[dsize - 1];
0324 MPN_COPY_DECR(np + 1, np, dsize - 1);
0325 np[0] = 0;
0326 }
0327
0328 if (n0 == dX) {
0329
0330
0331 q = ~(mpi_limb_t) 0;
0332 } else {
0333 mpi_limb_t r;
0334
0335 udiv_qrnnd(q, r, n0, np[dsize - 1], dX);
0336 umul_ppmm(n1, n0, d1, q);
0337
0338 while (n1 > r
0339 || (n1 == r
0340 && n0 > np[dsize - 2])) {
0341 q--;
0342 r += dX;
0343 if (r < dX)
0344 break;
0345 n1 -= n0 < d1;
0346 n0 -= d1;
0347 }
0348 }
0349
0350
0351
0352
0353 cy_limb = mpihelp_submul_1(np, dp, dsize, q);
0354
0355 if (n2 != cy_limb) {
0356 mpihelp_add_n(np, np, dp, dsize);
0357 q--;
0358 }
0359
0360 qp[i] = q;
0361 n0 = np[dsize - 1];
0362 }
0363 }
0364 }
0365
0366 return most_significant_q_limb;
0367 }
0368
0369
0370
0371
0372
0373
0374
0375
0376
0377
0378 mpi_limb_t
0379 mpihelp_divmod_1(mpi_ptr_t quot_ptr,
0380 mpi_ptr_t dividend_ptr, mpi_size_t dividend_size,
0381 mpi_limb_t divisor_limb)
0382 {
0383 mpi_size_t i;
0384 mpi_limb_t n1, n0, r;
0385 mpi_limb_t dummy __maybe_unused;
0386
0387 if (!dividend_size)
0388 return 0;
0389
0390
0391
0392
0393
0394
0395
0396
0397
0398 if (UDIV_TIME > (2 * UMUL_TIME + 6)
0399 && (UDIV_TIME - (2 * UMUL_TIME + 6)) * dividend_size > UDIV_TIME) {
0400 int normalization_steps;
0401
0402 normalization_steps = count_leading_zeros(divisor_limb);
0403 if (normalization_steps) {
0404 mpi_limb_t divisor_limb_inverted;
0405
0406 divisor_limb <<= normalization_steps;
0407
0408
0409
0410
0411
0412
0413 if (!(divisor_limb << 1))
0414 divisor_limb_inverted = ~(mpi_limb_t)0;
0415 else
0416 udiv_qrnnd(divisor_limb_inverted, dummy,
0417 -divisor_limb, 0, divisor_limb);
0418
0419 n1 = dividend_ptr[dividend_size - 1];
0420 r = n1 >> (BITS_PER_MPI_LIMB - normalization_steps);
0421
0422
0423
0424
0425
0426
0427
0428 for (i = dividend_size - 2; i >= 0; i--) {
0429 n0 = dividend_ptr[i];
0430 UDIV_QRNND_PREINV(quot_ptr[i + 1], r, r,
0431 ((n1 << normalization_steps)
0432 | (n0 >> (BITS_PER_MPI_LIMB - normalization_steps))),
0433 divisor_limb, divisor_limb_inverted);
0434 n1 = n0;
0435 }
0436 UDIV_QRNND_PREINV(quot_ptr[0], r, r,
0437 n1 << normalization_steps,
0438 divisor_limb, divisor_limb_inverted);
0439 return r >> normalization_steps;
0440 } else {
0441 mpi_limb_t divisor_limb_inverted;
0442
0443
0444
0445
0446
0447
0448 if (!(divisor_limb << 1))
0449 divisor_limb_inverted = ~(mpi_limb_t) 0;
0450 else
0451 udiv_qrnnd(divisor_limb_inverted, dummy,
0452 -divisor_limb, 0, divisor_limb);
0453
0454 i = dividend_size - 1;
0455 r = dividend_ptr[i];
0456
0457 if (r >= divisor_limb)
0458 r = 0;
0459 else
0460 quot_ptr[i--] = 0;
0461
0462 for ( ; i >= 0; i--) {
0463 n0 = dividend_ptr[i];
0464 UDIV_QRNND_PREINV(quot_ptr[i], r, r,
0465 n0, divisor_limb, divisor_limb_inverted);
0466 }
0467 return r;
0468 }
0469 } else {
0470 if (UDIV_NEEDS_NORMALIZATION) {
0471 int normalization_steps;
0472
0473 normalization_steps = count_leading_zeros(divisor_limb);
0474 if (normalization_steps) {
0475 divisor_limb <<= normalization_steps;
0476
0477 n1 = dividend_ptr[dividend_size - 1];
0478 r = n1 >> (BITS_PER_MPI_LIMB - normalization_steps);
0479
0480
0481
0482
0483
0484
0485
0486 for (i = dividend_size - 2; i >= 0; i--) {
0487 n0 = dividend_ptr[i];
0488 udiv_qrnnd(quot_ptr[i + 1], r, r,
0489 ((n1 << normalization_steps)
0490 | (n0 >> (BITS_PER_MPI_LIMB - normalization_steps))),
0491 divisor_limb);
0492 n1 = n0;
0493 }
0494 udiv_qrnnd(quot_ptr[0], r, r,
0495 n1 << normalization_steps,
0496 divisor_limb);
0497 return r >> normalization_steps;
0498 }
0499 }
0500
0501
0502
0503 i = dividend_size - 1;
0504 r = dividend_ptr[i];
0505
0506 if (r >= divisor_limb)
0507 r = 0;
0508 else
0509 quot_ptr[i--] = 0;
0510
0511 for (; i >= 0; i--) {
0512 n0 = dividend_ptr[i];
0513 udiv_qrnnd(quot_ptr[i], r, r, n0, divisor_limb);
0514 }
0515 return r;
0516 }
0517 }