x86/math-emu/poly_sin.c

0001 // SPDX-License-Identifier: GPL-2.0
0002 /*---------------------------------------------------------------------------+
0003  |  poly_sin.c                                                               |
0004  |                                                                           |
0005  |  Computation of an approximation of the sin function and the cosine       |
0006  |  function by a polynomial.                                                |
0007  |                                                                           |
0008  | Copyright (C) 1992,1993,1994,1997,1999                                    |
0009  |                  W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
0010  |                  E-mail   billm@melbpc.org.au                             |
0011  |                                                                           |
0012  |                                                                           |
0013  +---------------------------------------------------------------------------*/
0014
0015 #include "exception.h"
0016 #include "reg_constant.h"
0017 #include "fpu_emu.h"
0018 #include "fpu_system.h"
0019 #include "control_w.h"
0020 #include "poly.h"
0021
0022 #define N_COEFF_P   4
0023 #define N_COEFF_N   4
0024
     /* Coefficient tables for the series in odd powers of the argument.
        N_COEFF_P and N_COEFF_N are the table lengths; callers pass
        (length - 1) as the last argument of polynomial_Xsig().

        Used by poly_sine() for its small-argument branch and by poly_cos()
        for its large-argument branch (there applied to pi/2 - arg).  In
        both places each table is evaluated with polynomial_Xsig() in the
        fourth power of the argument; the neg_terms_l sum is multiplied by
        the argument squared and negated before the pos_terms_l sum is
        added, so the two tables supply the alternating-sign terms of a
        single series.

        NOTE(review): the entries look like 64-bit binary fractions of
        tuned Taylor-type coefficients scaled by 4 (e.g. 0xaaaaaaaaaaaaaaab
        is 2/3 of 2^64, rounded up) -- confirm against the derivation
        before changing any digit.
      */
0025 static const unsigned long long pos_terms_l[N_COEFF_P] = {
0026     0xaaaaaaaaaaaaaaabLL,
0027     0x00d00d00d00cf906LL,
0028     0x000006b99159a8bbLL,
0029     0x000000000d7392e6LL
0030 };
0031
     /* Terms that enter the same series with a negative sign (see above). */
0032 static const unsigned long long neg_terms_l[N_COEFF_N] = {
0033     0x2222222222222167LL,
0034     0x0002e3bc74aab624LL,
0035     0x0000000b09229062LL,
0036     0x00000000000c7973LL
0037 };
0038
0039 #define N_COEFF_PH  4
0040 #define N_COEFF_NH  4
     /* Coefficient tables for the series in even powers of the argument.
        N_COEFF_PH and N_COEFF_NH are the table lengths; callers pass
        (length - 1) as the last argument of polynomial_Xsig().

        Used by poly_sine() for its large-argument branch (applied to
        pi/2 - arg) and by poly_cos() for its small-argument branch.  As
        with the _l tables, each is evaluated in the fourth power of the
        argument and the neg_terms_h sum is multiplied by the argument
        squared, so the two tables interleave into one alternating series.
        The first pos_terms_h entry is zero.

        NOTE(review): presumably 64-bit binary fractions derived from the
        cos() series; the exact scaling is not visible here -- confirm
        before changing any digit.
      */
0041 static const unsigned long long pos_terms_h[N_COEFF_PH] = {
0042     0x0000000000000000LL,
0043     0x05b05b05b05b0406LL,
0044     0x000049f93edd91a9LL,
0045     0x00000000c9c9ed62LL
0046 };
0047
     /* Terms of the even-power series taken with the opposite sign. */
0048 static const unsigned long long neg_terms_h[N_COEFF_NH] = {
0049     0xaaaaaaaaaaaaaa98LL,
0050     0x001a01a01a019064LL,
0051     0x0000008f76c68a77LL,
0052     0x0000000000d58f5eLL
0053 };
0054
0055 /*--- poly_sine() -----------------------------------------------------------+
0056  |                                                                           |
      |  Approximate sin(x) for the value in *st0_ptr and pass the result to      |
      |  FPU_copy_to_reg0() with TAG_Valid; the result takes the argument's sign. |
      |  Small x uses x - x^3 * P(x^2); larger x uses cos(pi/2 - x) plus a        |
      |  correction for the rounding of the 64-bit pi/2 constant.                 |
      |  NOTE(review): no range check is made here; the code only handles         |
      |  exponents <= 0, so callers presumably ensure abs(x) <= pi/2 -- confirm.  |
0057  +---------------------------------------------------------------------------*/
0058 void poly_sine(FPU_REG *st0_ptr)
0059 {
0060     int exponent, echange;
0061     Xsig accumulator, argSqrd, argTo4;
0062     unsigned long fix_up, adj;
0063     unsigned long long fixed_arg;
0064     FPU_REG result;
0065
         /* 'exponent' names both this local and a function-like macro;
            only the use that is followed by '(' is the macro. */
0066     exponent = exponent(st0_ptr);
0067
         /* Clear all three words of the accumulator before summing terms */
0068     accumulator.lsw = accumulator.midw = accumulator.msw = 0;
0069
0070     /* Split into two ranges, for arguments below and above 1.0 */
0071     /* The boundary between upper and lower is approx 0.88309101259 */
0072     if ((exponent < -1)
0073         || ((exponent == -1) && (st0_ptr->sigh <= 0xe21240aa))) {
0074         /* The argument is <= 0.88309101259 */
0075
             /* argSqrd = significand * significand, then shifted right by
                two bits for every exponent step below -1 (presumably so
                that argSqrd is x^2 with the binary point at the left) */
0076         argSqrd.msw = st0_ptr->sigh;
0077         argSqrd.midw = st0_ptr->sigl;
0078         argSqrd.lsw = 0;
0079         mul64_Xsig(&argSqrd, &significand(st0_ptr));
0080         shr_Xsig(&argSqrd, 2 * (-1 - exponent));
             /* argTo4 = argSqrd * argSqrd */
0081         argTo4.msw = argSqrd.msw;
0082         argTo4.midw = argSqrd.midw;
0083         argTo4.lsw = argSqrd.lsw;
0084         mul_Xsig_Xsig(&argTo4, &argTo4);
0085
             /* Negative-sign terms: polynomial in argTo4, times argSqrd,
                then negated */
0086         polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_l,
0087                 N_COEFF_N - 1);
0088         mul_Xsig_Xsig(&accumulator, &argSqrd);
0089         negate_Xsig(&accumulator);
0090
             /* Positive-sign terms are accumulated on top */
0091         polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_l,
0092                 N_COEFF_P - 1);
0093
0094         shr_Xsig(&accumulator, 2);  /* Divide by four */
0095         accumulator.msw |= 0x80000000;  /* Add 1.0 */
0096
             /* Multiply by the argument's significand three times, i.e.
                form the cubic term; its exponent is handled below */
0097         mul64_Xsig(&accumulator, &significand(st0_ptr));
0098         mul64_Xsig(&accumulator, &significand(st0_ptr));
0099         mul64_Xsig(&accumulator, &significand(st0_ptr));
0100
0101         /* Divide by four, FPU_REG compatible, etc */
0102         exponent = 3 * exponent;
0103
0104         /* The minimum exponent difference is 3 */
             /* NOTE(review): as written the shift count is
                -2 * exponent(st0_ptr), which is 2 when the exponent is -1;
                the "3" above may be counting differently -- confirm. */
0105         shr_Xsig(&accumulator, exponent(st0_ptr) - exponent);
0106
             /* result = argument - cubic term: negate the cubic term and add
                the argument's significand through XSIG_LL */
0107         negate_Xsig(&accumulator);
0108         XSIG_LL(accumulator) += significand(st0_ptr);
0109
             /* The value returned by round_Xsig() is applied as an
                adjustment to the argument's exponent for the result */
0110         echange = round_Xsig(&accumulator);
0111
0112         setexponentpos(&result, exponent(st0_ptr) + echange);
0113     } else {
0114         /* The argument is > 0.88309101259 */
0115         /* We use sin(st(0)) = cos(pi/2-st(0)) */
0116
0117         fixed_arg = significand(st0_ptr);
0118
0119         if (exponent == 0) {
0120             /* The argument is >= 1.0 */
0121
0122             /* Put the binary point at the left. */
0123             fixed_arg <<= 1;
0124         }
0125         /* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */
             /* Unsigned 64-bit subtraction: it wraps modulo 2^64 whenever
                fixed_arg exceeds the constant, which is always the case
                when exponent == -1 in this branch (sigh > 0xe21240aa) */
0126         fixed_arg = 0x921fb54442d18469LL - fixed_arg;
0127         /* There is a special case which arises due to rounding, to fix here. */
0128         if (fixed_arg == 0xffffffffffffffffLL)
0129             fixed_arg = 0;
0130
             /* argSqrd = fixed_arg * fixed_arg (fixed_arg is stored through
                XSIG_LL, the low word is cleared, then multiplied) */
0131         XSIG_LL(argSqrd) = fixed_arg;
0132         argSqrd.lsw = 0;
0133         mul64_Xsig(&argSqrd, &fixed_arg);
0134
             /* argTo4 = argSqrd * argSqrd */
0135         XSIG_LL(argTo4) = XSIG_LL(argSqrd);
0136         argTo4.lsw = argSqrd.lsw;
0137         mul_Xsig_Xsig(&argTo4, &argTo4);
0138
             /* Even-power series from the _h tables:
                accumulator = argSqrd * neg(argTo4) - pos(argTo4) */
0139         polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_h,
0140                 N_COEFF_NH - 1);
0141         mul_Xsig_Xsig(&accumulator, &argSqrd);
0142         negate_Xsig(&accumulator);
0143
0144         polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_h,
0145                 N_COEFF_PH - 1);
0146         negate_Xsig(&accumulator);
0147
             /* Multiply by fixed_arg twice */
0148         mul64_Xsig(&accumulator, &fixed_arg);
0149         mul64_Xsig(&accumulator, &fixed_arg);
0150
             /* accumulator = (argSqrd - accumulator / 8) / 2 */
0151         shr_Xsig(&accumulator, 3);
0152         negate_Xsig(&accumulator);
0153
0154         add_Xsig_Xsig(&accumulator, &argSqrd);
0155
0156         shr_Xsig(&accumulator, 1);
0157
0158         accumulator.lsw |= 1;   /* A zero accumulator here would cause problems */
             /* Negating yields the complement of the sum above */
0159         negate_Xsig(&accumulator);
0160
0161         /* The basic computation is complete. Now fix the answer to
0162            compensate for the error due to the approximation used for
0163            pi/2
0164          */
0165
0166         /* This has an exponent of -65 */
             /* 0x898cc517 is the start of the pi/2 digits that follow the
                64-bit constant used above (see the hex expansion quoted
                earlier in this branch) */
0167         fix_up = 0x898cc517;
0168         /* The fix-up needs to be improved for larger args */
0169         if (argSqrd.msw & 0xffc00000) {
0170             /* Get about 32 bit precision in these: */
                 /* Reduce the fix-up by (fix-up * argSqrd) / 6;
                    mul_32_32() is presumably the upper half of the 32x32
                    product -- confirm */
0171             fix_up -= mul_32_32(0x898cc517, argSqrd.msw) / 6;
0172         }
             /* Scale the fix-up by the top word of fixed_arg */
0173         fix_up = mul_32_32(fix_up, LL_MSW(fixed_arg));
0174
             /* Subtract the fix-up from the low word; if the low word ended
                up larger than before, the subtraction wrapped, so borrow
                from the upper part */
0175         adj = accumulator.lsw;  /* temp save */
0176         accumulator.lsw -= fix_up;
0177         if (accumulator.lsw > adj)
0178             XSIG_LL(accumulator)--;
0179
0180         echange = round_Xsig(&accumulator);
0181
             /* Result exponent is -1 plus the adjustment from rounding */
0182         setexponentpos(&result, echange - 1);
0183     }
0184
         /* Common tail: store the significand, copy the argument's sign
            onto the result, and hand the result over */
0185     significand(&result) = XSIG_LL(accumulator);
0186     setsign(&result, getsign(st0_ptr));
0187     FPU_copy_to_reg0(&result, TAG_Valid);
0188
0189 #ifdef PARANOID
         /* Sanity check after the result has been handed over: a result with
            exponent >= 0 and significand above 0x8000000000000000
            (presumably a magnitude above 1.0) raises an internal error */
0190     if ((exponent(&result) >= 0)
0191         && (significand(&result) > 0x8000000000000000LL)) {
0192         EXCEPTION(EX_INTERNAL | 0x150);
0193     }
0194 #endif /* PARANOID */
0195
0196 }
0197
0198 /*--- poly_cos() ------------------------------------------------------------+
0199  |                                                                           |
      |  Approximate cos(x) for the value in *st0_ptr and pass the result to      |
      |  FPU_copy_to_reg0() with TAG_Valid (or CONST_1 when the result is 1.0).   |
      |  Small x uses the cosine series directly; larger x uses the sine series   |
      |  on (pi/2 - x) plus a correction for the rounding of the pi/2 constant.   |
      |  With PARANOID defined, an argument above pi/2 raises EX_Invalid and      |
      |  CONST_QNaN is passed to FPU_copy_to_reg0() instead.                      |
0200  +---------------------------------------------------------------------------*/
0201 void poly_cos(FPU_REG *st0_ptr)
0202 {
0203     FPU_REG result;
0204     long int exponent, exp2, echange;
0205     Xsig accumulator, argSqrd, fix_up, argTo4;
0206     unsigned long long fixed_arg;
0207
0208 #ifdef PARANOID
         /* Reject arguments with exponent > 0, or exponent 0 and a
            significand above 0xc90fdaa22168c234 (pi/2 to 64 bits) */
0209     if ((exponent(st0_ptr) > 0)
0210         || ((exponent(st0_ptr) == 0)
0211         && (significand(st0_ptr) > 0xc90fdaa22168c234LL))) {
0212         EXCEPTION(EX_Invalid);
0213         FPU_copy_to_reg0(&CONST_QNaN, TAG_Special);
0214         return;
0215     }
0216 #endif /* PARANOID */
0217
         /* 'exponent' names both this local and a function-like macro;
            only the use that is followed by '(' is the macro. */
0218     exponent = exponent(st0_ptr);
0219
         /* Clear all three words of the accumulator before summing terms */
0220     accumulator.lsw = accumulator.midw = accumulator.msw = 0;
0221
0222     if ((exponent < -1)
0223         || ((exponent == -1) && (st0_ptr->sigh <= 0xb00d6f54))) {
0224         /* arg is < 0.687705 */
0225
             /* argSqrd = significand * significand */
0226         argSqrd.msw = st0_ptr->sigh;
0227         argSqrd.midw = st0_ptr->sigl;
0228         argSqrd.lsw = 0;
0229         mul64_Xsig(&argSqrd, &significand(st0_ptr));
0230
0231         if (exponent < -1) {
0232             /* shift the argument right by the required places */
0233             shr_Xsig(&argSqrd, 2 * (-1 - exponent));
0234         }
0235
             /* argTo4 = argSqrd * argSqrd */
0236         argTo4.msw = argSqrd.msw;
0237         argTo4.midw = argSqrd.midw;
0238         argTo4.lsw = argSqrd.lsw;
0239         mul_Xsig_Xsig(&argTo4, &argTo4);
0240
             /* Even-power series from the _h tables:
                accumulator = argSqrd * neg(argTo4) - pos(argTo4) */
0241         polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_h,
0242                 N_COEFF_NH - 1);
0243         mul_Xsig_Xsig(&accumulator, &argSqrd);
0244         negate_Xsig(&accumulator);
0245
0246         polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_h,
0247                 N_COEFF_PH - 1);
0248         negate_Xsig(&accumulator);
0249
             /* Multiply by the argument's significand twice, then apply the
                same exponent scaling that was applied to argSqrd
                (-2 * (1 + exponent) is zero when exponent == -1) */
0250         mul64_Xsig(&accumulator, &significand(st0_ptr));
0251         mul64_Xsig(&accumulator, &significand(st0_ptr));
0252         shr_Xsig(&accumulator, -2 * (1 + exponent));
0253
             /* accumulator = (argSqrd - accumulator / 8) / 2 */
0254         shr_Xsig(&accumulator, 3);
0255         negate_Xsig(&accumulator);
0256
0257         add_Xsig_Xsig(&accumulator, &argSqrd);
0258
0259         shr_Xsig(&accumulator, 1);
0260
0261         /* It doesn't matter if accumulator is all zero here, the
0262            following code will work ok */
0263         negate_Xsig(&accumulator);
0264
             /* Round: if the top bit of the low word is set, increment the
                part accessed through XSIG_LL */
0265         if (accumulator.lsw & 0x80000000)
0266             XSIG_LL(accumulator)++;
0267         if (accumulator.msw == 0) {
0268             /* The result is 1.0 */
0269             FPU_copy_to_reg0(&CONST_1, TAG_Valid);
0270             return;
0271         } else {
0272             significand(&result) = XSIG_LL(accumulator);
0273
0274             /* will be a valid positive nr with expon = -1 */
0275             setexponentpos(&result, -1);
0276         }
0277     } else {
             /* Larger arguments: evaluate the sine series on (pi/2 - arg) */
0278         fixed_arg = significand(st0_ptr);
0279
0280         if (exponent == 0) {
0281             /* The argument is >= 1.0 */
0282
0283             /* Put the binary point at the left. */
0284             fixed_arg <<= 1;
0285         }
0286         /* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */
             /* Unsigned 64-bit subtraction: it wraps modulo 2^64 whenever
                fixed_arg exceeds the constant, which is always the case
                when exponent == -1 in this branch (sigh > 0xb00d6f54) */
0287         fixed_arg = 0x921fb54442d18469LL - fixed_arg;
0288         /* There is a special case which arises due to rounding, to fix here. */
0289         if (fixed_arg == 0xffffffffffffffffLL)
0290             fixed_arg = 0;
0291
             /* From here 'exponent' and 'exp2' track the scaling of
                fixed_arg rather than the argument's exponent; both start
                at -1 */
0292         exponent = -1;
0293         exp2 = -1;
0294
0295         /* A shift is needed here only for a narrow range of arguments,
0296            i.e. for fixed_arg approx 2^-32, but we pick up more... */
0297         if (!(LL_MSW(fixed_arg) & 0xffff0000)) {
0298             fixed_arg <<= 16;
0299             exponent -= 16;
0300             exp2 -= 16;
0301         }
0302
             /* argSqrd = fixed_arg * fixed_arg */
0303         XSIG_LL(argSqrd) = fixed_arg;
0304         argSqrd.lsw = 0;
0305         mul64_Xsig(&argSqrd, &fixed_arg);
0306
0307         if (exponent < -1) {
0308             /* shift the argument right by the required places */
0309             shr_Xsig(&argSqrd, 2 * (-1 - exponent));
0310         }
0311
             /* argTo4 = argSqrd * argSqrd */
0312         argTo4.msw = argSqrd.msw;
0313         argTo4.midw = argSqrd.midw;
0314         argTo4.lsw = argSqrd.lsw;
0315         mul_Xsig_Xsig(&argTo4, &argTo4);
0316
             /* Odd-power series from the _l tables:
                accumulator = pos(argTo4) - argSqrd * neg(argTo4) */
0317         polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_l,
0318                 N_COEFF_N - 1);
0319         mul_Xsig_Xsig(&accumulator, &argSqrd);
0320         negate_Xsig(&accumulator);
0321
0322         polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_l,
0323                 N_COEFF_P - 1);
0324
0325         shr_Xsig(&accumulator, 2);  /* Divide by four */
0326         accumulator.msw |= 0x80000000;  /* Add 1.0 */
0327
             /* Multiply by fixed_arg three times to form the cubic term */
0328         mul64_Xsig(&accumulator, &fixed_arg);
0329         mul64_Xsig(&accumulator, &fixed_arg);
0330         mul64_Xsig(&accumulator, &fixed_arg);
0331
0332         /* Divide by four, FPU_REG compatible, etc */
0333         exponent = 3 * exponent;
0334
0335         /* The minimum exponent difference is 3 */
             /* NOTE(review): as written the shift count is exp2 - 3 * exp2,
                which is 2 when no 16-bit shift was applied; the "3" above
                may be counting differently -- confirm. */
0336         shr_Xsig(&accumulator, exp2 - exponent);
0337
             /* fixed_arg - cubic term */
0338         negate_Xsig(&accumulator);
0339         XSIG_LL(accumulator) += fixed_arg;
0340
0341         /* The basic computation is complete. Now fix the answer to
0342            compensate for the error due to the approximation used for
0343            pi/2
0344          */
0345
0346         /* This has an exponent of -65 */
             /* 0x898cc51701b839a2 is the run of pi/2 digits that follows
                the 64-bit constant used above (see the hex expansion
                quoted earlier in this branch) */
0347         XSIG_LL(fix_up) = 0x898cc51701b839a2ll;
0348         fix_up.lsw = 0;
0349
0350         /* The fix-up needs to be improved for larger args */
0351         if (argSqrd.msw & 0xffc00000) {
0352             /* Get about 32 bit precision in these: */
                 /* Adjust the top word by -(fix-up * argSqrd) / 2 and
                    +(fix-up * argTo4) / 24; mul_32_32() is presumably the
                    upper half of the 32x32 product -- confirm */
0353             fix_up.msw -= mul_32_32(0x898cc517, argSqrd.msw) / 2;
0354             fix_up.msw += mul_32_32(0x898cc517, argTo4.msw) / 24;
0355         }
0356
             /* Fold the value returned by norm_Xsig() into exp2 (presumably
                the exponent change caused by normalising), then shift the
                fix-up into line with the accumulator before adding it */
0357         exp2 += norm_Xsig(&accumulator);
0358         shr_Xsig(&accumulator, 1);  /* Prevent overflow */
0359         exp2++;
0360         shr_Xsig(&fix_up, 65 + exp2);
0361
0362         add_Xsig_Xsig(&accumulator, &fix_up);
0363
0364         echange = round_Xsig(&accumulator);
0365
             /* Result exponent is exp2 plus the adjustment from rounding */
0366         setexponentpos(&result, exp2 + echange);
0367         significand(&result) = XSIG_LL(accumulator);
0368     }
0369
         /* No setsign() call is made on this path, unlike poly_sine() */
0370     FPU_copy_to_reg0(&result, TAG_Valid);
0371
0372 #ifdef PARANOID
         /* Sanity check after the result has been handed over: a result with
            exponent >= 0 and significand above 0x8000000000000000
            (presumably a magnitude above 1.0) raises an internal error */
0373     if ((exponent(&result) >= 0)
0374         && (significand(&result) > 0x8000000000000000LL)) {
0375         EXCEPTION(EX_INTERNAL | 0x151);
0376     }
0377 #endif /* PARANOID */
0378
0379 }
0378
0379 }