include/math-emu/op-2.h

0001 /* Software floating-point emulation.
0002    Basic two-word fraction declaration and manipulation.
0003    Copyright (C) 1997,1998,1999 Free Software Foundation, Inc.
0004    This file is part of the GNU C Library.
0005    Contributed by Richard Henderson (rth@cygnus.com),
0006           Jakub Jelinek (jj@ultra.linux.cz),
0007           David S. Miller (davem@redhat.com) and
0008           Peter Maydell (pmaydell@chiark.greenend.org.uk).
0009
0010    The GNU C Library is free software; you can redistribute it and/or
0011    modify it under the terms of the GNU Library General Public License as
0012    published by the Free Software Foundation; either version 2 of the
0013    License, or (at your option) any later version.
0014
0015    The GNU C Library is distributed in the hope that it will be useful,
0016    but WITHOUT ANY WARRANTY; without even the implied warranty of
0017    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
0018    Library General Public License for more details.
0019
0020    You should have received a copy of the GNU Library General Public
0021    License along with the GNU C Library; see the file COPYING.LIB.  If
0022    not, write to the Free Software Foundation, Inc.,
0023    59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
0024
0025 #ifndef __MATH_EMU_OP_2_H__
0026 #define __MATH_EMU_OP_2_H__
0027
0028 #define _FP_FRAC_DECL_2(X)  _FP_W_TYPE X##_f0 = 0, X##_f1 = 0
0029 #define _FP_FRAC_COPY_2(D,S)    (D##_f0 = S##_f0, D##_f1 = S##_f1)
0030 #define _FP_FRAC_SET_2(X,I) __FP_FRAC_SET_2(X, I)
0031 #define _FP_FRAC_HIGH_2(X)  (X##_f1)
0032 #define _FP_FRAC_LOW_2(X)   (X##_f0)
0033 #define _FP_FRAC_WORD_2(X,w)    (X##_f##w)
0034 #define _FP_FRAC_SLL_2(X, N) (                             \
0035     (void) (((N) < _FP_W_TYPE_SIZE)                        \
0036       ? ({                                     \
0037         if (__builtin_constant_p(N) && (N) == 1) {             \
0038             X##_f1 = X##_f1 + X##_f1 +                 \
0039                 (((_FP_WS_TYPE) (X##_f0)) < 0);            \
0040             X##_f0 += X##_f0;                      \
0041         } else {                               \
0042             X##_f1 = X##_f1 << (N) | X##_f0 >>             \
0043                         (_FP_W_TYPE_SIZE - (N));       \
0044             X##_f0 <<= (N);                        \
0045         }                                  \
0046         0;                                 \
0047         })                                     \
0048       : ({                                     \
0049           X##_f1 = X##_f0 << ((N) - _FP_W_TYPE_SIZE);              \
0050           X##_f0 = 0;                              \
0051       })))
0052
0053
0054 #define _FP_FRAC_SRL_2(X, N) (                             \
0055     (void) (((N) < _FP_W_TYPE_SIZE)                        \
0056       ? ({                                     \
0057           X##_f0 = X##_f0 >> (N) | X##_f1 << (_FP_W_TYPE_SIZE - (N));      \
0058           X##_f1 >>= (N);                              \
0059         })                                     \
0060       : ({                                     \
0061           X##_f0 = X##_f1 >> ((N) - _FP_W_TYPE_SIZE);              \
0062           X##_f1 = 0;                              \
0063         })))
0064
0065
0066 /* Right shift with sticky-lsb.  */
0067 #define _FP_FRAC_SRS_2(X, N, sz) (                         \
0068     (void) (((N) < _FP_W_TYPE_SIZE)                        \
0069       ? ({                                     \
0070           X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) | X##_f0 >> (N)      \
0071             | (__builtin_constant_p(N) && (N) == 1             \
0072                ? X##_f0 & 1                        \
0073                : (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0));       \
0074         X##_f1 >>= (N);                            \
0075         })                                     \
0076       : ({                                     \
0077           X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE)              \
0078             | ((((N) == _FP_W_TYPE_SIZE                \
0079                  ? 0                           \
0080                  : (X##_f1 << (2*_FP_W_TYPE_SIZE - (N))))          \
0081                 | X##_f0) != 0));                      \
0082           X##_f1 = 0;                              \
0083         })))
0084
0085 #define _FP_FRAC_ADDI_2(X,I)    \
0086   __FP_FRAC_ADDI_2(X##_f1, X##_f0, I)
0087
0088 #define _FP_FRAC_ADD_2(R,X,Y)   \
0089   __FP_FRAC_ADD_2(R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)
0090
0091 #define _FP_FRAC_SUB_2(R,X,Y)   \
0092   __FP_FRAC_SUB_2(R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)
0093
0094 #define _FP_FRAC_DEC_2(X,Y) \
0095   __FP_FRAC_DEC_2(X##_f1, X##_f0, Y##_f1, Y##_f0)
0096
0097 #define _FP_FRAC_CLZ_2(R,X) \
0098   do {              \
0099     if (X##_f1)         \
0100       __FP_CLZ(R,X##_f1);   \
0101     else            \
0102     {               \
0103       __FP_CLZ(R,X##_f0);   \
0104       R += _FP_W_TYPE_SIZE; \
0105     }               \
0106   } while(0)
0107
0108 /* Predicates */
0109 #define _FP_FRAC_NEGP_2(X)  ((_FP_WS_TYPE)X##_f1 < 0)
0110 #define _FP_FRAC_ZEROP_2(X) ((X##_f1 | X##_f0) == 0)
0111 #define _FP_FRAC_OVERP_2(fs,X)  (_FP_FRAC_HIGH_##fs(X) & _FP_OVERFLOW_##fs)
0112 #define _FP_FRAC_CLEAR_OVERP_2(fs,X)    (_FP_FRAC_HIGH_##fs(X) &= ~_FP_OVERFLOW_##fs)
0113 #define _FP_FRAC_EQ_2(X, Y) (X##_f1 == Y##_f1 && X##_f0 == Y##_f0)
0114 #define _FP_FRAC_GT_2(X, Y) \
0115   (X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X##_f0 > Y##_f0))
0116 #define _FP_FRAC_GE_2(X, Y) \
0117   (X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X##_f0 >= Y##_f0))
0118
0119 #define _FP_ZEROFRAC_2      0, 0
0120 #define _FP_MINFRAC_2       0, 1
0121 #define _FP_MAXFRAC_2       (~(_FP_WS_TYPE)0), (~(_FP_WS_TYPE)0)
0122
0123 /*
0124  * Internals
0125  */
0126
0127 #define __FP_FRAC_SET_2(X,I1,I0)    (X##_f0 = I0, X##_f1 = I1)
0128
0129 #define __FP_CLZ_2(R, xh, xl)   \
0130   do {              \
0131     if (xh)         \
0132       __FP_CLZ(R,xh);       \
0133     else            \
0134     {               \
0135       __FP_CLZ(R,xl);       \
0136       R += _FP_W_TYPE_SIZE; \
0137     }               \
0138   } while(0)
0139
0140 #if 0
0141
0142 #ifndef __FP_FRAC_ADDI_2
0143 #define __FP_FRAC_ADDI_2(xh, xl, i) \
0144   (xh += ((xl += i) < i))
0145 #endif
0146 #ifndef __FP_FRAC_ADD_2
0147 #define __FP_FRAC_ADD_2(rh, rl, xh, xl, yh, yl) \
0148   (rh = xh + yh + ((rl = xl + yl) < xl))
0149 #endif
0150 #ifndef __FP_FRAC_SUB_2
0151 #define __FP_FRAC_SUB_2(rh, rl, xh, xl, yh, yl) \
0152   (rh = xh - yh - ((rl = xl - yl) > xl))
0153 #endif
0154 #ifndef __FP_FRAC_DEC_2
0155 #define __FP_FRAC_DEC_2(xh, xl, yh, yl) \
0156   do {                  \
0157     UWtype _t = xl;         \
0158     xh -= yh + ((xl -= yl) > _t);   \
0159   } while (0)
0160 #endif
0161
0162 #else
0163
0164 #undef __FP_FRAC_ADDI_2
0165 #define __FP_FRAC_ADDI_2(xh, xl, i) add_ssaaaa(xh, xl, xh, xl, 0, i)
0166 #undef __FP_FRAC_ADD_2
0167 #define __FP_FRAC_ADD_2         add_ssaaaa
0168 #undef __FP_FRAC_SUB_2
0169 #define __FP_FRAC_SUB_2         sub_ddmmss
0170 #undef __FP_FRAC_DEC_2
0171 #define __FP_FRAC_DEC_2(xh, xl, yh, yl) sub_ddmmss(xh, xl, xh, xl, yh, yl)
0172
0173 #endif
0174
0175 /*
0176  * Unpack the raw bits of a native fp value.  Do not classify or
0177  * normalize the data.
0178  */
0179
0180 #define _FP_UNPACK_RAW_2(fs, X, val)            \
0181   do {                          \
0182     union _FP_UNION_##fs _flo; _flo.flt = (val);    \
0183                             \
0184     X##_f0 = _flo.bits.frac0;               \
0185     X##_f1 = _flo.bits.frac1;               \
0186     X##_e  = _flo.bits.exp;             \
0187     X##_s  = _flo.bits.sign;                \
0188   } while (0)
0189
0190 #define _FP_UNPACK_RAW_2_P(fs, X, val)          \
0191   do {                          \
0192     union _FP_UNION_##fs *_flo =            \
0193       (union _FP_UNION_##fs *)(val);            \
0194                             \
0195     X##_f0 = _flo->bits.frac0;              \
0196     X##_f1 = _flo->bits.frac1;              \
0197     X##_e  = _flo->bits.exp;                \
0198     X##_s  = _flo->bits.sign;               \
0199   } while (0)
0200
0201
0202 /*
0203  * Repack the raw bits of a native fp value.
0204  */
0205
0206 #define _FP_PACK_RAW_2(fs, val, X)          \
0207   do {                          \
0208     union _FP_UNION_##fs _flo;              \
0209                             \
0210     _flo.bits.frac0 = X##_f0;               \
0211     _flo.bits.frac1 = X##_f1;               \
0212     _flo.bits.exp   = X##_e;                \
0213     _flo.bits.sign  = X##_s;                \
0214                             \
0215     (val) = _flo.flt;                   \
0216   } while (0)
0217
0218 #define _FP_PACK_RAW_2_P(fs, val, X)            \
0219   do {                          \
0220     union _FP_UNION_##fs *_flo =            \
0221       (union _FP_UNION_##fs *)(val);            \
0222                             \
0223     _flo->bits.frac0 = X##_f0;              \
0224     _flo->bits.frac1 = X##_f1;              \
0225     _flo->bits.exp   = X##_e;               \
0226     _flo->bits.sign  = X##_s;               \
0227   } while (0)
0228
0229
0230 /*
0231  * Multiplication algorithms:
0232  */
0233
0234 /* Given a 1W * 1W => 2W primitive, do the extended multiplication.  */
0235
0236 #define _FP_MUL_MEAT_2_wide(wfracbits, R, X, Y, doit)           \
0237   do {                                  \
0238     _FP_FRAC_DECL_4(_z); _FP_FRAC_DECL_2(_b); _FP_FRAC_DECL_2(_c);  \
0239                                     \
0240     doit(_FP_FRAC_WORD_4(_z,1), _FP_FRAC_WORD_4(_z,0), X##_f0, Y##_f0); \
0241     doit(_b_f1, _b_f0, X##_f0, Y##_f1);                 \
0242     doit(_c_f1, _c_f0, X##_f1, Y##_f0);                 \
0243     doit(_FP_FRAC_WORD_4(_z,3), _FP_FRAC_WORD_4(_z,2), X##_f1, Y##_f1); \
0244                                     \
0245     __FP_FRAC_ADD_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),    \
0246             _FP_FRAC_WORD_4(_z,1), 0, _b_f1, _b_f0,     \
0247             _FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),    \
0248             _FP_FRAC_WORD_4(_z,1));             \
0249     __FP_FRAC_ADD_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),    \
0250             _FP_FRAC_WORD_4(_z,1), 0, _c_f1, _c_f0,     \
0251             _FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),    \
0252             _FP_FRAC_WORD_4(_z,1));             \
0253                                     \
0254     /* Normalize since we know where the msb of the multiplicands   \
0255        were (bit B), we know that the msb of the of the product is  \
0256        at either 2B or 2B-1.  */                    \
0257     _FP_FRAC_SRS_4(_z, wfracbits-1, 2*wfracbits);           \
0258     R##_f0 = _FP_FRAC_WORD_4(_z,0);                 \
0259     R##_f1 = _FP_FRAC_WORD_4(_z,1);                 \
0260   } while (0)
0261
0262 /* Given a 1W * 1W => 2W primitive, do the extended multiplication.
0263    Do only 3 multiplications instead of four. This one is for machines
0264    where multiplication is much more expensive than subtraction.  */
0265
0266 #define _FP_MUL_MEAT_2_wide_3mul(wfracbits, R, X, Y, doit)      \
0267   do {                                  \
0268     _FP_FRAC_DECL_4(_z); _FP_FRAC_DECL_2(_b); _FP_FRAC_DECL_2(_c);  \
0269     _FP_W_TYPE _d;                          \
0270     int _c1, _c2;                           \
0271                                     \
0272     _b_f0 = X##_f0 + X##_f1;                        \
0273     _c1 = _b_f0 < X##_f0;                       \
0274     _b_f1 = Y##_f0 + Y##_f1;                        \
0275     _c2 = _b_f1 < Y##_f0;                       \
0276     doit(_d, _FP_FRAC_WORD_4(_z,0), X##_f0, Y##_f0);            \
0277     doit(_FP_FRAC_WORD_4(_z,2), _FP_FRAC_WORD_4(_z,1), _b_f0, _b_f1);   \
0278     doit(_c_f1, _c_f0, X##_f1, Y##_f1);                 \
0279                                     \
0280     _b_f0 &= -_c2;                          \
0281     _b_f1 &= -_c1;                          \
0282     __FP_FRAC_ADD_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),    \
0283             _FP_FRAC_WORD_4(_z,1), (_c1 & _c2), 0, _d,      \
0284             0, _FP_FRAC_WORD_4(_z,2), _FP_FRAC_WORD_4(_z,1));   \
0285     __FP_FRAC_ADDI_2(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),   \
0286              _b_f0);                        \
0287     __FP_FRAC_ADDI_2(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),   \
0288              _b_f1);                        \
0289     __FP_FRAC_DEC_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),    \
0290             _FP_FRAC_WORD_4(_z,1),              \
0291             0, _d, _FP_FRAC_WORD_4(_z,0));          \
0292     __FP_FRAC_DEC_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),    \
0293             _FP_FRAC_WORD_4(_z,1), 0, _c_f1, _c_f0);        \
0294     __FP_FRAC_ADD_2(_FP_FRAC_WORD_4(_z,3), _FP_FRAC_WORD_4(_z,2),   \
0295             _c_f1, _c_f0,                   \
0296             _FP_FRAC_WORD_4(_z,3), _FP_FRAC_WORD_4(_z,2));  \
0297                                     \
0298     /* Normalize since we know where the msb of the multiplicands   \
0299        were (bit B), we know that the msb of the of the product is  \
0300        at either 2B or 2B-1.  */                    \
0301     _FP_FRAC_SRS_4(_z, wfracbits-1, 2*wfracbits);           \
0302     R##_f0 = _FP_FRAC_WORD_4(_z,0);                 \
0303     R##_f1 = _FP_FRAC_WORD_4(_z,1);                 \
0304   } while (0)
0305
0306 #define _FP_MUL_MEAT_2_gmp(wfracbits, R, X, Y)              \
0307   do {                                  \
0308     _FP_FRAC_DECL_4(_z);                        \
0309     _FP_W_TYPE _x[2], _y[2];                        \
0310     _x[0] = X##_f0; _x[1] = X##_f1;                 \
0311     _y[0] = Y##_f0; _y[1] = Y##_f1;                 \
0312                                     \
0313     mpn_mul_n(_z_f, _x, _y, 2);                     \
0314                                     \
0315     /* Normalize since we know where the msb of the multiplicands   \
0316        were (bit B), we know that the msb of the of the product is  \
0317        at either 2B or 2B-1.  */                    \
0318     _FP_FRAC_SRS_4(_z, wfracbits-1, 2*wfracbits);           \
0319     R##_f0 = _z_f[0];                           \
0320     R##_f1 = _z_f[1];                           \
0321   } while (0)
0322
0323 /* Do at most 120x120=240 bits multiplication using double floating
0324    point multiplication.  This is useful if floating point
0325    multiplication has much bigger throughput than integer multiply.
0326    It is supposed to work for _FP_W_TYPE_SIZE 64 and wfracbits
0327    between 106 and 120 only.
0328    Caller guarantees that X and Y has (1LLL << (wfracbits - 1)) set.
0329    SETFETZ is a macro which will disable all FPU exceptions and set rounding
0330    towards zero,  RESETFE should optionally reset it back.  */
0331
0332 #define _FP_MUL_MEAT_2_120_240_double(wfracbits, R, X, Y, setfetz, resetfe) \
0333   do {                                      \
0334     static const double _const[] = {                        \
0335       /* 2^-24 */ 5.9604644775390625e-08,                   \
0336       /* 2^-48 */ 3.5527136788005009e-15,                   \
0337       /* 2^-72 */ 2.1175823681357508e-22,                   \
0338       /* 2^-96 */ 1.2621774483536189e-29,                   \
0339       /* 2^28 */ 2.68435456e+08,                        \
0340       /* 2^4 */ 1.600000e+01,                           \
0341       /* 2^-20 */ 9.5367431640625e-07,                      \
0342       /* 2^-44 */ 5.6843418860808015e-14,                   \
0343       /* 2^-68 */ 3.3881317890172014e-21,                   \
0344       /* 2^-92 */ 2.0194839173657902e-28,                   \
0345       /* 2^-116 */ 1.2037062152420224e-35};                 \
0346     double _a240, _b240, _c240, _d240, _e240, _f240,                \
0347        _g240, _h240, _i240, _j240, _k240;                   \
0348     union { double d; UDItype i; } _l240, _m240, _n240, _o240,          \
0349                    _p240, _q240, _r240, _s240;          \
0350     UDItype _t240, _u240, _v240, _w240, _x240, _y240 = 0;           \
0351                                         \
0352     if (wfracbits < 106 || wfracbits > 120)                 \
0353       abort();                                  \
0354                                         \
0355     setfetz;                                    \
0356                                         \
0357     _e240 = (double)(long)(X##_f0 & 0xffffff);                  \
0358     _j240 = (double)(long)(Y##_f0 & 0xffffff);                  \
0359     _d240 = (double)(long)((X##_f0 >> 24) & 0xffffff);              \
0360     _i240 = (double)(long)((Y##_f0 >> 24) & 0xffffff);              \
0361     _c240 = (double)(long)(((X##_f1 << 16) & 0xffffff) | (X##_f0 >> 48));   \
0362     _h240 = (double)(long)(((Y##_f1 << 16) & 0xffffff) | (Y##_f0 >> 48));   \
0363     _b240 = (double)(long)((X##_f1 >> 8) & 0xffffff);               \
0364     _g240 = (double)(long)((Y##_f1 >> 8) & 0xffffff);               \
0365     _a240 = (double)(long)(X##_f1 >> 32);                   \
0366     _f240 = (double)(long)(Y##_f1 >> 32);                   \
0367     _e240 *= _const[3];                             \
0368     _j240 *= _const[3];                             \
0369     _d240 *= _const[2];                             \
0370     _i240 *= _const[2];                             \
0371     _c240 *= _const[1];                             \
0372     _h240 *= _const[1];                             \
0373     _b240 *= _const[0];                             \
0374     _g240 *= _const[0];                             \
0375     _s240.d =                                 _e240*_j240;\
0376     _r240.d =                       _d240*_j240 + _e240*_i240;\
0377     _q240.d =                 _c240*_j240 + _d240*_i240 + _e240*_h240;\
0378     _p240.d =           _b240*_j240 + _c240*_i240 + _d240*_h240 + _e240*_g240;\
0379     _o240.d = _a240*_j240 + _b240*_i240 + _c240*_h240 + _d240*_g240 + _e240*_f240;\
0380     _n240.d = _a240*_i240 + _b240*_h240 + _c240*_g240 + _d240*_f240;        \
0381     _m240.d = _a240*_h240 + _b240*_g240 + _c240*_f240;              \
0382     _l240.d = _a240*_g240 + _b240*_f240;                    \
0383     _k240 =   _a240*_f240;                          \
0384     _r240.d += _s240.d;                             \
0385     _q240.d += _r240.d;                             \
0386     _p240.d += _q240.d;                             \
0387     _o240.d += _p240.d;                             \
0388     _n240.d += _o240.d;                             \
0389     _m240.d += _n240.d;                             \
0390     _l240.d += _m240.d;                             \
0391     _k240 += _l240.d;                               \
0392     _s240.d -= ((_const[10]+_s240.d)-_const[10]);               \
0393     _r240.d -= ((_const[9]+_r240.d)-_const[9]);                 \
0394     _q240.d -= ((_const[8]+_q240.d)-_const[8]);                 \
0395     _p240.d -= ((_const[7]+_p240.d)-_const[7]);                 \
0396     _o240.d += _const[7];                           \
0397     _n240.d += _const[6];                           \
0398     _m240.d += _const[5];                           \
0399     _l240.d += _const[4];                           \
0400     if (_s240.d != 0.0) _y240 = 1;                      \
0401     if (_r240.d != 0.0) _y240 = 1;                      \
0402     if (_q240.d != 0.0) _y240 = 1;                      \
0403     if (_p240.d != 0.0) _y240 = 1;                      \
0404     _t240 = (DItype)_k240;                          \
0405     _u240 = _l240.i;                                \
0406     _v240 = _m240.i;                                \
0407     _w240 = _n240.i;                                \
0408     _x240 = _o240.i;                                \
0409     R##_f1 = (_t240 << (128 - (wfracbits - 1)))                 \
0410          | ((_u240 & 0xffffff) >> ((wfracbits - 1) - 104));         \
0411     R##_f0 = ((_u240 & 0xffffff) << (168 - (wfracbits - 1)))            \
0412              | ((_v240 & 0xffffff) << (144 - (wfracbits - 1)))          \
0413              | ((_w240 & 0xffffff) << (120 - (wfracbits - 1)))          \
0414              | ((_x240 & 0xffffff) >> ((wfracbits - 1) - 96))           \
0415              | _y240;                               \
0416     resetfe;                                    \
0417   } while (0)
0418
0419 /*
0420  * Division algorithms:
0421  */
0422
0423 #define _FP_DIV_MEAT_2_udiv(fs, R, X, Y)                \
0424   do {                                  \
0425     _FP_W_TYPE _n_f2, _n_f1, _n_f0, _r_f1, _r_f0, _m_f1, _m_f0;     \
0426     if (_FP_FRAC_GT_2(X, Y))                        \
0427       {                                 \
0428     _n_f2 = X##_f1 >> 1;                        \
0429     _n_f1 = X##_f1 << (_FP_W_TYPE_SIZE - 1) | X##_f0 >> 1;      \
0430     _n_f0 = X##_f0 << (_FP_W_TYPE_SIZE - 1);            \
0431       }                                 \
0432     else                                \
0433       {                                 \
0434     R##_e--;                            \
0435     _n_f2 = X##_f1;                         \
0436     _n_f1 = X##_f0;                         \
0437     _n_f0 = 0;                          \
0438       }                                 \
0439                                     \
0440     /* Normalize, i.e. make the most significant bit of the         \
0441        denominator set. */                      \
0442     _FP_FRAC_SLL_2(Y, _FP_WFRACXBITS_##fs);             \
0443                                     \
0444     udiv_qrnnd(R##_f1, _r_f1, _n_f2, _n_f1, Y##_f1);            \
0445     umul_ppmm(_m_f1, _m_f0, R##_f1, Y##_f0);                \
0446     _r_f0 = _n_f0;                          \
0447     if (_FP_FRAC_GT_2(_m, _r))                      \
0448       {                                 \
0449     R##_f1--;                           \
0450     _FP_FRAC_ADD_2(_r, Y, _r);                  \
0451     if (_FP_FRAC_GE_2(_r, Y) && _FP_FRAC_GT_2(_m, _r))      \
0452       {                             \
0453         R##_f1--;                           \
0454         _FP_FRAC_ADD_2(_r, Y, _r);                  \
0455       }                             \
0456       }                                 \
0457     _FP_FRAC_DEC_2(_r, _m);                     \
0458                                     \
0459     if (_r_f1 == Y##_f1)                        \
0460       {                                 \
0461     /* This is a special case, not an optimization          \
0462        (_r/Y##_f1 would not fit into UWtype).           \
0463        As _r is guaranteed to be < Y,  R##_f0 can be either     \
0464        (UWtype)-1 or (UWtype)-2.  But as we know what kind      \
0465        of bits it is (sticky, guard, round),  we don't care.    \
0466        We also don't care what the reminder is,  because the    \
0467        guard bit will be set anyway.  -jj */            \
0468     R##_f0 = -1;                            \
0469       }                                 \
0470     else                                \
0471       {                                 \
0472     udiv_qrnnd(R##_f0, _r_f1, _r_f1, _r_f0, Y##_f1);        \
0473     umul_ppmm(_m_f1, _m_f0, R##_f0, Y##_f0);            \
0474     _r_f0 = 0;                          \
0475     if (_FP_FRAC_GT_2(_m, _r))                  \
0476       {                             \
0477         R##_f0--;                           \
0478         _FP_FRAC_ADD_2(_r, Y, _r);                  \
0479         if (_FP_FRAC_GE_2(_r, Y) && _FP_FRAC_GT_2(_m, _r))      \
0480           {                             \
0481         R##_f0--;                       \
0482         _FP_FRAC_ADD_2(_r, Y, _r);              \
0483           }                             \
0484       }                             \
0485     if (!_FP_FRAC_EQ_2(_r, _m))                 \
0486       R##_f0 |= _FP_WORK_STICKY;                    \
0487       }                                 \
0488   } while (0)
0489
0490
0491 #define _FP_DIV_MEAT_2_gmp(fs, R, X, Y)                 \
0492   do {                                  \
0493     _FP_W_TYPE _x[4], _y[2], _z[4];                 \
0494     _y[0] = Y##_f0; _y[1] = Y##_f1;                 \
0495     _x[0] = _x[3] = 0;                          \
0496     if (_FP_FRAC_GT_2(X, Y))                        \
0497       {                                 \
0498     R##_e++;                            \
0499     _x[1] = (X##_f0 << (_FP_WFRACBITS_##fs-1 - _FP_W_TYPE_SIZE) |   \
0500          X##_f1 >> (_FP_W_TYPE_SIZE -               \
0501                 (_FP_WFRACBITS_##fs-1 - _FP_W_TYPE_SIZE))); \
0502     _x[2] = X##_f1 << (_FP_WFRACBITS_##fs-1 - _FP_W_TYPE_SIZE); \
0503       }                                 \
0504     else                                \
0505       {                                 \
0506     _x[1] = (X##_f0 << (_FP_WFRACBITS_##fs - _FP_W_TYPE_SIZE) | \
0507          X##_f1 >> (_FP_W_TYPE_SIZE -               \
0508                 (_FP_WFRACBITS_##fs - _FP_W_TYPE_SIZE)));   \
0509     _x[2] = X##_f1 << (_FP_WFRACBITS_##fs - _FP_W_TYPE_SIZE);   \
0510       }                                 \
0511                                     \
0512     (void) mpn_divrem (_z, 0, _x, 4, _y, 2);                \
0513     R##_f1 = _z[1];                         \
0514     R##_f0 = _z[0] | ((_x[0] | _x[1]) != 0);                \
0515   } while (0)
0516
0517
0518 /*
0519  * Square root algorithms:
0520  * We have just one right now, maybe Newton approximation
0521  * should be added for those machines where division is fast.
0522  */
0523
0524 #define _FP_SQRT_MEAT_2(R, S, T, X, q)          \
0525   do {                          \
0526     while (q)                       \
0527       {                         \
0528     T##_f1 = S##_f1 + q;                \
0529     if (T##_f1 <= X##_f1)               \
0530       {                     \
0531         S##_f1 = T##_f1 + q;            \
0532         X##_f1 -= T##_f1;               \
0533         R##_f1 += q;                \
0534       }                     \
0535     _FP_FRAC_SLL_2(X, 1);               \
0536     q >>= 1;                    \
0537       }                         \
0538     q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1);     \
0539     while (q != _FP_WORK_ROUND)             \
0540       {                         \
0541     T##_f0 = S##_f0 + q;                \
0542     T##_f1 = S##_f1;                \
0543     if (T##_f1 < X##_f1 ||              \
0544         (T##_f1 == X##_f1 && T##_f0 <= X##_f0)) \
0545       {                     \
0546         S##_f0 = T##_f0 + q;            \
0547         S##_f1 += (T##_f0 > S##_f0);        \
0548         _FP_FRAC_DEC_2(X, T);           \
0549         R##_f0 += q;                \
0550       }                     \
0551     _FP_FRAC_SLL_2(X, 1);               \
0552     q >>= 1;                    \
0553       }                         \
0554     if (X##_f0 | X##_f1)                \
0555       {                         \
0556     if (S##_f1 < X##_f1 ||              \
0557         (S##_f1 == X##_f1 && S##_f0 < X##_f0))  \
0558       R##_f0 |= _FP_WORK_ROUND;         \
0559     R##_f0 |= _FP_WORK_STICKY;          \
0560       }                         \
0561   } while (0)
0562
0563
0564 /*
0565  * Assembly/disassembly for converting to/from integral types.
0566  * No shifting or overflow handled here.
0567  */
0568
0569 #define _FP_FRAC_ASSEMBLE_2(r, X, rsize)    \
0570     (void) (((rsize) <= _FP_W_TYPE_SIZE)    \
0571         ? ({ (r) = X##_f0; })       \
0572         : ({                \
0573              (r) = X##_f1;      \
0574              (r) <<= _FP_W_TYPE_SIZE;   \
0575              (r) += X##_f0;     \
0576             }))
0577
0578 #define _FP_FRAC_DISASSEMBLE_2(X, r, rsize)             \
0579   do {                                  \
0580     X##_f0 = r;                             \
0581     X##_f1 = (rsize <= _FP_W_TYPE_SIZE ? 0 : r >> _FP_W_TYPE_SIZE); \
0582   } while (0)
0583
0584 /*
0585  * Convert FP values between word sizes
0586  */
0587
0588 #define _FP_FRAC_CONV_1_2(dfs, sfs, D, S)               \
0589   do {                                  \
0590     if (S##_c != FP_CLS_NAN)                        \
0591       _FP_FRAC_SRS_2(S, (_FP_WFRACBITS_##sfs - _FP_WFRACBITS_##dfs),    \
0592              _FP_WFRACBITS_##sfs);              \
0593     else                                \
0594       _FP_FRAC_SRL_2(S, (_FP_WFRACBITS_##sfs - _FP_WFRACBITS_##dfs));   \
0595     D##_f = S##_f0;                         \
0596   } while (0)
0597
0598 #define _FP_FRAC_CONV_2_1(dfs, sfs, D, S)               \
0599   do {                                  \
0600     D##_f0 = S##_f;                         \
0601     D##_f1 = 0;                             \
0602     _FP_FRAC_SLL_2(D, (_FP_WFRACBITS_##dfs - _FP_WFRACBITS_##sfs)); \
0603   } while (0)
0604
0605 #endif