mips/math-emu/dp_maddf.c

0001 // SPDX-License-Identifier: GPL-2.0-only
0002 /*
0003  * IEEE754 floating point arithmetic
0004  * double precision: MADDF.f (Fused Multiply Add)
0005  * MADDF.fmt: FPR[fd] = FPR[fd] + (FPR[fs] x FPR[ft])
0006  *
0007  * MIPS floating point support
0008  * Copyright (C) 2015 Imagination Technologies, Ltd.
0009  * Author: Markos Chandras <markos.chandras@imgtec.com>
0010  */
0011
0012 #include "ieee754dp.h"
0013
0014
0015 /* 128 bits shift right logical with rounding. */
0016 static void srl128(u64 *hptr, u64 *lptr, int count)
0017 {
0018     u64 low;
0019
0020     if (count >= 128) {
0021         *lptr = *hptr != 0 || *lptr != 0;
0022         *hptr = 0;
0023     } else if (count >= 64) {
0024         if (count == 64) {
0025             *lptr = *hptr | (*lptr != 0);
0026         } else {
0027             low = *lptr;
0028             *lptr = *hptr >> (count - 64);
0029             *lptr |= (*hptr << (128 - count)) != 0 || low != 0;
0030         }
0031         *hptr = 0;
0032     } else {
0033         low = *lptr;
0034         *lptr = low >> count | *hptr << (64 - count);
0035         *lptr |= (low << (64 - count)) != 0;
0036         *hptr = *hptr >> count;
0037     }
0038 }
0039
0040 static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x,
0041                  union ieee754dp y, enum maddf_flags flags)
0042 {
0043     int re;
0044     int rs;
0045     unsigned int lxm;
0046     unsigned int hxm;
0047     unsigned int lym;
0048     unsigned int hym;
0049     u64 lrm;
0050     u64 hrm;
0051     u64 lzm;
0052     u64 hzm;
0053     u64 t;
0054     u64 at;
0055     int s;
0056
0057     COMPXDP;
0058     COMPYDP;
0059     COMPZDP;
0060
0061     EXPLODEXDP;
0062     EXPLODEYDP;
0063     EXPLODEZDP;
0064
0065     FLUSHXDP;
0066     FLUSHYDP;
0067     FLUSHZDP;
0068
0069     ieee754_clearcx();
0070
0071     rs = xs ^ ys;
0072     if (flags & MADDF_NEGATE_PRODUCT)
0073         rs ^= 1;
0074     if (flags & MADDF_NEGATE_ADDITION)
0075         zs ^= 1;
0076
0077     /*
0078      * Handle the cases when at least one of x, y or z is a NaN.
0079      * Order of precedence is sNaN, qNaN and z, x, y.
0080      */
0081     if (zc == IEEE754_CLASS_SNAN)
0082         return ieee754dp_nanxcpt(z);
0083     if (xc == IEEE754_CLASS_SNAN)
0084         return ieee754dp_nanxcpt(x);
0085     if (yc == IEEE754_CLASS_SNAN)
0086         return ieee754dp_nanxcpt(y);
0087     if (zc == IEEE754_CLASS_QNAN)
0088         return z;
0089     if (xc == IEEE754_CLASS_QNAN)
0090         return x;
0091     if (yc == IEEE754_CLASS_QNAN)
0092         return y;
0093
0094     if (zc == IEEE754_CLASS_DNORM)
0095         DPDNORMZ;
0096     /* ZERO z cases are handled separately below */
0097
0098     switch (CLPAIR(xc, yc)) {
0099
0100     /*
0101      * Infinity handling
0102      */
0103     case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO):
0104     case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF):
0105         ieee754_setcx(IEEE754_INVALID_OPERATION);
0106         return ieee754dp_indef();
0107
0108     case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF):
0109     case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF):
0110     case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM):
0111     case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM):
0112     case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF):
0113         if ((zc == IEEE754_CLASS_INF) && (zs != rs)) {
0114             /*
0115              * Cases of addition of infinities with opposite signs
0116              * or subtraction of infinities with same signs.
0117              */
0118             ieee754_setcx(IEEE754_INVALID_OPERATION);
0119             return ieee754dp_indef();
0120         }
0121         /*
0122          * z is here either not an infinity, or an infinity having the
0123          * same sign as product (x*y). The result must be an infinity,
0124          * and its sign is determined only by the sign of product (x*y).
0125          */
0126         return ieee754dp_inf(rs);
0127
0128     case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO):
0129     case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM):
0130     case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM):
0131     case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO):
0132     case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO):
0133         if (zc == IEEE754_CLASS_INF)
0134             return ieee754dp_inf(zs);
0135         if (zc == IEEE754_CLASS_ZERO) {
0136             /* Handle cases +0 + (-0) and similar ones. */
0137             if (zs == rs)
0138                 /*
0139                  * Cases of addition of zeros of equal signs
0140                  * or subtraction of zeroes of opposite signs.
0141                  * The sign of the resulting zero is in any
0142                  * such case determined only by the sign of z.
0143                  */
0144                 return z;
0145
0146             return ieee754dp_zero(ieee754_csr.rm == FPU_CSR_RD);
0147         }
0148         /* x*y is here 0, and z is not 0, so just return z */
0149         return z;
0150
0151     case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
0152         DPDNORMX;
0153         fallthrough;
0154     case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
0155         if (zc == IEEE754_CLASS_INF)
0156             return ieee754dp_inf(zs);
0157         DPDNORMY;
0158         break;
0159
0160     case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM):
0161         if (zc == IEEE754_CLASS_INF)
0162             return ieee754dp_inf(zs);
0163         DPDNORMX;
0164         break;
0165
0166     case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM):
0167         if (zc == IEEE754_CLASS_INF)
0168             return ieee754dp_inf(zs);
0169         /* continue to real computations */
0170     }
0171
0172     /* Finally get to do some computation */
0173
0174     /*
0175      * Do the multiplication bit first
0176      *
0177      * rm = xm * ym, re = xe + ye basically
0178      *
0179      * At this point xm and ym should have been normalized.
0180      */
0181     assert(xm & DP_HIDDEN_BIT);
0182     assert(ym & DP_HIDDEN_BIT);
0183
0184     re = xe + ye;
0185
0186     /* shunt to top of word */
0187     xm <<= 64 - (DP_FBITS + 1);
0188     ym <<= 64 - (DP_FBITS + 1);
0189
0190     /*
0191      * Multiply 64 bits xm and ym to give 128 bits result in hrm:lrm.
0192      */
0193
0194     lxm = xm;
0195     hxm = xm >> 32;
0196     lym = ym;
0197     hym = ym >> 32;
0198
0199     lrm = DPXMULT(lxm, lym);
0200     hrm = DPXMULT(hxm, hym);
0201
0202     t = DPXMULT(lxm, hym);
0203
0204     at = lrm + (t << 32);
0205     hrm += at < lrm;
0206     lrm = at;
0207
0208     hrm = hrm + (t >> 32);
0209
0210     t = DPXMULT(hxm, lym);
0211
0212     at = lrm + (t << 32);
0213     hrm += at < lrm;
0214     lrm = at;
0215
0216     hrm = hrm + (t >> 32);
0217
0218     /* Put explicit bit at bit 126 if necessary */
0219     if ((int64_t)hrm < 0) {
0220         lrm = (hrm << 63) | (lrm >> 1);
0221         hrm = hrm >> 1;
0222         re++;
0223     }
0224
0225     assert(hrm & (1 << 62));
0226
0227     if (zc == IEEE754_CLASS_ZERO) {
0228         /*
0229          * Move explicit bit from bit 126 to bit 55 since the
0230          * ieee754dp_format code expects the mantissa to be
0231          * 56 bits wide (53 + 3 rounding bits).
0232          */
0233         srl128(&hrm, &lrm, (126 - 55));
0234         return ieee754dp_format(rs, re, lrm);
0235     }
0236
0237     /* Move explicit bit from bit 52 to bit 126 */
0238     lzm = 0;
0239     hzm = zm << 10;
0240     assert(hzm & (1 << 62));
0241
0242     /* Make the exponents the same */
0243     if (ze > re) {
0244         /*
0245          * Have to shift y fraction right to align.
0246          */
0247         s = ze - re;
0248         srl128(&hrm, &lrm, s);
0249         re += s;
0250     } else if (re > ze) {
0251         /*
0252          * Have to shift x fraction right to align.
0253          */
0254         s = re - ze;
0255         srl128(&hzm, &lzm, s);
0256         ze += s;
0257     }
0258     assert(ze == re);
0259     assert(ze <= DP_EMAX);
0260
0261     /* Do the addition */
0262     if (zs == rs) {
0263         /*
0264          * Generate 128 bit result by adding two 127 bit numbers
0265          * leaving result in hzm:lzm, zs and ze.
0266          */
0267         hzm = hzm + hrm + (lzm > (lzm + lrm));
0268         lzm = lzm + lrm;
0269         if ((int64_t)hzm < 0) {        /* carry out */
0270             srl128(&hzm, &lzm, 1);
0271             ze++;
0272         }
0273     } else {
0274         if (hzm > hrm || (hzm == hrm && lzm >= lrm)) {
0275             hzm = hzm - hrm - (lzm < lrm);
0276             lzm = lzm - lrm;
0277         } else {
0278             hzm = hrm - hzm - (lrm < lzm);
0279             lzm = lrm - lzm;
0280             zs = rs;
0281         }
0282         if (lzm == 0 && hzm == 0)
0283             return ieee754dp_zero(ieee754_csr.rm == FPU_CSR_RD);
0284
0285         /*
0286          * Put explicit bit at bit 126 if necessary.
0287          */
0288         if (hzm == 0) {
0289             /* left shift by 63 or 64 bits */
0290             if ((int64_t)lzm < 0) {
0291                 /* MSB of lzm is the explicit bit */
0292                 hzm = lzm >> 1;
0293                 lzm = lzm << 63;
0294                 ze -= 63;
0295             } else {
0296                 hzm = lzm;
0297                 lzm = 0;
0298                 ze -= 64;
0299             }
0300         }
0301
0302         t = 0;
0303         while ((hzm >> (62 - t)) == 0)
0304             t++;
0305
0306         assert(t <= 62);
0307         if (t) {
0308             hzm = hzm << t | lzm >> (64 - t);
0309             lzm = lzm << t;
0310             ze -= t;
0311         }
0312     }
0313
0314     /*
0315      * Move explicit bit from bit 126 to bit 55 since the
0316      * ieee754dp_format code expects the mantissa to be
0317      * 56 bits wide (53 + 3 rounding bits).
0318      */
0319     srl128(&hzm, &lzm, (126 - 55));
0320
0321     return ieee754dp_format(zs, ze, lzm);
0322 }
0323
0324 union ieee754dp ieee754dp_maddf(union ieee754dp z, union ieee754dp x,
0325                 union ieee754dp y)
0326 {
0327     return _dp_maddf(z, x, y, 0);
0328 }
0329
0330 union ieee754dp ieee754dp_msubf(union ieee754dp z, union ieee754dp x,
0331                 union ieee754dp y)
0332 {
0333     return _dp_maddf(z, x, y, MADDF_NEGATE_PRODUCT);
0334 }
0335
0336 union ieee754dp ieee754dp_madd(union ieee754dp z, union ieee754dp x,
0337                 union ieee754dp y)
0338 {
0339     return _dp_maddf(z, x, y, 0);
0340 }
0341
0342 union ieee754dp ieee754dp_msub(union ieee754dp z, union ieee754dp x,
0343                 union ieee754dp y)
0344 {
0345     return _dp_maddf(z, x, y, MADDF_NEGATE_ADDITION);
0346 }
0347
0348 union ieee754dp ieee754dp_nmadd(union ieee754dp z, union ieee754dp x,
0349                 union ieee754dp y)
0350 {
0351     return _dp_maddf(z, x, y, MADDF_NEGATE_PRODUCT|MADDF_NEGATE_ADDITION);
0352 }
0353
0354 union ieee754dp ieee754dp_nmsub(union ieee754dp z, union ieee754dp x,
0355                 union ieee754dp y)
0356 {
0357     return _dp_maddf(z, x, y, MADDF_NEGATE_PRODUCT);
0358 }