/*
 *  linux/arch/arm/vfp/vfpdouble.c
 *
 * This code is derived in part from John R. Hauser's SoftFloat library, which
 * carries the following notice:
 *
 * ===========================================================================
 * This C source file is part of the SoftFloat IEC/IEEE Floating-point
 * Arithmetic Package, Release 2.
 *
 * Written by John R. Hauser.  This work was made possible in part by the
 * International Computer Science Institute, located at Suite 600, 1947 Center
 * Street, Berkeley, California 94704.  Funding was partially provided by the
 * National Science Foundation under grant MIP-9311980.  The original version
 * of this code was written as part of a project to build a fixed-point vector
 * processor in collaboration with the University of California at Berkeley,
 * overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
 * is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
 * arithmetic/softfloat.html'.
 *
 * THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
 * has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
 * TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
 * PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
 * AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
 *
 * Derivative works are acceptable, even for commercial purposes, so long as
 * (1) they include prominent notice that the work is derivative, and (2) they
 * include prominent notice akin to these three paragraphs for those parts of
 * this code that are retained.
 * ===========================================================================
 */
#include <linux/kernel.h>
#include <linux/bitops.h>

#include <asm/div64.h>
#include <asm/vfp.h>

#include "vfpinstr.h"
#include "vfp.h"

static struct vfp_double vfp_double_default_qnan = {
    .exponent   = 2047,
    .sign       = 0,
    .significand    = VFP_DOUBLE_SIGNIFICAND_QNAN,
};

static void vfp_double_dump(const char *str, struct vfp_double *d)
{
    pr_debug("VFP: %s: sign=%d exponent=%d significand=%016llx\n",
         str, d->sign != 0, d->exponent, d->significand);
}

static void vfp_double_normalise_denormal(struct vfp_double *vd)
{
    int bits = 31 - fls(vd->significand >> 32);
    if (bits == 31)
        bits = 63 - fls(vd->significand);

    vfp_double_dump("normalise_denormal: in", vd);

    if (bits) {
        vd->exponent -= bits - 1;
        vd->significand <<= bits;
    }

    vfp_double_dump("normalise_denormal: out", vd);
}

u32 vfp_double_normaliseround(int dd, struct vfp_double *vd, u32 fpscr, u32 exceptions, const char *func)
{
    u64 significand, incr;
    int exponent, shift, underflow;
    u32 rmode;

    vfp_double_dump("pack: in", vd);

    /*
     * Infinities and NaNs are a special case.
     */
    if (vd->exponent == 2047 && (vd->significand == 0 || exceptions))
        goto pack;

    /*
     * Special-case zero.
     */
    if (vd->significand == 0) {
        vd->exponent = 0;
        goto pack;
    }

    exponent = vd->exponent;
    significand = vd->significand;

    shift = 32 - fls(significand >> 32);
    if (shift == 32)
        shift = 64 - fls(significand);
    if (shift) {
        exponent -= shift;
        significand <<= shift;
    }

#ifdef DEBUG
    vd->exponent = exponent;
    vd->significand = significand;
    vfp_double_dump("pack: normalised", vd);
#endif

    /*
     * Tiny number?
     */
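    /*
     * A result whose exponent has gone negative is shifted into the
     * denormal range with a "jamming" shift: bits shifted out are folded
     * into the least significant bit, so the rounding step below can still
     * see that precision was lost.  If the shifted value turns out to be
     * exact, the tentative underflow is cancelled again.
     */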
    underflow = exponent < 0;
    if (underflow) {
        significand = vfp_shiftright64jamming(significand, -exponent);
        exponent = 0;
#ifdef DEBUG
        vd->exponent = exponent;
        vd->significand = significand;
        vfp_double_dump("pack: tiny number", vd);
#endif
        if (!(significand & ((1ULL << (VFP_DOUBLE_LOW_BITS + 1)) - 1)))
            underflow = 0;
    }

    /*
     * Select rounding increment.
     */
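    /*
     * The increment is expressed in units of the bits that will be
     * discarded on packing (the low VFP_DOUBLE_LOW_BITS + 1 bits).
     * Round-to-nearest adds half a ULP, reduced by one when the kept
     * result would already be even so that ties round to even; the
     * directed modes add an all-ones increment (round away from zero)
     * only when the rounding direction matches the sign, and
     * round-to-zero adds nothing.
     */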
    incr = 0;
    rmode = fpscr & FPSCR_RMODE_MASK;

    if (rmode == FPSCR_ROUND_NEAREST) {
        incr = 1ULL << VFP_DOUBLE_LOW_BITS;
        if ((significand & (1ULL << (VFP_DOUBLE_LOW_BITS + 1))) == 0)
            incr -= 1;
    } else if (rmode == FPSCR_ROUND_TOZERO) {
        incr = 0;
    } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vd->sign != 0))
        incr = (1ULL << (VFP_DOUBLE_LOW_BITS + 1)) - 1;

    pr_debug("VFP: rounding increment = 0x%08llx\n", incr);

    /*
     * Is our rounding going to overflow?
     */
    if ((significand + incr) < significand) {
        exponent += 1;
        significand = (significand >> 1) | (significand & 1);
        incr >>= 1;
#ifdef DEBUG
        vd->exponent = exponent;
        vd->significand = significand;
        vfp_double_dump("pack: overflow", vd);
#endif
    }

    /*
     * If any of the low bits (which will be shifted out of the
     * number) are non-zero, the result is inexact.
     */
    if (significand & ((1 << (VFP_DOUBLE_LOW_BITS + 1)) - 1))
        exceptions |= FPSCR_IXC;

    /*
     * Do our rounding.
     */
    significand += incr;

    /*
     * Infinity?
     */
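    /*
     * Overflow: the rounded result no longer fits in the finite exponent
     * range, so OFC and IXC are raised.  An increment of zero means the
     * selected rounding mode never rounds this result away from zero, so
     * the magnitude saturates at the largest finite double; otherwise the
     * result becomes infinity.
     */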
    if (exponent >= 2046) {
        exceptions |= FPSCR_OFC | FPSCR_IXC;
        if (incr == 0) {
            vd->exponent = 2045;
            vd->significand = 0x7fffffffffffffffULL;
        } else {
            vd->exponent = 2047;        /* infinity */
            vd->significand = 0;
        }
    } else {
        if (significand >> (VFP_DOUBLE_LOW_BITS + 1) == 0)
            exponent = 0;
        if (exponent || significand > 0x8000000000000000ULL)
            underflow = 0;
        if (underflow)
            exceptions |= FPSCR_UFC;
        vd->exponent = exponent;
        vd->significand = significand >> 1;
    }

 pack:
    vfp_double_dump("pack: final", vd);
    {
        s64 d = vfp_double_pack(vd);
        pr_debug("VFP: %s: d(d%d)=%016llx exceptions=%08x\n", func,
             dd, d, exceptions);
        vfp_put_double(d, dd);
    }
    return exceptions;
}

/*
 * Propagate the NaN, setting exceptions if it is signalling.
 * 'n' is always a NaN.  'm' may be a number, NaN or infinity.
 */
static u32
vfp_propagate_nan(struct vfp_double *vdd, struct vfp_double *vdn,
          struct vfp_double *vdm, u32 fpscr)
{
    struct vfp_double *nan;
    int tn, tm = 0;

    tn = vfp_double_type(vdn);

    if (vdm)
        tm = vfp_double_type(vdm);

    if (fpscr & FPSCR_DEFAULT_NAN)
        /*
         * Default NaN mode - always returns a quiet NaN
         */
        nan = &vfp_double_default_qnan;
    else {
        /*
         * Contemporary mode - select the first signalling
         * NAN, or if neither are signalling, the first
         * quiet NAN.
         */
        if (tn == VFP_SNAN || (tm != VFP_SNAN && tn == VFP_QNAN))
            nan = vdn;
        else
            nan = vdm;
        /*
         * Make the NaN quiet.
         */
        nan->significand |= VFP_DOUBLE_SIGNIFICAND_QNAN;
    }

    *vdd = *nan;

    /*
     * If one was a signalling NAN, raise invalid operation.
     */
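    /*
     * Note that VFP_NAN_FLAG is an internal marker rather than an FPSCR
     * exception bit: it records that a quiet NaN was propagated without
     * the invalid-operation exception being raised.
     */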
    return tn == VFP_SNAN || tm == VFP_SNAN ? FPSCR_IOC : VFP_NAN_FLAG;
}

/*
 * Extended operations
 */
static u32 vfp_double_fabs(int dd, int unused, int dm, u32 fpscr)
{
    vfp_put_double(vfp_double_packed_abs(vfp_get_double(dm)), dd);
    return 0;
}

static u32 vfp_double_fcpy(int dd, int unused, int dm, u32 fpscr)
{
    vfp_put_double(vfp_get_double(dm), dd);
    return 0;
}

static u32 vfp_double_fneg(int dd, int unused, int dm, u32 fpscr)
{
    vfp_put_double(vfp_double_packed_negate(vfp_get_double(dm)), dd);
    return 0;
}

static u32 vfp_double_fsqrt(int dd, int unused, int dm, u32 fpscr)
{
    struct vfp_double vdm, vdd;
    int ret, tm;

    vfp_double_unpack(&vdm, vfp_get_double(dm));
    tm = vfp_double_type(&vdm);
    if (tm & (VFP_NAN|VFP_INFINITY)) {
        struct vfp_double *vdp = &vdd;

        if (tm & VFP_NAN)
            ret = vfp_propagate_nan(vdp, &vdm, NULL, fpscr);
        else if (vdm.sign == 0) {
 sqrt_copy:
            vdp = &vdm;
            ret = 0;
        } else {
 sqrt_invalid:
            vdp = &vfp_double_default_qnan;
            ret = FPSCR_IOC;
        }
        vfp_put_double(vfp_double_pack(vdp), dd);
        return ret;
    }

    /*
     * sqrt(+/- 0) == +/- 0
     */
    if (tm & VFP_ZERO)
        goto sqrt_copy;

    /*
     * Normalise a denormalised number
     */
    if (tm & VFP_DENORMAL)
        vfp_double_normalise_denormal(&vdm);

    /*
     * sqrt(<0) = invalid
     */
    if (vdm.sign)
        goto sqrt_invalid;

    vfp_double_dump("sqrt", &vdm);

    /*
     * Estimate the square root.
     */
    vdd.sign = 0;
    vdd.exponent = ((vdm.exponent - 1023) >> 1) + 1023;
    vdd.significand = (u64)vfp_estimate_sqrt_significand(vdm.exponent, vdm.significand >> 32) << 31;

    vfp_double_dump("sqrt estimate1", &vdd);

    vdm.significand >>= 1 + (vdm.exponent & 1);
    vdd.significand += 2 + vfp_estimate_div128to64(vdm.significand, 0, vdd.significand);

    vfp_double_dump("sqrt estimate2", &vdd);

    /*
     * And now adjust.
     */
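    /*
     * The estimate is only approximate.  If its low-order bits are close
     * to a rounding boundary, compute the exact remainder against the
     * square of the estimate using 128-bit arithmetic and step the
     * estimate down until the remainder is non-negative, folding any
     * remaining inexactness in as a sticky bit.
     */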
    if ((vdd.significand & VFP_DOUBLE_LOW_BITS_MASK) <= 5) {
        if (vdd.significand < 2) {
            vdd.significand = ~0ULL;
        } else {
            u64 termh, terml, remh, reml;
            vdm.significand <<= 2;
            mul64to128(&termh, &terml, vdd.significand, vdd.significand);
            sub128(&remh, &reml, vdm.significand, 0, termh, terml);
            while ((s64)remh < 0) {
                vdd.significand -= 1;
                shift64left(&termh, &terml, vdd.significand);
                terml |= 1;
                add128(&remh, &reml, remh, reml, termh, terml);
            }
            vdd.significand |= (remh | reml) != 0;
        }
    }
    vdd.significand = vfp_shiftright64jamming(vdd.significand, 1);

    return vfp_double_normaliseround(dd, &vdd, fpscr, 0, "fsqrt");
}

/*
 * Equal    := ZC
 * Less than    := N
 * Greater than := C
 * Unordered    := CV
 */
static u32 vfp_compare(int dd, int signal_on_qnan, int dm, u32 fpscr)
{
    s64 d, m;
    u32 ret = 0;

    m = vfp_get_double(dm);
    if (vfp_double_packed_exponent(m) == 2047 && vfp_double_packed_mantissa(m)) {
        ret |= FPSCR_C | FPSCR_V;
        if (signal_on_qnan || !(vfp_double_packed_mantissa(m) & (1ULL << (VFP_DOUBLE_MANTISSA_BITS - 1))))
            /*
             * Signalling NaN, or signalling on quiet NaN
             */
            ret |= FPSCR_IOC;
    }

    d = vfp_get_double(dd);
    if (vfp_double_packed_exponent(d) == 2047 && vfp_double_packed_mantissa(d)) {
        ret |= FPSCR_C | FPSCR_V;
        if (signal_on_qnan || !(vfp_double_packed_mantissa(d) & (1ULL << (VFP_DOUBLE_MANTISSA_BITS - 1))))
            /*
             * Signalling NaN, or signalling on quiet NaN
             */
            ret |= FPSCR_IOC;
    }

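    /*
     * Neither operand is a NaN, so the packed bit patterns can be compared
     * directly: for operands of the same sign the IEEE 754 encoding is
     * monotonic, but the ordering is reversed when both are negative,
     * hence the XOR with the sign bit in the tests below.
     */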
    if (ret == 0) {
        if (d == m || vfp_double_packed_abs(d | m) == 0) {
            /*
             * equal
             */
            ret |= FPSCR_Z | FPSCR_C;
        } else if (vfp_double_packed_sign(d ^ m)) {
            /*
             * different signs
             */
            if (vfp_double_packed_sign(d))
                /*
                 * d is negative, so d < m
                 */
                ret |= FPSCR_N;
            else
                /*
                 * d is positive, so d > m
                 */
                ret |= FPSCR_C;
        } else if ((vfp_double_packed_sign(d) != 0) ^ (d < m)) {
            /*
             * d < m
             */
            ret |= FPSCR_N;
        } else if ((vfp_double_packed_sign(d) != 0) ^ (d > m)) {
            /*
             * d > m
             */
            ret |= FPSCR_C;
        }
    }

    return ret;
}

static u32 vfp_double_fcmp(int dd, int unused, int dm, u32 fpscr)
{
    return vfp_compare(dd, 0, dm, fpscr);
}

static u32 vfp_double_fcmpe(int dd, int unused, int dm, u32 fpscr)
{
    return vfp_compare(dd, 1, dm, fpscr);
}

static u32 vfp_double_fcmpz(int dd, int unused, int dm, u32 fpscr)
{
    return vfp_compare(dd, 0, VFP_REG_ZERO, fpscr);
}

static u32 vfp_double_fcmpez(int dd, int unused, int dm, u32 fpscr)
{
    return vfp_compare(dd, 1, VFP_REG_ZERO, fpscr);
}

static u32 vfp_double_fcvts(int sd, int unused, int dm, u32 fpscr)
{
    struct vfp_double vdm;
    struct vfp_single vsd;
    int tm;
    u32 exceptions = 0;

    vfp_double_unpack(&vdm, vfp_get_double(dm));

    tm = vfp_double_type(&vdm);

    /*
     * If we have a signalling NaN, signal invalid operation.
     */
    if (tm == VFP_SNAN)
        exceptions = FPSCR_IOC;

    if (tm & VFP_DENORMAL)
        vfp_double_normalise_denormal(&vdm);

    vsd.sign = vdm.sign;
    vsd.significand = vfp_hi64to32jamming(vdm.significand);

    /*
     * If we have an infinity or a NaN, the exponent must be 255
     */
    if (tm & (VFP_INFINITY|VFP_NAN)) {
        vsd.exponent = 255;
        if (tm == VFP_QNAN)
            vsd.significand |= VFP_SINGLE_SIGNIFICAND_QNAN;
        goto pack_nan;
    } else if (tm & VFP_ZERO)
        vsd.exponent = 0;
    else
        vsd.exponent = vdm.exponent - (1023 - 127);

    return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, "fcvts");

 pack_nan:
    vfp_put_float(vfp_single_pack(&vsd), sd);
    return exceptions;
}

static u32 vfp_double_fuito(int dd, int unused, int dm, u32 fpscr)
{
    struct vfp_double vdm;
    u32 m = vfp_get_float(dm);

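    /*
     * Build an unpacked double that is exactly equal to m: the 32-bit
     * value sits in the low half of the 64-bit working significand, and
     * the biased exponent 1023 + 63 - 1 makes the implied scaling work
     * out to 2^0.  vfp_double_normaliseround() then normalises the
     * result; no rounding occurs, since any 32-bit integer fits in the
     * 53-bit double-precision significand.
     */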
    vdm.sign = 0;
    vdm.exponent = 1023 + 63 - 1;
    vdm.significand = (u64)m;

    return vfp_double_normaliseround(dd, &vdm, fpscr, 0, "fuito");
}

static u32 vfp_double_fsito(int dd, int unused, int dm, u32 fpscr)
{
    struct vfp_double vdm;
    u32 m = vfp_get_float(dm);

    vdm.sign = (m & 0x80000000) >> 16;
    vdm.exponent = 1023 + 63 - 1;
    vdm.significand = vdm.sign ? -m : m;

    return vfp_double_normaliseround(dd, &vdm, fpscr, 0, "fsito");
}

static u32 vfp_double_ftoui(int sd, int unused, int dm, u32 fpscr)
{
    struct vfp_double vdm;
    u32 d, exceptions = 0;
    int rmode = fpscr & FPSCR_RMODE_MASK;
    int tm;

    vfp_double_unpack(&vdm, vfp_get_double(dm));

    /*
     * Do we have a denormalised number?
     */
    tm = vfp_double_type(&vdm);
    if (tm & VFP_DENORMAL)
        exceptions |= FPSCR_IDC;

    if (tm & VFP_NAN)
        vdm.sign = 0;

    if (vdm.exponent >= 1023 + 32) {
        d = vdm.sign ? 0 : 0xffffffff;
        exceptions = FPSCR_IOC;
    } else if (vdm.exponent >= 1023 - 1) {
        int shift = 1023 + 63 - vdm.exponent;
        u64 rem, incr = 0;

        /*
         * 2^0 <= m < 2^32-2^8
         */
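        /*
         * Split the significand into its integer part and the discarded
         * fraction: 'd' receives the bits of integer weight, while 'rem'
         * keeps the discarded bits left-aligned in 64 bits so the rounding
         * decision below reduces to "does rem + incr carry?".
         */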
        d = (vdm.significand << 1) >> shift;
        rem = vdm.significand << (65 - shift);

        if (rmode == FPSCR_ROUND_NEAREST) {
            incr = 0x8000000000000000ULL;
            if ((d & 1) == 0)
                incr -= 1;
        } else if (rmode == FPSCR_ROUND_TOZERO) {
            incr = 0;
        } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vdm.sign != 0)) {
            incr = ~0ULL;
        }

        if ((rem + incr) < rem) {
            if (d < 0xffffffff)
                d += 1;
            else
                exceptions |= FPSCR_IOC;
        }

        if (d && vdm.sign) {
            d = 0;
            exceptions |= FPSCR_IOC;
        } else if (rem)
            exceptions |= FPSCR_IXC;
    } else {
        d = 0;
        if (vdm.exponent | vdm.significand) {
            exceptions |= FPSCR_IXC;
            if (rmode == FPSCR_ROUND_PLUSINF && vdm.sign == 0)
                d = 1;
            else if (rmode == FPSCR_ROUND_MINUSINF && vdm.sign) {
                d = 0;
                exceptions |= FPSCR_IOC;
            }
        }
    }

    pr_debug("VFP: ftoui: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions);

    vfp_put_float(d, sd);

    return exceptions;
}

static u32 vfp_double_ftouiz(int sd, int unused, int dm, u32 fpscr)
{
    return vfp_double_ftoui(sd, unused, dm, FPSCR_ROUND_TOZERO);
}

static u32 vfp_double_ftosi(int sd, int unused, int dm, u32 fpscr)
{
    struct vfp_double vdm;
    u32 d, exceptions = 0;
    int rmode = fpscr & FPSCR_RMODE_MASK;
    int tm;

    vfp_double_unpack(&vdm, vfp_get_double(dm));
    vfp_double_dump("VDM", &vdm);

    /*
     * Do we have a denormalised number?
     */
    tm = vfp_double_type(&vdm);
    if (tm & VFP_DENORMAL)
        exceptions |= FPSCR_IDC;

    if (tm & VFP_NAN) {
        d = 0;
        exceptions |= FPSCR_IOC;
    } else if (vdm.exponent >= 1023 + 32) {
        d = 0x7fffffff;
        if (vdm.sign)
            d = ~d;
        exceptions |= FPSCR_IOC;
    } else if (vdm.exponent >= 1023 - 1) {
        int shift = 1023 + 63 - vdm.exponent;   /* 58 */
        u64 rem, incr = 0;

        d = (vdm.significand << 1) >> shift;
        rem = vdm.significand << (65 - shift);

        if (rmode == FPSCR_ROUND_NEAREST) {
            incr = 0x8000000000000000ULL;
            if ((d & 1) == 0)
                incr -= 1;
        } else if (rmode == FPSCR_ROUND_TOZERO) {
            incr = 0;
        } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vdm.sign != 0)) {
            incr = ~0ULL;
        }

        if ((rem + incr) < rem && d < 0xffffffff)
            d += 1;
        if (d > 0x7fffffff + (vdm.sign != 0)) {
            d = 0x7fffffff + (vdm.sign != 0);
            exceptions |= FPSCR_IOC;
        } else if (rem)
            exceptions |= FPSCR_IXC;

        if (vdm.sign)
            d = -d;
    } else {
        d = 0;
        if (vdm.exponent | vdm.significand) {
            exceptions |= FPSCR_IXC;
            if (rmode == FPSCR_ROUND_PLUSINF && vdm.sign == 0)
                d = 1;
            else if (rmode == FPSCR_ROUND_MINUSINF && vdm.sign)
                d = -1;
        }
    }

    pr_debug("VFP: ftosi: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions);

    vfp_put_float((s32)d, sd);

    return exceptions;
}

static u32 vfp_double_ftosiz(int dd, int unused, int dm, u32 fpscr)
{
    return vfp_double_ftosi(dd, unused, dm, FPSCR_ROUND_TOZERO);
}

static struct op fops_ext[32] = {
    [FEXT_TO_IDX(FEXT_FCPY)]    = { vfp_double_fcpy,   0 },
    [FEXT_TO_IDX(FEXT_FABS)]    = { vfp_double_fabs,   0 },
    [FEXT_TO_IDX(FEXT_FNEG)]    = { vfp_double_fneg,   0 },
    [FEXT_TO_IDX(FEXT_FSQRT)]   = { vfp_double_fsqrt,  0 },
    [FEXT_TO_IDX(FEXT_FCMP)]    = { vfp_double_fcmp,   OP_SCALAR },
    [FEXT_TO_IDX(FEXT_FCMPE)]   = { vfp_double_fcmpe,  OP_SCALAR },
    [FEXT_TO_IDX(FEXT_FCMPZ)]   = { vfp_double_fcmpz,  OP_SCALAR },
    [FEXT_TO_IDX(FEXT_FCMPEZ)]  = { vfp_double_fcmpez, OP_SCALAR },
    [FEXT_TO_IDX(FEXT_FCVT)]    = { vfp_double_fcvts,  OP_SCALAR|OP_SD },
    [FEXT_TO_IDX(FEXT_FUITO)]   = { vfp_double_fuito,  OP_SCALAR|OP_SM },
    [FEXT_TO_IDX(FEXT_FSITO)]   = { vfp_double_fsito,  OP_SCALAR|OP_SM },
    [FEXT_TO_IDX(FEXT_FTOUI)]   = { vfp_double_ftoui,  OP_SCALAR|OP_SD },
    [FEXT_TO_IDX(FEXT_FTOUIZ)]  = { vfp_double_ftouiz, OP_SCALAR|OP_SD },
    [FEXT_TO_IDX(FEXT_FTOSI)]   = { vfp_double_ftosi,  OP_SCALAR|OP_SD },
    [FEXT_TO_IDX(FEXT_FTOSIZ)]  = { vfp_double_ftosiz, OP_SCALAR|OP_SD },
};

static u32
vfp_double_fadd_nonnumber(struct vfp_double *vdd, struct vfp_double *vdn,
              struct vfp_double *vdm, u32 fpscr)
{
    struct vfp_double *vdp;
    u32 exceptions = 0;
    int tn, tm;

    tn = vfp_double_type(vdn);
    tm = vfp_double_type(vdm);

    if (tn & tm & VFP_INFINITY) {
        /*
         * Two infinities.  Are they different signs?
         */
        if (vdn->sign ^ vdm->sign) {
            /*
             * different signs -> invalid
             */
            exceptions = FPSCR_IOC;
            vdp = &vfp_double_default_qnan;
        } else {
            /*
             * same signs -> valid
             */
            vdp = vdn;
        }
    } else if (tn & VFP_INFINITY && tm & VFP_NUMBER) {
        /*
         * One infinity and one number -> infinity
         */
        vdp = vdn;
    } else {
        /*
         * 'n' is a NaN of some type
         */
        return vfp_propagate_nan(vdd, vdn, vdm, fpscr);
    }
    *vdd = *vdp;
    return exceptions;
}

static u32
vfp_double_add(struct vfp_double *vdd, struct vfp_double *vdn,
           struct vfp_double *vdm, u32 fpscr)
{
    u32 exp_diff;
    u64 m_sig;

    if (vdn->significand & (1ULL << 63) ||
        vdm->significand & (1ULL << 63)) {
        pr_info("VFP: bad FP values in %s\n", __func__);
        vfp_double_dump("VDN", vdn);
        vfp_double_dump("VDM", vdm);
    }

    /*
     * Ensure that 'n' is the largest magnitude number.  Note that
     * if 'n' and 'm' have equal exponents, we do not swap them.
     * This ensures that NaN propagation works correctly.
     */
    if (vdn->exponent < vdm->exponent) {
        struct vfp_double *t = vdn;
        vdn = vdm;
        vdm = t;
    }

    /*
     * Is 'n' an infinity or a NaN?  Note that 'm' may be a number,
     * infinity or a NaN here.
     */
    if (vdn->exponent == 2047)
        return vfp_double_fadd_nonnumber(vdd, vdn, vdm, fpscr);

    /*
     * We have two proper numbers, where 'vdn' is the larger magnitude.
     *
     * Copy 'n' to 'd' before doing the arithmetic.
     */
    *vdd = *vdn;

    /*
     * Align 'm' with the result.
     */
    exp_diff = vdn->exponent - vdm->exponent;
    m_sig = vfp_shiftright64jamming(vdm->significand, exp_diff);

    /*
     * If the signs are different, we are really subtracting.
     */
    if (vdn->sign ^ vdm->sign) {
        m_sig = vdn->significand - m_sig;
        if ((s64)m_sig < 0) {
            vdd->sign = vfp_sign_negate(vdd->sign);
            m_sig = -m_sig;
        } else if (m_sig == 0) {
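            /*
             * Exact cancellation: IEEE 754 requires a +0 result in every
             * rounding mode except round-towards-minus-infinity, which
             * yields -0 (the unpacked sign is held as 0x8000).
             */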
            vdd->sign = (fpscr & FPSCR_RMODE_MASK) ==
                      FPSCR_ROUND_MINUSINF ? 0x8000 : 0;
        }
    } else {
        m_sig += vdn->significand;
    }
    vdd->significand = m_sig;

    return 0;
}

static u32
vfp_double_multiply(struct vfp_double *vdd, struct vfp_double *vdn,
            struct vfp_double *vdm, u32 fpscr)
{
    vfp_double_dump("VDN", vdn);
    vfp_double_dump("VDM", vdm);

    /*
     * Ensure that 'n' is the largest magnitude number.  Note that
     * if 'n' and 'm' have equal exponents, we do not swap them.
     * This ensures that NaN propagation works correctly.
     */
    if (vdn->exponent < vdm->exponent) {
        struct vfp_double *t = vdn;
        vdn = vdm;
        vdm = t;
        pr_debug("VFP: swapping M <-> N\n");
    }

    vdd->sign = vdn->sign ^ vdm->sign;

    /*
     * If 'n' is an infinity or NaN, handle it.  'm' may be anything.
     */
    if (vdn->exponent == 2047) {
        if (vdn->significand || (vdm->exponent == 2047 && vdm->significand))
            return vfp_propagate_nan(vdd, vdn, vdm, fpscr);
        if ((vdm->exponent | vdm->significand) == 0) {
            *vdd = vfp_double_default_qnan;
            return FPSCR_IOC;
        }
        vdd->exponent = vdn->exponent;
        vdd->significand = 0;
        return 0;
    }

    /*
     * If 'm' is zero, the result is always zero.  In this case,
     * 'n' may be zero or a number, but it doesn't matter which.
     */
    if ((vdm->exponent | vdm->significand) == 0) {
        vdd->exponent = 0;
        vdd->significand = 0;
        return 0;
    }

    /*
     * We add 2 to the destination exponent for the same reason
     * as the addition case - though this time we have +1 from
     * each input operand.
     */
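    /*
     * vfp_hi64multiply64() yields the high 64 bits of the 128-bit product
     * of the working significands; any non-zero low-order bits are folded
     * in as a sticky bit so the final rounding remains correct.
     */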
    vdd->exponent = vdn->exponent + vdm->exponent - 1023 + 2;
    vdd->significand = vfp_hi64multiply64(vdn->significand, vdm->significand);

    vfp_double_dump("VDD", vdd);
    return 0;
}

#define NEG_MULTIPLY    (1 << 0)
#define NEG_SUBTRACT    (1 << 1)
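/*
 * NEG_MULTIPLY negates the product n * m and NEG_SUBTRACT negates the
 * accumulator d before the final addition, giving the fmac, fnmac, fmsc
 * and fnmsc variants below.
 */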

static u32
vfp_double_multiply_accumulate(int dd, int dn, int dm, u32 fpscr, u32 negate, char *func)
{
    struct vfp_double vdd, vdp, vdn, vdm;
    u32 exceptions;

    vfp_double_unpack(&vdn, vfp_get_double(dn));
    if (vdn.exponent == 0 && vdn.significand)
        vfp_double_normalise_denormal(&vdn);

    vfp_double_unpack(&vdm, vfp_get_double(dm));
    if (vdm.exponent == 0 && vdm.significand)
        vfp_double_normalise_denormal(&vdm);

    exceptions = vfp_double_multiply(&vdp, &vdn, &vdm, fpscr);
    if (negate & NEG_MULTIPLY)
        vdp.sign = vfp_sign_negate(vdp.sign);

    vfp_double_unpack(&vdn, vfp_get_double(dd));
    if (vdn.exponent == 0 && vdn.significand)
        vfp_double_normalise_denormal(&vdn);
    if (negate & NEG_SUBTRACT)
        vdn.sign = vfp_sign_negate(vdn.sign);

    exceptions |= vfp_double_add(&vdd, &vdn, &vdp, fpscr);

    return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, func);
}

/*
 * Standard operations
 */

/*
 * dd = dd + (dn * dm)
 */
static u32 vfp_double_fmac(int dd, int dn, int dm, u32 fpscr)
{
    return vfp_double_multiply_accumulate(dd, dn, dm, fpscr, 0, "fmac");
}

/*
 * dd = dd - (dn * dm)
 */
static u32 vfp_double_fnmac(int dd, int dn, int dm, u32 fpscr)
{
    return vfp_double_multiply_accumulate(dd, dn, dm, fpscr, NEG_MULTIPLY, "fnmac");
}

/*
 * dd = -dd + (dn * dm)
 */
static u32 vfp_double_fmsc(int dd, int dn, int dm, u32 fpscr)
{
    return vfp_double_multiply_accumulate(dd, dn, dm, fpscr, NEG_SUBTRACT, "fmsc");
}

/*
 * dd = -dd - (dn * dm)
 */
static u32 vfp_double_fnmsc(int dd, int dn, int dm, u32 fpscr)
{
    return vfp_double_multiply_accumulate(dd, dn, dm, fpscr, NEG_SUBTRACT | NEG_MULTIPLY, "fnmsc");
}

/*
 * dd = dn * dm
 */
static u32 vfp_double_fmul(int dd, int dn, int dm, u32 fpscr)
{
    struct vfp_double vdd, vdn, vdm;
    u32 exceptions;

    vfp_double_unpack(&vdn, vfp_get_double(dn));
    if (vdn.exponent == 0 && vdn.significand)
        vfp_double_normalise_denormal(&vdn);

    vfp_double_unpack(&vdm, vfp_get_double(dm));
    if (vdm.exponent == 0 && vdm.significand)
        vfp_double_normalise_denormal(&vdm);

    exceptions = vfp_double_multiply(&vdd, &vdn, &vdm, fpscr);
    return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fmul");
}

/*
 * dd = -(dn * dm)
 */
static u32 vfp_double_fnmul(int dd, int dn, int dm, u32 fpscr)
{
    struct vfp_double vdd, vdn, vdm;
    u32 exceptions;

    vfp_double_unpack(&vdn, vfp_get_double(dn));
    if (vdn.exponent == 0 && vdn.significand)
        vfp_double_normalise_denormal(&vdn);

    vfp_double_unpack(&vdm, vfp_get_double(dm));
    if (vdm.exponent == 0 && vdm.significand)
        vfp_double_normalise_denormal(&vdm);

    exceptions = vfp_double_multiply(&vdd, &vdn, &vdm, fpscr);
    vdd.sign = vfp_sign_negate(vdd.sign);

    return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fnmul");
}

/*
 * dd = dn + dm
 */
static u32 vfp_double_fadd(int dd, int dn, int dm, u32 fpscr)
{
    struct vfp_double vdd, vdn, vdm;
    u32 exceptions;

    vfp_double_unpack(&vdn, vfp_get_double(dn));
    if (vdn.exponent == 0 && vdn.significand)
        vfp_double_normalise_denormal(&vdn);

    vfp_double_unpack(&vdm, vfp_get_double(dm));
    if (vdm.exponent == 0 && vdm.significand)
        vfp_double_normalise_denormal(&vdm);

    exceptions = vfp_double_add(&vdd, &vdn, &vdm, fpscr);

    return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fadd");
}

/*
 * dd = dn - dm
 */
static u32 vfp_double_fsub(int dd, int dn, int dm, u32 fpscr)
{
    struct vfp_double vdd, vdn, vdm;
    u32 exceptions;

    vfp_double_unpack(&vdn, vfp_get_double(dn));
    if (vdn.exponent == 0 && vdn.significand)
        vfp_double_normalise_denormal(&vdn);

    vfp_double_unpack(&vdm, vfp_get_double(dm));
    if (vdm.exponent == 0 && vdm.significand)
        vfp_double_normalise_denormal(&vdm);

    /*
     * Subtraction is like addition, but with a negated operand.
     */
    vdm.sign = vfp_sign_negate(vdm.sign);

    exceptions = vfp_double_add(&vdd, &vdn, &vdm, fpscr);

    return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fsub");
}

/*
 * dd = dn / dm
 */
static u32 vfp_double_fdiv(int dd, int dn, int dm, u32 fpscr)
{
    struct vfp_double vdd, vdn, vdm;
    u32 exceptions = 0;
    int tm, tn;

    vfp_double_unpack(&vdn, vfp_get_double(dn));
    vfp_double_unpack(&vdm, vfp_get_double(dm));

    vdd.sign = vdn.sign ^ vdm.sign;

    tn = vfp_double_type(&vdn);
    tm = vfp_double_type(&vdm);

    /*
     * Is n a NAN?
     */
    if (tn & VFP_NAN)
        goto vdn_nan;

    /*
     * Is m a NAN?
     */
    if (tm & VFP_NAN)
        goto vdm_nan;

    /*
     * If n and m are infinity, the result is invalid
     * If n and m are zero, the result is invalid
     */
    if (tm & tn & (VFP_INFINITY|VFP_ZERO))
        goto invalid;

    /*
     * If n is infinity, the result is infinity
     */
    if (tn & VFP_INFINITY)
        goto infinity;

    /*
     * If m is zero, raise div0 exceptions
     */
    if (tm & VFP_ZERO)
        goto divzero;

    /*
     * If m is infinity, or n is zero, the result is zero
     */
    if (tm & VFP_INFINITY || tn & VFP_ZERO)
        goto zero;

    if (tn & VFP_DENORMAL)
        vfp_double_normalise_denormal(&vdn);
    if (tm & VFP_DENORMAL)
        vfp_double_normalise_denormal(&vdm);

    /*
     * Ok, we have two numbers, we can perform division.
     */
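    /*
     * Pre-scale so that the estimated quotient of the working significands
     * ends up with its leading bit in the same position as a normalised
     * operand: the divisor is doubled, and if the dividend is at least as
     * large as the original divisor it is halved, with the result exponent
     * bumped to compensate.
     */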
    vdd.exponent = vdn.exponent - vdm.exponent + 1023 - 1;
    vdm.significand <<= 1;
    if (vdm.significand <= (2 * vdn.significand)) {
        vdn.significand >>= 1;
        vdd.exponent++;
    }
    vdd.significand = vfp_estimate_div128to64(vdn.significand, 0, vdm.significand);
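    /*
     * vfp_estimate_div128to64() may be slightly high.  When the low bits
     * are close to a rounding boundary, compute the exact remainder and
     * step the quotient down until the remainder is non-negative, OR-ing
     * in a sticky bit if the division was inexact.
     */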
    if ((vdd.significand & 0x1ff) <= 2) {
        u64 termh, terml, remh, reml;
        mul64to128(&termh, &terml, vdm.significand, vdd.significand);
        sub128(&remh, &reml, vdn.significand, 0, termh, terml);
        while ((s64)remh < 0) {
            vdd.significand -= 1;
            add128(&remh, &reml, remh, reml, 0, vdm.significand);
        }
        vdd.significand |= (reml != 0);
    }
    return vfp_double_normaliseround(dd, &vdd, fpscr, 0, "fdiv");

 vdn_nan:
    exceptions = vfp_propagate_nan(&vdd, &vdn, &vdm, fpscr);
 pack:
    vfp_put_double(vfp_double_pack(&vdd), dd);
    return exceptions;

 vdm_nan:
    exceptions = vfp_propagate_nan(&vdd, &vdm, &vdn, fpscr);
    goto pack;

 zero:
    vdd.exponent = 0;
    vdd.significand = 0;
    goto pack;

 divzero:
    exceptions = FPSCR_DZC;
 infinity:
    vdd.exponent = 2047;
    vdd.significand = 0;
    goto pack;

 invalid:
    vfp_put_double(vfp_double_pack(&vfp_double_default_qnan), dd);
    return FPSCR_IOC;
}

static struct op fops[16] = {
    [FOP_TO_IDX(FOP_FMAC)]  = { vfp_double_fmac,  0 },
    [FOP_TO_IDX(FOP_FNMAC)] = { vfp_double_fnmac, 0 },
    [FOP_TO_IDX(FOP_FMSC)]  = { vfp_double_fmsc,  0 },
    [FOP_TO_IDX(FOP_FNMSC)] = { vfp_double_fnmsc, 0 },
    [FOP_TO_IDX(FOP_FMUL)]  = { vfp_double_fmul,  0 },
    [FOP_TO_IDX(FOP_FNMUL)] = { vfp_double_fnmul, 0 },
    [FOP_TO_IDX(FOP_FADD)]  = { vfp_double_fadd,  0 },
    [FOP_TO_IDX(FOP_FSUB)]  = { vfp_double_fsub,  0 },
    [FOP_TO_IDX(FOP_FDIV)]  = { vfp_double_fdiv,  0 },
};

#define FREG_BANK(x)    ((x) & 0x0c)
#define FREG_IDX(x) ((x) & 3)

u32 vfp_double_cpdo(u32 inst, u32 fpscr)
{
    u32 op = inst & FOP_MASK;
    u32 exceptions = 0;
    unsigned int dest;
    unsigned int dn = vfp_get_dn(inst);
    unsigned int dm;
    unsigned int vecitr, veclen, vecstride;
    struct op *fop;

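    /*
     * FPSCR.STRIDE selects the register stride used for vector operands;
     * only the encodings 0b00 (stride 1) and 0b11 (stride 2) are defined,
     * which is what the comparison with the full mask encodes.
     */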
    vecstride = (1 + ((fpscr & FPSCR_STRIDE_MASK) == FPSCR_STRIDE_MASK));

    fop = (op == FOP_EXT) ? &fops_ext[FEXT_TO_IDX(inst)] : &fops[FOP_TO_IDX(op)];

    /*
     * fcvtsd takes an sN register number as destination, not dN.
     * It also always operates on scalars.
     */
    if (fop->flags & OP_SD)
        dest = vfp_get_sd(inst);
    else
        dest = vfp_get_dd(inst);

    /*
     * f[us]ito takes an sN operand, not a dN operand.
     */
    if (fop->flags & OP_SM)
        dm = vfp_get_sm(inst);
    else
        dm = vfp_get_dm(inst);

    /*
     * If destination bank is zero, vector length is always '1'.
     * ARM DDI0100F C5.1.3, C5.3.2.
     */
    if ((fop->flags & OP_SCALAR) || (FREG_BANK(dest) == 0))
        veclen = 0;
    else
        veclen = fpscr & FPSCR_LENGTH_MASK;

    pr_debug("VFP: vecstride=%u veclen=%u\n", vecstride,
         (veclen >> FPSCR_LENGTH_BIT) + 1);

    if (!fop->fn)
        goto invalid;

    for (vecitr = 0; vecitr <= veclen; vecitr += 1 << FPSCR_LENGTH_BIT) {
        u32 except;
        char type;

        type = fop->flags & OP_SD ? 's' : 'd';
        if (op == FOP_EXT)
            pr_debug("VFP: itr%d (%c%u) = op[%u] (d%u)\n",
                 vecitr >> FPSCR_LENGTH_BIT,
                 type, dest, dn, dm);
        else
            pr_debug("VFP: itr%d (%c%u) = (d%u) op[%u] (d%u)\n",
                 vecitr >> FPSCR_LENGTH_BIT,
                 type, dest, dn, FOP_TO_IDX(op), dm);

        except = fop->fn(dest, dn, dm, fpscr);
        pr_debug("VFP: itr%d: exceptions=%08x\n",
             vecitr >> FPSCR_LENGTH_BIT, except);

        exceptions |= except;

        /*
         * CHECK: It appears to be undefined whether we stop when
         * we encounter an exception.  We continue.
         */
        dest = FREG_BANK(dest) + ((FREG_IDX(dest) + vecstride) & 3);
        dn = FREG_BANK(dn) + ((FREG_IDX(dn) + vecstride) & 3);
        if (FREG_BANK(dm) != 0)
            dm = FREG_BANK(dm) + ((FREG_IDX(dm) + vecstride) & 3);
    }
    return exceptions;

 invalid:
    return ~0;
}