Back to home page

OSCL-LXR

 
 

    


0001 /*
0002  *  linux/arch/arm/vfp/vfpsingle.c
0003  *
0004  * This code is derived in part from John R. Housers softfloat library, which
0005  * carries the following notice:
0006  *
0007  * ===========================================================================
0008  * This C source file is part of the SoftFloat IEC/IEEE Floating-point
0009  * Arithmetic Package, Release 2.
0010  *
0011  * Written by John R. Hauser.  This work was made possible in part by the
0012  * International Computer Science Institute, located at Suite 600, 1947 Center
0013  * Street, Berkeley, California 94704.  Funding was partially provided by the
0014  * National Science Foundation under grant MIP-9311980.  The original version
0015  * of this code was written as part of a project to build a fixed-point vector
0016  * processor in collaboration with the University of California at Berkeley,
0017  * overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
0018  * is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
0019  * arithmetic/softfloat.html'.
0020  *
0021  * THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
0022  * has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
0023  * TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
0024  * PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
0025  * AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
0026  *
0027  * Derivative works are acceptable, even for commercial purposes, so long as
0028  * (1) they include prominent notice that the work is derivative, and (2) they
0029  * include prominent notice akin to these three paragraphs for those parts of
0030  * this code that are retained.
0031  * ===========================================================================
0032  */
0033 #include <linux/kernel.h>
0034 #include <linux/bitops.h>
0035 
0036 #include <asm/div64.h>
0037 #include <asm/vfp.h>
0038 
0039 #include "vfpinstr.h"
0040 #include "vfp.h"
0041 
/*
 * Default quiet NaN: returned for invalid operations when FPSCR
 * default-NaN mode is enabled (exponent 255, sign clear, QNAN
 * significand pattern).
 */
static struct vfp_single vfp_single_default_qnan = {
    .exponent   = 255,
    .sign       = 0,
    .significand    = VFP_SINGLE_SIGNIFICAND_QNAN,
};
0047 
/*
 * Dump the sign/exponent/significand of an unpacked single to the
 * debug log; 'str' tags the message with the caller's context.
 */
static void vfp_single_dump(const char *str, struct vfp_single *s)
{
    pr_debug("VFP: %s: sign=%d exponent=%d significand=%08x\n",
         str, s->sign != 0, s->exponent, s->significand);
}
0053 
/*
 * Normalise a denormal in place: shift the significand left so its
 * leading one moves towards bit 31, decreasing the exponent by one
 * less than the shift count.
 * NOTE(review): the "- 1" offset presumably compensates for the
 * implicit-bit convention used by vfp_single_unpack — confirm
 * against vfp.h.
 */
static void vfp_single_normalise_denormal(struct vfp_single *vs)
{
    /* fls() returns the 1-based index of the highest set bit. */
    int bits = 31 - fls(vs->significand);

    vfp_single_dump("normalise_denormal: in", vs);

    if (bits) {
        vs->exponent -= bits - 1;
        vs->significand <<= bits;
    }

    vfp_single_dump("normalise_denormal: out", vs);
}
0067 
/*
 * Round and pack the unpacked single 'vs' into register 'sd',
 * applying the FPSCR rounding mode and accumulating exception flags
 * (IXC/OFC/UFC) into 'exceptions', which is returned to the caller.
 * In non-DEBUG builds the 'func' tag argument is compiled away by
 * the macro below.
 */
#ifndef DEBUG
#define vfp_single_normaliseround(sd,vsd,fpscr,except,func) __vfp_single_normaliseround(sd,vsd,fpscr,except)
u32 __vfp_single_normaliseround(int sd, struct vfp_single *vs, u32 fpscr, u32 exceptions)
#else
u32 vfp_single_normaliseround(int sd, struct vfp_single *vs, u32 fpscr, u32 exceptions, const char *func)
#endif
{
    u32 significand, incr, rmode;
    int exponent, shift, underflow;

    vfp_single_dump("pack: in", vs);

    /*
     * Infinities and NaNs are a special case.
     */
    if (vs->exponent == 255 && (vs->significand == 0 || exceptions))
        goto pack;

    /*
     * Special-case zero.
     */
    if (vs->significand == 0) {
        vs->exponent = 0;
        goto pack;
    }

    exponent = vs->exponent;
    significand = vs->significand;

    /*
     * Normalise first.  Note that we shift the significand up to
     * bit 31, so we have VFP_SINGLE_LOW_BITS + 1 below the least
     * significant bit.
     */
    shift = 32 - fls(significand);
    if (shift < 32 && shift) {
        exponent -= shift;
        significand <<= shift;
    }

#ifdef DEBUG
    vs->exponent = exponent;
    vs->significand = significand;
    vfp_single_dump("pack: normalised", vs);
#endif

    /*
     * Tiny number?
     */
    underflow = exponent < 0;
    if (underflow) {
        /* Shift right, ORing any lost bits into the LSB ("jamming")
         * so inexactness is not lost for the rounding decision. */
        significand = vfp_shiftright32jamming(significand, -exponent);
        exponent = 0;
#ifdef DEBUG
        vs->exponent = exponent;
        vs->significand = significand;
        vfp_single_dump("pack: tiny number", vs);
#endif
        if (!(significand & ((1 << (VFP_SINGLE_LOW_BITS + 1)) - 1)))
            underflow = 0;
    }

    /*
     * Select rounding increment.
     */
    incr = 0;
    rmode = fpscr & FPSCR_RMODE_MASK;

    if (rmode == FPSCR_ROUND_NEAREST) {
        incr = 1 << VFP_SINGLE_LOW_BITS;
        /* Round-to-nearest-even: if the bit above the guard bits is
         * clear, bias the increment down so ties round to even. */
        if ((significand & (1 << (VFP_SINGLE_LOW_BITS + 1))) == 0)
            incr -= 1;
    } else if (rmode == FPSCR_ROUND_TOZERO) {
        incr = 0;
    } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vs->sign != 0))
        incr = (1 << (VFP_SINGLE_LOW_BITS + 1)) - 1;

    pr_debug("VFP: rounding increment = 0x%08x\n", incr);

    /*
     * Is our rounding going to overflow?
     */
    if ((significand + incr) < significand) {
        exponent += 1;
        /* Shift down one, jamming the lost bit into the new LSB. */
        significand = (significand >> 1) | (significand & 1);
        incr >>= 1;
#ifdef DEBUG
        vs->exponent = exponent;
        vs->significand = significand;
        vfp_single_dump("pack: overflow", vs);
#endif
    }

    /*
     * If any of the low bits (which will be shifted out of the
     * number) are non-zero, the result is inexact.
     */
    if (significand & ((1 << (VFP_SINGLE_LOW_BITS + 1)) - 1))
        exceptions |= FPSCR_IXC;

    /*
     * Do our rounding.
     */
    significand += incr;

    /*
     * Infinity?
     */
    if (exponent >= 254) {
        exceptions |= FPSCR_OFC | FPSCR_IXC;
        if (incr == 0) {
            /* Round-to-zero-wards: deliver the largest finite value. */
            vs->exponent = 253;
            vs->significand = 0x7fffffff;
        } else {
            vs->exponent = 255;     /* infinity */
            vs->significand = 0;
        }
    } else {
        if (significand >> (VFP_SINGLE_LOW_BITS + 1) == 0)
            exponent = 0;
        /* Rounding may have normalised the value; in that case the
         * result is not tiny after all. */
        if (exponent || significand > 0x80000000)
            underflow = 0;
        if (underflow)
            exceptions |= FPSCR_UFC;
        vs->exponent = exponent;
        vs->significand = significand >> 1;
    }

 pack:
    vfp_single_dump("pack: final", vs);
    {
        s32 d = vfp_single_pack(vs);
#ifdef DEBUG
        pr_debug("VFP: %s: d(s%d)=%08x exceptions=%08x\n", func,
             sd, d, exceptions);
#endif
        vfp_put_float(d, sd);
    }

    return exceptions;
}
0209 
0210 /*
0211  * Propagate the NaN, setting exceptions if it is signalling.
0212  * 'n' is always a NaN.  'm' may be a number, NaN or infinity.
0213  */
/*
 * Propagate the NaN, setting exceptions if it is signalling.
 * 'n' is always a NaN.  'm' may be a number, NaN or infinity.
 *
 * Result is written to *vsd.  Returns FPSCR_IOC if either input was
 * a signalling NaN, otherwise VFP_NAN_FLAG.
 * NOTE: in contemporary (non-default-NaN) mode the selected source
 * operand is quietened IN PLACE before being copied to *vsd.
 */
static u32
vfp_propagate_nan(struct vfp_single *vsd, struct vfp_single *vsn,
          struct vfp_single *vsm, u32 fpscr)
{
    struct vfp_single *nan;
    int tn, tm = 0;

    tn = vfp_single_type(vsn);

    /* 'vsm' may be NULL for unary operations (e.g. fsqrt). */
    if (vsm)
        tm = vfp_single_type(vsm);

    if (fpscr & FPSCR_DEFAULT_NAN)
        /*
         * Default NaN mode - always returns a quiet NaN
         */
        nan = &vfp_single_default_qnan;
    else {
        /*
         * Contemporary mode - select the first signalling
         * NAN, or if neither are signalling, the first
         * quiet NAN.
         */
        if (tn == VFP_SNAN || (tm != VFP_SNAN && tn == VFP_QNAN))
            nan = vsn;
        else
            nan = vsm;
        /*
         * Make the NaN quiet.
         */
        nan->significand |= VFP_SINGLE_SIGNIFICAND_QNAN;
    }

    *vsd = *nan;

    /*
     * If one was a signalling NAN, raise invalid operation.
     */
    return tn == VFP_SNAN || tm == VFP_SNAN ? FPSCR_IOC : VFP_NAN_FLAG;
}
0254 
0255 
0256 /*
0257  * Extended operations
0258  */
0259 static u32 vfp_single_fabs(int sd, int unused, s32 m, u32 fpscr)
0260 {
0261     vfp_put_float(vfp_single_packed_abs(m), sd);
0262     return 0;
0263 }
0264 
0265 static u32 vfp_single_fcpy(int sd, int unused, s32 m, u32 fpscr)
0266 {
0267     vfp_put_float(m, sd);
0268     return 0;
0269 }
0270 
0271 static u32 vfp_single_fneg(int sd, int unused, s32 m, u32 fpscr)
0272 {
0273     vfp_put_float(vfp_single_packed_negate(m), sd);
0274     return 0;
0275 }
0276 
/*
 * Lookup tables used by vfp_estimate_sqrt_significand() to seed the
 * square-root estimate; indexed by the top four fraction bits of the
 * operand, with separate tables for odd and even exponents.
 * Inherited from the SoftFloat library (see file header).
 */
static const u16 sqrt_oddadjust[] = {
    0x0004, 0x0022, 0x005d, 0x00b1, 0x011d, 0x019f, 0x0236, 0x02e0,
    0x039c, 0x0468, 0x0545, 0x0631, 0x072b, 0x0832, 0x0946, 0x0a67
};

static const u16 sqrt_evenadjust[] = {
    0x0a2d, 0x08af, 0x075a, 0x0629, 0x051a, 0x0429, 0x0356, 0x029e,
    0x0200, 0x0179, 0x0109, 0x00af, 0x0068, 0x0034, 0x0012, 0x0002
};
0286 
/*
 * Estimate the square root of a significand (derived from SoftFloat's
 * estimateSqrt32).  The significand is expected to have bit pattern
 * 01 in its top two bits (i.e. 0x40000000 <= significand < 0x80000000);
 * a warning is logged otherwise.  The low bit of 'exponent' selects
 * the odd/even adjustment table.
 */
u32 vfp_estimate_sqrt_significand(u32 exponent, u32 significand)
{
    int index;
    u32 z, a;

    if ((significand & 0xc0000000) != 0x40000000) {
        pr_warn("VFP: estimate_sqrt: invalid significand\n");
    }

    a = significand << 1;
    /* Table index: top four bits of the fraction. */
    index = (a >> 27) & 15;
    if (exponent & 1) {
        z = 0x4000 + (a >> 17) - sqrt_oddadjust[index];
        z = ((a / z) << 14) + (z << 15);
        a >>= 1;
    } else {
        z = 0x8000 + (a >> 17) - sqrt_evenadjust[index];
        z = a / z + z;
        /* Clamp the averaged estimate before scaling up. */
        z = (z >= 0x20000) ? 0xffff8000 : (z << 15);
        if (z <= a)
            return (s32)a >> 1;
    }
    {
        /* Final Newton-style refinement: (a << 31) / z + z / 2. */
        u64 v = (u64)a << 31;
        do_div(v, z);
        return v + (z >> 1);
    }
}
0315 
/*
 * FSQRT: compute the square root of 'm' into register 'sd'.
 * Special cases: NaN propagates; sqrt(+inf) = +inf; sqrt(+/-0) copies
 * the operand; sqrt of any other negative value is invalid (IOC,
 * default QNaN result).
 * NOTE: the sqrt_copy/sqrt_invalid labels below are jumped to from
 * code AFTER the special-case block — do not restructure casually.
 */
static u32 vfp_single_fsqrt(int sd, int unused, s32 m, u32 fpscr)
{
    struct vfp_single vsm, vsd;
    int ret, tm;

    vfp_single_unpack(&vsm, m);
    tm = vfp_single_type(&vsm);
    if (tm & (VFP_NAN|VFP_INFINITY)) {
        struct vfp_single *vsp = &vsd;

        if (tm & VFP_NAN)
            ret = vfp_propagate_nan(vsp, &vsm, NULL, fpscr);
        else if (vsm.sign == 0) {
 sqrt_copy:
            vsp = &vsm;
            ret = 0;
        } else {
 sqrt_invalid:
            vsp = &vfp_single_default_qnan;
            ret = FPSCR_IOC;
        }
        vfp_put_float(vfp_single_pack(vsp), sd);
        return ret;
    }

    /*
     * sqrt(+/- 0) == +/- 0
     */
    if (tm & VFP_ZERO)
        goto sqrt_copy;

    /*
     * Normalise a denormalised number
     */
    if (tm & VFP_DENORMAL)
        vfp_single_normalise_denormal(&vsm);

    /*
     * sqrt(<0) = invalid
     */
    if (vsm.sign)
        goto sqrt_invalid;

    vfp_single_dump("sqrt", &vsm);

    /*
     * Estimate the square root.
     */
    vsd.sign = 0;
    /* Result exponent is half the unbiased input exponent, rebiased. */
    vsd.exponent = ((vsm.exponent - 127) >> 1) + 127;
    vsd.significand = vfp_estimate_sqrt_significand(vsm.exponent, vsm.significand) + 2;

    vfp_single_dump("sqrt estimate", &vsd);

    /*
     * And now adjust.
     */
    if ((vsd.significand & VFP_SINGLE_LOW_BITS_MASK) <= 5) {
        if (vsd.significand < 2) {
            vsd.significand = 0xffffffff;
        } else {
            u64 term;
            s64 rem;
            /* Double the operand for even exponents to keep scaling. */
            vsm.significand <<= !(vsm.exponent & 1);
            term = (u64)vsd.significand * vsd.significand;
            rem = ((u64)vsm.significand << 32) - term;

            pr_debug("VFP: term=%016llx rem=%016llx\n", term, rem);

            /* Correct the estimate downwards while it overshoots. */
            while (rem < 0) {
                vsd.significand -= 1;
                rem += ((u64)vsd.significand << 1) | 1;
            }
            /* Jam any residual into the LSB so rounding sees inexact. */
            vsd.significand |= rem != 0;
        }
    }
    vsd.significand = vfp_shiftright32jamming(vsd.significand, 1);

    return vfp_single_normaliseround(sd, &vsd, fpscr, 0, "fsqrt");
}
0396 
0397 /*
0398  * Equal    := ZC
0399  * Less than    := N
0400  * Greater than := C
0401  * Unordered    := CV
0402  */
/*
 * Compare the packed single in register 'sd' against packed 'm' and
 * return the FPSCR condition flags (see table above).  If either
 * operand is a NaN the result is "unordered" (C|V); a signalling NaN
 * — or any NaN when 'signal_on_qnan' is set — additionally raises IOC.
 */
static u32 vfp_compare(int sd, int signal_on_qnan, s32 m, u32 fpscr)
{
    s32 d;
    u32 ret = 0;

    d = vfp_get_float(sd);
    /* exponent 255 with a non-zero mantissa => NaN */
    if (vfp_single_packed_exponent(m) == 255 && vfp_single_packed_mantissa(m)) {
        ret |= FPSCR_C | FPSCR_V;
        /* top mantissa bit clear => signalling NaN */
        if (signal_on_qnan || !(vfp_single_packed_mantissa(m) & (1 << (VFP_SINGLE_MANTISSA_BITS - 1))))
            /*
             * Signalling NaN, or signalling on quiet NaN
             */
            ret |= FPSCR_IOC;
    }

    if (vfp_single_packed_exponent(d) == 255 && vfp_single_packed_mantissa(d)) {
        ret |= FPSCR_C | FPSCR_V;
        if (signal_on_qnan || !(vfp_single_packed_mantissa(d) & (1 << (VFP_SINGLE_MANTISSA_BITS - 1))))
            /*
             * Signalling NaN, or signalling on quiet NaN
             */
            ret |= FPSCR_IOC;
    }

    /* Only compute an ordering if neither operand was a NaN. */
    if (ret == 0) {
        /* identical bit patterns, or +0 vs -0 (both magnitudes zero) */
        if (d == m || vfp_single_packed_abs(d | m) == 0) {
            /*
             * equal
             */
            ret |= FPSCR_Z | FPSCR_C;
        } else if (vfp_single_packed_sign(d ^ m)) {
            /*
             * different signs
             */
            if (vfp_single_packed_sign(d))
                /*
                 * d is negative, so d < m
                 */
                ret |= FPSCR_N;
            else
                /*
                 * d is positive, so d > m
                 */
                ret |= FPSCR_C;
        } else if ((vfp_single_packed_sign(d) != 0) ^ (d < m)) {
            /*
             * d < m
             */
            ret |= FPSCR_N;
        } else if ((vfp_single_packed_sign(d) != 0) ^ (d > m)) {
            /*
             * d > m
             */
            ret |= FPSCR_C;
        }
    }
    return ret;
}
0461 
0462 static u32 vfp_single_fcmp(int sd, int unused, s32 m, u32 fpscr)
0463 {
0464     return vfp_compare(sd, 0, m, fpscr);
0465 }
0466 
0467 static u32 vfp_single_fcmpe(int sd, int unused, s32 m, u32 fpscr)
0468 {
0469     return vfp_compare(sd, 1, m, fpscr);
0470 }
0471 
0472 static u32 vfp_single_fcmpz(int sd, int unused, s32 m, u32 fpscr)
0473 {
0474     return vfp_compare(sd, 0, 0, fpscr);
0475 }
0476 
0477 static u32 vfp_single_fcmpez(int sd, int unused, s32 m, u32 fpscr)
0478 {
0479     return vfp_compare(sd, 1, 0, fpscr);
0480 }
0481 
/*
 * FCVTD: convert the packed single 'm' to a double in register 'dd'.
 * The conversion is exact except for signalling NaNs, which raise IOC.
 */
static u32 vfp_single_fcvtd(int dd, int unused, s32 m, u32 fpscr)
{
    struct vfp_single vsm;
    struct vfp_double vdd;
    int tm;
    u32 exceptions = 0;

    vfp_single_unpack(&vsm, m);

    tm = vfp_single_type(&vsm);

    /*
     * If we have a signalling NaN, signal invalid operation.
     */
    if (tm == VFP_SNAN)
        exceptions = FPSCR_IOC;

    if (tm & VFP_DENORMAL)
        vfp_single_normalise_denormal(&vsm);

    vdd.sign = vsm.sign;
    /* Widen the 32-bit significand into the top of the 64-bit one. */
    vdd.significand = (u64)vsm.significand << 32;

    /*
     * If we have an infinity or NaN, the exponent must be 2047.
     */
    if (tm & (VFP_INFINITY|VFP_NAN)) {
        vdd.exponent = 2047;
        if (tm == VFP_QNAN)
            vdd.significand |= VFP_DOUBLE_SIGNIFICAND_QNAN;
        goto pack_nan;
    } else if (tm & VFP_ZERO)
        vdd.exponent = 0;
    else
        /* Rebias from single (127) to double (1023). */
        vdd.exponent = vsm.exponent + (1023 - 127);

    return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fcvtd");

 pack_nan:
    vfp_put_double(vfp_double_pack(&vdd), dd);
    return exceptions;
}
0524 
0525 static u32 vfp_single_fuito(int sd, int unused, s32 m, u32 fpscr)
0526 {
0527     struct vfp_single vs;
0528 
0529     vs.sign = 0;
0530     vs.exponent = 127 + 31 - 1;
0531     vs.significand = (u32)m;
0532 
0533     return vfp_single_normaliseround(sd, &vs, fpscr, 0, "fuito");
0534 }
0535 
/*
 * FSITO: convert the signed 32-bit integer 'm' to a single in 'sd'.
 * The magnitude is placed in the significand with exponent 127+31-1;
 * normaliseround() then shifts and rounds it into position.
 */
static u32 vfp_single_fsito(int sd, int unused, s32 m, u32 fpscr)
{
    struct vfp_single vs;

    /* Move packed sign bit 31 down to the struct's sign field (bit 15). */
    vs.sign = (m & 0x80000000) >> 16;
    vs.exponent = 127 + 31 - 1;
    vs.significand = vs.sign ? -m : m;

    return vfp_single_normaliseround(sd, &vs, fpscr, 0, "fsito");
}
0546 
/*
 * FTOUI: convert the single 'm' to an unsigned 32-bit integer in 'sd'
 * using the FPSCR rounding mode.  Returns exception flags: IOC on
 * NaN/overflow/negative results, IXC on inexact, IDC on denormal input.
 */
static u32 vfp_single_ftoui(int sd, int unused, s32 m, u32 fpscr)
{
    struct vfp_single vsm;
    u32 d, exceptions = 0;
    int rmode = fpscr & FPSCR_RMODE_MASK;
    int tm;

    vfp_single_unpack(&vsm, m);
    vfp_single_dump("VSM", &vsm);

    /*
     * Do we have a denormalised number?
     */
    tm = vfp_single_type(&vsm);
    if (tm & VFP_DENORMAL)
        exceptions |= FPSCR_IDC;

    /* Treat a NaN as positive so it saturates to 0xffffffff below. */
    if (tm & VFP_NAN)
        vsm.sign = 0;

    if (vsm.exponent >= 127 + 32) {
        /* Magnitude too large: saturate and raise invalid. */
        d = vsm.sign ? 0 : 0xffffffff;
        exceptions = FPSCR_IOC;
    } else if (vsm.exponent >= 127 - 1) {
        int shift = 127 + 31 - vsm.exponent;
        u32 rem, incr = 0;

        /*
         * 2^0 <= m < 2^32-2^8
         */
        d = (vsm.significand << 1) >> shift;
        /* 'rem' holds the discarded fraction bits for rounding. */
        rem = vsm.significand << (33 - shift);

        if (rmode == FPSCR_ROUND_NEAREST) {
            incr = 0x80000000;
            /* ties-to-even: bias down when the result is even */
            if ((d & 1) == 0)
                incr -= 1;
        } else if (rmode == FPSCR_ROUND_TOZERO) {
            incr = 0;
        } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vsm.sign != 0)) {
            incr = ~0;
        }

        /* Carry out of 'rem' means round the integer part up. */
        if ((rem + incr) < rem) {
            if (d < 0xffffffff)
                d += 1;
            else
                exceptions |= FPSCR_IOC;
        }

        /* A non-zero result from a negative operand is invalid. */
        if (d && vsm.sign) {
            d = 0;
            exceptions |= FPSCR_IOC;
        } else if (rem)
            exceptions |= FPSCR_IXC;
    } else {
        /* |m| < 1: result is 0 or 1 depending on rounding direction. */
        d = 0;
        if (vsm.exponent | vsm.significand) {
            exceptions |= FPSCR_IXC;
            if (rmode == FPSCR_ROUND_PLUSINF && vsm.sign == 0)
                d = 1;
            else if (rmode == FPSCR_ROUND_MINUSINF && vsm.sign) {
                d = 0;
                exceptions |= FPSCR_IOC;
            }
        }
    }

    pr_debug("VFP: ftoui: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions);

    vfp_put_float(d, sd);

    return exceptions;
}
0621 
0622 static u32 vfp_single_ftouiz(int sd, int unused, s32 m, u32 fpscr)
0623 {
0624     return vfp_single_ftoui(sd, unused, m, FPSCR_ROUND_TOZERO);
0625 }
0626 
0627 static u32 vfp_single_ftosi(int sd, int unused, s32 m, u32 fpscr)
0628 {
0629     struct vfp_single vsm;
0630     u32 d, exceptions = 0;
0631     int rmode = fpscr & FPSCR_RMODE_MASK;
0632     int tm;
0633 
0634     vfp_single_unpack(&vsm, m);
0635     vfp_single_dump("VSM", &vsm);
0636 
0637     /*
0638      * Do we have a denormalised number?
0639      */
0640     tm = vfp_single_type(&vsm);
0641     if (vfp_single_type(&vsm) & VFP_DENORMAL)
0642         exceptions |= FPSCR_IDC;
0643 
0644     if (tm & VFP_NAN) {
0645         d = 0;
0646         exceptions |= FPSCR_IOC;
0647     } else if (vsm.exponent >= 127 + 32) {
0648         /*
0649          * m >= 2^31-2^7: invalid
0650          */
0651         d = 0x7fffffff;
0652         if (vsm.sign)
0653             d = ~d;
0654         exceptions |= FPSCR_IOC;
0655     } else if (vsm.exponent >= 127 - 1) {
0656         int shift = 127 + 31 - vsm.exponent;
0657         u32 rem, incr = 0;
0658 
0659         /* 2^0 <= m <= 2^31-2^7 */
0660         d = (vsm.significand << 1) >> shift;
0661         rem = vsm.significand << (33 - shift);
0662 
0663         if (rmode == FPSCR_ROUND_NEAREST) {
0664             incr = 0x80000000;
0665             if ((d & 1) == 0)
0666                 incr -= 1;
0667         } else if (rmode == FPSCR_ROUND_TOZERO) {
0668             incr = 0;
0669         } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vsm.sign != 0)) {
0670             incr = ~0;
0671         }
0672 
0673         if ((rem + incr) < rem && d < 0xffffffff)
0674             d += 1;
0675         if (d > 0x7fffffff + (vsm.sign != 0)) {
0676             d = 0x7fffffff + (vsm.sign != 0);
0677             exceptions |= FPSCR_IOC;
0678         } else if (rem)
0679             exceptions |= FPSCR_IXC;
0680 
0681         if (vsm.sign)
0682             d = -d;
0683     } else {
0684         d = 0;
0685         if (vsm.exponent | vsm.significand) {
0686             exceptions |= FPSCR_IXC;
0687             if (rmode == FPSCR_ROUND_PLUSINF && vsm.sign == 0)
0688                 d = 1;
0689             else if (rmode == FPSCR_ROUND_MINUSINF && vsm.sign)
0690                 d = -1;
0691         }
0692     }
0693 
0694     pr_debug("VFP: ftosi: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions);
0695 
0696     vfp_put_float((s32)d, sd);
0697 
0698     return exceptions;
0699 }
0700 
0701 static u32 vfp_single_ftosiz(int sd, int unused, s32 m, u32 fpscr)
0702 {
0703     return vfp_single_ftosi(sd, unused, m, FPSCR_ROUND_TOZERO);
0704 }
0705 
/*
 * Dispatch table for the "extended" single-precision opcodes, indexed
 * by FEXT_TO_IDX() of the instruction's extension field.  The second
 * member carries OP_* flags (e.g. OP_SCALAR, OP_DD for a double
 * destination).  Unlisted slots are zero-initialised (no handler).
 */
static struct op fops_ext[32] = {
    [FEXT_TO_IDX(FEXT_FCPY)]    = { vfp_single_fcpy,   0 },
    [FEXT_TO_IDX(FEXT_FABS)]    = { vfp_single_fabs,   0 },
    [FEXT_TO_IDX(FEXT_FNEG)]    = { vfp_single_fneg,   0 },
    [FEXT_TO_IDX(FEXT_FSQRT)]   = { vfp_single_fsqrt,  0 },
    [FEXT_TO_IDX(FEXT_FCMP)]    = { vfp_single_fcmp,   OP_SCALAR },
    [FEXT_TO_IDX(FEXT_FCMPE)]   = { vfp_single_fcmpe,  OP_SCALAR },
    [FEXT_TO_IDX(FEXT_FCMPZ)]   = { vfp_single_fcmpz,  OP_SCALAR },
    [FEXT_TO_IDX(FEXT_FCMPEZ)]  = { vfp_single_fcmpez, OP_SCALAR },
    [FEXT_TO_IDX(FEXT_FCVT)]    = { vfp_single_fcvtd,  OP_SCALAR|OP_DD },
    [FEXT_TO_IDX(FEXT_FUITO)]   = { vfp_single_fuito,  OP_SCALAR },
    [FEXT_TO_IDX(FEXT_FSITO)]   = { vfp_single_fsito,  OP_SCALAR },
    [FEXT_TO_IDX(FEXT_FTOUI)]   = { vfp_single_ftoui,  OP_SCALAR },
    [FEXT_TO_IDX(FEXT_FTOUIZ)]  = { vfp_single_ftouiz, OP_SCALAR },
    [FEXT_TO_IDX(FEXT_FTOSI)]   = { vfp_single_ftosi,  OP_SCALAR },
    [FEXT_TO_IDX(FEXT_FTOSIZ)]  = { vfp_single_ftosiz, OP_SCALAR },
};
0723 
0724 
0725 
0726 
0727 
/*
 * Handle addition when 'n' is an infinity or NaN ('m' may be any type).
 * Writes the result to *vsd and returns exception flags:
 *  inf + (-inf)  -> default QNaN, IOC
 *  inf + number  -> that infinity
 *  NaN involved  -> propagated via vfp_propagate_nan()
 */
static u32
vfp_single_fadd_nonnumber(struct vfp_single *vsd, struct vfp_single *vsn,
              struct vfp_single *vsm, u32 fpscr)
{
    struct vfp_single *vsp;
    u32 exceptions = 0;
    int tn, tm;

    tn = vfp_single_type(vsn);
    tm = vfp_single_type(vsm);

    if (tn & tm & VFP_INFINITY) {
        /*
         * Two infinities.  Are they different signs?
         */
        if (vsn->sign ^ vsm->sign) {
            /*
             * different signs -> invalid
             */
            exceptions = FPSCR_IOC;
            vsp = &vfp_single_default_qnan;
        } else {
            /*
             * same signs -> valid
             */
            vsp = vsn;
        }
    } else if (tn & VFP_INFINITY && tm & VFP_NUMBER) {
        /*
         * One infinity and one number -> infinity
         */
        vsp = vsn;
    } else {
        /*
         * 'n' is a NaN of some type
         */
        return vfp_propagate_nan(vsd, vsn, vsm, fpscr);
    }
    *vsd = *vsp;
    return exceptions;
}
0769 
/*
 * Core addition: *vsd = *vsn + *vsm on unpacked operands (a sign
 * difference makes this a subtraction).  Inputs are expected to have
 * bit 31 of the significand clear — a diagnostic is logged otherwise.
 * Returns exception flags (only via fadd_nonnumber/propagate_nan).
 */
static u32
vfp_single_add(struct vfp_single *vsd, struct vfp_single *vsn,
           struct vfp_single *vsm, u32 fpscr)
{
    u32 exp_diff, m_sig;

    if (vsn->significand & 0x80000000 ||
        vsm->significand & 0x80000000) {
        pr_info("VFP: bad FP values in %s\n", __func__);
        vfp_single_dump("VSN", vsn);
        vfp_single_dump("VSM", vsm);
    }

    /*
     * Ensure that 'n' is the largest magnitude number.  Note that
     * if 'n' and 'm' have equal exponents, we do not swap them.
     * This ensures that NaN propagation works correctly.
     */
    if (vsn->exponent < vsm->exponent) {
        struct vfp_single *t = vsn;
        vsn = vsm;
        vsm = t;
    }

    /*
     * Is 'n' an infinity or a NaN?  Note that 'm' may be a number,
     * infinity or a NaN here.
     */
    if (vsn->exponent == 255)
        return vfp_single_fadd_nonnumber(vsd, vsn, vsm, fpscr);

    /*
     * We have two proper numbers, where 'vsn' is the larger magnitude.
     *
     * Copy 'n' to 'd' before doing the arithmetic.
     */
    *vsd = *vsn;

    /*
     * Align both numbers.
     */
    exp_diff = vsn->exponent - vsm->exponent;
    /* Jamming shift keeps lost low bits visible for later rounding. */
    m_sig = vfp_shiftright32jamming(vsm->significand, exp_diff);

    /*
     * If the signs are different, we are really subtracting.
     */
    if (vsn->sign ^ vsm->sign) {
        m_sig = vsn->significand - m_sig;
        if ((s32)m_sig < 0) {
            /* Magnitudes were equal-exponent and 'm' larger: flip sign. */
            vsd->sign = vfp_sign_negate(vsd->sign);
            m_sig = -m_sig;
        } else if (m_sig == 0) {
            /* Exact cancellation: IEEE sign of zero depends on the
             * rounding mode (negative only in round-to-minus-inf). */
            vsd->sign = (fpscr & FPSCR_RMODE_MASK) ==
                      FPSCR_ROUND_MINUSINF ? 0x8000 : 0;
        }
    } else {
        m_sig = vsn->significand + m_sig;
    }
    vsd->significand = m_sig;

    return 0;
}
0833 
/*
 * Core multiplication: *vsd = *vsn * *vsm on unpacked operands.
 * Handles inf/NaN specials (inf * 0 is invalid -> default QNaN, IOC)
 * and returns exception flags; finite results still need rounding by
 * the caller via normaliseround().
 */
static u32
vfp_single_multiply(struct vfp_single *vsd, struct vfp_single *vsn, struct vfp_single *vsm, u32 fpscr)
{
    vfp_single_dump("VSN", vsn);
    vfp_single_dump("VSM", vsm);

    /*
     * Ensure that 'n' is the largest magnitude number.  Note that
     * if 'n' and 'm' have equal exponents, we do not swap them.
     * This ensures that NaN propagation works correctly.
     */
    if (vsn->exponent < vsm->exponent) {
        struct vfp_single *t = vsn;
        vsn = vsm;
        vsm = t;
        pr_debug("VFP: swapping M <-> N\n");
    }

    /* Result sign is the XOR of the operand signs, NaNs included. */
    vsd->sign = vsn->sign ^ vsm->sign;

    /*
     * If 'n' is an infinity or NaN, handle it.  'm' may be anything.
     */
    if (vsn->exponent == 255) {
        if (vsn->significand || (vsm->exponent == 255 && vsm->significand))
            return vfp_propagate_nan(vsd, vsn, vsm, fpscr);
        /* infinity * zero -> invalid operation */
        if ((vsm->exponent | vsm->significand) == 0) {
            *vsd = vfp_single_default_qnan;
            return FPSCR_IOC;
        }
        vsd->exponent = vsn->exponent;
        vsd->significand = 0;
        return 0;
    }

    /*
     * If 'm' is zero, the result is always zero.  In this case,
     * 'n' may be zero or a number, but it doesn't matter which.
     */
    if ((vsm->exponent | vsm->significand) == 0) {
        vsd->exponent = 0;
        vsd->significand = 0;
        return 0;
    }

    /*
     * We add 2 to the destination exponent for the same reason as
     * the addition case - though this time we have +1 from each
     * input operand.
     */
    vsd->exponent = vsn->exponent + vsm->exponent - 127 + 2;
    /* 64-bit product folded to 32 bits with sticky-bit jamming. */
    vsd->significand = vfp_hi64to32jamming((u64)vsn->significand * vsm->significand);

    vfp_single_dump("VSD", vsd);
    return 0;
}
0890 
/*
 * Flags for vfp_single_multiply_accumulate(): negate the product
 * (NEG_MULTIPLY) and/or the accumuland (NEG_SUBTRACT) before the
 * final addition.
 */
#define NEG_MULTIPLY    (1 << 0)
#define NEG_SUBTRACT    (1 << 1)
0893 
/*
 * Fused helper for the four multiply-accumulate ops:
 *   sd := (+/- sd) + (+/- (sn * m))
 * where the signs are selected by the NEG_SUBTRACT / NEG_MULTIPLY
 * bits in 'negate'.  'func' names the caller for debug output.
 * Returns the accumulated exception flags from multiply, add and
 * normaliseround.
 */
static u32
vfp_single_multiply_accumulate(int sd, int sn, s32 m, u32 fpscr, u32 negate, char *func)
{
    struct vfp_single vsd, vsp, vsn, vsm;
    u32 exceptions;
    s32 v;

    /* Unpack sn (normalising denormals) and form the product in vsp. */
    v = vfp_get_float(sn);
    pr_debug("VFP: s%u = %08x\n", sn, v);
    vfp_single_unpack(&vsn, v);
    if (vsn.exponent == 0 && vsn.significand)
        vfp_single_normalise_denormal(&vsn);

    vfp_single_unpack(&vsm, m);
    if (vsm.exponent == 0 && vsm.significand)
        vfp_single_normalise_denormal(&vsm);

    exceptions = vfp_single_multiply(&vsp, &vsn, &vsm, fpscr);
    if (negate & NEG_MULTIPLY)
        vsp.sign = vfp_sign_negate(vsp.sign);

    /* Unpack the accumuland sd into vsn (reused) and add the product. */
    v = vfp_get_float(sd);
    pr_debug("VFP: s%u = %08x\n", sd, v);
    vfp_single_unpack(&vsn, v);
    if (vsn.exponent == 0 && vsn.significand)
        vfp_single_normalise_denormal(&vsn);
    if (negate & NEG_SUBTRACT)
        vsn.sign = vfp_sign_negate(vsn.sign);

    exceptions |= vfp_single_add(&vsd, &vsn, &vsp, fpscr);

    return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, func);
}
0927 
0928 /*
0929  * Standard operations
0930  */
0931 
0932 /*
0933  * sd = sd + (sn * sm)
0934  */
0935 static u32 vfp_single_fmac(int sd, int sn, s32 m, u32 fpscr)
0936 {
0937     return vfp_single_multiply_accumulate(sd, sn, m, fpscr, 0, "fmac");
0938 }
0939 
0940 /*
0941  * sd = sd - (sn * sm)
0942  */
0943 static u32 vfp_single_fnmac(int sd, int sn, s32 m, u32 fpscr)
0944 {
0945     return vfp_single_multiply_accumulate(sd, sn, m, fpscr, NEG_MULTIPLY, "fnmac");
0946 }
0947 
0948 /*
0949  * sd = -sd + (sn * sm)
0950  */
0951 static u32 vfp_single_fmsc(int sd, int sn, s32 m, u32 fpscr)
0952 {
0953     return vfp_single_multiply_accumulate(sd, sn, m, fpscr, NEG_SUBTRACT, "fmsc");
0954 }
0955 
0956 /*
0957  * sd = -sd - (sn * sm)
0958  */
0959 static u32 vfp_single_fnmsc(int sd, int sn, s32 m, u32 fpscr)
0960 {
0961     return vfp_single_multiply_accumulate(sd, sn, m, fpscr, NEG_SUBTRACT | NEG_MULTIPLY, "fnmsc");
0962 }
0963 
0964 /*
0965  * sd = sn * sm
0966  */
0967 static u32 vfp_single_fmul(int sd, int sn, s32 m, u32 fpscr)
0968 {
0969     struct vfp_single vsd, vsn, vsm;
0970     u32 exceptions;
0971     s32 n = vfp_get_float(sn);
0972 
0973     pr_debug("VFP: s%u = %08x\n", sn, n);
0974 
0975     vfp_single_unpack(&vsn, n);
0976     if (vsn.exponent == 0 && vsn.significand)
0977         vfp_single_normalise_denormal(&vsn);
0978 
0979     vfp_single_unpack(&vsm, m);
0980     if (vsm.exponent == 0 && vsm.significand)
0981         vfp_single_normalise_denormal(&vsm);
0982 
0983     exceptions = vfp_single_multiply(&vsd, &vsn, &vsm, fpscr);
0984     return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, "fmul");
0985 }
0986 
0987 /*
0988  * sd = -(sn * sm)
0989  */
0990 static u32 vfp_single_fnmul(int sd, int sn, s32 m, u32 fpscr)
0991 {
0992     struct vfp_single vsd, vsn, vsm;
0993     u32 exceptions;
0994     s32 n = vfp_get_float(sn);
0995 
0996     pr_debug("VFP: s%u = %08x\n", sn, n);
0997 
0998     vfp_single_unpack(&vsn, n);
0999     if (vsn.exponent == 0 && vsn.significand)
1000         vfp_single_normalise_denormal(&vsn);
1001 
1002     vfp_single_unpack(&vsm, m);
1003     if (vsm.exponent == 0 && vsm.significand)
1004         vfp_single_normalise_denormal(&vsm);
1005 
1006     exceptions = vfp_single_multiply(&vsd, &vsn, &vsm, fpscr);
1007     vsd.sign = vfp_sign_negate(vsd.sign);
1008     return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, "fnmul");
1009 }
1010 
1011 /*
1012  * sd = sn + sm
1013  */
1014 static u32 vfp_single_fadd(int sd, int sn, s32 m, u32 fpscr)
1015 {
1016     struct vfp_single vsd, vsn, vsm;
1017     u32 exceptions;
1018     s32 n = vfp_get_float(sn);
1019 
1020     pr_debug("VFP: s%u = %08x\n", sn, n);
1021 
1022     /*
1023      * Unpack and normalise denormals.
1024      */
1025     vfp_single_unpack(&vsn, n);
1026     if (vsn.exponent == 0 && vsn.significand)
1027         vfp_single_normalise_denormal(&vsn);
1028 
1029     vfp_single_unpack(&vsm, m);
1030     if (vsm.exponent == 0 && vsm.significand)
1031         vfp_single_normalise_denormal(&vsm);
1032 
1033     exceptions = vfp_single_add(&vsd, &vsn, &vsm, fpscr);
1034 
1035     return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, "fadd");
1036 }
1037 
1038 /*
1039  * sd = sn - sm
1040  */
1041 static u32 vfp_single_fsub(int sd, int sn, s32 m, u32 fpscr)
1042 {
1043     /*
1044      * Subtraction is addition with one sign inverted.
1045      */
1046     return vfp_single_fadd(sd, sn, vfp_single_packed_negate(m), fpscr);
1047 }
1048 
1049 /*
1050  * sd = sn / sm
1051  */
1052 static u32 vfp_single_fdiv(int sd, int sn, s32 m, u32 fpscr)
1053 {
1054     struct vfp_single vsd, vsn, vsm;
1055     u32 exceptions = 0;
1056     s32 n = vfp_get_float(sn);
1057     int tm, tn;
1058 
1059     pr_debug("VFP: s%u = %08x\n", sn, n);
1060 
1061     vfp_single_unpack(&vsn, n);
1062     vfp_single_unpack(&vsm, m);
1063 
1064     vsd.sign = vsn.sign ^ vsm.sign;
1065 
1066     tn = vfp_single_type(&vsn);
1067     tm = vfp_single_type(&vsm);
1068 
1069     /*
1070      * Is n a NAN?
1071      */
1072     if (tn & VFP_NAN)
1073         goto vsn_nan;
1074 
1075     /*
1076      * Is m a NAN?
1077      */
1078     if (tm & VFP_NAN)
1079         goto vsm_nan;
1080 
1081     /*
1082      * If n and m are infinity, the result is invalid
1083      * If n and m are zero, the result is invalid
1084      */
1085     if (tm & tn & (VFP_INFINITY|VFP_ZERO))
1086         goto invalid;
1087 
1088     /*
1089      * If n is infinity, the result is infinity
1090      */
1091     if (tn & VFP_INFINITY)
1092         goto infinity;
1093 
1094     /*
1095      * If m is zero, raise div0 exception
1096      */
1097     if (tm & VFP_ZERO)
1098         goto divzero;
1099 
1100     /*
1101      * If m is infinity, or n is zero, the result is zero
1102      */
1103     if (tm & VFP_INFINITY || tn & VFP_ZERO)
1104         goto zero;
1105 
1106     if (tn & VFP_DENORMAL)
1107         vfp_single_normalise_denormal(&vsn);
1108     if (tm & VFP_DENORMAL)
1109         vfp_single_normalise_denormal(&vsm);
1110 
1111     /*
1112      * Ok, we have two numbers, we can perform division.
1113      */
1114     vsd.exponent = vsn.exponent - vsm.exponent + 127 - 1;
1115     vsm.significand <<= 1;
1116     if (vsm.significand <= (2 * vsn.significand)) {
1117         vsn.significand >>= 1;
1118         vsd.exponent++;
1119     }
1120     {
1121         u64 significand = (u64)vsn.significand << 32;
1122         do_div(significand, vsm.significand);
1123         vsd.significand = significand;
1124     }
1125     if ((vsd.significand & 0x3f) == 0)
1126         vsd.significand |= ((u64)vsm.significand * vsd.significand != (u64)vsn.significand << 32);
1127 
1128     return vfp_single_normaliseround(sd, &vsd, fpscr, 0, "fdiv");
1129 
1130  vsn_nan:
1131     exceptions = vfp_propagate_nan(&vsd, &vsn, &vsm, fpscr);
1132  pack:
1133     vfp_put_float(vfp_single_pack(&vsd), sd);
1134     return exceptions;
1135 
1136  vsm_nan:
1137     exceptions = vfp_propagate_nan(&vsd, &vsm, &vsn, fpscr);
1138     goto pack;
1139 
1140  zero:
1141     vsd.exponent = 0;
1142     vsd.significand = 0;
1143     goto pack;
1144 
1145  divzero:
1146     exceptions = FPSCR_DZC;
1147  infinity:
1148     vsd.exponent = 255;
1149     vsd.significand = 0;
1150     goto pack;
1151 
1152  invalid:
1153     vfp_put_float(vfp_single_pack(&vfp_single_default_qnan), sd);
1154     return FPSCR_IOC;
1155 }
1156 
/*
 * Dispatch table for the non-extended single-precision CPDO
 * operations, indexed by FOP_TO_IDX(opcode).  Entries left unset
 * have a NULL .fn and are rejected as invalid by vfp_single_cpdo().
 */
static struct op fops[16] = {
    [FOP_TO_IDX(FOP_FMAC)]  = { vfp_single_fmac,  0 },
    [FOP_TO_IDX(FOP_FNMAC)] = { vfp_single_fnmac, 0 },
    [FOP_TO_IDX(FOP_FMSC)]  = { vfp_single_fmsc,  0 },
    [FOP_TO_IDX(FOP_FNMSC)] = { vfp_single_fnmsc, 0 },
    [FOP_TO_IDX(FOP_FMUL)]  = { vfp_single_fmul,  0 },
    [FOP_TO_IDX(FOP_FNMUL)] = { vfp_single_fnmul, 0 },
    [FOP_TO_IDX(FOP_FADD)]  = { vfp_single_fadd,  0 },
    [FOP_TO_IDX(FOP_FSUB)]  = { vfp_single_fsub,  0 },
    [FOP_TO_IDX(FOP_FDIV)]  = { vfp_single_fdiv,  0 },
};
1168 
#define FREG_BANK(x)    ((x) & 0x18)    /* bank (bits [4:3]) of an S register number */
#define FREG_IDX(x) ((x) & 7)       /* index within the 8-register bank */
1171 
/*
 * Emulate one single-precision CPDO (data-processing) instruction,
 * including short-vector operation: the op is repeated over up to
 * LEN registers with the configured stride, wrapping within each
 * 8-register bank.  Returns the accumulated exception flags, or
 * (u32)-1 for an unhandled/invalid opcode.
 */
u32 vfp_single_cpdo(u32 inst, u32 fpscr)
{
    u32 op = inst & FOP_MASK;
    u32 exceptions = 0;
    unsigned int dest;
    unsigned int sn = vfp_get_sn(inst);
    unsigned int sm = vfp_get_sm(inst);
    unsigned int vecitr, veclen, vecstride;
    struct op *fop;

    /* FPSCR STRIDE field: only strides of 1 and 2 are used here. */
    vecstride = 1 + ((fpscr & FPSCR_STRIDE_MASK) == FPSCR_STRIDE_MASK);

    /* Extended (monadic) ops dispatch through fops_ext, the rest
     * through the dyadic fops table above. */
    fop = (op == FOP_EXT) ? &fops_ext[FEXT_TO_IDX(inst)] : &fops[FOP_TO_IDX(op)];

    /*
     * fcvtsd takes a dN register number as destination, not sN.
     * Technically, if bit 0 of dd is set, this is an invalid
     * instruction.  However, we ignore this for efficiency.
     * It also only operates on scalars.
     */
    if (fop->flags & OP_DD)
        dest = vfp_get_dd(inst);
    else
        dest = vfp_get_sd(inst);

    /*
     * If destination bank is zero, vector length is always '1'.
     * ARM DDI0100F C5.1.3, C5.3.2.
     */
    if ((fop->flags & OP_SCALAR) || FREG_BANK(dest) == 0)
        veclen = 0;
    else
        veclen = fpscr & FPSCR_LENGTH_MASK;

    pr_debug("VFP: vecstride=%u veclen=%u\n", vecstride,
         (veclen >> FPSCR_LENGTH_BIT) + 1);

    /* No handler registered => invalid opcode. */
    if (!fop->fn)
        goto invalid;

    /* Iterate the short vector; veclen is still in FPSCR field
     * units, so step by one field increment per element. */
    for (vecitr = 0; vecitr <= veclen; vecitr += 1 << FPSCR_LENGTH_BIT) {
        s32 m = vfp_get_float(sm);
        u32 except;
        char type;

        type = fop->flags & OP_DD ? 'd' : 's';
        if (op == FOP_EXT)
            pr_debug("VFP: itr%d (%c%u) = op[%u] (s%u=%08x)\n",
                 vecitr >> FPSCR_LENGTH_BIT, type, dest, sn,
                 sm, m);
        else
            pr_debug("VFP: itr%d (%c%u) = (s%u) op[%u] (s%u=%08x)\n",
                 vecitr >> FPSCR_LENGTH_BIT, type, dest, sn,
                 FOP_TO_IDX(op), sm, m);

        except = fop->fn(dest, sn, m, fpscr);
        pr_debug("VFP: itr%d: exceptions=%08x\n",
             vecitr >> FPSCR_LENGTH_BIT, except);

        exceptions |= except;

        /*
         * CHECK: It appears to be undefined whether we stop when
         * we encounter an exception.  We continue.
         */

        /* Advance registers by the stride, wrapping within the
         * 8-register bank; sm in bank 0 stays fixed (scalar operand). */
        dest = FREG_BANK(dest) + ((FREG_IDX(dest) + vecstride) & 7);
        sn = FREG_BANK(sn) + ((FREG_IDX(sn) + vecstride) & 7);
        if (FREG_BANK(sm) != 0)
            sm = FREG_BANK(sm) + ((FREG_IDX(sm) + vecstride) & 7);
    }
    return exceptions;

 invalid:
    return (u32)-1;
}