/*
 *  linux/arch/arm/vfp/vfpdouble.c
 *
 *  Double-precision VFP emulation: unpacking, normalisation and
 *  rounding, and the CPDO (data-processing) instruction handlers.
 *
 *  This code is derived in part from John R. Hauser's SoftFloat
 *  IEC/IEEE floating-point arithmetic package, Release 2.
 */
#include <linux/kernel.h>
#include <linux/bitops.h>

#include <asm/div64.h>
#include <asm/vfp.h>

#include "vfpinstr.h"
#include "vfp.h"

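/*
 * The ARM "default NaN" in unpacked form: positive sign, all-ones
 * exponent, and only the most significant fraction bit set.  It is
 * substituted for any NaN result when default NaN mode (FPSCR.DN)
 * is enabled, and for invalid operations such as sqrt(-1).
 */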
static struct vfp_double vfp_double_default_qnan = {
	.exponent = 2047,
	.sign = 0,
	.significand = VFP_DOUBLE_SIGNIFICAND_QNAN,
};

static void vfp_double_dump(const char *str, struct vfp_double *d)
{
	pr_debug("VFP: %s: sign=%d exponent=%d significand=%016llx\n",
		 str, d->sign != 0, d->exponent, d->significand);
}

static void vfp_double_normalise_denormal(struct vfp_double *vd)
{
	int bits = 31 - fls(vd->significand >> 32);
	if (bits == 31)
		bits = 63 - fls(vd->significand);

	vfp_double_dump("normalise_denormal: in", vd);

	if (bits) {
		vd->exponent -= bits - 1;
		vd->significand <<= bits;
	}

	vfp_double_dump("normalise_denormal: out", vd);
}

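/*
 * Round the unpacked double in 'vd' according to the FPSCR rounding
 * mode, pack it into register 'dd', and return the accumulated
 * exception flags.
 *
 * Unpacked significands keep the implicit leading one at bit 62 and
 * the 52-bit mantissa in bits 61..10, leaving the bottom
 * VFP_DOUBLE_LOW_BITS (ten) bits as guard/round/sticky information.
 * This function works with the value normalised up to bit 63, so the
 * low eleven bits are rounded away, and it shifts back down one bit
 * when packing the result.
 */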
u32 vfp_double_normaliseround(int dd, struct vfp_double *vd, u32 fpscr, u32 exceptions, const char *func)
{
	u64 significand, incr;
	int exponent, shift, underflow;
	u32 rmode;

	vfp_double_dump("pack: in", vd);

	/*
	 * Infinities and NaNs are a special case.
	 */
	if (vd->exponent == 2047 && (vd->significand == 0 || exceptions))
		goto pack;

	/*
	 * Special-case zero.
	 */
	if (vd->significand == 0) {
		vd->exponent = 0;
		goto pack;
	}

	exponent = vd->exponent;
	significand = vd->significand;

	shift = 32 - fls(significand >> 32);
	if (shift == 32)
		shift = 64 - fls(significand);
	if (shift) {
		exponent -= shift;
		significand <<= shift;
	}

#ifdef DEBUG
	vd->exponent = exponent;
	vd->significand = significand;
	vfp_double_dump("pack: normalised", vd);
#endif

	/*
	 * Tiny number?
	 */
	underflow = exponent < 0;
	if (underflow) {
		significand = vfp_shiftright64jamming(significand, -exponent);
		exponent = 0;
#ifdef DEBUG
		vd->exponent = exponent;
		vd->significand = significand;
		vfp_double_dump("pack: tiny number", vd);
#endif
		if (!(significand & ((1ULL << (VFP_DOUBLE_LOW_BITS + 1)) - 1)))
			underflow = 0;
	}

	/*
	 * Select rounding increment.
	 */
	incr = 0;
	rmode = fpscr & FPSCR_RMODE_MASK;

	if (rmode == FPSCR_ROUND_NEAREST) {
		incr = 1ULL << VFP_DOUBLE_LOW_BITS;
		if ((significand & (1ULL << (VFP_DOUBLE_LOW_BITS + 1))) == 0)
			incr -= 1;
	} else if (rmode == FPSCR_ROUND_TOZERO) {
		incr = 0;
	} else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vd->sign != 0))
		incr = (1ULL << (VFP_DOUBLE_LOW_BITS + 1)) - 1;

	pr_debug("VFP: rounding increment = 0x%08llx\n", incr);

	/*
	 * Is our rounding going to overflow?
	 */
	if ((significand + incr) < significand) {
		exponent += 1;
		significand = (significand >> 1) | (significand & 1);
		incr >>= 1;
#ifdef DEBUG
		vd->exponent = exponent;
		vd->significand = significand;
		vfp_double_dump("pack: overflow", vd);
#endif
	}

	/*
	 * If any of the low bits (which will be shifted out of the
	 * number) are non-zero, the result is inexact.
	 */
	if (significand & ((1 << (VFP_DOUBLE_LOW_BITS + 1)) - 1))
		exceptions |= FPSCR_IXC;

	/*
	 * Do our rounding.
	 */
	significand += incr;

	/*
	 * Check for overflow.
	 */
	if (exponent >= 2046) {
		exceptions |= FPSCR_OFC | FPSCR_IXC;
		if (incr == 0) {
			vd->exponent = 2045;
			vd->significand = 0x7fffffffffffffffULL;
		} else {
			vd->exponent = 2047;
			vd->significand = 0;
		}
	} else {
		if (significand >> (VFP_DOUBLE_LOW_BITS + 1) == 0)
			exponent = 0;
		if (exponent || significand > 0x8000000000000000ULL)
			underflow = 0;
		if (underflow)
			exceptions |= FPSCR_UFC;
		vd->exponent = exponent;
		vd->significand = significand >> 1;
	}

 pack:
	vfp_double_dump("pack: final", vd);
	{
		s64 d = vfp_double_pack(vd);
		pr_debug("VFP: %s: d(d%d)=%016llx exceptions=%08x\n", func,
			 dd, d, exceptions);
		vfp_put_double(d, dd);
	}
	return exceptions;
}

/*
 * Propagate the NaN, setting exceptions if it is signalling.
 * 'n' is always a NaN.  'm' may be a number, NaN or infinity.
 */
static u32
vfp_propagate_nan(struct vfp_double *vdd, struct vfp_double *vdn,
		  struct vfp_double *vdm, u32 fpscr)
{
	struct vfp_double *nan;
	int tn, tm = 0;

	tn = vfp_double_type(vdn);

	if (vdm)
		tm = vfp_double_type(vdm);

	if (fpscr & FPSCR_DEFAULT_NAN)
		/*
		 * Default NaN mode - always returns a quiet NaN
		 */
		nan = &vfp_double_default_qnan;
	else {
		/*
		 * Contemporary mode - select the first signalling
		 * NaN, or if neither are signalling, the first
		 * quiet NaN.
		 */
		if (tn == VFP_SNAN || (tm != VFP_SNAN && tn == VFP_QNAN))
			nan = vdn;
		else
			nan = vdm;

		/*
		 * Make the NaN quiet.
		 */
		nan->significand |= VFP_DOUBLE_SIGNIFICAND_QNAN;
	}

	*vdd = *nan;

	/*
	 * If one was a signalling NaN, raise invalid operation.
	 */
	return tn == VFP_SNAN || tm == VFP_SNAN ? FPSCR_IOC : VFP_NAN_FLAG;
}

/*
 * Extended operations
 */
static u32 vfp_double_fabs(int dd, int unused, int dm, u32 fpscr)
{
	vfp_put_double(vfp_double_packed_abs(vfp_get_double(dm)), dd);
	return 0;
}

static u32 vfp_double_fcpy(int dd, int unused, int dm, u32 fpscr)
{
	vfp_put_double(vfp_get_double(dm), dd);
	return 0;
}

static u32 vfp_double_fneg(int dd, int unused, int dm, u32 fpscr)
{
	vfp_put_double(vfp_double_packed_negate(vfp_get_double(dm)), dd);
	return 0;
}

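/*
 * Square root: handle the special cases (NaN, infinity, zero and
 * negative operands) first, then halve the exponent and compute the
 * result significand in three steps: a 32-bit initial estimate, one
 * refinement via a 128/64-bit division, and, when the estimate lies
 * close to a rounding boundary, an exact remainder check that also
 * sets the sticky bit.
 */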
static u32 vfp_double_fsqrt(int dd, int unused, int dm, u32 fpscr)
{
	struct vfp_double vdm, vdd;
	int ret, tm;

	vfp_double_unpack(&vdm, vfp_get_double(dm));
	tm = vfp_double_type(&vdm);
	if (tm & (VFP_NAN|VFP_INFINITY)) {
		struct vfp_double *vdp = &vdd;

		if (tm & VFP_NAN)
			ret = vfp_propagate_nan(vdp, &vdm, NULL, fpscr);
		else if (vdm.sign == 0) {
 sqrt_copy:
			vdp = &vdm;
			ret = 0;
		} else {
 sqrt_invalid:
			vdp = &vfp_double_default_qnan;
			ret = FPSCR_IOC;
		}
		vfp_put_double(vfp_double_pack(vdp), dd);
		return ret;
	}

	/*
	 * sqrt(+/- 0) == +/- 0
	 */
	if (tm & VFP_ZERO)
		goto sqrt_copy;

	/*
	 * Normalise a denormalised number.
	 */
	if (tm & VFP_DENORMAL)
		vfp_double_normalise_denormal(&vdm);

	/*
	 * sqrt(<0) = invalid
	 */
	if (vdm.sign)
		goto sqrt_invalid;

	vfp_double_dump("sqrt", &vdm);

	/*
	 * Estimate the square root.
	 */
	vdd.sign = 0;
	vdd.exponent = ((vdm.exponent - 1023) >> 1) + 1023;
	vdd.significand = (u64)vfp_estimate_sqrt_significand(vdm.exponent, vdm.significand >> 32) << 31;

	vfp_double_dump("sqrt estimate1", &vdd);

	vdm.significand >>= 1 + (vdm.exponent & 1);
	vdd.significand += 2 + vfp_estimate_div128to64(vdm.significand, 0, vdd.significand);

	vfp_double_dump("sqrt estimate2", &vdd);

	/*
	 * And now adjust.
	 */
	if ((vdd.significand & VFP_DOUBLE_LOW_BITS_MASK) <= 5) {
		if (vdd.significand < 2) {
			vdd.significand = ~0ULL;
		} else {
			u64 termh, terml, remh, reml;
			vdm.significand <<= 2;
			mul64to128(&termh, &terml, vdd.significand, vdd.significand);
			sub128(&remh, &reml, vdm.significand, 0, termh, terml);
			while ((s64)remh < 0) {
				vdd.significand -= 1;
				shift64left(&termh, &terml, vdd.significand);
				terml |= 1;
				add128(&remh, &reml, remh, reml, termh, terml);
			}
			vdd.significand |= (remh | reml) != 0;
		}
	}
	vdd.significand = vfp_shiftright64jamming(vdd.significand, 1);

	return vfp_double_normaliseround(dd, &vdd, fpscr, 0, "fsqrt");
}

/*
 * Equal	:= ZC
 * Less than	:= N
 * Greater than	:= C
 * Unordered	:= CV
 */
static u32 vfp_compare(int dd, int signal_on_qnan, int dm, u32 fpscr)
{
	s64 d, m;
	u32 ret = 0;

	m = vfp_get_double(dm);
	if (vfp_double_packed_exponent(m) == 2047 && vfp_double_packed_mantissa(m)) {
		ret |= FPSCR_C | FPSCR_V;
		if (signal_on_qnan || !(vfp_double_packed_mantissa(m) & (1ULL << (VFP_DOUBLE_MANTISSA_BITS - 1))))
			/*
			 * Signalling NaN, or signalling on quiet NaN
			 */
			ret |= FPSCR_IOC;
	}

	d = vfp_get_double(dd);
	if (vfp_double_packed_exponent(d) == 2047 && vfp_double_packed_mantissa(d)) {
		ret |= FPSCR_C | FPSCR_V;
		if (signal_on_qnan || !(vfp_double_packed_mantissa(d) & (1ULL << (VFP_DOUBLE_MANTISSA_BITS - 1))))
			/*
			 * Signalling NaN, or signalling on quiet NaN
			 */
			ret |= FPSCR_IOC;
	}

	if (ret == 0) {
		if (d == m || vfp_double_packed_abs(d | m) == 0) {
			/*
			 * equal
			 */
			ret |= FPSCR_Z | FPSCR_C;
		} else if (vfp_double_packed_sign(d ^ m)) {
			/*
			 * different signs
			 */
			if (vfp_double_packed_sign(d))
				/*
				 * d is negative, so d < m
				 */
				ret |= FPSCR_N;
			else
				/*
				 * d is positive, so d > m
				 */
				ret |= FPSCR_C;
		} else if ((vfp_double_packed_sign(d) != 0) ^ (d < m)) {
			/*
			 * d < m
			 */
			ret |= FPSCR_N;
		} else if ((vfp_double_packed_sign(d) != 0) ^ (d > m)) {
			/*
			 * d > m
			 */
			ret |= FPSCR_C;
		}
	}

	return ret;
}

static u32 vfp_double_fcmp(int dd, int unused, int dm, u32 fpscr)
{
	return vfp_compare(dd, 0, dm, fpscr);
}

static u32 vfp_double_fcmpe(int dd, int unused, int dm, u32 fpscr)
{
	return vfp_compare(dd, 1, dm, fpscr);
}

static u32 vfp_double_fcmpz(int dd, int unused, int dm, u32 fpscr)
{
	return vfp_compare(dd, 0, VFP_REG_ZERO, fpscr);
}

static u32 vfp_double_fcmpez(int dd, int unused, int dm, u32 fpscr)
{
	return vfp_compare(dd, 1, VFP_REG_ZERO, fpscr);
}

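/*
 * Convert double to single precision (fcvtsd).  The low 32 bits of
 * the significand are "jammed" into bit 0 of the upper half so no
 * inexactness is lost, and the exponent is rebiased from 1023 to
 * 127.  Final rounding and range checking are left to
 * vfp_single_normaliseround().
 */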
static u32 vfp_double_fcvts(int sd, int unused, int dm, u32 fpscr)
{
	struct vfp_double vdm;
	struct vfp_single vsd;
	int tm;
	u32 exceptions = 0;

	vfp_double_unpack(&vdm, vfp_get_double(dm));

	tm = vfp_double_type(&vdm);

	/*
	 * If we have a signalling NaN, signal invalid operation.
	 */
	if (tm == VFP_SNAN)
		exceptions = FPSCR_IOC;

	if (tm & VFP_DENORMAL)
		vfp_double_normalise_denormal(&vdm);

	vsd.sign = vdm.sign;
	vsd.significand = vfp_hi64to32jamming(vdm.significand);

	/*
	 * If we have an infinity or a NaN, the exponent must be 255.
	 */
	if (tm & (VFP_INFINITY|VFP_NAN)) {
		vsd.exponent = 255;
		if (tm == VFP_QNAN)
			vsd.significand |= VFP_SINGLE_SIGNIFICAND_QNAN;
		goto pack_nan;
	} else if (tm & VFP_ZERO)
		vsd.exponent = 0;
	else
		vsd.exponent = vdm.exponent - (1023 - 127);

	return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, "fcvts");

 pack_nan:
	vfp_put_float(vfp_single_pack(&vsd), sd);
	return exceptions;
}

static u32 vfp_double_fuito(int dd, int unused, int dm, u32 fpscr)
{
	struct vfp_double vdm;
	u32 m = vfp_get_float(dm);

	vdm.sign = 0;
	vdm.exponent = 1023 + 63 - 1;
	vdm.significand = (u64)m;

	return vfp_double_normaliseround(dd, &vdm, fpscr, 0, "fuito");
}

static u32 vfp_double_fsito(int dd, int unused, int dm, u32 fpscr)
{
	struct vfp_double vdm;
	u32 m = vfp_get_float(dm);

	vdm.sign = (m & 0x80000000) >> 16;
	vdm.exponent = 1023 + 63 - 1;
	vdm.significand = vdm.sign ? -m : m;

	return vfp_double_normaliseround(dd, &vdm, fpscr, 0, "fsito");
}

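/*
 * Convert double to unsigned 32-bit integer.  For operands with
 * in-range exponents, the significand is shifted so the integer part
 * lands in the low 32 bits of 'd' while 'rem' keeps the discarded
 * fraction; the rounding increment is then applied to 'rem', and a
 * carry out of 'rem' (detected as wrap-around) bumps 'd'.
 * Out-of-range values saturate and raise the invalid-operation
 * cumulative flag (FPSCR.IOC).
 */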
static u32 vfp_double_ftoui(int sd, int unused, int dm, u32 fpscr)
{
	struct vfp_double vdm;
	u32 d, exceptions = 0;
	int rmode = fpscr & FPSCR_RMODE_MASK;
	int tm;

	vfp_double_unpack(&vdm, vfp_get_double(dm));

	/*
	 * Do we have a denormalised number?
	 */
	tm = vfp_double_type(&vdm);
	if (tm & VFP_DENORMAL)
		exceptions |= FPSCR_IDC;

	if (tm & VFP_NAN)
		vdm.sign = 0;

	if (vdm.exponent >= 1023 + 32) {
		d = vdm.sign ? 0 : 0xffffffff;
		exceptions = FPSCR_IOC;
	} else if (vdm.exponent >= 1023 - 1) {
		int shift = 1023 + 63 - vdm.exponent;
		u64 rem, incr = 0;

		/*
		 * 2^0 <= m < 2^32-2^8
		 */
		d = (vdm.significand << 1) >> shift;
		rem = vdm.significand << (65 - shift);

		if (rmode == FPSCR_ROUND_NEAREST) {
			incr = 0x8000000000000000ULL;
			if ((d & 1) == 0)
				incr -= 1;
		} else if (rmode == FPSCR_ROUND_TOZERO) {
			incr = 0;
		} else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vdm.sign != 0)) {
			incr = ~0ULL;
		}

		if ((rem + incr) < rem) {
			if (d < 0xffffffff)
				d += 1;
			else
				exceptions |= FPSCR_IOC;
		}

		if (d && vdm.sign) {
			d = 0;
			exceptions |= FPSCR_IOC;
		} else if (rem)
			exceptions |= FPSCR_IXC;
	} else {
		d = 0;
		if (vdm.exponent | vdm.significand) {
			exceptions |= FPSCR_IXC;
			if (rmode == FPSCR_ROUND_PLUSINF && vdm.sign == 0)
				d = 1;
			else if (rmode == FPSCR_ROUND_MINUSINF && vdm.sign) {
				d = 0;
				exceptions |= FPSCR_IOC;
			}
		}
	}

	pr_debug("VFP: ftoui: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions);

	vfp_put_float(d, sd);

	return exceptions;
}

static u32 vfp_double_ftouiz(int sd, int unused, int dm, u32 fpscr)
{
	return vfp_double_ftoui(sd, unused, dm, FPSCR_ROUND_TOZERO);
}

static u32 vfp_double_ftosi(int sd, int unused, int dm, u32 fpscr)
{
	struct vfp_double vdm;
	u32 d, exceptions = 0;
	int rmode = fpscr & FPSCR_RMODE_MASK;
	int tm;

	vfp_double_unpack(&vdm, vfp_get_double(dm));
	vfp_double_dump("VDM", &vdm);

	/*
	 * Do we have a denormalised number?
	 */
	tm = vfp_double_type(&vdm);
	if (tm & VFP_DENORMAL)
		exceptions |= FPSCR_IDC;

	if (tm & VFP_NAN) {
		d = 0;
		exceptions |= FPSCR_IOC;
	} else if (vdm.exponent >= 1023 + 32) {
		d = 0x7fffffff;
		if (vdm.sign)
			d = ~d;
		exceptions |= FPSCR_IOC;
	} else if (vdm.exponent >= 1023 - 1) {
		int shift = 1023 + 63 - vdm.exponent;
		u64 rem, incr = 0;

		d = (vdm.significand << 1) >> shift;
		rem = vdm.significand << (65 - shift);

		if (rmode == FPSCR_ROUND_NEAREST) {
			incr = 0x8000000000000000ULL;
			if ((d & 1) == 0)
				incr -= 1;
		} else if (rmode == FPSCR_ROUND_TOZERO) {
			incr = 0;
		} else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vdm.sign != 0)) {
			incr = ~0ULL;
		}

		if ((rem + incr) < rem && d < 0xffffffff)
			d += 1;
		if (d > 0x7fffffff + (vdm.sign != 0)) {
			d = 0x7fffffff + (vdm.sign != 0);
			exceptions |= FPSCR_IOC;
		} else if (rem)
			exceptions |= FPSCR_IXC;

		if (vdm.sign)
			d = -d;
	} else {
		d = 0;
		if (vdm.exponent | vdm.significand) {
			exceptions |= FPSCR_IXC;
			if (rmode == FPSCR_ROUND_PLUSINF && vdm.sign == 0)
				d = 1;
			else if (rmode == FPSCR_ROUND_MINUSINF && vdm.sign)
				d = -1;
		}
	}

	pr_debug("VFP: ftosi: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions);

	vfp_put_float((s32)d, sd);

	return exceptions;
}

static u32 vfp_double_ftosiz(int dd, int unused, int dm, u32 fpscr)
{
	return vfp_double_ftosi(dd, unused, dm, FPSCR_ROUND_TOZERO);
}

static struct op fops_ext[32] = {
	[FEXT_TO_IDX(FEXT_FCPY)]	= { vfp_double_fcpy,   0 },
	[FEXT_TO_IDX(FEXT_FABS)]	= { vfp_double_fabs,   0 },
	[FEXT_TO_IDX(FEXT_FNEG)]	= { vfp_double_fneg,   0 },
	[FEXT_TO_IDX(FEXT_FSQRT)]	= { vfp_double_fsqrt,  0 },
	[FEXT_TO_IDX(FEXT_FCMP)]	= { vfp_double_fcmp,   OP_SCALAR },
	[FEXT_TO_IDX(FEXT_FCMPE)]	= { vfp_double_fcmpe,  OP_SCALAR },
	[FEXT_TO_IDX(FEXT_FCMPZ)]	= { vfp_double_fcmpz,  OP_SCALAR },
	[FEXT_TO_IDX(FEXT_FCMPEZ)]	= { vfp_double_fcmpez, OP_SCALAR },
	[FEXT_TO_IDX(FEXT_FCVT)]	= { vfp_double_fcvts,  OP_SCALAR|OP_SD },
	[FEXT_TO_IDX(FEXT_FUITO)]	= { vfp_double_fuito,  OP_SCALAR|OP_SM },
	[FEXT_TO_IDX(FEXT_FSITO)]	= { vfp_double_fsito,  OP_SCALAR|OP_SM },
	[FEXT_TO_IDX(FEXT_FTOUI)]	= { vfp_double_ftoui,  OP_SCALAR|OP_SD },
	[FEXT_TO_IDX(FEXT_FTOUIZ)]	= { vfp_double_ftouiz, OP_SCALAR|OP_SD },
	[FEXT_TO_IDX(FEXT_FTOSI)]	= { vfp_double_ftosi,  OP_SCALAR|OP_SD },
	[FEXT_TO_IDX(FEXT_FTOSIZ)]	= { vfp_double_ftosiz, OP_SCALAR|OP_SD },
};

/*
 * Addition of two non-numbers: at least one operand is an infinity
 * or a NaN.
 */
static u32
vfp_double_fadd_nonnumber(struct vfp_double *vdd, struct vfp_double *vdn,
			  struct vfp_double *vdm, u32 fpscr)
{
	struct vfp_double *vdp;
	u32 exceptions = 0;
	int tn, tm;

	tn = vfp_double_type(vdn);
	tm = vfp_double_type(vdm);

	if (tn & tm & VFP_INFINITY) {
		/*
		 * Two infinities, with opposite signs?
		 */
		if (vdn->sign ^ vdm->sign) {
			/*
			 * different signs -> invalid
			 */
			exceptions = FPSCR_IOC;
			vdp = &vfp_double_default_qnan;
		} else {
			/*
			 * same signs -> valid
			 */
			vdp = vdn;
		}
	} else if (tn & VFP_INFINITY && tm & VFP_NUMBER) {
		/*
		 * One infinity and one number -> infinity
		 */
		vdp = vdn;
	} else {
		/*
		 * 'n' is a NaN of some type
		 */
		return vfp_propagate_nan(vdd, vdn, vdm, fpscr);
	}
	*vdd = *vdp;
	return exceptions;
}

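/*
 * Add the two unpacked operands.  Before adding, the smaller-exponent
 * operand is aligned with vfp_shiftright64jamming(), which ORs any
 * bits shifted out into the least significant bit of the result, so
 * that a discarded fraction still forces an inexact result during
 * rounding ("sticky" shifting).
 */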
static u32
vfp_double_add(struct vfp_double *vdd, struct vfp_double *vdn,
	       struct vfp_double *vdm, u32 fpscr)
{
	u32 exp_diff;
	u64 m_sig;

	if (vdn->significand & (1ULL << 63) ||
	    vdm->significand & (1ULL << 63)) {
		pr_info("VFP: bad FP values in %s\n", __func__);
		vfp_double_dump("VDN", vdn);
		vfp_double_dump("VDM", vdm);
	}

	/*
	 * Ensure that 'n' is the largest magnitude number.  Note that
	 * if 'n' and 'm' have equal exponents, we do not swap them.
	 * This ensures that NaN propagation works correctly.
	 */
	if (vdn->exponent < vdm->exponent) {
		struct vfp_double *t = vdn;
		vdn = vdm;
		vdm = t;
	}

	/*
	 * Is 'n' an infinity or a NaN?  'n' has the larger (or equal)
	 * exponent, so this test also catches the case where both
	 * operands are non-numbers.
	 */
	if (vdn->exponent == 2047)
		return vfp_double_fadd_nonnumber(vdd, vdn, vdm, fpscr);

	/*
	 * We have two proper numbers, where 'n' has the larger
	 * magnitude.  Copy 'n' to 'd' before doing the arithmetic.
	 */
	*vdd = *vdn;

	/*
	 * Align 'm' with the result.
	 */
	exp_diff = vdn->exponent - vdm->exponent;
	m_sig = vfp_shiftright64jamming(vdm->significand, exp_diff);

	/*
	 * If the signs are different, we are really subtracting.
	 */
	if (vdn->sign ^ vdm->sign) {
		m_sig = vdn->significand - m_sig;
		if ((s64)m_sig < 0) {
			vdd->sign = vfp_sign_negate(vdd->sign);
			m_sig = -m_sig;
		} else if (m_sig == 0) {
			vdd->sign = (fpscr & FPSCR_RMODE_MASK) ==
				      FPSCR_ROUND_MINUSINF ? 0x8000 : 0;
		}
	} else {
		m_sig += vdn->significand;
	}
	vdd->significand = m_sig;

	return 0;
}

static u32
vfp_double_multiply(struct vfp_double *vdd, struct vfp_double *vdn,
		    struct vfp_double *vdm, u32 fpscr)
{
	vfp_double_dump("VDN", vdn);
	vfp_double_dump("VDM", vdm);

	/*
	 * Ensure that 'n' is the largest magnitude number.  Note that
	 * if 'n' and 'm' have equal exponents, we do not swap them.
	 * This ensures that NaN propagation works correctly.
	 */
	if (vdn->exponent < vdm->exponent) {
		struct vfp_double *t = vdn;
		vdn = vdm;
		vdm = t;
		pr_debug("VFP: swapping M <-> N\n");
	}

	vdd->sign = vdn->sign ^ vdm->sign;

	/*
	 * If 'n' is an infinity or NaN, handle it.  'm' may also be a
	 * non-number, but never one of larger exponent than 'n'.
	 */
	if (vdn->exponent == 2047) {
		if (vdn->significand || (vdm->exponent == 2047 && vdm->significand))
			return vfp_propagate_nan(vdd, vdn, vdm, fpscr);
		if ((vdm->exponent | vdm->significand) == 0) {
			*vdd = vfp_double_default_qnan;
			return FPSCR_IOC;
		}
		vdd->exponent = vdn->exponent;
		vdd->significand = 0;
		return 0;
	}

	/*
	 * If 'm' is zero, the result is always zero.  In this case we
	 * can return immediately.
	 */
	if ((vdm->exponent | vdm->significand) == 0) {
		vdd->exponent = 0;
		vdd->significand = 0;
		return 0;
	}

	/*
	 * We add 2 to the destination exponent for the same reason
	 * as the addition case - though this time we have +1 from
	 * each input operand.
	 */
	vdd->exponent = vdn->exponent + vdm->exponent - 1023 + 2;
	vdd->significand = vfp_hi64multiply64(vdn->significand, vdm->significand);

	vfp_double_dump("VDD", vdd);
	return 0;
}

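/*
 * All four multiply-accumulate operations share one helper below:
 * NEG_MULTIPLY negates the sign of the computed product, and
 * NEG_SUBTRACT negates the accumulator read from 'dd', giving
 * fmac, fnmac, fmsc and fnmsc respectively.
 */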
#define NEG_MULTIPLY	(1 << 0)
#define NEG_SUBTRACT	(1 << 1)

static u32
vfp_double_multiply_accumulate(int dd, int dn, int dm, u32 fpscr, u32 negate, char *func)
{
	struct vfp_double vdd, vdp, vdn, vdm;
	u32 exceptions;

	vfp_double_unpack(&vdn, vfp_get_double(dn));
	if (vdn.exponent == 0 && vdn.significand)
		vfp_double_normalise_denormal(&vdn);

	vfp_double_unpack(&vdm, vfp_get_double(dm));
	if (vdm.exponent == 0 && vdm.significand)
		vfp_double_normalise_denormal(&vdm);

	exceptions = vfp_double_multiply(&vdp, &vdn, &vdm, fpscr);
	if (negate & NEG_MULTIPLY)
		vdp.sign = vfp_sign_negate(vdp.sign);

	vfp_double_unpack(&vdn, vfp_get_double(dd));
	if (vdn.exponent == 0 && vdn.significand)
		vfp_double_normalise_denormal(&vdn);
	if (negate & NEG_SUBTRACT)
		vdn.sign = vfp_sign_negate(vdn.sign);

	exceptions |= vfp_double_add(&vdd, &vdn, &vdp, fpscr);

	return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, func);
}

/*
 * Standard operations
 */

/*
 * dd = dd + (dn * dm)
 */
static u32 vfp_double_fmac(int dd, int dn, int dm, u32 fpscr)
{
	return vfp_double_multiply_accumulate(dd, dn, dm, fpscr, 0, "fmac");
}

/*
 * dd = dd - (dn * dm)
 */
static u32 vfp_double_fnmac(int dd, int dn, int dm, u32 fpscr)
{
	return vfp_double_multiply_accumulate(dd, dn, dm, fpscr, NEG_MULTIPLY, "fnmac");
}

/*
 * dd = -dd + (dn * dm)
 */
static u32 vfp_double_fmsc(int dd, int dn, int dm, u32 fpscr)
{
	return vfp_double_multiply_accumulate(dd, dn, dm, fpscr, NEG_SUBTRACT, "fmsc");
}

/*
 * dd = -dd - (dn * dm)
 */
static u32 vfp_double_fnmsc(int dd, int dn, int dm, u32 fpscr)
{
	return vfp_double_multiply_accumulate(dd, dn, dm, fpscr, NEG_SUBTRACT | NEG_MULTIPLY, "fnmsc");
}

/*
 * dd = dn * dm
 */
static u32 vfp_double_fmul(int dd, int dn, int dm, u32 fpscr)
{
	struct vfp_double vdd, vdn, vdm;
	u32 exceptions;

	vfp_double_unpack(&vdn, vfp_get_double(dn));
	if (vdn.exponent == 0 && vdn.significand)
		vfp_double_normalise_denormal(&vdn);

	vfp_double_unpack(&vdm, vfp_get_double(dm));
	if (vdm.exponent == 0 && vdm.significand)
		vfp_double_normalise_denormal(&vdm);

	exceptions = vfp_double_multiply(&vdd, &vdn, &vdm, fpscr);
	return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fmul");
}

/*
 * dd = -(dn * dm)
 */
static u32 vfp_double_fnmul(int dd, int dn, int dm, u32 fpscr)
{
	struct vfp_double vdd, vdn, vdm;
	u32 exceptions;

	vfp_double_unpack(&vdn, vfp_get_double(dn));
	if (vdn.exponent == 0 && vdn.significand)
		vfp_double_normalise_denormal(&vdn);

	vfp_double_unpack(&vdm, vfp_get_double(dm));
	if (vdm.exponent == 0 && vdm.significand)
		vfp_double_normalise_denormal(&vdm);

	exceptions = vfp_double_multiply(&vdd, &vdn, &vdm, fpscr);
	vdd.sign = vfp_sign_negate(vdd.sign);

	return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fnmul");
}

/*
 * dd = dn + dm
 */
static u32 vfp_double_fadd(int dd, int dn, int dm, u32 fpscr)
{
	struct vfp_double vdd, vdn, vdm;
	u32 exceptions;

	vfp_double_unpack(&vdn, vfp_get_double(dn));
	if (vdn.exponent == 0 && vdn.significand)
		vfp_double_normalise_denormal(&vdn);

	vfp_double_unpack(&vdm, vfp_get_double(dm));
	if (vdm.exponent == 0 && vdm.significand)
		vfp_double_normalise_denormal(&vdm);

	exceptions = vfp_double_add(&vdd, &vdn, &vdm, fpscr);

	return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fadd");
}

/*
 * dd = dn - dm
 */
static u32 vfp_double_fsub(int dd, int dn, int dm, u32 fpscr)
{
	struct vfp_double vdd, vdn, vdm;
	u32 exceptions;

	vfp_double_unpack(&vdn, vfp_get_double(dn));
	if (vdn.exponent == 0 && vdn.significand)
		vfp_double_normalise_denormal(&vdn);

	vfp_double_unpack(&vdm, vfp_get_double(dm));
	if (vdm.exponent == 0 && vdm.significand)
		vfp_double_normalise_denormal(&vdm);

	/*
	 * Subtraction is like addition, but with a negated operand.
	 */
	vdm.sign = vfp_sign_negate(vdm.sign);

	exceptions = vfp_double_add(&vdd, &vdn, &vdm, fpscr);

	return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fsub");
}

/*
 * dd = dn / dm
 */
static u32 vfp_double_fdiv(int dd, int dn, int dm, u32 fpscr)
{
	struct vfp_double vdd, vdn, vdm;
	u32 exceptions = 0;
	int tm, tn;

	vfp_double_unpack(&vdn, vfp_get_double(dn));
	vfp_double_unpack(&vdm, vfp_get_double(dm));

	vdd.sign = vdn.sign ^ vdm.sign;

	tn = vfp_double_type(&vdn);
	tm = vfp_double_type(&vdm);

	/*
	 * Is n a NAN?
	 */
	if (tn & VFP_NAN)
		goto vdn_nan;

	/*
	 * Is m a NAN?
	 */
	if (tm & VFP_NAN)
		goto vdm_nan;

	/*
	 * If n and m are infinity, the result is invalid.
	 * If n and m are zero, the result is invalid.
	 */
	if (tm & tn & (VFP_INFINITY|VFP_ZERO))
		goto invalid;

	/*
	 * Is n an infinity?
	 */
	if (tn & VFP_INFINITY)
		goto infinity;

	/*
	 * Is m a zero?
	 */
	if (tm & VFP_ZERO)
		goto divzero;

	/*
	 * If m is an infinity, or n is zero, the result is zero.
	 */
	if (tm & VFP_INFINITY || tn & VFP_ZERO)
		goto zero;

	if (tn & VFP_DENORMAL)
		vfp_double_normalise_denormal(&vdn);
	if (tm & VFP_DENORMAL)
		vfp_double_normalise_denormal(&vdm);

	/*
	 * Ok, we have two numbers, we can perform division.
	 */
	vdd.exponent = vdn.exponent - vdm.exponent + 1023 - 1;
	vdm.significand <<= 1;
	if (vdm.significand <= (2 * vdn.significand)) {
		vdn.significand >>= 1;
		vdd.exponent++;
	}
	vdd.significand = vfp_estimate_div128to64(vdn.significand, 0, vdm.significand);
	if ((vdd.significand & 0x1ff) <= 2) {
		u64 termh, terml, remh, reml;
		mul64to128(&termh, &terml, vdm.significand, vdd.significand);
		sub128(&remh, &reml, vdn.significand, 0, termh, terml);
		while ((s64)remh < 0) {
			vdd.significand -= 1;
			add128(&remh, &reml, remh, reml, 0, vdm.significand);
		}
		vdd.significand |= (reml != 0);
	}
	return vfp_double_normaliseround(dd, &vdd, fpscr, 0, "fdiv");

 vdn_nan:
	exceptions = vfp_propagate_nan(&vdd, &vdn, &vdm, fpscr);
 pack:
	vfp_put_double(vfp_double_pack(&vdd), dd);
	return exceptions;

 vdm_nan:
	exceptions = vfp_propagate_nan(&vdd, &vdm, &vdn, fpscr);
	goto pack;

 zero:
	vdd.exponent = 0;
	vdd.significand = 0;
	goto pack;

 divzero:
	exceptions = FPSCR_DZC;
 infinity:
	vdd.exponent = 2047;
	vdd.significand = 0;
	goto pack;

 invalid:
	vfp_put_double(vfp_double_pack(&vfp_double_default_qnan), dd);
	return FPSCR_IOC;
}

static struct op fops[16] = {
	[FOP_TO_IDX(FOP_FMAC)]	= { vfp_double_fmac,  0 },
	[FOP_TO_IDX(FOP_FNMAC)]	= { vfp_double_fnmac, 0 },
	[FOP_TO_IDX(FOP_FMSC)]	= { vfp_double_fmsc,  0 },
	[FOP_TO_IDX(FOP_FNMSC)]	= { vfp_double_fnmsc, 0 },
	[FOP_TO_IDX(FOP_FMUL)]	= { vfp_double_fmul,  0 },
	[FOP_TO_IDX(FOP_FNMUL)]	= { vfp_double_fnmul, 0 },
	[FOP_TO_IDX(FOP_FADD)]	= { vfp_double_fadd,  0 },
	[FOP_TO_IDX(FOP_FSUB)]	= { vfp_double_fsub,  0 },
	[FOP_TO_IDX(FOP_FDIV)]	= { vfp_double_fdiv,  0 },
};

#define FREG_BANK(x)	((x) & 0x0c)
#define FREG_IDX(x)	((x) & 3)

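/*
 * Decode and execute one CPDO (data-processing) instruction.  Short
 * vector operations iterate up to 'veclen' times, stepping the
 * destination and source registers by 'vecstride' within their
 * four-register banks on each iteration; an 'm' operand in bank zero
 * is a scalar and is not stepped.
 */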
u32 vfp_double_cpdo(u32 inst, u32 fpscr)
{
	u32 op = inst & FOP_MASK;
	u32 exceptions = 0;
	unsigned int dest;
	unsigned int dn = vfp_get_dn(inst);
	unsigned int dm;
	unsigned int vecitr, veclen, vecstride;
	struct op *fop;

	vecstride = (1 + ((fpscr & FPSCR_STRIDE_MASK) == FPSCR_STRIDE_MASK));

	fop = (op == FOP_EXT) ? &fops_ext[FEXT_TO_IDX(inst)] : &fops[FOP_TO_IDX(op)];

	/*
	 * fcvtsd takes an sd register number as destination, not dd,
	 * and it only operates on scalars.
	 */
	if (fop->flags & OP_SD)
		dest = vfp_get_sd(inst);
	else
		dest = vfp_get_dd(inst);

	/*
	 * f[us]ito takes an sN operand, not a dN operand.
	 */
	if (fop->flags & OP_SM)
		dm = vfp_get_sm(inst);
	else
		dm = vfp_get_dm(inst);

	/*
	 * If the destination bank is zero, vector length is always '1'.
	 * ARM DDI0100F C5.1.3, C5.3.2.
	 */
	if ((fop->flags & OP_SCALAR) || (FREG_BANK(dest) == 0))
		veclen = 0;
	else
		veclen = fpscr & FPSCR_LENGTH_MASK;

	pr_debug("VFP: vecstride=%u veclen=%u\n", vecstride,
		 (veclen >> FPSCR_LENGTH_BIT) + 1);

	if (!fop->fn)
		goto invalid;

	for (vecitr = 0; vecitr <= veclen; vecitr += 1 << FPSCR_LENGTH_BIT) {
		u32 except;
		char type;

		type = fop->flags & OP_SD ? 's' : 'd';
		if (op == FOP_EXT)
			pr_debug("VFP: itr%d (%c%u) = op[%u] (d%u)\n",
				 vecitr >> FPSCR_LENGTH_BIT,
				 type, dest, dn, dm);
		else
			pr_debug("VFP: itr%d (%c%u) = (d%u) op[%u] (d%u)\n",
				 vecitr >> FPSCR_LENGTH_BIT,
				 type, dest, dn, FOP_TO_IDX(op), dm);

		except = fop->fn(dest, dn, dm, fpscr);
		pr_debug("VFP: itr%d: exceptions=%08x\n",
			 vecitr >> FPSCR_LENGTH_BIT, except);

		exceptions |= except;

		/*
		 * Step to the next register in each bank, wrapping
		 * within the four-register bank.  A bank-zero 'm'
		 * operand is a scalar and stays fixed.
		 */
		dest = FREG_BANK(dest) + ((FREG_IDX(dest) + vecstride) & 3);
		dn = FREG_BANK(dn) + ((FREG_IDX(dn) + vecstride) & 3);
		if (FREG_BANK(dm) != 0)
			dm = FREG_BANK(dm) + ((FREG_IDX(dm) + vecstride) & 3);
	}
	return exceptions;

 invalid:
	return ~0;
}