0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012 #include "ieee754dp.h"
0013
0014
0015
0016 static void srl128(u64 *hptr, u64 *lptr, int count)
0017 {
0018 u64 low;
0019
0020 if (count >= 128) {
0021 *lptr = *hptr != 0 || *lptr != 0;
0022 *hptr = 0;
0023 } else if (count >= 64) {
0024 if (count == 64) {
0025 *lptr = *hptr | (*lptr != 0);
0026 } else {
0027 low = *lptr;
0028 *lptr = *hptr >> (count - 64);
0029 *lptr |= (*hptr << (128 - count)) != 0 || low != 0;
0030 }
0031 *hptr = 0;
0032 } else {
0033 low = *lptr;
0034 *lptr = low >> count | *hptr << (64 - count);
0035 *lptr |= (low << (64 - count)) != 0;
0036 *hptr = *hptr >> count;
0037 }
0038 }
0039
0040 static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x,
0041 union ieee754dp y, enum maddf_flags flags)
0042 {
0043 int re;
0044 int rs;
0045 unsigned int lxm;
0046 unsigned int hxm;
0047 unsigned int lym;
0048 unsigned int hym;
0049 u64 lrm;
0050 u64 hrm;
0051 u64 lzm;
0052 u64 hzm;
0053 u64 t;
0054 u64 at;
0055 int s;
0056
0057 COMPXDP;
0058 COMPYDP;
0059 COMPZDP;
0060
0061 EXPLODEXDP;
0062 EXPLODEYDP;
0063 EXPLODEZDP;
0064
0065 FLUSHXDP;
0066 FLUSHYDP;
0067 FLUSHZDP;
0068
0069 ieee754_clearcx();
0070
0071 rs = xs ^ ys;
0072 if (flags & MADDF_NEGATE_PRODUCT)
0073 rs ^= 1;
0074 if (flags & MADDF_NEGATE_ADDITION)
0075 zs ^= 1;
0076
0077
0078
0079
0080
0081 if (zc == IEEE754_CLASS_SNAN)
0082 return ieee754dp_nanxcpt(z);
0083 if (xc == IEEE754_CLASS_SNAN)
0084 return ieee754dp_nanxcpt(x);
0085 if (yc == IEEE754_CLASS_SNAN)
0086 return ieee754dp_nanxcpt(y);
0087 if (zc == IEEE754_CLASS_QNAN)
0088 return z;
0089 if (xc == IEEE754_CLASS_QNAN)
0090 return x;
0091 if (yc == IEEE754_CLASS_QNAN)
0092 return y;
0093
0094 if (zc == IEEE754_CLASS_DNORM)
0095 DPDNORMZ;
0096
0097
0098 switch (CLPAIR(xc, yc)) {
0099
0100
0101
0102
0103 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO):
0104 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF):
0105 ieee754_setcx(IEEE754_INVALID_OPERATION);
0106 return ieee754dp_indef();
0107
0108 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF):
0109 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF):
0110 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM):
0111 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM):
0112 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF):
0113 if ((zc == IEEE754_CLASS_INF) && (zs != rs)) {
0114
0115
0116
0117
0118 ieee754_setcx(IEEE754_INVALID_OPERATION);
0119 return ieee754dp_indef();
0120 }
0121
0122
0123
0124
0125
0126 return ieee754dp_inf(rs);
0127
0128 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO):
0129 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM):
0130 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM):
0131 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO):
0132 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO):
0133 if (zc == IEEE754_CLASS_INF)
0134 return ieee754dp_inf(zs);
0135 if (zc == IEEE754_CLASS_ZERO) {
0136
0137 if (zs == rs)
0138
0139
0140
0141
0142
0143
0144 return z;
0145
0146 return ieee754dp_zero(ieee754_csr.rm == FPU_CSR_RD);
0147 }
0148
0149 return z;
0150
0151 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
0152 DPDNORMX;
0153 fallthrough;
0154 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
0155 if (zc == IEEE754_CLASS_INF)
0156 return ieee754dp_inf(zs);
0157 DPDNORMY;
0158 break;
0159
0160 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM):
0161 if (zc == IEEE754_CLASS_INF)
0162 return ieee754dp_inf(zs);
0163 DPDNORMX;
0164 break;
0165
0166 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM):
0167 if (zc == IEEE754_CLASS_INF)
0168 return ieee754dp_inf(zs);
0169
0170 }
0171
0172
0173
0174
0175
0176
0177
0178
0179
0180
0181 assert(xm & DP_HIDDEN_BIT);
0182 assert(ym & DP_HIDDEN_BIT);
0183
0184 re = xe + ye;
0185
0186
0187 xm <<= 64 - (DP_FBITS + 1);
0188 ym <<= 64 - (DP_FBITS + 1);
0189
0190
0191
0192
0193
0194 lxm = xm;
0195 hxm = xm >> 32;
0196 lym = ym;
0197 hym = ym >> 32;
0198
0199 lrm = DPXMULT(lxm, lym);
0200 hrm = DPXMULT(hxm, hym);
0201
0202 t = DPXMULT(lxm, hym);
0203
0204 at = lrm + (t << 32);
0205 hrm += at < lrm;
0206 lrm = at;
0207
0208 hrm = hrm + (t >> 32);
0209
0210 t = DPXMULT(hxm, lym);
0211
0212 at = lrm + (t << 32);
0213 hrm += at < lrm;
0214 lrm = at;
0215
0216 hrm = hrm + (t >> 32);
0217
0218
0219 if ((int64_t)hrm < 0) {
0220 lrm = (hrm << 63) | (lrm >> 1);
0221 hrm = hrm >> 1;
0222 re++;
0223 }
0224
0225 assert(hrm & (1 << 62));
0226
0227 if (zc == IEEE754_CLASS_ZERO) {
0228
0229
0230
0231
0232
0233 srl128(&hrm, &lrm, (126 - 55));
0234 return ieee754dp_format(rs, re, lrm);
0235 }
0236
0237
0238 lzm = 0;
0239 hzm = zm << 10;
0240 assert(hzm & (1 << 62));
0241
0242
0243 if (ze > re) {
0244
0245
0246
0247 s = ze - re;
0248 srl128(&hrm, &lrm, s);
0249 re += s;
0250 } else if (re > ze) {
0251
0252
0253
0254 s = re - ze;
0255 srl128(&hzm, &lzm, s);
0256 ze += s;
0257 }
0258 assert(ze == re);
0259 assert(ze <= DP_EMAX);
0260
0261
0262 if (zs == rs) {
0263
0264
0265
0266
0267 hzm = hzm + hrm + (lzm > (lzm + lrm));
0268 lzm = lzm + lrm;
0269 if ((int64_t)hzm < 0) {
0270 srl128(&hzm, &lzm, 1);
0271 ze++;
0272 }
0273 } else {
0274 if (hzm > hrm || (hzm == hrm && lzm >= lrm)) {
0275 hzm = hzm - hrm - (lzm < lrm);
0276 lzm = lzm - lrm;
0277 } else {
0278 hzm = hrm - hzm - (lrm < lzm);
0279 lzm = lrm - lzm;
0280 zs = rs;
0281 }
0282 if (lzm == 0 && hzm == 0)
0283 return ieee754dp_zero(ieee754_csr.rm == FPU_CSR_RD);
0284
0285
0286
0287
0288 if (hzm == 0) {
0289
0290 if ((int64_t)lzm < 0) {
0291
0292 hzm = lzm >> 1;
0293 lzm = lzm << 63;
0294 ze -= 63;
0295 } else {
0296 hzm = lzm;
0297 lzm = 0;
0298 ze -= 64;
0299 }
0300 }
0301
0302 t = 0;
0303 while ((hzm >> (62 - t)) == 0)
0304 t++;
0305
0306 assert(t <= 62);
0307 if (t) {
0308 hzm = hzm << t | lzm >> (64 - t);
0309 lzm = lzm << t;
0310 ze -= t;
0311 }
0312 }
0313
0314
0315
0316
0317
0318
0319 srl128(&hzm, &lzm, (126 - 55));
0320
0321 return ieee754dp_format(zs, ze, lzm);
0322 }
0323
0324 union ieee754dp ieee754dp_maddf(union ieee754dp z, union ieee754dp x,
0325 union ieee754dp y)
0326 {
0327 return _dp_maddf(z, x, y, 0);
0328 }
0329
0330 union ieee754dp ieee754dp_msubf(union ieee754dp z, union ieee754dp x,
0331 union ieee754dp y)
0332 {
0333 return _dp_maddf(z, x, y, MADDF_NEGATE_PRODUCT);
0334 }
0335
0336 union ieee754dp ieee754dp_madd(union ieee754dp z, union ieee754dp x,
0337 union ieee754dp y)
0338 {
0339 return _dp_maddf(z, x, y, 0);
0340 }
0341
0342 union ieee754dp ieee754dp_msub(union ieee754dp z, union ieee754dp x,
0343 union ieee754dp y)
0344 {
0345 return _dp_maddf(z, x, y, MADDF_NEGATE_ADDITION);
0346 }
0347
0348 union ieee754dp ieee754dp_nmadd(union ieee754dp z, union ieee754dp x,
0349 union ieee754dp y)
0350 {
0351 return _dp_maddf(z, x, y, MADDF_NEGATE_PRODUCT|MADDF_NEGATE_ADDITION);
0352 }
0353
0354 union ieee754dp ieee754dp_nmsub(union ieee754dp z, union ieee754dp x,
0355 union ieee754dp y)
0356 {
0357 return _dp_maddf(z, x, y, MADDF_NEGATE_PRODUCT);
0358 }