Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0-only
0002 /* IEEE754 floating point arithmetic
0003  * double precision square root
0004  */
0005 /*
0006  * MIPS floating point support
0007  * Copyright (C) 1994-2000 Algorithmics Ltd.
0008  */
0009 
0010 #include "ieee754dp.h"
0011 
0012 static const unsigned int table[] = {
0013     0, 1204, 3062, 5746, 9193, 13348, 18162, 23592,
0014     29598, 36145, 43202, 50740, 58733, 67158, 75992,
0015     85215, 83599, 71378, 60428, 50647, 41945, 34246,
0016     27478, 21581, 16499, 12183, 8588, 5674, 3403,
0017     1742, 661, 130
0018 };
0019 
0020 union ieee754dp ieee754dp_sqrt(union ieee754dp x)
0021 {
0022     struct _ieee754_csr oldcsr;
0023     union ieee754dp y, z, t;
0024     unsigned int scalx, yh;
0025     COMPXDP;
0026 
0027     EXPLODEXDP;
0028     ieee754_clearcx();
0029     FLUSHXDP;
0030 
0031     /* x == INF or NAN? */
0032     switch (xc) {
0033     case IEEE754_CLASS_SNAN:
0034         return ieee754dp_nanxcpt(x);
0035 
0036     case IEEE754_CLASS_QNAN:
0037         /* sqrt(Nan) = Nan */
0038         return x;
0039 
0040     case IEEE754_CLASS_ZERO:
0041         /* sqrt(0) = 0 */
0042         return x;
0043 
0044     case IEEE754_CLASS_INF:
0045         if (xs) {
0046             /* sqrt(-Inf) = Nan */
0047             ieee754_setcx(IEEE754_INVALID_OPERATION);
0048             return ieee754dp_indef();
0049         }
0050         /* sqrt(+Inf) = Inf */
0051         return x;
0052 
0053     case IEEE754_CLASS_DNORM:
0054         DPDNORMX;
0055         fallthrough;
0056     case IEEE754_CLASS_NORM:
0057         if (xs) {
0058             /* sqrt(-x) = Nan */
0059             ieee754_setcx(IEEE754_INVALID_OPERATION);
0060             return ieee754dp_indef();
0061         }
0062         break;
0063     }
0064 
0065     /* save old csr; switch off INX enable & flag; set RN rounding */
0066     oldcsr = ieee754_csr;
0067     ieee754_csr.mx &= ~IEEE754_INEXACT;
0068     ieee754_csr.sx &= ~IEEE754_INEXACT;
0069     ieee754_csr.rm = FPU_CSR_RN;
0070 
0071     /* adjust exponent to prevent overflow */
0072     scalx = 0;
0073     if (xe > 512) {     /* x > 2**-512? */
0074         xe -= 512;  /* x = x / 2**512 */
0075         scalx += 256;
0076     } else if (xe < -512) { /* x < 2**-512? */
0077         xe += 512;  /* x = x * 2**512 */
0078         scalx -= 256;
0079     }
0080 
0081     x = builddp(0, xe + DP_EBIAS, xm & ~DP_HIDDEN_BIT);
0082     y = x;
0083 
0084     /* magic initial approximation to almost 8 sig. bits */
0085     yh = y.bits >> 32;
0086     yh = (yh >> 1) + 0x1ff80000;
0087     yh = yh - table[(yh >> 15) & 31];
0088     y.bits = ((u64) yh << 32) | (y.bits & 0xffffffff);
0089 
0090     /* Heron's rule once with correction to improve to ~18 sig. bits */
0091     /* t=x/y; y=y+t; py[n0]=py[n0]-0x00100006; py[n1]=0; */
0092     t = ieee754dp_div(x, y);
0093     y = ieee754dp_add(y, t);
0094     y.bits -= 0x0010000600000000LL;
0095     y.bits &= 0xffffffff00000000LL;
0096 
0097     /* triple to almost 56 sig. bits: y ~= sqrt(x) to within 1 ulp */
0098     /* t=y*y; z=t;  pt[n0]+=0x00100000; t+=z; z=(x-z)*y; */
0099     t = ieee754dp_mul(y, y);
0100     z = t;
0101     t.bexp += 0x001;
0102     t = ieee754dp_add(t, z);
0103     z = ieee754dp_mul(ieee754dp_sub(x, z), y);
0104 
0105     /* t=z/(t+x) ;  pt[n0]+=0x00100000; y+=t; */
0106     t = ieee754dp_div(z, ieee754dp_add(t, x));
0107     t.bexp += 0x001;
0108     y = ieee754dp_add(y, t);
0109 
0110     /* twiddle last bit to force y correctly rounded */
0111 
0112     /* set RZ, clear INEX flag */
0113     ieee754_csr.rm = FPU_CSR_RZ;
0114     ieee754_csr.sx &= ~IEEE754_INEXACT;
0115 
0116     /* t=x/y; ...chopped quotient, possibly inexact */
0117     t = ieee754dp_div(x, y);
0118 
0119     if (ieee754_csr.sx & IEEE754_INEXACT || t.bits != y.bits) {
0120 
0121         if (!(ieee754_csr.sx & IEEE754_INEXACT))
0122             /* t = t-ulp */
0123             t.bits -= 1;
0124 
0125         /* add inexact to result status */
0126         oldcsr.cx |= IEEE754_INEXACT;
0127         oldcsr.sx |= IEEE754_INEXACT;
0128 
0129         switch (oldcsr.rm) {
0130         case FPU_CSR_RU:
0131             y.bits += 1;
0132             fallthrough;
0133         case FPU_CSR_RN:
0134             t.bits += 1;
0135             break;
0136         }
0137 
0138         /* y=y+t; ...chopped sum */
0139         y = ieee754dp_add(y, t);
0140 
0141         /* adjust scalx for correctly rounded sqrt(x) */
0142         scalx -= 1;
0143     }
0144 
0145     /* py[n0]=py[n0]+scalx; ...scale back y */
0146     y.bexp += scalx;
0147 
0148     /* restore rounding mode, possibly set inexact */
0149     ieee754_csr = oldcsr;
0150 
0151     return y;
0152 }