OSCL-LXR

0001
0002 /*
0003 ===============================================================================
0004
0005 This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
0006 Arithmetic Package, Release 2.
0007
0008 Written by John R. Hauser.  This work was made possible in part by the
0009 International Computer Science Institute, located at Suite 600, 1947 Center
0010 Street, Berkeley, California 94704.  Funding was partially provided by the
0011 National Science Foundation under grant MIP-9311980.  The original version
0012 of this code was written as part of a project to build a fixed-point vector
0013 processor in collaboration with the University of California at Berkeley,
0014 overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
0015 is available through the web page
0016 http://www.jhauser.us/arithmetic/SoftFloat-2b/SoftFloat-source.txt
0017
0018 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
0019 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
0020 TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
0021 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
0022 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
0023
0024 Derivative works are acceptable, even for commercial purposes, so long as
0025 (1) they include prominent notice that the work is derivative, and (2) they
0026 include prominent notice akin to these three paragraphs for those parts of
0027 this code that are retained.
0028
0029 ===============================================================================
0030 */
0031
0032 /*
0033 -------------------------------------------------------------------------------
0034 Shifts `a' right by the number of bits given in `count'.  If any nonzero
0035 bits are shifted off, they are ``jammed'' into the least significant bit of
0036 the result by setting the least significant bit to 1.  The value of `count'
0037 can be arbitrarily large; in particular, if `count' is greater than 32, the
0038 result will be either 0 or 1, depending on whether `a' is zero or nonzero.
0039 The result is stored in the location pointed to by `zPtr'.
0040 -------------------------------------------------------------------------------
0041 */
0042 INLINE void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr )
0043 {
0044     bits32 z;
0045     if ( count == 0 ) {
0046         z = a;
0047     }
0048     else if ( count < 32 ) {
0049         z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 );
0050     }
0051     else {
0052         z = ( a != 0 );
0053     }
0054     *zPtr = z;
0055 }
0056
0057 /*
0058 -------------------------------------------------------------------------------
0059 Shifts `a' right by the number of bits given in `count'.  If any nonzero
0060 bits are shifted off, they are ``jammed'' into the least significant bit of
0061 the result by setting the least significant bit to 1.  The value of `count'
0062 can be arbitrarily large; in particular, if `count' is greater than 64, the
0063 result will be either 0 or 1, depending on whether `a' is zero or nonzero.
0064 The result is stored in the location pointed to by `zPtr'.
0065 -------------------------------------------------------------------------------
0066 */
0067 INLINE void shift64RightJamming( bits64 a, int16 count, bits64 *zPtr )
0068 {
0069     bits64 z;
0070
0071  __asm__("@shift64RightJamming -- start");
0072     if ( count == 0 ) {
0073         z = a;
0074     }
0075     else if ( count < 64 ) {
0076         z = ( a>>count ) | ( ( a<<( ( - count ) & 63 ) ) != 0 );
0077     }
0078     else {
0079         z = ( a != 0 );
0080     }
0081  __asm__("@shift64RightJamming -- end");
0082     *zPtr = z;
0083 }
0084
0085 /*
0086 -------------------------------------------------------------------------------
0087 Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64
0088 _plus_ the number of bits given in `count'.  The shifted result is at most
0089 64 nonzero bits; this is stored at the location pointed to by `z0Ptr'.  The
0090 bits shifted off form a second 64-bit result as follows:  The _last_ bit
0091 shifted off is the most-significant bit of the extra result, and the other
0092 63 bits of the extra result are all zero if and only if _all_but_the_last_
0093 bits shifted off were all zero.  This extra result is stored in the location
0094 pointed to by `z1Ptr'.  The value of `count' can be arbitrarily large.
0095     (This routine makes more sense if `a0' and `a1' are considered to form a
0096 fixed-point value with binary point between `a0' and `a1'.  This fixed-point
0097 value is shifted right by the number of bits given in `count', and the
0098 integer part of the result is returned at the location pointed to by
0099 `z0Ptr'.  The fractional part of the result may be slightly corrupted as
0100 described above, and is returned at the location pointed to by `z1Ptr'.)
0101 -------------------------------------------------------------------------------
0102 */
0103 INLINE void
0104  shift64ExtraRightJamming(
0105      bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
0106 {
0107     bits64 z0, z1;
0108     int8 negCount = ( - count ) & 63;
0109
0110     if ( count == 0 ) {
0111         z1 = a1;
0112         z0 = a0;
0113     }
0114     else if ( count < 64 ) {
0115         z1 = ( a0<<negCount ) | ( a1 != 0 );
0116         z0 = a0>>count;
0117     }
0118     else {
0119         if ( count == 64 ) {
0120             z1 = a0 | ( a1 != 0 );
0121         }
0122         else {
0123             z1 = ( ( a0 | a1 ) != 0 );
0124         }
0125         z0 = 0;
0126     }
0127     *z1Ptr = z1;
0128     *z0Ptr = z0;
0129
0130 }
0131
0132 /*
0133 -------------------------------------------------------------------------------
0134 Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
0135 number of bits given in `count'.  Any bits shifted off are lost.  The value
0136 of `count' can be arbitrarily large; in particular, if `count' is greater
0137 than 128, the result will be 0.  The result is broken into two 64-bit pieces
0138 which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
0139 -------------------------------------------------------------------------------
0140 */
0141 INLINE void
0142  shift128Right(
0143      bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
0144 {
0145     bits64 z0, z1;
0146     int8 negCount = ( - count ) & 63;
0147
0148     if ( count == 0 ) {
0149         z1 = a1;
0150         z0 = a0;
0151     }
0152     else if ( count < 64 ) {
0153         z1 = ( a0<<negCount ) | ( a1>>count );
0154         z0 = a0>>count;
0155     }
0156     else {
0157         z1 = ( count < 64 ) ? ( a0>>( count & 63 ) ) : 0;
0158         z0 = 0;
0159     }
0160     *z1Ptr = z1;
0161     *z0Ptr = z0;
0162
0163 }
0164
0165 /*
0166 -------------------------------------------------------------------------------
0167 Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
0168 number of bits given in `count'.  If any nonzero bits are shifted off, they
0169 are ``jammed'' into the least significant bit of the result by setting the
0170 least significant bit to 1.  The value of `count' can be arbitrarily large;
0171 in particular, if `count' is greater than 128, the result will be either 0
0172 or 1, depending on whether the concatenation of `a0' and `a1' is zero or
0173 nonzero.  The result is broken into two 64-bit pieces which are stored at
0174 the locations pointed to by `z0Ptr' and `z1Ptr'.
0175 -------------------------------------------------------------------------------
0176 */
0177 INLINE void
0178  shift128RightJamming(
0179      bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
0180 {
0181     bits64 z0, z1;
0182     int8 negCount = ( - count ) & 63;
0183
0184     if ( count == 0 ) {
0185         z1 = a1;
0186         z0 = a0;
0187     }
0188     else if ( count < 64 ) {
0189         z1 = ( a0<<negCount ) | ( a1>>count ) | ( ( a1<<negCount ) != 0 );
0190         z0 = a0>>count;
0191     }
0192     else {
0193         if ( count == 64 ) {
0194             z1 = a0 | ( a1 != 0 );
0195         }
0196         else if ( count < 128 ) {
0197             z1 = ( a0>>( count & 63 ) ) | ( ( ( a0<<negCount ) | a1 ) != 0 );
0198         }
0199         else {
0200             z1 = ( ( a0 | a1 ) != 0 );
0201         }
0202         z0 = 0;
0203     }
0204     *z1Ptr = z1;
0205     *z0Ptr = z0;
0206
0207 }
0208
0209 /*
0210 -------------------------------------------------------------------------------
0211 Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right
0212 by 64 _plus_ the number of bits given in `count'.  The shifted result is
0213 at most 128 nonzero bits; these are broken into two 64-bit pieces which are
0214 stored at the locations pointed to by `z0Ptr' and `z1Ptr'.  The bits shifted
0215 off form a third 64-bit result as follows:  The _last_ bit shifted off is
0216 the most-significant bit of the extra result, and the other 63 bits of the
0217 extra result are all zero if and only if _all_but_the_last_ bits shifted off
0218 were all zero.  This extra result is stored in the location pointed to by
0219 `z2Ptr'.  The value of `count' can be arbitrarily large.
0220     (This routine makes more sense if `a0', `a1', and `a2' are considered
0221 to form a fixed-point value with binary point between `a1' and `a2'.  This
0222 fixed-point value is shifted right by the number of bits given in `count',
0223 and the integer part of the result is returned at the locations pointed to
0224 by `z0Ptr' and `z1Ptr'.  The fractional part of the result may be slightly
0225 corrupted as described above, and is returned at the location pointed to by
0226 `z2Ptr'.)
0227 -------------------------------------------------------------------------------
0228 */
0229 INLINE void
0230  shift128ExtraRightJamming(
0231      bits64 a0,
0232      bits64 a1,
0233      bits64 a2,
0234      int16 count,
0235      bits64 *z0Ptr,
0236      bits64 *z1Ptr,
0237      bits64 *z2Ptr
0238  )
0239 {
0240     bits64 z0, z1, z2;
0241     int8 negCount = ( - count ) & 63;
0242
0243     if ( count == 0 ) {
0244         z2 = a2;
0245         z1 = a1;
0246         z0 = a0;
0247     }
0248     else {
0249         if ( count < 64 ) {
0250             z2 = a1<<negCount;
0251             z1 = ( a0<<negCount ) | ( a1>>count );
0252             z0 = a0>>count;
0253         }
0254         else {
0255             if ( count == 64 ) {
0256                 z2 = a1;
0257                 z1 = a0;
0258             }
0259             else {
0260                 a2 |= a1;
0261                 if ( count < 128 ) {
0262                     z2 = a0<<negCount;
0263                     z1 = a0>>( count & 63 );
0264                 }
0265                 else {
0266                     z2 = ( count == 128 ) ? a0 : ( a0 != 0 );
0267                     z1 = 0;
0268                 }
0269             }
0270             z0 = 0;
0271         }
0272         z2 |= ( a2 != 0 );
0273     }
0274     *z2Ptr = z2;
0275     *z1Ptr = z1;
0276     *z0Ptr = z0;
0277
0278 }
0279
0280 /*
0281 -------------------------------------------------------------------------------
0282 Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the
0283 number of bits given in `count'.  Any bits shifted off are lost.  The value
0284 of `count' must be less than 64.  The result is broken into two 64-bit
0285 pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
0286 -------------------------------------------------------------------------------
0287 */
0288 INLINE void
0289  shortShift128Left(
0290      bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
0291 {
0292
0293     *z1Ptr = a1<<count;
0294     *z0Ptr =
0295         ( count == 0 ) ? a0 : ( a0<<count ) | ( a1>>( ( - count ) & 63 ) );
0296
0297 }
0298
0299 /*
0300 -------------------------------------------------------------------------------
0301 Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' left
0302 by the number of bits given in `count'.  Any bits shifted off are lost.
0303 The value of `count' must be less than 64.  The result is broken into three
0304 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
0305 `z1Ptr', and `z2Ptr'.
0306 -------------------------------------------------------------------------------
0307 */
0308 INLINE void
0309  shortShift192Left(
0310      bits64 a0,
0311      bits64 a1,
0312      bits64 a2,
0313      int16 count,
0314      bits64 *z0Ptr,
0315      bits64 *z1Ptr,
0316      bits64 *z2Ptr
0317  )
0318 {
0319     bits64 z0, z1, z2;
0320     int8 negCount;
0321
0322     z2 = a2<<count;
0323     z1 = a1<<count;
0324     z0 = a0<<count;
0325     if ( 0 < count ) {
0326         negCount = ( ( - count ) & 63 );
0327         z1 |= a2>>negCount;
0328         z0 |= a1>>negCount;
0329     }
0330     *z2Ptr = z2;
0331     *z1Ptr = z1;
0332     *z0Ptr = z0;
0333
0334 }
0335
0336 /*
0337 -------------------------------------------------------------------------------
0338 Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit
0339 value formed by concatenating `b0' and `b1'.  Addition is modulo 2^128, so
0340 any carry out is lost.  The result is broken into two 64-bit pieces which
0341 are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
0342 -------------------------------------------------------------------------------
0343 */
0344 INLINE void
0345  add128(
0346      bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
0347 {
0348     bits64 z1;
0349
0350     z1 = a1 + b1;
0351     *z1Ptr = z1;
0352     *z0Ptr = a0 + b0 + ( z1 < a1 );
0353
0354 }
0355
0356 /*
0357 -------------------------------------------------------------------------------
0358 Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the
0359 192-bit value formed by concatenating `b0', `b1', and `b2'.  Addition is
0360 modulo 2^192, so any carry out is lost.  The result is broken into three
0361 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
0362 `z1Ptr', and `z2Ptr'.
0363 -------------------------------------------------------------------------------
0364 */
0365 INLINE void
0366  add192(
0367      bits64 a0,
0368      bits64 a1,
0369      bits64 a2,
0370      bits64 b0,
0371      bits64 b1,
0372      bits64 b2,
0373      bits64 *z0Ptr,
0374      bits64 *z1Ptr,
0375      bits64 *z2Ptr
0376  )
0377 {
0378     bits64 z0, z1, z2;
0379     int8 carry0, carry1;
0380
0381     z2 = a2 + b2;
0382     carry1 = ( z2 < a2 );
0383     z1 = a1 + b1;
0384     carry0 = ( z1 < a1 );
0385     z0 = a0 + b0;
0386     z1 += carry1;
0387     z0 += ( z1 < carry1 );
0388     z0 += carry0;
0389     *z2Ptr = z2;
0390     *z1Ptr = z1;
0391     *z0Ptr = z0;
0392
0393 }
0394
0395 /*
0396 -------------------------------------------------------------------------------
0397 Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the
0398 128-bit value formed by concatenating `a0' and `a1'.  Subtraction is modulo
0399 2^128, so any borrow out (carry out) is lost.  The result is broken into two
0400 64-bit pieces which are stored at the locations pointed to by `z0Ptr' and
0401 `z1Ptr'.
0402 -------------------------------------------------------------------------------
0403 */
0404 INLINE void
0405  sub128(
0406      bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
0407 {
0408
0409     *z1Ptr = a1 - b1;
0410     *z0Ptr = a0 - b0 - ( a1 < b1 );
0411
0412 }
0413
0414 /*
0415 -------------------------------------------------------------------------------
0416 Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2'
0417 from the 192-bit value formed by concatenating `a0', `a1', and `a2'.
0418 Subtraction is modulo 2^192, so any borrow out (carry out) is lost.  The
0419 result is broken into three 64-bit pieces which are stored at the locations
0420 pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.
0421 -------------------------------------------------------------------------------
0422 */
0423 INLINE void
0424  sub192(
0425      bits64 a0,
0426      bits64 a1,
0427      bits64 a2,
0428      bits64 b0,
0429      bits64 b1,
0430      bits64 b2,
0431      bits64 *z0Ptr,
0432      bits64 *z1Ptr,
0433      bits64 *z2Ptr
0434  )
0435 {
0436     bits64 z0, z1, z2;
0437     int8 borrow0, borrow1;
0438
0439     z2 = a2 - b2;
0440     borrow1 = ( a2 < b2 );
0441     z1 = a1 - b1;
0442     borrow0 = ( a1 < b1 );
0443     z0 = a0 - b0;
0444     z0 -= ( z1 < borrow1 );
0445     z1 -= borrow1;
0446     z0 -= borrow0;
0447     *z2Ptr = z2;
0448     *z1Ptr = z1;
0449     *z0Ptr = z0;
0450
0451 }
0452
0453 /*
0454 -------------------------------------------------------------------------------
0455 Multiplies `a' by `b' to obtain a 128-bit product.  The product is broken
0456 into two 64-bit pieces which are stored at the locations pointed to by
0457 `z0Ptr' and `z1Ptr'.
0458 -------------------------------------------------------------------------------
0459 */
0460 INLINE void mul64To128( bits64 a, bits64 b, bits64 *z0Ptr, bits64 *z1Ptr )
0461 {
0462     bits32 aHigh, aLow, bHigh, bLow;
0463     bits64 z0, zMiddleA, zMiddleB, z1;
0464
0465     aLow = a;
0466     aHigh = a>>32;
0467     bLow = b;
0468     bHigh = b>>32;
0469     z1 = ( (bits64) aLow ) * bLow;
0470     zMiddleA = ( (bits64) aLow ) * bHigh;
0471     zMiddleB = ( (bits64) aHigh ) * bLow;
0472     z0 = ( (bits64) aHigh ) * bHigh;
0473     zMiddleA += zMiddleB;
0474     z0 += ( ( (bits64) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 );
0475     zMiddleA <<= 32;
0476     z1 += zMiddleA;
0477     z0 += ( z1 < zMiddleA );
0478     *z1Ptr = z1;
0479     *z0Ptr = z0;
0480
0481 }
0482
0483 /*
0484 -------------------------------------------------------------------------------
0485 Multiplies the 128-bit value formed by concatenating `a0' and `a1' by `b' to
0486 obtain a 192-bit product.  The product is broken into three 64-bit pieces
0487 which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and
0488 `z2Ptr'.
0489 -------------------------------------------------------------------------------
0490 */
0491 INLINE void
0492  mul128By64To192(
0493      bits64 a0,
0494      bits64 a1,
0495      bits64 b,
0496      bits64 *z0Ptr,
0497      bits64 *z1Ptr,
0498      bits64 *z2Ptr
0499  )
0500 {
0501     bits64 z0, z1, z2, more1;
0502
0503     mul64To128( a1, b, &z1, &z2 );
0504     mul64To128( a0, b, &z0, &more1 );
0505     add128( z0, more1, 0, z1, &z0, &z1 );
0506     *z2Ptr = z2;
0507     *z1Ptr = z1;
0508     *z0Ptr = z0;
0509
0510 }
0511
0512 /*
0513 -------------------------------------------------------------------------------
0514 Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the
0515 128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit
0516 product.  The product is broken into four 64-bit pieces which are stored at
0517 the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
0518 -------------------------------------------------------------------------------
0519 */
0520 INLINE void
0521  mul128To256(
0522      bits64 a0,
0523      bits64 a1,
0524      bits64 b0,
0525      bits64 b1,
0526      bits64 *z0Ptr,
0527      bits64 *z1Ptr,
0528      bits64 *z2Ptr,
0529      bits64 *z3Ptr
0530  )
0531 {
0532     bits64 z0, z1, z2, z3;
0533     bits64 more1, more2;
0534
0535     mul64To128( a1, b1, &z2, &z3 );
0536     mul64To128( a1, b0, &z1, &more2 );
0537     add128( z1, more2, 0, z2, &z1, &z2 );
0538     mul64To128( a0, b0, &z0, &more1 );
0539     add128( z0, more1, 0, z1, &z0, &z1 );
0540     mul64To128( a0, b1, &more1, &more2 );
0541     add128( more1, more2, 0, z2, &more1, &z2 );
0542     add128( z0, z1, 0, more1, &z0, &z1 );
0543     *z3Ptr = z3;
0544     *z2Ptr = z2;
0545     *z1Ptr = z1;
0546     *z0Ptr = z0;
0547
0548 }
0549
0550 /*
0551 -------------------------------------------------------------------------------
0552 Returns an approximation to the 64-bit integer quotient obtained by dividing
0553 `b' into the 128-bit value formed by concatenating `a0' and `a1'.  The
0554 divisor `b' must be at least 2^63.  If q is the exact quotient truncated
0555 toward zero, the approximation returned lies between q and q + 2 inclusive.
0556 If the exact quotient q is larger than 64 bits, the maximum positive 64-bit
0557 unsigned integer is returned.
0558 -------------------------------------------------------------------------------
0559 */
0560 static bits64 estimateDiv128To64( bits64 a0, bits64 a1, bits64 b )
0561 {
0562     bits64 b0, b1;
0563     bits64 rem0, rem1, term0, term1;
0564     bits64 z;
0565     if ( b <= a0 ) return LIT64( 0xFFFFFFFFFFFFFFFF );
0566     b0 = b>>32;  /* hence b0 is 32 bits wide now */
0567     if ( b0<<32 <= a0 ) {
0568         z = LIT64( 0xFFFFFFFF00000000 );
0569     }  else {
0570         z = a0;
0571         do_div( z, b0 );
0572         z <<= 32;
0573     }
0574     mul64To128( b, z, &term0, &term1 );
0575     sub128( a0, a1, term0, term1, &rem0, &rem1 );
0576     while ( ( (sbits64) rem0 ) < 0 ) {
0577         z -= LIT64( 0x100000000 );
0578         b1 = b<<32;
0579         add128( rem0, rem1, b0, b1, &rem0, &rem1 );
0580     }
0581     rem0 = ( rem0<<32 ) | ( rem1>>32 );
0582     if ( b0<<32 <= rem0 ) {
0583         z |= 0xFFFFFFFF;
0584     } else {
0585         do_div( rem0, b0 );
0586         z |= rem0;
0587     }
0588     return z;
0589
0590 }
0591
0592 /*
0593 -------------------------------------------------------------------------------
0594 Returns an approximation to the square root of the 32-bit significand given
0595 by `a'.  Considered as an integer, `a' must be at least 2^31.  If bit 0 of
0596 `aExp' (the least significant bit) is 1, the integer returned approximates
0597 2^31*sqrt(`a'/2^31), where `a' is considered an integer.  If bit 0 of `aExp'
0598 is 0, the integer returned approximates 2^31*sqrt(`a'/2^30).  In either
0599 case, the approximation returned lies strictly within +/-2 of the exact
0600 value.
0601 -------------------------------------------------------------------------------
0602 */
0603 static bits32 estimateSqrt32( int16 aExp, bits32 a )
0604 {
0605     static const bits16 sqrtOddAdjustments[] = {
0606         0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
0607         0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
0608     };
0609     static const bits16 sqrtEvenAdjustments[] = {
0610         0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
0611         0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
0612     };
0613     int8 index;
0614     bits32 z;
0615     bits64 A;
0616
0617     index = ( a>>27 ) & 15;
0618     if ( aExp & 1 ) {
0619         z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ index ];
0620         z = ( ( a / z )<<14 ) + ( z<<15 );
0621         a >>= 1;
0622     }
0623     else {
0624         z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ index ];
0625         z = a / z + z;
0626         z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 );
0627         if ( z <= a ) return (bits32) ( ( (sbits32) a )>>1 );
0628     }
0629     A = ( (bits64) a )<<31;
0630     do_div( A, z );
0631     return ( (bits32) A ) + ( z>>1 );
0632
0633 }
0634
0635 /*
0636 -------------------------------------------------------------------------------
0637 Returns the number of leading 0 bits before the most-significant 1 bit
0638 of `a'.  If `a' is zero, 32 is returned.
0639 -------------------------------------------------------------------------------
0640 */
0641 static int8 countLeadingZeros32( bits32 a )
0642 {
0643     static const int8 countLeadingZerosHigh[] = {
0644         8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
0645         3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
0646         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
0647         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
0648         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
0649         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
0650         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
0651         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
0652         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0653         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0654         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0655         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0656         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0657         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0658         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0659         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
0660     };
0661     int8 shiftCount;
0662
0663     shiftCount = 0;
0664     if ( a < 0x10000 ) {
0665         shiftCount += 16;
0666         a <<= 16;
0667     }
0668     if ( a < 0x1000000 ) {
0669         shiftCount += 8;
0670         a <<= 8;
0671     }
0672     shiftCount += countLeadingZerosHigh[ a>>24 ];
0673     return shiftCount;
0674
0675 }
0676
0677 /*
0678 -------------------------------------------------------------------------------
0679 Returns the number of leading 0 bits before the most-significant 1 bit
0680 of `a'.  If `a' is zero, 64 is returned.
0681 -------------------------------------------------------------------------------
0682 */
0683 static int8 countLeadingZeros64( bits64 a )
0684 {
0685     int8 shiftCount;
0686
0687     shiftCount = 0;
0688     if ( a < ( (bits64) 1 )<<32 ) {
0689         shiftCount += 32;
0690     }
0691     else {
0692         a >>= 32;
0693     }
0694     shiftCount += countLeadingZeros32( a );
0695     return shiftCount;
0696
0697 }
0698
0699 /*
0700 -------------------------------------------------------------------------------
0701 Returns 1 if the 128-bit value formed by concatenating `a0' and `a1'
0702 is equal to the 128-bit value formed by concatenating `b0' and `b1'.
0703 Otherwise, returns 0.
0704 -------------------------------------------------------------------------------
0705 */
0706 INLINE flag eq128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
0707 {
0708
0709     return ( a0 == b0 ) && ( a1 == b1 );
0710
0711 }
0712
0713 /*
0714 -------------------------------------------------------------------------------
0715 Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
0716 than or equal to the 128-bit value formed by concatenating `b0' and `b1'.
0717 Otherwise, returns 0.
0718 -------------------------------------------------------------------------------
0719 */
0720 INLINE flag le128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
0721 {
0722
0723     return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) );
0724
0725 }
0726
0727 /*
0728 -------------------------------------------------------------------------------
0729 Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
0730 than the 128-bit value formed by concatenating `b0' and `b1'.  Otherwise,
0731 returns 0.
0732 -------------------------------------------------------------------------------
0733 */
0734 INLINE flag lt128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
0735 {
0736
0737     return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) );
0738
0739 }
0740
0741 /*
0742 -------------------------------------------------------------------------------
0743 Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is
0744 not equal to the 128-bit value formed by concatenating `b0' and `b1'.
0745 Otherwise, returns 0.
0746 -------------------------------------------------------------------------------
0747 */
0748 INLINE flag ne128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
0749 {
0750
0751     return ( a0 != b0 ) || ( a1 != b1 );
0752
0753 }
0754