0001
0002 /*
0003 ===============================================================================
0004
0005 This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
0006 Arithmetic Package, Release 2.
0007
0008 Written by John R. Hauser. This work was made possible in part by the
0009 International Computer Science Institute, located at Suite 600, 1947 Center
0010 Street, Berkeley, California 94704. Funding was partially provided by the
0011 National Science Foundation under grant MIP-9311980. The original version
0012 of this code was written as part of a project to build a fixed-point vector
0013 processor in collaboration with the University of California at Berkeley,
0014 overseen by Profs. Nelson Morgan and John Wawrzynek. More information
0015 is available through the web page
0016 http://www.jhauser.us/arithmetic/SoftFloat-2b/SoftFloat-source.txt
0017
0018 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
0019 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
0020 TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
0021 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
0022 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
0023
0024 Derivative works are acceptable, even for commercial purposes, so long as
0025 (1) they include prominent notice that the work is derivative, and (2) they
0026 include prominent notice akin to these three paragraphs for those parts of
0027 this code that are retained.
0028
0029 ===============================================================================
0030 */
0031
0032 /*
0033 -------------------------------------------------------------------------------
0034 Shifts `a' right by the number of bits given in `count'. If any nonzero
0035 bits are shifted off, they are ``jammed'' into the least significant bit of
0036 the result by setting the least significant bit to 1. The value of `count'
0037 can be arbitrarily large; in particular, if `count' is greater than 32, the
0038 result will be either 0 or 1, depending on whether `a' is zero or nonzero.
0039 The result is stored in the location pointed to by `zPtr'.
0040 -------------------------------------------------------------------------------
0041 */
0042 INLINE void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr )
0043 {
0044 bits32 z;
0045 if ( count == 0 ) {
0046 z = a;
0047 }
0048 else if ( count < 32 ) {
0049 z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 );
0050 }
0051 else {
0052 z = ( a != 0 );
0053 }
0054 *zPtr = z;
0055 }
0056
0057 /*
0058 -------------------------------------------------------------------------------
0059 Shifts `a' right by the number of bits given in `count'. If any nonzero
0060 bits are shifted off, they are ``jammed'' into the least significant bit of
0061 the result by setting the least significant bit to 1. The value of `count'
0062 can be arbitrarily large; in particular, if `count' is greater than 64, the
0063 result will be either 0 or 1, depending on whether `a' is zero or nonzero.
0064 The result is stored in the location pointed to by `zPtr'.
0065 -------------------------------------------------------------------------------
0066 */
0067 INLINE void shift64RightJamming( bits64 a, int16 count, bits64 *zPtr )
0068 {
0069 bits64 z;
0070
0071 __asm__("@shift64RightJamming -- start");
0072 if ( count == 0 ) {
0073 z = a;
0074 }
0075 else if ( count < 64 ) {
0076 z = ( a>>count ) | ( ( a<<( ( - count ) & 63 ) ) != 0 );
0077 }
0078 else {
0079 z = ( a != 0 );
0080 }
0081 __asm__("@shift64RightJamming -- end");
0082 *zPtr = z;
0083 }
0084
0085 /*
0086 -------------------------------------------------------------------------------
0087 Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64
0088 _plus_ the number of bits given in `count'. The shifted result is at most
0089 64 nonzero bits; this is stored at the location pointed to by `z0Ptr'. The
0090 bits shifted off form a second 64-bit result as follows: The _last_ bit
0091 shifted off is the most-significant bit of the extra result, and the other
0092 63 bits of the extra result are all zero if and only if _all_but_the_last_
0093 bits shifted off were all zero. This extra result is stored in the location
0094 pointed to by `z1Ptr'. The value of `count' can be arbitrarily large.
0095 (This routine makes more sense if `a0' and `a1' are considered to form a
0096 fixed-point value with binary point between `a0' and `a1'. This fixed-point
0097 value is shifted right by the number of bits given in `count', and the
0098 integer part of the result is returned at the location pointed to by
0099 `z0Ptr'. The fractional part of the result may be slightly corrupted as
0100 described above, and is returned at the location pointed to by `z1Ptr'.)
0101 -------------------------------------------------------------------------------
0102 */
0103 INLINE void
0104 shift64ExtraRightJamming(
0105 bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
0106 {
0107 bits64 z0, z1;
0108 int8 negCount = ( - count ) & 63;
0109
0110 if ( count == 0 ) {
0111 z1 = a1;
0112 z0 = a0;
0113 }
0114 else if ( count < 64 ) {
0115 z1 = ( a0<<negCount ) | ( a1 != 0 );
0116 z0 = a0>>count;
0117 }
0118 else {
0119 if ( count == 64 ) {
0120 z1 = a0 | ( a1 != 0 );
0121 }
0122 else {
0123 z1 = ( ( a0 | a1 ) != 0 );
0124 }
0125 z0 = 0;
0126 }
0127 *z1Ptr = z1;
0128 *z0Ptr = z0;
0129
0130 }
0131
0132 /*
0133 -------------------------------------------------------------------------------
0134 Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
0135 number of bits given in `count'. Any bits shifted off are lost. The value
0136 of `count' can be arbitrarily large; in particular, if `count' is greater
0137 than 128, the result will be 0. The result is broken into two 64-bit pieces
0138 which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
0139 -------------------------------------------------------------------------------
0140 */
0141 INLINE void
0142 shift128Right(
0143 bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
0144 {
0145 bits64 z0, z1;
0146 int8 negCount = ( - count ) & 63;
0147
0148 if ( count == 0 ) {
0149 z1 = a1;
0150 z0 = a0;
0151 }
0152 else if ( count < 64 ) {
0153 z1 = ( a0<<negCount ) | ( a1>>count );
0154 z0 = a0>>count;
0155 }
0156 else {
0157 z1 = ( count < 64 ) ? ( a0>>( count & 63 ) ) : 0;
0158 z0 = 0;
0159 }
0160 *z1Ptr = z1;
0161 *z0Ptr = z0;
0162
0163 }
0164
0165 /*
0166 -------------------------------------------------------------------------------
0167 Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
0168 number of bits given in `count'. If any nonzero bits are shifted off, they
0169 are ``jammed'' into the least significant bit of the result by setting the
0170 least significant bit to 1. The value of `count' can be arbitrarily large;
0171 in particular, if `count' is greater than 128, the result will be either 0
0172 or 1, depending on whether the concatenation of `a0' and `a1' is zero or
0173 nonzero. The result is broken into two 64-bit pieces which are stored at
0174 the locations pointed to by `z0Ptr' and `z1Ptr'.
0175 -------------------------------------------------------------------------------
0176 */
0177 INLINE void
0178 shift128RightJamming(
0179 bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
0180 {
0181 bits64 z0, z1;
0182 int8 negCount = ( - count ) & 63;
0183
0184 if ( count == 0 ) {
0185 z1 = a1;
0186 z0 = a0;
0187 }
0188 else if ( count < 64 ) {
0189 z1 = ( a0<<negCount ) | ( a1>>count ) | ( ( a1<<negCount ) != 0 );
0190 z0 = a0>>count;
0191 }
0192 else {
0193 if ( count == 64 ) {
0194 z1 = a0 | ( a1 != 0 );
0195 }
0196 else if ( count < 128 ) {
0197 z1 = ( a0>>( count & 63 ) ) | ( ( ( a0<<negCount ) | a1 ) != 0 );
0198 }
0199 else {
0200 z1 = ( ( a0 | a1 ) != 0 );
0201 }
0202 z0 = 0;
0203 }
0204 *z1Ptr = z1;
0205 *z0Ptr = z0;
0206
0207 }
0208
0209 /*
0210 -------------------------------------------------------------------------------
0211 Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right
0212 by 64 _plus_ the number of bits given in `count'. The shifted result is
0213 at most 128 nonzero bits; these are broken into two 64-bit pieces which are
0214 stored at the locations pointed to by `z0Ptr' and `z1Ptr'. The bits shifted
0215 off form a third 64-bit result as follows: The _last_ bit shifted off is
0216 the most-significant bit of the extra result, and the other 63 bits of the
0217 extra result are all zero if and only if _all_but_the_last_ bits shifted off
0218 were all zero. This extra result is stored in the location pointed to by
0219 `z2Ptr'. The value of `count' can be arbitrarily large.
0220 (This routine makes more sense if `a0', `a1', and `a2' are considered
0221 to form a fixed-point value with binary point between `a1' and `a2'. This
0222 fixed-point value is shifted right by the number of bits given in `count',
0223 and the integer part of the result is returned at the locations pointed to
0224 by `z0Ptr' and `z1Ptr'. The fractional part of the result may be slightly
0225 corrupted as described above, and is returned at the location pointed to by
0226 `z2Ptr'.)
0227 -------------------------------------------------------------------------------
0228 */
0229 INLINE void
0230 shift128ExtraRightJamming(
0231 bits64 a0,
0232 bits64 a1,
0233 bits64 a2,
0234 int16 count,
0235 bits64 *z0Ptr,
0236 bits64 *z1Ptr,
0237 bits64 *z2Ptr
0238 )
0239 {
0240 bits64 z0, z1, z2;
0241 int8 negCount = ( - count ) & 63;
0242
0243 if ( count == 0 ) {
0244 z2 = a2;
0245 z1 = a1;
0246 z0 = a0;
0247 }
0248 else {
0249 if ( count < 64 ) {
0250 z2 = a1<<negCount;
0251 z1 = ( a0<<negCount ) | ( a1>>count );
0252 z0 = a0>>count;
0253 }
0254 else {
0255 if ( count == 64 ) {
0256 z2 = a1;
0257 z1 = a0;
0258 }
0259 else {
0260 a2 |= a1;
0261 if ( count < 128 ) {
0262 z2 = a0<<negCount;
0263 z1 = a0>>( count & 63 );
0264 }
0265 else {
0266 z2 = ( count == 128 ) ? a0 : ( a0 != 0 );
0267 z1 = 0;
0268 }
0269 }
0270 z0 = 0;
0271 }
0272 z2 |= ( a2 != 0 );
0273 }
0274 *z2Ptr = z2;
0275 *z1Ptr = z1;
0276 *z0Ptr = z0;
0277
0278 }
0279
0280 /*
0281 -------------------------------------------------------------------------------
0282 Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the
0283 number of bits given in `count'. Any bits shifted off are lost. The value
0284 of `count' must be less than 64. The result is broken into two 64-bit
0285 pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
0286 -------------------------------------------------------------------------------
0287 */
0288 INLINE void
0289 shortShift128Left(
0290 bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
0291 {
0292
0293 *z1Ptr = a1<<count;
0294 *z0Ptr =
0295 ( count == 0 ) ? a0 : ( a0<<count ) | ( a1>>( ( - count ) & 63 ) );
0296
0297 }
0298
0299 /*
0300 -------------------------------------------------------------------------------
0301 Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' left
0302 by the number of bits given in `count'. Any bits shifted off are lost.
0303 The value of `count' must be less than 64. The result is broken into three
0304 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
0305 `z1Ptr', and `z2Ptr'.
0306 -------------------------------------------------------------------------------
0307 */
0308 INLINE void
0309 shortShift192Left(
0310 bits64 a0,
0311 bits64 a1,
0312 bits64 a2,
0313 int16 count,
0314 bits64 *z0Ptr,
0315 bits64 *z1Ptr,
0316 bits64 *z2Ptr
0317 )
0318 {
0319 bits64 z0, z1, z2;
0320 int8 negCount;
0321
0322 z2 = a2<<count;
0323 z1 = a1<<count;
0324 z0 = a0<<count;
0325 if ( 0 < count ) {
0326 negCount = ( ( - count ) & 63 );
0327 z1 |= a2>>negCount;
0328 z0 |= a1>>negCount;
0329 }
0330 *z2Ptr = z2;
0331 *z1Ptr = z1;
0332 *z0Ptr = z0;
0333
0334 }
0335
0336 /*
0337 -------------------------------------------------------------------------------
0338 Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit
0339 value formed by concatenating `b0' and `b1'. Addition is modulo 2^128, so
0340 any carry out is lost. The result is broken into two 64-bit pieces which
0341 are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
0342 -------------------------------------------------------------------------------
0343 */
0344 INLINE void
0345 add128(
0346 bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
0347 {
0348 bits64 z1;
0349
0350 z1 = a1 + b1;
0351 *z1Ptr = z1;
0352 *z0Ptr = a0 + b0 + ( z1 < a1 );
0353
0354 }
0355
0356 /*
0357 -------------------------------------------------------------------------------
0358 Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the
0359 192-bit value formed by concatenating `b0', `b1', and `b2'. Addition is
0360 modulo 2^192, so any carry out is lost. The result is broken into three
0361 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
0362 `z1Ptr', and `z2Ptr'.
0363 -------------------------------------------------------------------------------
0364 */
0365 INLINE void
0366 add192(
0367 bits64 a0,
0368 bits64 a1,
0369 bits64 a2,
0370 bits64 b0,
0371 bits64 b1,
0372 bits64 b2,
0373 bits64 *z0Ptr,
0374 bits64 *z1Ptr,
0375 bits64 *z2Ptr
0376 )
0377 {
0378 bits64 z0, z1, z2;
0379 int8 carry0, carry1;
0380
0381 z2 = a2 + b2;
0382 carry1 = ( z2 < a2 );
0383 z1 = a1 + b1;
0384 carry0 = ( z1 < a1 );
0385 z0 = a0 + b0;
0386 z1 += carry1;
0387 z0 += ( z1 < carry1 );
0388 z0 += carry0;
0389 *z2Ptr = z2;
0390 *z1Ptr = z1;
0391 *z0Ptr = z0;
0392
0393 }
0394
0395 /*
0396 -------------------------------------------------------------------------------
0397 Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the
0398 128-bit value formed by concatenating `a0' and `a1'. Subtraction is modulo
0399 2^128, so any borrow out (carry out) is lost. The result is broken into two
0400 64-bit pieces which are stored at the locations pointed to by `z0Ptr' and
0401 `z1Ptr'.
0402 -------------------------------------------------------------------------------
0403 */
0404 INLINE void
0405 sub128(
0406 bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
0407 {
0408
0409 *z1Ptr = a1 - b1;
0410 *z0Ptr = a0 - b0 - ( a1 < b1 );
0411
0412 }
0413
0414 /*
0415 -------------------------------------------------------------------------------
0416 Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2'
0417 from the 192-bit value formed by concatenating `a0', `a1', and `a2'.
0418 Subtraction is modulo 2^192, so any borrow out (carry out) is lost. The
0419 result is broken into three 64-bit pieces which are stored at the locations
0420 pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.
0421 -------------------------------------------------------------------------------
0422 */
0423 INLINE void
0424 sub192(
0425 bits64 a0,
0426 bits64 a1,
0427 bits64 a2,
0428 bits64 b0,
0429 bits64 b1,
0430 bits64 b2,
0431 bits64 *z0Ptr,
0432 bits64 *z1Ptr,
0433 bits64 *z2Ptr
0434 )
0435 {
0436 bits64 z0, z1, z2;
0437 int8 borrow0, borrow1;
0438
0439 z2 = a2 - b2;
0440 borrow1 = ( a2 < b2 );
0441 z1 = a1 - b1;
0442 borrow0 = ( a1 < b1 );
0443 z0 = a0 - b0;
0444 z0 -= ( z1 < borrow1 );
0445 z1 -= borrow1;
0446 z0 -= borrow0;
0447 *z2Ptr = z2;
0448 *z1Ptr = z1;
0449 *z0Ptr = z0;
0450
0451 }
0452
0453 /*
0454 -------------------------------------------------------------------------------
0455 Multiplies `a' by `b' to obtain a 128-bit product. The product is broken
0456 into two 64-bit pieces which are stored at the locations pointed to by
0457 `z0Ptr' and `z1Ptr'.
0458 -------------------------------------------------------------------------------
0459 */
0460 INLINE void mul64To128( bits64 a, bits64 b, bits64 *z0Ptr, bits64 *z1Ptr )
0461 {
0462 bits32 aHigh, aLow, bHigh, bLow;
0463 bits64 z0, zMiddleA, zMiddleB, z1;
0464
0465 aLow = a;
0466 aHigh = a>>32;
0467 bLow = b;
0468 bHigh = b>>32;
0469 z1 = ( (bits64) aLow ) * bLow;
0470 zMiddleA = ( (bits64) aLow ) * bHigh;
0471 zMiddleB = ( (bits64) aHigh ) * bLow;
0472 z0 = ( (bits64) aHigh ) * bHigh;
0473 zMiddleA += zMiddleB;
0474 z0 += ( ( (bits64) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 );
0475 zMiddleA <<= 32;
0476 z1 += zMiddleA;
0477 z0 += ( z1 < zMiddleA );
0478 *z1Ptr = z1;
0479 *z0Ptr = z0;
0480
0481 }
0482
0483 /*
0484 -------------------------------------------------------------------------------
0485 Multiplies the 128-bit value formed by concatenating `a0' and `a1' by `b' to
0486 obtain a 192-bit product. The product is broken into three 64-bit pieces
0487 which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and
0488 `z2Ptr'.
0489 -------------------------------------------------------------------------------
0490 */
0491 INLINE void
0492 mul128By64To192(
0493 bits64 a0,
0494 bits64 a1,
0495 bits64 b,
0496 bits64 *z0Ptr,
0497 bits64 *z1Ptr,
0498 bits64 *z2Ptr
0499 )
0500 {
0501 bits64 z0, z1, z2, more1;
0502
0503 mul64To128( a1, b, &z1, &z2 );
0504 mul64To128( a0, b, &z0, &more1 );
0505 add128( z0, more1, 0, z1, &z0, &z1 );
0506 *z2Ptr = z2;
0507 *z1Ptr = z1;
0508 *z0Ptr = z0;
0509
0510 }
0511
0512 /*
0513 -------------------------------------------------------------------------------
0514 Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the
0515 128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit
0516 product. The product is broken into four 64-bit pieces which are stored at
0517 the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
0518 -------------------------------------------------------------------------------
0519 */
0520 INLINE void
0521 mul128To256(
0522 bits64 a0,
0523 bits64 a1,
0524 bits64 b0,
0525 bits64 b1,
0526 bits64 *z0Ptr,
0527 bits64 *z1Ptr,
0528 bits64 *z2Ptr,
0529 bits64 *z3Ptr
0530 )
0531 {
0532 bits64 z0, z1, z2, z3;
0533 bits64 more1, more2;
0534
0535 mul64To128( a1, b1, &z2, &z3 );
0536 mul64To128( a1, b0, &z1, &more2 );
0537 add128( z1, more2, 0, z2, &z1, &z2 );
0538 mul64To128( a0, b0, &z0, &more1 );
0539 add128( z0, more1, 0, z1, &z0, &z1 );
0540 mul64To128( a0, b1, &more1, &more2 );
0541 add128( more1, more2, 0, z2, &more1, &z2 );
0542 add128( z0, z1, 0, more1, &z0, &z1 );
0543 *z3Ptr = z3;
0544 *z2Ptr = z2;
0545 *z1Ptr = z1;
0546 *z0Ptr = z0;
0547
0548 }
0549
0550 /*
0551 -------------------------------------------------------------------------------
0552 Returns an approximation to the 64-bit integer quotient obtained by dividing
0553 `b' into the 128-bit value formed by concatenating `a0' and `a1'. The
0554 divisor `b' must be at least 2^63. If q is the exact quotient truncated
0555 toward zero, the approximation returned lies between q and q + 2 inclusive.
0556 If the exact quotient q is larger than 64 bits, the maximum positive 64-bit
0557 unsigned integer is returned.
0558 -------------------------------------------------------------------------------
0559 */
0560 static bits64 estimateDiv128To64( bits64 a0, bits64 a1, bits64 b )
0561 {
0562 bits64 b0, b1;
0563 bits64 rem0, rem1, term0, term1;
0564 bits64 z;
0565 if ( b <= a0 ) return LIT64( 0xFFFFFFFFFFFFFFFF );
0566 b0 = b>>32; /* hence b0 is 32 bits wide now */
0567 if ( b0<<32 <= a0 ) {
0568 z = LIT64( 0xFFFFFFFF00000000 );
0569 } else {
0570 z = a0;
0571 do_div( z, b0 );
0572 z <<= 32;
0573 }
0574 mul64To128( b, z, &term0, &term1 );
0575 sub128( a0, a1, term0, term1, &rem0, &rem1 );
0576 while ( ( (sbits64) rem0 ) < 0 ) {
0577 z -= LIT64( 0x100000000 );
0578 b1 = b<<32;
0579 add128( rem0, rem1, b0, b1, &rem0, &rem1 );
0580 }
0581 rem0 = ( rem0<<32 ) | ( rem1>>32 );
0582 if ( b0<<32 <= rem0 ) {
0583 z |= 0xFFFFFFFF;
0584 } else {
0585 do_div( rem0, b0 );
0586 z |= rem0;
0587 }
0588 return z;
0589
0590 }
0591
0592 /*
0593 -------------------------------------------------------------------------------
0594 Returns an approximation to the square root of the 32-bit significand given
0595 by `a'. Considered as an integer, `a' must be at least 2^31. If bit 0 of
0596 `aExp' (the least significant bit) is 1, the integer returned approximates
0597 2^31*sqrt(`a'/2^31), where `a' is considered an integer. If bit 0 of `aExp'
0598 is 0, the integer returned approximates 2^31*sqrt(`a'/2^30). In either
0599 case, the approximation returned lies strictly within +/-2 of the exact
0600 value.
0601 -------------------------------------------------------------------------------
0602 */
0603 static bits32 estimateSqrt32( int16 aExp, bits32 a )
0604 {
0605 static const bits16 sqrtOddAdjustments[] = {
0606 0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
0607 0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
0608 };
0609 static const bits16 sqrtEvenAdjustments[] = {
0610 0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
0611 0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
0612 };
0613 int8 index;
0614 bits32 z;
0615 bits64 A;
0616
0617 index = ( a>>27 ) & 15;
0618 if ( aExp & 1 ) {
0619 z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ index ];
0620 z = ( ( a / z )<<14 ) + ( z<<15 );
0621 a >>= 1;
0622 }
0623 else {
0624 z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ index ];
0625 z = a / z + z;
0626 z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 );
0627 if ( z <= a ) return (bits32) ( ( (sbits32) a )>>1 );
0628 }
0629 A = ( (bits64) a )<<31;
0630 do_div( A, z );
0631 return ( (bits32) A ) + ( z>>1 );
0632
0633 }
0634
0635 /*
0636 -------------------------------------------------------------------------------
0637 Returns the number of leading 0 bits before the most-significant 1 bit
0638 of `a'. If `a' is zero, 32 is returned.
0639 -------------------------------------------------------------------------------
0640 */
0641 static int8 countLeadingZeros32( bits32 a )
0642 {
0643 static const int8 countLeadingZerosHigh[] = {
0644 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
0645 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
0646 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
0647 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
0648 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
0649 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
0650 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
0651 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
0652 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0653 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0654 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0655 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0656 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0657 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0658 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0659 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
0660 };
0661 int8 shiftCount;
0662
0663 shiftCount = 0;
0664 if ( a < 0x10000 ) {
0665 shiftCount += 16;
0666 a <<= 16;
0667 }
0668 if ( a < 0x1000000 ) {
0669 shiftCount += 8;
0670 a <<= 8;
0671 }
0672 shiftCount += countLeadingZerosHigh[ a>>24 ];
0673 return shiftCount;
0674
0675 }
0676
0677 /*
0678 -------------------------------------------------------------------------------
0679 Returns the number of leading 0 bits before the most-significant 1 bit
0680 of `a'. If `a' is zero, 64 is returned.
0681 -------------------------------------------------------------------------------
0682 */
0683 static int8 countLeadingZeros64( bits64 a )
0684 {
0685 int8 shiftCount;
0686
0687 shiftCount = 0;
0688 if ( a < ( (bits64) 1 )<<32 ) {
0689 shiftCount += 32;
0690 }
0691 else {
0692 a >>= 32;
0693 }
0694 shiftCount += countLeadingZeros32( a );
0695 return shiftCount;
0696
0697 }
0698
0699 /*
0700 -------------------------------------------------------------------------------
0701 Returns 1 if the 128-bit value formed by concatenating `a0' and `a1'
0702 is equal to the 128-bit value formed by concatenating `b0' and `b1'.
0703 Otherwise, returns 0.
0704 -------------------------------------------------------------------------------
0705 */
0706 INLINE flag eq128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
0707 {
0708
0709 return ( a0 == b0 ) && ( a1 == b1 );
0710
0711 }
0712
0713 /*
0714 -------------------------------------------------------------------------------
0715 Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
0716 than or equal to the 128-bit value formed by concatenating `b0' and `b1'.
0717 Otherwise, returns 0.
0718 -------------------------------------------------------------------------------
0719 */
0720 INLINE flag le128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
0721 {
0722
0723 return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) );
0724
0725 }
0726
0727 /*
0728 -------------------------------------------------------------------------------
0729 Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
0730 than the 128-bit value formed by concatenating `b0' and `b1'. Otherwise,
0731 returns 0.
0732 -------------------------------------------------------------------------------
0733 */
0734 INLINE flag lt128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
0735 {
0736
0737 return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) );
0738
0739 }
0740
0741 /*
0742 -------------------------------------------------------------------------------
0743 Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is
0744 not equal to the 128-bit value formed by concatenating `b0' and `b1'.
0745 Otherwise, returns 0.
0746 -------------------------------------------------------------------------------
0747 */
0748 INLINE flag ne128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
0749 {
0750
0751 return ( a0 != b0 ) || ( a1 != b1 );
0752
0753 }
0754