0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024
0025
0026
0027
0028
0029
0030
0031 #include <asm/div64.h>
0032
0033 #include "fpa11.h"
0034
0035
0036
0037
0038
0039
0040
0041
0042
0043
0044 #include "softfloat-macros"
0045
0046
0047
0048
0049
0050
0051
0052
0053
0054
0055
0056 #include "softfloat-specialize"
0057
0058
0059
0060
0061
0062
0063
0064
0065
0066
0067
0068
0069
/*
 * Round the 64-bit fixed-point value `absZ' (7 fraction bits in its low
 * bits) to a 32-bit signed integer according to roundData->mode, giving
 * the result the sign `zSign'.  On overflow float_flag_invalid is raised
 * and the saturated value is returned; any discarded nonzero fraction
 * raises float_flag_inexact.
 */
static int32 roundAndPackInt32( struct roundingData *roundData, flag zSign, bits64 absZ )
{
    int8 roundingMode;
    flag roundNearestEven;
    int8 roundIncrement, roundBits;
    int32 z;

    roundingMode = roundData->mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    roundIncrement = 0x40;    /* half of the 0x80 integer step */
    if ( ! roundNearestEven ) {
        if ( roundingMode == float_round_to_zero ) {
            roundIncrement = 0;
        }
        else {
            /* Directed rounding: add just under one step, but only when
               the mode rounds away from zero for this sign. */
            roundIncrement = 0x7F;
            if ( zSign ) {
                if ( roundingMode == float_round_up ) roundIncrement = 0;
            }
            else {
                if ( roundingMode == float_round_down ) roundIncrement = 0;
            }
        }
    }
    roundBits = absZ & 0x7F;
    absZ = ( absZ + roundIncrement )>>7;
    /* Ties-to-even: clear the result LSB when the discarded bits were
       exactly one half. */
    absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
    z = absZ;
    if ( zSign ) z = - z;
    /* Out of int32 range: high bits survive, or the sign came out wrong. */
    if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
        roundData->exception |= float_flag_invalid;
        return zSign ? 0x80000000 : 0x7FFFFFFF;
    }
    if ( roundBits ) roundData->exception |= float_flag_inexact;
    return z;

}
0107
0108
0109
0110
0111
0112
0113 INLINE bits32 extractFloat32Frac( float32 a )
0114 {
0115
0116 return a & 0x007FFFFF;
0117
0118 }
0119
0120
0121
0122
0123
0124
0125 INLINE int16 extractFloat32Exp( float32 a )
0126 {
0127
0128 return ( a>>23 ) & 0xFF;
0129
0130 }
0131
0132
0133
0134
0135
0136
#if 0
/*
 * Return the sign bit of the single-precision value `a'.
 * NOTE(review): compiled out — callers in this file use a definition
 * presumably supplied by a header; confirm before re-enabling this copy.
 */
INLINE flag extractFloat32Sign( float32 a )
{

    return a>>31;

}
#endif
0145
0146
0147
0148
0149
0150
0151
0152
0153
0154 static void
0155 normalizeFloat32Subnormal( bits32 aSig, int16 *zExpPtr, bits32 *zSigPtr )
0156 {
0157 int8 shiftCount;
0158
0159 shiftCount = countLeadingZeros32( aSig ) - 8;
0160 *zSigPtr = aSig<<shiftCount;
0161 *zExpPtr = 1 - shiftCount;
0162
0163 }
0164
0165
0166
0167
0168
0169
0170
0171
0172
0173
0174
0175
0176
/*
 * Pack sign `zSign', biased exponent `zExp' and significand `zSig' into a
 * single-precision value.  The fields are combined by addition, so a
 * significand carrying the implicit-1 bit bumps the exponent — the
 * rounding routines rely on this behaviour.
 */
INLINE float32 packFloat32( flag zSign, int16 zExp, bits32 zSig )
{
#if 0
    /* Disabled hand-written ARM assembly variant of the C expression
       below.  NOTE(review): the constraint list looks wrong (the output
       `f' appears only as an input) — verify before ever enabling. */
    float32 f;
    __asm__("@ packFloat32 \n\
    mov %0, %1, asl #31 \n\
    orr %0, %2, asl #23 \n\
    orr %0, %3"
    :
    : "g" (f), "g" (zSign), "g" (zExp), "g" (zSig)
    : "cc");
    return f;
#else
    return ( ( (bits32) zSign )<<31 ) + ( ( (bits32) zExp )<<23 ) + zSig;
#endif
}
0193
0194
0195
0196
0197
0198
0199
0200
0201
0202
0203
0204
0205
0206
0207
0208
0209
0210
0211
0212
0213
0214
0215
0216
/*
 * Round and pack sign `zSign', exponent `zExp' and significand `zSig'
 * (carrying 7 extra rounding bits at the bottom) into a single-precision
 * value, honouring roundData->mode and raising overflow/underflow/inexact
 * exceptions as required.
 */
static float32 roundAndPackFloat32( struct roundingData *roundData, flag zSign, int16 zExp, bits32 zSig )
{
    int8 roundingMode;
    flag roundNearestEven;
    int8 roundIncrement, roundBits;
    flag isTiny;

    roundingMode = roundData->mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    roundIncrement = 0x40;    /* half of the 0x80 LSB step */
    if ( ! roundNearestEven ) {
        if ( roundingMode == float_round_to_zero ) {
            roundIncrement = 0;
        }
        else {
            /* Directed rounding toward the sign's direction only. */
            roundIncrement = 0x7F;
            if ( zSign ) {
                if ( roundingMode == float_round_up ) roundIncrement = 0;
            }
            else {
                if ( roundingMode == float_round_down ) roundIncrement = 0;
            }
        }
    }
    roundBits = zSig & 0x7F;
    /* The unsigned compare catches both zExp >= 0xFD and negative zExp. */
    if ( 0xFD <= (bits16) zExp ) {
        if ( ( 0xFD < zExp )
             || ( ( zExp == 0xFD )
                  && ( (sbits32) ( zSig + roundIncrement ) < 0 ) )
           ) {
            /* Overflow: infinity, or the largest finite value when the
               rounding mode does not round away (roundIncrement == 0). */
            roundData->exception |= float_flag_overflow | float_flag_inexact;
            return packFloat32( zSign, 0xFF, 0 ) - ( roundIncrement == 0 );
        }
        if ( zExp < 0 ) {
            /* Subnormal result: shift into place before rounding. */
            isTiny =
                   ( float_detect_tininess == float_tininess_before_rounding )
                || ( zExp < -1 )
                || ( zSig + roundIncrement < 0x80000000 );
            shift32RightJamming( zSig, - zExp, &zSig );
            zExp = 0;
            roundBits = zSig & 0x7F;
            if ( isTiny && roundBits ) roundData->exception |= float_flag_underflow;
        }
    }
    if ( roundBits ) roundData->exception |= float_flag_inexact;
    zSig = ( zSig + roundIncrement )>>7;
    /* Ties-to-even: clear the LSB on an exact halfway case. */
    zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
    if ( zSig == 0 ) zExp = 0;
    return packFloat32( zSign, zExp, zSig );

}
0268
0269
0270
0271
0272
0273
0274
0275
0276
0277
0278
0279 static float32
0280 normalizeRoundAndPackFloat32( struct roundingData *roundData, flag zSign, int16 zExp, bits32 zSig )
0281 {
0282 int8 shiftCount;
0283
0284 shiftCount = countLeadingZeros32( zSig ) - 1;
0285 return roundAndPackFloat32( roundData, zSign, zExp - shiftCount, zSig<<shiftCount );
0286
0287 }
0288
0289
0290
0291
0292
0293
0294 INLINE bits64 extractFloat64Frac( float64 a )
0295 {
0296
0297 return a & LIT64( 0x000FFFFFFFFFFFFF );
0298
0299 }
0300
0301
0302
0303
0304
0305
0306 INLINE int16 extractFloat64Exp( float64 a )
0307 {
0308
0309 return ( a>>52 ) & 0x7FF;
0310
0311 }
0312
0313
0314
0315
0316
0317
#if 0
/*
 * Return the sign bit of the double-precision value `a'.
 * NOTE(review): compiled out — callers presumably pick up a definition
 * from a header; confirm before re-enabling this copy.
 */
INLINE flag extractFloat64Sign( float64 a )
{

    return a>>63;

}
#endif
0326
0327
0328
0329
0330
0331
0332
0333
0334
0335 static void
0336 normalizeFloat64Subnormal( bits64 aSig, int16 *zExpPtr, bits64 *zSigPtr )
0337 {
0338 int8 shiftCount;
0339
0340 shiftCount = countLeadingZeros64( aSig ) - 11;
0341 *zSigPtr = aSig<<shiftCount;
0342 *zExpPtr = 1 - shiftCount;
0343
0344 }
0345
0346
0347
0348
0349
0350
0351
0352
0353
0354
0355
0356
0357
0358 INLINE float64 packFloat64( flag zSign, int16 zExp, bits64 zSig )
0359 {
0360
0361 return ( ( (bits64) zSign )<<63 ) + ( ( (bits64) zExp )<<52 ) + zSig;
0362
0363 }
0364
0365
0366
0367
0368
0369
0370
0371
0372
0373
0374
0375
0376
0377
0378
0379
0380
0381
0382
0383
0384
0385
0386
0387
/*
 * Round and pack sign `zSign', exponent `zExp' and significand `zSig'
 * (carrying 10 extra rounding bits at the bottom) into a double-precision
 * value, honouring roundData->mode and raising overflow/underflow/inexact
 * exceptions as required.
 */
static float64 roundAndPackFloat64( struct roundingData *roundData, flag zSign, int16 zExp, bits64 zSig )
{
    int8 roundingMode;
    flag roundNearestEven;
    int16 roundIncrement, roundBits;
    flag isTiny;

    roundingMode = roundData->mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    roundIncrement = 0x200;    /* half of the 0x400 LSB step */
    if ( ! roundNearestEven ) {
        if ( roundingMode == float_round_to_zero ) {
            roundIncrement = 0;
        }
        else {
            /* Directed rounding toward the sign's direction only. */
            roundIncrement = 0x3FF;
            if ( zSign ) {
                if ( roundingMode == float_round_up ) roundIncrement = 0;
            }
            else {
                if ( roundingMode == float_round_down ) roundIncrement = 0;
            }
        }
    }
    roundBits = zSig & 0x3FF;
    /* The unsigned compare catches both zExp >= 0x7FD and negative zExp. */
    if ( 0x7FD <= (bits16) zExp ) {
        if ( ( 0x7FD < zExp )
             || ( ( zExp == 0x7FD )
                  && ( (sbits64) ( zSig + roundIncrement ) < 0 ) )
           ) {
            /* Overflow: infinity, or the largest finite value when the
               rounding mode does not round away (roundIncrement == 0). */
            roundData->exception |= float_flag_overflow | float_flag_inexact;
            return packFloat64( zSign, 0x7FF, 0 ) - ( roundIncrement == 0 );
        }
        if ( zExp < 0 ) {
            /* Subnormal result: shift into place before rounding. */
            isTiny =
                   ( float_detect_tininess == float_tininess_before_rounding )
                || ( zExp < -1 )
                || ( zSig + roundIncrement < LIT64( 0x8000000000000000 ) );
            shift64RightJamming( zSig, - zExp, &zSig );
            zExp = 0;
            roundBits = zSig & 0x3FF;
            if ( isTiny && roundBits ) roundData->exception |= float_flag_underflow;
        }
    }
    if ( roundBits ) roundData->exception |= float_flag_inexact;
    zSig = ( zSig + roundIncrement )>>10;
    /* Ties-to-even: clear the LSB on an exact halfway case. */
    zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven );
    if ( zSig == 0 ) zExp = 0;
    return packFloat64( zSign, zExp, zSig );

}
0441
0442
0443
0444
0445
0446
0447
0448
0449
0450
0451
0452 static float64
0453 normalizeRoundAndPackFloat64( struct roundingData *roundData, flag zSign, int16 zExp, bits64 zSig )
0454 {
0455 int8 shiftCount;
0456
0457 shiftCount = countLeadingZeros64( zSig ) - 1;
0458 return roundAndPackFloat64( roundData, zSign, zExp - shiftCount, zSig<<shiftCount );
0459
0460 }
0461
0462 #ifdef FLOATX80
0463
0464
0465
0466
0467
0468
0469
/*
 * Return the 64-bit significand of the extended-precision value `a'
 * (the whole low word; extended precision has no implicit-1 bit).
 */
INLINE bits64 extractFloatx80Frac( floatx80 a )
{

    return a.low;

}
0476
0477
0478
0479
0480
0481
0482
/*
 * Return the 15-bit biased exponent field of the extended-precision
 * value `a' (the high word with the sign bit masked off).
 */
INLINE int32 extractFloatx80Exp( floatx80 a )
{

    return a.high & 0x7FFF;

}
0489
0490
0491
0492
0493
0494
0495
/*
 * Return the sign bit of the extended-precision value `a'
 * (bit 15 of the high word).
 */
INLINE flag extractFloatx80Sign( floatx80 a )
{

    return a.high>>15;

}
0502
0503
0504
0505
0506
0507
0508
0509
0510
0511 static void
0512 normalizeFloatx80Subnormal( bits64 aSig, int32 *zExpPtr, bits64 *zSigPtr )
0513 {
0514 int8 shiftCount;
0515
0516 shiftCount = countLeadingZeros64( aSig );
0517 *zSigPtr = aSig<<shiftCount;
0518 *zExpPtr = 1 - shiftCount;
0519
0520 }
0521
0522
0523
0524
0525
0526
0527
0528 INLINE floatx80 packFloatx80( flag zSign, int32 zExp, bits64 zSig )
0529 {
0530 floatx80 z;
0531
0532 z.low = zSig;
0533 z.high = ( ( (bits16) zSign )<<15 ) + zExp;
0534 z.__padding = 0;
0535 return z;
0536
0537 }
0538
0539
0540
0541
0542
0543
0544
0545
0546
0547
0548
0549
0550
0551
0552
0553
0554
0555
0556
0557
0558
0559
0560
0561
0562
0563
/*
 * Round and pack an extended-precision result from sign `zSign', exponent
 * `zExp' and the 128-bit significand `zSig0':`zSig1'.  roundData->precision
 * selects 32-, 64- or 80-bit rounding precision (anything else falls back
 * to 80); roundData->mode selects the rounding direction.  Overflow,
 * underflow and inexact exceptions are raised as required.
 */
static floatx80
roundAndPackFloatx80(
    struct roundingData *roundData, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
)
{
    int8 roundingMode, roundingPrecision;
    flag roundNearestEven, increment, isTiny;
    int64 roundIncrement, roundMask, roundBits;

    roundingMode = roundData->mode;
    roundingPrecision = roundData->precision;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    if ( roundingPrecision == 80 ) goto precision80;
    /* Reduced precision: round within the 64-bit significand at the
       double- or single-precision bit position. */
    if ( roundingPrecision == 64 ) {
        roundIncrement = LIT64( 0x0000000000000400 );
        roundMask = LIT64( 0x00000000000007FF );
    }
    else if ( roundingPrecision == 32 ) {
        roundIncrement = LIT64( 0x0000008000000000 );
        roundMask = LIT64( 0x000000FFFFFFFFFF );
    }
    else {
        goto precision80;
    }
    zSig0 |= ( zSig1 != 0 );    /* fold the low word into the sticky bit */
    if ( ! roundNearestEven ) {
        if ( roundingMode == float_round_to_zero ) {
            roundIncrement = 0;
        }
        else {
            /* Directed rounding toward the sign's direction only. */
            roundIncrement = roundMask;
            if ( zSign ) {
                if ( roundingMode == float_round_up ) roundIncrement = 0;
            }
            else {
                if ( roundingMode == float_round_down ) roundIncrement = 0;
            }
        }
    }
    roundBits = zSig0 & roundMask;
    /* The unsigned compare catches zExp <= 0 as well as zExp >= 0x7FFE. */
    if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {
        if ( ( 0x7FFE < zExp )
             || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
           ) {
            goto overflow;
        }
        if ( zExp <= 0 ) {
            /* Subnormal result at reduced precision. */
            isTiny =
                   ( float_detect_tininess == float_tininess_before_rounding )
                || ( zExp < 0 )
                || ( zSig0 <= zSig0 + roundIncrement );
            shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
            zExp = 0;
            roundBits = zSig0 & roundMask;
            if ( isTiny && roundBits ) roundData->exception |= float_flag_underflow;
            if ( roundBits ) roundData->exception |= float_flag_inexact;
            zSig0 += roundIncrement;
            if ( (sbits64) zSig0 < 0 ) zExp = 1;
            /* Ties-to-even at the reduced-precision LSB. */
            roundIncrement = roundMask + 1;
            if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
                roundMask |= roundIncrement;
            }
            zSig0 &= ~ roundMask;
            return packFloatx80( zSign, zExp, zSig0 );
        }
    }
    if ( roundBits ) roundData->exception |= float_flag_inexact;
    zSig0 += roundIncrement;
    /* Carry out of the significand: renormalize to 1.0 * 2^(zExp+1). */
    if ( zSig0 < roundIncrement ) {
        ++zExp;
        zSig0 = LIT64( 0x8000000000000000 );
    }
    roundIncrement = roundMask + 1;
    if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
        roundMask |= roundIncrement;
    }
    zSig0 &= ~ roundMask;
    if ( zSig0 == 0 ) zExp = 0;
    return packFloatx80( zSign, zExp, zSig0 );
 precision80:
    /* Full 80-bit precision: zSig1 holds the round and sticky bits. */
    increment = ( (sbits64) zSig1 < 0 );
    if ( ! roundNearestEven ) {
        if ( roundingMode == float_round_to_zero ) {
            increment = 0;
        }
        else {
            if ( zSign ) {
                increment = ( roundingMode == float_round_down ) && zSig1;
            }
            else {
                increment = ( roundingMode == float_round_up ) && zSig1;
            }
        }
    }
    if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {
        if ( ( 0x7FFE < zExp )
             || ( ( zExp == 0x7FFE )
                  && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) )
                  && increment
                )
           ) {
            roundMask = 0;
 overflow:
            roundData->exception |= float_flag_overflow | float_flag_inexact;
            /* Modes that round toward zero in this direction deliver the
               largest finite value instead of infinity. */
            if ( ( roundingMode == float_round_to_zero )
                 || ( zSign && ( roundingMode == float_round_up ) )
                 || ( ! zSign && ( roundingMode == float_round_down ) )
               ) {
                return packFloatx80( zSign, 0x7FFE, ~ roundMask );
            }
            return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
        }
        if ( zExp <= 0 ) {
            /* Subnormal result at full precision. */
            isTiny =
                   ( float_detect_tininess == float_tininess_before_rounding )
                || ( zExp < 0 )
                || ! increment
                || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) );
            shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
            zExp = 0;
            if ( isTiny && zSig1 ) roundData->exception |= float_flag_underflow;
            if ( zSig1 ) roundData->exception |= float_flag_inexact;
            /* Recompute the increment after the denormalizing shift. */
            if ( roundNearestEven ) {
                increment = ( (sbits64) zSig1 < 0 );
            }
            else {
                if ( zSign ) {
                    increment = ( roundingMode == float_round_down ) && zSig1;
                }
                else {
                    increment = ( roundingMode == float_round_up ) && zSig1;
                }
            }
            if ( increment ) {
                ++zSig0;
                /* Ties-to-even: clear the LSB on an exact halfway case. */
                zSig0 &= ~ ( ( zSig1 + zSig1 == 0 ) & roundNearestEven );
                if ( (sbits64) zSig0 < 0 ) zExp = 1;
            }
            return packFloatx80( zSign, zExp, zSig0 );
        }
    }
    if ( zSig1 ) roundData->exception |= float_flag_inexact;
    if ( increment ) {
        ++zSig0;
        if ( zSig0 == 0 ) {
            /* Carry out of the significand: renormalize upward. */
            ++zExp;
            zSig0 = LIT64( 0x8000000000000000 );
        }
        else {
            zSig0 &= ~ ( ( zSig1 + zSig1 == 0 ) & roundNearestEven );
        }
    }
    else {
        if ( zSig0 == 0 ) zExp = 0;
    }

    return packFloatx80( zSign, zExp, zSig0 );
}
0722
0723
0724
0725
0726
0727
0728
0729
0730
0731
0732
0733 static floatx80
0734 normalizeRoundAndPackFloatx80(
0735 struct roundingData *roundData, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
0736 )
0737 {
0738 int8 shiftCount;
0739
0740 if ( zSig0 == 0 ) {
0741 zSig0 = zSig1;
0742 zSig1 = 0;
0743 zExp -= 64;
0744 }
0745 shiftCount = countLeadingZeros64( zSig0 );
0746 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
0747 zExp -= shiftCount;
0748 return
0749 roundAndPackFloatx80( roundData, zSign, zExp, zSig0, zSig1 );
0750
0751 }
0752
0753 #endif
0754
0755
0756
0757
0758
0759
0760
0761
0762 float32 int32_to_float32(struct roundingData *roundData, int32 a)
0763 {
0764 flag zSign;
0765
0766 if ( a == 0 ) return 0;
0767 if ( a == 0x80000000 ) return packFloat32( 1, 0x9E, 0 );
0768 zSign = ( a < 0 );
0769 return normalizeRoundAndPackFloat32( roundData, zSign, 0x9C, zSign ? - a : a );
0770
0771 }
0772
0773
0774
0775
0776
0777
0778
0779
0780 float64 int32_to_float64( int32 a )
0781 {
0782 flag aSign;
0783 uint32 absA;
0784 int8 shiftCount;
0785 bits64 zSig;
0786
0787 if ( a == 0 ) return 0;
0788 aSign = ( a < 0 );
0789 absA = aSign ? - a : a;
0790 shiftCount = countLeadingZeros32( absA ) + 21;
0791 zSig = absA;
0792 return packFloat64( aSign, 0x432 - shiftCount, zSig<<shiftCount );
0793
0794 }
0795
0796 #ifdef FLOATX80
0797
0798
0799
0800
0801
0802
0803
0804
0805
0806 floatx80 int32_to_floatx80( int32 a )
0807 {
0808 flag zSign;
0809 uint32 absA;
0810 int8 shiftCount;
0811 bits64 zSig;
0812
0813 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
0814 zSign = ( a < 0 );
0815 absA = zSign ? - a : a;
0816 shiftCount = countLeadingZeros32( absA ) + 32;
0817 zSig = absA;
0818 return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
0819
0820 }
0821
0822 #endif
0823
0824
0825
0826
0827
0828
0829
0830
0831
0832
0833
0834
0835 int32 float32_to_int32( struct roundingData *roundData, float32 a )
0836 {
0837 flag aSign;
0838 int16 aExp, shiftCount;
0839 bits32 aSig;
0840 bits64 zSig;
0841
0842 aSig = extractFloat32Frac( a );
0843 aExp = extractFloat32Exp( a );
0844 aSign = extractFloat32Sign( a );
0845 if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
0846 if ( aExp ) aSig |= 0x00800000;
0847 shiftCount = 0xAF - aExp;
0848 zSig = aSig;
0849 zSig <<= 32;
0850 if ( 0 < shiftCount ) shift64RightJamming( zSig, shiftCount, &zSig );
0851 return roundAndPackInt32( roundData, aSign, zSig );
0852
0853 }
0854
0855
0856
0857
0858
0859
0860
0861
0862
0863
0864
0865
/*
 * Convert the single-precision value `a' to a 32-bit signed integer,
 * always rounding toward zero.  NaNs and out-of-range values raise
 * float_flag_invalid and return a saturated result; a discarded nonzero
 * fraction raises float_flag_inexact.
 */
int32 float32_to_int32_round_to_zero( float32 a )
{
    flag aSign;
    int16 aExp, shiftCount;
    bits32 aSig;
    int32 z;

    aSig = extractFloat32Frac( a );
    aExp = extractFloat32Exp( a );
    aSign = extractFloat32Sign( a );
    shiftCount = aExp - 0x9E;
    if ( 0 <= shiftCount ) {
        /* |a| >= 2^31: only -2^31 itself (0xCF000000) is representable. */
        if ( a == 0xCF000000 ) return 0x80000000;
        float_raise( float_flag_invalid );
        /* Positive values and NaNs saturate high, negatives low. */
        if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF;
        return 0x80000000;
    }
    else if ( aExp <= 0x7E ) {
        /* |a| < 1 truncates to zero; nonzero values are inexact. */
        if ( aExp | aSig ) float_raise( float_flag_inexact );
        return 0;
    }
    aSig = ( aSig | 0x00800000 )<<8;
    z = aSig>>( - shiftCount );
    /* Any bits shifted off mean the truncation lost information. */
    if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) {
        float_raise( float_flag_inexact );
    }
    return aSign ? - z : z;

}
0895
0896
0897
0898
0899
0900
0901
0902
0903
/*
 * Convert the single-precision value `a' to double precision.  The
 * conversion is exact except for NaN payload translation.
 */
float64 float32_to_float64( float32 a )
{
    flag aSign;
    int16 aExp;
    bits32 aSig;

    aSig = extractFloat32Frac( a );
    aExp = extractFloat32Exp( a );
    aSign = extractFloat32Sign( a );
    if ( aExp == 0xFF ) {
        /* NaN propagates via the common-NaN format; infinity maps to
           infinity. */
        if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a ) );
        return packFloat64( aSign, 0x7FF, 0 );
    }
    if ( aExp == 0 ) {
        if ( aSig == 0 ) return packFloat64( aSign, 0, 0 );
        /* Subnormal: normalize, then pre-decrement the exponent because
           packFloat64 will re-add the implicit-1 bit carried in aSig. */
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
        --aExp;
    }
    /* Rebias (0x3FF - 0x7F = 0x380) and widen the fraction 23 -> 52 bits. */
    return packFloat64( aSign, aExp + 0x380, ( (bits64) aSig )<<29 );

}
0925
0926 #ifdef FLOATX80
0927
0928
0929
0930
0931
0932
0933
0934
0935
/*
 * Convert the single-precision value `a' to extended precision.  The
 * conversion is exact except for NaN payload translation.
 */
floatx80 float32_to_floatx80( float32 a )
{
    flag aSign;
    int16 aExp;
    bits32 aSig;

    aSig = extractFloat32Frac( a );
    aExp = extractFloat32Exp( a );
    aSign = extractFloat32Sign( a );
    if ( aExp == 0xFF ) {
        /* NaN propagates via the common-NaN format; infinity maps to
           infinity. */
        if ( aSig ) return commonNaNToFloatx80( float32ToCommonNaN( a ) );
        return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
    }
    if ( aExp == 0 ) {
        if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
    }
    /* Extended precision has an explicit leading-1 bit: set it, rebias
       (0x3FFF - 0x7F = 0x3F80), and left-align the 24-bit significand. */
    aSig |= 0x00800000;
    return packFloatx80( aSign, aExp + 0x3F80, ( (bits64) aSig )<<40 );

}
0957
0958 #endif
0959
0960
0961
0962
0963
0964
0965
0966
0967
/*
 * Round the single-precision value `a' to an integer-valued float
 * according to roundData->mode.  float_flag_inexact is raised whenever
 * the result differs from the operand; NaNs are propagated.
 */
float32 float32_round_to_int( struct roundingData *roundData, float32 a )
{
    flag aSign;
    int16 aExp;
    bits32 lastBitMask, roundBitsMask;
    int8 roundingMode;
    float32 z;

    aExp = extractFloat32Exp( a );
    /* Exponent >= 0x96: |a| >= 2^23, so `a' is already an integer. */
    if ( 0x96 <= aExp ) {
        if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) {
            return propagateFloat32NaN( a, a );
        }
        return a;
    }
    roundingMode = roundData->mode;
    /* Exponent <= 0x7E: |a| < 1, so the result is 0 or +/-1. */
    if ( aExp <= 0x7E ) {
        if ( (bits32) ( a<<1 ) == 0 ) return a;    /* +/-0 stays as is */
        roundData->exception |= float_flag_inexact;
        aSign = extractFloat32Sign( a );
        switch ( roundingMode ) {
         case float_round_nearest_even:
            /* Values in [0.5, 1) round to +/-1. */
            if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) {
                return packFloat32( aSign, 0x7F, 0 );
            }
            break;
         case float_round_down:
            return aSign ? 0xBF800000 : 0;
         case float_round_up:
            return aSign ? 0x80000000 : 0x3F800000;
        }
        return packFloat32( aSign, 0, 0 );
    }
    /* General case: round at the bit representing 1 in this exponent. */
    lastBitMask = 1;
    lastBitMask <<= 0x96 - aExp;
    roundBitsMask = lastBitMask - 1;
    z = a;
    if ( roundingMode == float_round_nearest_even ) {
        z += lastBitMask>>1;
        /* Ties-to-even: clear the integer LSB on an exact halfway case. */
        if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
    }
    else if ( roundingMode != float_round_to_zero ) {
        if ( extractFloat32Sign( z ) ^ ( roundingMode == float_round_up ) ) {
            z += roundBitsMask;
        }
    }
    z &= ~ roundBitsMask;
    if ( z != a ) roundData->exception |= float_flag_inexact;
    return z;

}
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
/*
 * Add the magnitudes of the single-precision values `a' and `b' and give
 * the result the sign `zSign'.  NaNs are propagated.  Significands are
 * kept with 6 guard bits so roundAndPackFloat32 receives its expected
 * 7-bit rounding field after the final left shift.
 */
static float32 addFloat32Sigs( struct roundingData *roundData, float32 a, float32 b, flag zSign )
{
    int16 aExp, bExp, zExp;
    bits32 aSig, bSig, zSig;
    int16 expDiff;

    aSig = extractFloat32Frac( a );
    aExp = extractFloat32Exp( a );
    bSig = extractFloat32Frac( b );
    bExp = extractFloat32Exp( b );
    expDiff = aExp - bExp;
    aSig <<= 6;
    bSig <<= 6;
    if ( 0 < expDiff ) {
        /* `a' dominates: align `b' down to `a''s exponent. */
        if ( aExp == 0xFF ) {
            if ( aSig ) return propagateFloat32NaN( a, b );
            return a;    /* infinity + finite = infinity */
        }
        if ( bExp == 0 ) {
            --expDiff;    /* subnormal exponent is effectively 1 */
        }
        else {
            bSig |= 0x20000000;    /* implicit 1 at the 6-bit-shifted position */
        }
        shift32RightJamming( bSig, expDiff, &bSig );
        zExp = aExp;
    }
    else if ( expDiff < 0 ) {
        /* `b' dominates: align `a' down to `b''s exponent. */
        if ( bExp == 0xFF ) {
            if ( bSig ) return propagateFloat32NaN( a, b );
            return packFloat32( zSign, 0xFF, 0 );
        }
        if ( aExp == 0 ) {
            ++expDiff;
        }
        else {
            aSig |= 0x20000000;
        }
        shift32RightJamming( aSig, - expDiff, &aSig );
        zExp = bExp;
    }
    else {
        /* Equal exponents. */
        if ( aExp == 0xFF ) {
            if ( aSig | bSig ) return propagateFloat32NaN( a, b );
            return a;
        }
        /* Both subnormal (or zero): the sum cannot carry past the field. */
        if ( aExp == 0 ) return packFloat32( zSign, 0, ( aSig + bSig )>>6 );
        /* Both implicit-1 bits add to 0x40000000; no pre-shift needed. */
        zSig = 0x40000000 + aSig + bSig;
        zExp = aExp;
        goto roundAndPack;
    }
    aSig |= 0x20000000;
    /* Assume no carry out; if the top bit shows a carry, undo the shift. */
    zSig = ( aSig + bSig )<<1;
    --zExp;
    if ( (sbits32) zSig < 0 ) {
        zSig = aSig + bSig;
        ++zExp;
    }
 roundAndPack:
    return roundAndPackFloat32( roundData, zSign, zExp, zSig );

}
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
/*
 * Subtract the magnitudes of the single-precision values `a' and `b' and
 * give the result the sign `zSign' (flipped when `b' dominates).  NaNs
 * are propagated; infinity - infinity raises float_flag_invalid.
 * Significands carry 7 guard bits for the final normalization.
 */
static float32 subFloat32Sigs( struct roundingData *roundData, float32 a, float32 b, flag zSign )
{
    int16 aExp, bExp, zExp;
    bits32 aSig, bSig, zSig;
    int16 expDiff;

    aSig = extractFloat32Frac( a );
    aExp = extractFloat32Exp( a );
    bSig = extractFloat32Frac( b );
    bExp = extractFloat32Exp( b );
    expDiff = aExp - bExp;
    aSig <<= 7;
    bSig <<= 7;
    if ( 0 < expDiff ) goto aExpBigger;
    if ( expDiff < 0 ) goto bExpBigger;
    /* Equal exponents. */
    if ( aExp == 0xFF ) {
        if ( aSig | bSig ) return propagateFloat32NaN( a, b );
        /* infinity - infinity is invalid. */
        roundData->exception |= float_flag_invalid;
        return float32_default_nan;
    }
    if ( aExp == 0 ) {
        /* Both subnormal: treat them as exponent 1 for the subtraction. */
        aExp = 1;
        bExp = 1;
    }
    if ( bSig < aSig ) goto aBigger;
    if ( aSig < bSig ) goto bBigger;
    /* Exact cancellation: the zero's sign depends on the rounding mode. */
    return packFloat32( roundData->mode == float_round_down, 0, 0 );
 bExpBigger:
    if ( bExp == 0xFF ) {
        if ( bSig ) return propagateFloat32NaN( a, b );
        return packFloat32( zSign ^ 1, 0xFF, 0 );
    }
    if ( aExp == 0 ) {
        ++expDiff;
    }
    else {
        aSig |= 0x40000000;    /* implicit 1 at the 7-bit-shifted position */
    }
    shift32RightJamming( aSig, - expDiff, &aSig );
    bSig |= 0x40000000;
 bBigger:
    zSig = bSig - aSig;
    zExp = bExp;
    zSign ^= 1;    /* the larger operand's sign wins */
    goto normalizeRoundAndPack;
 aExpBigger:
    if ( aExp == 0xFF ) {
        if ( aSig ) return propagateFloat32NaN( a, b );
        return a;
    }
    if ( bExp == 0 ) {
        --expDiff;
    }
    else {
        bSig |= 0x40000000;
    }
    shift32RightJamming( bSig, expDiff, &bSig );
    aSig |= 0x40000000;
 aBigger:
    zSig = aSig - bSig;
    zExp = aExp;
 normalizeRoundAndPack:
    --zExp;
    return normalizeRoundAndPackFloat32( roundData, zSign, zExp, zSig );

}
1167
1168
1169
1170
1171
1172
1173
1174
1175 float32 float32_add( struct roundingData *roundData, float32 a, float32 b )
1176 {
1177 flag aSign, bSign;
1178
1179 aSign = extractFloat32Sign( a );
1180 bSign = extractFloat32Sign( b );
1181 if ( aSign == bSign ) {
1182 return addFloat32Sigs( roundData, a, b, aSign );
1183 }
1184 else {
1185 return subFloat32Sigs( roundData, a, b, aSign );
1186 }
1187
1188 }
1189
1190
1191
1192
1193
1194
1195
1196
1197 float32 float32_sub( struct roundingData *roundData, float32 a, float32 b )
1198 {
1199 flag aSign, bSign;
1200
1201 aSign = extractFloat32Sign( a );
1202 bSign = extractFloat32Sign( b );
1203 if ( aSign == bSign ) {
1204 return subFloat32Sigs( roundData, a, b, aSign );
1205 }
1206 else {
1207 return addFloat32Sigs( roundData, a, b, aSign );
1208 }
1209
1210 }
1211
1212
1213
1214
1215
1216
1217
1218
/*
 * Single-precision multiplication per IEC/IEEE 754.  NaNs are
 * propagated; infinity * zero raises float_flag_invalid and returns the
 * default NaN.
 */
float32 float32_mul( struct roundingData *roundData, float32 a, float32 b )
{
    flag aSign, bSign, zSign;
    int16 aExp, bExp, zExp;
    bits32 aSig, bSig;
    bits64 zSig64;
    bits32 zSig;

    aSig = extractFloat32Frac( a );
    aExp = extractFloat32Exp( a );
    aSign = extractFloat32Sign( a );
    bSig = extractFloat32Frac( b );
    bExp = extractFloat32Exp( b );
    bSign = extractFloat32Sign( b );
    zSign = aSign ^ bSign;
    if ( aExp == 0xFF ) {
        if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
            return propagateFloat32NaN( a, b );
        }
        /* infinity * zero is invalid. */
        if ( ( bExp | bSig ) == 0 ) {
            roundData->exception |= float_flag_invalid;
            return float32_default_nan;
        }
        return packFloat32( zSign, 0xFF, 0 );
    }
    if ( bExp == 0xFF ) {
        if ( bSig ) return propagateFloat32NaN( a, b );
        if ( ( aExp | aSig ) == 0 ) {
            roundData->exception |= float_flag_invalid;
            return float32_default_nan;
        }
        return packFloat32( zSign, 0xFF, 0 );
    }
    if ( aExp == 0 ) {
        if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
    }
    if ( bExp == 0 ) {
        if ( bSig == 0 ) return packFloat32( zSign, 0, 0 );
        normalizeFloat32Subnormal( bSig, &bExp, &bSig );
    }
    zExp = aExp + bExp - 0x7F;
    /* Left-align both 24-bit significands (one spare bit on aSig for the
       possible carry of the product). */
    aSig = ( aSig | 0x00800000 )<<7;
    bSig = ( bSig | 0x00800000 )<<8;
    /* 64-bit product; keep the top 32 bits with sticky jamming. */
    shift64RightJamming( ( (bits64) aSig ) * bSig, 32, &zSig64 );
    zSig = zSig64;
    /* Normalize when the product had no carry into the top bit. */
    if ( 0 <= (sbits32) ( zSig<<1 ) ) {
        zSig <<= 1;
        --zExp;
    }
    return roundAndPackFloat32( roundData, zSign, zExp, zSig );

}
1272
1273
1274
1275
1276
1277
1278
1279
/*
 * Single-precision division per IEC/IEEE 754.  NaNs are propagated;
 * infinity/infinity and 0/0 raise float_flag_invalid; finite/0 raises
 * float_flag_divbyzero and returns infinity.
 */
float32 float32_div( struct roundingData *roundData, float32 a, float32 b )
{
    flag aSign, bSign, zSign;
    int16 aExp, bExp, zExp;
    bits32 aSig, bSig, zSig;

    aSig = extractFloat32Frac( a );
    aExp = extractFloat32Exp( a );
    aSign = extractFloat32Sign( a );
    bSig = extractFloat32Frac( b );
    bExp = extractFloat32Exp( b );
    bSign = extractFloat32Sign( b );
    zSign = aSign ^ bSign;
    if ( aExp == 0xFF ) {
        if ( aSig ) return propagateFloat32NaN( a, b );
        if ( bExp == 0xFF ) {
            if ( bSig ) return propagateFloat32NaN( a, b );
            /* infinity / infinity is invalid. */
            roundData->exception |= float_flag_invalid;
            return float32_default_nan;
        }
        return packFloat32( zSign, 0xFF, 0 );
    }
    if ( bExp == 0xFF ) {
        if ( bSig ) return propagateFloat32NaN( a, b );
        return packFloat32( zSign, 0, 0 );    /* finite / infinity = 0 */
    }
    if ( bExp == 0 ) {
        if ( bSig == 0 ) {
            /* 0 / 0 is invalid; anything else / 0 is a signed infinity. */
            if ( ( aExp | aSig ) == 0 ) {
                roundData->exception |= float_flag_invalid;
                return float32_default_nan;
            }
            roundData->exception |= float_flag_divbyzero;
            return packFloat32( zSign, 0xFF, 0 );
        }
        normalizeFloat32Subnormal( bSig, &bExp, &bSig );
    }
    if ( aExp == 0 ) {
        if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
    }
    zExp = aExp - bExp + 0x7D;
    aSig = ( aSig | 0x00800000 )<<7;
    bSig = ( bSig | 0x00800000 )<<8;
    /* Keep the quotient in [0.5, 1) so it fills 31 bits. */
    if ( bSig <= ( aSig + aSig ) ) {
        aSig >>= 1;
        ++zExp;
    }
    {
        bits64 tmp = ( (bits64) aSig )<<32;
        do_div( tmp, bSig );
        zSig = tmp;
    }
    /* If the low rounding bits came out zero, recover the sticky bit by
       checking whether the division was exact. */
    if ( ( zSig & 0x3F ) == 0 ) {
        zSig |= ( ( (bits64) bSig ) * zSig != ( (bits64) aSig )<<32 );
    }
    return roundAndPackFloat32( roundData, zSign, zExp, zSig );

}
1339
1340
1341
1342
1343
1344
1345
1346
/*
 * IEC/IEEE 754 single-precision remainder: a REM b, with the quotient
 * rounded to nearest even.  NaNs are propagated; a infinite or b zero
 * raises float_flag_invalid and returns the default NaN.
 */
float32 float32_rem( struct roundingData *roundData, float32 a, float32 b )
{
    flag aSign, bSign, zSign;
    int16 aExp, bExp, expDiff;
    bits32 aSig, bSig;
    bits32 q;
    bits64 aSig64, bSig64, q64;
    bits32 alternateASig;
    sbits32 sigMean;

    aSig = extractFloat32Frac( a );
    aExp = extractFloat32Exp( a );
    aSign = extractFloat32Sign( a );
    bSig = extractFloat32Frac( b );
    bExp = extractFloat32Exp( b );
    bSign = extractFloat32Sign( b );
    if ( aExp == 0xFF ) {
        if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
            return propagateFloat32NaN( a, b );
        }
        /* infinity REM anything is invalid. */
        roundData->exception |= float_flag_invalid;
        return float32_default_nan;
    }
    if ( bExp == 0xFF ) {
        if ( bSig ) return propagateFloat32NaN( a, b );
        return a;    /* finite REM infinity = the finite operand */
    }
    if ( bExp == 0 ) {
        if ( bSig == 0 ) {
            /* anything REM 0 is invalid. */
            roundData->exception |= float_flag_invalid;
            return float32_default_nan;
        }
        normalizeFloat32Subnormal( bSig, &bExp, &bSig );
    }
    if ( aExp == 0 ) {
        if ( aSig == 0 ) return a;
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
    }
    expDiff = aExp - bExp;
    aSig |= 0x00800000;
    bSig |= 0x00800000;
    if ( expDiff < 32 ) {
        /* Small exponent gap: one 32/64-bit division suffices. */
        aSig <<= 8;
        bSig <<= 8;
        if ( expDiff < 0 ) {
            if ( expDiff < -1 ) return a;    /* |a| < |b|/2: remainder is a */
            aSig >>= 1;
        }
        q = ( bSig <= aSig );
        if ( q ) aSig -= bSig;
        if ( 0 < expDiff ) {
            bits64 tmp = ( (bits64) aSig )<<32;
            do_div( tmp, bSig );
            q = tmp;
            q >>= 32 - expDiff;
            bSig >>= 2;
            aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
        }
        else {
            aSig >>= 2;
            bSig >>= 2;
        }
    }
    else {
        /* Large exponent gap: iterate with 64-bit estimated quotients,
           reducing expDiff by 62 bits per pass. */
        if ( bSig <= aSig ) aSig -= bSig;
        aSig64 = ( (bits64) aSig )<<40;
        bSig64 = ( (bits64) bSig )<<40;
        expDiff -= 64;
        while ( 0 < expDiff ) {
            q64 = estimateDiv128To64( aSig64, 0, bSig64 );
            q64 = ( 2 < q64 ) ? q64 - 2 : 0;    /* under-estimate to stay safe */
            aSig64 = - ( ( bSig * q64 )<<38 );
            expDiff -= 62;
        }
        expDiff += 64;
        q64 = estimateDiv128To64( aSig64, 0, bSig64 );
        q64 = ( 2 < q64 ) ? q64 - 2 : 0;
        q = q64>>( 64 - expDiff );
        bSig <<= 6;
        aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
    }
    /* Final correction: step past zero, then pick whichever of the last
       two residues is nearer (ties go to the even quotient). */
    do {
        alternateASig = aSig;
        ++q;
        aSig -= bSig;
    } while ( 0 <= (sbits32) aSig );
    sigMean = aSig + alternateASig;
    if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
        aSig = alternateASig;
    }
    zSign = ( (sbits32) aSig < 0 );
    if ( zSign ) aSig = - aSig;
    return normalizeRoundAndPackFloat32( roundData, aSign ^ zSign, bExp, aSig );

}
1442
1443
1444
1445
1446
1447
1448
1449
/*
 * Single-precision square root per IEC/IEEE 754.  NaNs are propagated;
 * a negative nonzero operand raises float_flag_invalid and returns the
 * default NaN; +/-0 and +infinity are returned unchanged.
 */
float32 float32_sqrt( struct roundingData *roundData, float32 a )
{
    flag aSign;
    int16 aExp, zExp;
    bits32 aSig, zSig;
    bits64 rem, term;

    aSig = extractFloat32Frac( a );
    aExp = extractFloat32Exp( a );
    aSign = extractFloat32Sign( a );
    if ( aExp == 0xFF ) {
        if ( aSig ) return propagateFloat32NaN( a, 0 );
        if ( ! aSign ) return a;    /* sqrt(+inf) = +inf */
        roundData->exception |= float_flag_invalid;
        return float32_default_nan;
    }
    if ( aSign ) {
        if ( ( aExp | aSig ) == 0 ) return a;    /* sqrt(-0) = -0 */
        roundData->exception |= float_flag_invalid;
        return float32_default_nan;
    }
    if ( aExp == 0 ) {
        if ( aSig == 0 ) return 0;
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
    }
    /* Halve the unbiased exponent; the parity is folded into aSig below. */
    zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E;
    aSig = ( aSig | 0x00800000 )<<8;
    zSig = estimateSqrt32( aExp, aSig ) + 2;
    /* If the estimate sits near a rounding boundary, refine it exactly. */
    if ( ( zSig & 0x7F ) <= 5 ) {
        if ( zSig < 2 ) {
            zSig = 0xFFFFFFFF;
        }
        else {
            aSig >>= aExp & 1;
            term = ( (bits64) zSig ) * zSig;
            rem = ( ( (bits64) aSig )<<32 ) - term;
            /* Decrement until the square no longer exceeds the operand. */
            while ( (sbits64) rem < 0 ) {
                --zSig;
                rem += ( ( (bits64) zSig )<<1 ) | 1;
            }
            zSig |= ( rem != 0 );    /* sticky bit for inexactness */
        }
    }
    shift32RightJamming( zSig, 1, &zSig );
    return roundAndPackFloat32( roundData, 0, zExp, zSig );

}
1497
1498
1499
1500
1501
1502
1503
1504
1505 flag float32_eq( float32 a, float32 b )
1506 {
1507
1508 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1509 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1510 ) {
1511 if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
1512 float_raise( float_flag_invalid );
1513 }
1514 return 0;
1515 }
1516 return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
1517
1518 }
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528 flag float32_le( float32 a, float32 b )
1529 {
1530 flag aSign, bSign;
1531
1532 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1533 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1534 ) {
1535 float_raise( float_flag_invalid );
1536 return 0;
1537 }
1538 aSign = extractFloat32Sign( a );
1539 bSign = extractFloat32Sign( b );
1540 if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
1541 return ( a == b ) || ( aSign ^ ( a < b ) );
1542
1543 }
1544
1545
1546
1547
1548
1549
1550
1551
1552 flag float32_lt( float32 a, float32 b )
1553 {
1554 flag aSign, bSign;
1555
1556 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1557 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1558 ) {
1559 float_raise( float_flag_invalid );
1560 return 0;
1561 }
1562 aSign = extractFloat32Sign( a );
1563 bSign = extractFloat32Sign( b );
1564 if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
1565 return ( a != b ) && ( aSign ^ ( a < b ) );
1566
1567 }
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577 flag float32_eq_signaling( float32 a, float32 b )
1578 {
1579
1580 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1581 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1582 ) {
1583 float_raise( float_flag_invalid );
1584 return 0;
1585 }
1586 return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
1587
1588 }
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598 flag float32_le_quiet( float32 a, float32 b )
1599 {
1600 flag aSign, bSign;
1601
1602
1603 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1604 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1605 ) {
1606
1607 return 0;
1608 }
1609 aSign = extractFloat32Sign( a );
1610 bSign = extractFloat32Sign( b );
1611 if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
1612 return ( a == b ) || ( aSign ^ ( a < b ) );
1613
1614 }
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624 flag float32_lt_quiet( float32 a, float32 b )
1625 {
1626 flag aSign, bSign;
1627
1628 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1629 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1630 ) {
1631
1632 return 0;
1633 }
1634 aSign = extractFloat32Sign( a );
1635 bSign = extractFloat32Sign( b );
1636 if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
1637 return ( a != b ) && ( aSign ^ ( a < b ) );
1638
1639 }
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
/*
-------------------------------------------------------------------------------
Converts the double-precision value `a' to a 32-bit signed integer using the
rounding mode in `roundData'.  A NaN input has its sign cleared so that
roundAndPackInt32 saturates it to the largest positive integer while raising
the invalid exception.
-------------------------------------------------------------------------------
*/
int32 float64_to_int32( struct roundingData *roundData, float64 a )
{
    flag aSign;
    int16 aExp, shiftCount;
    bits64 aSig;

    aSig = extractFloat64Frac( a );
    aExp = extractFloat64Exp( a );
    aSign = extractFloat64Sign( a );
    if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;    /* NaN: treat as positive */
    if ( aExp ) aSig |= LIT64( 0x0010000000000000 );    /* implicit 1 bit */
    shiftCount = 0x42C - aExp;
    if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig );
    return roundAndPackInt32( roundData, aSign, aSig );

}
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
/*
-------------------------------------------------------------------------------
Converts the double-precision value `a' to a 32-bit signed integer, always
rounding toward zero (the current rounding mode is ignored).  Out-of-range
values and NaNs raise the invalid exception and saturate; discarded fraction
bits raise the inexact exception.
-------------------------------------------------------------------------------
*/
int32 float64_to_int32_round_to_zero( float64 a )
{
    flag aSign;
    int16 aExp, shiftCount;
    bits64 aSig, savedASig;
    int32 z;

    aSig = extractFloat64Frac( a );
    aExp = extractFloat64Exp( a );
    aSign = extractFloat64Sign( a );
    shiftCount = 0x433 - aExp;
    if ( shiftCount < 21 ) {
        /* |a| >= 2^31 (or NaN/inf): cannot fit in an int32. */
        if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;    /* NaN saturates positive */
        goto invalid;
    }
    else if ( 52 < shiftCount ) {
        /* |a| < 1: truncates to zero; inexact unless a is exactly +-0. */
        if ( aExp || aSig ) float_raise( float_flag_inexact );
        return 0;
    }
    aSig |= LIT64( 0x0010000000000000 );    /* implicit 1 bit */
    savedASig = aSig;
    aSig >>= shiftCount;    /* truncate fraction (round toward zero) */
    z = aSig;
    if ( aSign ) z = - z;
    if ( ( z < 0 ) ^ aSign ) {
        /* Sign flipped on negation: magnitude overflowed int32. */
 invalid:
        float_raise( float_flag_invalid );
        return aSign ? 0x80000000 : 0x7FFFFFFF;
    }
    if ( ( aSig<<shiftCount ) != savedASig ) {
        /* Some fraction bits were discarded. */
        float_raise( float_flag_inexact );
    }
    return z;

}
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
/*
-------------------------------------------------------------------------------
Converts the double-precision value `a' to a 32-bit unsigned integer using
the rounding mode in `roundData'.
NOTE(review): the sign of `a' is discarded (aSign is forced to 0) and the
result is produced by the signed helper roundAndPackInt32, which saturates
at 0x7FFFFFFF -- so negative inputs are treated by magnitude and values in
[0x80000000, 0xFFFFFFFF] cannot be represented.  This looks like a known
limitation of this emulator rather than a full uint32 conversion; confirm
against the FPA semantics before relying on it.
-------------------------------------------------------------------------------
*/
int32 float64_to_uint32( struct roundingData *roundData, float64 a )
{
    flag aSign;
    int16 aExp, shiftCount;
    bits64 aSig;

    aSig = extractFloat64Frac( a );
    aExp = extractFloat64Exp( a );
    aSign = 0;    /* sign intentionally ignored -- see NOTE above */

    if ( aExp ) aSig |= LIT64( 0x0010000000000000 );    /* implicit 1 bit */
    shiftCount = 0x42C - aExp;
    if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig );
    return roundAndPackInt32( roundData, aSign, aSig );
}
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
/*
-------------------------------------------------------------------------------
Converts the double-precision value `a' to a 32-bit unsigned integer,
rounding toward zero.
NOTE(review): despite the name this body is identical to
float64_to_int32_round_to_zero -- it honours the sign, negates, and
saturates at 0x7FFFFFFF/0x80000000 -- so the full uint32 range is not
produced.  Presumably a known emulator limitation; confirm before use.
-------------------------------------------------------------------------------
*/
int32 float64_to_uint32_round_to_zero( float64 a )
{
    flag aSign;
    int16 aExp, shiftCount;
    bits64 aSig, savedASig;
    int32 z;

    aSig = extractFloat64Frac( a );
    aExp = extractFloat64Exp( a );
    aSign = extractFloat64Sign( a );
    shiftCount = 0x433 - aExp;
    if ( shiftCount < 21 ) {
        /* Magnitude too large (or NaN/inf). */
        if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
        goto invalid;
    }
    else if ( 52 < shiftCount ) {
        /* |a| < 1: truncates to zero; inexact unless exactly +-0. */
        if ( aExp || aSig ) float_raise( float_flag_inexact );
        return 0;
    }
    aSig |= LIT64( 0x0010000000000000 );    /* implicit 1 bit */
    savedASig = aSig;
    aSig >>= shiftCount;    /* truncate fraction */
    z = aSig;
    if ( aSign ) z = - z;
    if ( ( z < 0 ) ^ aSign ) {
 invalid:
        float_raise( float_flag_invalid );
        return aSign ? 0x80000000 : 0x7FFFFFFF;
    }
    if ( ( aSig<<shiftCount ) != savedASig ) {
        float_raise( float_flag_inexact );
    }
    return z;
}
1787
1788
1789
1790
1791
1792
1793
1794
1795
/*
-------------------------------------------------------------------------------
Converts the double-precision value `a' to single precision, rounding per
`roundData'.  NaNs are converted through the common NaN format; infinities
keep their sign.
-------------------------------------------------------------------------------
*/
float32 float64_to_float32( struct roundingData *roundData, float64 a )
{
    flag aSign;
    int16 aExp;
    bits64 aSig;
    bits32 zSig;

    aSig = extractFloat64Frac( a );
    aExp = extractFloat64Exp( a );
    aSign = extractFloat64Sign( a );
    if ( aExp == 0x7FF ) {
        if ( aSig ) return commonNaNToFloat32( float64ToCommonNaN( a ) );
        return packFloat32( aSign, 0xFF, 0 );    /* infinity */
    }
    /* Narrow the 52-bit fraction to fit single precision, jamming the
       discarded bits into the sticky bit. */
    shift64RightJamming( aSig, 22, &aSig );
    zSig = aSig;
    if ( aExp || zSig ) {
        zSig |= 0x40000000;    /* restore the implicit/leading bit position */
        aExp -= 0x381;         /* rebias from double to single exponent */
    }
    return roundAndPackFloat32( roundData, aSign, aExp, zSig );

}
1819
1820 #ifdef FLOATX80
1821
1822
1823
1824
1825
1826
1827
1828
1829
/*
-------------------------------------------------------------------------------
Converts the double-precision value `a' to the extended (80-bit) format.
The conversion is exact, so no rounding data is needed; NaNs pass through
the common NaN format.
-------------------------------------------------------------------------------
*/
floatx80 float64_to_floatx80( float64 a )
{
    flag aSign;
    int16 aExp;
    bits64 aSig;

    aSig = extractFloat64Frac( a );
    aExp = extractFloat64Exp( a );
    aSign = extractFloat64Sign( a );
    if ( aExp == 0x7FF ) {
        if ( aSig ) return commonNaNToFloatx80( float64ToCommonNaN( a ) );
        return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
    }
    if ( aExp == 0 ) {
        if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
    }
    /* Rebias exponent and left-align the significand with its explicit
       integer bit. */
    return
        packFloatx80(
            aSign, aExp + 0x3C00, ( aSig | LIT64( 0x0010000000000000 ) )<<11 );

}
1852
1853 #endif
1854
1855
1856
1857
1858
1859
1860
1861
1862
/*
-------------------------------------------------------------------------------
Rounds the double-precision value `a' to an integer-valued double, using the
rounding mode in `roundData'.  The inexact exception is raised whenever the
result differs from the input.
-------------------------------------------------------------------------------
*/
float64 float64_round_to_int( struct roundingData *roundData, float64 a )
{
    flag aSign;
    int16 aExp;
    bits64 lastBitMask, roundBitsMask;
    int8 roundingMode;
    float64 z;

    aExp = extractFloat64Exp( a );
    if ( 0x433 <= aExp ) {
        /* Already integral (or NaN/inf). */
        if ( ( aExp == 0x7FF ) && extractFloat64Frac( a ) ) {
            return propagateFloat64NaN( a, a );
        }
        return a;
    }
    if ( aExp <= 0x3FE ) {
        /* |a| < 1: result is 0 or +-1 depending on the rounding mode. */
        if ( (bits64) ( a<<1 ) == 0 ) return a;    /* +-0 stays exact */
        roundData->exception |= float_flag_inexact;
        aSign = extractFloat64Sign( a );
        switch ( roundData->mode ) {
         case float_round_nearest_even:
            /* Ties at exactly 0.5 go to +-1 (the even neighbour of 0.5
               fraction is handled by the exponent test). */
            if ( ( aExp == 0x3FE ) && extractFloat64Frac( a ) ) {
                return packFloat64( aSign, 0x3FF, 0 );
            }
            break;
         case float_round_down:
            return aSign ? LIT64( 0xBFF0000000000000 ) : 0;
         case float_round_up:
            return
            aSign ? LIT64( 0x8000000000000000 ) : LIT64( 0x3FF0000000000000 );
        }
        return packFloat64( aSign, 0, 0 );
    }
    /* General case: mask off the fraction bits below the units place,
       working directly on the raw bit pattern. */
    lastBitMask = 1;
    lastBitMask <<= 0x433 - aExp;
    roundBitsMask = lastBitMask - 1;
    z = a;
    roundingMode = roundData->mode;
    if ( roundingMode == float_round_nearest_even ) {
        z += lastBitMask>>1;
        if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;    /* tie: to even */
    }
    else if ( roundingMode != float_round_to_zero ) {
        /* Round up/down: bump magnitude only when the sign matches the mode. */
        if ( extractFloat64Sign( z ) ^ ( roundingMode == float_round_up ) ) {
            z += roundBitsMask;
        }
    }
    z &= ~ roundBitsMask;
    if ( z != a ) roundData->exception |= float_flag_inexact;
    return z;

}
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
/*
-------------------------------------------------------------------------------
Adds the magnitudes of the double-precision values `a' and `b', attaching
the sign `zSign' to the result.  Used by float64_add/float64_sub for the
like-signed case.  Significands are pre-shifted left by 9 so that carries
and guard bits fit in 64 bits.
-------------------------------------------------------------------------------
*/
static float64 addFloat64Sigs( struct roundingData *roundData, float64 a, float64 b, flag zSign )
{
    int16 aExp, bExp, zExp;
    bits64 aSig, bSig, zSig;
    int16 expDiff;

    aSig = extractFloat64Frac( a );
    aExp = extractFloat64Exp( a );
    bSig = extractFloat64Frac( b );
    bExp = extractFloat64Exp( b );
    expDiff = aExp - bExp;
    aSig <<= 9;
    bSig <<= 9;
    if ( 0 < expDiff ) {
        /* a has the larger exponent: align b to a. */
        if ( aExp == 0x7FF ) {
            if ( aSig ) return propagateFloat64NaN( a, b );
            return a;    /* inf + finite = inf */
        }
        if ( bExp == 0 ) {
            --expDiff;    /* subnormal b has no implicit bit, exponent off by 1 */
        }
        else {
            bSig |= LIT64( 0x2000000000000000 );    /* implicit 1 (shifted by 9) */
        }
        shift64RightJamming( bSig, expDiff, &bSig );
        zExp = aExp;
    }
    else if ( expDiff < 0 ) {
        /* b has the larger exponent: align a to b. */
        if ( bExp == 0x7FF ) {
            if ( bSig ) return propagateFloat64NaN( a, b );
            return packFloat64( zSign, 0x7FF, 0 );
        }
        if ( aExp == 0 ) {
            ++expDiff;
        }
        else {
            aSig |= LIT64( 0x2000000000000000 );
        }
        shift64RightJamming( aSig, - expDiff, &aSig );
        zExp = bExp;
    }
    else {
        /* Equal exponents: the sum needs no alignment shift. */
        if ( aExp == 0x7FF ) {
            if ( aSig | bSig ) return propagateFloat64NaN( a, b );
            return a;
        }
        if ( aExp == 0 ) return packFloat64( zSign, 0, ( aSig + bSig )>>9 );
        zSig = LIT64( 0x4000000000000000 ) + aSig + bSig;    /* both implicit 1s */
        zExp = aExp;
        goto roundAndPack;
    }
    aSig |= LIT64( 0x2000000000000000 );
    zSig = ( aSig + bSig )<<1;
    --zExp;
    if ( (sbits64) zSig < 0 ) {
        /* Sum carried into the next bit: undo the extra normalization shift. */
        zSig = aSig + bSig;
        ++zExp;
    }
 roundAndPack:
    return roundAndPackFloat64( roundData, zSign, zExp, zSig );

}
1987
1988
1989
1990
1991
1992
1993
1994
1995
1996
/*
-------------------------------------------------------------------------------
Subtracts the magnitudes of the double-precision values `a' and `b'; the
result carries sign `zSign' if a's magnitude is larger, the opposite sign
otherwise.  Used by float64_add/float64_sub for the unlike-signed case.
Significands are pre-shifted left by 10 to keep guard bits.
-------------------------------------------------------------------------------
*/
static float64 subFloat64Sigs( struct roundingData *roundData, float64 a, float64 b, flag zSign )
{
    int16 aExp, bExp, zExp;
    bits64 aSig, bSig, zSig;
    int16 expDiff;

    aSig = extractFloat64Frac( a );
    aExp = extractFloat64Exp( a );
    bSig = extractFloat64Frac( b );
    bExp = extractFloat64Exp( b );
    expDiff = aExp - bExp;
    aSig <<= 10;
    bSig <<= 10;
    if ( 0 < expDiff ) goto aExpBigger;
    if ( expDiff < 0 ) goto bExpBigger;
    /* Equal exponents. */
    if ( aExp == 0x7FF ) {
        if ( aSig | bSig ) return propagateFloat64NaN( a, b );
        /* inf - inf is invalid. */
        roundData->exception |= float_flag_invalid;
        return float64_default_nan;
    }
    if ( aExp == 0 ) {
        /* Both subnormal: treat exponents as 1 (no implicit bit). */
        aExp = 1;
        bExp = 1;
    }
    if ( bSig < aSig ) goto aBigger;
    if ( aSig < bSig ) goto bBigger;
    /* Exact cancellation: zero, negative only in round-down mode. */
    return packFloat64( roundData->mode == float_round_down, 0, 0 );
 bExpBigger:
    if ( bExp == 0x7FF ) {
        if ( bSig ) return propagateFloat64NaN( a, b );
        return packFloat64( zSign ^ 1, 0x7FF, 0 );
    }
    if ( aExp == 0 ) {
        ++expDiff;    /* subnormal exponent adjustment */
    }
    else {
        aSig |= LIT64( 0x4000000000000000 );    /* implicit 1 (shifted by 10) */
    }
    shift64RightJamming( aSig, - expDiff, &aSig );
    bSig |= LIT64( 0x4000000000000000 );
 bBigger:
    zSig = bSig - aSig;
    zExp = bExp;
    zSign ^= 1;    /* result takes b's effective sign */
    goto normalizeRoundAndPack;
 aExpBigger:
    if ( aExp == 0x7FF ) {
        if ( aSig ) return propagateFloat64NaN( a, b );
        return a;
    }
    if ( bExp == 0 ) {
        --expDiff;
    }
    else {
        bSig |= LIT64( 0x4000000000000000 );
    }
    shift64RightJamming( bSig, expDiff, &bSig );
    aSig |= LIT64( 0x4000000000000000 );
 aBigger:
    zSig = aSig - bSig;
    zExp = aExp;
 normalizeRoundAndPack:
    --zExp;    /* compensate the <<10 alignment before normalizing */
    return normalizeRoundAndPackFloat64( roundData, zSign, zExp, zSig );

}
2063
2064
2065
2066
2067
2068
2069
2070
2071 float64 float64_add( struct roundingData *roundData, float64 a, float64 b )
2072 {
2073 flag aSign, bSign;
2074
2075 aSign = extractFloat64Sign( a );
2076 bSign = extractFloat64Sign( b );
2077 if ( aSign == bSign ) {
2078 return addFloat64Sigs( roundData, a, b, aSign );
2079 }
2080 else {
2081 return subFloat64Sigs( roundData, a, b, aSign );
2082 }
2083
2084 }
2085
2086
2087
2088
2089
2090
2091
2092
2093 float64 float64_sub( struct roundingData *roundData, float64 a, float64 b )
2094 {
2095 flag aSign, bSign;
2096
2097 aSign = extractFloat64Sign( a );
2098 bSign = extractFloat64Sign( b );
2099 if ( aSign == bSign ) {
2100 return subFloat64Sigs( roundData, a, b, aSign );
2101 }
2102 else {
2103 return addFloat64Sigs( roundData, a, b, aSign );
2104 }
2105
2106 }
2107
2108
2109
2110
2111
2112
2113
2114
/*
-------------------------------------------------------------------------------
Returns the product of the double-precision values `a' and `b', rounded per
`roundData'.  inf * 0 raises the invalid exception and returns the default
NaN.
-------------------------------------------------------------------------------
*/
float64 float64_mul( struct roundingData *roundData, float64 a, float64 b )
{
    flag aSign, bSign, zSign;
    int16 aExp, bExp, zExp;
    bits64 aSig, bSig, zSig0, zSig1;

    aSig = extractFloat64Frac( a );
    aExp = extractFloat64Exp( a );
    aSign = extractFloat64Sign( a );
    bSig = extractFloat64Frac( b );
    bExp = extractFloat64Exp( b );
    bSign = extractFloat64Sign( b );
    zSign = aSign ^ bSign;
    if ( aExp == 0x7FF ) {
        if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
            return propagateFloat64NaN( a, b );
        }
        if ( ( bExp | bSig ) == 0 ) {
            /* inf * 0 */
            roundData->exception |= float_flag_invalid;
            return float64_default_nan;
        }
        return packFloat64( zSign, 0x7FF, 0 );
    }
    if ( bExp == 0x7FF ) {
        if ( bSig ) return propagateFloat64NaN( a, b );
        if ( ( aExp | aSig ) == 0 ) {
            /* 0 * inf */
            roundData->exception |= float_flag_invalid;
            return float64_default_nan;
        }
        return packFloat64( zSign, 0x7FF, 0 );
    }
    if ( aExp == 0 ) {
        if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
    }
    if ( bExp == 0 ) {
        if ( bSig == 0 ) return packFloat64( zSign, 0, 0 );
        normalizeFloat64Subnormal( bSig, &bExp, &bSig );
    }
    zExp = aExp + bExp - 0x3FF;
    /* Left-align both significands (with implicit bit) so the 128-bit
       product's high half holds the result significand. */
    aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
    bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
    mul64To128( aSig, bSig, &zSig0, &zSig1 );
    zSig0 |= ( zSig1 != 0 );    /* fold low half into sticky bit */
    if ( 0 <= (sbits64) ( zSig0<<1 ) ) {
        /* Product in [1,2): shift left one to normalize. */
        zSig0 <<= 1;
        --zExp;
    }
    return roundAndPackFloat64( roundData, zSign, zExp, zSig0 );

}
2166
2167
2168
2169
2170
2171
2172
2173
/*
-------------------------------------------------------------------------------
Returns the quotient of the double-precision values `a' and `b', rounded
per `roundData'.  0/0 and inf/inf raise invalid; finite/0 raises
divide-by-zero and returns a signed infinity.
-------------------------------------------------------------------------------
*/
float64 float64_div( struct roundingData *roundData, float64 a, float64 b )
{
    flag aSign, bSign, zSign;
    int16 aExp, bExp, zExp;
    bits64 aSig, bSig, zSig;
    bits64 rem0, rem1;
    bits64 term0, term1;

    aSig = extractFloat64Frac( a );
    aExp = extractFloat64Exp( a );
    aSign = extractFloat64Sign( a );
    bSig = extractFloat64Frac( b );
    bExp = extractFloat64Exp( b );
    bSign = extractFloat64Sign( b );
    zSign = aSign ^ bSign;
    if ( aExp == 0x7FF ) {
        if ( aSig ) return propagateFloat64NaN( a, b );
        if ( bExp == 0x7FF ) {
            if ( bSig ) return propagateFloat64NaN( a, b );
            /* inf / inf */
            roundData->exception |= float_flag_invalid;
            return float64_default_nan;
        }
        return packFloat64( zSign, 0x7FF, 0 );
    }
    if ( bExp == 0x7FF ) {
        if ( bSig ) return propagateFloat64NaN( a, b );
        return packFloat64( zSign, 0, 0 );    /* finite / inf = 0 */
    }
    if ( bExp == 0 ) {
        if ( bSig == 0 ) {
            if ( ( aExp | aSig ) == 0 ) {
                /* 0 / 0 */
                roundData->exception |= float_flag_invalid;
                return float64_default_nan;
            }
            roundData->exception |= float_flag_divbyzero;
            return packFloat64( zSign, 0x7FF, 0 );
        }
        normalizeFloat64Subnormal( bSig, &bExp, &bSig );
    }
    if ( aExp == 0 ) {
        if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
    }
    zExp = aExp - bExp + 0x3FD;
    aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
    bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
    if ( bSig <= ( aSig + aSig ) ) {
        /* Keep the quotient estimate below 2. */
        aSig >>= 1;
        ++zExp;
    }
    zSig = estimateDiv128To64( aSig, 0, bSig );
    if ( ( zSig & 0x1FF ) <= 2 ) {
        /* Estimate may straddle a rounding boundary: compute the exact
           remainder and correct, setting the sticky bit if inexact. */
        mul64To128( bSig, zSig, &term0, &term1 );
        sub128( aSig, 0, term0, term1, &rem0, &rem1 );
        while ( (sbits64) rem0 < 0 ) {
            --zSig;
            add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
        }
        zSig |= ( rem1 != 0 );
    }
    return roundAndPackFloat64( roundData, zSign, zExp, zSig );

}
2237
2238
2239
2240
2241
2242
2243
2244
/*
-------------------------------------------------------------------------------
Returns the IEC/IEEE remainder of the double-precision values `a' with
respect to `b' (result = a - n*b with n the integer nearest a/b, ties to
even).  inf REM x and x REM 0 raise the invalid exception.
-------------------------------------------------------------------------------
*/
float64 float64_rem( struct roundingData *roundData, float64 a, float64 b )
{
    flag aSign, bSign, zSign;
    int16 aExp, bExp, expDiff;
    bits64 aSig, bSig;
    bits64 q, alternateASig;
    sbits64 sigMean;

    aSig = extractFloat64Frac( a );
    aExp = extractFloat64Exp( a );
    aSign = extractFloat64Sign( a );
    bSig = extractFloat64Frac( b );
    bExp = extractFloat64Exp( b );
    bSign = extractFloat64Sign( b );
    if ( aExp == 0x7FF ) {
        if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
            return propagateFloat64NaN( a, b );
        }
        /* inf REM anything is invalid. */
        roundData->exception |= float_flag_invalid;
        return float64_default_nan;
    }
    if ( bExp == 0x7FF ) {
        if ( bSig ) return propagateFloat64NaN( a, b );
        return a;    /* finite REM inf = a */
    }
    if ( bExp == 0 ) {
        if ( bSig == 0 ) {
            /* x REM 0 is invalid. */
            roundData->exception |= float_flag_invalid;
            return float64_default_nan;
        }
        normalizeFloat64Subnormal( bSig, &bExp, &bSig );
    }
    if ( aExp == 0 ) {
        if ( aSig == 0 ) return a;
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
    }
    expDiff = aExp - bExp;
    aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<11;
    bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
    if ( expDiff < 0 ) {
        if ( expDiff < -1 ) return a;    /* |a| < |b|/2: a is already the remainder */
        aSig >>= 1;
    }
    q = ( bSig <= aSig );
    if ( q ) aSig -= bSig;
    /* Reduce 62 quotient bits per iteration until the exponents meet. */
    expDiff -= 64;
    while ( 0 < expDiff ) {
        q = estimateDiv128To64( aSig, 0, bSig );
        q = ( 2 < q ) ? q - 2 : 0;    /* under-estimate to keep remainder >= 0 */
        aSig = - ( ( bSig>>2 ) * q );
        expDiff -= 62;
    }
    expDiff += 64;
    if ( 0 < expDiff ) {
        q = estimateDiv128To64( aSig, 0, bSig );
        q = ( 2 < q ) ? q - 2 : 0;
        q >>= 64 - expDiff;
        bSig >>= 2;
        aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
    }
    else {
        aSig >>= 2;
        bSig >>= 2;
    }
    /* Step past zero so aSig and alternateASig bracket the true remainder. */
    do {
        alternateASig = aSig;
        ++q;
        aSig -= bSig;
    } while ( 0 <= (sbits64) aSig );
    sigMean = aSig + alternateASig;
    /* Pick the candidate nearer zero; on a tie choose the even quotient. */
    if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
        aSig = alternateASig;
    }
    zSign = ( (sbits64) aSig < 0 );
    if ( zSign ) aSig = - aSig;
    return normalizeRoundAndPackFloat64( roundData, aSign ^ zSign, bExp, aSig );

}
2323
2324
2325
2326
2327
2328
2329
2330
/*
-------------------------------------------------------------------------------
Returns the square root of the double-precision value `a', rounded per
`roundData'.  NaNs propagate, sqrt(+inf) = +inf, sqrt(+-0) = +-0, and any
other negative input raises the invalid exception and returns the default
NaN.
-------------------------------------------------------------------------------
*/
float64 float64_sqrt( struct roundingData *roundData, float64 a )
{
    flag aSign;
    int16 aExp, zExp;
    bits64 aSig, zSig;
    bits64 rem0, rem1, term0, term1;


    aSig = extractFloat64Frac( a );
    aExp = extractFloat64Exp( a );
    aSign = extractFloat64Sign( a );
    if ( aExp == 0x7FF ) {
        if ( aSig ) return propagateFloat64NaN( a, a );
        if ( ! aSign ) return a;    /* sqrt(+inf) = +inf */
        roundData->exception |= float_flag_invalid;
        return float64_default_nan;
    }
    if ( aSign ) {
        if ( ( aExp | aSig ) == 0 ) return a;    /* sqrt(-0) = -0 */
        roundData->exception |= float_flag_invalid;
        return float64_default_nan;
    }
    if ( aExp == 0 ) {
        if ( aSig == 0 ) return 0;    /* sqrt(+0) = +0 */
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
    }
    /* Result exponent is half the unbiased input exponent, rebiased. */
    zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE;
    aSig |= LIT64( 0x0010000000000000 );
    /* One Newton-like refinement of a 32-bit sqrt estimate gives ~64 bits. */
    zSig = estimateSqrt32( aExp, aSig>>21 );
    zSig <<= 31;
    aSig <<= 9 - ( aExp & 1 );    /* align for odd exponents */
    zSig = estimateDiv128To64( aSig, 0, zSig ) + zSig + 2;
    if ( ( zSig & 0x3FF ) <= 5 ) {
        /* Estimate too close to a rounding boundary; refine it exactly. */
        if ( zSig < 2 ) {
            zSig = LIT64( 0xFFFFFFFFFFFFFFFF );
        }
        else {
            aSig <<= 2;
            mul64To128( zSig, zSig, &term0, &term1 );
            sub128( aSig, 0, term0, term1, &rem0, &rem1 );
            while ( (sbits64) rem0 < 0 ) {
                /* Estimate one too high: step down and fix the remainder. */
                --zSig;
                shortShift128Left( 0, zSig, 1, &term0, &term1 );
                term1 |= 1;
                add128( rem0, rem1, term0, term1, &rem0, &rem1 );
            }
            zSig |= ( ( rem0 | rem1 ) != 0 );    /* sticky bit for inexactness */
        }
    }
    shift64RightJamming( zSig, 1, &zSig );
    return roundAndPackFloat64( roundData, 0, zExp, zSig );

}
2384
2385
2386
2387
2388
2389
2390
2391
2392 flag float64_eq( float64 a, float64 b )
2393 {
2394
2395 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
2396 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
2397 ) {
2398 if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
2399 float_raise( float_flag_invalid );
2400 }
2401 return 0;
2402 }
2403 return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 );
2404
2405 }
2406
2407
2408
2409
2410
2411
2412
2413
2414
2415 flag float64_le( float64 a, float64 b )
2416 {
2417 flag aSign, bSign;
2418
2419 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
2420 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
2421 ) {
2422 float_raise( float_flag_invalid );
2423 return 0;
2424 }
2425 aSign = extractFloat64Sign( a );
2426 bSign = extractFloat64Sign( b );
2427 if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 );
2428 return ( a == b ) || ( aSign ^ ( a < b ) );
2429
2430 }
2431
2432
2433
2434
2435
2436
2437
2438
2439 flag float64_lt( float64 a, float64 b )
2440 {
2441 flag aSign, bSign;
2442
2443 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
2444 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
2445 ) {
2446 float_raise( float_flag_invalid );
2447 return 0;
2448 }
2449 aSign = extractFloat64Sign( a );
2450 bSign = extractFloat64Sign( b );
2451 if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 );
2452 return ( a != b ) && ( aSign ^ ( a < b ) );
2453
2454 }
2455
2456
2457
2458
2459
2460
2461
2462
2463
2464 flag float64_eq_signaling( float64 a, float64 b )
2465 {
2466
2467 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
2468 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
2469 ) {
2470 float_raise( float_flag_invalid );
2471 return 0;
2472 }
2473 return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 );
2474
2475 }
2476
2477
2478
2479
2480
2481
2482
2483
2484
2485 flag float64_le_quiet( float64 a, float64 b )
2486 {
2487 flag aSign, bSign;
2488
2489
2490 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
2491 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
2492 ) {
2493
2494 return 0;
2495 }
2496 aSign = extractFloat64Sign( a );
2497 bSign = extractFloat64Sign( b );
2498 if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 );
2499 return ( a == b ) || ( aSign ^ ( a < b ) );
2500
2501 }
2502
2503
2504
2505
2506
2507
2508
2509
2510
2511 flag float64_lt_quiet( float64 a, float64 b )
2512 {
2513 flag aSign, bSign;
2514
2515 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
2516 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
2517 ) {
2518
2519 return 0;
2520 }
2521 aSign = extractFloat64Sign( a );
2522 bSign = extractFloat64Sign( b );
2523 if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 );
2524 return ( a != b ) && ( aSign ^ ( a < b ) );
2525
2526 }
2527
2528 #ifdef FLOATX80
2529
2530
2531
2532
2533
2534
2535
2536
2537
2538
2539
2540
/*
-------------------------------------------------------------------------------
Converts the extended-precision value `a' to a 32-bit signed integer using
the rounding mode in `roundData'.  NaNs have their sign cleared so they
saturate positive via roundAndPackInt32.
-------------------------------------------------------------------------------
*/
int32 floatx80_to_int32( struct roundingData *roundData, floatx80 a )
{
    flag aSign;
    int32 aExp, shiftCount;
    bits64 aSig;

    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    /* NaN (exponent all ones, nonzero fraction below the integer bit). */
    if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
    shiftCount = 0x4037 - aExp;
    if ( shiftCount <= 0 ) shiftCount = 1;    /* huge values still overflow in rounding */
    shift64RightJamming( aSig, shiftCount, &aSig );
    return roundAndPackInt32( roundData, aSign, aSig );

}
2557
2558
2559
2560
2561
2562
2563
2564
2565
2566
2567
2568
/*
-------------------------------------------------------------------------------
Converts the extended-precision value `a' to a 32-bit signed integer,
always rounding toward zero.  Out-of-range values and NaNs raise the
invalid exception and saturate; discarded bits raise inexact.
-------------------------------------------------------------------------------
*/
int32 floatx80_to_int32_round_to_zero( floatx80 a )
{
    flag aSign;
    int32 aExp, shiftCount;
    bits64 aSig, savedASig;
    int32 z;

    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    shiftCount = 0x403E - aExp;
    if ( shiftCount < 32 ) {
        /* |a| >= 2^31 (or NaN/inf): cannot fit. */
        if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
        goto invalid;
    }
    else if ( 63 < shiftCount ) {
        /* |a| < 1: truncates to zero; inexact unless exactly +-0. */
        if ( aExp || aSig ) float_raise( float_flag_inexact );
        return 0;
    }
    savedASig = aSig;
    aSig >>= shiftCount;    /* truncate fraction */
    z = aSig;
    if ( aSign ) z = - z;
    if ( ( z < 0 ) ^ aSign ) {
        /* Negation changed the sign: overflowed int32. */
 invalid:
        float_raise( float_flag_invalid );
        return aSign ? 0x80000000 : 0x7FFFFFFF;
    }
    if ( ( aSig<<shiftCount ) != savedASig ) {
        float_raise( float_flag_inexact );
    }
    return z;

}
2603
2604
2605
2606
2607
2608
2609
2610
2611
/*
-------------------------------------------------------------------------------
Converts the extended-precision value `a' to single precision, rounding per
`roundData'.  NaNs pass through the common NaN format.
-------------------------------------------------------------------------------
*/
float32 floatx80_to_float32( struct roundingData *roundData, floatx80 a )
{
    flag aSign;
    int32 aExp;
    bits64 aSig;

    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    if ( aExp == 0x7FFF ) {
        if ( (bits64) ( aSig<<1 ) ) {
            return commonNaNToFloat32( floatx80ToCommonNaN( a ) );
        }
        return packFloat32( aSign, 0xFF, 0 );    /* infinity */
    }
    /* Narrow the 64-bit significand, jamming discarded bits sticky. */
    shift64RightJamming( aSig, 33, &aSig );
    if ( aExp || aSig ) aExp -= 0x3F81;    /* rebias extended -> single */
    return roundAndPackFloat32( roundData, aSign, aExp, aSig );

}
2632
2633
2634
2635
2636
2637
2638
2639
2640
/*
-------------------------------------------------------------------------------
Converts the extended-precision value `a' to double precision, rounding per
`roundData'.  NaNs pass through the common NaN format.
-------------------------------------------------------------------------------
*/
float64 floatx80_to_float64( struct roundingData *roundData, floatx80 a )
{
    flag aSign;
    int32 aExp;
    bits64 aSig, zSig;

    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    if ( aExp == 0x7FFF ) {
        if ( (bits64) ( aSig<<1 ) ) {
            return commonNaNToFloat64( floatx80ToCommonNaN( a ) );
        }
        return packFloat64( aSign, 0x7FF, 0 );    /* infinity */
    }
    /* Shift out the explicit integer bit, keeping a sticky bit. */
    shift64RightJamming( aSig, 1, &zSig );
    if ( aExp || aSig ) aExp -= 0x3C01;    /* rebias extended -> double */
    return roundAndPackFloat64( roundData, aSign, aExp, zSig );

}
2661
2662
2663
2664
2665
2666
2667
2668
2669
/*
-------------------------------------------------------------------------------
Rounds the extended-precision value `a' to an integer-valued extended
float, using the rounding mode in `roundData'.  Raises inexact whenever the
result differs from the input.
-------------------------------------------------------------------------------
*/
floatx80 floatx80_round_to_int( struct roundingData *roundData, floatx80 a )
{
    flag aSign;
    int32 aExp;
    bits64 lastBitMask, roundBitsMask;
    int8 roundingMode;
    floatx80 z;

    aExp = extractFloatx80Exp( a );
    if ( 0x403E <= aExp ) {
        /* Already integral (or NaN/inf). */
        if ( ( aExp == 0x7FFF ) && (bits64) ( extractFloatx80Frac( a )<<1 ) ) {
            return propagateFloatx80NaN( a, a );
        }
        return a;
    }
    if ( aExp <= 0x3FFE ) {
        /* |a| < 1: result is 0 or +-1 depending on the rounding mode. */
        if ( ( aExp == 0 )
             && ( (bits64) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) {
            return a;    /* +-0 stays exact */
        }
        roundData->exception |= float_flag_inexact;
        aSign = extractFloatx80Sign( a );
        switch ( roundData->mode ) {
         case float_round_nearest_even:
            if ( ( aExp == 0x3FFE ) && (bits64) ( extractFloatx80Frac( a )<<1 )
               ) {
                return
                    packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) );
            }
            break;
         case float_round_down:
            return
                  aSign ?
                      packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) )
                : packFloatx80( 0, 0, 0 );
         case float_round_up:
            return
                  aSign ? packFloatx80( 1, 0, 0 )
                : packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) );
        }
        return packFloatx80( aSign, 0, 0 );
    }
    /* General case: mask fraction bits below the units place in z.low. */
    lastBitMask = 1;
    lastBitMask <<= 0x403E - aExp;
    roundBitsMask = lastBitMask - 1;
    z = a;
    roundingMode = roundData->mode;
    if ( roundingMode == float_round_nearest_even ) {
        z.low += lastBitMask>>1;
        if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;    /* tie: even */
    }
    else if ( roundingMode != float_round_to_zero ) {
        if ( extractFloatx80Sign( z ) ^ ( roundingMode == float_round_up ) ) {
            z.low += roundBitsMask;
        }
    }
    z.low &= ~ roundBitsMask;
    if ( z.low == 0 ) {
        /* Significand overflowed to zero: carry into the exponent and
           restore the explicit integer bit. */
        ++z.high;
        z.low = LIT64( 0x8000000000000000 );
    }
    if ( z.low != a.low ) roundData->exception |= float_flag_inexact;
    return z;

}
2735
2736
2737
2738
2739
2740
2741
2742
2743
2744
/*
-------------------------------------------------------------------------------
Adds the magnitudes of the extended-precision values `a' and `b', attaching
sign `zSign'.  Used by floatx80_add/floatx80_sub for the like-signed case.
The extended significand carries an explicit integer bit, so overflow is
detected via the top bit of the 64-bit sum.
-------------------------------------------------------------------------------
*/
static floatx80 addFloatx80Sigs( struct roundingData *roundData, floatx80 a, floatx80 b, flag zSign )
{
    int32 aExp, bExp, zExp;
    bits64 aSig, bSig, zSig0, zSig1;
    int32 expDiff;

    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    bSig = extractFloatx80Frac( b );
    bExp = extractFloatx80Exp( b );
    expDiff = aExp - bExp;
    if ( 0 < expDiff ) {
        /* a has the larger exponent: align b, spilling into zSig1. */
        if ( aExp == 0x7FFF ) {
            if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
            return a;
        }
        if ( bExp == 0 ) --expDiff;    /* subnormal exponent adjustment */
        shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
        zExp = aExp;
    }
    else if ( expDiff < 0 ) {
        /* b has the larger exponent: align a. */
        if ( bExp == 0x7FFF ) {
            if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
            return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
        }
        if ( aExp == 0 ) ++expDiff;
        shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
        zExp = bExp;
    }
    else {
        /* Equal exponents. */
        if ( aExp == 0x7FFF ) {
            if ( (bits64) ( ( aSig | bSig )<<1 ) ) {
                return propagateFloatx80NaN( a, b );
            }
            return a;
        }
        zSig1 = 0;
        zSig0 = aSig + bSig;
        if ( aExp == 0 ) {
            normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
            goto roundAndPack;
        }
        zExp = aExp;
        goto shiftRight1;
    }

    zSig0 = aSig + bSig;

    if ( (sbits64) zSig0 < 0 ) goto roundAndPack;    /* no carry out */
 shiftRight1:
    /* Sum carried past the integer bit: shift right one and bump exponent. */
    shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
    zSig0 |= LIT64( 0x8000000000000000 );
    ++zExp;
 roundAndPack:
    return
        roundAndPackFloatx80(
            roundData, zSign, zExp, zSig0, zSig1 );

}
2804
2805
2806
2807
2808
2809
2810
2811
2812
2813
/*
-------------------------------------------------------------------------------
Subtracts the magnitudes of the extended-precision values `a' and `b'; the
result carries sign `zSign' if a's magnitude is larger, the opposite sign
otherwise.  Used by floatx80_add/floatx80_sub for the unlike-signed case.
-------------------------------------------------------------------------------
*/
static floatx80 subFloatx80Sigs( struct roundingData *roundData, floatx80 a, floatx80 b, flag zSign )
{
    int32 aExp, bExp, zExp;
    bits64 aSig, bSig, zSig0, zSig1;
    int32 expDiff;
    floatx80 z;

    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    bSig = extractFloatx80Frac( b );
    bExp = extractFloatx80Exp( b );
    expDiff = aExp - bExp;
    if ( 0 < expDiff ) goto aExpBigger;
    if ( expDiff < 0 ) goto bExpBigger;
    /* Equal exponents. */
    if ( aExp == 0x7FFF ) {
        if ( (bits64) ( ( aSig | bSig )<<1 ) ) {
            return propagateFloatx80NaN( a, b );
        }
        /* inf - inf is invalid: build the default NaN by hand. */
        roundData->exception |= float_flag_invalid;
        z.low = floatx80_default_nan_low;
        z.high = floatx80_default_nan_high;
        z.__padding = 0;
        return z;
    }
    if ( aExp == 0 ) {
        /* Both subnormal: treat exponents as 1. */
        aExp = 1;
        bExp = 1;
    }
    zSig1 = 0;
    if ( bSig < aSig ) goto aBigger;
    if ( aSig < bSig ) goto bBigger;
    /* Exact cancellation: zero, negative only in round-down mode. */
    return packFloatx80( roundData->mode == float_round_down, 0, 0 );
 bExpBigger:
    if ( bExp == 0x7FFF ) {
        if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
        return packFloatx80( zSign ^ 1, 0x7FFF, LIT64( 0x8000000000000000 ) );
    }
    if ( aExp == 0 ) ++expDiff;
    shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
 bBigger:
    sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
    zExp = bExp;
    zSign ^= 1;    /* result takes b's effective sign */
    goto normalizeRoundAndPack;
 aExpBigger:
    if ( aExp == 0x7FFF ) {
        if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
        return a;
    }
    if ( bExp == 0 ) --expDiff;
    shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
 aBigger:
    sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
    zExp = aExp;
 normalizeRoundAndPack:
    return
        normalizeRoundAndPackFloatx80(
            roundData, zSign, zExp, zSig0, zSig1 );

}
2874
2875
2876
2877
2878
2879
2880
2881
2882 floatx80 floatx80_add( struct roundingData *roundData, floatx80 a, floatx80 b )
2883 {
2884 flag aSign, bSign;
2885
2886 aSign = extractFloatx80Sign( a );
2887 bSign = extractFloatx80Sign( b );
2888 if ( aSign == bSign ) {
2889 return addFloatx80Sigs( roundData, a, b, aSign );
2890 }
2891 else {
2892 return subFloatx80Sigs( roundData, a, b, aSign );
2893 }
2894
2895 }
2896
2897
2898
2899
2900
2901
2902
2903
2904 floatx80 floatx80_sub( struct roundingData *roundData, floatx80 a, floatx80 b )
2905 {
2906 flag aSign, bSign;
2907
2908 aSign = extractFloatx80Sign( a );
2909 bSign = extractFloatx80Sign( b );
2910 if ( aSign == bSign ) {
2911 return subFloatx80Sigs( roundData, a, b, aSign );
2912 }
2913 else {
2914 return addFloatx80Sigs( roundData, a, b, aSign );
2915 }
2916
2917 }
2918
2919
2920
2921
2922
2923
2924
2925
/*
-------------------------------------------------------------------------------
Returns the result of multiplying the extended double-precision floating-
point values `a' and `b'.  The operation is performed according to the
IEC/IEEE Standard for Binary Floating-Point Arithmetic.
-------------------------------------------------------------------------------
*/
floatx80 floatx80_mul( struct roundingData *roundData, floatx80 a, floatx80 b )
{
    flag aSign, bSign, zSign;
    int32 aExp, bExp, zExp;
    bits64 aSig, bSig, zSig0, zSig1;
    floatx80 z;

    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    bSig = extractFloatx80Frac( b );
    bExp = extractFloatx80Exp( b );
    bSign = extractFloatx80Sign( b );
    zSign = aSign ^ bSign;
    if ( aExp == 0x7FFF ) {
        /* `a' is a NaN or infinity (the <<1 drops the explicit integer
           bit, so a nonzero result means a NaN fraction). */
        if ( (bits64) ( aSig<<1 )
             || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
            return propagateFloatx80NaN( a, b );
        }
        /* infinity * 0 is invalid. */
        if ( ( bExp | bSig ) == 0 ) goto invalid;
        return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
    }
    if ( bExp == 0x7FFF ) {
        if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
        if ( ( aExp | aSig ) == 0 ) {
 invalid:
            /* 0 * infinity: raise invalid and return the default NaN. */
            roundData->exception |= float_flag_invalid;
            z.low = floatx80_default_nan_low;
            z.high = floatx80_default_nan_high;
            z.__padding = 0;
            return z;
        }
        return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
    }
    if ( aExp == 0 ) {
        if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
        normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
    }
    if ( bExp == 0 ) {
        if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
    }
    zExp = aExp + bExp - 0x3FFE;
    /* Both significands are normalized (top bit set), so the 128-bit
       product needs at most a one-bit left shift to normalize. */
    mul64To128( aSig, bSig, &zSig0, &zSig1 );
    if ( 0 < (sbits64) zSig0 ) {
        shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
        --zExp;
    }
    return
        roundAndPackFloatx80(
            roundData, zSign, zExp, zSig0, zSig1 );

}
2979
2980
2981
2982
2983
2984
2985
2986
/*
-------------------------------------------------------------------------------
Returns the result of dividing the extended double-precision floating-point
value `a' by the corresponding value `b'.  The operation is performed
according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
-------------------------------------------------------------------------------
*/
floatx80 floatx80_div( struct roundingData *roundData, floatx80 a, floatx80 b )
{
    flag aSign, bSign, zSign;
    int32 aExp, bExp, zExp;
    bits64 aSig, bSig, zSig0, zSig1;
    bits64 rem0, rem1, rem2, term0, term1, term2;
    floatx80 z;

    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    bSig = extractFloatx80Frac( b );
    bExp = extractFloatx80Exp( b );
    bSign = extractFloatx80Sign( b );
    zSign = aSign ^ bSign;
    if ( aExp == 0x7FFF ) {
        if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
        if ( bExp == 0x7FFF ) {
            if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
            /* infinity / infinity is invalid. */
            goto invalid;
        }
        return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
    }
    if ( bExp == 0x7FFF ) {
        if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
        /* finite / infinity yields a (signed) zero. */
        return packFloatx80( zSign, 0, 0 );
    }
    if ( bExp == 0 ) {
        if ( bSig == 0 ) {
            if ( ( aExp | aSig ) == 0 ) {
 invalid:
                /* 0/0 or inf/inf: raise invalid, return the default NaN. */
                roundData->exception |= float_flag_invalid;
                z.low = floatx80_default_nan_low;
                z.high = floatx80_default_nan_high;
                z.__padding = 0;
                return z;
            }
            /* Nonzero / 0: raise divide-by-zero and return infinity. */
            roundData->exception |= float_flag_divbyzero;
            return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
        }
        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
    }
    if ( aExp == 0 ) {
        if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
        normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
    }
    zExp = aExp - bExp + 0x3FFE;
    rem1 = 0;
    if ( bSig <= aSig ) {
        /* Make the dividend smaller than the divisor so each 64-bit
           quotient estimate below cannot overflow. */
        shift128Right( aSig, 0, 1, &aSig, &rem1 );
        ++zExp;
    }
    /* High 64 quotient bits, with downward correction until the partial
       remainder is nonnegative. */
    zSig0 = estimateDiv128To64( aSig, rem1, bSig );
    mul64To128( bSig, zSig0, &term0, &term1 );
    sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
    while ( (sbits64) rem0 < 0 ) {
        --zSig0;
        add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
    }
    /* Low 64 quotient bits; refined only when they are small enough to
       possibly affect rounding. */
    zSig1 = estimateDiv128To64( rem1, 0, bSig );
    if ( (bits64) ( zSig1<<1 ) <= 8 ) {
        mul64To128( bSig, zSig1, &term1, &term2 );
        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
        while ( (sbits64) rem1 < 0 ) {
            --zSig1;
            add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
        }
        /* Fold any leftover remainder into the sticky bit. */
        zSig1 |= ( ( rem1 | rem2 ) != 0 );
    }
    return
        roundAndPackFloatx80(
            roundData, zSign, zExp, zSig0, zSig1 );

}
3061
3062
3063
3064
3065
3066
3067
3068
/*
-------------------------------------------------------------------------------
Returns the remainder of the extended double-precision floating-point value
`a' with respect to the corresponding value `b'.  The operation is performed
according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic
(round-to-nearest remainder, so the result may have either sign).
-------------------------------------------------------------------------------
*/
floatx80 floatx80_rem( struct roundingData *roundData, floatx80 a, floatx80 b )
{
    flag aSign, bSign, zSign;
    int32 aExp, bExp, expDiff;
    bits64 aSig0, aSig1, bSig;
    bits64 q, term0, term1, alternateASig0, alternateASig1;
    floatx80 z;

    aSig0 = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    bSig = extractFloatx80Frac( b );
    bExp = extractFloatx80Exp( b );
    /* NOTE(review): bSign is never read below; the remainder's sign is
       derived from `a' alone. */
    bSign = extractFloatx80Sign( b );
    if ( aExp == 0x7FFF ) {
        if ( (bits64) ( aSig0<<1 )
             || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
            return propagateFloatx80NaN( a, b );
        }
        /* remainder of infinity is invalid. */
        goto invalid;
    }
    if ( bExp == 0x7FFF ) {
        if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
        /* finite rem infinity is `a' unchanged. */
        return a;
    }
    if ( bExp == 0 ) {
        if ( bSig == 0 ) {
 invalid:
            /* rem by zero (or of infinity): raise invalid, default NaN. */
            roundData->exception |= float_flag_invalid;
            z.low = floatx80_default_nan_low;
            z.high = floatx80_default_nan_high;
            z.__padding = 0;
            return z;
        }
        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
    }
    if ( aExp == 0 ) {
        if ( (bits64) ( aSig0<<1 ) == 0 ) return a;
        normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
    }
    /* Make the divisor's integer bit explicit. */
    bSig |= LIT64( 0x8000000000000000 );
    zSign = aSign;
    expDiff = aExp - bExp;
    aSig1 = 0;
    if ( expDiff < 0 ) {
        /* |a| < |b|/2: `a' is already the remainder. */
        if ( expDiff < -1 ) return a;
        shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
        expDiff = 0;
    }
    /* First quotient bit. */
    q = ( bSig <= aSig0 );
    if ( q ) aSig0 -= bSig;
    expDiff -= 64;
    /* Reduce the dividend 62 quotient bits at a time (the estimate is
       backed off by 2 so it can never exceed the true quotient). */
    while ( 0 < expDiff ) {
        q = estimateDiv128To64( aSig0, aSig1, bSig );
        q = ( 2 < q ) ? q - 2 : 0;
        mul64To128( bSig, q, &term0, &term1 );
        sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
        shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
        expDiff -= 62;
    }
    /* Final partial quotient of `expDiff' (0..64) bits. */
    expDiff += 64;
    if ( 0 < expDiff ) {
        q = estimateDiv128To64( aSig0, aSig1, bSig );
        q = ( 2 < q ) ? q - 2 : 0;
        q >>= 64 - expDiff;
        mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
        sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
        shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
        /* Correct any under-estimate of q. */
        while ( le128( term0, term1, aSig0, aSig1 ) ) {
            ++q;
            sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
        }
    }
    else {
        term1 = 0;
        term0 = bSig;
    }
    /* Round to nearest: if the complementary remainder is smaller (or
       equal with q odd), use it and flip the sign. */
    sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
    if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
         || ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
              && ( q & 1 ) )
       ) {
        aSig0 = alternateASig0;
        aSig1 = alternateASig1;
        zSign = ! zSign;
    }

    return
        normalizeRoundAndPackFloatx80(
            roundData, zSign, bExp + expDiff, aSig0, aSig1 );

}
3161
3162
3163
3164
3165
3166
3167
3168
/*
-------------------------------------------------------------------------------
Returns the square root of the extended double-precision floating-point
value `a'.  The operation is performed according to the IEC/IEEE Standard
for Binary Floating-Point Arithmetic.
-------------------------------------------------------------------------------
*/
floatx80 floatx80_sqrt( struct roundingData *roundData, floatx80 a )
{
    flag aSign;
    int32 aExp, zExp;
    bits64 aSig0, aSig1, zSig0, zSig1;
    bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
    bits64 shiftedRem0, shiftedRem1;
    floatx80 z;

    aSig0 = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    if ( aExp == 0x7FFF ) {
        if ( (bits64) ( aSig0<<1 ) ) return propagateFloatx80NaN( a, a );
        /* sqrt(+infinity) is +infinity. */
        if ( ! aSign ) return a;
        goto invalid;
    }
    if ( aSign ) {
        /* sqrt(-0) is -0; any other negative operand is invalid. */
        if ( ( aExp | aSig0 ) == 0 ) return a;
 invalid:
        roundData->exception |= float_flag_invalid;
        z.low = floatx80_default_nan_low;
        z.high = floatx80_default_nan_high;
        z.__padding = 0;
        return z;
    }
    if ( aExp == 0 ) {
        if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
        normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
    }
    /* Halve the unbiased exponent for the result. */
    zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
    /* Initial 32-bit root estimate, widened to a 64-bit approximation
       via one quotient-based refinement step. */
    zSig0 = estimateSqrt32( aExp, aSig0>>32 );
    zSig0 <<= 31;
    aSig1 = 0;
    shift128Right( aSig0, 0, ( aExp & 1 ) + 2, &aSig0, &aSig1 );
    zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0 ) + zSig0 + 4;
    if ( 0 <= (sbits64) zSig0 ) zSig0 = LIT64( 0xFFFFFFFFFFFFFFFF );
    shortShift128Left( aSig0, aSig1, 2, &aSig0, &aSig1 );
    /* Correct the high root word downward until rem = a - zSig0^2 >= 0. */
    mul64To128( zSig0, zSig0, &term0, &term1 );
    sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
    while ( (sbits64) rem0 < 0 ) {
        --zSig0;
        shortShift128Left( 0, zSig0, 1, &term0, &term1 );
        term1 |= 1;
        add128( rem0, rem1, term0, term1, &rem0, &rem1 );
    }
    /* Estimate the low 64 root bits from the remainder. */
    shortShift128Left( rem0, rem1, 63, &shiftedRem0, &shiftedRem1 );
    zSig1 = estimateDiv128To64( shiftedRem0, shiftedRem1, zSig0 );
    /* Refine the low word only when it is near a rounding boundary. */
    if ( (bits64) ( zSig1<<1 ) <= 10 ) {
        if ( zSig1 == 0 ) zSig1 = 1;
        mul64To128( zSig0, zSig1, &term1, &term2 );
        shortShift128Left( term1, term2, 1, &term1, &term2 );
        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
        mul64To128( zSig1, zSig1, &term2, &term3 );
        sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
        while ( (sbits64) rem1 < 0 ) {
            --zSig1;
            shortShift192Left( 0, zSig0, zSig1, 1, &term1, &term2, &term3 );
            term3 |= 1;
            add192(
                rem1, rem2, rem3, term1, term2, term3, &rem1, &rem2, &rem3 );
        }
        /* Fold any nonzero residual into the sticky bit. */
        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
    }
    return
        roundAndPackFloatx80(
            roundData, 0, zExp, zSig0, zSig1 );

}
3238
3239
3240
3241
3242
3243
3244
3245
3246
3247 flag floatx80_eq( floatx80 a, floatx80 b )
3248 {
3249
3250 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
3251 && (bits64) ( extractFloatx80Frac( a )<<1 ) )
3252 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
3253 && (bits64) ( extractFloatx80Frac( b )<<1 ) )
3254 ) {
3255 if ( floatx80_is_signaling_nan( a )
3256 || floatx80_is_signaling_nan( b ) ) {
3257 float_raise( float_flag_invalid );
3258 }
3259 return 0;
3260 }
3261 return
3262 ( a.low == b.low )
3263 && ( ( a.high == b.high )
3264 || ( ( a.low == 0 )
3265 && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) )
3266 );
3267
3268 }
3269
3270
3271
3272
3273
3274
3275
3276
3277
3278 flag floatx80_le( floatx80 a, floatx80 b )
3279 {
3280 flag aSign, bSign;
3281
3282 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
3283 && (bits64) ( extractFloatx80Frac( a )<<1 ) )
3284 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
3285 && (bits64) ( extractFloatx80Frac( b )<<1 ) )
3286 ) {
3287 float_raise( float_flag_invalid );
3288 return 0;
3289 }
3290 aSign = extractFloatx80Sign( a );
3291 bSign = extractFloatx80Sign( b );
3292 if ( aSign != bSign ) {
3293 return
3294 aSign
3295 || ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
3296 == 0 );
3297 }
3298 return
3299 aSign ? le128( b.high, b.low, a.high, a.low )
3300 : le128( a.high, a.low, b.high, b.low );
3301
3302 }
3303
3304
3305
3306
3307
3308
3309
3310
3311
3312 flag floatx80_lt( floatx80 a, floatx80 b )
3313 {
3314 flag aSign, bSign;
3315
3316 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
3317 && (bits64) ( extractFloatx80Frac( a )<<1 ) )
3318 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
3319 && (bits64) ( extractFloatx80Frac( b )<<1 ) )
3320 ) {
3321 float_raise( float_flag_invalid );
3322 return 0;
3323 }
3324 aSign = extractFloatx80Sign( a );
3325 bSign = extractFloatx80Sign( b );
3326 if ( aSign != bSign ) {
3327 return
3328 aSign
3329 && ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
3330 != 0 );
3331 }
3332 return
3333 aSign ? lt128( b.high, b.low, a.high, a.low )
3334 : lt128( a.high, a.low, b.high, b.low );
3335
3336 }
3337
3338
3339
3340
3341
3342
3343
3344
3345
3346 flag floatx80_eq_signaling( floatx80 a, floatx80 b )
3347 {
3348
3349 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
3350 && (bits64) ( extractFloatx80Frac( a )<<1 ) )
3351 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
3352 && (bits64) ( extractFloatx80Frac( b )<<1 ) )
3353 ) {
3354 float_raise( float_flag_invalid );
3355 return 0;
3356 }
3357 return
3358 ( a.low == b.low )
3359 && ( ( a.high == b.high )
3360 || ( ( a.low == 0 )
3361 && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) )
3362 );
3363
3364 }
3365
3366
3367
3368
3369
3370
3371
3372
3373
3374 flag floatx80_le_quiet( floatx80 a, floatx80 b )
3375 {
3376 flag aSign, bSign;
3377
3378 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
3379 && (bits64) ( extractFloatx80Frac( a )<<1 ) )
3380 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
3381 && (bits64) ( extractFloatx80Frac( b )<<1 ) )
3382 ) {
3383
3384 return 0;
3385 }
3386 aSign = extractFloatx80Sign( a );
3387 bSign = extractFloatx80Sign( b );
3388 if ( aSign != bSign ) {
3389 return
3390 aSign
3391 || ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
3392 == 0 );
3393 }
3394 return
3395 aSign ? le128( b.high, b.low, a.high, a.low )
3396 : le128( a.high, a.low, b.high, b.low );
3397
3398 }
3399
3400
3401
3402
3403
3404
3405
3406
3407
3408 flag floatx80_lt_quiet( floatx80 a, floatx80 b )
3409 {
3410 flag aSign, bSign;
3411
3412 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
3413 && (bits64) ( extractFloatx80Frac( a )<<1 ) )
3414 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
3415 && (bits64) ( extractFloatx80Frac( b )<<1 ) )
3416 ) {
3417
3418 return 0;
3419 }
3420 aSign = extractFloatx80Sign( a );
3421 bSign = extractFloatx80Sign( b );
3422 if ( aSign != bSign ) {
3423 return
3424 aSign
3425 && ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
3426 != 0 );
3427 }
3428 return
3429 aSign ? lt128( b.high, b.low, a.high, a.low )
3430 : lt128( a.high, a.low, b.high, b.low );
3431
3432 }
3433
3434 #endif
3435