x86/math-emu/wm_sqrt.S

0001 /* SPDX-License-Identifier: GPL-2.0 */
0002     .file   "wm_sqrt.S"
0003 /*---------------------------------------------------------------------------+
0004  |  wm_sqrt.S                                                                |
0005  |                                                                           |
0006  | Fixed point arithmetic square root evaluation.                            |
0007  |                                                                           |
0008  | Copyright (C) 1992,1993,1995,1997                                         |
0009  |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
0010  |                       Australia.  E-mail billm@suburbia.net               |
0011  |                                                                           |
0012  | Call from C as:                                                           |
0013  |    int wm_sqrt(FPU_REG *n, unsigned int control_word)                     |
0014  |                                                                           |
0015  +---------------------------------------------------------------------------*/
0016
0017 /*---------------------------------------------------------------------------+
0018  |  wm_sqrt(FPU_REG *n, unsigned int control_word)                           |
0019  |    returns the square root of n in n.                                     |
0020  |                                                                           |
0021  |  Use Newton's method to compute the square root of a number, which must   |
0022  |  be in the range  [1.0 .. 4.0),  to 64 bits accuracy.                     |
0023  |  Does not check the sign or tag of the argument.                          |
0024  |  Sets the exponent, but not the sign or tag of the result.                |
0025  |                                                                           |
0026  |  The guess is kept in %esi:%edi                                           |
0027  +---------------------------------------------------------------------------*/
0028
0029 #include "exception.h"
0030 #include "fpu_emu.h"
0031
0032
0033 #ifndef NON_REENTRANT_FPU
0034 /*  Local storage on the stack: */
0035 #define FPU_accum_3 -4(%ebp)    /* ms word */
0036 #define FPU_accum_2 -8(%ebp)
0037 #define FPU_accum_1 -12(%ebp)
0038 #define FPU_accum_0 -16(%ebp)
0039
0040 /*
0041  * The de-normalised argument:
0042  *                  sq_2                  sq_1              sq_0
0043  *        b b b b b b b ... b b b   b b b .... b b b   b 0 0 0 ... 0
0044  *           ^ binary point here
0045  */
0046 #define FPU_fsqrt_arg_2 -20(%ebp)   /* ms word */
0047 #define FPU_fsqrt_arg_1 -24(%ebp)
0048 #define FPU_fsqrt_arg_0 -28(%ebp)   /* ls word, at most the ms bit is set */
0049
0050 #else
0051 /*  Local storage in a static area: */
0052 .data
0053     .align 4,0
0054 FPU_accum_3:
0055     .long   0       /* ms word */
0056 FPU_accum_2:
0057     .long   0
0058 FPU_accum_1:
0059     .long   0
0060 FPU_accum_0:
0061     .long   0
0062
0063 /* The de-normalised argument:
0064                     sq_2                  sq_1              sq_0
0065           b b b b b b b ... b b b   b b b .... b b b   b 0 0 0 ... 0
0066              ^ binary point here
0067  */
0068 FPU_fsqrt_arg_2:
0069     .long   0       /* ms word */
0070 FPU_fsqrt_arg_1:
0071     .long   0
0072 FPU_fsqrt_arg_0:
0073     .long   0       /* ls word, at most the ms bit is set */
0074 #endif /* NON_REENTRANT_FPU */
0075
0076
0077 .text
0078 SYM_FUNC_START(wm_sqrt)
0079     pushl   %ebp
0080     movl    %esp,%ebp
0081 #ifndef NON_REENTRANT_FPU
0082     subl    $28,%esp
0083 #endif /* NON_REENTRANT_FPU */
0084     pushl   %esi
0085     pushl   %edi
0086     pushl   %ebx
0087
0088     movl    PARAM1,%esi
0089
0090     movl    SIGH(%esi),%eax
0091     movl    SIGL(%esi),%ecx
0092     xorl    %edx,%edx
0093
0094 /* We use a rough linear estimate for the first guess.. */
0095
0096     cmpw    EXP_BIAS,EXP(%esi)
0097     jnz sqrt_arg_ge_2
0098
0099     shrl    $1,%eax         /* arg is in the range  [1.0 .. 2.0) */
0100     rcrl    $1,%ecx
0101     rcrl    $1,%edx
0102
0103 sqrt_arg_ge_2:
0104 /* From here on, n is never accessed directly again until it is
0105    replaced by the answer. */
0106
0107     movl    %eax,FPU_fsqrt_arg_2        /* ms word of n */
0108     movl    %ecx,FPU_fsqrt_arg_1
0109     movl    %edx,FPU_fsqrt_arg_0
0110
0111 /* Make a linear first estimate */
0112     shrl    $1,%eax
0113     addl    $0x40000000,%eax
0114     movl    $0xaaaaaaaa,%ecx
0115     mull    %ecx
0116     shll    %edx            /* max result was 7fff... */
0117     testl   $0x80000000,%edx    /* but min was 3fff... */
0118     jnz sqrt_prelim_no_adjust
0119
0120     movl    $0x80000000,%edx    /* round up */
0121
0122 sqrt_prelim_no_adjust:
0123     movl    %edx,%esi   /* Our first guess */
0124
0125 /* We have now computed (approx)   (2 + x) / 3, which forms the basis
0126    for a few iterations of Newton's method */
0127
0128     movl    FPU_fsqrt_arg_2,%ecx    /* ms word */
0129
0130 /*
0131  * From our initial estimate, three iterations are enough to get us
0132  * to 30 bits or so. This will then allow two iterations at better
0133  * precision to complete the process.
0134  */
0135
0136 /* Compute  (g + n/g)/2  at each iteration (g is the guess). */
0137     shrl    %ecx        /* Doing this first will prevent a divide */
0138                 /* overflow later. */
0139
0140     movl    %ecx,%edx   /* msw of the arg / 2 */
0141     divl    %esi        /* current estimate */
0142     shrl    %esi        /* divide by 2 */
0143     addl    %eax,%esi   /* the new estimate */
0144
0145     movl    %ecx,%edx
0146     divl    %esi
0147     shrl    %esi
0148     addl    %eax,%esi
0149
0150     movl    %ecx,%edx
0151     divl    %esi
0152     shrl    %esi
0153     addl    %eax,%esi
0154
0155 /*
0156  * Now that an estimate accurate to about 30 bits has been obtained (in %esi),
0157  * we improve it to 60 bits or so.
0158  *
0159  * The strategy from now on is to compute new estimates from
0160  *      guess := guess + (n - guess^2) / (2 * guess)
0161  */
0162
0163 /* First, find the square of the guess */
0164     movl    %esi,%eax
0165     mull    %esi
0166 /* guess^2 now in %edx:%eax */
0167
0168     movl    FPU_fsqrt_arg_1,%ecx
0169     subl    %ecx,%eax
0170     movl    FPU_fsqrt_arg_2,%ecx    /* ms word of normalized n */
0171     sbbl    %ecx,%edx
0172     jnc sqrt_stage_2_positive
0173
0174 /* Subtraction gives a negative result,
0175    negate the result before division. */
0176     notl    %edx
0177     notl    %eax
0178     addl    $1,%eax
0179     adcl    $0,%edx
0180
0181     divl    %esi
0182     movl    %eax,%ecx
0183
0184     movl    %edx,%eax
0185     divl    %esi
0186     jmp sqrt_stage_2_finish
0187
0188 sqrt_stage_2_positive:
0189     divl    %esi
0190     movl    %eax,%ecx
0191
0192     movl    %edx,%eax
0193     divl    %esi
0194
0195     notl    %ecx
0196     notl    %eax
0197     addl    $1,%eax
0198     adcl    $0,%ecx
0199
0200 sqrt_stage_2_finish:
0201     sarl    $1,%ecx     /* divide by 2 */
0202     rcrl    $1,%eax
0203
0204     /* Form the new estimate in %esi:%edi */
0205     movl    %eax,%edi
0206     addl    %ecx,%esi
0207
0208     jnz sqrt_stage_2_done   /* result should be [1..2) */
0209
0210 #ifdef PARANOID
0211 /* It should be possible to get here only if the arg is ffff....ffff */
0212     cmpl    $0xffffffff,FPU_fsqrt_arg_1
0213     jnz sqrt_stage_2_error
0214 #endif /* PARANOID */
0215
0216 /* The best rounded result. */
0217     xorl    %eax,%eax
0218     decl    %eax
0219     movl    %eax,%edi
0220     movl    %eax,%esi
0221     movl    $0x7fffffff,%eax
0222     jmp sqrt_round_result
0223
0224 #ifdef PARANOID
0225 sqrt_stage_2_error:
0226     pushl   EX_INTERNAL|0x213
0227     call    EXCEPTION
0228 #endif /* PARANOID */
0229
0230 sqrt_stage_2_done:
0231
0232 /* Now the square root has been computed to better than 60 bits. */
0233
0234 /* Find the square of the guess. */
0235     movl    %edi,%eax       /* ls word of guess */
0236     mull    %edi
0237     movl    %edx,FPU_accum_1
0238
0239     movl    %esi,%eax
0240     mull    %esi
0241     movl    %edx,FPU_accum_3
0242     movl    %eax,FPU_accum_2
0243
0244     movl    %edi,%eax
0245     mull    %esi
0246     addl    %eax,FPU_accum_1
0247     adcl    %edx,FPU_accum_2
0248     adcl    $0,FPU_accum_3
0249
0250 /*  movl    %esi,%eax */
0251 /*  mull    %edi */
0252     addl    %eax,FPU_accum_1
0253     adcl    %edx,FPU_accum_2
0254     adcl    $0,FPU_accum_3
0255
0256 /* guess^2 now in FPU_accum_3:FPU_accum_2:FPU_accum_1 */
0257
0258     movl    FPU_fsqrt_arg_0,%eax        /* get normalized n */
0259     subl    %eax,FPU_accum_1
0260     movl    FPU_fsqrt_arg_1,%eax
0261     sbbl    %eax,FPU_accum_2
0262     movl    FPU_fsqrt_arg_2,%eax        /* ms word of normalized n */
0263     sbbl    %eax,FPU_accum_3
0264     jnc sqrt_stage_3_positive
0265
0266 /* Subtraction gives a negative result,
0267    negate the result before division */
0268     notl    FPU_accum_1
0269     notl    FPU_accum_2
0270     notl    FPU_accum_3
0271     addl    $1,FPU_accum_1
0272     adcl    $0,FPU_accum_2
0273
0274 #ifdef PARANOID
0275     adcl    $0,FPU_accum_3  /* This must be zero */
0276     jz  sqrt_stage_3_no_error
0277
0278 sqrt_stage_3_error:
0279     pushl   EX_INTERNAL|0x207
0280     call    EXCEPTION
0281
0282 sqrt_stage_3_no_error:
0283 #endif /* PARANOID */
0284
0285     movl    FPU_accum_2,%edx
0286     movl    FPU_accum_1,%eax
0287     divl    %esi
0288     movl    %eax,%ecx
0289
0290     movl    %edx,%eax
0291     divl    %esi
0292
0293     sarl    $1,%ecx     /* divide by 2 */
0294     rcrl    $1,%eax
0295
0296     /* prepare to round the result */
0297
0298     addl    %ecx,%edi
0299     adcl    $0,%esi
0300
0301     jmp sqrt_stage_3_finished
0302
0303 sqrt_stage_3_positive:
0304     movl    FPU_accum_2,%edx
0305     movl    FPU_accum_1,%eax
0306     divl    %esi
0307     movl    %eax,%ecx
0308
0309     movl    %edx,%eax
0310     divl    %esi
0311
0312     sarl    $1,%ecx     /* divide by 2 */
0313     rcrl    $1,%eax
0314
0315     /* prepare to round the result */
0316
0317     notl    %eax        /* Negate the correction term */
0318     notl    %ecx
0319     addl    $1,%eax
0320     adcl    $0,%ecx     /* carry here ==> correction == 0 */
0321     adcl    $0xffffffff,%esi
0322
0323     addl    %ecx,%edi
0324     adcl    $0,%esi
0325
0326 sqrt_stage_3_finished:
0327
0328 /*
0329  * The result in %esi:%edi:%esi should be good to about 90 bits here,
0330  * and the rounding information here does not have sufficient accuracy
0331  * in a few rare cases.
0332  */
0333     cmpl    $0xffffffe0,%eax
0334     ja  sqrt_near_exact_x
0335
0336     cmpl    $0x00000020,%eax
0337     jb  sqrt_near_exact
0338
0339     cmpl    $0x7fffffe0,%eax
0340     jb  sqrt_round_result
0341
0342     cmpl    $0x80000020,%eax
0343     jb  sqrt_get_more_precision
0344
0345 sqrt_round_result:
0346 /* Set up for rounding operations */
0347     movl    %eax,%edx
0348     movl    %esi,%eax
0349     movl    %edi,%ebx
0350     movl    PARAM1,%edi
0351     movw    EXP_BIAS,EXP(%edi)  /* Result is in  [1.0 .. 2.0) */
0352     jmp fpu_reg_round
0353
0354
0355 sqrt_near_exact_x:
0356 /* First, the estimate must be rounded up. */
0357     addl    $1,%edi
0358     adcl    $0,%esi
0359
0360 sqrt_near_exact:
0361 /*
0362  * This is an easy case because x^1/2 is monotonic.
0363  * We need just find the square of our estimate, compare it
0364  * with the argument, and deduce whether our estimate is
0365  * above, below, or exact. We use the fact that the estimate
0366  * is known to be accurate to about 90 bits.
0367  */
0368     movl    %edi,%eax       /* ls word of guess */
0369     mull    %edi
0370     movl    %edx,%ebx       /* 2nd ls word of square */
0371     movl    %eax,%ecx       /* ls word of square */
0372
0373     movl    %edi,%eax
0374     mull    %esi
0375     addl    %eax,%ebx
0376     addl    %eax,%ebx
0377
0378 #ifdef PARANOID
0379     cmp $0xffffffb0,%ebx
0380     jb  sqrt_near_exact_ok
0381
0382     cmp $0x00000050,%ebx
0383     ja  sqrt_near_exact_ok
0384
0385     pushl   EX_INTERNAL|0x214
0386     call    EXCEPTION
0387
0388 sqrt_near_exact_ok:
0389 #endif /* PARANOID */
0390
0391     or  %ebx,%ebx
0392     js  sqrt_near_exact_small
0393
0394     jnz sqrt_near_exact_large
0395
0396     or  %ebx,%edx
0397     jnz sqrt_near_exact_large
0398
0399 /* Our estimate is exactly the right answer */
0400     xorl    %eax,%eax
0401     jmp sqrt_round_result
0402
0403 sqrt_near_exact_small:
0404 /* Our estimate is too small */
0405     movl    $0x000000ff,%eax
0406     jmp sqrt_round_result
0407
0408 sqrt_near_exact_large:
0409 /* Our estimate is too large, we need to decrement it */
0410     subl    $1,%edi
0411     sbbl    $0,%esi
0412     movl    $0xffffff00,%eax
0413     jmp sqrt_round_result
0414
0415
0416 sqrt_get_more_precision:
0417 /* This case is almost the same as the above, except we start
0418    with an extra bit of precision in the estimate. */
0419     stc         /* The extra bit. */
0420     rcll    $1,%edi     /* Shift the estimate left one bit */
0421     rcll    $1,%esi
0422
0423     movl    %edi,%eax       /* ls word of guess */
0424     mull    %edi
0425     movl    %edx,%ebx       /* 2nd ls word of square */
0426     movl    %eax,%ecx       /* ls word of square */
0427
0428     movl    %edi,%eax
0429     mull    %esi
0430     addl    %eax,%ebx
0431     addl    %eax,%ebx
0432
0433 /* Put our estimate back to its original value */
0434     stc         /* The ms bit. */
0435     rcrl    $1,%esi     /* Shift the estimate left one bit */
0436     rcrl    $1,%edi
0437
0438 #ifdef PARANOID
0439     cmp $0xffffff60,%ebx
0440     jb  sqrt_more_prec_ok
0441
0442     cmp $0x000000a0,%ebx
0443     ja  sqrt_more_prec_ok
0444
0445     pushl   EX_INTERNAL|0x215
0446     call    EXCEPTION
0447
0448 sqrt_more_prec_ok:
0449 #endif /* PARANOID */
0450
0451     or  %ebx,%ebx
0452     js  sqrt_more_prec_small
0453
0454     jnz sqrt_more_prec_large
0455
0456     or  %ebx,%ecx
0457     jnz sqrt_more_prec_large
0458
0459 /* Our estimate is exactly the right answer */
0460     movl    $0x80000000,%eax
0461     jmp sqrt_round_result
0462
0463 sqrt_more_prec_small:
0464 /* Our estimate is too small */
0465     movl    $0x800000ff,%eax
0466     jmp sqrt_round_result
0467
0468 sqrt_more_prec_large:
0469 /* Our estimate is too large */
0470     movl    $0x7fffff00,%eax
0471     jmp sqrt_round_result
0472 SYM_FUNC_END(wm_sqrt)