x86/math-emu/div_Xsig.S

0001 /* SPDX-License-Identifier: GPL-2.0 */
0002     .file   "div_Xsig.S"
0003 /*---------------------------------------------------------------------------+
0004  |  div_Xsig.S                                                               |
0005  |                                                                           |
0006  | Division subroutine for 96 bit quantities                                 |
0007  |                                                                           |
0008  | Copyright (C) 1994,1995                                                   |
0009  |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
0010  |                       Australia.  E-mail billm@jacobi.maths.monash.edu.au |
0011  |                                                                           |
0012  |                                                                           |
0013  +---------------------------------------------------------------------------*/
0014
0015 /*---------------------------------------------------------------------------+
0016  | Divide the 96 bit quantity pointed to by a, by that pointed to by b, and  |
0017  | put the 96 bit result at the location d.                                  |
0018  |                                                                           |
0019  | The result may not be accurate to 96 bits. It is intended for use where   |
0020  | a result better than 64 bits is required. The result should usually be    |
0021  | good to at least 94 bits.                                                 |
0022  | The returned result is actually half of the true quotient, because the    |
0023  | dividend is halved before dividing. This is done to prevent overflow.     |
0024  |                                                                           |
0025  |  .aaaaaaaaaaaaaa / .bbbbbbbbbbbbb  ->  .dddddddddddd                      |
0026  |                                                                           |
0027  |  void div_Xsig(Xsig *a, Xsig *b, Xsig *dest)                              |
0028  |                                                                           |
0029  +---------------------------------------------------------------------------*/
0030
0031 #include "exception.h"
0032 #include "fpu_emu.h"
0033
0034
0035 #define XsigLL(x)   (x)    /* least significant dword of the Xsig at base register x (offset 0) */
0036 #define XsigL(x)    4(x)    /* middle dword (offset 4) */
0037 #define XsigH(x)    8(x)    /* most significant dword (offset 8) */
0038
0039
0040 #ifndef NON_REENTRANT_FPU
0041 /*
0042     Local storage on the stack (seven dwords at negative offsets from %ebp):
0043     Accumulator:    FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0
0044  */
0045 #define FPU_accum_3 -4(%ebp)    /* most significant dword of the working numerator */
0046 #define FPU_accum_2 -8(%ebp)
0047 #define FPU_accum_1 -12(%ebp)
0048 #define FPU_accum_0 -16(%ebp)    /* least significant dword of the working numerator */
0049 #define FPU_result_3    -20(%ebp)    /* most significant quotient dword (stored to XsigH of dest) */
0050 #define FPU_result_2    -24(%ebp)    /* middle quotient dword (stored to XsigL of dest) */
0051 #define FPU_result_1    -28(%ebp)    /* least significant quotient dword (stored to XsigLL of dest) */
0052
0053 #else
0054 .data
0055 /*
0056     Local storage in a static area (one copy in .data instead of per-call stack slots):
0057     Accumulator:    FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0
0058  */
0059     .align 4,0
0060 FPU_accum_3:    /* most significant dword of the working numerator */
0061     .long   0
0062 FPU_accum_2:
0063     .long   0
0064 FPU_accum_1:
0065     .long   0
0066 FPU_accum_0:    /* least significant dword of the working numerator */
0067     .long   0
0068 FPU_result_3:    /* most significant quotient dword */
0069     .long   0
0070 FPU_result_2:    /* middle quotient dword */
0071     .long   0
0072 FPU_result_1:    /* least significant quotient dword */
0073     .long   0
0074 #endif /* NON_REENTRANT_FPU */
0075
0076
0077 .text
0078 SYM_FUNC_START(div_Xsig)    /* void div_Xsig(Xsig *a, Xsig *b, Xsig *dest): dest ~= (a/2)/b, built one 32-bit quotient word at a time */
0079     pushl   %ebp
0080     movl    %esp,%ebp    /* frame base; the stack variant of the FPU_* locals is addressed off %ebp */
0081 #ifndef NON_REENTRANT_FPU
0082     subl    $28,%esp    /* 7 dwords: FPU_accum_0..3 and FPU_result_1..3 */
0083 #endif /* NON_REENTRANT_FPU */
0084
0085     pushl   %esi    /* saved here, restored at L_exit */
0086     pushl   %edi    /* saved/restored although not otherwise referenced in this function */
0087     pushl   %ebx
0088
0089     movl    PARAM1,%esi /* pointer to num (PARAM1..3 are defined outside this file) */
0090     movl    PARAM2,%ebx /* pointer to denom; %ebx keeps this value until L_exit */
0091
0092 #ifdef PARANOID
0093     testl   $0x80000000, XsigH(%ebx)    /* Divisor: its top bit is expected to be set */
0094     je  L_bugged    /* top bit clear: report an internal error */
0095 #endif /* PARANOID */
0096
0097
0098 /*---------------------------------------------------------------------------+
0099  |  Divide:   Return  arg1/arg2 to arg3.                                     |
0100  |                                                                           |
0101  |  The maximum returned value is (ignoring exponents)                       |
0102  |               .ffffffff ffffffff                                          |
0103  |               ------------------  =  1.ffffffff fffffffe                  |
0104  |               .80000000 00000000                                          |
0105  | and the minimum is                                                        |
0106  |               .80000000 00000000                                          |
0107  |               ------------------  =  .80000000 00000001   (rounded)       |
0108  |               .ffffffff ffffffff                                          |
0109  |                                                                           |
0110  +---------------------------------------------------------------------------*/
0111
0112     /* Save extended dividend in local register */
0113
0114     /* Divide by 2 to prevent overflow */
0115     clc    /* shift a zero into the top bit of the high dword */
0116     movl    XsigH(%esi),%eax
0117     rcrl    %eax    /* rotate right through CF; the bit shifted out feeds the next dword */
0118     movl    %eax,FPU_accum_3
0119     movl    XsigL(%esi),%eax
0120     rcrl    %eax
0121     movl    %eax,FPU_accum_2
0122     movl    XsigLL(%esi),%eax
0123     rcrl    %eax
0124     movl    %eax,FPU_accum_1
0125     movl    $0,%eax    /* movl (not xorl) so that CF from the previous rcrl survives */
0126     rcrl    %eax    /* the dividend's lowest bit becomes the top bit of FPU_accum_0 */
0127     movl    %eax,FPU_accum_0
0128
0129     movl    FPU_accum_2,%eax    /* Get the current num */
0130     movl    FPU_accum_3,%edx    /* %edx:%eax = top 64 bits of the halved dividend */
0131
0132 /*----------------------------------------------------------------------*/
0133 /* Initialization done.
0134    Do the first 32 bits. */
0135
0136     /* We will divide by a number which is too large */
0137     movl    XsigH(%ebx),%ecx
0138     addl    $1,%ecx    /* augmented denom ms dw; stays in %ecx for the second stage too */
0139     jnc LFirst_div_not_1    /* no carry: %ecx did not wrap, a real divl is possible */
0140
0141     /* here we need to divide by 100000000h,
0142        i.e., no division at all.. */
0143     mov %edx,%eax    /* quotient of %edx:%eax by 2^32 is simply %edx */
0144     jmp LFirst_div_done
0145
0146 LFirst_div_not_1:
0147     divl    %ecx        /* Divide the numerator by the augmented
0148                    denom ms dw */
0149
0150 LFirst_div_done:
0151     movl    %eax,FPU_result_3   /* Put the result in the answer */
0152
0153     mull    XsigH(%ebx) /* mul by the ms dw of the denom */
0154
0155     subl    %eax,FPU_accum_2    /* Subtract from the num local reg */
0156     sbbl    %edx,FPU_accum_3
0157
0158     movl    FPU_result_3,%eax   /* Get the result back */
0159     mull    XsigL(%ebx) /* now mul the ls dw of the denom */
0160
0161     subl    %eax,FPU_accum_1    /* Subtract from the num local reg */
0162     sbbl    %edx,FPU_accum_2
0163     sbbl    $0,FPU_accum_3
0164     je  LDo_2nd_32_bits     /* Must check for non-zero result here: ZF reflects FPU_accum_3 after the sbbl above */
0165
0166 #ifdef PARANOID
0167     jb  L_bugged_1    /* borrow out of FPU_accum_3: the quotient estimate was too large */
0168 #endif /* PARANOID */
0169
0170     /* need to subtract another once of the denom */
0171     incl    FPU_result_3    /* Correct the answer */
0172
0173     movl    XsigL(%ebx),%eax
0174     movl    XsigH(%ebx),%edx
0175     subl    %eax,FPU_accum_1    /* Subtract from the num local reg */
0176     sbbl    %edx,FPU_accum_2
0177
0178 #ifdef PARANOID
0179     sbbl    $0,FPU_accum_3    /* propagate the borrow; only done when PARANOID is defined */
0180     jne L_bugged_1  /* Must check for non-zero result here */
0181 #endif /* PARANOID */
0182
0183 /*----------------------------------------------------------------------*/
0184 /* Half of the main problem is done, there is just a reduced numerator
0185    to handle now.
0186    Work with the second 32 bits, FPU_accum_3 not used from now on */
0187 LDo_2nd_32_bits:
0188     movl    FPU_accum_2,%edx    /* get the reduced num */
0189     movl    FPU_accum_1,%eax
0190
0191     /* need to check for possible subsequent overflow */
0192     cmpl    XsigH(%ebx),%edx
0193     jb  LDo_2nd_div    /* high dword below denom high dword: safe to divide */
0194     ja  LPrevent_2nd_overflow
0195
0196     cmpl    XsigL(%ebx),%eax    /* high dwords equal: decide on the next dword */
0197     jb  LDo_2nd_div
0198
0199 LPrevent_2nd_overflow:
0200 /* The numerator is greater or equal, would cause overflow */
0201     /* prevent overflow */
0202     subl    XsigL(%ebx),%eax
0203     sbbl    XsigH(%ebx),%edx
0204     movl    %edx,FPU_accum_2
0205     movl    %eax,FPU_accum_1
0206
0207     incl    FPU_result_3    /* Reflect the subtraction in the answer */
0208
0209 #ifdef PARANOID
0210     je  L_bugged_2  /* Can't bump the result to 1.0: ZF here means the incl wrapped to zero */
0211 #endif /* PARANOID */
0212
0213 LDo_2nd_div:
0214     cmpl    $0,%ecx     /* augmented denom msw */
0215     jnz LSecond_div_not_1
0216
0217     /* %ecx == 0, we are dividing by 1.0 */
0218     mov %edx,%eax    /* same shortcut as in the first stage */
0219     jmp LSecond_div_done
0220
0221 LSecond_div_not_1:
0222     divl    %ecx        /* Divide the numerator by the augmented denom ms dw */
0223
0224 LSecond_div_done:
0225     movl    %eax,FPU_result_2   /* Put the result in the answer */
0226
0227     mull    XsigH(%ebx) /* mul by the ms dw of the denom */
0228
0229     subl    %eax,FPU_accum_1    /* Subtract from the num local reg */
0230     sbbl    %edx,FPU_accum_2
0231
0232 #ifdef PARANOID
0233     jc  L_bugged_2    /* borrow out of FPU_accum_2 is not expected here */
0234 #endif /* PARANOID */
0235
0236     movl    FPU_result_2,%eax   /* Get the result back */
0237     mull    XsigL(%ebx) /* now mul the ls dw of the denom */
0238
0239     subl    %eax,FPU_accum_0    /* Subtract from the num local reg */
0240     sbbl    %edx,FPU_accum_1    /* Subtract from the num local reg */
0241     sbbl    $0,FPU_accum_2
0242
0243 #ifdef PARANOID
0244     jc  L_bugged_2    /* jc leaves the flags alone, so the jz below still sees this sbbl */
0245 #endif /* PARANOID */
0246
0247     jz  LDo_3rd_32_bits    /* FPU_accum_2 is now zero: no correction needed */
0248
0249 #ifdef PARANOID
0250     cmpl    $1,FPU_accum_2    /* the only other value expected here is exactly 1 */
0251     jne L_bugged_2
0252 #endif /* PARANOID */
0253
0254     /* need to subtract another once of the denom */
0255     movl    XsigL(%ebx),%eax
0256     movl    XsigH(%ebx),%edx
0257     subl    %eax,FPU_accum_0    /* Subtract from the num local reg */
0258     sbbl    %edx,FPU_accum_1
0259     sbbl    $0,FPU_accum_2
0260
0261 #ifdef PARANOID
0262     jc  L_bugged_2    /* after the correction FPU_accum_2 must be zero with no borrow */
0263     jne L_bugged_2
0264 #endif /* PARANOID */
0265
0266     addl    $1,FPU_result_2 /* Correct the answer */
0267     adcl    $0,FPU_result_3    /* carry from the low quotient word into the high one */
0268
0269 #ifdef PARANOID
0270     jc  L_bugged_2  /* a carry out of FPU_result_3 is not expected here */
0271 #endif /* PARANOID */
0272
0273 /*----------------------------------------------------------------------*/
0274 /* The division is essentially finished here, we just need to perform
0275    tidying operations.
0276    Deal with the 3rd 32 bits */
0277 LDo_3rd_32_bits:
0278     /* We use an approximation for the third 32 bits.
0279     To take account of the 3rd 32 bits of the divisor
0280     (call them del), we subtract  del * (a/b) */
0281
0282     movl    FPU_result_3,%eax   /* a/b */
0283     mull    XsigLL(%ebx)        /* del */
0284
0285     subl    %edx,FPU_accum_1    /* only the high dword of the product is used; %eax is dropped */
0286
0287     /* A borrow indicates that the result is negative */
0288     jnb LTest_over
0289
0290     movl    XsigH(%ebx),%edx
0291     addl    %edx,FPU_accum_1    /* add the denom ms dw back once */
0292
0293     subl    $1,FPU_result_2     /* Adjust the answer */
0294     sbbl    $0,FPU_result_3    /* 64-bit decrement of FPU_result_3:FPU_result_2 */
0295
0296     /* The above addition might not have been enough, check again. */
0297     movl    FPU_accum_1,%edx    /* get the reduced num */
0298     cmpl    XsigH(%ebx),%edx    /* denom */
0299     jb  LDo_3rd_div
0300
0301     movl    XsigH(%ebx),%edx
0302     addl    %edx,FPU_accum_1    /* add the denom ms dw back a second time */
0303
0304     subl    $1,FPU_result_2     /* Adjust the answer */
0305     sbbl    $0,FPU_result_3
0306     jmp LDo_3rd_div
0307
0308 LTest_over:
0309     movl    FPU_accum_1,%edx    /* get the reduced num */
0310
0311     /* need to check for possible subsequent overflow */
0312     cmpl    XsigH(%ebx),%edx    /* denom */
0313     jb  LDo_3rd_div
0314
0315     /* prevent overflow */
0316     subl    XsigH(%ebx),%edx
0317     movl    %edx,FPU_accum_1
0318
0319     addl    $1,FPU_result_2 /* Reflect the subtraction in the answer */
0320     adcl    $0,FPU_result_3
0321
0322 LDo_3rd_div:
0323     movl    FPU_accum_0,%eax
0324     movl    FPU_accum_1,%edx    /* %edx:%eax = remaining numerator */
0325     divl    XsigH(%ebx)    /* divide by the (un-augmented) denom ms dw only */
0326
0327     movl    %eax,FPU_result_1       /* Rough estimate of third word */
0328
0329     movl    PARAM3,%esi     /* pointer to answer */
0330
0331     movl    FPU_result_1,%eax    /* copy the three quotient words out, least significant first */
0332     movl    %eax,XsigLL(%esi)
0333     movl    FPU_result_2,%eax
0334     movl    %eax,XsigL(%esi)
0335     movl    FPU_result_3,%eax
0336     movl    %eax,XsigH(%esi)
0337
0338 L_exit:
0339     popl    %ebx    /* restore in reverse order of the pushes at entry */
0340     popl    %edi
0341     popl    %esi
0342
0343     leave    /* drop the locals and restore %ebp */
0344     RET    /* RET is a macro defined outside this file */
0345
0346
0347 #ifdef PARANOID
0348 /* The logic is wrong if we got here.
       Each handler passes a distinct code to EXCEPTION (defined outside this file)
       and then leaves through L_exit without storing anything to dest. */
0349 L_bugged:
0350     pushl   EX_INTERNAL|0x240    /* argument for EXCEPTION; EX_INTERNAL is defined outside this file */
0351     call    EXCEPTION
0352     pop %ebx    /* discard the pushed argument; L_exit reloads the saved %ebx */
0353     jmp L_exit
0354
0355 L_bugged_1:
0356     pushl   EX_INTERNAL|0x241    /* raised from the first-32-bits stage */
0357     call    EXCEPTION
0358     pop %ebx    /* discard the pushed argument */
0359     jmp L_exit
0360
0361 L_bugged_2:
0362     pushl   EX_INTERNAL|0x242    /* raised from the second-32-bits stage */
0363     call    EXCEPTION
0364     pop %ebx    /* discard the pushed argument */
0365     jmp L_exit
0366 #endif /* PARANOID */
0367 SYM_FUNC_END(div_Xsig)