x86/math-emu/polynom_Xsig.S

0001 /* SPDX-License-Identifier: GPL-2.0 */
0002 /*---------------------------------------------------------------------------+
0003  |  polynomial_Xsig.S                                                        |
0004  |                                                                           |
0005  | Fixed point arithmetic polynomial evaluation.                             |
0006  |                                                                           |
0007  | Copyright (C) 1992,1993,1994,1995                                         |
0008  |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
0009  |                       Australia.  E-mail billm@jacobi.maths.monash.edu.au |
0010  |                                                                           |
0011  | Call from C as:                                                           |
0012  |   void polynomial_Xsig(Xsig *accum, unsigned long long *x,                |
0013  |                        unsigned long long terms[], int n)                 |
0014  |                                                                           |
0015  | Computes:                                                                 |
0016  | terms[0] + (terms[1] + (terms[2] + ... + (terms[n]*x) ... )*x)*x          |
0017  | (n+1 terms, terms[0]..terms[n]) and adds the result to the 12 byte Xsig.  |
0018  | The terms[] are each 8 bytes, but all computation is performed to 12 byte |
0019  | precision.                                                                |
0020  |                                                                           |
0021  | This function must be used carefully: most overflow of intermediate       |
0022  | results is controlled, but overflow of the result is not.                 |
0023  |                                                                           |
0024  +---------------------------------------------------------------------------*/
0025     .file   "polynomial_Xsig.S"
0026
0027 #include "fpu_emu.h"
0028
0029
0030 #define TERM_SIZE   $8          /* size in bytes of one terms[] entry, as an immediate operand */
0031 #define SUM_MS      -20(%ebp)   /* running sum, most significant long */
0032 #define SUM_MIDDLE  -24(%ebp)   /* running sum, middle long */
0033 #define SUM_LS      -28(%ebp)   /* running sum, least significant long */
0034 #define ACCUM_MS    -4(%ebp)    /* product accumulator, most significant long */
0035 #define ACCUM_MIDDLE    -8(%ebp)    /* product accumulator, middle long */
0036 #define ACCUM_LS    -12(%ebp)   /* product accumulator, least significant long */
0037 #define OVERFLOWED      -16(%ebp)   /* byte flag: 0xff if the last sum carried out of SUM_MS, else 0 */
0038
0039 .text
     /*
      * polynomial_Xsig(accum, x, terms, n)
      *
      * Evaluates a polynomial in x by Horner's scheme, starting from the
      * highest coefficient, and adds the 96 bit result to *accum.
      *
      * The arguments are read through the PARAM1..PARAM4 operand macros,
      * which are not defined in this file (presumably they come from
      * fpu_emu.h):
      *   PARAM1  accum  address of three longs, least significant first;
      *                  the result is added into them.
      *   PARAM2  x      address of two longs (ls at offset 0, ms at 4).
      *   PARAM3  terms  base of an array of 8 byte entries, ls long first.
      *   PARAM4  n      index of the highest entry: terms[n] down to
      *                  terms[0] are read.  PARAM4 doubles as the loop
      *                  counter and is decremented in place.
      *
      * Register use: %esi = x, %edi = current terms[] entry (accum at the
      * end), %eax/%edx = scratch and mull results.  %ebx is saved and
      * restored but not otherwise touched.
      *
      * Each loop pass computes ACCUM = (SUM_MS:SUM_MIDDLE * x) >> 32 as a
      * 96 bit value (SUM_LS takes no part in the multiply) and then forms
      * SUM = ACCUM + next lower term.
      *
      * NOTE(review): for n < 0 the loads from terms[n] still happen before
      * the first sign test; callers are presumably expected to pass n >= 0.
      */
0040 SYM_FUNC_START(polynomial_Xsig)
0041     pushl   %ebp
0042     movl    %esp,%ebp
0043     subl    $32,%esp            /* locals: the SUM_*, ACCUM_* and OVERFLOWED slots at -4..-28(%ebp) */
0044     pushl   %esi
0045     pushl   %edi
0046     pushl   %ebx
0047
0048     movl    PARAM2,%esi     /* %esi = x (address of the 64 bit multiplier) */
0049     movl    PARAM3,%edi     /* %edi = terms */
0050
0051     movl    TERM_SIZE,%eax
0052     mull    PARAM4          /* %eax = 8 * n (mull also overwrites %edx) */
0053     addl    %eax,%edi       /* %edi = &terms[n] */
0054
         /* Seed the sum with the highest term, placed in the upper two longs. */
0055     movl    4(%edi),%edx        /* terms[n] ms long */
0056     movl    %edx,SUM_MS
0057     movl    (%edi),%edx     /* terms[n] ls long */
0058     movl    %edx,SUM_MIDDLE
0059     xor %eax,%eax
0060     movl    %eax,SUM_LS
0061     movb    %al,OVERFLOWED      /* no carry pending yet */
0062
0063     subl    TERM_SIZE,%edi      /* step down to terms[n-1] */
0064     decl    PARAM4
0065     js  L_accum_done        /* n was 0: the sum is just terms[0] */
0066
0067 L_accum_loop:
         /* ACCUM = (SUM_MS:SUM_MIDDLE * x) >> 32, built from four 32x32 products. */
0068     xor %eax,%eax
0069     movl    %eax,ACCUM_MS
0070     movl    %eax,ACCUM_MIDDLE
0071
0072     movl    SUM_MIDDLE,%eax
0073     mull    (%esi)          /* x ls long */
0074     movl    %edx,ACCUM_LS       /* keep only the high half; the low half is dropped */
0075
0076     movl    SUM_MIDDLE,%eax
0077     mull    4(%esi)         /* x ms long */
0078     addl    %eax,ACCUM_LS
0079     adcl    %edx,ACCUM_MIDDLE
0080     adcl    $0,ACCUM_MS     /* propagate the carry to the top long */
0081
0082     movl    SUM_MS,%eax
0083     mull    (%esi)          /* x ls long */
0084     addl    %eax,ACCUM_LS
0085     adcl    %edx,ACCUM_MIDDLE
0086     adcl    $0,ACCUM_MS     /* propagate the carry to the top long */
0087
0088     movl    SUM_MS,%eax
0089     mull    4(%esi)         /* x ms long */
0090     addl    %eax,ACCUM_MIDDLE
0091     adcl    %edx,ACCUM_MS
0092
0093     testb   $0xff,OVERFLOWED    /* did the previous sum carry out of SUM_MS? */
0094     jz  L_no_overflow
0095
         /*
          * The previous sum was really SUM + 2^96, so the product above is
          * short by x << 32: add x into the MIDDLE and MS longs.
          */
0096     movl    (%esi),%eax
0097     addl    %eax,ACCUM_MIDDLE
0098     movl    4(%esi),%eax
0099     adcl    %eax,ACCUM_MS       /* This could overflow too; that carry is not recorded */
0100
0101 L_no_overflow:
0102
0103 /*
0104  * Now put the sum of the next term and the accumulator
0105  * into the sum register
0106  */
         /*
          * The movl stores between the adds leave the flags alone, so the
          * carry runs unbroken from the LS add through to the sbbb.
          *
          * NOTE(review): the first add below uses the term's ls long, the
          * same operand as the MIDDLE add that follows; confirm that adding
          * it into the LS long as well is intended.
          */
0107     movl    ACCUM_LS,%eax
0108     addl    (%edi),%eax     /* term ls long */
0109     movl    %eax,SUM_LS
0110     movl    ACCUM_MIDDLE,%eax
0111     adcl    (%edi),%eax     /* term ls long */
0112     movl    %eax,SUM_MIDDLE
0113     movl    ACCUM_MS,%eax
0114     adcl    4(%edi),%eax        /* term ms long */
0115     movl    %eax,SUM_MS
0116     sbbb    %al,%al         /* %al = 0xff if the add carried out, else 0 */
0117     movb    %al,OVERFLOWED      /* Used in the next iteration */
0118
0119     subl    TERM_SIZE,%edi      /* step down to the next lower term */
0120     decl    PARAM4
0121     jns L_accum_loop        /* loop until the counter goes negative */
0122
0123 L_accum_done:
         /* Add the 96 bit sum into *accum; a carry out of the top long is discarded. */
0124     movl    PARAM1,%edi     /* accum */
0125     movl    SUM_LS,%eax
0126     addl    %eax,(%edi)
0127     movl    SUM_MIDDLE,%eax
0128     adcl    %eax,4(%edi)
0129     movl    SUM_MS,%eax
0130     adcl    %eax,8(%edi)
0131
0132     popl    %ebx
0133     popl    %edi
0134     popl    %esi
0135     leave               /* drop the locals and restore %ebp */
0136     RET
0137 SYM_FUNC_END(polynomial_Xsig)