0001
0002 .file "wm_sqrt.S"
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024
0025
0026
0027
0028
0029 #include "exception.h"
0030 #include "fpu_emu.h"
0031
0032
0033 #ifndef NON_REENTRANT_FPU
0034 /* Re-entrant build: the scratch words are slots in the 28 bytes wm_sqrt reserves below %ebp. */
0035 #define FPU_accum_3 -4(%ebp)
0036 #define FPU_accum_2 -8(%ebp)
0037 #define FPU_accum_1 -12(%ebp)
0038 #define FPU_accum_0 -16(%ebp)
0039
0040 /* FPU_accum_3:FPU_accum_2:FPU_accum_1 receive the square of the 64-bit estimate and */
0041 /* then the difference between that square and the argument. */
0042 /* FPU_accum_0 is defined but not referenced by the code visible in this file. */
0043
0044 /* FPU_fsqrt_arg_2:FPU_fsqrt_arg_1:FPU_fsqrt_arg_0 hold the working copy of the */
0045 /* argument significand; FPU_fsqrt_arg_2 is the most significant word. */
0046 #define FPU_fsqrt_arg_2 -20(%ebp)
0047 #define FPU_fsqrt_arg_1 -24(%ebp)
0048 #define FPU_fsqrt_arg_0 -28(%ebp)
0049
0050 #else
0051 /* NON_REENTRANT_FPU build: the same names are zero-initialised longs in .data instead of frame slots. */
0052 .data
0053 .align 4,0
0054 FPU_accum_3:
0055 .long 0
0056 FPU_accum_2:
0057 .long 0
0058 FPU_accum_1:
0059 .long 0
0060 FPU_accum_0:
0061 .long 0
0062
0063 /* Working copy of the argument significand, three words, */
0064 /* FPU_fsqrt_arg_2 being the most significant. */
0065
0066
0067
0068 FPU_fsqrt_arg_2:
0069 .long 0
0070 FPU_fsqrt_arg_1:
0071 .long 0
0072 FPU_fsqrt_arg_0:
0073 .long 0
0074 #endif
0075 /* wm_sqrt: square root of the significand of the operand at PARAM1, built up by integer Newton iteration in stages. */
0076 /* No ret is executed here; the paths visible in this file end at sqrt_round_result, which jumps to fpu_reg_round (defined elsewhere). */
0077 .text
0078 SYM_FUNC_START(wm_sqrt)
0079 pushl %ebp
0080 movl %esp,%ebp
0081 #ifndef NON_REENTRANT_FPU
0082 subl $28,%esp /* seven 4-byte scratch slots at -4(%ebp) .. -28(%ebp) */
0083 #endif
0084 pushl %esi
0085 pushl %edi
0086 pushl %ebx
0087
0088 movl PARAM1,%esi /* PARAM1 comes from an included header; presumably the first stack argument, a pointer to the operand - confirm */
0089
0090 movl SIGH(%esi),%eax /* %eax:%ecx = operand significand, high word : low word */
0091 movl SIGL(%esi),%ecx
0092 xorl %edx,%edx /* third (lowest) word of the working argument starts at zero */
0093
0094 /* If the exponent field equals EXP_BIAS the significand is pre-shifted right one bit, */
0095 /* spilling its lowest bit into %edx; otherwise it is used unshifted. */
0096 cmpw EXP_BIAS,EXP(%esi)
0097 jnz sqrt_arg_ge_2
0098
0099 shrl $1,%eax /* 96-bit right shift of %eax:%ecx:%edx through the carry flag */
0100 rcrl $1,%ecx
0101 rcrl $1,%edx
0102
0103 sqrt_arg_ge_2:
0104
0105 /* Store the working argument n; SIGH/SIGL are not read again in this routine. */
0106
0107 movl %eax,FPU_fsqrt_arg_2 /* most significant word of n */
0108 movl %ecx,FPU_fsqrt_arg_1
0109 movl %edx,FPU_fsqrt_arg_0
0110
0111 /* Linear first guess: 2 * high32((msw(n)/2 + 0x40000000) * 0xaaaaaaaa). */
0112 shrl $1,%eax
0113 addl $0x40000000,%eax
0114 movl $0xaaaaaaaa,%ecx /* 0xaaaaaaaa / 2^32 is roughly 2/3 */
0115 mull %ecx
0116 shll %edx
0117 testl $0x80000000,%edx
0118 jnz sqrt_prelim_no_adjust /* top bit already set: keep the guess as computed */
0119
0120 movl $0x80000000,%edx /* otherwise raise the guess to 0x80000000 */
0121
0122 sqrt_prelim_no_adjust:
0123 movl %edx,%esi /* %esi = first guess g */
0124
0125 /* Three Newton-style rounds follow, each computing g = g/2 + q, where q is the */
0126 /* 32-bit quotient of the 64-bit value (msw(n)/2):%eax divided by g. */
0127
0128 movl FPU_fsqrt_arg_2,%ecx
0129
0130 /* %eax is not reloaded between rounds, so the low dividend word is whatever the */
0131 /* preceding mull/divl left there (low product word, then the previous quotient). */
0132
0133
0134
0135
0136
0137 shrl %ecx /* %ecx = msw(n)/2, the high dividend word of every round; presumably halved to rule out divide overflow - confirm */
0138
0139 /* round 1 */
0140 movl %ecx,%edx
0141 divl %esi
0142 shrl %esi
0143 addl %eax,%esi
0144 /* round 2 */
0145 movl %ecx,%edx
0146 divl %esi
0147 shrl %esi
0148 addl %eax,%esi
0149 /* round 3 */
0150 movl %ecx,%edx
0151 divl %esi
0152 shrl %esi
0153 addl %eax,%esi
0154
0155 /* Stage 2: extend the 32-bit guess in %esi to a 64-bit estimate %esi:%edi with */
0156 /*     guess + (n - guess^2) / (2 * guess) */
0157 /* using only the upper two words of n (FPU_fsqrt_arg_2:FPU_fsqrt_arg_1). */
0158
0159
0160
0161
0162
0163 /* Square the guess: %edx:%eax = %esi * %esi. */
0164 movl %esi,%eax
0165 mull %esi
0166
0167
0168 movl FPU_fsqrt_arg_1,%ecx
0169 subl %ecx,%eax
0170 movl FPU_fsqrt_arg_2,%ecx
0171 sbbl %ecx,%edx /* %edx:%eax = guess^2 - upper two words of n */
0172 jnc sqrt_stage_2_positive /* no borrow: guess^2 is not below n */
0173
0174 /* Borrow: the difference is negative, so negate it to divide a positive value. */
0175
0176 notl %edx
0177 notl %eax
0178 addl $1,%eax
0179 adcl $0,%edx
0180
0181 divl %esi /* %eax = quotient, %edx = remainder */
0182 movl %eax,%ecx /* upper word of the correction */
0183
0184 movl %edx,%eax /* second dividend is remainder:remainder */
0185 divl %esi /* %eax = lower word of the correction */
0186 jmp sqrt_stage_2_finish
0187
0188 sqrt_stage_2_positive:
0189 divl %esi /* %eax = quotient, %edx = remainder */
0190 movl %eax,%ecx /* upper word of the correction */
0191
0192 movl %edx,%eax /* second dividend is remainder:remainder */
0193 divl %esi /* %eax = lower word of the correction */
0194 /* guess^2 was not below n, so the correction has to be subtracted: negate %ecx:%eax. */
0195 notl %ecx
0196 notl %eax
0197 addl $1,%eax
0198 adcl $0,%ecx
0199
0200 sqrt_stage_2_finish:
0201 sarl $1,%ecx /* signed halving of %ecx:%eax, the division by 2 in the formula */
0202 rcrl $1,%eax
0203
0204 /* New estimate: high word %esi + %ecx, low word %edi = %eax. */
0205 movl %eax,%edi
0206 addl %ecx,%esi
0207
0208 jnz sqrt_stage_2_done /* taken unless the addition left %esi == 0 */
0209
0210 #ifdef PARANOID
0211 /* %esi == 0 is only accepted when FPU_fsqrt_arg_1 is 0xffffffff. */
0212 cmpl $0xffffffff,FPU_fsqrt_arg_1
0213 jnz sqrt_stage_2_error
0214 #endif
0215
0216 /* %esi became zero: substitute the all-ones estimate with 0x7fffffff in %eax and go straight to rounding. */
0217 xorl %eax,%eax
0218 decl %eax /* %eax = 0xffffffff */
0219 movl %eax,%edi
0220 movl %eax,%esi
0221 movl $0x7fffffff,%eax
0222 jmp sqrt_round_result
0223
0224 #ifdef PARANOID
0225 sqrt_stage_2_error:
0226 pushl EX_INTERNAL|0x213
0227 call EXCEPTION /* EXCEPTION is defined elsewhere; if it returns, execution falls through to sqrt_stage_2_done */
0228 #endif
0229
0230 sqrt_stage_2_done:
0231
0232 /* Stage 3: refine the 64-bit estimate %esi:%edi once more with guess + (n - guess^2) / (2 * guess), now against all three words of n. */
0233
0234 /* Square the estimate into FPU_accum_3:2:1; the lowest word of the 128-bit square is not kept. */
0235 movl %edi,%eax
0236 mull %edi
0237 movl %edx,FPU_accum_1 /* high word of low*low */
0238
0239 movl %esi,%eax
0240 mull %esi
0241 movl %edx,FPU_accum_3 /* high*high fills the upper two words */
0242 movl %eax,FPU_accum_2
0243
0244 movl %edi,%eax
0245 mull %esi
0246 addl %eax,FPU_accum_1 /* add the cross product low*high ... */
0247 adcl %edx,FPU_accum_2
0248 adcl $0,FPU_accum_3
0249
0250 /* ... and add it a second time (%edx:%eax is unchanged), giving 2*low*high. */
0251
0252 addl %eax,FPU_accum_1
0253 adcl %edx,FPU_accum_2
0254 adcl $0,FPU_accum_3
0255
0256 /* FPU_accum_3:2:1 = square - n; a borrow out of the top word means the square is below n. */
0257
0258 movl FPU_fsqrt_arg_0,%eax
0259 subl %eax,FPU_accum_1
0260 movl FPU_fsqrt_arg_1,%eax
0261 sbbl %eax,FPU_accum_2
0262 movl FPU_fsqrt_arg_2,%eax
0263 sbbl %eax,FPU_accum_3
0264 jnc sqrt_stage_3_positive
0265
0266 /* Negative difference: negate it. The carry into FPU_accum_3 is only propagated in */
0267 /* PARANOID builds; FPU_accum_3 is not read again after this point. */
0268 notl FPU_accum_1
0269 notl FPU_accum_2
0270 notl FPU_accum_3
0271 addl $1,FPU_accum_1
0272 adcl $0,FPU_accum_2
0273
0274 #ifdef PARANOID
0275 adcl $0,FPU_accum_3 /* the top word of the negated difference is required to be zero */
0276 jz sqrt_stage_3_no_error
0277
0278 sqrt_stage_3_error:
0279 pushl EX_INTERNAL|0x207
0280 call EXCEPTION
0281
0282 sqrt_stage_3_no_error:
0283 #endif
0284
0285 movl FPU_accum_2,%edx
0286 movl FPU_accum_1,%eax
0287 divl %esi /* %eax = quotient, %edx = remainder */
0288 movl %eax,%ecx /* upper word of the correction */
0289
0290 movl %edx,%eax /* second dividend is remainder:remainder */
0291 divl %esi /* %eax = lower word of the correction */
0292
0293 sarl $1,%ecx /* halve %ecx:%eax, the division by 2 in the formula */
0294 rcrl $1,%eax
0295
0296 /* Add the upper correction word to the estimate; %eax keeps the lower */
0297 /* correction word, which sqrt_stage_3_finished examines. */
0298 addl %ecx,%edi
0299 adcl $0,%esi
0300
0301 jmp sqrt_stage_3_finished
0302
0303 sqrt_stage_3_positive:
0304 movl FPU_accum_2,%edx
0305 movl FPU_accum_1,%eax
0306 divl %esi /* %eax = quotient, %edx = remainder */
0307 movl %eax,%ecx /* upper word of the correction */
0308
0309 movl %edx,%eax /* second dividend is remainder:remainder */
0310 divl %esi /* %eax = lower word of the correction */
0311
0312 sarl $1,%ecx /* halve %ecx:%eax, the division by 2 in the formula */
0313 rcrl $1,%eax
0314
0315 /* The square was not below n, so the correction is subtracted: negate %ecx:%eax and */
0316 /* extend the sign into %esi (adds -1 plus the carry, which is set only for a zero correction). */
0317 notl %eax
0318 notl %ecx
0319 addl $1,%eax
0320 adcl $0,%ecx
0321 adcl $0xffffffff,%esi
0322
0323 addl %ecx,%edi
0324 adcl $0,%esi
0325
0326 sqrt_stage_3_finished:
0327
0328 /* %esi:%edi is the 64-bit estimate and %eax the word below it. Choose a rounding path from %eax: */
0329 /*   above 0xffffffe0                      -> sqrt_near_exact_x */
0330 /*   below 0x00000020                      -> sqrt_near_exact */
0331 /*   0x7fffffe0 up to, but not, 0x80000020 -> sqrt_get_more_precision */
0332 /*   anything else                         -> sqrt_round_result directly */
0333 cmpl $0xffffffe0,%eax
0334 ja sqrt_near_exact_x
0335
0336 cmpl $0x00000020,%eax
0337 jb sqrt_near_exact
0338
0339 cmpl $0x7fffffe0,%eax
0340 jb sqrt_round_result
0341
0342 cmpl $0x80000020,%eax
0343 jb sqrt_get_more_precision
0344
0345 sqrt_round_result:
0346 /* Load registers and jump to fpu_reg_round (defined elsewhere). NOTE(review): the register roles are inferred from the moves below - confirm against fpu_reg_round. */
0347 movl %eax,%edx /* %edx = word below the estimate */
0348 movl %esi,%eax /* %eax = high word of the estimate */
0349 movl %edi,%ebx /* %ebx = low word of the estimate */
0350 movl PARAM1,%edi /* %edi = the PARAM1 value loaded at entry */
0351 movw EXP_BIAS,EXP(%edi) /* the exponent field of the operand is set to EXP_BIAS */
0352 jmp fpu_reg_round
0353
0354
0355 sqrt_near_exact_x:
0356 /* Entered when the word below the estimate (%eax) was above 0xffffffe0: first round %esi:%edi up by one. */
0357 addl $1,%edi
0358 adcl $0,%esi
0359
0360 sqrt_near_exact:
0361 /* Decide whether the 64-bit estimate %esi:%edi is exact, too small or too large by */
0362 /* recomputing the two lowest words of its square: */
0363 /*   %ecx = word 0 (low half of low*low) */
0364 /*   %ebx = word 1 (high half of low*low + 2 * low half of low*high, modulo 2^32) */
0365 /* Word 1 is treated as a signed residual around zero. The verdict is handed to */
0366 /* sqrt_round_result in %eax: 0 (exact), 0x000000ff (too small) or 0xffffff00 (too large, estimate decremented). */
0367
0368 movl %edi,%eax
0369 mull %edi
0370 movl %edx,%ebx /* word 1 contribution from low*low */
0371 movl %eax,%ecx /* word 0 of the square */
0372
0373 movl %edi,%eax
0374 mull %esi
0375 addl %eax,%ebx /* add the low half of low*high twice (2*low*high) */
0376 addl %eax,%ebx
0377
0378 #ifdef PARANOID
0379 cmp $0xffffffb0,%ebx /* NOTE(review): as written the EXCEPTION call below is unreachable - any %ebx not below 0xffffffb0 is necessarily above 0x50 */
0380 jb sqrt_near_exact_ok
0381
0382 cmp $0x00000050,%ebx
0383 ja sqrt_near_exact_ok
0384
0385 pushl EX_INTERNAL|0x214
0386 call EXCEPTION
0387
0388 sqrt_near_exact_ok:
0389 #endif
0390
0391 or %ebx,%ebx
0392 js sqrt_near_exact_small /* word 1 negative: the square falls short */
0393
0394 jnz sqrt_near_exact_large /* word 1 positive: the square overshoots */
0395 /* Word 1 is zero: the estimate is exact only if word 0 of the square, saved in %ecx, is zero as well. */
0396 or %ebx,%ecx /* %ebx is zero here, so this sets the flags from %ecx alone */
0397 jnz sqrt_near_exact_large
0398
0399 /* Both low words of the square are zero: the estimate is exact. */
0400 xorl %eax,%eax
0401 jmp sqrt_round_result
0402
0403 sqrt_near_exact_small:
0404 /* Estimate too small: report a small positive word below it. */
0405 movl $0x000000ff,%eax
0406 jmp sqrt_round_result
0407
0408 sqrt_near_exact_large:
0409 /* Estimate too large: step it down by one and report a word just below the next value. */
0410 subl $1,%edi
0411 sbbl $0,%esi
0412 movl $0xffffff00,%eax
0413 jmp sqrt_round_result
0414
0415
0416 sqrt_get_more_precision:
0417 /* Entered when the word below the estimate (%eax) lies in 0x7fffffe0 .. 0x8000001f. */
0418 /* Shift %esi:%edi left one bit with a 1 shifted in at the bottom, then recompute the two lowest words of the square of that value. */
0419 stc /* the bit shifted into bit 0 of %edi */
0420 rcll $1,%edi
0421 rcll $1,%esi /* the former top bit of %esi is shifted out */
0422
0423 movl %edi,%eax
0424 mull %edi
0425 movl %edx,%ebx /* word 1 contribution from low*low */
0426 movl %eax,%ecx /* word 0 of the square */
0427
0428 movl %edi,%eax
0429 mull %esi
0430 addl %eax,%ebx /* add the low half of low*high twice (2*low*high) */
0431 addl %eax,%ebx
0432
0433 /* Shift the estimate back right, inserting a 1 as the top bit of %esi; this restores the original value provided that bit was set before - presumably always, confirm. */
0434 stc
0435 rcrl $1,%esi
0436 rcrl $1,%edi
0437
0438 #ifdef PARANOID
0439 cmp $0xffffff60,%ebx /* NOTE(review): as written the EXCEPTION call below is unreachable - any %ebx not below 0xffffff60 is necessarily above 0xa0 */
0440 jb sqrt_more_prec_ok
0441
0442 cmp $0x000000a0,%ebx
0443 ja sqrt_more_prec_ok
0444
0445 pushl EX_INTERNAL|0x215
0446 call EXCEPTION
0447
0448 sqrt_more_prec_ok:
0449 #endif
0450
0451 or %ebx,%ebx
0452 js sqrt_more_prec_small /* word 1 negative: the square falls short */
0453
0454 jnz sqrt_more_prec_large /* word 1 positive: the square overshoots */
0455 /* Word 1 is zero: test word 0 of the square, held in %ecx. */
0456 or %ebx,%ecx /* %ebx is zero here, so this sets the flags from %ecx alone */
0457 jnz sqrt_more_prec_large
0458
0459 /* Both low words are zero: report exactly 0x80000000 as the word below the estimate. */
0460 movl $0x80000000,%eax
0461 jmp sqrt_round_result
0462
0463 sqrt_more_prec_small:
0464 /* Square falls short: report a word slightly above 0x80000000. */
0465 movl $0x800000ff,%eax
0466 jmp sqrt_round_result
0467
0468 sqrt_more_prec_large:
0469 /* Square overshoots: report a word slightly below 0x80000000. */
0470 movl $0x7fffff00,%eax
0471 jmp sqrt_round_result
0472 SYM_FUNC_END(wm_sqrt)