arch/i386/math-emu/reg_u_div.S

   1         .file   "reg_u_div.S"
   2 /*---------------------------------------------------------------------------+
   3  |  reg_u_div.S                                                              |
   4  |                                                                           |
   5  | Divide one FPU_REG by another and put the result in a destination FPU_REG.|
   6  |                                                                           |
   7  | Copyright (C) 1992,1993,1995,1997                                         |
   8  |                  W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
   9  |                  E-mail   billm@suburbia.net                              |
  10  |                                                                           |
  11  |                                                                           |
  12  +---------------------------------------------------------------------------*/
  13
  14 /*---------------------------------------------------------------------------+
  15  | Call from C as:                                                           |
  16  |    int FPU_u_div(FPU_REG *a, FPU_REG *b, FPU_REG *dest,                   |
  17  |                unsigned int control_word, char *sign)                     |
  18  |                                                                           |
  19  |  Computes the destination exponent from a and b, then adjusts it.         |
  20  |                                                                           |
  21  |    Return value is the tag of the answer, or-ed with FPU_Exception if     |
  22  |    one was raised, or -1 on internal error.                               |
  23  +---------------------------------------------------------------------------*/
  24
  25 #include "exception.h"
  26 #include "fpu_emu.h"
  27 #include "control_w.h"
  28
  29
  30 /* #define      dSIGL(x)        (x) */
  31 /* #define      dSIGH(x)        4(x) */
  32
  33
  34 #ifndef NON_REENTRANT_FPU
  35 /*
  36         Local storage on the stack (offsets from %ebp, 28 bytes in all):
  37         Numerator/remainder: FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0
  38         Quotient (ms:ls):    FPU_result_2:FPU_result_1
  39         Overflow flag:       FPU_ovfl_flag (non-zero => shift result right)  */
  40 #define FPU_accum_3     -4(%ebp)
  41 #define FPU_accum_2     -8(%ebp)
  42 #define FPU_accum_1     -12(%ebp)
  43 #define FPU_accum_0     -16(%ebp)
  44 #define FPU_result_1    -20(%ebp)
  45 #define FPU_result_2    -24(%ebp)
  46 #define FPU_ovfl_flag   -28(%ebp)
  47
  48 #else
  49 .data
  50 /*
  51         Local storage in a static area (same names as the stack variant):
  52         Numerator/remainder: FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0
  53         Quotient (ms:ls):    FPU_result_2:FPU_result_1
  54         Overflow flag:       FPU_ovfl_flag (non-zero => shift result right)  */
  55         .align 4,0
  56 FPU_accum_3:
  57         .long   0
  58 FPU_accum_2:
  59         .long   0
  60 FPU_accum_1:
  61         .long   0
  62 FPU_accum_0:
  63         .long   0
  64 FPU_result_1:
  65         .long   0
  66 FPU_result_2:
  67         .long   0
  68 FPU_ovfl_flag:
  69         .byte   0
  70 #endif /* NON_REENTRANT_FPU */
  71 /* Argument aliases: REGA = dividend, REGB = divisor, DEST = result.  PARAMn is not defined in this file (presumably the n-th C argument, from an included header -- confirm). */
  72 #define REGA    PARAM1
  73 #define REGB    PARAM2
  74 #define DEST    PARAM3
  75
  76 .text
  77 ENTRY(FPU_u_div)
  78         pushl   %ebp
  79         movl    %esp,%ebp
  80 #ifndef NON_REENTRANT_FPU
  81         subl    $28,%esp        /* room for the FPU_* locals at -4..-28(%ebp) */
  82 #endif /* NON_REENTRANT_FPU */
  83         /* %esi, %edi and %ebx are used as pointers below; save them */
  84         pushl   %esi
  85         pushl   %edi
  86         pushl   %ebx
  87         /* %esi -> a (dividend), %ebx -> b (divisor), %edi -> dest */
  88         movl    REGA,%esi
  89         movl    REGB,%ebx
  90         movl    DEST,%edi
  91         /* %edx = EXP(a) - EXP(b) + EXP_BIAS, from the sign-extended 16-bit fields */
  92         movswl  EXP(%esi),%edx
  93         movswl  EXP(%ebx),%eax
  94         subl    %eax,%edx
  95         addl    EXP_BIAS,%edx
  96
  97         /* A denormal and a large number can cause an exponent underflow */
  98         cmpl    EXP_WAY_UNDER,%edx
  99         jg      xExp_not_underflow
 100
 101         /* Clamp to a really low value to allow correct handling */
 102         movl    EXP_WAY_UNDER,%edx
 103
 104 xExp_not_underflow:
 105         /* Only the low 16 bits are stored as dest's exponent */
 106         movw    %dx,EXP(%edi)
 107
 108 #ifdef PARANOID
 109 /*      testl   $0x80000000, SIGH(%esi) // Dividend */
 110 /*      je      L_bugged */
 111         testl   $0x80000000, SIGH(%ebx) /* Divisor: its ms bit must be set */
 112         je      L_bugged
 113 #endif /* PARANOID */
 114
 115 /* Check if the divisor can be treated as having just 32 bits (ls dw is 0) */
 116         cmpl    $0,SIGL(%ebx)
 117         jnz     L_Full_Division /* Can't do a quick divide */
 118
 119 /* We should be able to zip through the division here */
 120         movl    SIGH(%ebx),%ecx /* The divisor */
 121         movl    SIGH(%esi),%edx /* Dividend */
 122         movl    SIGL(%esi),%eax /* Dividend */
 123         /* divl needs %edx < %ecx: if not, record it and take the divisor off once */
 124         cmpl    %ecx,%edx
 125         setaeb  FPU_ovfl_flag   /* Keep a record (1 if %edx >= %ecx, else 0) */
 126         jb      L_no_adjust
 127
 128         subl    %ecx,%edx       /* Prevent the overflow */
 129
 130 L_no_adjust:
 131         /* Divide the 64 bit number by the 32 bit denominator */
 132         divl    %ecx
 133         movl    %eax,FPU_result_2
 134
 135         /* Work on the remainder of the first division: %edx:0 / %ecx */
 136         xorl    %eax,%eax
 137         divl    %ecx
 138         movl    %eax,FPU_result_1
 139
 140         /* Work on the remainder of the 64 bit division; the quotient stays in %eax */
 141         xorl    %eax,%eax
 142         divl    %ecx
 143
 144         testb   $255,FPU_ovfl_flag      /* was the num >= denom ? */
 145         je      L_no_overflow
 146
 147         /* Do the shifting here */
 148         /* increase the exponent */
 149         incw    EXP(%edi)
 150
 151         /* shift the mantissa right one bit */
 152         stc                     /* To set the ms bit */
 153         rcrl    FPU_result_2
 154         rcrl    FPU_result_1
 155         rcrl    %eax
 156
 157 L_no_overflow:
 158         jmp     LRound_precision        /* Do the rounding as required */
 159
 160
 161 /*---------------------------------------------------------------------------+
 162  |  Divide:   Return  arg1/arg2 to arg3.                                     |
 163  |                                                                           |
 164  |  This routine does not use the exponents of arg1 and arg2, but does       |
 165  |  adjust the exponent of arg3.                                             |
 166  |                                                                           |
 167  |  The maximum returned value is (ignoring exponents)                       |
 168  |               .ffffffff ffffffff                                          |
 169  |               ------------------  =  1.ffffffff fffffffe                  |
 170  |               .80000000 00000000                                          |
 171  | and the minimum is                                                        |
 172  |               .80000000 00000000                                          |
 173  |               ------------------  =  .80000000 00000001   (rounded)       |
 174  |               .ffffffff ffffffff                                          |
 175  |                                                                           |
 176  +---------------------------------------------------------------------------*/
 177
 178
 179 L_Full_Division:
 180         /* Save extended dividend in local register */
 181         movl    SIGL(%esi),%eax
 182         movl    %eax,FPU_accum_2
 183         movl    SIGH(%esi),%eax
 184         movl    %eax,FPU_accum_3
 185         xorl    %eax,%eax
 186         movl    %eax,FPU_accum_1        /* zero the extension */
 187         movl    %eax,FPU_accum_0        /* zero the extension */
 188         /* accum_3:accum_2:accum_1:accum_0 now holds dividend:0:0 */
 189         movl    SIGL(%esi),%eax /* Get the current num */
 190         movl    SIGH(%esi),%edx
 191
 192 /*----------------------------------------------------------------------*/
 193 /* Initialization done.
 194    Do the first 32 bits. */
 195         /* Compare the dividend with the divisor, ms dw first */
 196         movb    $0,FPU_ovfl_flag
 197         cmpl    SIGH(%ebx),%edx /* Test for imminent overflow */
 198         jb      LLess_than_1
 199         ja      LGreater_than_1
 200
 201         cmpl    SIGL(%ebx),%eax
 202         jb      LLess_than_1
 203
 204 LGreater_than_1:
 205 /* The dividend is greater or equal, would cause overflow */
 206         setaeb  FPU_ovfl_flag           /* Keep a record; CF is clear on both routes into here, so this stores 1 */
 207
 208         subl    SIGL(%ebx),%eax
 209         sbbl    SIGH(%ebx),%edx /* Prevent the overflow */
 210         movl    %eax,FPU_accum_2
 211         movl    %edx,FPU_accum_3
 212
 213 LLess_than_1:
 214 /* At this point, we have a dividend < divisor, with a record of
 215    adjustment in FPU_ovfl_flag */
 216
 217         /* We will divide by a number which is too large (ms dw of the denom + 1) */
 218         movl    SIGH(%ebx),%ecx
 219         addl    $1,%ecx
 220         jnc     LFirst_div_not_1        /* no carry: the augmented dw fits in 32 bits */
 221
 222         /* here we need to divide by 100000000h,
 223            i.e., no division at all.. */
 224         mov     %edx,%eax
 225         jmp     LFirst_div_done
 226
 227 LFirst_div_not_1:
 228         divl    %ecx            /* Divide the numerator by the augmented
 229                                    denom ms dw */
 230
 231 LFirst_div_done:
 232         movl    %eax,FPU_result_2       /* Put the result in the answer */
 233         /* Subtract (result dw * denom) from the num, one denom dw at a time */
 234         mull    SIGH(%ebx)      /* mul by the ms dw of the denom */
 235
 236         subl    %eax,FPU_accum_2        /* Subtract from the num local reg */
 237         sbbl    %edx,FPU_accum_3
 238
 239         movl    FPU_result_2,%eax       /* Get the result back */
 240         mull    SIGL(%ebx)      /* now mul the ls dw of the denom */
 241
 242         subl    %eax,FPU_accum_1        /* Subtract from the num local reg */
 243         sbbl    %edx,FPU_accum_2
 244         sbbl    $0,FPU_accum_3
 245         je      LDo_2nd_32_bits         /* FPU_accum_3 == 0: the result dw needs no correction */
 246
 247 #ifdef PARANOID
 248         jb      L_bugged_1      /* a borrow out of FPU_accum_3: too much was subtracted */
 249 #endif /* PARANOID */
 250
 251         /* need to subtract the denom once more */
 252         incl    FPU_result_2    /* Correct the answer */
 253
 254         movl    SIGL(%ebx),%eax
 255         movl    SIGH(%ebx),%edx
 256         subl    %eax,FPU_accum_1        /* Subtract from the num local reg */
 257         sbbl    %edx,FPU_accum_2
 258         /* FPU_accum_3 is not read again below; without PARANOID the borrow into it is dropped */
 259 #ifdef PARANOID
 260         sbbl    $0,FPU_accum_3
 261         jne     L_bugged_1      /* Must check for non-zero result here */
 262 #endif /* PARANOID */
 263
 264 /*----------------------------------------------------------------------*/
 265 /* Half of the main problem is done, there is just a reduced numerator
 266    to handle now.
 267    Work with the second 32 bits, FPU_accum_0 not used from now on */
 268 LDo_2nd_32_bits:
 269         movl    FPU_accum_2,%edx        /* get the reduced num */
 270         movl    FPU_accum_1,%eax
 271
 272         /* need to check for possible subsequent overflow (reduced num >= denom) */
 273         cmpl    SIGH(%ebx),%edx
 274         jb      LDo_2nd_div
 275         ja      LPrevent_2nd_overflow
 276
 277         cmpl    SIGL(%ebx),%eax
 278         jb      LDo_2nd_div
 279
 280 LPrevent_2nd_overflow:
 281 /* The numerator is greater or equal, would cause overflow */
 282         /* prevent overflow */
 283         subl    SIGL(%ebx),%eax
 284         sbbl    SIGH(%ebx),%edx
 285         movl    %edx,FPU_accum_2
 286         movl    %eax,FPU_accum_1
 287
 288         incl    FPU_result_2    /* Reflect the subtraction in the answer */
 289
 290 #ifdef PARANOID
 291         je      L_bugged_2      /* Can't bump the result to 1.0 */
 292 #endif /* PARANOID */
 293         /* %ecx still holds the denom ms dw + 1, set up before the first divide */
 294 LDo_2nd_div:
 295         cmpl    $0,%ecx         /* augmented denom msw */
 296         jnz     LSecond_div_not_1
 297
 298         /* %ecx == 0, we are dividing by 1.0 */
 299         mov     %edx,%eax
 300         jmp     LSecond_div_done
 301
 302 LSecond_div_not_1:
 303         divl    %ecx            /* Divide the numerator by the augmented denom ms dw */
 304
 305 LSecond_div_done:
 306         movl    %eax,FPU_result_1       /* Put the result in the answer */
 307
 308         mull    SIGH(%ebx)      /* mul by the ms dw of the denom */
 309
 310         subl    %eax,FPU_accum_1        /* Subtract from the num local reg */
 311         sbbl    %edx,FPU_accum_2
 312
 313 #ifdef PARANOID
 314         jc      L_bugged_2
 315 #endif /* PARANOID */
 316
 317         movl    FPU_result_1,%eax       /* Get the result back */
 318         mull    SIGL(%ebx)      /* now mul the ls dw of the denom */
 319
 320         subl    %eax,FPU_accum_0        /* Subtract from the num local reg */
 321         sbbl    %edx,FPU_accum_1        /* Subtract from the num local reg */
 322         sbbl    $0,FPU_accum_2
 323
 324 #ifdef PARANOID
 325         jc      L_bugged_2
 326 #endif /* PARANOID */
 327
 328         jz      LDo_3rd_32_bits         /* FPU_accum_2 == 0: no correction needed */
 329
 330 #ifdef PARANOID
 331         cmpl    $1,FPU_accum_2
 332         jne     L_bugged_2
 333 #endif /* PARANOID */
 334
 335         /* need to subtract the denom once more */
 336         movl    SIGL(%ebx),%eax
 337         movl    SIGH(%ebx),%edx
 338         subl    %eax,FPU_accum_0        /* Subtract from the num local reg */
 339         sbbl    %edx,FPU_accum_1
 340         sbbl    $0,FPU_accum_2
 341
 342 #ifdef PARANOID
 343         jc      L_bugged_2
 344         jne     L_bugged_2
 345 #endif /* PARANOID */
 346
 347         addl    $1,FPU_result_1 /* Correct the answer */
 348         adcl    $0,FPU_result_2
 349
 350 #ifdef PARANOID
 351         jc      L_bugged_2      /* The corrected answer must not carry out of FPU_result_2 */
 352 #endif /* PARANOID */
 353
 354 /*----------------------------------------------------------------------*/
 355 /* The division is essentially finished here, we just need to perform
 356    tidying operations.
 357    Deal with the 3rd 32 bits */
 358 LDo_3rd_32_bits:
 359         movl    FPU_accum_1,%edx                /* get the reduced num */
 360         movl    FPU_accum_0,%eax
 361         /* No divide is done in this stage: it only brings the remainder below the denom */
 362         /* check whether the remainder is still >= the denom */
 363         cmpl    SIGH(%ebx),%edx /* denom */
 364         jb      LRound_prep
 365         ja      LPrevent_3rd_overflow
 366
 367         cmpl    SIGL(%ebx),%eax /* denom */
 368         jb      LRound_prep
 369
 370 LPrevent_3rd_overflow:
 371         /* take the denom off the remainder once more */
 372         subl    SIGL(%ebx),%eax
 373         sbbl    SIGH(%ebx),%edx
 374         movl    %edx,FPU_accum_1
 375         movl    %eax,FPU_accum_0
 376
 377         addl    $1,FPU_result_1 /* Reflect the subtraction in the answer */
 378         adcl    $0,FPU_result_2
 379         jne     LRound_prep     /* FPU_result_2 is non-zero: the answer did not wrap */
 380         jnc     LRound_prep     /* no carry out of FPU_result_2: the answer did not wrap */
 381
 382         /* This is a tricky spot, there is an overflow of the answer */
 383         movb    $255,FPU_ovfl_flag              /* Overflow -> 1.000 */
 384
 385 LRound_prep:
 386 /*
 387  * Prepare for rounding.
 388  * To test for rounding, we just need to compare 2*accum with the
 389  * denom.  %eax is left holding a code for the outcome of that comparison:
 390  * 0 (accum is zero), 0x70000000 (< 1/2), 0x80000000 (== 1/2), 0xff000000 (> 1/2). */
 391         movl    FPU_accum_0,%ecx
 392         movl    FPU_accum_1,%edx
 393         movl    %ecx,%eax
 394         orl     %edx,%eax
 395         jz      LRound_ovfl             /* The accumulator contains zero. */
 396
 397         /* Multiply by 2 */
 398         clc
 399         rcll    $1,%ecx
 400         rcll    $1,%edx
 401         jc      LRound_large            /* No need to compare, denom smaller */
 402         /* %edx:%ecx = 2*accum - denom; a borrow means the denom was larger */
 403         subl    SIGL(%ebx),%ecx
 404         sbbl    SIGH(%ebx),%edx
 405         jnc     LRound_not_small
 406
 407         movl    $0x70000000,%eax        /* Denom was larger */
 408         jmp     LRound_ovfl
 409
 410 LRound_not_small:
 411         orl     %ecx,%edx               /* ZF from sbbl covers only the ms dw; fold in the ls dw (%edx is reloaded at LRound_precision) */
 412         jnz     LRound_large            /* 2*accum - denom is non-zero in either dw */
 413         movl    $0x80000000,%eax        /* Remainder was exactly 1/2 denom */
 414         jmp     LRound_ovfl
 415
 416 LRound_large:
 417         movl    $0xff000000,%eax        /* Denom was smaller */
 418
 419 LRound_ovfl:
 420 /* We are now ready to deal with rounding, but first we must get
 421    the bits properly aligned */
 422         testb   $255,FPU_ovfl_flag      /* was the num >= denom ? */
 423         je      LRound_precision
 424
 425         incw    EXP(%edi)
 426
 427         /* shift the mantissa right one bit */
 428         stc                     /* Will set the ms bit */
 429         rcrl    FPU_result_2
 430         rcrl    FPU_result_1
 431         rcrl    %eax
 432
 433 /* Round the result as required */
 434 LRound_precision:
 435         decw    EXP(%edi)       /* binary point between 1st & 2nd bits */
 436         /* Leave via fpu_reg_round (not defined in this file) with %eax:%ebx = quotient (ms:ls dw) and %edx = the bits below it; presumably it also pops the saved registers and returns -- confirm */
 437         movl    %eax,%edx
 438         movl    FPU_result_1,%ebx
 439         movl    FPU_result_2,%eax
 440         jmp     fpu_reg_round
 441
 442
 443 #ifdef PARANOID
 444 /* The logic is wrong if we got here: report an internal error code, then return -1 */
 445 L_bugged:
 446         pushl   EX_INTERNAL|0x202       /* the divisor's ms bit was clear */
 447         call    EXCEPTION
 448         pop     %ebx                    /* discard the pushed argument */
 449         jmp     L_exit
 450
 451 L_bugged_1:
 452         pushl   EX_INTERNAL|0x203       /* a check in the first 32-bit stage failed */
 453         call    EXCEPTION
 454         pop     %ebx                    /* discard the pushed argument */
 455         jmp     L_exit
 456
 457 L_bugged_2:
 458         pushl   EX_INTERNAL|0x204       /* a check in the second 32-bit stage failed */
 459         call    EXCEPTION
 460         pop     %ebx                    /* discard the pushed argument */
 461         jmp     L_exit
 462         /* Common exit for the internal-error paths */
 463 L_exit:
 464         movl    $-1,%eax                /* return value: -1 */
 465         popl    %ebx
 466         popl    %edi
 467         popl    %esi
 468
 469         leave
 470         ret
 471 #endif /* PARANOID */