sysdeps/alpha/remqu.S

   1 /* Copyright (C) 2004-2015 Free Software Foundation, Inc.
   2    This file is part of the GNU C Library.
   3
   4    The GNU C Library is free software; you can redistribute it and/or
   5    modify it under the terms of the GNU Lesser General Public
   6    License as published by the Free Software Foundation; either
   7    version 2.1 of the License, or (at your option) any later version.
   8
   9    The GNU C Library is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  12    Lesser General Public License for more details.
  13
  14    You should have received a copy of the GNU Lesser General Public
  15    License along with the GNU C Library.  If not, see
  16    <http://www.gnu.org/licenses/>.  */
  17
  18 #include "div_libc.h"
  19
  20
  21 /* 64-bit unsigned long remainder.  These are not normal C functions.  Argument
  22    registers are t10 and t11, the result goes in t12.  Only t12 and AT may be
  23    clobbered.
  24
  25    Theory of operation here is that we can use the FPU divider for virtually
  26    all operands that we see: all dividend values between -2**53 and 2**53-1
  27    can be computed directly.  Note that divisor values need not be checked
  28    against that range because the rounded fp value will be close enough such
  29    that the quotient is < 1, which will properly be truncated to zero when we
  30    convert back to integer.
  31
  32    When the dividend is outside the range for which we can compute exact
  33    results, we use the fp quotent as an estimate from which we begin refining
  34    an exact integral value.  This reduces the number of iterations in the
  35    shift-and-subtract loop significantly.
  36
  37    The FPCR save/restore is due to the fact that the EV6 _will_ set FPCR_INE
  38    for cvttq/c even without /sui being set.  It will not, however, properly
  39    raise the exception, so we don't have to worry about FPCR_INED being clear
  40    and so dying by SIGFPE.  */
  41
  42         .text
  43         .align  4
  44         .globl  __remqu
  45         .type   __remqu, @funcnoplt
  46         .usepv  __remqu, no
  47
  48         cfi_startproc
  49         cfi_return_column (RA)
  50 __remqu:
  51         lda     sp, -FRAME(sp)
  52         cfi_def_cfa_offset (FRAME)
  53         CALL_MCOUNT
  54
  55         /* Get the fp divide insn issued as quickly as possible.  After
  56            that's done, we have at least 22 cycles until its results are
  57            ready -- all the time in the world to figure out how we're
  58            going to use the results.  */
  59         subq    Y, 1, AT
  60         stt     $f0, 0(sp)
  61         and     Y, AT, AT
  62
  63         stt     $f1, 8(sp)
  64         excb
  65         stt     $f3, 48(sp)
  66         beq     AT, $powerof2
  67         cfi_rel_offset ($f0, 0)
  68         cfi_rel_offset ($f1, 8)
  69         cfi_rel_offset ($f3, 48)
  70
  71         _ITOFT2 X, $f0, 16, Y, $f1, 24
  72         mf_fpcr $f3
  73         cvtqt   $f0, $f0
  74         cvtqt   $f1, $f1
  75
  76         blt     X, $x_is_neg
  77         divt/c  $f0, $f1, $f0
  78
  79         /* Check to see if Y was mis-converted as signed value.  */
  80         ldt     $f1, 8(sp)
  81         blt     Y, $y_is_neg
  82
  83         /* Check to see if X fit in the double as an exact value.  */
  84         srl     X, 53, AT
  85         bne     AT, $x_big
  86
  87         /* If we get here, we're expecting exact results from the division.
  88            Do nothing else besides convert, compute remainder, clean up.  */
  89         cvttq/c $f0, $f0
  90         excb
  91         mt_fpcr $f3
  92         _FTOIT  $f0, AT, 16
  93
  94         mulq    AT, Y, AT
  95         ldt     $f0, 0(sp)
  96         ldt     $f3, 48(sp)
  97         lda     sp, FRAME(sp)
  98         cfi_remember_state
  99         cfi_restore ($f0)
 100         cfi_restore ($f1)
 101         cfi_restore ($f3)
 102         cfi_def_cfa_offset (0)
 103
 104         .align  4
 105         subq    X, AT, RV
 106         ret     $31, (RA), 1
 107
 108         .align  4
 109         cfi_restore_state
 110 $x_is_neg:
 111         /* If we get here, X is so big that bit 63 is set, which made the
 112            conversion come out negative.  Fix it up lest we not even get
 113            a good estimate.  */
 114         ldah    AT, 0x5f80              /* 2**64 as float.  */
 115         stt     $f2, 24(sp)
 116         cfi_rel_offset ($f2, 24)
 117         _ITOFS  AT, $f2, 16
 118
 119         addt    $f0, $f2, $f0
 120         divt/c  $f0, $f1, $f0
 121
 122         /* Ok, we've now the divide issued.  Continue with other checks.  */
 123         .align  4
 124         ldt     $f1, 8(sp)
 125         unop
 126         ldt     $f2, 24(sp)
 127         blt     Y, $y_is_neg
 128         cfi_restore ($f1)
 129         cfi_restore ($f2)
 130         cfi_remember_state      /* for y_is_neg */
 131
 132         .align  4
 133 $x_big:
 134         /* If we get here, X is large enough that we don't expect exact
 135            results, and neither X nor Y got mis-translated for the fp
 136            division.  Our task is to take the fp result, figure out how
 137            far it's off from the correct result and compute a fixup.  */
 138         stq     t0, 16(sp)
 139         stq     t1, 24(sp)
 140         stq     t2, 32(sp)
 141         stq     t3, 40(sp)
 142         cfi_rel_offset (t0, 16)
 143         cfi_rel_offset (t1, 24)
 144         cfi_rel_offset (t2, 32)
 145         cfi_rel_offset (t3, 40)
 146
 147 #define Q       t0              /* quotient */
 148 #define R       RV              /* remainder */
 149 #define SY      t1              /* scaled Y */
 150 #define S       t2              /* scalar */
 151 #define QY      t3              /* Q*Y */
 152
 153         cvttq/c $f0, $f0
 154         _FTOIT  $f0, Q, 8
 155         mulq    Q, Y, QY
 156
 157         .align  4
 158         stq     t4, 8(sp)
 159         excb
 160         ldt     $f0, 0(sp)
 161         mt_fpcr $f3
 162         cfi_rel_offset (t4, 8)
 163         cfi_restore ($f0)
 164
 165         subq    QY, X, R
 166         mov     Y, SY
 167         mov     1, S
 168         bgt     R, $q_high
 169
 170 $q_high_ret:
 171         subq    X, QY, R
 172         mov     Y, SY
 173         mov     1, S
 174         bgt     R, $q_low
 175
 176 $q_low_ret:
 177         ldq     t4, 8(sp)
 178         ldq     t0, 16(sp)
 179         ldq     t1, 24(sp)
 180         ldq     t2, 32(sp)
 181
 182         ldq     t3, 40(sp)
 183         ldt     $f3, 48(sp)
 184         lda     sp, FRAME(sp)
 185         cfi_remember_state
 186         cfi_restore (t0)
 187         cfi_restore (t1)
 188         cfi_restore (t2)
 189         cfi_restore (t3)
 190         cfi_restore (t4)
 191         cfi_restore ($f3)
 192         cfi_def_cfa_offset (0)
 193         ret     $31, (RA), 1
 194
 195         .align  4
 196         cfi_restore_state
 197         /* The quotient that we computed was too large.  We need to reduce
 198            it by S such that Y*S >= R.  Obviously the closer we get to the
 199            correct value the better, but overshooting high is ok, as we'll
 200            fix that up later.  */
 201 0:
 202         addq    SY, SY, SY
 203         addq    S, S, S
 204 $q_high:
 205         cmpult  SY, R, AT
 206         bne     AT, 0b
 207
 208         subq    Q, S, Q
 209         unop
 210         subq    QY, SY, QY
 211         br      $q_high_ret
 212
 213         .align  4
 214         /* The quotient that we computed was too small.  Divide Y by the
 215            current remainder (R) and add that to the existing quotient (Q).
 216            The expectation, of course, is that R is much smaller than X.  */
 217         /* Begin with a shift-up loop.  Compute S such that Y*S >= R.  We
 218            already have a copy of Y in SY and the value 1 in S.  */
 219 0:
 220         addq    SY, SY, SY
 221         addq    S, S, S
 222 $q_low:
 223         cmpult  SY, R, AT
 224         bne     AT, 0b
 225
 226         /* Shift-down and subtract loop.  Each iteration compares our scaled
 227            Y (SY) with the remainder (R); if SY <= R then X is divisible by
 228            Y's scalar (S) so add it to the quotient (Q).  */
 229 2:      addq    Q, S, t3
 230         srl     S, 1, S
 231         cmpule  SY, R, AT
 232         subq    R, SY, t4
 233
 234         cmovne  AT, t3, Q
 235         cmovne  AT, t4, R
 236         srl     SY, 1, SY
 237         bne     S, 2b
 238
 239         br      $q_low_ret
 240
 241         .align  4
 242         cfi_restore_state
 243 $y_is_neg:
 244         /* If we get here, Y is so big that bit 63 is set.  The results
 245            from the divide will be completely wrong.  Fortunately, the
 246            quotient must be either 0 or 1, so the remainder must be X
 247            or X-Y, so just compute it directly.  */
 248         cmpule  Y, X, AT
 249         subq    X, Y, RV
 250         ldt     $f0, 0(sp)
 251         cmoveq  AT, X, RV
 252
 253         lda     sp, FRAME(sp)
 254         cfi_restore ($f0)
 255         cfi_def_cfa_offset (0)
 256         ret     $31, (RA), 1
 257
 258         .align  4
 259         cfi_def_cfa_offset (FRAME)
 260 $powerof2:
 261         subq    Y, 1, AT
 262         beq     Y, DIVBYZERO
 263         and     X, AT, RV
 264         lda     sp, FRAME(sp)
 265         cfi_def_cfa_offset (0)
 266         ret     $31, (RA), 1
 267
 268         cfi_endproc
 269         .size   __remqu, .-__remqu
 270
 271         DO_DIVBYZERO