libgcc/config/bfin/lib1funcs.S

   1 /* libgcc functions for Blackfin.
   2    Copyright (C) 2005, 2009 Free Software Foundation, Inc.
   3    Contributed by Analog Devices.
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 3, or (at your option)
  10 any later version.
  11
  12 GCC is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 Under Section 7 of GPL version 3, you are granted additional
  18 permissions described in the GCC Runtime Library Exception, version
  19 3.1, as published by the Free Software Foundation.
  20
  21 You should have received a copy of the GNU General Public License and
  22 a copy of the GCC Runtime Library Exception along with this program;
  23 see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
  24 <http://www.gnu.org/licenses/>.  */
  25
  26 #ifdef L_divsi3
  27 .text
  28 .align 2
  29 .global ___divsi3;
  30 .type ___divsi3, STT_FUNC;
  31
     /* ___divsi3 -- signed 32-bit division.

        In:   R0 = dividend, R1 = divisor.
        Out:  R0 = quotient, rounded toward zero.

        Both operands are replaced by their magnitudes, the unsigned
        routine ___udivsi3 does the real work, and the quotient is negated
        afterwards when exactly one of the operands was negative.  R7
        carries that sign flag across the call, so it is saved and restored
        here together with RETS.  R1, R2 and CC are overwritten, as is
        whatever ___udivsi3 itself overwrites.  No check is made for a
        zero divisor.  */
  32 ___divsi3:
  33         [--SP] = RETS;
  34         [--SP] = R7;
  35
             /* R0 = |dividend|; R7 = 1 if the dividend was negative.  */
  36         R2 = -R0;
  37         CC = R0 < 0;
  38         IF CC R0 = R2;
  39         R7 = CC;
  40
             /* R1 = |divisor|; R7 ^= 1 if the divisor was negative, so R7
                ends up set only when the operand signs differ.  */
  41         R2 = -R1;
  42         CC = R1 < 0;
  43         IF CC R1 = R2;
  44         R2 = CC;
  45         R7 = R7 ^ R2;
  46
  47         CALL ___udivsi3;
  48
             /* Negate the unsigned quotient if the signs differed.  */
  49         CC = R7;
  50         R1 = -R0;
  51         IF CC R0 = R1;
  52
  53         R7 = [SP++];
  54         RETS = [SP++];
  55         RTS;
     .size ___divsi3, .-___divsi3
  56 #endif
  57
  58 #ifdef L_modsi3
  59 .align 2
  60 .global ___modsi3;
  61 .type ___modsi3, STT_FUNC;
  62
     /* ___modsi3 -- signed 32-bit remainder.

        In:   R0 = dividend, R1 = divisor.
        Out:  R0 = dividend - divisor * quotient, where quotient is the
              value ___divsi3 returns for the same operands.

        Both operands are parked on the stack across the call because
        ___divsi3 overwrites R0 and R1.  R1 and R2 are overwritten here, in
        addition to whatever ___divsi3 overwrites.  No check is made for a
        zero divisor.  */
  63 ___modsi3:
  64         [--SP] = RETS;
  65         [--SP] = R0;
  66         [--SP] = R1;
  67         CALL ___divsi3;
             /* Pop in reverse push order: R2 = divisor, R1 = dividend.  */
  68         R2 = [SP++];
  69         R1 = [SP++];
             /* R2 = divisor * quotient; R0 = dividend - R2.  */
  70         R2 *= R0;
  71         R0 = R1 - R2;
  72         RETS = [SP++];
  73         RTS;
     .size ___modsi3, .-___modsi3
  74 #endif
  75
  76 #ifdef L_udivsi3
  77 .align 2
  78 .global ___udivsi3;
  79 .type ___udivsi3, STT_FUNC;
  80
     /* ___udivsi3 -- unsigned 32-bit division, one quotient bit per loop
        pass (32 passes).

        In:   R0 = dividend, R1 = divisor.
        Out:  R0 = quotient, R3 = remainder.  The remainder in R3 is part
              of the contract: ___umodsi3 reads it after calling here.

        R3:R0 acts as one long shift register.  Each pass moves the next
        dividend bit from the top of R0 into the bottom of R3, then
        subtracts the divisor from R3 if it fits.  ROT rotates through CC,
        so the compare result of one pass is what gets shifted into the
        bottom of R0 on the following pass.  CC is set when the divisor did
        NOT fit, so R0 collects the quotient bits inverted; the final
        complement puts them right.

        Overwrites R2, P0, CC and the loop-0 registers programmed by
        LSETUP.  No check is made for a zero divisor.  */
  81 ___udivsi3:
  82         P0 = 32;
  83         LSETUP (0f, 1f) LC0 = P0;
  84         /* upper half of dividend */
  85         R3 = 0;
  86 0:
  87         /* The first time round in the loop we shift in garbage, but since we
  88            perform 33 shifts, it doesn't matter.  */
  89         R0 = ROT R0 BY 1;
  90         R3 = ROT R3 BY 1;
             /* Trial subtraction; CC = 1 means R3 is too small and stays.  */
  91         R2 = R3 - R1;
  92         CC = R3 < R1 (IU);
  93 1:
  94         /* Last instruction of the loop.  */
  95         IF ! CC R3 = R2;
  96
  97         /* Shift in the last bit.  */
  98         R0 = ROT R0 BY 1;
  99         /* R0 is the result, R3 contains the remainder.  */
 100         R0 = ~ R0;
 101         RTS;
     .size ___udivsi3, .-___udivsi3
 102 #endif
 103
 104 #ifdef L_umodsi3
 105 .align 2
 106 .global ___umodsi3;
 107 .type ___umodsi3, STT_FUNC;
 108
     /* ___umodsi3 -- unsigned 32-bit remainder.

        In:   R0 = dividend, R1 = divisor.
        Out:  R0 = remainder.

        ___udivsi3 leaves the remainder in R3 as a by-product of the
        division, so all that is needed here is the call and a register
        move.  Overwrites whatever ___udivsi3 overwrites.  */
 109 ___umodsi3:
 110         [--SP] = RETS;
 111         CALL ___udivsi3;
             /* Discard the quotient; return the remainder instead.  */
 112         R0 = R3;
 113         RETS = [SP++];
 114         RTS;
     .size ___umodsi3, .-___umodsi3
 115 #endif
 116
 117 #ifdef L_umulsi3_highpart
 118 .align 2
 119 .global ___umulsi3_highpart;
 120 .type ___umulsi3_highpart, STT_FUNC;
 121
     /* ___umulsi3_highpart -- upper 32 bits of the unsigned 64-bit product
        of R0 and R1.

        In:   R0, R1 = unsigned 32-bit factors.
        Out:  R0 = (R0 * R1) >> 32.

        The product is built from four 16x16 partial products:
          R0.H*R1.H * 2^32 + (R0.H*R1.L + R0.L*R1.H) * 2^16 + R0.L*R1.L
        A1 gathers the low and then the middle terms, shifting right by 16
        after each level so that only the carry into the next level
        survives.  A0 holds the high term and receives the final carry.
        Overwrites A0 and A1.

        NOTE(review): summing both middle products plus the carry needs
        more than 32 bits, so this relies on the accumulators being wider
        than the data registers.  */
 122 ___umulsi3_highpart:
             /* Low term; keep only its upper half as carry.  */
 123         A1 = R1.L * R0.L (FU);
 124         A1 = A1 >> 16;
             /* High term into A0, both middle terms onto the carry in A1.  */
 125         A0 = R1.H * R0.H, A1 += R1.L * R0.H (FU);
 126         A1 += R0.L * R1.H (FU);
             /* Carry from the middle level into the high term.  */
 127         A1 = A1 >> 16;
 128         A0 += A1;
 129         R0 = A0 (FU);
 130         RTS;
     .size ___umulsi3_highpart, .-___umulsi3_highpart
 131 #endif
 132
 133 #ifdef L_smulsi3_highpart
 134 .align 2
 135 .global ___smulsi3_highpart;
 136 .type ___smulsi3_highpart, STT_FUNC;
 137
     /* ___smulsi3_highpart -- upper 32 bits of the signed 64-bit product
        of R0 and R1.

        In:   R0, R1 = signed 32-bit factors.
        Out:  R0 = (R0 * R1) >> 32.

        Same four-partial-product scheme as the unsigned variant, with each
        factor split into a signed upper half and an unsigned lower half:
          low term     R0.L * R1.L                unsigned x unsigned (FU)
          middle terms R0.H * R1.L, R1.H * R0.L   signed x unsigned (IS,M)
          high term    R0.H * R1.H                signed x signed
        The carry out of the low term is non-negative, hence the logical
        shift (>>); the middle sum is signed, hence the arithmetic shift
        (>>>).  Overwrites A0 and A1.

        NOTE(review): the M (mixed-mode) option is taken here to apply to
        the A1 product only, with the first operand signed and the second
        unsigned -- confirm against the instruction set reference.  */
 138 ___smulsi3_highpart:
             /* Low term; keep only its upper half as carry.  */
 139         A1 = R1.L * R0.L (FU);
 140         A1 = A1 >> 16;
             /* High term into A0, both middle terms onto the carry in A1.  */
 141         A0 = R0.H * R1.H, A1 += R0.H * R1.L (IS,M);
 142         A1 += R1.H * R0.L (IS,M);
             /* Sign-preserving carry from the middle level into the high term.  */
 143         A1 = A1 >>> 16;
 144         R0 = (A0 += A1);
 145         RTS;
     .size ___smulsi3_highpart, .-___smulsi3_highpart
 146 #endif
 147
 148 #ifdef L_muldi3
 149 .align 2
 150 .global ___muldi3;
 151 .type ___muldi3, STT_FUNC;
 152
 153 /*
 154            R1:R0 * R3:R2
 155          = R1.h:R1.l:R0.h:R0.l * R3.h:R3.l:R2.h:R2.l
 156 [X]      = (R1.h * R3.h) * 2^96
 157 [X]        + (R1.h * R3.l + R1.l * R3.h) * 2^80
 158 [X]        + (R1.h * R2.h + R1.l * R3.l + R3.h * R0.h) * 2^64
 159 [T1]       + (R1.h * R2.l + R3.h * R0.l + R1.l * R2.h + R3.l * R0.h) * 2^48
 160 [T2]       + (R1.l * R2.l + R3.l * R0.l + R0.h * R2.h) * 2^32
 161 [T3]       + (R0.l * R2.h + R2.l * R0.h) * 2^16
 162 [T4]       + (R0.l * R2.l)
 163
 164         We can discard the first three lines marked "X" since we produce
 165         only a 64 bit result.  So, we need ten 16-bit multiplies.
 166
 167         Individual mul-acc results:
 168 [E1]     =  R1.h * R2.l + R3.h * R0.l + R1.l * R2.h + R3.l * R0.h
 169 [E2]     =  R1.l * R2.l + R3.l * R0.l + R0.h * R2.h
 170 [E3]     =  R0.l * R2.h + R2.l * R0.h
 171 [E4]     =  R0.l * R2.l
 172
 173         We also need to add high parts from lower-level results to higher ones:
 174         E[n]c = E[n] + (E[n+1]c >> 16), where E4c := E4
 175
 176         One interesting property is that all parts of the result that depend
 177         on the sign of the multiplication are discarded.  Those would be the
 178         multiplications involving R1.h and R3.h, but only the top 16 bits of
 179         the 32 bit result depend on the sign, and since R1.h and R3.h only
 180         occur in E1, the top half of these results is cut off.
 181         So, we can just use FU mode for all of the 16-bit multiplies, and
 182         ignore questions of when to use mixed mode.  */
 183
     /* ___muldi3 -- 64-bit by 64-bit multiply, low 64 bits of the product.

        In:   R1:R0     = first operand (R1 high word, R0 low word).
              R2        = low word of the second operand.
              [SP + 12] = high word of the second operand; it is loaded
                          into R3 by the first instruction.
        Out:  R1:R0     = low 64 bits of the product.

        R4 is used as a temporary; it is stored to [SP] on entry and
        reloaded from there on exit.  R3, A0 and A1 are overwritten.  */
 184 ___muldi3:
 185         /* [SP] technically is part of the caller's frame, but we can
 186            use it as scratch space.  */
 187         A0 = R2.H * R1.L, A1 = R2.L * R1.H (FU) || R3 = [SP + 12];      /* E1 */
 188         A0 += R3.H * R0.L, A1 += R3.L * R0.H (FU) || [SP] = R4;         /* E1 */
 189         A0 += A1;                                                       /* E1 */
             /* Park E1 in R4; only its low half is used at the end.  */
 190         R4 = A0.w;
 191         A0 = R0.l * R3.l (FU);                                          /* E2 */
 192         A0 += R2.l * R1.l (FU);                                         /* E2 */
 193
 194         A1 = R2.L * R0.L (FU);                                          /* E4 */
             /* Keep E4 in R3; its low half becomes result bits 0..15.  */
 195         R3 = A1.w;
 196         A1 = A1 >> 16;                                                  /* E3c */
 197         A0 += R2.H * R0.H, A1 += R2.L * R0.H (FU);                      /* E2, E3c */
 198         A1 += R0.L * R2.H (FU);                                         /* E3c */
             /* Keep E3c in R0; its low half becomes result bits 16..31.  */
 199         R0 = A1.w;
 200         A1 = A1 >> 16;                                                  /* E2c */
 201         A0 += A1;                                                       /* E2c */
 202         R1 = A0.w;
 203
 204         /* low(result) = low(E3c):low(E4) */
 205         R0 = PACK (R0.l, R3.l);
 206         /* high(result) = E2c + (E1 << 16) */
             /* Only R1.h changes: E1 << 16 has an all-zero low half.  The
                caller's R4 is reloaded in the same instruction.
                NOTE(review): this relies on the add reading R4 before the
                parallel load overwrites it -- confirm.  */
 207         R1.h = R1.h + R4.l (NS) || R4 = [SP];
 208         RTS;
 209
 210 .size ___muldi3, .-___muldi3
 211 #endif