]>
Commit | Line | Data |
---|---|---|
f1717362 | 1 | ;; Copyright (C) 2007-2016 Free Software Foundation, Inc. |
7dfbd804 | 2 | ;; |
3 | ;; This file is part of GCC. | |
4 | ;; | |
5 | ;; GCC is free software; you can redistribute it and/or modify | |
6 | ;; it under the terms of the GNU General Public License as published by | |
7 | ;; the Free Software Foundation; either version 3, or (at your option) | |
8 | ;; any later version. | |
9 | ;; | |
10 | ;; GCC is distributed in the hope that it will be useful, | |
11 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
13 | ;; GNU General Public License for more details. | |
14 | ;; | |
15 | ;; You should have received a copy of the GNU General Public License | |
16 | ;; along with GCC; see the file COPYING3. If not see | |
17 | ;; <http://www.gnu.org/licenses/>. | |
78d690bb | 18 | |
19 | ;; For the internal conditional math routines: | |
20 | ||
21 | ;; operand 0 is always the result | |
22 | ;; operand 1 is always the predicate | |
23 | ;; operand 2, 3, and sometimes 4 are the input values; the next operand (4 or 5) is the else arm of the if_then_else. | |
24 | ;; operand 5 or 6 is the floating point status register to use. | |
25 | ;; operand 6 or 7 is the rounding to do. (0 = single, 1 = double, 2 = none) | |
26 | ;; | |
27 | ;; addrf3_cond - F0 = F2 + F3 | |
28 | ;; subrf3_cond - F0 = F2 - F3 | |
29 | ;; mulrf3_cond - F0 = F2 * F3 | |
30 | ;; nmulrf3_cond - F0 = - (F2 * F3) | |
31 | ;; m1addrf4_cond - F0 = (F2 * F3) + F4 | |
32 | ;; m1subrf4_cond - F0 = (F2 * F3) - F4 | |
33 | ;; m2addrf4_cond - F0 = F2 + (F3 * F4) | |
34 | ;; m2subrf4_cond - F0 = F2 - (F3 * F4) | |
35 | ||
36 | ;; Basic plus/minus/mult operations | |
37 | ||
38 | (define_insn "addrf3_cond" | |
39 | [(set (match_operand:RF 0 "fr_register_operand" "=f,f") | |
690f2a81 | 40 | (if_then_else:RF (ne:RF (match_operand:CCI 1 "register_operand" "c,c") |
78d690bb | 41 | (const_int 0)) |
42 | (plus:RF | |
43 | (match_operand:RF 2 "fr_reg_or_fp01_operand" "fG,fG") | |
44 | (match_operand:RF 3 "fr_reg_or_fp01_operand" "fG,fG")) | |
45 | (match_operand:RF 4 "fr_reg_or_0_operand" "0,H"))) | |
46 | (use (match_operand:SI 5 "const_int_operand" "")) | |
47 | (use (match_operand:SI 6 "const_int_operand" ""))] | |
48 | "" | |
49 | "(%1) fadd%R6.s%5 %0 = %F2, %F3" | |
50 | [(set_attr "itanium_class" "fmac") | |
51 | (set_attr "predicable" "no")]) | |
52 | ||
53 | (define_insn "subrf3_cond" | |
54 | [(set (match_operand:RF 0 "fr_register_operand" "=f,f") | |
690f2a81 | 55 | (if_then_else:RF (ne:RF (match_operand:CCI 1 "register_operand" "c,c") |
78d690bb | 56 | (const_int 0)) |
57 | (minus:RF | |
58 | (match_operand:RF 2 "fr_reg_or_fp01_operand" "fG,fG") | |
59 | (match_operand:RF 3 "fr_reg_or_fp01_operand" "fG,fG")) | |
60 | (match_operand:RF 4 "fr_reg_or_0_operand" "0,H"))) | |
61 | (use (match_operand:SI 5 "const_int_operand" "")) | |
62 | (use (match_operand:SI 6 "const_int_operand" ""))] | |
63 | "" | |
64 | "(%1) fsub%R6.s%5 %0 = %F2, %F3" | |
65 | [(set_attr "itanium_class" "fmac") | |
66 | (set_attr "predicable" "no")]) | |
67 | ||
68 | (define_insn "mulrf3_cond" | |
69 | [(set (match_operand:RF 0 "fr_register_operand" "=f,f") | |
690f2a81 | 70 | (if_then_else:RF (ne:RF (match_operand:CCI 1 "register_operand" "c,c") |
78d690bb | 71 | (const_int 0)) |
72 | (mult:RF | |
73 | (match_operand:RF 2 "fr_reg_or_fp01_operand" "fG,fG") | |
74 | (match_operand:RF 3 "fr_reg_or_fp01_operand" "fG,fG")) | |
75 | (match_operand:RF 4 "fr_reg_or_0_operand" "0,H"))) | |
76 | (use (match_operand:SI 5 "const_int_operand" "")) | |
77 | (use (match_operand:SI 6 "const_int_operand" ""))] | |
78 | "" | |
79 | "(%1) fmpy%R6.s%5 %0 = %F2, %F3" | |
80 | [(set_attr "itanium_class" "fmac") | |
81 | (set_attr "predicable" "no")]) | |
82 | ||
83 | ;; neg-mult operation | |
84 | ||
85 | (define_insn "nmulrf3_cond" | |
86 | [(set (match_operand:RF 0 "fr_register_operand" "=f,f") | |
690f2a81 | 87 | (if_then_else:RF (ne:RF (match_operand:CCI 1 "register_operand" "c,c") |
78d690bb | 88 | (const_int 0)) |
89 | (neg:RF (mult:RF | |
90 | (match_operand:RF 2 "fr_reg_or_fp01_operand" "fG,fG") | |
91 | (match_operand:RF 3 "fr_reg_or_fp01_operand" "fG,fG"))) | |
92 | (match_operand:RF 4 "fr_reg_or_0_operand" "0,H"))) | |
93 | (use (match_operand:SI 5 "const_int_operand" "")) | |
94 | (use (match_operand:SI 6 "const_int_operand" ""))] | |
95 | "" | |
96 | "(%1) fnmpy%R6.s%5 %0 = %F2, %F3" | |
97 | [(set_attr "itanium_class" "fmac") | |
98 | (set_attr "predicable" "no")]) | |
99 | ||
100 | ;; add-mult/sub-mult operations (mult as op1) | |
101 | ||
102 | (define_insn "m1addrf4_cond" | |
103 | [(set (match_operand:RF 0 "fr_register_operand" "=f,f") | |
690f2a81 | 104 | (if_then_else:RF (ne:RF (match_operand:CCI 1 "register_operand" "c,c") |
78d690bb | 105 | (const_int 0)) |
106 | (plus:RF | |
107 | (mult:RF | |
108 | (match_operand:RF 2 "fr_reg_or_fp01_operand" "fG,fG") | |
109 | (match_operand:RF 3 "fr_reg_or_fp01_operand" "fG,fG")) | |
110 | (match_operand:RF 4 "fr_reg_or_fp01_operand" "fG,fG")) | |
111 | (match_operand:RF 5 "fr_reg_or_0_operand" "0,H"))) | |
112 | (use (match_operand:SI 6 "const_int_operand" "")) | |
113 | (use (match_operand:SI 7 "const_int_operand" ""))] | |
114 | "" | |
115 | "(%1) fma%R7.s%6 %0 = %F2, %F3, %F4" | |
116 | [(set_attr "itanium_class" "fmac") | |
117 | (set_attr "predicable" "no")]) | |
118 | ||
119 | (define_insn "m1subrf4_cond" | |
120 | [(set (match_operand:RF 0 "fr_register_operand" "=f,f") | |
690f2a81 | 121 | (if_then_else:RF (ne:RF (match_operand:CCI 1 "register_operand" "c,c") |
78d690bb | 122 | (const_int 0)) |
123 | (minus:RF | |
124 | (mult:RF | |
125 | (match_operand:RF 2 "fr_reg_or_fp01_operand" "fG,fG") | |
126 | (match_operand:RF 3 "fr_reg_or_fp01_operand" "fG,fG")) | |
127 | (match_operand:RF 4 "fr_reg_or_fp01_operand" "fG,fG")) | |
128 | (match_operand:RF 5 "fr_reg_or_0_operand" "0,H"))) | |
129 | (use (match_operand:SI 6 "const_int_operand" "")) | |
130 | (use (match_operand:SI 7 "const_int_operand" ""))] | |
131 | "" | |
132 | "(%1) fms%R7.s%6 %0 = %F2, %F3, %F4" | |
133 | [(set_attr "itanium_class" "fmac") | |
134 | (set_attr "predicable" "no")]) | |
135 | ||
136 | ;; add-mult/sub-mult operations (mult as op2) | |
137 | ||
138 | (define_insn "m2addrf4_cond" | |
139 | [(set (match_operand:RF 0 "fr_register_operand" "=f,f") | |
690f2a81 | 140 | (if_then_else:RF (ne:RF (match_operand:CCI 1 "register_operand" "c,c") |
78d690bb | 141 | (const_int 0)) |
142 | (plus:RF | |
143 | (match_operand:RF 2 "fr_reg_or_fp01_operand" "fG,fG") | |
144 | (mult:RF | |
145 | (match_operand:RF 3 "fr_reg_or_fp01_operand" "fG,fG") | |
146 | (match_operand:RF 4 "fr_reg_or_fp01_operand" "fG,fG"))) | |
147 | (match_operand:RF 5 "fr_reg_or_0_operand" "0,H"))) | |
148 | (use (match_operand:SI 6 "const_int_operand" "")) | |
149 | (use (match_operand:SI 7 "const_int_operand" ""))] | |
150 | "" | |
151 | "(%1) fma%R7.s%6 %0 = %F3, %F4, %F2" | |
152 | [(set_attr "itanium_class" "fmac") | |
153 | (set_attr "predicable" "no")]) | |
154 | ||
155 | (define_insn "m2subrf4_cond" | |
156 | [(set (match_operand:RF 0 "fr_register_operand" "=f,f") | |
690f2a81 | 157 | (if_then_else:RF (ne:RF (match_operand:CCI 1 "register_operand" "c,c") |
78d690bb | 158 | (const_int 0)) |
159 | (minus:RF | |
ffd77765 | 160 | (match_operand:RF 2 "fr_reg_or_fp01_operand" "fG,fG") |
78d690bb | 161 | (mult:RF |
ffd77765 | 162 | (match_operand:RF 3 "fr_reg_or_fp01_operand" "fG,fG") |
163 | (match_operand:RF 4 "fr_reg_or_fp01_operand" "fG,fG"))) | |
78d690bb | 164 | (match_operand:RF 5 "fr_reg_or_0_operand" "0,H"))) |
165 | (use (match_operand:SI 6 "const_int_operand" "")) | |
166 | (use (match_operand:SI 7 "const_int_operand" ""))] | |
167 | "" | |
168 | "(%1) fnma%R7.s%6 %0 = %F3, %F4, %F2" | |
169 | [(set_attr "itanium_class" "fmac") | |
170 | (set_attr "predicable" "no")]) | |
171 | ||
172 | ;; Conversions to/from RF and SF/DF/XF | |
173 | ;; These conversions should not generate any code, but they make it possible | |
174 | ;; for all the instructions used to implement floating point division | |
175 | ;; to be written for RFmode only, without having to handle multiple | |
176 | ;; modes or a register in more than one mode. | |
177 | ||
fd781bb2 | 178 | (define_mode_iterator SDX_F [SF DF XF]) |
78d690bb | 179 | |
180 | (define_insn "extend<mode>rf2" | |
181 | [(set (match_operand:RF 0 "fr_register_operand" "=f") | |
d363dedc | 182 | (float_extend:RF (match_operand:SDX_F 1 "fr_reg_or_fp01_operand" "fG")))] |
78d690bb | 183 | "" |
184 | "#" | |
185 | [(set_attr "itanium_class" "fmisc") | |
186 | (set_attr "predicable" "yes")]) | |
187 | ||
188 | (define_split | |
189 | [(set (match_operand:RF 0 "fr_register_operand" "") | |
d363dedc | 190 | (float_extend:RF (match_operand:SDX_F 1 "fr_reg_or_fp01_operand" "")))] |
78d690bb | 191 | "reload_completed" |
192 | [(set (match_dup 0) (match_dup 2))] | |
193 | { | |
d363dedc | 194 | if (operands[1] == CONST0_RTX (<MODE>mode)) |
195 | operands[2] = gen_rtx_REG (RFmode, FR_REG (0)); | |
196 | else if (operands[1] == CONST1_RTX (<MODE>mode)) | |
197 | operands[2] = gen_rtx_REG (RFmode, FR_REG (1)); | |
198 | else | |
199 | operands[2] = gen_rtx_REG (RFmode, REGNO (operands[1])); | |
78d690bb | 200 | }) |
201 | ||
202 | ||
203 | (define_insn "truncrf<mode>2" | |
204 | [(set (match_operand:SDX_F 0 "fr_register_operand" "=f") | |
d363dedc | 205 | (float_truncate:SDX_F (match_operand:RF 1 "fr_reg_or_fp01_operand" "fG")))] |
78d690bb | 206 | "" |
207 | "#" | |
208 | [(set_attr "itanium_class" "fmisc") | |
209 | (set_attr "predicable" "yes")]) | |
210 | ||
211 | (define_split | |
212 | [(set (match_operand:SDX_F 0 "fr_register_operand" "") | |
d363dedc | 213 | (float_truncate:SDX_F (match_operand:RF 1 "fr_reg_or_fp01_operand" "")))] |
78d690bb | 214 | "reload_completed" |
215 | [(set (match_dup 0) (match_dup 2))] | |
216 | { | |
d363dedc | 217 | if (operands[1] == CONST0_RTX (RFmode)) |
218 | operands[2] = gen_rtx_REG (<MODE>mode, FR_REG (0)); | |
219 | else if (operands[1] == CONST1_RTX (RFmode)) | |
220 | operands[2] = gen_rtx_REG (<MODE>mode, FR_REG (1)); | |
221 | else | |
222 | operands[2] = gen_rtx_REG (<MODE>mode, REGNO (operands[1])); | |
78d690bb | 223 | }) |
224 | ||
a58127d5 | 225 | ;; Float to integer truncations using an alternative status register. |
226 | ||
227 | (define_insn "fix_truncrfdi2_alts" | |
228 | [(set (match_operand:DI 0 "fr_register_operand" "=f") | |
229 | (fix:DI (match_operand:RF 1 "fr_register_operand" "f"))) | |
230 | (use (match_operand:SI 2 "const_int_operand" ""))] | |
231 | "" | |
232 | "fcvt.fx.trunc.s%2 %0 = %1" | |
233 | [(set_attr "itanium_class" "fcvtfx")]) | |
234 | ||
235 | (define_insn "fixuns_truncrfdi2_alts" | |
236 | [(set (match_operand:DI 0 "fr_register_operand" "=f") | |
237 | (unsigned_fix:DI (match_operand:RF 1 "fr_register_operand" "f"))) | |
238 | (use (match_operand:SI 2 "const_int_operand" ""))] | |
239 | "" | |
240 | "fcvt.fxu.trunc.s%2 %0 = %1" | |
241 | [(set_attr "itanium_class" "fcvtfx")]) | |
242 | ||
243 | (define_insn "setf_exp_rf" | |
244 | [(set (match_operand:RF 0 "fr_register_operand" "=f") | |
245 | (unspec:RF [(match_operand:DI 1 "register_operand" "r")] | |
246 | UNSPEC_SETF_EXP))] | |
247 | "" | |
248 | "setf.exp %0 = %1" | |
249 | [(set_attr "itanium_class" "frfr")]) | |
250 | ||
4a7e4fcc | 251 | ;; Reciprocal approximation |
78d690bb | 252 | |
253 | (define_insn "recip_approx_rf" | |
254 | [(set (match_operand:RF 0 "fr_register_operand" "=f") | |
d363dedc | 255 | (unspec:RF [(match_operand:RF 1 "fr_reg_or_fp01_operand" "fG") |
256 | (match_operand:RF 2 "fr_reg_or_fp01_operand" "fG")] | |
4e508025 | 257 | UNSPEC_FR_RECIP_APPROX_RES)) |
690f2a81 | 258 | (set (match_operand:CCI 3 "register_operand" "=c") |
259 | (unspec:CCI [(match_dup 1) (match_dup 2)] UNSPEC_FR_RECIP_APPROX)) | |
78d690bb | 260 | (use (match_operand:SI 4 "const_int_operand" ""))] |
261 | "" | |
d363dedc | 262 | "frcpa.s%4 %0, %3 = %F1, %F2" |
78d690bb | 263 | [(set_attr "itanium_class" "fmisc") |
264 | (set_attr "predicable" "no")]) | |
265 | ||
a58127d5 | 266 | ;; Single precision floating point division |
267 | ||
268 | (define_expand "divsf3" | |
269 | [(set (match_operand:SF 0 "fr_register_operand" "") | |
270 | (div:SF (match_operand:SF 1 "fr_reg_or_fp01_operand" "") | |
271 | (match_operand:SF 2 "fr_reg_or_fp01_operand" "")))] | |
272 | "TARGET_INLINE_FLOAT_DIV" | |
273 | { | |
274 | rtx insn; | |
275 | if (TARGET_INLINE_FLOAT_DIV == INL_MIN_LAT) | |
276 | insn = gen_divsf3_internal_lat (operands[0], operands[1], operands[2]); | |
277 | else | |
278 | insn = gen_divsf3_internal_thr (operands[0], operands[1], operands[2]); | |
279 | emit_insn (insn); | |
280 | DONE; | |
281 | }) | |
282 | ||
78d690bb | 283 | ;; Single precision floating point division (maximum throughput algorithm). |
284 | ||
285 | (define_expand "divsf3_internal_thr" | |
286 | [(set (match_operand:SF 0 "fr_register_operand" "") | |
d363dedc | 287 | (div:SF (match_operand:SF 1 "fr_reg_or_fp01_operand" "") |
288 | (match_operand:SF 2 "fr_reg_or_fp01_operand" "")))] | |
78d690bb | 289 | "TARGET_INLINE_FLOAT_DIV" |
290 | { | |
291 | rtx y = gen_reg_rtx (RFmode); | |
292 | rtx a = gen_reg_rtx (RFmode); | |
293 | rtx b = gen_reg_rtx (RFmode); | |
294 | rtx e = gen_reg_rtx (RFmode); | |
295 | rtx y1 = gen_reg_rtx (RFmode); | |
296 | rtx y2 = gen_reg_rtx (RFmode); | |
297 | rtx q = gen_reg_rtx (RFmode); | |
298 | rtx r = gen_reg_rtx (RFmode); | |
299 | rtx q_res = gen_reg_rtx (RFmode); | |
690f2a81 | 300 | rtx cond = gen_reg_rtx (CCImode); |
78d690bb | 301 | rtx zero = CONST0_RTX (RFmode); |
302 | rtx one = CONST1_RTX (RFmode); | |
303 | rtx status0 = CONST0_RTX (SImode); | |
304 | rtx status1 = CONST1_RTX (SImode); | |
305 | rtx trunc_sgl = CONST0_RTX (SImode); | |
306 | rtx trunc_off = CONST2_RTX (SImode); | |
307 | ||
308 | /* Empty conversions to put inputs into RFmode. */ | |
309 | emit_insn (gen_extendsfrf2 (a, operands[1])); | |
310 | emit_insn (gen_extendsfrf2 (b, operands[2])); | |
311 | /* y = 1 / b */ | |
312 | emit_insn (gen_recip_approx_rf (y, a, b, cond, status0)); | |
313 | /* e = 1 - (b * y) */ | |
314 | emit_insn (gen_m2subrf4_cond (e, cond, one, b, y, zero, status1, trunc_off)); | |
315 | /* y1 = y + (y * e) */ | |
316 | emit_insn (gen_m2addrf4_cond (y1, cond, y, y, e, zero, status1, trunc_off)); | |
317 | /* y2 = y + (y1 * e) */ | |
318 | emit_insn (gen_m2addrf4_cond (y2, cond, y, y1, e, zero, status1, trunc_off)); | |
319 | /* q = single(a * y2) */ | |
320 | emit_insn (gen_mulrf3_cond (q, cond, a, y2, zero, status1, trunc_sgl)); | |
321 | /* r = a - (q * b) */ | |
322 | emit_insn (gen_m2subrf4_cond (r, cond, a, q, b, zero, status1, trunc_off)); | |
323 | /* Q = single (q + (r * y2)) */ | |
324 | emit_insn (gen_m2addrf4_cond (q_res, cond, q, r, y2, y, status0, trunc_sgl)); | |
325 | /* Conversion back into SFmode. */ | |
326 | emit_insn (gen_truncrfsf2 (operands[0], q_res)); | |
327 | DONE; | |
328 | }) | |
329 | ||
3f970e2e | 330 | ;; Single precision floating point division (minimum latency algorithm). |
331 | ||
332 | (define_expand "divsf3_internal_lat" | |
333 | [(set (match_operand:SF 0 "fr_register_operand" "") | |
d363dedc | 334 | (div:SF (match_operand:SF 1 "fr_reg_or_fp01_operand" "") |
335 | (match_operand:SF 2 "fr_reg_or_fp01_operand" "")))] | |
3f970e2e | 336 | "TARGET_INLINE_FLOAT_DIV" |
337 | { | |
338 | rtx y = gen_reg_rtx (RFmode); | |
339 | rtx a = gen_reg_rtx (RFmode); | |
340 | rtx b = gen_reg_rtx (RFmode); | |
341 | rtx e = gen_reg_rtx (RFmode); | |
342 | rtx q = gen_reg_rtx (RFmode); | |
343 | rtx e1 = gen_reg_rtx (RFmode); | |
344 | rtx y1 = gen_reg_rtx (RFmode); | |
345 | rtx q1 = gen_reg_rtx (RFmode); | |
346 | rtx r = gen_reg_rtx (RFmode); | |
347 | rtx q_res = gen_reg_rtx (RFmode); | |
690f2a81 | 348 | rtx cond = gen_reg_rtx (CCImode); |
3f970e2e | 349 | rtx zero = CONST0_RTX (RFmode); |
350 | rtx one = CONST1_RTX (RFmode); | |
351 | rtx status0 = CONST0_RTX (SImode); | |
352 | rtx status1 = CONST1_RTX (SImode); | |
353 | rtx trunc_sgl = CONST0_RTX (SImode); | |
354 | rtx trunc_off = CONST2_RTX (SImode); | |
355 | ||
356 | /* Empty conversions to put inputs into RFmode. */ | |
357 | emit_insn (gen_extendsfrf2 (a, operands[1])); | |
358 | emit_insn (gen_extendsfrf2 (b, operands[2])); | |
359 | /* y = 1 / b */ | |
360 | emit_insn (gen_recip_approx_rf (y, a, b, cond, status0)); | |
361 | /* q = a * y */ | |
362 | emit_insn (gen_mulrf3_cond (q, cond, a, y, zero, status1, trunc_off)); | |
363 | /* e = 1 - (b * y) */ | |
364 | emit_insn (gen_m2subrf4_cond (e, cond, one, b, y, zero, status1, trunc_off)); | |
365 | /* e1 = e + (e * e) */ | |
366 | emit_insn (gen_m2addrf4_cond (e1, cond, e, e, e, zero, status1, trunc_off)); | |
367 | /* q1 = single(q + (q * e1)) */ | |
368 | emit_insn (gen_m2addrf4_cond (q1, cond, q, q, e1, zero, status1, trunc_sgl)); | |
369 | /* y1 = y + (y * e1) */ | |
370 | emit_insn (gen_m2addrf4_cond (y1, cond, y, y, e1, zero, status1, trunc_off)); | |
371 | /* r = a - (q1 * b) */ | |
372 | emit_insn (gen_m2subrf4_cond (r, cond, a, q1, b, zero, status1, trunc_off)); | |
373 | /* Q = single (q1 + (r * y1)) */ | |
374 | emit_insn (gen_m2addrf4_cond (q_res, cond, q1, r, y1, y, status0, trunc_sgl)); | |
375 | /* Conversion back into SFmode. */ | |
376 | emit_insn (gen_truncrfsf2 (operands[0], q_res)); | |
377 | DONE; | |
378 | }) | |
379 | ||
a58127d5 | 380 | ;; Double precision floating point division |
381 | ||
382 | (define_expand "divdf3" | |
383 | [(set (match_operand:DF 0 "fr_register_operand" "") | |
384 | (div:DF (match_operand:DF 1 "fr_reg_or_fp01_operand" "") | |
385 | (match_operand:DF 2 "fr_reg_or_fp01_operand" "")))] | |
386 | "TARGET_INLINE_FLOAT_DIV" | |
387 | { | |
388 | rtx insn; | |
389 | if (TARGET_INLINE_FLOAT_DIV == INL_MIN_LAT) | |
390 | insn = gen_divdf3_internal_lat (operands[0], operands[1], operands[2]); | |
391 | else | |
392 | insn = gen_divdf3_internal_thr (operands[0], operands[1], operands[2]); | |
393 | emit_insn (insn); | |
394 | DONE; | |
395 | }) | |
78d690bb | 396 | |
397 | ;; Double precision floating point division (maximum throughput algorithm). | |
398 | ||
399 | (define_expand "divdf3_internal_thr" | |
400 | [(set (match_operand:DF 0 "fr_register_operand" "") | |
d363dedc | 401 | (div:DF (match_operand:DF 1 "fr_reg_or_fp01_operand" "") |
402 | (match_operand:DF 2 "fr_reg_or_fp01_operand" "")))] | |
78d690bb | 403 | "TARGET_INLINE_FLOAT_DIV" |
404 | { | |
405 | rtx q_res = gen_reg_rtx (RFmode); | |
406 | rtx a = gen_reg_rtx (RFmode); | |
407 | rtx b = gen_reg_rtx (RFmode); | |
408 | rtx y = gen_reg_rtx (RFmode); | |
409 | rtx e = gen_reg_rtx (RFmode); | |
410 | rtx y1 = gen_reg_rtx (RFmode); | |
411 | rtx e1 = gen_reg_rtx (RFmode); | |
412 | rtx y2 = gen_reg_rtx (RFmode); | |
413 | rtx e2 = gen_reg_rtx (RFmode); | |
414 | rtx y3 = gen_reg_rtx (RFmode); | |
415 | rtx q = gen_reg_rtx (RFmode); | |
416 | rtx r = gen_reg_rtx (RFmode); | |
690f2a81 | 417 | rtx cond = gen_reg_rtx (CCImode); |
78d690bb | 418 | rtx zero = CONST0_RTX (RFmode); |
419 | rtx one = CONST1_RTX (RFmode); | |
420 | rtx status0 = CONST0_RTX (SImode); | |
421 | rtx status1 = CONST1_RTX (SImode); | |
422 | rtx trunc_dbl = CONST1_RTX (SImode); | |
423 | rtx trunc_off = CONST2_RTX (SImode); | |
424 | /* Empty conversions to put inputs into RFmode */ | |
425 | emit_insn (gen_extenddfrf2 (a, operands[1])); | |
426 | emit_insn (gen_extenddfrf2 (b, operands[2])); | |
427 | /* y = 1 / b */ | |
428 | emit_insn (gen_recip_approx_rf (y, a, b, cond, status0)); | |
429 | /* e = 1 - (b * y) */ | |
430 | emit_insn (gen_m2subrf4_cond (e, cond, one, b, y, zero, status1, trunc_off)); | |
431 | /* y1 = y + (y * e) */ | |
432 | emit_insn (gen_m2addrf4_cond (y1, cond, y, y, e, zero, status1, trunc_off)); | |
433 | /* e1 = e * e */ | |
434 | emit_insn (gen_mulrf3_cond (e1, cond, e, e, zero, status1, trunc_off)); | |
435 | /* y2 = y1 + (y1 * e1) */ | |
436 | emit_insn (gen_m2addrf4_cond (y2, cond, y1, y1, e1, zero, status1, trunc_off)); | |
437 | /* e2 = e1 * e1 */ | |
438 | emit_insn (gen_mulrf3_cond (e2, cond, e1, e1, zero, status1, trunc_off)); | |
439 | /* y3 = y2 + (y2 * e2) */ | |
440 | emit_insn (gen_m2addrf4_cond (y3, cond, y2, y2, e2, zero, status1, trunc_off)); | |
441 | /* q = double (a * y3) */ | |
442 | emit_insn (gen_mulrf3_cond (q, cond, a, y3, zero, status1, trunc_dbl)); | |
443 | /* r = a - (b * q) */ | |
444 | emit_insn (gen_m2subrf4_cond (r, cond, a, b, q, zero, status1, trunc_off)); | |
445 | /* Q = double (q + (r * y3)) */ | |
446 | emit_insn (gen_m2addrf4_cond (q_res, cond, q, r, y3, y, status0, trunc_dbl)); | |
447 | /* Conversion back into DFmode */ | |
448 | emit_insn (gen_truncrfdf2 (operands[0], q_res)); | |
449 | DONE; | |
450 | }) | |
3f970e2e | 451 | |
452 | ;; Double precision floating point division (minimum latency algorithm). | |
453 | ||
454 | (define_expand "divdf3_internal_lat" | |
455 | [(set (match_operand:DF 0 "fr_register_operand" "") | |
d363dedc | 456 | (div:DF (match_operand:DF 1 "fr_reg_or_fp01_operand" "") |
457 | (match_operand:DF 2 "fr_reg_or_fp01_operand" "")))] | |
3f970e2e | 458 | "TARGET_INLINE_FLOAT_DIV" |
459 | { | |
460 | rtx q_res = gen_reg_rtx (RFmode); | |
461 | rtx a = gen_reg_rtx (RFmode); | |
462 | rtx b = gen_reg_rtx (RFmode); | |
463 | rtx y = gen_reg_rtx (RFmode); | |
464 | rtx e = gen_reg_rtx (RFmode); | |
465 | rtx y1 = gen_reg_rtx (RFmode); | |
466 | rtx e1 = gen_reg_rtx (RFmode); | |
467 | rtx q1 = gen_reg_rtx (RFmode); | |
468 | rtx y2 = gen_reg_rtx (RFmode); | |
469 | rtx e2 = gen_reg_rtx (RFmode); | |
470 | rtx q2 = gen_reg_rtx (RFmode); | |
471 | rtx e3 = gen_reg_rtx (RFmode); | |
472 | rtx q = gen_reg_rtx (RFmode); | |
473 | rtx r1 = gen_reg_rtx (RFmode); | |
690f2a81 | 474 | rtx cond = gen_reg_rtx (CCImode); |
3f970e2e | 475 | rtx zero = CONST0_RTX (RFmode); |
476 | rtx one = CONST1_RTX (RFmode); | |
477 | rtx status0 = CONST0_RTX (SImode); | |
478 | rtx status1 = CONST1_RTX (SImode); | |
479 | rtx trunc_dbl = CONST1_RTX (SImode); | |
480 | rtx trunc_off = CONST2_RTX (SImode); | |
481 | ||
482 | /* Empty conversions to put inputs into RFmode */ | |
483 | emit_insn (gen_extenddfrf2 (a, operands[1])); | |
484 | emit_insn (gen_extenddfrf2 (b, operands[2])); | |
485 | /* y = 1 / b */ | |
486 | emit_insn (gen_recip_approx_rf (y, a, b, cond, status0)); | |
487 | /* e = 1 - (b * y) */ | |
488 | emit_insn (gen_m2subrf4_cond (e, cond, one, b, y, zero, status1, trunc_off)); | |
489 | /* q = a * y */ | |
490 | emit_insn (gen_mulrf3_cond (q, cond, a, y, zero, status1, trunc_off)); | |
491 | /* e2 = e + (e * e) */ | |
492 | emit_insn (gen_m2addrf4_cond (e2, cond, e, e, e, zero, status1, trunc_off)); | |
493 | /* e1 = e * e */ | |
494 | emit_insn (gen_mulrf3_cond (e1, cond, e, e, zero, status1, trunc_off)); | |
495 | /* e3 = e + (e1 * e1) */ | |
496 | emit_insn (gen_m2addrf4_cond (e3, cond, e, e1, e1, zero, status1, trunc_off)); | |
497 | /* q1 = q + (q * e2) */ | |
498 | emit_insn (gen_m2addrf4_cond (q1, cond, q, q, e2, zero, status1, trunc_off)); | |
499 | /* y1 = y + (y * e2) */ | |
500 | emit_insn (gen_m2addrf4_cond (y1, cond, y, y, e2, zero, status1, trunc_off)); | |
501 | /* q2 = double(q + (q1 * e3)) */ | |
502 | emit_insn (gen_m2addrf4_cond (q2, cond, q, q1, e3, zero, status1, trunc_dbl)); | |
503 | /* y2 = y + (y1 * e3) */ | |
504 | emit_insn (gen_m2addrf4_cond (y2, cond, y, y1, e3, zero, status1, trunc_off)); | |
505 | /* r1 = a - (b * q2) */ | |
506 | emit_insn (gen_m2subrf4_cond (r1, cond, a, b, q2, zero, status1, trunc_off)); | |
507 | /* Q = double (q2 + (r1 * y2)) */ | |
508 | emit_insn (gen_m2addrf4_cond (q_res, cond, q2, r1, y2, y, status0, trunc_dbl)); | |
509 | /* Conversion back into DFmode */ | |
510 | emit_insn (gen_truncrfdf2 (operands[0], q_res)); | |
511 | DONE; | |
512 | }) | |
513 | ||
514 | ;; Extended precision floating point division. | |
515 | ||
a58127d5 | 516 | (define_expand "divxf3" |
3f970e2e | 517 | [(set (match_operand:XF 0 "fr_register_operand" "") |
d363dedc | 518 | (div:XF (match_operand:XF 1 "fr_reg_or_fp01_operand" "") |
519 | (match_operand:XF 2 "fr_reg_or_fp01_operand" "")))] | |
3f970e2e | 520 | "TARGET_INLINE_FLOAT_DIV" |
521 | { | |
522 | rtx q_res = gen_reg_rtx (RFmode); | |
523 | rtx a = gen_reg_rtx (RFmode); | |
524 | rtx b = gen_reg_rtx (RFmode); | |
525 | rtx y = gen_reg_rtx (RFmode); | |
526 | rtx e = gen_reg_rtx (RFmode); | |
527 | rtx y1 = gen_reg_rtx (RFmode); | |
528 | rtx e1 = gen_reg_rtx (RFmode); | |
529 | rtx q1 = gen_reg_rtx (RFmode); | |
530 | rtx y2 = gen_reg_rtx (RFmode); | |
531 | rtx e2 = gen_reg_rtx (RFmode); | |
532 | rtx y3 = gen_reg_rtx (RFmode); | |
533 | rtx e3 = gen_reg_rtx (RFmode); | |
534 | rtx e4 = gen_reg_rtx (RFmode); | |
535 | rtx q = gen_reg_rtx (RFmode); | |
536 | rtx r = gen_reg_rtx (RFmode); | |
537 | rtx r1 = gen_reg_rtx (RFmode); | |
690f2a81 | 538 | rtx cond = gen_reg_rtx (CCImode); |
3f970e2e | 539 | rtx zero = CONST0_RTX (RFmode); |
540 | rtx one = CONST1_RTX (RFmode); | |
541 | rtx status0 = CONST0_RTX (SImode); | |
542 | rtx status1 = CONST1_RTX (SImode); | |
543 | rtx trunc_off = CONST2_RTX (SImode); | |
544 | ||
545 | /* Empty conversions to put inputs into RFmode */ | |
546 | emit_insn (gen_extendxfrf2 (a, operands[1])); | |
547 | emit_insn (gen_extendxfrf2 (b, operands[2])); | |
548 | /* y = 1 / b */ | |
549 | emit_insn (gen_recip_approx_rf (y, a, b, cond, status0)); | |
550 | /* e = 1 - (b * y) */ | |
551 | emit_insn (gen_m2subrf4_cond (e, cond, one, b, y, zero, status1, trunc_off)); | |
552 | /* q = a * y */ | |
553 | emit_insn (gen_mulrf3_cond (q, cond, a, y, zero, status1, trunc_off)); | |
554 | /* e2 = e + (e * e) */ | |
555 | emit_insn (gen_m2addrf4_cond (e2, cond, e, e, e, zero, status1, trunc_off)); | |
556 | /* e1 = e * e */ | |
557 | emit_insn (gen_mulrf3_cond (e1, cond, e, e, zero, status1, trunc_off)); | |
558 | /* y1 = y + (y * e2) */ | |
559 | emit_insn (gen_m2addrf4_cond (y1, cond, y, y, e2, zero, status1, trunc_off)); | |
560 | /* e3 = e + (e1 * e1) */ | |
561 | emit_insn (gen_m2addrf4_cond (e3, cond, e, e1, e1, zero, status1, trunc_off)); | |
562 | /* y2 = y + (y1 * e3) */ | |
563 | emit_insn (gen_m2addrf4_cond (y2, cond, y, y1, e3, zero, status1, trunc_off)); | |
564 | /* r = a - (b * q) */ | |
565 | emit_insn (gen_m2subrf4_cond (r, cond, a, b, q, zero, status1, trunc_off)); | |
566 | /* e4 = 1 - (b * y2) */ | |
567 | emit_insn (gen_m2subrf4_cond (e4, cond, one, b, y2, zero, status1, trunc_off)); | |
568 | /* q1 = q + (r * y2) */ | |
569 | emit_insn (gen_m2addrf4_cond (q1, cond, q, r, y2, zero, status1, trunc_off)); | |
570 | /* y3 = y2 + (y2 * e4) */ | |
571 | emit_insn (gen_m2addrf4_cond (y3, cond, y2, y2, e4, zero, status1, trunc_off)); | |
572 | /* r1 = a - (b * q1) */ | |
573 | emit_insn (gen_m2subrf4_cond (r1, cond, a, b, q1, zero, status1, trunc_off)); | |
574 | /* Q = q1 + (r1 * y3) */ | |
575 | emit_insn (gen_m2addrf4_cond (q_res, cond, q1, r1, y3, y, status0, trunc_off)); | |
576 | /* Conversion back into XFmode */ | |
577 | emit_insn (gen_truncrfxf2 (operands[0], q_res)); | |
578 | DONE; | |
579 | }) | |
50738926 | 580 | |
581 | ||
a58127d5 | 582 | ;; Integer division operations |
583 | ||
584 | (define_expand "divsi3" | |
585 | [(set (match_operand:SI 0 "register_operand" "") | |
586 | (div:SI (match_operand:SI 1 "general_operand" "") | |
587 | (match_operand:SI 2 "general_operand" "")))] | |
588 | "TARGET_INLINE_INT_DIV" | |
589 | { | |
590 | rtx op1_rf, op2_rf, op0_rf, op0_di; | |
591 | ||
592 | op0_rf = gen_reg_rtx (RFmode); | |
593 | op0_di = gen_reg_rtx (DImode); | |
594 | ||
595 | if (! register_operand (operands[1], SImode)) | |
596 | operands[1] = force_reg (SImode, operands[1]); | |
597 | op1_rf = gen_reg_rtx (RFmode); | |
598 | expand_float (op1_rf, operands[1], 0); | |
599 | ||
600 | if (! register_operand (operands[2], SImode)) | |
601 | operands[2] = force_reg (SImode, operands[2]); | |
602 | op2_rf = gen_reg_rtx (RFmode); | |
603 | expand_float (op2_rf, operands[2], 0); | |
604 | ||
605 | emit_insn (gen_cond_trap (EQ, operands[2], CONST0_RTX (SImode), | |
606 | CONST1_RTX (SImode))); | |
607 | ||
608 | emit_insn (gen_divsi3_internal (op0_rf, op1_rf, op2_rf)); | |
609 | ||
610 | emit_insn (gen_fix_truncrfdi2_alts (op0_di, op0_rf, const1_rtx)); | |
611 | emit_move_insn (operands[0], gen_lowpart (SImode, op0_di)); | |
612 | DONE; | |
613 | }) | |
614 | ||
615 | (define_expand "modsi3" | |
616 | [(set (match_operand:SI 0 "register_operand" "") | |
617 | (mod:SI (match_operand:SI 1 "general_operand" "") | |
618 | (match_operand:SI 2 "general_operand" "")))] | |
619 | "TARGET_INLINE_INT_DIV" | |
620 | { | |
621 | rtx op2_neg, op1_di, div; | |
622 | ||
623 | div = gen_reg_rtx (SImode); | |
624 | emit_insn (gen_divsi3 (div, operands[1], operands[2])); | |
625 | ||
626 | op2_neg = expand_unop (SImode, neg_optab, operands[2], NULL_RTX, 0); | |
627 | ||
628 | /* This is a trick to get us to reuse the value that we're sure to | |
629 | have already copied to the FP regs. */ | |
630 | op1_di = gen_reg_rtx (DImode); | |
631 | convert_move (op1_di, operands[1], 0); | |
632 | ||
633 | emit_insn (gen_maddsi4 (operands[0], div, op2_neg, | |
634 | gen_lowpart (SImode, op1_di))); | |
635 | DONE; | |
636 | }) | |
637 | ||
638 | (define_expand "udivsi3" | |
639 | [(set (match_operand:SI 0 "register_operand" "") | |
640 | (udiv:SI (match_operand:SI 1 "general_operand" "") | |
641 | (match_operand:SI 2 "general_operand" "")))] | |
642 | "TARGET_INLINE_INT_DIV" | |
643 | { | |
644 | rtx op1_rf, op2_rf, op0_rf, op0_di; | |
645 | ||
646 | op0_rf = gen_reg_rtx (RFmode); | |
647 | op0_di = gen_reg_rtx (DImode); | |
648 | ||
649 | if (! register_operand (operands[1], SImode)) | |
650 | operands[1] = force_reg (SImode, operands[1]); | |
651 | op1_rf = gen_reg_rtx (RFmode); | |
652 | expand_float (op1_rf, operands[1], 1); | |
653 | ||
654 | if (! register_operand (operands[2], SImode)) | |
655 | operands[2] = force_reg (SImode, operands[2]); | |
656 | op2_rf = gen_reg_rtx (RFmode); | |
657 | expand_float (op2_rf, operands[2], 1); | |
658 | ||
659 | emit_insn (gen_cond_trap (EQ, operands[2], CONST0_RTX (SImode), | |
660 | CONST1_RTX (SImode))); | |
661 | ||
662 | emit_insn (gen_divsi3_internal (op0_rf, op1_rf, op2_rf)); | |
663 | ||
664 | emit_insn (gen_fixuns_truncrfdi2_alts (op0_di, op0_rf, const1_rtx)); | |
665 | emit_move_insn (operands[0], gen_lowpart (SImode, op0_di)); | |
666 | DONE; | |
667 | }) | |
668 | ||
669 | (define_expand "umodsi3" | |
670 | [(set (match_operand:SI 0 "register_operand" "") | |
671 | (umod:SI (match_operand:SI 1 "general_operand" "") | |
672 | (match_operand:SI 2 "general_operand" "")))] | |
673 | "TARGET_INLINE_INT_DIV" | |
674 | { | |
675 | rtx op2_neg, op1_di, div; | |
676 | ||
677 | div = gen_reg_rtx (SImode); | |
678 | emit_insn (gen_udivsi3 (div, operands[1], operands[2])); | |
679 | ||
680 | op2_neg = expand_unop (SImode, neg_optab, operands[2], NULL_RTX, 0); | |
681 | ||
682 | /* This is a trick to get us to reuse the value that we're sure to | |
683 | have already copied to the FP regs. */ | |
684 | op1_di = gen_reg_rtx (DImode); | |
685 | convert_move (op1_di, operands[1], 1); | |
686 | ||
687 | emit_insn (gen_maddsi4 (operands[0], div, op2_neg, | |
688 | gen_lowpart (SImode, op1_di))); | |
689 | DONE; | |
690 | }) | |
691 | ||
;; RFmode core of the inline SImode divide.  operands[1] (dividend) and
;; operands[2] (divisor) are RFmode registers; operands[0] receives the
;; RFmode quotient.  Every refinement step is predicated on the CCI
;; register produced by recip_approx_rf, uses status field 1 and performs
;; no rounding (rounding selector 2).
(define_expand "divsi3_internal"
  [(set (match_operand:RF 0 "fr_register_operand" "")
        (float:RF (div:SI (match_operand:RF 1 "fr_register_operand" "")
                          (match_operand:RF 2 "fr_register_operand" ""))))]
  "TARGET_INLINE_INT_DIV"
{
  rtx a = operands[1];
  rtx b = operands[2];
  rtx y = gen_reg_rtx (RFmode);
  rtx e = gen_reg_rtx (RFmode);
  rtx e1 = gen_reg_rtx (RFmode);
  rtx q = gen_reg_rtx (RFmode);
  rtx q1 = gen_reg_rtx (RFmode);
  rtx pred = gen_reg_rtx (CCImode);
  rtx fzero = CONST0_RTX (RFmode);
  rtx fone = CONST1_RTX (RFmode);
  rtx sf1 = CONST1_RTX (SImode);
  rtx rnd_none = CONST2_RTX (SImode);
  rtx twon34_exp = gen_reg_rtx (DImode);
  rtx twon34 = gen_reg_rtx (RFmode);

  /* Load constant 2**(-34).  NOTE(review): 65501 is 0xffff - 34,
     presumably the biased register-format exponent -- confirm.  */
  emit_move_insn (twon34_exp, GEN_INT (65501));
  emit_insn (gen_setf_exp_rf (twon34, twon34_exp));

  /* y = 1 / b  (initial approximation; also sets pred) */
  emit_insn (gen_recip_approx_rf (y, a, b, pred, sf1));
  /* e = 1 - (b * y) */
  emit_insn (gen_m2subrf4_cond (e, pred, fone, b, y, fzero, sf1, rnd_none));
  /* q = a * y */
  emit_insn (gen_mulrf3_cond (q, pred, a, y, fzero, sf1, rnd_none));
  /* q1 = q + (q * e) */
  emit_insn (gen_m2addrf4_cond (q1, pred, q, q, e, fzero, sf1, rnd_none));
  /* e1 = (2**-34) + (e * e) */
  emit_insn (gen_m2addrf4_cond (e1, pred, twon34, e, e, fzero, sf1, rnd_none));
  /* q2 = q1 + (e1 * q1); y is the alternative value for this final
     step, whereas the earlier steps pass zero.  */
  emit_insn (gen_m2addrf4_cond (operands[0], pred, q1, e1, q1, y, sf1, rnd_none));
  DONE;
})
731 | ||
;; Signed DImode division expanded inline: both inputs are converted to
;; RFmode, the quotient is computed by one of the divdi3_internal_*
;; sequences, and the result is converted back with fix_truncrfdi2_alts.
(define_expand "divdi3"
  [(set (match_operand:DI 0 "register_operand" "")
        (div:DI (match_operand:DI 1 "general_operand" "")
                (match_operand:DI 2 "general_operand" "")))]
  "TARGET_INLINE_INT_DIV"
{
  rtx quot_rf = gen_reg_rtx (RFmode);
  rtx src_rf[3];

  /* Get each input into a DImode register, then convert it as a signed
     value into a fresh RFmode register.  */
  for (int i = 1; i <= 2; i++)
    {
      if (! register_operand (operands[i], DImode))
        operands[i] = force_reg (DImode, operands[i]);
      src_rf[i] = gen_reg_rtx (RFmode);
      expand_float (src_rf[i], operands[i], 0);
    }

  /* Conditional trap when the divisor equals zero.  */
  emit_insn (gen_cond_trap (EQ, operands[2], CONST0_RTX (DImode),
                            CONST1_RTX (DImode)));

  /* Pick the minimum-latency or the throughput-oriented sequence.  */
  emit_insn (TARGET_INLINE_INT_DIV == INL_MIN_LAT
             ? gen_divdi3_internal_lat (quot_rf, src_rf[1], src_rf[2])
             : gen_divdi3_internal_thr (quot_rf, src_rf[1], src_rf[2]));

  emit_insn (gen_fix_truncrfdi2_alts (operands[0], quot_rf, const1_rtx));
  DONE;
})
763 | ||
;; Signed DImode remainder, expanded inline on top of the divdi3
;; expander: the quotient is computed first and then combined with the
;; negated divisor and the dividend by madddi4.
;; The template uses mod:DI so that its mode agrees with the DImode
;; operands and destination (it previously said mod:SI).  The expander
;; always finishes with DONE, so the template itself is never emitted.
(define_expand "moddi3"
  [(set (match_operand:DI 0 "register_operand" "")
        (mod:DI (match_operand:DI 1 "general_operand" "")
                (match_operand:DI 2 "general_operand" "")))]
  "TARGET_INLINE_INT_DIV"
{
  rtx op2_neg, div;

  /* Quotient of operands[1] / operands[2].  */
  div = gen_reg_rtx (DImode);
  emit_insn (gen_divdi3 (div, operands[1], operands[2]));

  /* -operands[2], computed with the generic negate optab.  */
  op2_neg = expand_unop (DImode, neg_optab, operands[2], NULL_RTX, 0);

  /* NOTE(review): presumably madddi4 computes
     operands[0] = div * op2_neg + operands[1] -- confirm against the
     madddi4 pattern.  */
  emit_insn (gen_madddi4 (operands[0], div, op2_neg, operands[1]));
  DONE;
})
780 | ||
;; Unsigned DImode division expanded inline: both inputs are converted
;; to RFmode as unsigned values, the quotient is computed by one of the
;; divdi3_internal_* sequences, and the result is converted back with
;; fixuns_truncrfdi2_alts.
(define_expand "udivdi3"
  [(set (match_operand:DI 0 "register_operand" "")
        (udiv:DI (match_operand:DI 1 "general_operand" "")
                 (match_operand:DI 2 "general_operand" "")))]
  "TARGET_INLINE_INT_DIV"
{
  rtx quot_rf = gen_reg_rtx (RFmode);
  rtx src_rf[3];

  /* Get each input into a DImode register, then convert it as an
     unsigned value into a fresh RFmode register.  */
  for (int i = 1; i <= 2; i++)
    {
      if (! register_operand (operands[i], DImode))
        operands[i] = force_reg (DImode, operands[i]);
      src_rf[i] = gen_reg_rtx (RFmode);
      expand_float (src_rf[i], operands[i], 1);
    }

  /* Conditional trap when the divisor equals zero.  */
  emit_insn (gen_cond_trap (EQ, operands[2], CONST0_RTX (DImode),
                            CONST1_RTX (DImode)));

  /* Pick the minimum-latency or the throughput-oriented sequence.  */
  emit_insn (TARGET_INLINE_INT_DIV == INL_MIN_LAT
             ? gen_divdi3_internal_lat (quot_rf, src_rf[1], src_rf[2])
             : gen_divdi3_internal_thr (quot_rf, src_rf[1], src_rf[2]));

  emit_insn (gen_fixuns_truncrfdi2_alts (operands[0], quot_rf, const1_rtx));
  DONE;
})
812 | ||
;; Unsigned DImode remainder, expanded inline on top of the udivdi3
;; expander: the quotient is computed first and then combined with the
;; negated divisor and the dividend by madddi4.
(define_expand "umoddi3"
  [(set (match_operand:DI 0 "register_operand" "")
        (umod:DI (match_operand:DI 1 "general_operand" "")
                 (match_operand:DI 2 "general_operand" "")))]
  "TARGET_INLINE_INT_DIV"
{
  /* Quotient of operands[1] / operands[2].  */
  rtx quot = gen_reg_rtx (DImode);
  emit_insn (gen_udivdi3 (quot, operands[1], operands[2]));

  /* -operands[2], computed with the generic negate optab.  */
  rtx neg_divisor = expand_unop (DImode, neg_optab, operands[2], NULL_RTX, 0);

  /* NOTE(review): presumably madddi4 computes
     operands[0] = quot * neg_divisor + operands[1] -- confirm against
     the madddi4 pattern.  */
  emit_insn (gen_madddi4 (operands[0], quot, neg_divisor, operands[1]));
  DONE;
})
829 | ||
;; RFmode core of the inline DImode divide, selected by the callers when
;; TARGET_INLINE_INT_DIV == INL_MIN_LAT.  operands[1] (dividend) and
;; operands[2] (divisor) are RFmode registers; operands[0] receives the
;; RFmode quotient.  Every refinement step is predicated on the CCI
;; register produced by recip_approx_rf, uses status field 1 and performs
;; no rounding (rounding selector 2).
(define_expand "divdi3_internal_lat"
  [(set (match_operand:RF 0 "fr_register_operand" "")
        (float:RF (div:DI (match_operand:RF 1 "fr_register_operand" "")
                          (match_operand:RF 2 "fr_register_operand" ""))))]
  "TARGET_INLINE_INT_DIV"
{
  rtx a = operands[1];
  rtx b = operands[2];
  rtx y = gen_reg_rtx (RFmode);
  rtx y1 = gen_reg_rtx (RFmode);
  rtx y2 = gen_reg_rtx (RFmode);
  rtx e = gen_reg_rtx (RFmode);
  rtx e1 = gen_reg_rtx (RFmode);
  rtx q = gen_reg_rtx (RFmode);
  rtx q1 = gen_reg_rtx (RFmode);
  rtx q2 = gen_reg_rtx (RFmode);
  rtx r = gen_reg_rtx (RFmode);
  rtx pred = gen_reg_rtx (CCImode);
  rtx fzero = CONST0_RTX (RFmode);
  rtx fone = CONST1_RTX (RFmode);
  rtx sf1 = CONST1_RTX (SImode);
  rtx rnd_none = CONST2_RTX (SImode);

  /* y = 1 / b  (initial approximation; also sets pred) */
  emit_insn (gen_recip_approx_rf (y, a, b, pred, sf1));
  /* e = 1 - (b * y) */
  emit_insn (gen_m2subrf4_cond (e, pred, fone, b, y, fzero, sf1, rnd_none));
  /* q = a * y */
  emit_insn (gen_mulrf3_cond (q, pred, a, y, fzero, sf1, rnd_none));
  /* q1 = q + (q * e) */
  emit_insn (gen_m2addrf4_cond (q1, pred, q, q, e, fzero, sf1, rnd_none));
  /* e1 = e * e */
  emit_insn (gen_mulrf3_cond (e1, pred, e, e, fzero, sf1, rnd_none));
  /* q2 = q1 + (e1 * q1) */
  emit_insn (gen_m2addrf4_cond (q2, pred, q1, e1, q1, fzero, sf1, rnd_none));
  /* y1 = y + (y * e) */
  emit_insn (gen_m2addrf4_cond (y1, pred, y, y, e, fzero, sf1, rnd_none));
  /* r = a - (b * q2) */
  emit_insn (gen_m2subrf4_cond (r, pred, a, b, q2, fzero, sf1, rnd_none));
  /* y2 = y1 + (y1 * e1) */
  emit_insn (gen_m2addrf4_cond (y2, pred, y1, y1, e1, fzero, sf1, rnd_none));
  /* q3 = q2 + (r * y2); y is the alternative value for this final
     step, whereas the earlier steps pass zero.  */
  emit_insn (gen_m2addrf4_cond (operands[0], pred, q2, r, y2, y, sf1, rnd_none));
  DONE;
})
875 | ||
;; RFmode core of the inline DImode divide, selected by the callers when
;; TARGET_INLINE_INT_DIV is not INL_MIN_LAT.  operands[1] (dividend) and
;; operands[2] (divisor) are RFmode registers; operands[0] receives the
;; RFmode quotient.  Every refinement step is predicated on the CCI
;; register produced by recip_approx_rf, uses status field 1 and performs
;; no rounding (rounding selector 2).
(define_expand "divdi3_internal_thr"
  [(set (match_operand:RF 0 "fr_register_operand" "")
        (float:RF (div:DI (match_operand:RF 1 "fr_register_operand" "")
                          (match_operand:RF 2 "fr_register_operand" ""))))]
  "TARGET_INLINE_INT_DIV"
{
  rtx a = operands[1];
  rtx b = operands[2];
  rtx y = gen_reg_rtx (RFmode);
  rtx y1 = gen_reg_rtx (RFmode);
  rtx y2 = gen_reg_rtx (RFmode);
  rtx e = gen_reg_rtx (RFmode);
  rtx e1 = gen_reg_rtx (RFmode);
  rtx q2 = gen_reg_rtx (RFmode);
  rtx r = gen_reg_rtx (RFmode);
  rtx pred = gen_reg_rtx (CCImode);
  rtx fzero = CONST0_RTX (RFmode);
  rtx fone = CONST1_RTX (RFmode);
  rtx sf1 = CONST1_RTX (SImode);
  rtx rnd_none = CONST2_RTX (SImode);

  /* y = 1 / b  (initial approximation; also sets pred) */
  emit_insn (gen_recip_approx_rf (y, a, b, pred, sf1));
  /* e = 1 - (b * y) */
  emit_insn (gen_m2subrf4_cond (e, pred, fone, b, y, fzero, sf1, rnd_none));
  /* y1 = y + (y * e) */
  emit_insn (gen_m2addrf4_cond (y1, pred, y, y, e, fzero, sf1, rnd_none));
  /* e1 = e * e */
  emit_insn (gen_mulrf3_cond (e1, pred, e, e, fzero, sf1, rnd_none));
  /* y2 = y1 + (y1 * e1) */
  emit_insn (gen_m2addrf4_cond (y2, pred, y1, y1, e1, fzero, sf1, rnd_none));
  /* q2 = y2 * a */
  emit_insn (gen_mulrf3_cond (q2, pred, y2, a, fzero, sf1, rnd_none));
  /* r = a - (b * q2) */
  emit_insn (gen_m2subrf4_cond (r, pred, a, b, q2, fzero, sf1, rnd_none));
  /* q3 = q2 + (r * y2); y is the alternative value for this final
     step, whereas the earlier steps pass zero.  */
  emit_insn (gen_m2addrf4_cond (operands[0], pred, q2, r, y2, y, sf1, rnd_none));
  DONE;
})
915 | ||
50738926 | 916 | ;; SQRT operations |
917 | ||
918 | ||
;; frsqrta: sets FR register operand 0 and CCI register operand 2 from
;; operand 1 (an FR register or one of the fp 0/1 constants accepted by
;; the "G" constraint).  Operand 3 is a constant integer substituted into
;; the ".s%3" completer of the mnemonic.  The insn is marked as not
;; predicable.
(define_insn "sqrt_approx_rf"
  [(set (match_operand:RF 0 "fr_register_operand" "=f")
        (unspec:RF [(match_operand:RF 1 "fr_reg_or_fp01_operand" "fG")]
                   UNSPEC_FR_SQRT_RECIP_APPROX_RES))
   (set (match_operand:CCI 2 "register_operand" "=c")
        (unspec:CCI [(match_dup 1)] UNSPEC_FR_SQRT_RECIP_APPROX))
   (use (match_operand:SI 3 "const_int_operand" ""))]
  ""
  "frsqrta.s%3 %0, %2 = %F1"
  [(set_attr "itanium_class" "fmisc")
   (set_attr "predicable" "no")])
930 | ||
;; Inline SFmode square root: dispatches to the minimum-latency or the
;; throughput-oriented expansion depending on TARGET_INLINE_SQRT.
(define_expand "sqrtsf2"
  [(set (match_operand:SF 0 "fr_register_operand" "=&f")
        (sqrt:SF (match_operand:SF 1 "fr_reg_or_fp01_operand" "fG")))]
  "TARGET_INLINE_SQRT"
{
  emit_insn (TARGET_INLINE_SQRT == INL_MIN_LAT
             ? gen_sqrtsf2_internal_lat (operands[0], operands[1])
             : gen_sqrtsf2_internal_thr (operands[0], operands[1]));
  DONE;
})
944 | ||
;; Throughput-oriented inline SFmode square root.  The input is widened
;; to RFmode, refined from the frsqrta approximation by a sequence of
;; predicated multiply-add steps, and the result is truncated back to
;; SFmode.  Rounding selector 0 rounds to single, 2 does no rounding.
(define_expand "sqrtsf2_internal_thr"
  [(set (match_operand:SF 0 "fr_register_operand" "")
        (sqrt:SF (match_operand:SF 1 "fr_register_operand" "")))]
  "TARGET_INLINE_SQRT"
{
  rtx y = gen_reg_rtx (RFmode);
  rtx b = gen_reg_rtx (RFmode);
  rtx g = gen_reg_rtx (RFmode);
  rtx e = gen_reg_rtx (RFmode);
  rtx s = gen_reg_rtx (RFmode);
  rtx f = gen_reg_rtx (RFmode);
  rtx y1 = gen_reg_rtx (RFmode);
  rtx g1 = gen_reg_rtx (RFmode);
  rtx h = gen_reg_rtx (RFmode);
  rtx d = gen_reg_rtx (RFmode);
  rtx g2 = gen_reg_rtx (RFmode);
  rtx pred = gen_reg_rtx (CCImode);
  rtx fzero = CONST0_RTX (RFmode);
  rtx fone = CONST1_RTX (RFmode);
  rtx half_cst = ia64_dconst_0_5();
  rtx three8_cst = ia64_dconst_0_375();
  rtx half_df = gen_reg_rtx (DFmode);
  rtx three8_df = gen_reg_rtx (DFmode);
  rtx half = gen_reg_rtx (RFmode);
  rtx three8 = gen_reg_rtx (RFmode);
  rtx sf0 = CONST0_RTX (SImode);
  rtx sf1 = CONST1_RTX (SImode);
  rtx rnd_sgl = CONST0_RTX (SImode);
  rtx rnd_none = CONST2_RTX (SImode);

  /* Load 0.5 and 0.375 as DFmode values and widen them to RFmode.  */
  emit_insn (gen_movdf (half_df, half_cst));
  emit_insn (gen_movdf (three8_df, three8_cst));
  emit_insn (gen_extenddfrf2 (half, half_df));
  emit_insn (gen_extenddfrf2 (three8, three8_df));
  /* Empty conversion to put the input into RFmode.  */
  emit_insn (gen_extendsfrf2 (b, operands[1]));
  /* y = sqrt (1 / b)  (initial approximation; also sets pred) */
  emit_insn (gen_sqrt_approx_rf (y, b, pred, sf0));
  /* g = b * y */
  emit_insn (gen_mulrf3_cond (g, pred, b, y, fzero, sf1, rnd_none));
  /* e = 1 - (g * y) */
  emit_insn (gen_m2subrf4_cond (e, pred, fone, g, y, fzero, sf1, rnd_none));
  /* s = 0.5 + (0.375 * e) */
  emit_insn (gen_m2addrf4_cond (s, pred, half, three8, e, fzero, sf1, rnd_none));
  /* f = y * e */
  emit_insn (gen_mulrf3_cond (f, pred, y, e, fzero, sf1, rnd_none));
  /* y1 = y + (f * s) */
  emit_insn (gen_m2addrf4_cond (y1, pred, y, f, s, fzero, sf1, rnd_none));
  /* g1 = single (b * y1) */
  emit_insn (gen_mulrf3_cond (g1, pred, b, y1, fzero, sf1, rnd_sgl));
  /* h = 0.5 * y1 */
  emit_insn (gen_mulrf3_cond (h, pred, half, y1, fzero, sf1, rnd_none));
  /* d = b - g1 * g1 */
  emit_insn (gen_m2subrf4_cond (d, pred, b, g1, g1, fzero, sf1, rnd_none));
  /* g2 = single (g1 + (d * h)); this final step uses status field 0
     and passes y as the alternative value.  */
  emit_insn (gen_m2addrf4_cond (g2, pred, g1, d, h, y, sf0, rnd_sgl));
  /* Conversion back into SFmode.  */
  emit_insn (gen_truncrfsf2 (operands[0], g2));
  DONE;
})
1006 | ||
;; Minimum-latency inline SFmode square root.  The input is widened to
;; RFmode, refined from the frsqrta approximation by a sequence of
;; predicated multiply-add steps, and the result is truncated back to
;; SFmode.  Rounding selector 0 rounds to single, 2 does no rounding.
(define_expand "sqrtsf2_internal_lat"
  [(set (match_operand:SF 0 "fr_register_operand" "")
        (sqrt:SF (match_operand:SF 1 "fr_register_operand" "")))]
  "TARGET_INLINE_SQRT"
{
  rtx y = gen_reg_rtx (RFmode);
  rtx b = gen_reg_rtx (RFmode);
  rtx g = gen_reg_rtx (RFmode);
  rtx g1 = gen_reg_rtx (RFmode);
  rtx g2 = gen_reg_rtx (RFmode);
  rtx e = gen_reg_rtx (RFmode);
  rtx s = gen_reg_rtx (RFmode);
  rtx f = gen_reg_rtx (RFmode);
  rtx f1 = gen_reg_rtx (RFmode);
  rtx h = gen_reg_rtx (RFmode);
  rtx h1 = gen_reg_rtx (RFmode);
  rtx d = gen_reg_rtx (RFmode);
  rtx pred = gen_reg_rtx (CCImode);
  rtx fzero = CONST0_RTX (RFmode);
  rtx fone = CONST1_RTX (RFmode);
  rtx half_cst = ia64_dconst_0_5();
  rtx three8_cst = ia64_dconst_0_375();
  rtx half_df = gen_reg_rtx (DFmode);
  rtx three8_df = gen_reg_rtx (DFmode);
  rtx half = gen_reg_rtx (RFmode);
  rtx three8 = gen_reg_rtx (RFmode);
  rtx sf0 = CONST0_RTX (SImode);
  rtx sf1 = CONST1_RTX (SImode);
  rtx rnd_sgl = CONST0_RTX (SImode);
  rtx rnd_none = CONST2_RTX (SImode);

  /* Load 0.5 and 0.375 as DFmode values and widen them to RFmode.  */
  emit_insn (gen_movdf (half_df, half_cst));
  emit_insn (gen_movdf (three8_df, three8_cst));
  emit_insn (gen_extenddfrf2 (half, half_df));
  emit_insn (gen_extenddfrf2 (three8, three8_df));
  /* Empty conversion to put the input into RFmode.  */
  emit_insn (gen_extendsfrf2 (b, operands[1]));
  /* y = sqrt (1 / b)  (initial approximation; also sets pred) */
  emit_insn (gen_sqrt_approx_rf (y, b, pred, sf0));
  /* g = b * y */
  emit_insn (gen_mulrf3_cond (g, pred, b, y, fzero, sf1, rnd_none));
  /* e = 1 - (g * y) */
  emit_insn (gen_m2subrf4_cond (e, pred, fone, g, y, fzero, sf1, rnd_none));
  /* h = 0.5 * y */
  emit_insn (gen_mulrf3_cond (h, pred, half, y, fzero, sf1, rnd_none));
  /* s = 0.5 + (0.375 * e) */
  emit_insn (gen_m2addrf4_cond (s, pred, half, three8, e, fzero, sf1, rnd_none));
  /* f = e * g */
  emit_insn (gen_mulrf3_cond (f, pred, e, g, fzero, sf1, rnd_none));
  /* g1 = single (g + (f * s)) */
  emit_insn (gen_m2addrf4_cond (g1, pred, g, f, s, fzero, sf1, rnd_sgl));
  /* f1 = e * h */
  emit_insn (gen_mulrf3_cond (f1, pred, e, h, fzero, sf1, rnd_none));
  /* d = b - g1 * g1 */
  emit_insn (gen_m2subrf4_cond (d, pred, b, g1, g1, fzero, sf1, rnd_none));
  /* h1 = h + (f1 * s) */
  emit_insn (gen_m2addrf4_cond (h1, pred, h, f1, s, fzero, sf1, rnd_none));
  /* g2 = single (g1 + (d * h1)); this final step uses status field 0
     and passes y as the alternative value.  */
  emit_insn (gen_m2addrf4_cond (g2, pred, g1, d, h1, y, sf0, rnd_sgl));
  /* Conversion back into SFmode.  */
  emit_insn (gen_truncrfsf2 (operands[0], g2));
  DONE;
})
1071 | ||
;; Inline DFmode square root.  The INL_MIN_LAT selection of
;; gen_sqrtdf2_internal_lat is compiled out, so the throughput-oriented
;; expansion is used unconditionally.
(define_expand "sqrtdf2"
  [(set (match_operand:DF 0 "fr_register_operand" "=&f")
        (sqrt:DF (match_operand:DF 1 "fr_reg_or_fp01_operand" "fG")))]
  "TARGET_INLINE_SQRT"
{
  emit_insn (gen_sqrtdf2_internal_thr (operands[0], operands[1]));
  DONE;
})
1087 | ||
;; Throughput-oriented inline DFmode square root.  The input is widened
;; to RFmode, refined from the frsqrta approximation by a sequence of
;; predicated multiply-add steps, and the result is truncated back to
;; DFmode.  Rounding selector 1 rounds to double, 2 does no rounding.
(define_expand "sqrtdf2_internal_thr"
  [(set (match_operand:DF 0 "fr_register_operand" "")
        (sqrt:DF (match_operand:DF 1 "fr_register_operand" "")))]
  "TARGET_INLINE_SQRT"
{
  rtx y = gen_reg_rtx (RFmode);
  rtx b = gen_reg_rtx (RFmode);
  rtx g = gen_reg_rtx (RFmode);
  rtx g1 = gen_reg_rtx (RFmode);
  rtx g2 = gen_reg_rtx (RFmode);
  rtx g3 = gen_reg_rtx (RFmode);
  rtx g4 = gen_reg_rtx (RFmode);
  rtx r = gen_reg_rtx (RFmode);
  rtx r1 = gen_reg_rtx (RFmode);
  rtx h = gen_reg_rtx (RFmode);
  rtx h1 = gen_reg_rtx (RFmode);
  rtx h2 = gen_reg_rtx (RFmode);
  rtx d = gen_reg_rtx (RFmode);
  rtx d1 = gen_reg_rtx (RFmode);
  rtx pred = gen_reg_rtx (CCImode);
  rtx fzero = CONST0_RTX (RFmode);
  rtx half_cst = ia64_dconst_0_5();
  rtx half_df = gen_reg_rtx (DFmode);
  rtx half = gen_reg_rtx (RFmode);
  rtx sf0 = CONST0_RTX (SImode);
  rtx sf1 = CONST1_RTX (SImode);
  rtx rnd_dbl = CONST1_RTX (SImode);
  rtx rnd_none = CONST2_RTX (SImode);

  /* Load 0.5 as a DFmode value and widen it to RFmode.  */
  emit_insn (gen_movdf (half_df, half_cst));
  emit_insn (gen_extenddfrf2 (half, half_df));
  /* Empty conversion to put the input into RFmode.  */
  emit_insn (gen_extenddfrf2 (b, operands[1]));
  /* y = sqrt (1 / b)  (initial approximation; also sets pred) */
  emit_insn (gen_sqrt_approx_rf (y, b, pred, sf0));
  /* g = b * y */
  emit_insn (gen_mulrf3_cond (g, pred, b, y, fzero, sf1, rnd_none));
  /* h = 0.5 * y */
  emit_insn (gen_mulrf3_cond (h, pred, half, y, fzero, sf1, rnd_none));
  /* r = 0.5 - (g * h) */
  emit_insn (gen_m2subrf4_cond (r, pred, half, g, h, fzero, sf1, rnd_none));
  /* g1 = g + (g * r) */
  emit_insn (gen_m2addrf4_cond (g1, pred, g, g, r, fzero, sf1, rnd_none));
  /* h1 = h + (h * r) */
  emit_insn (gen_m2addrf4_cond (h1, pred, h, h, r, fzero, sf1, rnd_none));
  /* r1 = 0.5 - (g1 * h1) */
  emit_insn (gen_m2subrf4_cond (r1, pred, half, g1, h1, fzero, sf1, rnd_none));
  /* g2 = g1 + (g1 * r1) */
  emit_insn (gen_m2addrf4_cond (g2, pred, g1, g1, r1, fzero, sf1, rnd_none));
  /* h2 = h1 + (h1 * r1) */
  emit_insn (gen_m2addrf4_cond (h2, pred, h1, h1, r1, fzero, sf1, rnd_none));
  /* d = b - (g2 * g2) */
  emit_insn (gen_m2subrf4_cond (d, pred, b, g2, g2, fzero, sf1, rnd_none));
  /* g3 = g2 + (d * h2) */
  emit_insn (gen_m2addrf4_cond (g3, pred, g2, d, h2, fzero, sf1, rnd_none));
  /* d1 = b - (g3 * g3) */
  emit_insn (gen_m2subrf4_cond (d1, pred, b, g3, g3, fzero, sf1, rnd_none));
  /* g4 = double (g3 + (d1 * h2)); this final step passes y as the
     alternative value.  */
  emit_insn (gen_m2addrf4_cond (g4, pred, g3, d1, h2, y, sf1, rnd_dbl));
  /* Conversion back into DFmode.  */
  emit_insn (gen_truncrfdf2 (operands[0], g4));
  DONE;
})
1152 | ||
;; Inline XFmode square root.  The input is widened to RFmode, refined
;; from the frsqrta approximation by a sequence of predicated
;; multiply-add steps (none of which rounds; rounding selector 2), and
;; the result is truncated back to XFmode.
(define_expand "sqrtxf2"
  [(set (match_operand:XF 0 "fr_register_operand" "")
        (sqrt:XF (match_operand:XF 1 "fr_register_operand" "")))]
  "TARGET_INLINE_SQRT"
{
  rtx y = gen_reg_rtx (RFmode);
  rtx b = gen_reg_rtx (RFmode);
  rtx g = gen_reg_rtx (RFmode);
  rtx g1 = gen_reg_rtx (RFmode);
  rtx g2 = gen_reg_rtx (RFmode);
  rtx g3 = gen_reg_rtx (RFmode);
  rtx g4 = gen_reg_rtx (RFmode);
  rtx e = gen_reg_rtx (RFmode);
  rtx e1 = gen_reg_rtx (RFmode);
  rtx e2 = gen_reg_rtx (RFmode);
  rtx h = gen_reg_rtx (RFmode);
  rtx h1 = gen_reg_rtx (RFmode);
  rtx h2 = gen_reg_rtx (RFmode);
  rtx h3 = gen_reg_rtx (RFmode);
  rtx d = gen_reg_rtx (RFmode);
  rtx d1 = gen_reg_rtx (RFmode);
  rtx pred = gen_reg_rtx (CCImode);
  rtx fzero = CONST0_RTX (RFmode);
  rtx half_cst = ia64_dconst_0_5();
  rtx half_df = gen_reg_rtx (DFmode);
  rtx half = gen_reg_rtx (RFmode);
  rtx sf0 = CONST0_RTX (SImode);
  rtx sf1 = CONST1_RTX (SImode);
  rtx rnd_none = CONST2_RTX (SImode);

  /* Load 0.5 as a DFmode value and widen it to RFmode.  */
  emit_insn (gen_movdf (half_df, half_cst));
  emit_insn (gen_extenddfrf2 (half, half_df));
  /* Empty conversion to put the input into RFmode.  */
  emit_insn (gen_extendxfrf2 (b, operands[1]));
  /* y = sqrt (1 / b)  (initial approximation; also sets pred) */
  emit_insn (gen_sqrt_approx_rf (y, b, pred, sf0));
  /* g = b * y */
  emit_insn (gen_mulrf3_cond (g, pred, b, y, fzero, sf1, rnd_none));
  /* h = 0.5 * y */
  emit_insn (gen_mulrf3_cond (h, pred, half, y, fzero, sf1, rnd_none));
  /* e = 0.5 - (g * h) */
  emit_insn (gen_m2subrf4_cond (e, pred, half, g, h, fzero, sf1, rnd_none));
  /* g1 = g + (g * e) */
  emit_insn (gen_m2addrf4_cond (g1, pred, g, g, e, fzero, sf1, rnd_none));
  /* h1 = h + (h * e) */
  emit_insn (gen_m2addrf4_cond (h1, pred, h, h, e, fzero, sf1, rnd_none));
  /* e1 = 0.5 - (g1 * h1) */
  emit_insn (gen_m2subrf4_cond (e1, pred, half, g1, h1, fzero, sf1, rnd_none));
  /* g2 = g1 + (g1 * e1) */
  emit_insn (gen_m2addrf4_cond (g2, pred, g1, g1, e1, fzero, sf1, rnd_none));
  /* h2 = h1 + (h1 * e1) */
  emit_insn (gen_m2addrf4_cond (h2, pred, h1, h1, e1, fzero, sf1, rnd_none));
  /* d = b - (g2 * g2) */
  emit_insn (gen_m2subrf4_cond (d, pred, b, g2, g2, fzero, sf1, rnd_none));
  /* e2 = 0.5 - (g2 * h2) */
  emit_insn (gen_m2subrf4_cond (e2, pred, half, g2, h2, fzero, sf1, rnd_none));
  /* g3 = g2 + (d * h2) */
  emit_insn (gen_m2addrf4_cond (g3, pred, g2, d, h2, fzero, sf1, rnd_none));
  /* h3 = h2 + (e2 * h2) */
  emit_insn (gen_m2addrf4_cond (h3, pred, h2, e2, h2, fzero, sf1, rnd_none));
  /* d1 = b - (g3 * g3) */
  emit_insn (gen_m2subrf4_cond (d1, pred, b, g3, g3, fzero, sf1, rnd_none));
  /* g4 = g3 + (d1 * h3); this final step passes y as the alternative
     value.  */
  emit_insn (gen_m2addrf4_cond (g4, pred, g3, d1, h3, y, sf1, rnd_none));
  /* Conversion back into XFmode.  */
  emit_insn (gen_truncrfxf2 (operands[0], g4));
  DONE;
})