]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/config/ia64/div.md
Update copyright years.
[thirdparty/gcc.git] / gcc / config / ia64 / div.md
CommitLineData
99dee823 1;; Copyright (C) 2007-2021 Free Software Foundation, Inc.
ad41bd84
JM
2;;
3;; This file is part of GCC.
4;;
5;; GCC is free software; you can redistribute it and/or modify
6;; it under the terms of the GNU General Public License as published by
7;; the Free Software Foundation; either version 3, or (at your option)
8;; any later version.
9;;
10;; GCC is distributed in the hope that it will be useful,
11;; but WITHOUT ANY WARRANTY; without even the implied warranty of
12;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13;; GNU General Public License for more details.
14;;
15;; You should have received a copy of the GNU General Public License
16;; along with GCC; see the file COPYING3. If not see
17;; <http://www.gnu.org/licenses/>.
4883241c
SE
18
19;; For the internal conditional math routines:
20
21;; operand 0 is always the result
22;; operand 1 is always the predicate
23;; operand 2, 3, and sometimes 4 are the input values.
24;; operand 4 or 5 is the value used for the result when the predicate is false; operand 5 or 6 is the floating point status register to use.
25;; operand 6 or 7 is the rounding to do. (0 = single, 1 = double, 2 = none)
26;;
27;; addrf3_cond - F0 = F2 + F3
28;; subrf3_cond - F0 = F2 - F3
29;; mulrf3_cond - F0 = F2 * F3
30;; nmulrf3_cond - F0 = - (F2 * F3)
31;; m1addrf4_cond - F0 = (F2 * F3) + F4
32;; m1subrf4_cond - F0 = (F2 * F3) - F4
33;; m2addrf4_cond - F0 = F2 + (F3 * F4)
34;; m2subrf4_cond - F0 = F2 - (F3 * F4)
35
36;; Basic plus/minus/mult operations
37
38(define_insn "addrf3_cond"
39 [(set (match_operand:RF 0 "fr_register_operand" "=f,f")
33620355 40 (if_then_else:RF (ne:RF (match_operand:CCI 1 "register_operand" "c,c")
4883241c
SE
41 (const_int 0))
42 (plus:RF
43 (match_operand:RF 2 "fr_reg_or_fp01_operand" "fG,fG")
44 (match_operand:RF 3 "fr_reg_or_fp01_operand" "fG,fG"))
45 (match_operand:RF 4 "fr_reg_or_0_operand" "0,H")))
46 (use (match_operand:SI 5 "const_int_operand" ""))
47 (use (match_operand:SI 6 "const_int_operand" ""))]
48 ""
49 "(%1) fadd%R6.s%5 %0 = %F2, %F3"
50 [(set_attr "itanium_class" "fmac")
51 (set_attr "predicable" "no")])
52
53(define_insn "subrf3_cond"
54 [(set (match_operand:RF 0 "fr_register_operand" "=f,f")
33620355 55 (if_then_else:RF (ne:RF (match_operand:CCI 1 "register_operand" "c,c")
4883241c
SE
56 (const_int 0))
57 (minus:RF
58 (match_operand:RF 2 "fr_reg_or_fp01_operand" "fG,fG")
59 (match_operand:RF 3 "fr_reg_or_fp01_operand" "fG,fG"))
60 (match_operand:RF 4 "fr_reg_or_0_operand" "0,H")))
61 (use (match_operand:SI 5 "const_int_operand" ""))
62 (use (match_operand:SI 6 "const_int_operand" ""))]
63 ""
64 "(%1) fsub%R6.s%5 %0 = %F2, %F3"
65 [(set_attr "itanium_class" "fmac")
66 (set_attr "predicable" "no")])
67
68(define_insn "mulrf3_cond"
69 [(set (match_operand:RF 0 "fr_register_operand" "=f,f")
33620355 70 (if_then_else:RF (ne:RF (match_operand:CCI 1 "register_operand" "c,c")
4883241c
SE
71 (const_int 0))
72 (mult:RF
73 (match_operand:RF 2 "fr_reg_or_fp01_operand" "fG,fG")
74 (match_operand:RF 3 "fr_reg_or_fp01_operand" "fG,fG"))
75 (match_operand:RF 4 "fr_reg_or_0_operand" "0,H")))
76 (use (match_operand:SI 5 "const_int_operand" ""))
77 (use (match_operand:SI 6 "const_int_operand" ""))]
78 ""
79 "(%1) fmpy%R6.s%5 %0 = %F2, %F3"
80 [(set_attr "itanium_class" "fmac")
81 (set_attr "predicable" "no")])
82
83;; neg-mult operation
84
85(define_insn "nmulrf3_cond"
86 [(set (match_operand:RF 0 "fr_register_operand" "=f,f")
33620355 87 (if_then_else:RF (ne:RF (match_operand:CCI 1 "register_operand" "c,c")
4883241c
SE
88 (const_int 0))
89 (neg:RF (mult:RF
90 (match_operand:RF 2 "fr_reg_or_fp01_operand" "fG,fG")
91 (match_operand:RF 3 "fr_reg_or_fp01_operand" "fG,fG")))
92 (match_operand:RF 4 "fr_reg_or_0_operand" "0,H")))
93 (use (match_operand:SI 5 "const_int_operand" ""))
94 (use (match_operand:SI 6 "const_int_operand" ""))]
95 ""
96 "(%1) fnmpy%R6.s%5 %0 = %F2, %F3"
97 [(set_attr "itanium_class" "fmac")
98 (set_attr "predicable" "no")])
99
100;; add-mult/sub-mult operations (mult as op1)
101
102(define_insn "m1addrf4_cond"
103 [(set (match_operand:RF 0 "fr_register_operand" "=f,f")
33620355 104 (if_then_else:RF (ne:RF (match_operand:CCI 1 "register_operand" "c,c")
4883241c
SE
105 (const_int 0))
106 (plus:RF
107 (mult:RF
108 (match_operand:RF 2 "fr_reg_or_fp01_operand" "fG,fG")
109 (match_operand:RF 3 "fr_reg_or_fp01_operand" "fG,fG"))
110 (match_operand:RF 4 "fr_reg_or_fp01_operand" "fG,fG"))
111 (match_operand:RF 5 "fr_reg_or_0_operand" "0,H")))
112 (use (match_operand:SI 6 "const_int_operand" ""))
113 (use (match_operand:SI 7 "const_int_operand" ""))]
114 ""
115 "(%1) fma%R7.s%6 %0 = %F2, %F3, %F4"
116 [(set_attr "itanium_class" "fmac")
117 (set_attr "predicable" "no")])
118
119(define_insn "m1subrf4_cond"
120 [(set (match_operand:RF 0 "fr_register_operand" "=f,f")
33620355 121 (if_then_else:RF (ne:RF (match_operand:CCI 1 "register_operand" "c,c")
4883241c
SE
122 (const_int 0))
123 (minus:RF
124 (mult:RF
125 (match_operand:RF 2 "fr_reg_or_fp01_operand" "fG,fG")
126 (match_operand:RF 3 "fr_reg_or_fp01_operand" "fG,fG"))
127 (match_operand:RF 4 "fr_reg_or_fp01_operand" "fG,fG"))
128 (match_operand:RF 5 "fr_reg_or_0_operand" "0,H")))
129 (use (match_operand:SI 6 "const_int_operand" ""))
130 (use (match_operand:SI 7 "const_int_operand" ""))]
131 ""
132 "(%1) fms%R7.s%6 %0 = %F2, %F3, %F4"
133 [(set_attr "itanium_class" "fmac")
134 (set_attr "predicable" "no")])
135
136;; add-mult/sub-mult operations (mult as op2)
137
138(define_insn "m2addrf4_cond"
139 [(set (match_operand:RF 0 "fr_register_operand" "=f,f")
33620355 140 (if_then_else:RF (ne:RF (match_operand:CCI 1 "register_operand" "c,c")
4883241c
SE
141 (const_int 0))
142 (plus:RF
143 (match_operand:RF 2 "fr_reg_or_fp01_operand" "fG,fG")
144 (mult:RF
145 (match_operand:RF 3 "fr_reg_or_fp01_operand" "fG,fG")
146 (match_operand:RF 4 "fr_reg_or_fp01_operand" "fG,fG")))
147 (match_operand:RF 5 "fr_reg_or_0_operand" "0,H")))
148 (use (match_operand:SI 6 "const_int_operand" ""))
149 (use (match_operand:SI 7 "const_int_operand" ""))]
150 ""
151 "(%1) fma%R7.s%6 %0 = %F3, %F4, %F2"
152 [(set_attr "itanium_class" "fmac")
153 (set_attr "predicable" "no")])
154
155(define_insn "m2subrf4_cond"
156 [(set (match_operand:RF 0 "fr_register_operand" "=f,f")
33620355 157 (if_then_else:RF (ne:RF (match_operand:CCI 1 "register_operand" "c,c")
4883241c
SE
158 (const_int 0))
159 (minus:RF
b441fbb0 160 (match_operand:RF 2 "fr_reg_or_fp01_operand" "fG,fG")
4883241c 161 (mult:RF
b441fbb0
SE
162 (match_operand:RF 3 "fr_reg_or_fp01_operand" "fG,fG")
163 (match_operand:RF 4 "fr_reg_or_fp01_operand" "fG,fG")))
4883241c
SE
164 (match_operand:RF 5 "fr_reg_or_0_operand" "0,H")))
165 (use (match_operand:SI 6 "const_int_operand" ""))
166 (use (match_operand:SI 7 "const_int_operand" ""))]
167 ""
168 "(%1) fnma%R7.s%6 %0 = %F3, %F4, %F2"
169 [(set_attr "itanium_class" "fmac")
170 (set_attr "predicable" "no")])
171
172;; Conversions to/from RF and SF/DF/XF
173;; These conversions should not generate any code but make it possible
174;; for all the instructions used to implement floating point division
175;; to be written for RFmode only and to not have to handle multiple
176;; modes or to have to handle a register in more than one mode.
177
3abcb3a7 178(define_mode_iterator SDX_F [SF DF XF])
4883241c
SE
179
180(define_insn "extend<mode>rf2"
181 [(set (match_operand:RF 0 "fr_register_operand" "=f")
6adb807e 182 (float_extend:RF (match_operand:SDX_F 1 "fr_reg_or_fp01_operand" "fG")))]
4883241c
SE
183 ""
184 "#"
185 [(set_attr "itanium_class" "fmisc")
186 (set_attr "predicable" "yes")])
187
;; Once reload has completed, rewrite a float_extend to RFmode as a plain
;; set of operand 0 from a freshly built operand 2.  Operand 2 is an RFmode
;; REG: FR_REG (0) when operand 1 is the constant zero of its mode,
;; FR_REG (1) when it is the constant one, and otherwise the register with
;; the same REGNO as operand 1.
188(define_split
189 [(set (match_operand:RF 0 "fr_register_operand" "")
6adb807e 190 (float_extend:RF (match_operand:SDX_F 1 "fr_reg_or_fp01_operand" "")))]
4883241c
SE
191 "reload_completed"
192 [(set (match_dup 0) (match_dup 2))]
193{
6adb807e
SE
194 if (operands[1] == CONST0_RTX (<MODE>mode))
195 operands[2] = gen_rtx_REG (RFmode, FR_REG (0));
196 else if (operands[1] == CONST1_RTX (<MODE>mode))
197 operands[2] = gen_rtx_REG (RFmode, FR_REG (1));
198 else
199 operands[2] = gen_rtx_REG (RFmode, REGNO (operands[1]));
4883241c
SE
200})
201
202
203(define_insn "truncrf<mode>2"
204 [(set (match_operand:SDX_F 0 "fr_register_operand" "=f")
6adb807e 205 (float_truncate:SDX_F (match_operand:RF 1 "fr_reg_or_fp01_operand" "fG")))]
4883241c
SE
206 ""
207 "#"
208 [(set_attr "itanium_class" "fmisc")
209 (set_attr "predicable" "yes")])
210
;; Once reload has completed, rewrite a float_truncate from RFmode as a
;; plain set of operand 0 from a freshly built operand 2.  Operand 2 is a
;; REG in the destination mode: FR_REG (0) when operand 1 is the RFmode
;; constant zero, FR_REG (1) when it is the RFmode constant one, and
;; otherwise the register with the same REGNO as operand 1.
211(define_split
212 [(set (match_operand:SDX_F 0 "fr_register_operand" "")
6adb807e 213 (float_truncate:SDX_F (match_operand:RF 1 "fr_reg_or_fp01_operand" "")))]
4883241c
SE
214 "reload_completed"
215 [(set (match_dup 0) (match_dup 2))]
216{
6adb807e
SE
217 if (operands[1] == CONST0_RTX (RFmode))
218 operands[2] = gen_rtx_REG (<MODE>mode, FR_REG (0));
219 else if (operands[1] == CONST1_RTX (RFmode))
220 operands[2] = gen_rtx_REG (<MODE>mode, FR_REG (1));
221 else
222 operands[2] = gen_rtx_REG (<MODE>mode, REGNO (operands[1]));
4883241c
SE
223})
224
49df2fb8
SE
225;; Float to integer truncations using an alternative status register.
226
;; Signed variant: operand 0 (DImode, held in an FP register) is set to the
;; fix of RFmode operand 1.  Operand 2 is a constant integer that is
;; substituted into the ".s%2" suffix of the emitted fcvt.fx.trunc.
227(define_insn "fix_truncrfdi2_alts"
228 [(set (match_operand:DI 0 "fr_register_operand" "=f")
229 (fix:DI (match_operand:RF 1 "fr_register_operand" "f")))
230 (use (match_operand:SI 2 "const_int_operand" ""))]
231 ""
232 "fcvt.fx.trunc.s%2 %0 = %1"
233 [(set_attr "itanium_class" "fcvtfx")])
234
;; Unsigned variant: operand 0 (DImode, held in an FP register) is set to
;; the unsigned_fix of RFmode operand 1.  Operand 2 is a constant integer
;; that is substituted into the ".s%2" suffix of the emitted fcvt.fxu.trunc.
235(define_insn "fixuns_truncrfdi2_alts"
236 [(set (match_operand:DI 0 "fr_register_operand" "=f")
237 (unsigned_fix:DI (match_operand:RF 1 "fr_register_operand" "f")))
238 (use (match_operand:SI 2 "const_int_operand" ""))]
239 ""
240 "fcvt.fxu.trunc.s%2 %0 = %1"
241 [(set_attr "itanium_class" "fcvtfx")])
242
;; Emits "setf.exp": RFmode operand 0 is produced from DImode general
;; register operand 1 through UNSPEC_SETF_EXP.  divsi3_internal uses this
;; with the value 65501 to load what its comment describes as 2**(-34).
243(define_insn "setf_exp_rf"
244 [(set (match_operand:RF 0 "fr_register_operand" "=f")
245 (unspec:RF [(match_operand:DI 1 "register_operand" "r")]
246 UNSPEC_SETF_EXP))]
247 ""
248 "setf.exp %0 = %1"
249 [(set_attr "itanium_class" "frfr")])
250
cea618ac 251;; Reciprocal approximation
4883241c
SE
252
;; Emits "frcpa", which has two outputs computed from operands 1 and 2:
;; RFmode operand 0 (the callers in this file treat it as an approximation
;; of 1 / operand 2) and CCImode operand 3, which the callers pass as the
;; predicate of the subsequent *_cond instructions.  Operand 4 is a constant
;; integer substituted into the ".s%4" suffix.
253(define_insn "recip_approx_rf"
254 [(set (match_operand:RF 0 "fr_register_operand" "=f")
6adb807e
SE
255 (unspec:RF [(match_operand:RF 1 "fr_reg_or_fp01_operand" "fG")
256 (match_operand:RF 2 "fr_reg_or_fp01_operand" "fG")]
1def9c3f 257 UNSPEC_FR_RECIP_APPROX_RES))
33620355
EB
258 (set (match_operand:CCI 3 "register_operand" "=c")
259 (unspec:CCI [(match_dup 1) (match_dup 2)] UNSPEC_FR_RECIP_APPROX))
4883241c
SE
260 (use (match_operand:SI 4 "const_int_operand" ""))]
261 ""
6adb807e 262 "frcpa.s%4 %0, %3 = %F1, %F2"
4883241c
SE
263 [(set_attr "itanium_class" "fmisc")
264 (set_attr "predicable" "no")])
265
49df2fb8
SE
266;; Single precision floating point division
267
;; Entry point for inline SFmode division.  It only dispatches: when
;; TARGET_INLINE_FLOAT_DIV equals INL_MIN_LAT the minimum latency expander
;; (divsf3_internal_lat) is used, otherwise the maximum throughput one
;; (divsf3_internal_thr).  The chosen sequence is emitted and the expander
;; finishes with DONE.
268(define_expand "divsf3"
269 [(set (match_operand:SF 0 "fr_register_operand" "")
270 (div:SF (match_operand:SF 1 "fr_reg_or_fp01_operand" "")
271 (match_operand:SF 2 "fr_reg_or_fp01_operand" "")))]
272 "TARGET_INLINE_FLOAT_DIV"
273{
274 rtx insn;
275 if (TARGET_INLINE_FLOAT_DIV == INL_MIN_LAT)
276 insn = gen_divsf3_internal_lat (operands[0], operands[1], operands[2]);
277 else
278 insn = gen_divsf3_internal_thr (operands[0], operands[1], operands[2]);
279 emit_insn (insn);
280 DONE;
281})
282
4883241c
SE
283;; Single precision floating point division (maximum throughput algorithm).
284
285(define_expand "divsf3_internal_thr"
286 [(set (match_operand:SF 0 "fr_register_operand" "")
6adb807e
SE
287 (div:SF (match_operand:SF 1 "fr_reg_or_fp01_operand" "")
288 (match_operand:SF 2 "fr_reg_or_fp01_operand" "")))]
4883241c
SE
289 "TARGET_INLINE_FLOAT_DIV"
290{
291 rtx y = gen_reg_rtx (RFmode);
292 rtx a = gen_reg_rtx (RFmode);
293 rtx b = gen_reg_rtx (RFmode);
294 rtx e = gen_reg_rtx (RFmode);
295 rtx y1 = gen_reg_rtx (RFmode);
296 rtx y2 = gen_reg_rtx (RFmode);
297 rtx q = gen_reg_rtx (RFmode);
298 rtx r = gen_reg_rtx (RFmode);
299 rtx q_res = gen_reg_rtx (RFmode);
33620355 300 rtx cond = gen_reg_rtx (CCImode);
4883241c
SE
301 rtx zero = CONST0_RTX (RFmode);
302 rtx one = CONST1_RTX (RFmode);
303 rtx status0 = CONST0_RTX (SImode);
304 rtx status1 = CONST1_RTX (SImode);
305 rtx trunc_sgl = CONST0_RTX (SImode);
306 rtx trunc_off = CONST2_RTX (SImode);
307
308 /* Empty conversions to put inputs into RFmode. */
309 emit_insn (gen_extendsfrf2 (a, operands[1]));
310 emit_insn (gen_extendsfrf2 (b, operands[2]));
311 /* y = 1 / b */
312 emit_insn (gen_recip_approx_rf (y, a, b, cond, status0));
313 /* e = 1 - (b * y) */
314 emit_insn (gen_m2subrf4_cond (e, cond, one, b, y, zero, status1, trunc_off));
315 /* y1 = y + (y * e) */
316 emit_insn (gen_m2addrf4_cond (y1, cond, y, y, e, zero, status1, trunc_off));
317 /* y2 = y + (y1 * e) */
318 emit_insn (gen_m2addrf4_cond (y2, cond, y, y1, e, zero, status1, trunc_off));
319 /* q = single(a * y2) */
320 emit_insn (gen_mulrf3_cond (q, cond, a, y2, zero, status1, trunc_sgl));
321 /* r = a - (q * b) */
322 emit_insn (gen_m2subrf4_cond (r, cond, a, q, b, zero, status1, trunc_off));
323 /* Q = single (q + (r * y2)) */
324 emit_insn (gen_m2addrf4_cond (q_res, cond, q, r, y2, y, status0, trunc_sgl));
325 /* Conversion back into SFmode. */
326 emit_insn (gen_truncrfsf2 (operands[0], q_res));
327 DONE;
328})
329
13d1a6e7
SE
330;; Single precision floating point division (minimum latency algorithm).
331
332(define_expand "divsf3_internal_lat"
333 [(set (match_operand:SF 0 "fr_register_operand" "")
6adb807e
SE
334 (div:SF (match_operand:SF 1 "fr_reg_or_fp01_operand" "")
335 (match_operand:SF 2 "fr_reg_or_fp01_operand" "")))]
13d1a6e7
SE
336 "TARGET_INLINE_FLOAT_DIV"
337{
338 rtx y = gen_reg_rtx (RFmode);
339 rtx a = gen_reg_rtx (RFmode);
340 rtx b = gen_reg_rtx (RFmode);
341 rtx e = gen_reg_rtx (RFmode);
342 rtx q = gen_reg_rtx (RFmode);
343 rtx e1 = gen_reg_rtx (RFmode);
344 rtx y1 = gen_reg_rtx (RFmode);
345 rtx q1 = gen_reg_rtx (RFmode);
346 rtx r = gen_reg_rtx (RFmode);
347 rtx q_res = gen_reg_rtx (RFmode);
33620355 348 rtx cond = gen_reg_rtx (CCImode);
13d1a6e7
SE
349 rtx zero = CONST0_RTX (RFmode);
350 rtx one = CONST1_RTX (RFmode);
351 rtx status0 = CONST0_RTX (SImode);
352 rtx status1 = CONST1_RTX (SImode);
353 rtx trunc_sgl = CONST0_RTX (SImode);
354 rtx trunc_off = CONST2_RTX (SImode);
355
356 /* Empty conversions to put inputs into RFmode. */
357 emit_insn (gen_extendsfrf2 (a, operands[1]));
358 emit_insn (gen_extendsfrf2 (b, operands[2]));
359 /* y = 1 / b */
360 emit_insn (gen_recip_approx_rf (y, a, b, cond, status0));
361 /* q = a * y */
362 emit_insn (gen_mulrf3_cond (q, cond, a, y, zero, status1, trunc_off));
363 /* e = 1 - (b * y) */
364 emit_insn (gen_m2subrf4_cond (e, cond, one, b, y, zero, status1, trunc_off));
365 /* e1 = e + (e * e) */
366 emit_insn (gen_m2addrf4_cond (e1, cond, e, e, e, zero, status1, trunc_off));
367 /* q1 = single(q + (q * e1)) */
368 emit_insn (gen_m2addrf4_cond (q1, cond, q, q, e1, zero, status1, trunc_sgl));
369 /* y1 = y + (y * e1) */
370 emit_insn (gen_m2addrf4_cond (y1, cond, y, y, e1, zero, status1, trunc_off));
371 /* r = a - (q1 * b) */
372 emit_insn (gen_m2subrf4_cond (r, cond, a, q1, b, zero, status1, trunc_off));
373 /* Q = single (q1 + (r * y1)) */
374 emit_insn (gen_m2addrf4_cond (q_res, cond, q1, r, y1, y, status0, trunc_sgl));
375 /* Conversion back into SFmode. */
376 emit_insn (gen_truncrfsf2 (operands[0], q_res));
377 DONE;
378})
379
49df2fb8
SE
380;; Double precision floating point division
381
;; Entry point for inline DFmode division.  It only dispatches: when
;; TARGET_INLINE_FLOAT_DIV equals INL_MIN_LAT the minimum latency expander
;; (divdf3_internal_lat) is used, otherwise the maximum throughput one
;; (divdf3_internal_thr).  The chosen sequence is emitted and the expander
;; finishes with DONE.
382(define_expand "divdf3"
383 [(set (match_operand:DF 0 "fr_register_operand" "")
384 (div:DF (match_operand:DF 1 "fr_reg_or_fp01_operand" "")
385 (match_operand:DF 2 "fr_reg_or_fp01_operand" "")))]
386 "TARGET_INLINE_FLOAT_DIV"
387{
388 rtx insn;
389 if (TARGET_INLINE_FLOAT_DIV == INL_MIN_LAT)
390 insn = gen_divdf3_internal_lat (operands[0], operands[1], operands[2]);
391 else
392 insn = gen_divdf3_internal_thr (operands[0], operands[1], operands[2]);
393 emit_insn (insn);
394 DONE;
395})
4883241c
SE
396
397;; Double precision floating point division (maximum throughput algorithm).
398
399(define_expand "divdf3_internal_thr"
400 [(set (match_operand:DF 0 "fr_register_operand" "")
6adb807e
SE
401 (div:DF (match_operand:DF 1 "fr_reg_or_fp01_operand" "")
402 (match_operand:DF 2 "fr_reg_or_fp01_operand" "")))]
4883241c
SE
403 "TARGET_INLINE_FLOAT_DIV"
404{
405 rtx q_res = gen_reg_rtx (RFmode);
406 rtx a = gen_reg_rtx (RFmode);
407 rtx b = gen_reg_rtx (RFmode);
408 rtx y = gen_reg_rtx (RFmode);
409 rtx e = gen_reg_rtx (RFmode);
410 rtx y1 = gen_reg_rtx (RFmode);
411 rtx e1 = gen_reg_rtx (RFmode);
412 rtx y2 = gen_reg_rtx (RFmode);
413 rtx e2 = gen_reg_rtx (RFmode);
414 rtx y3 = gen_reg_rtx (RFmode);
415 rtx q = gen_reg_rtx (RFmode);
416 rtx r = gen_reg_rtx (RFmode);
33620355 417 rtx cond = gen_reg_rtx (CCImode);
4883241c
SE
418 rtx zero = CONST0_RTX (RFmode);
419 rtx one = CONST1_RTX (RFmode);
420 rtx status0 = CONST0_RTX (SImode);
421 rtx status1 = CONST1_RTX (SImode);
422 rtx trunc_dbl = CONST1_RTX (SImode);
423 rtx trunc_off = CONST2_RTX (SImode);
424 /* Empty conversions to put inputs into RFmode */
425 emit_insn (gen_extenddfrf2 (a, operands[1]));
426 emit_insn (gen_extenddfrf2 (b, operands[2]));
427 /* y = 1 / b */
428 emit_insn (gen_recip_approx_rf (y, a, b, cond, status0));
429 /* e = 1 - (b * y) */
430 emit_insn (gen_m2subrf4_cond (e, cond, one, b, y, zero, status1, trunc_off));
431 /* y1 = y + (y * e) */
432 emit_insn (gen_m2addrf4_cond (y1, cond, y, y, e, zero, status1, trunc_off));
433 /* e1 = e * e */
434 emit_insn (gen_mulrf3_cond (e1, cond, e, e, zero, status1, trunc_off));
435 /* y2 = y1 + (y1 * e1) */
436 emit_insn (gen_m2addrf4_cond (y2, cond, y1, y1, e1, zero, status1, trunc_off));
437 /* e2 = e1 * e1 */
438 emit_insn (gen_mulrf3_cond (e2, cond, e1, e1, zero, status1, trunc_off));
439 /* y3 = y2 + (y2 * e2) */
440 emit_insn (gen_m2addrf4_cond (y3, cond, y2, y2, e2, zero, status1, trunc_off));
441 /* q = double (a * y3) */
442 emit_insn (gen_mulrf3_cond (q, cond, a, y3, zero, status1, trunc_dbl));
443 /* r = a - (b * q) */
444 emit_insn (gen_m2subrf4_cond (r, cond, a, b, q, zero, status1, trunc_off));
445 /* Q = double (q + (r * y3)) */
446 emit_insn (gen_m2addrf4_cond (q_res, cond, q, r, y3, y, status0, trunc_dbl));
447 /* Conversion back into DFmode */
448 emit_insn (gen_truncrfdf2 (operands[0], q_res));
449 DONE;
450})
13d1a6e7
SE
451
452;; Double precision floating point division (minimum latency algorithm).
453
454(define_expand "divdf3_internal_lat"
455 [(set (match_operand:DF 0 "fr_register_operand" "")
6adb807e
SE
456 (div:DF (match_operand:DF 1 "fr_reg_or_fp01_operand" "")
457 (match_operand:DF 2 "fr_reg_or_fp01_operand" "")))]
13d1a6e7
SE
458 "TARGET_INLINE_FLOAT_DIV"
459{
460 rtx q_res = gen_reg_rtx (RFmode);
461 rtx a = gen_reg_rtx (RFmode);
462 rtx b = gen_reg_rtx (RFmode);
463 rtx y = gen_reg_rtx (RFmode);
464 rtx e = gen_reg_rtx (RFmode);
465 rtx y1 = gen_reg_rtx (RFmode);
466 rtx e1 = gen_reg_rtx (RFmode);
467 rtx q1 = gen_reg_rtx (RFmode);
468 rtx y2 = gen_reg_rtx (RFmode);
469 rtx e2 = gen_reg_rtx (RFmode);
470 rtx q2 = gen_reg_rtx (RFmode);
471 rtx e3 = gen_reg_rtx (RFmode);
472 rtx q = gen_reg_rtx (RFmode);
473 rtx r1 = gen_reg_rtx (RFmode);
33620355 474 rtx cond = gen_reg_rtx (CCImode);
13d1a6e7
SE
475 rtx zero = CONST0_RTX (RFmode);
476 rtx one = CONST1_RTX (RFmode);
477 rtx status0 = CONST0_RTX (SImode);
478 rtx status1 = CONST1_RTX (SImode);
479 rtx trunc_dbl = CONST1_RTX (SImode);
480 rtx trunc_off = CONST2_RTX (SImode);
481
482 /* Empty conversions to put inputs into RFmode */
483 emit_insn (gen_extenddfrf2 (a, operands[1]));
484 emit_insn (gen_extenddfrf2 (b, operands[2]));
485 /* y = 1 / b */
486 emit_insn (gen_recip_approx_rf (y, a, b, cond, status0));
487 /* e = 1 - (b * y) */
488 emit_insn (gen_m2subrf4_cond (e, cond, one, b, y, zero, status1, trunc_off));
489 /* q = a * y */
490 emit_insn (gen_mulrf3_cond (q, cond, a, y, zero, status1, trunc_off));
491 /* e2 = e + (e * e) */
492 emit_insn (gen_m2addrf4_cond (e2, cond, e, e, e, zero, status1, trunc_off));
493 /* e1 = e * e */
494 emit_insn (gen_mulrf3_cond (e1, cond, e, e, zero, status1, trunc_off));
495 /* e3 = e + (e1 * e1) */
496 emit_insn (gen_m2addrf4_cond (e3, cond, e, e1, e1, zero, status1, trunc_off));
497 /* q1 = q + (q * e2) */
498 emit_insn (gen_m2addrf4_cond (q1, cond, q, q, e2, zero, status1, trunc_off));
499 /* y1 = y + (y * e2) */
500 emit_insn (gen_m2addrf4_cond (y1, cond, y, y, e2, zero, status1, trunc_off));
501 /* q2 = double(q + (q1 * e3)) */
502 emit_insn (gen_m2addrf4_cond (q2, cond, q, q1, e3, zero, status1, trunc_dbl));
503 /* y2 = y + (y1 * e3) */
504 emit_insn (gen_m2addrf4_cond (y2, cond, y, y1, e3, zero, status1, trunc_off));
505 /* r1 = a - (b * q2) */
506 emit_insn (gen_m2subrf4_cond (r1, cond, a, b, q2, zero, status1, trunc_off));
507 /* Q = double (q2 + (r1 * y2)) */
508 emit_insn (gen_m2addrf4_cond (q_res, cond, q2, r1, y2, y, status0, trunc_dbl));
509 /* Conversion back into DFmode */
510 emit_insn (gen_truncrfdf2 (operands[0], q_res));
511 DONE;
512})
513
514;; Extended precision floating point division.
515
49df2fb8 516(define_expand "divxf3"
13d1a6e7 517 [(set (match_operand:XF 0 "fr_register_operand" "")
6adb807e
SE
518 (div:XF (match_operand:XF 1 "fr_reg_or_fp01_operand" "")
519 (match_operand:XF 2 "fr_reg_or_fp01_operand" "")))]
13d1a6e7
SE
520 "TARGET_INLINE_FLOAT_DIV"
521{
522 rtx q_res = gen_reg_rtx (RFmode);
523 rtx a = gen_reg_rtx (RFmode);
524 rtx b = gen_reg_rtx (RFmode);
525 rtx y = gen_reg_rtx (RFmode);
526 rtx e = gen_reg_rtx (RFmode);
527 rtx y1 = gen_reg_rtx (RFmode);
528 rtx e1 = gen_reg_rtx (RFmode);
529 rtx q1 = gen_reg_rtx (RFmode);
530 rtx y2 = gen_reg_rtx (RFmode);
531 rtx e2 = gen_reg_rtx (RFmode);
532 rtx y3 = gen_reg_rtx (RFmode);
533 rtx e3 = gen_reg_rtx (RFmode);
534 rtx e4 = gen_reg_rtx (RFmode);
535 rtx q = gen_reg_rtx (RFmode);
536 rtx r = gen_reg_rtx (RFmode);
537 rtx r1 = gen_reg_rtx (RFmode);
33620355 538 rtx cond = gen_reg_rtx (CCImode);
13d1a6e7
SE
539 rtx zero = CONST0_RTX (RFmode);
540 rtx one = CONST1_RTX (RFmode);
541 rtx status0 = CONST0_RTX (SImode);
542 rtx status1 = CONST1_RTX (SImode);
543 rtx trunc_off = CONST2_RTX (SImode);
544
545 /* Empty conversions to put inputs into RFmode */
546 emit_insn (gen_extendxfrf2 (a, operands[1]));
547 emit_insn (gen_extendxfrf2 (b, operands[2]));
548 /* y = 1 / b */
549 emit_insn (gen_recip_approx_rf (y, a, b, cond, status0));
550 /* e = 1 - (b * y) */
551 emit_insn (gen_m2subrf4_cond (e, cond, one, b, y, zero, status1, trunc_off));
552 /* q = a * y */
553 emit_insn (gen_mulrf3_cond (q, cond, a, y, zero, status1, trunc_off));
554 /* e2 = e + (e * e) */
555 emit_insn (gen_m2addrf4_cond (e2, cond, e, e, e, zero, status1, trunc_off));
556 /* e1 = e * e */
557 emit_insn (gen_mulrf3_cond (e1, cond, e, e, zero, status1, trunc_off));
558 /* y1 = y + (y * e2) */
559 emit_insn (gen_m2addrf4_cond (y1, cond, y, y, e2, zero, status1, trunc_off));
560 /* e3 = e + (e1 * e1) */
561 emit_insn (gen_m2addrf4_cond (e3, cond, e, e1, e1, zero, status1, trunc_off));
562 /* y2 = y + (y1 * e3) */
563 emit_insn (gen_m2addrf4_cond (y2, cond, y, y1, e3, zero, status1, trunc_off));
564 /* r = a - (b * q) */
565 emit_insn (gen_m2subrf4_cond (r, cond, a, b, q, zero, status1, trunc_off));
566 /* e4 = 1 - (b * y2) */
567 emit_insn (gen_m2subrf4_cond (e4, cond, one, b, y2, zero, status1, trunc_off));
568 /* q1 = q + (r * y2) */
569 emit_insn (gen_m2addrf4_cond (q1, cond, q, r, y2, zero, status1, trunc_off));
570 /* y3 = y2 + (y2 * e4) */
571 emit_insn (gen_m2addrf4_cond (y3, cond, y2, y2, e4, zero, status1, trunc_off));
572 /* r1 = a - (b * q1) */
573 emit_insn (gen_m2subrf4_cond (r1, cond, a, b, q1, zero, status1, trunc_off));
574 /* Q = q1 + (r1 * y3) */
575 emit_insn (gen_m2addrf4_cond (q_res, cond, q1, r1, y3, y, status0, trunc_off));
576 /* Conversion back into XFmode */
577 emit_insn (gen_truncrfxf2 (operands[0], q_res));
578 DONE;
579})
f3a83111
SE
580
581
49df2fb8
SE
582;; Integer division operations
583
;; Inline SImode signed division, enabled by TARGET_INLINE_INT_DIV.
;; Each input is forced into a register if it is not one already and then
;; converted to RFmode with expand_float (third argument 0).  A cond_trap
;; comparing operand 2 with zero (EQ) is emitted, divsi3_internal computes
;; the quotient in RFmode, fix_truncrfdi2_alts (with const1_rtx as its
;; constant operand) converts it to DImode, and the low SImode part of that
;; value is moved into operand 0.
584(define_expand "divsi3"
585 [(set (match_operand:SI 0 "register_operand" "")
586 (div:SI (match_operand:SI 1 "general_operand" "")
587 (match_operand:SI 2 "general_operand" "")))]
588 "TARGET_INLINE_INT_DIV"
589{
590 rtx op1_rf, op2_rf, op0_rf, op0_di;
591
592 op0_rf = gen_reg_rtx (RFmode);
593 op0_di = gen_reg_rtx (DImode);
594
595 if (! register_operand (operands[1], SImode))
596 operands[1] = force_reg (SImode, operands[1]);
597 op1_rf = gen_reg_rtx (RFmode);
598 expand_float (op1_rf, operands[1], 0);
599
600 if (! register_operand (operands[2], SImode))
601 operands[2] = force_reg (SImode, operands[2]);
602 op2_rf = gen_reg_rtx (RFmode);
603 expand_float (op2_rf, operands[2], 0);
604
605 emit_insn (gen_cond_trap (EQ, operands[2], CONST0_RTX (SImode),
606 CONST1_RTX (SImode)));
607
608 emit_insn (gen_divsi3_internal (op0_rf, op1_rf, op2_rf));
609
610 emit_insn (gen_fix_truncrfdi2_alts (op0_di, op0_rf, const1_rtx));
611 emit_move_insn (operands[0], gen_lowpart (SImode, op0_di));
612 DONE;
613})
614
;; Inline SImode signed remainder, enabled by TARGET_INLINE_INT_DIV.
;; The quotient is obtained from divsi3, operand 2 is negated with
;; expand_unop, and maddsi4 is given the quotient, the negated operand 2 and
;; operand 1 (presumably a multiply-add, i.e. op1 - quotient * op2; maddsi4
;; is defined outside this file section).  Operand 1 is passed as the low
;; SImode part of a DImode copy made with convert_move (third argument 0).
615(define_expand "modsi3"
616 [(set (match_operand:SI 0 "register_operand" "")
617 (mod:SI (match_operand:SI 1 "general_operand" "")
618 (match_operand:SI 2 "general_operand" "")))]
619 "TARGET_INLINE_INT_DIV"
620{
621 rtx op2_neg, op1_di, div;
622
623 div = gen_reg_rtx (SImode);
624 emit_insn (gen_divsi3 (div, operands[1], operands[2]));
625
626 op2_neg = expand_unop (SImode, neg_optab, operands[2], NULL_RTX, 0);
627
628 /* This is a trick to get us to reuse the value that we're sure to
629 have already copied to the FP regs. */
630 op1_di = gen_reg_rtx (DImode);
631 convert_move (op1_di, operands[1], 0);
632
633 emit_insn (gen_maddsi4 (operands[0], div, op2_neg,
634 gen_lowpart (SImode, op1_di)));
635 DONE;
636})
637
;; Inline SImode unsigned division, enabled by TARGET_INLINE_INT_DIV.
;; Same sequence as divsi3 except that expand_float is called with third
;; argument 1 and the final conversion uses fixuns_truncrfdi2_alts: inputs
;; are forced into registers and converted to RFmode, a cond_trap comparing
;; operand 2 with zero (EQ) is emitted, divsi3_internal computes the RFmode
;; quotient, and the low SImode part of the DImode conversion result is
;; moved into operand 0.
638(define_expand "udivsi3"
639 [(set (match_operand:SI 0 "register_operand" "")
640 (udiv:SI (match_operand:SI 1 "general_operand" "")
641 (match_operand:SI 2 "general_operand" "")))]
642 "TARGET_INLINE_INT_DIV"
643{
644 rtx op1_rf, op2_rf, op0_rf, op0_di;
645
646 op0_rf = gen_reg_rtx (RFmode);
647 op0_di = gen_reg_rtx (DImode);
648
649 if (! register_operand (operands[1], SImode))
650 operands[1] = force_reg (SImode, operands[1]);
651 op1_rf = gen_reg_rtx (RFmode);
652 expand_float (op1_rf, operands[1], 1);
653
654 if (! register_operand (operands[2], SImode))
655 operands[2] = force_reg (SImode, operands[2]);
656 op2_rf = gen_reg_rtx (RFmode);
657 expand_float (op2_rf, operands[2], 1);
658
659 emit_insn (gen_cond_trap (EQ, operands[2], CONST0_RTX (SImode),
660 CONST1_RTX (SImode)));
661
662 emit_insn (gen_divsi3_internal (op0_rf, op1_rf, op2_rf));
663
664 emit_insn (gen_fixuns_truncrfdi2_alts (op0_di, op0_rf, const1_rtx));
665 emit_move_insn (operands[0], gen_lowpart (SImode, op0_di));
666 DONE;
667})
668
;; Inline SImode unsigned remainder, enabled by TARGET_INLINE_INT_DIV.
;; The quotient is obtained from udivsi3, operand 2 is negated with
;; expand_unop, and maddsi4 is given the quotient, the negated operand 2 and
;; operand 1 (presumably a multiply-add, i.e. op1 - quotient * op2; maddsi4
;; is defined outside this file section).  Operand 1 is passed as the low
;; SImode part of a DImode copy made with convert_move (third argument 1).
669(define_expand "umodsi3"
670 [(set (match_operand:SI 0 "register_operand" "")
671 (umod:SI (match_operand:SI 1 "general_operand" "")
672 (match_operand:SI 2 "general_operand" "")))]
673 "TARGET_INLINE_INT_DIV"
674{
675 rtx op2_neg, op1_di, div;
676
677 div = gen_reg_rtx (SImode);
678 emit_insn (gen_udivsi3 (div, operands[1], operands[2]));
679
680 op2_neg = expand_unop (SImode, neg_optab, operands[2], NULL_RTX, 0);
681
682 /* This is a trick to get us to reuse the value that we're sure to
683 have already copied to the FP regs. */
684 op1_di = gen_reg_rtx (DImode);
685 convert_move (op1_di, operands[1], 1);
686
687 emit_insn (gen_maddsi4 (operands[0], div, op2_neg,
688 gen_lowpart (SImode, op1_di)));
689 DONE;
690})
691
692(define_expand "divsi3_internal"
693 [(set (match_operand:RF 0 "fr_register_operand" "")
694 (float:RF (div:SI (match_operand:RF 1 "fr_register_operand" "")
695 (match_operand:RF 2 "fr_register_operand" ""))))]
696 "TARGET_INLINE_INT_DIV"
697{
698 rtx a = operands[1];
699 rtx b = operands[2];
700 rtx y = gen_reg_rtx (RFmode);
701 rtx e = gen_reg_rtx (RFmode);
702 rtx e1 = gen_reg_rtx (RFmode);
703 rtx q = gen_reg_rtx (RFmode);
704 rtx q1 = gen_reg_rtx (RFmode);
33620355 705 rtx cond = gen_reg_rtx (CCImode);
49df2fb8
SE
706 rtx zero = CONST0_RTX (RFmode);
707 rtx one = CONST1_RTX (RFmode);
708 rtx status1 = CONST1_RTX (SImode);
709 rtx trunc_off = CONST2_RTX (SImode);
710 rtx twon34_exp = gen_reg_rtx (DImode);
711 rtx twon34 = gen_reg_rtx (RFmode);
712
713 /* Load constant 2**(-34) */
714 emit_move_insn (twon34_exp, GEN_INT (65501));
715 emit_insn (gen_setf_exp_rf (twon34, twon34_exp));
716
717 /* y = 1 / b */
718 emit_insn (gen_recip_approx_rf (y, a, b, cond, status1));
719 /* e = 1 - (b * y) */
720 emit_insn (gen_m2subrf4_cond (e, cond, one, b, y, zero, status1, trunc_off));
721 /* q = a * y */
722 emit_insn (gen_mulrf3_cond (q, cond, a, y, zero, status1, trunc_off));
723 /* q1 = q + (q * e) */
724 emit_insn (gen_m2addrf4_cond (q1, cond, q, q, e, zero, status1, trunc_off));
725 /* e1 = (2**-34) + (e * e) */
726 emit_insn (gen_m2addrf4_cond (e1, cond, twon34, e, e, zero, status1, trunc_off));
727 /* q2 = q1 + (e1 * q1) */
728 emit_insn (gen_m2addrf4_cond (operands[0], cond, q1, e1, q1, y, status1, trunc_off));
729 DONE;
730})
731
;; Inline DImode signed division, enabled by TARGET_INLINE_INT_DIV.
;; Each input is forced into a register if it is not one already and then
;; converted to RFmode with expand_float (third argument 0).  A cond_trap
;; comparing operand 2 with zero (EQ) is emitted.  The RFmode quotient comes
;; from divdi3_internal_lat when TARGET_INLINE_INT_DIV equals INL_MIN_LAT
;; and from divdi3_internal_thr otherwise; fix_truncrfdi2_alts (with
;; const1_rtx as its constant operand) then converts it straight into
;; operand 0.
732(define_expand "divdi3"
733 [(set (match_operand:DI 0 "register_operand" "")
734 (div:DI (match_operand:DI 1 "general_operand" "")
735 (match_operand:DI 2 "general_operand" "")))]
736 "TARGET_INLINE_INT_DIV"
737{
738 rtx op1_rf, op2_rf, op0_rf;
739
740 op0_rf = gen_reg_rtx (RFmode);
741
742 if (! register_operand (operands[1], DImode))
743 operands[1] = force_reg (DImode, operands[1]);
744 op1_rf = gen_reg_rtx (RFmode);
745 expand_float (op1_rf, operands[1], 0);
746
747 if (! register_operand (operands[2], DImode))
748 operands[2] = force_reg (DImode, operands[2]);
749 op2_rf = gen_reg_rtx (RFmode);
750 expand_float (op2_rf, operands[2], 0);
751
752 emit_insn (gen_cond_trap (EQ, operands[2], CONST0_RTX (DImode),
753 CONST1_RTX (DImode)));
754
755 if (TARGET_INLINE_INT_DIV == INL_MIN_LAT)
756 emit_insn (gen_divdi3_internal_lat (op0_rf, op1_rf, op2_rf));
757 else
758 emit_insn (gen_divdi3_internal_thr (op0_rf, op1_rf, op2_rf));
759
760 emit_insn (gen_fix_truncrfdi2_alts (operands[0], op0_rf, const1_rtx));
761 DONE;
762})
763
;; Inline DImode signed remainder, enabled by TARGET_INLINE_INT_DIV.
;; The quotient is obtained from divdi3, operand 2 is negated with
;; expand_unop, and madddi4 is given the quotient, the negated operand 2 and
;; operand 1 (presumably a multiply-add, i.e. op1 - quotient * op2; madddi4
;; is defined outside this file section).
;; The template uses mod:DI so that its mode agrees with the DImode
;; destination and operands, as in umoddi3.  The preparation code always
;; ends in DONE, so the template only documents the operation.
764(define_expand "moddi3"
765 [(set (match_operand:DI 0 "register_operand" "")
766 (mod:DI (match_operand:DI 1 "general_operand" "")
767 (match_operand:DI 2 "general_operand" "")))]
768 "TARGET_INLINE_INT_DIV"
769{
770 rtx op2_neg, div;
771
772 div = gen_reg_rtx (DImode);
773 emit_insn (gen_divdi3 (div, operands[1], operands[2]));
774
775 op2_neg = expand_unop (DImode, neg_optab, operands[2], NULL_RTX, 0);
776
777 emit_insn (gen_madddi4 (operands[0], div, op2_neg, operands[1]));
778 DONE;
779})
780
;; Inline DImode unsigned division, enabled by TARGET_INLINE_INT_DIV.
;; Same sequence as divdi3 except that expand_float is called with third
;; argument 1 and the final conversion uses fixuns_truncrfdi2_alts: inputs
;; are forced into registers and converted to RFmode, a cond_trap comparing
;; operand 2 with zero (EQ) is emitted, and the RFmode quotient comes from
;; divdi3_internal_lat when TARGET_INLINE_INT_DIV equals INL_MIN_LAT and
;; from divdi3_internal_thr otherwise.
781(define_expand "udivdi3"
782 [(set (match_operand:DI 0 "register_operand" "")
783 (udiv:DI (match_operand:DI 1 "general_operand" "")
784 (match_operand:DI 2 "general_operand" "")))]
785 "TARGET_INLINE_INT_DIV"
786{
787 rtx op1_rf, op2_rf, op0_rf;
788
789 op0_rf = gen_reg_rtx (RFmode);
790
791 if (! register_operand (operands[1], DImode))
792 operands[1] = force_reg (DImode, operands[1]);
793 op1_rf = gen_reg_rtx (RFmode);
794 expand_float (op1_rf, operands[1], 1);
795
796 if (! register_operand (operands[2], DImode))
797 operands[2] = force_reg (DImode, operands[2]);
798 op2_rf = gen_reg_rtx (RFmode);
799 expand_float (op2_rf, operands[2], 1);
800
801 emit_insn (gen_cond_trap (EQ, operands[2], CONST0_RTX (DImode),
802 CONST1_RTX (DImode)));
803
804 if (TARGET_INLINE_INT_DIV == INL_MIN_LAT)
805 emit_insn (gen_divdi3_internal_lat (op0_rf, op1_rf, op2_rf));
806 else
807 emit_insn (gen_divdi3_internal_thr (op0_rf, op1_rf, op2_rf));
808
809 emit_insn (gen_fixuns_truncrfdi2_alts (operands[0], op0_rf, const1_rtx));
810 DONE;
811})
812
;; Inline DImode unsigned remainder, enabled by TARGET_INLINE_INT_DIV.
;; The quotient is obtained from udivdi3, operand 2 is negated with
;; expand_unop, and madddi4 is given the quotient, the negated operand 2 and
;; operand 1 (presumably a multiply-add, i.e. op1 - quotient * op2; madddi4
;; is defined outside this file section).
813(define_expand "umoddi3"
814 [(set (match_operand:DI 0 "register_operand" "")
815 (umod:DI (match_operand:DI 1 "general_operand" "")
816 (match_operand:DI 2 "general_operand" "")))]
817 "TARGET_INLINE_INT_DIV"
818{
819 rtx op2_neg, div;
820
821 div = gen_reg_rtx (DImode);
822 emit_insn (gen_udivdi3 (div, operands[1], operands[2]));
823
824 op2_neg = expand_unop (DImode, neg_optab, operands[2], NULL_RTX, 0);
825
826 emit_insn (gen_madddi4 (operands[0], div, op2_neg, operands[1]));
827 DONE;
828})
829
;; RFmode division core used for inline 64-bit integer division when
;; TARGET_INLINE_INT_DIV == INL_MIN_LAT (see udivdi3).  Operands 1 and 2
;; are the dividend and divisor already converted to RFmode; operand 0
;; receives the RFmode quotient.  Starting from the approximation y, the
;; quotient estimate q and the reciprocal y are refined side by side and a
;; final remainder correction produces the result.
830(define_expand "divdi3_internal_lat"
831 [(set (match_operand:RF 0 "fr_register_operand" "")
832 (float:RF (div:DI (match_operand:RF 1 "fr_register_operand" "")
833 (match_operand:RF 2 "fr_register_operand" ""))))]
834 "TARGET_INLINE_INT_DIV"
835{
836 rtx a = operands[1];
837 rtx b = operands[2];
838 rtx y = gen_reg_rtx (RFmode);
839 rtx y1 = gen_reg_rtx (RFmode);
840 rtx y2 = gen_reg_rtx (RFmode);
841 rtx e = gen_reg_rtx (RFmode);
842 rtx e1 = gen_reg_rtx (RFmode);
843 rtx q = gen_reg_rtx (RFmode);
844 rtx q1 = gen_reg_rtx (RFmode);
845 rtx q2 = gen_reg_rtx (RFmode);
846 rtx r = gen_reg_rtx (RFmode);
33620355 847 rtx cond = gen_reg_rtx (CCImode);
49df2fb8
SE
848 rtx zero = CONST0_RTX (RFmode);
849 rtx one = CONST1_RTX (RFmode);
  /* Status-register selector and rounding selector handed to every
     *_cond generator below; rounding value 2 means "none".  */
850 rtx status1 = CONST1_RTX (SImode);
851 rtx trunc_off = CONST2_RTX (SImode);
852
  /* Every step after the approximation is predicated on COND, which is
     passed to gen_recip_approx_rf (presumably as an output -- confirm
     against the recip_approx_rf pattern).  */
853 /* y = 1 / b */
854 emit_insn (gen_recip_approx_rf (y, a, b, cond, status1));
855 /* e = 1 - (b * y) */
856 emit_insn (gen_m2subrf4_cond (e, cond, one, b, y, zero, status1, trunc_off));
857 /* q = a * y */
858 emit_insn (gen_mulrf3_cond (q, cond, a, y, zero, status1, trunc_off));
859 /* q1 = q + (q * e) */
860 emit_insn (gen_m2addrf4_cond (q1, cond, q, q, e, zero, status1, trunc_off));
861 /* e1 = e * e */
862 emit_insn (gen_mulrf3_cond (e1, cond, e, e, zero, status1, trunc_off));
863 /* q2 = q1 + (e1 * q1) */
864 emit_insn (gen_m2addrf4_cond (q2, cond, q1, e1, q1, zero, status1, trunc_off));
865 /* y1 = y + (y * e) */
866 emit_insn (gen_m2addrf4_cond (y1, cond, y, y, e, zero, status1, trunc_off));
867 /* r = a - (b * q2) */
868 emit_insn (gen_m2subrf4_cond (r, cond, a, b, q2, zero, status1, trunc_off));
869 /* y2 = y1 + (y1 * e1) */
870 emit_insn (gen_m2addrf4_cond (y2, cond, y1, y1, e1, zero, status1, trunc_off));
  /* The final step passes y where the earlier steps pass zero;
     presumably this is the value used when COND is false -- confirm
     against the m2addrf4_cond pattern.  */
871 /* q3 = q2 + (r * y2) */
872 emit_insn (gen_m2addrf4_cond (operands[0], cond, q2, r, y2, y, status1, trunc_off));
873 DONE;
874})
875
;; RFmode division core used for inline 64-bit integer division when
;; TARGET_INLINE_INT_DIV is not INL_MIN_LAT (see udivdi3).  Operands 1 and
;; 2 are the dividend and divisor already converted to RFmode; operand 0
;; receives the RFmode quotient.  The reciprocal y is refined first, the
;; quotient is formed from it once, and a remainder correction follows.
876(define_expand "divdi3_internal_thr"
877 [(set (match_operand:RF 0 "fr_register_operand" "")
878 (float:RF (div:DI (match_operand:RF 1 "fr_register_operand" "")
879 (match_operand:RF 2 "fr_register_operand" ""))))]
880 "TARGET_INLINE_INT_DIV"
881{
882 rtx a = operands[1];
883 rtx b = operands[2];
884 rtx y = gen_reg_rtx (RFmode);
885 rtx y1 = gen_reg_rtx (RFmode);
886 rtx y2 = gen_reg_rtx (RFmode);
887 rtx e = gen_reg_rtx (RFmode);
888 rtx e1 = gen_reg_rtx (RFmode);
889 rtx q2 = gen_reg_rtx (RFmode);
890 rtx r = gen_reg_rtx (RFmode);
33620355 891 rtx cond = gen_reg_rtx (CCImode);
49df2fb8
SE
892 rtx zero = CONST0_RTX (RFmode);
893 rtx one = CONST1_RTX (RFmode);
  /* Status-register selector and rounding selector handed to every
     *_cond generator below; rounding value 2 means "none".  */
894 rtx status1 = CONST1_RTX (SImode);
895 rtx trunc_off = CONST2_RTX (SImode);
896
  /* Every step after the approximation is predicated on COND, which is
     passed to gen_recip_approx_rf (presumably as an output -- confirm
     against the recip_approx_rf pattern).  */
897 /* y = 1 / b */
898 emit_insn (gen_recip_approx_rf (y, a, b, cond, status1));
899 /* e = 1 - (b * y) */
900 emit_insn (gen_m2subrf4_cond (e, cond, one, b, y, zero, status1, trunc_off));
901 /* y1 = y + (y * e) */
902 emit_insn (gen_m2addrf4_cond (y1, cond, y, y, e, zero, status1, trunc_off));
903 /* e1 = e * e */
904 emit_insn (gen_mulrf3_cond (e1, cond, e, e, zero, status1, trunc_off));
905 /* y2 = y1 + (y1 * e1) */
906 emit_insn (gen_m2addrf4_cond (y2, cond, y1, y1, e1, zero, status1, trunc_off));
907 /* q2 = y2 * a */
908 emit_insn (gen_mulrf3_cond (q2, cond, y2, a, zero, status1, trunc_off));
909 /* r = a - (b * q2) */
910 emit_insn (gen_m2subrf4_cond (r, cond, a, b, q2, zero, status1, trunc_off));
  /* The final step passes y where the earlier steps pass zero;
     presumably this is the value used when COND is false -- confirm
     against the m2addrf4_cond pattern.  */
911 /* q3 = q2 + (r * y2) */
912 emit_insn (gen_m2addrf4_cond (operands[0], cond, q2, r, y2, y, status1, trunc_off));
913 DONE;
914})
915
f3a83111
SE
916;; SQRT operations
917
918
;; Reciprocal square root approximation insn (frsqrta).
;;   operand 0 - RFmode result register
;;   operand 1 - RFmode input (register or the fp 0/1 constants)
;;   operand 2 - CCImode register written alongside the result
;;   operand 3 - constant integer printed as the ".sN" status-field suffix
;; The insn is in itanium_class "fmisc" and is marked as not predicable.
919(define_insn "sqrt_approx_rf"
920 [(set (match_operand:RF 0 "fr_register_operand" "=f")
921 (unspec:RF [(match_operand:RF 1 "fr_reg_or_fp01_operand" "fG")]
922 UNSPEC_FR_SQRT_RECIP_APPROX_RES))
33620355
EB
923 (set (match_operand:CCI 2 "register_operand" "=c")
924 (unspec:CCI [(match_dup 1)] UNSPEC_FR_SQRT_RECIP_APPROX))
f3a83111
SE
925 (use (match_operand:SI 3 "const_int_operand" ""))]
926 ""
927 "frsqrta.s%3 %0, %2 = %F1"
928 [(set_attr "itanium_class" "fmisc")
929 (set_attr "predicable" "no")])
930
49df2fb8
SE
;; Inline single-precision square root.  Dispatches to the
;; minimum-latency sequence when TARGET_INLINE_SQRT == INL_MIN_LAT and to
;; the throughput sequence otherwise.
931(define_expand "sqrtsf2"
932 [(set (match_operand:SF 0 "fr_register_operand" "=&f")
933 (sqrt:SF (match_operand:SF 1 "fr_reg_or_fp01_operand" "fG")))]
934 "TARGET_INLINE_SQRT"
935{
936 rtx insn;
  /* Build the insn sequence for the selected variant, then emit it.  */
937 if (TARGET_INLINE_SQRT == INL_MIN_LAT)
938 insn = gen_sqrtsf2_internal_lat (operands[0], operands[1]);
939 else
940 insn = gen_sqrtsf2_internal_thr (operands[0], operands[1]);
941 emit_insn (insn);
942 DONE;
943})
944
f3a83111
SE
;; Single-precision square root sequence used by sqrtsf2 when
;; TARGET_INLINE_SQRT is not INL_MIN_LAT.  The input is widened to RFmode,
;; the reciprocal square root approximation y is refined, and the result
;; is rounded to single precision before being narrowed back to SFmode.
945(define_expand "sqrtsf2_internal_thr"
946 [(set (match_operand:SF 0 "fr_register_operand" "")
947 (sqrt:SF (match_operand:SF 1 "fr_register_operand" "")))]
948 "TARGET_INLINE_SQRT"
949{
950 rtx y = gen_reg_rtx (RFmode);
951 rtx b = gen_reg_rtx (RFmode);
952 rtx g = gen_reg_rtx (RFmode);
953 rtx e = gen_reg_rtx (RFmode);
954 rtx s = gen_reg_rtx (RFmode);
955 rtx f = gen_reg_rtx (RFmode);
956 rtx y1 = gen_reg_rtx (RFmode);
957 rtx g1 = gen_reg_rtx (RFmode);
958 rtx h = gen_reg_rtx (RFmode);
959 rtx d = gen_reg_rtx (RFmode);
960 rtx g2 = gen_reg_rtx (RFmode);
33620355 961 rtx cond = gen_reg_rtx (CCImode);
f3a83111
SE
962 rtx zero = CONST0_RTX (RFmode);
963 rtx one = CONST1_RTX (RFmode);
  /* c1 and c2 are the constants used as 0.5 and 0.375 below.  */
964 rtx c1 = ia64_dconst_0_5();
965 rtx c2 = ia64_dconst_0_375();
966 rtx reg_df_c1 = gen_reg_rtx (DFmode);
967 rtx reg_df_c2 = gen_reg_rtx (DFmode);
968 rtx reg_rf_c1 = gen_reg_rtx (RFmode);
969 rtx reg_rf_c2 = gen_reg_rtx (RFmode);
  /* Status-register selectors (0 and 1) and rounding selectors
     (0 = single, 2 = none) for the generators below.  */
970 rtx status0 = CONST0_RTX (SImode);
971 rtx status1 = CONST1_RTX (SImode);
972 rtx trunc_sgl = CONST0_RTX (SImode);
973 rtx trunc_off = CONST2_RTX (SImode);
974
975 /* Put needed constants into registers. */
976 emit_insn (gen_movdf (reg_df_c1, c1));
977 emit_insn (gen_movdf (reg_df_c2, c2));
978 emit_insn (gen_extenddfrf2 (reg_rf_c1, reg_df_c1));
979 emit_insn (gen_extenddfrf2 (reg_rf_c2, reg_df_c2));
980 /* Empty conversion to put input into RFmode. */
981 emit_insn (gen_extendsfrf2 (b, operands[1]));
982 /* y = sqrt (1 / b) */
983 emit_insn (gen_sqrt_approx_rf (y, b, cond, status0));
984 /* g = b * y */
985 emit_insn (gen_mulrf3_cond (g, cond, b, y, zero, status1, trunc_off));
986 /* e = 1 - (g * y) */
987 emit_insn (gen_m2subrf4_cond (e, cond, one, g, y, zero, status1, trunc_off));
988 /* s = 0.5 + (0.375 * e) */
989 emit_insn (gen_m2addrf4_cond (s, cond, reg_rf_c1, reg_rf_c2, e, zero, status1, trunc_off));
990 /* f = y * e */
991 emit_insn (gen_mulrf3_cond (f, cond, y, e, zero, status1, trunc_off));
992 /* y1 = y + (f * s) */
993 emit_insn (gen_m2addrf4_cond (y1, cond, y, f, s, zero, status1, trunc_off));
994 /* g1 = single (b * y1) */
995 emit_insn (gen_mulrf3_cond (g1, cond, b, y1, zero, status1, trunc_sgl));
996 /* h = 0.5 * y1 */
997 emit_insn (gen_mulrf3_cond (h, cond, reg_rf_c1, y1, zero, status1, trunc_off));
998 /* d = b - g1 * g1 */
999 emit_insn (gen_m2subrf4_cond (d, cond, b, g1, g1, zero, status1, trunc_off));
  /* The final step uses status0 and passes y where the earlier steps
     pass zero; presumably y is the value used when COND is false --
     confirm against the m2addrf4_cond pattern.  */
1000 /* g2 = single(g1 + (d * h)) */
1001 emit_insn (gen_m2addrf4_cond (g2, cond, g1, d, h, y, status0, trunc_sgl));
1002 /* Conversion back into SFmode. */
1003 emit_insn (gen_truncrfsf2 (operands[0], g2));
1004 DONE;
1005})
1006
;; Single-precision square root sequence used by sqrtsf2 when
;; TARGET_INLINE_SQRT == INL_MIN_LAT.  The input is widened to RFmode,
;; the estimate g of sqrt(b) and the half-reciprocal h are refined from
;; the approximation y, and the result is rounded to single precision
;; before being narrowed back to SFmode.
1007(define_expand "sqrtsf2_internal_lat"
1008 [(set (match_operand:SF 0 "fr_register_operand" "")
1009 (sqrt:SF (match_operand:SF 1 "fr_register_operand" "")))]
1010 "TARGET_INLINE_SQRT"
1011{
1012 rtx y = gen_reg_rtx (RFmode);
1013 rtx b = gen_reg_rtx (RFmode);
1014 rtx g = gen_reg_rtx (RFmode);
1015 rtx g1 = gen_reg_rtx (RFmode);
1016 rtx g2 = gen_reg_rtx (RFmode);
1017 rtx e = gen_reg_rtx (RFmode);
1018 rtx s = gen_reg_rtx (RFmode);
1019 rtx f = gen_reg_rtx (RFmode);
1020 rtx f1 = gen_reg_rtx (RFmode);
1021 rtx h = gen_reg_rtx (RFmode);
1022 rtx h1 = gen_reg_rtx (RFmode);
1023 rtx d = gen_reg_rtx (RFmode);
33620355 1024 rtx cond = gen_reg_rtx (CCImode);
f3a83111
SE
1025 rtx zero = CONST0_RTX (RFmode);
1026 rtx one = CONST1_RTX (RFmode);
  /* c1 and c2 are the constants used as 0.5 and 0.375 below.  */
1027 rtx c1 = ia64_dconst_0_5();
1028 rtx c2 = ia64_dconst_0_375();
1029 rtx reg_df_c1 = gen_reg_rtx (DFmode);
1030 rtx reg_df_c2 = gen_reg_rtx (DFmode);
1031 rtx reg_rf_c1 = gen_reg_rtx (RFmode);
1032 rtx reg_rf_c2 = gen_reg_rtx (RFmode);
  /* Status-register selectors (0 and 1) and rounding selectors
     (0 = single, 2 = none) for the generators below.  */
1033 rtx status0 = CONST0_RTX (SImode);
1034 rtx status1 = CONST1_RTX (SImode);
1035 rtx trunc_sgl = CONST0_RTX (SImode);
1036 rtx trunc_off = CONST2_RTX (SImode);
1037
1038 /* Put needed constants into registers. */
1039 emit_insn (gen_movdf (reg_df_c1, c1));
1040 emit_insn (gen_movdf (reg_df_c2, c2));
1041 emit_insn (gen_extenddfrf2 (reg_rf_c1, reg_df_c1));
1042 emit_insn (gen_extenddfrf2 (reg_rf_c2, reg_df_c2));
1043 /* Empty conversion to put input into RFmode. */
1044 emit_insn (gen_extendsfrf2 (b, operands[1]));
1045 /* y = sqrt (1 / b) */
1046 emit_insn (gen_sqrt_approx_rf (y, b, cond, status0));
1047 /* g = b * y */
1048 emit_insn (gen_mulrf3_cond (g, cond, b, y, zero, status1, trunc_off));
1049 /* e = 1 - (g * y) */
1050 emit_insn (gen_m2subrf4_cond (e, cond, one, g, y, zero, status1, trunc_off));
1051 /* h = 0.5 * y */
1052 emit_insn (gen_mulrf3_cond (h, cond, reg_rf_c1, y, zero, status1, trunc_off));
1053 /* s = 0.5 + (0.375 * e) */
1054 emit_insn (gen_m2addrf4_cond (s, cond, reg_rf_c1, reg_rf_c2, e, zero, status1, trunc_off));
1055 /* f = e * g */
1056 emit_insn (gen_mulrf3_cond (f, cond, e, g, zero, status1, trunc_off));
1057 /* g1 = single (g + (f * s)) */
1058 emit_insn (gen_m2addrf4_cond (g1, cond, g, f, s, zero, status1, trunc_sgl));
1059 /* f1 = e * h */
1060 emit_insn (gen_mulrf3_cond (f1, cond, e, h, zero, status1, trunc_off));
1061 /* d = b - g1 * g1 */
1062 emit_insn (gen_m2subrf4_cond (d, cond, b, g1, g1, zero, status1, trunc_off));
1063 /* h1 = h + (f1 * s) */
1064 emit_insn (gen_m2addrf4_cond (h1, cond, h, f1, s, zero, status1, trunc_off));
  /* The final step uses status0 and passes y where the earlier steps
     pass zero; presumably y is the value used when COND is false --
     confirm against the m2addrf4_cond pattern.  */
1065 /* g2 = single(g1 + (d * h1)) */
1066 emit_insn (gen_m2addrf4_cond (g2, cond, g1, d, h1, y, status0, trunc_sgl));
1067 /* Conversion back into SFmode. */
1068 emit_insn (gen_truncrfsf2 (operands[0], g2));
1069 DONE;
1070})
1071
49df2fb8
SE
;; Inline double-precision square root.  The minimum-latency dispatch is
;; compiled out with #if 0, so the throughput sequence
;; (sqrtdf2_internal_thr) is used regardless of the TARGET_INLINE_SQRT
;; setting.
1072(define_expand "sqrtdf2"
1073 [(set (match_operand:DF 0 "fr_register_operand" "=&f")
1074 (sqrt:DF (match_operand:DF 1 "fr_reg_or_fp01_operand" "fG")))]
1075 "TARGET_INLINE_SQRT"
1076{
1077 rtx insn;
  /* Disabled: selection of a latency-optimized variant.  */
1078#if 0
1079 if (TARGET_INLINE_SQRT == INL_MIN_LAT)
1080 insn = gen_sqrtdf2_internal_lat (operands[0], operands[1]);
1081 else
1082#endif
1083 insn = gen_sqrtdf2_internal_thr (operands[0], operands[1]);
1084 emit_insn (insn);
1085 DONE;
1086})
1087
f3a83111
SE
;; Double-precision square root sequence used by sqrtdf2.  The input is
;; widened to RFmode; the estimate g of sqrt(b) and the half-reciprocal h
;; are refined from the approximation y, followed by two residual
;; corrections, the last of which rounds to double precision before the
;; value is narrowed back to DFmode.
1088(define_expand "sqrtdf2_internal_thr"
1089 [(set (match_operand:DF 0 "fr_register_operand" "")
1090 (sqrt:DF (match_operand:DF 1 "fr_register_operand" "")))]
1091 "TARGET_INLINE_SQRT"
1092{
1093 rtx y = gen_reg_rtx (RFmode);
1094 rtx b = gen_reg_rtx (RFmode);
1095 rtx g = gen_reg_rtx (RFmode);
1096 rtx g1 = gen_reg_rtx (RFmode);
1097 rtx g2 = gen_reg_rtx (RFmode);
1098 rtx g3 = gen_reg_rtx (RFmode);
1099 rtx g4 = gen_reg_rtx (RFmode);
1100 rtx r = gen_reg_rtx (RFmode);
1101 rtx r1 = gen_reg_rtx (RFmode);
1102 rtx h = gen_reg_rtx (RFmode);
1103 rtx h1 = gen_reg_rtx (RFmode);
1104 rtx h2 = gen_reg_rtx (RFmode);
1105 rtx d = gen_reg_rtx (RFmode);
1106 rtx d1 = gen_reg_rtx (RFmode);
33620355 1107 rtx cond = gen_reg_rtx (CCImode);
f3a83111
SE
1108 rtx zero = CONST0_RTX (RFmode);
  /* c1 is the constant used as 0.5 below.  */
1109 rtx c1 = ia64_dconst_0_5();
1110 rtx reg_df_c1 = gen_reg_rtx (DFmode);
1111 rtx reg_rf_c1 = gen_reg_rtx (RFmode);
  /* Status-register selectors (0 and 1) and rounding selectors
     (1 = double, 2 = none) for the generators below.  */
1112 rtx status0 = CONST0_RTX (SImode);
1113 rtx status1 = CONST1_RTX (SImode);
1114 rtx trunc_dbl = CONST1_RTX (SImode);
1115 rtx trunc_off = CONST2_RTX (SImode);
1116
1117 /* Put needed constants into registers. */
1118 emit_insn (gen_movdf (reg_df_c1, c1));
1119 emit_insn (gen_extenddfrf2 (reg_rf_c1, reg_df_c1));
1120 /* Empty conversion to put input into RFmode. */
1121 emit_insn (gen_extenddfrf2 (b, operands[1]));
1122 /* y = sqrt (1 / b) */
1123 emit_insn (gen_sqrt_approx_rf (y, b, cond, status0));
1124 /* g = b * y */
1125 emit_insn (gen_mulrf3_cond (g, cond, b, y, zero, status1, trunc_off));
1126 /* h = 0.5 * y */
1127 emit_insn (gen_mulrf3_cond (h, cond, reg_rf_c1, y, zero, status1, trunc_off));
1128 /* r = 0.5 - (g * h) */
1129 emit_insn (gen_m2subrf4_cond (r, cond, reg_rf_c1, g, h, zero, status1, trunc_off));
1130 /* g1 = g + (g * r) */
1131 emit_insn (gen_m2addrf4_cond (g1, cond, g, g, r, zero, status1, trunc_off));
1132 /* h1 = h + (h * r) */
1133 emit_insn (gen_m2addrf4_cond (h1, cond, h, h, r, zero, status1, trunc_off));
1134 /* r1 = 0.5 - (g1 * h1) */
1135 emit_insn (gen_m2subrf4_cond (r1, cond, reg_rf_c1, g1, h1, zero, status1, trunc_off));
1136 /* g2 = g1 + (g1 * r1) */
1137 emit_insn (gen_m2addrf4_cond (g2, cond, g1, g1, r1, zero, status1, trunc_off));
1138 /* h2 = h1 + (h1 * r1) */
1139 emit_insn (gen_m2addrf4_cond (h2, cond, h1, h1, r1, zero, status1, trunc_off));
1140 /* d = b - (g2 * g2) */
1141 emit_insn (gen_m2subrf4_cond (d, cond, b, g2, g2, zero, status1, trunc_off));
1142 /* g3 = g2 + (d * h2) */
1143 emit_insn (gen_m2addrf4_cond (g3, cond, g2, d, h2, zero, status1, trunc_off));
1144 /* d1 = b - (g3 * g3) */
1145 emit_insn (gen_m2subrf4_cond (d1, cond, b, g3, g3, zero, status1, trunc_off));
  /* The final step passes y where the earlier steps pass zero;
     presumably y is the value used when COND is false -- confirm
     against the m2addrf4_cond pattern.  */
1146 /* g4 = double (g3 + (d1 * h2)) */
1147 emit_insn (gen_m2addrf4_cond (g4, cond, g3, d1, h2, y, status1, trunc_dbl));
1148 /* Conversion back into DFmode. */
1149 emit_insn (gen_truncrfdf2 (operands[0], g4));
1150 DONE;
1151})
1152
;; Inline extended-precision square root.  The input is moved into RFmode;
;; the estimate g of sqrt(b) and the half-reciprocal h are refined from
;; the approximation y over three rounds, with two residual corrections of
;; g, and the result is converted back to XFmode.  No rounding selector
;; other than "none" is used in this sequence.
49df2fb8 1153(define_expand "sqrtxf2"
f3a83111
SE
1154 [(set (match_operand:XF 0 "fr_register_operand" "")
1155 (sqrt:XF (match_operand:XF 1 "fr_register_operand" "")))]
1156 "TARGET_INLINE_SQRT"
1157{
1158 rtx y = gen_reg_rtx (RFmode);
1159 rtx b = gen_reg_rtx (RFmode);
1160 rtx g = gen_reg_rtx (RFmode);
1161 rtx g1 = gen_reg_rtx (RFmode);
1162 rtx g2 = gen_reg_rtx (RFmode);
1163 rtx g3 = gen_reg_rtx (RFmode);
1164 rtx g4 = gen_reg_rtx (RFmode);
1165 rtx e = gen_reg_rtx (RFmode);
1166 rtx e1 = gen_reg_rtx (RFmode);
1167 rtx e2 = gen_reg_rtx (RFmode);
1168 rtx h = gen_reg_rtx (RFmode);
1169 rtx h1 = gen_reg_rtx (RFmode);
1170 rtx h2 = gen_reg_rtx (RFmode);
1171 rtx h3 = gen_reg_rtx (RFmode);
1172 rtx d = gen_reg_rtx (RFmode);
1173 rtx d1 = gen_reg_rtx (RFmode);
33620355 1174 rtx cond = gen_reg_rtx (CCImode);
f3a83111
SE
1175 rtx zero = CONST0_RTX (RFmode);
  /* c1 is the constant used as 0.5 below.  */
1176 rtx c1 = ia64_dconst_0_5();
1177 rtx reg_df_c1 = gen_reg_rtx (DFmode);
1178 rtx reg_rf_c1 = gen_reg_rtx (RFmode);
  /* Status-register selectors (0 and 1) and the rounding selector
     (2 = none) for the generators below.  */
1179 rtx status0 = CONST0_RTX (SImode);
1180 rtx status1 = CONST1_RTX (SImode);
1181 rtx trunc_off = CONST2_RTX (SImode);
1182
1183 /* Put needed constants into registers. */
1184 emit_insn (gen_movdf (reg_df_c1, c1));
1185 emit_insn (gen_extenddfrf2 (reg_rf_c1, reg_df_c1));
1186 /* Empty conversion to put input into RFmode. */
1187 emit_insn (gen_extendxfrf2 (b, operands[1]));
1188 /* y = sqrt (1 / b) */
1189 emit_insn (gen_sqrt_approx_rf (y, b, cond, status0));
1190 /* g = b * y */
1191 emit_insn (gen_mulrf3_cond (g, cond, b, y, zero, status1, trunc_off));
1192 /* h = 0.5 * y */
1193 emit_insn (gen_mulrf3_cond (h, cond, reg_rf_c1, y, zero, status1, trunc_off));
1194 /* e = 0.5 - (g * h) */
1195 emit_insn (gen_m2subrf4_cond (e, cond, reg_rf_c1, g, h, zero, status1, trunc_off));
1196 /* g1 = g + (g * e) */
1197 emit_insn (gen_m2addrf4_cond (g1, cond, g, g, e, zero, status1, trunc_off));
1198 /* h1 = h + (h * e) */
1199 emit_insn (gen_m2addrf4_cond (h1, cond, h, h, e, zero, status1, trunc_off));
1200 /* e1 = 0.5 - (g1 * h1) */
1201 emit_insn (gen_m2subrf4_cond (e1, cond, reg_rf_c1, g1, h1, zero, status1, trunc_off));
1202 /* g2 = g1 + (g1 * e1) */
1203 emit_insn (gen_m2addrf4_cond (g2, cond, g1, g1, e1, zero, status1, trunc_off));
1204 /* h2 = h1 + (h1 * e1) */
1205 emit_insn (gen_m2addrf4_cond (h2, cond, h1, h1, e1, zero, status1, trunc_off));
1206 /* d = b - (g2 * g2) */
1207 emit_insn (gen_m2subrf4_cond (d, cond, b, g2, g2, zero, status1, trunc_off));
1208 /* e2 = 0.5 - (g2 * h2) */
1209 emit_insn (gen_m2subrf4_cond (e2, cond, reg_rf_c1, g2, h2, zero, status1, trunc_off));
1210 /* g3 = g2 + (d * h2) */
1211 emit_insn (gen_m2addrf4_cond (g3, cond, g2, d, h2, zero, status1, trunc_off));
1212 /* h3 = h2 + (e2 * h2) */
1213 emit_insn (gen_m2addrf4_cond (h3, cond, h2, e2, h2, zero, status1, trunc_off));
1214 /* d1 = b - (g3 * g3) */
1215 emit_insn (gen_m2subrf4_cond (d1, cond, b, g3, g3, zero, status1, trunc_off));
  /* The final step passes y where the earlier steps pass zero;
     presumably y is the value used when COND is false -- confirm
     against the m2addrf4_cond pattern.  */
1216 /* g4 = g3 + (d1 * h3) */
1217 emit_insn (gen_m2addrf4_cond (g4, cond, g3, d1, h3, y, status1, trunc_off));
1218 /* Conversion back into XFmode. */
1219 emit_insn (gen_truncrfxf2 (operands[0], g4));
1220 DONE;
1221})