]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/expmed.c
decl.c, [...]: Remove redundant enum from machine_mode.
[thirdparty/gcc.git] / gcc / expmed.c
CommitLineData
44037a66
TG
1/* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
23a5b65a 3 Copyright (C) 1987-2014 Free Software Foundation, Inc.
44037a66 4
1322177d 5This file is part of GCC.
44037a66 6
1322177d
LB
7GCC is free software; you can redistribute it and/or modify it under
8the terms of the GNU General Public License as published by the Free
9dcd6f09 9Software Foundation; either version 3, or (at your option) any later
1322177d 10version.
44037a66 11
1322177d
LB
12GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13WARRANTY; without even the implied warranty of MERCHANTABILITY or
14FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15for more details.
44037a66
TG
16
17You should have received a copy of the GNU General Public License
9dcd6f09
NC
18along with GCC; see the file COPYING3. If not see
19<http://www.gnu.org/licenses/>. */
44037a66
TG
20
21
22#include "config.h"
670ee920 23#include "system.h"
4977bab6
ZW
24#include "coretypes.h"
25#include "tm.h"
718f9c0f 26#include "diagnostic-core.h"
44037a66
TG
27#include "rtl.h"
28#include "tree.h"
d8a2d370 29#include "stor-layout.h"
6baf1cc8 30#include "tm_p.h"
44037a66 31#include "flags.h"
44037a66
TG
32#include "insn-config.h"
33#include "expr.h"
e78d8e51 34#include "optabs.h"
44037a66 35#include "recog.h"
b0c48229 36#include "langhooks.h"
60393bbc
AM
37#include "predict.h"
38#include "basic-block.h"
6fb5fa3c 39#include "df.h"
0890b981 40#include "target.h"
462f85ce
RS
41#include "expmed.h"
42
43struct target_expmed default_target_expmed;
44#if SWITCHABLE_TARGET
45struct target_expmed *this_target_expmed = &default_target_expmed;
46#endif
44037a66 47
502b8322
AJ
48static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
49 unsigned HOST_WIDE_INT,
1169e45d
AH
50 unsigned HOST_WIDE_INT,
51 unsigned HOST_WIDE_INT,
1169e45d 52 rtx);
ebb99f96
BE
53static void store_fixed_bit_field_1 (rtx, unsigned HOST_WIDE_INT,
54 unsigned HOST_WIDE_INT,
55 rtx);
502b8322 56static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
1169e45d
AH
57 unsigned HOST_WIDE_INT,
58 unsigned HOST_WIDE_INT,
59 unsigned HOST_WIDE_INT,
60 rtx);
ef4bddc2 61static rtx extract_fixed_bit_field (machine_mode, rtx,
502b8322 62 unsigned HOST_WIDE_INT,
c6285bd7 63 unsigned HOST_WIDE_INT, rtx, int);
ef4bddc2 64static rtx extract_fixed_bit_field_1 (machine_mode, rtx,
6f4e9cf8
BE
65 unsigned HOST_WIDE_INT,
66 unsigned HOST_WIDE_INT, rtx, int);
ef4bddc2 67static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
502b8322
AJ
68static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
69 unsigned HOST_WIDE_INT, int);
ef4bddc2
RS
70static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
71static rtx expand_smod_pow2 (machine_mode, rtx, HOST_WIDE_INT);
72static rtx expand_sdiv_pow2 (machine_mode, rtx, HOST_WIDE_INT);
44037a66 73
807e902e
KZ
74/* Return a constant integer mask value of mode MODE with BITSIZE ones
75 followed by BITPOS zeros, or the complement of that if COMPLEMENT.
76 The mask is truncated if necessary to the width of mode MODE. The
77 mask is zero-extended if BITSIZE+BITPOS is too small for MODE. */
78
79static inline rtx
ef4bddc2 80mask_rtx (machine_mode mode, int bitpos, int bitsize, bool complement)
807e902e
KZ
81{
82 return immed_wide_int_const
83 (wi::shifted_mask (bitpos, bitsize, complement,
84 GET_MODE_PRECISION (mode)), mode);
85}
/* Test whether a value is zero or a power of two.  (The original
   comment said "zero of a power of two".)  Relies on the classic
   x & (x - 1) trick: clearing the lowest set bit yields zero exactly
   when at most one bit is set.  */

#define EXACT_POWER_OF_2_OR_ZERO_P(x) \
  (((x) & ((x) - (unsigned HOST_WIDE_INT) 1)) == 0)
/* Scratch RTL templates shared by init_expmed and its subroutines.
   The modes and operands of these nodes are repeatedly overwritten
   (via PUT_MODE / XEXP) so the cost of each operation can be measured
   in every machine mode without allocating fresh RTL each time; they
   are ggc_free'd once all costs are recorded.  */
struct init_expmed_rtl
{
  rtx reg;		/* Generic register operand.  */
  rtx plus;		/* (PLUS reg reg) */
  rtx neg;		/* (NEG reg) */
  rtx mult;		/* (MULT reg reg) */
  rtx sdiv;		/* (DIV reg reg) */
  rtx udiv;		/* (UDIV reg reg) */
  rtx sdiv_32;		/* (DIV reg 32) -- signed division by 2**5.  */
  rtx smod_32;		/* (MOD reg 32) -- signed modulus by 2**5.  */
  rtx wide_mult;	/* Product of two zero-extended registers.  */
  rtx wide_lshr;	/* Logical shift right of that wide product.  */
  rtx wide_trunc;	/* Truncation of the shifted wide product.  */
  rtx shift;		/* (ASHIFT reg cint[m]) */
  rtx shift_mult;	/* (MULT reg pow2[m]) -- a shift written as a multiply.  */
  rtx shift_add;	/* (PLUS shift_mult reg) */
  rtx shift_sub0;	/* (MINUS shift_mult reg) */
  rtx shift_sub1;	/* (MINUS reg shift_mult) */
  rtx zext;		/* (ZERO_EXTEND reg) */
  rtx trunc;		/* (TRUNCATE reg) */

  /* pow2[m] holds the constant 1 << m; cint[m] holds the constant m.  */
  rtx pow2[MAX_BITS_PER_WORD];
  rtx cint[MAX_BITS_PER_WORD];
};
115
91f8035e 116static void
ef4bddc2
RS
117init_expmed_one_conv (struct init_expmed_rtl *all, machine_mode to_mode,
118 machine_mode from_mode, bool speed)
91f8035e
RH
119{
120 int to_size, from_size;
121 rtx which;
122
50b6ee8b
DD
123 to_size = GET_MODE_PRECISION (to_mode);
124 from_size = GET_MODE_PRECISION (from_mode);
125
126 /* Most partial integers have a precision less than the "full"
127 integer it requires for storage. In case one doesn't, for
128 comparison purposes here, reduce the bit size by one in that
129 case. */
130 if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
131 && exact_log2 (to_size) != -1)
132 to_size --;
133 if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
134 && exact_log2 (from_size) != -1)
135 from_size --;
91f8035e
RH
136
137 /* Assume cost of zero-extend and sign-extend is the same. */
c83cf304 138 which = (to_size < from_size ? all->trunc : all->zext);
91f8035e 139
c83cf304 140 PUT_MODE (all->reg, from_mode);
91f8035e
RH
141 set_convert_cost (to_mode, from_mode, speed, set_src_cost (which, speed));
142}
/* Record the costs of the standard arithmetic, division, shift and
   conversion operations in MODE, for the cost regime selected by
   SPEED, by retargeting the shared RTL templates in *ALL to MODE and
   pricing each one with set_src_cost.  */

static void
init_expmed_one_mode (struct init_expmed_rtl *all,
		      machine_mode mode, int speed)
{
  int m, n, mode_bitsize;
  machine_mode mode_from;

  mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);

  /* Point every shared template at MODE before querying costs.  */
  PUT_MODE (all->reg, mode);
  PUT_MODE (all->plus, mode);
  PUT_MODE (all->neg, mode);
  PUT_MODE (all->mult, mode);
  PUT_MODE (all->sdiv, mode);
  PUT_MODE (all->udiv, mode);
  PUT_MODE (all->sdiv_32, mode);
  PUT_MODE (all->smod_32, mode);
  PUT_MODE (all->wide_trunc, mode);
  PUT_MODE (all->shift, mode);
  PUT_MODE (all->shift_mult, mode);
  PUT_MODE (all->shift_add, mode);
  PUT_MODE (all->shift_sub0, mode);
  PUT_MODE (all->shift_sub1, mode);
  PUT_MODE (all->zext, mode);
  PUT_MODE (all->trunc, mode);

  set_add_cost (speed, mode, set_src_cost (all->plus, speed));
  set_neg_cost (speed, mode, set_src_cost (all->neg, speed));
  set_mul_cost (speed, mode, set_src_cost (all->mult, speed));
  set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, speed));
  set_udiv_cost (speed, mode, set_src_cost (all->udiv, speed));

  /* A power-of-two division/modulus is "cheap" if it costs no more
     than a couple of additions (the thresholds 2x and 4x are
     heuristic).  */
  set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, speed)
				     <= 2 * add_cost (speed, mode)));
  set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, speed)
				     <= 4 * add_cost (speed, mode)));

  /* A shift by zero is free; shift-and-add by zero degenerates to a
     plain add.  */
  set_shift_cost (speed, mode, 0, 0);
  {
    int cost = add_cost (speed, mode);
    set_shiftadd_cost (speed, mode, 0, cost);
    set_shiftsub0_cost (speed, mode, 0, cost);
    set_shiftsub1_cost (speed, mode, 0, cost);
  }

  /* Price shifts (and shift-add/shift-sub combinations) for every
     useful shift count M, plugging the constants into the templates.  */
  n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
  for (m = 1; m < n; m++)
    {
      XEXP (all->shift, 1) = all->cint[m];
      XEXP (all->shift_mult, 1) = all->pow2[m];

      set_shift_cost (speed, mode, m, set_src_cost (all->shift, speed));
      set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, speed));
      set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, speed));
      set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, speed));
    }

  /* Record conversion costs from every integer mode into MODE.  */
  if (SCALAR_INT_MODE_P (mode))
    {
      for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
	   mode_from = (machine_mode)(mode_from + 1))
	init_expmed_one_conv (all, mode, mode_from, speed);
    }
  if (GET_MODE_CLASS (mode) == MODE_INT)
    {
      machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
      if (wider_mode != VOIDmode)
	{
	  /* Price a widening multiply and a highpart multiply (wide
	     multiply, shift right by MODE's width, truncate).  */
	  PUT_MODE (all->zext, wider_mode);
	  PUT_MODE (all->wide_mult, wider_mode);
	  PUT_MODE (all->wide_lshr, wider_mode);
	  XEXP (all->wide_lshr, 1) = GEN_INT (mode_bitsize);

	  set_mul_widen_cost (speed, wider_mode,
			      set_src_cost (all->wide_mult, speed));
	  set_mul_highpart_cost (speed, mode,
				 set_src_cost (all->wide_trunc, speed));
	}
    }
}
/* Populate the expmed cost tables (add/mul/div/shift/convert costs)
   for both the size and speed regimes, across all integer, partial
   integer and integer vector modes.  Builds one set of scratch RTL
   templates, reuses them for every mode, then frees them.  */

void
init_expmed (void)
{
  struct init_expmed_rtl all;
  machine_mode mode = QImode;
  int m, speed;

  memset (&all, 0, sizeof all);
  /* Pre-build the shift-count constants: pow2[m] = 1 << m, cint[m] = m.  */
  for (m = 1; m < MAX_BITS_PER_WORD; m++)
    {
      all.pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
      all.cint[m] = GEN_INT (m);
    }

  /* Avoid using hard regs in ways which may be unsupported.  */
  all.reg = gen_rtx_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
  all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
  all.neg = gen_rtx_NEG (mode, all.reg);
  all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
  all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
  all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
  all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
  all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
  all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
  all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
  all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
  all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
  all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
  all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
  all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
  all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
  all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
  all.trunc = gen_rtx_TRUNCATE (mode, all.reg);

  /* speed == 0 prices for size, speed == 1 for speed; switch the RTL
     profile so set_src_cost sees the right regime.  */
  for (speed = 0; speed < 2; speed++)
    {
      crtl->maybe_hot_insn_p = speed;
      set_zero_cost (speed, set_src_cost (const0_rtx, speed));

      for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
	   mode = (machine_mode)(mode + 1))
	init_expmed_one_mode (&all, mode, speed);

      if (MIN_MODE_PARTIAL_INT != VOIDmode)
	for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
	     mode = (machine_mode)(mode + 1))
	  init_expmed_one_mode (&all, mode, speed);

      if (MIN_MODE_VECTOR_INT != VOIDmode)
	for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
	     mode = (machine_mode)(mode + 1))
	  init_expmed_one_mode (&all, mode, speed);
    }

  /* Clear the cached synth_mult algorithm hash table (it may hold
     results priced with stale costs); mark it used from now on.  */
  if (alg_hash_used_p ())
    {
      struct alg_hash_entry *p = alg_hash_entry_ptr (0);
      memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
    }
  else
    set_alg_hash_used_p (true);
  default_rtl_profile ();

  /* Free the scratch templates, outermost expressions first so no
     freed node is still referenced by a live one.  */
  ggc_free (all.trunc);
  ggc_free (all.shift_sub1);
  ggc_free (all.shift_sub0);
  ggc_free (all.shift_add);
  ggc_free (all.shift_mult);
  ggc_free (all.shift);
  ggc_free (all.wide_trunc);
  ggc_free (all.wide_lshr);
  ggc_free (all.wide_mult);
  ggc_free (all.zext);
  ggc_free (all.smod_32);
  ggc_free (all.sdiv_32);
  ggc_free (all.udiv);
  ggc_free (all.sdiv);
  ggc_free (all.mult);
  ggc_free (all.neg);
  ggc_free (all.plus);
  ggc_free (all.reg);
}
307
308/* Return an rtx representing minus the value of X.
309 MODE is the intended mode of the result,
310 useful if X is a CONST_INT. */
311
312rtx
ef4bddc2 313negate_rtx (machine_mode mode, rtx x)
44037a66 314{
a39a7484
RK
315 rtx result = simplify_unary_operation (NEG, mode, x, mode);
316
fdb5537f 317 if (result == 0)
a39a7484
RK
318 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
319
320 return result;
44037a66 321}
da920570 322
26f8b976
RS
323/* Adjust bitfield memory MEM so that it points to the first unit of mode
324 MODE that contains a bitfield of size BITSIZE at bit position BITNUM.
325 If MODE is BLKmode, return a reference to every byte in the bitfield.
326 Set *NEW_BITNUM to the bit position of the field within the new memory. */
327
328static rtx
ef4bddc2 329narrow_bit_field_mem (rtx mem, machine_mode mode,
26f8b976
RS
330 unsigned HOST_WIDE_INT bitsize,
331 unsigned HOST_WIDE_INT bitnum,
332 unsigned HOST_WIDE_INT *new_bitnum)
333{
334 if (mode == BLKmode)
335 {
336 *new_bitnum = bitnum % BITS_PER_UNIT;
337 HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
338 HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
339 / BITS_PER_UNIT);
340 return adjust_bitfield_address_size (mem, mode, offset, size);
341 }
342 else
343 {
344 unsigned int unit = GET_MODE_BITSIZE (mode);
345 *new_bitnum = bitnum % unit;
346 HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
347 return adjust_bitfield_address (mem, mode, offset);
348 }
349}
350
fcdd52b7
RS
351/* The caller wants to perform insertion or extraction PATTERN on a
352 bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
353 BITREGION_START and BITREGION_END are as for store_bit_field
354 and FIELDMODE is the natural mode of the field.
355
356 Search for a mode that is compatible with the memory access
357 restrictions and (where applicable) with a register insertion or
358 extraction. Return the new memory on success, storing the adjusted
359 bit position in *NEW_BITNUM. Return null otherwise. */
360
361static rtx
362adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
363 rtx op0, HOST_WIDE_INT bitsize,
364 HOST_WIDE_INT bitnum,
365 unsigned HOST_WIDE_INT bitregion_start,
366 unsigned HOST_WIDE_INT bitregion_end,
ef4bddc2 367 machine_mode fieldmode,
fcdd52b7
RS
368 unsigned HOST_WIDE_INT *new_bitnum)
369{
370 bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
371 bitregion_end, MEM_ALIGN (op0),
372 MEM_VOLATILE_P (op0));
ef4bddc2 373 machine_mode best_mode;
fcdd52b7
RS
374 if (iter.next_mode (&best_mode))
375 {
376 /* We can use a memory in BEST_MODE. See whether this is true for
377 any wider modes. All other things being equal, we prefer to
378 use the widest mode possible because it tends to expose more
379 CSE opportunities. */
380 if (!iter.prefer_smaller_modes ())
381 {
382 /* Limit the search to the mode required by the corresponding
383 register insertion or extraction instruction, if any. */
ef4bddc2 384 machine_mode limit_mode = word_mode;
fcdd52b7
RS
385 extraction_insn insn;
386 if (get_best_reg_extraction_insn (&insn, pattern,
387 GET_MODE_BITSIZE (best_mode),
388 fieldmode))
389 limit_mode = insn.field_mode;
390
ef4bddc2 391 machine_mode wider_mode;
fcdd52b7
RS
392 while (iter.next_mode (&wider_mode)
393 && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
394 best_mode = wider_mode;
395 }
396 return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
397 new_bitnum);
398 }
399 return NULL_RTX;
400}
401
bebf0797
RS
402/* Return true if a bitfield of size BITSIZE at bit number BITNUM within
403 a structure of mode STRUCT_MODE represents a lowpart subreg. The subreg
404 offset is then BITNUM / BITS_PER_UNIT. */
405
406static bool
407lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
408 unsigned HOST_WIDE_INT bitsize,
ef4bddc2 409 machine_mode struct_mode)
bebf0797
RS
410{
411 if (BYTES_BIG_ENDIAN)
c1a4d0b5 412 return (bitnum % BITS_PER_UNIT == 0
bebf0797
RS
413 && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
414 || (bitnum + bitsize) % BITS_PER_WORD == 0));
415 else
416 return bitnum % BITS_PER_WORD == 0;
417}
/* Return true if -fstrict-volatile-bitfields applies to an access of OP0
   containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
   Return false if the access would touch memory outside the range
   BITREGION_START to BITREGION_END for conformance to the C++ memory
   model.  */

static bool
strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
			    unsigned HOST_WIDE_INT bitnum,
			    machine_mode fieldmode,
			    unsigned HOST_WIDE_INT bitregion_start,
			    unsigned HOST_WIDE_INT bitregion_end)
{
  unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);

  /* -fstrict-volatile-bitfields must be enabled and we must have a
     volatile MEM.  */
  if (!MEM_P (op0)
      || !MEM_VOLATILE_P (op0)
      || flag_strict_volatile_bitfields <= 0)
    return false;

  /* Non-integral modes likely only happen with packed structures.
     Punt.  */
  if (!SCALAR_INT_MODE_P (fieldmode))
    return false;

  /* The bit size must not be larger than the field mode, and
     the field mode must not be larger than a word.  */
  if (bitsize > modesize || modesize > BITS_PER_WORD)
    return false;

  /* Check for cases of unaligned fields that must be split.
     A field that is not byte-aligned within FIELDMODE, or (on
     strict-alignment targets) not naturally aligned, cannot be
     accessed as a single FIELDMODE unit.  */
  if (bitnum % BITS_PER_UNIT + bitsize > modesize
      || (STRICT_ALIGNMENT
	  && bitnum % GET_MODE_ALIGNMENT (fieldmode) + bitsize > modesize))
    return false;

  /* Check for cases where the C++ memory model applies: the whole
     FIELDMODE unit containing the field must stay inside the
     caller-supplied bit region.  */
  if (bitregion_end != 0
      && (bitnum - bitnum % modesize < bitregion_start
	  || bitnum - bitnum % modesize + modesize > bitregion_end))
    return false;

  return true;
}
465
00efe3ea
RS
466/* Return true if OP is a memory and if a bitfield of size BITSIZE at
467 bit number BITNUM can be treated as a simple value of mode MODE. */
468
469static bool
470simple_mem_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
ef4bddc2 471 unsigned HOST_WIDE_INT bitnum, machine_mode mode)
00efe3ea
RS
472{
473 return (MEM_P (op0)
474 && bitnum % BITS_PER_UNIT == 0
475 && bitsize == GET_MODE_BITSIZE (mode)
476 && (!SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
477 || (bitnum % GET_MODE_ALIGNMENT (mode) == 0
478 && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
479}
/* Try to use instruction INSV to store VALUE into a field of OP0.
   BITSIZE and BITNUM are as for store_bit_field.  Return true on
   success; on failure delete any insns emitted and return false.  */

static bool
store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
			    unsigned HOST_WIDE_INT bitsize,
			    unsigned HOST_WIDE_INT bitnum,
			    rtx value)
{
  struct expand_operand ops[4];
  rtx value1;
  rtx xop0 = op0;
  /* Remember where we are so failed expansion can be undone.  */
  rtx_insn *last = get_last_insn ();
  bool copy_back = false;

  machine_mode op_mode = insv->field_mode;
  unsigned int unit = GET_MODE_BITSIZE (op_mode);
  if (bitsize == 0 || bitsize > unit)
    return false;

  if (MEM_P (xop0))
    /* Get a reference to the first byte of the field.  */
    xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
				 &bitnum);
  else
    {
      /* Convert from counting within OP0 to counting in OP_MODE.  */
      if (BYTES_BIG_ENDIAN)
	bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));

      /* If xop0 is a register, we need it in OP_MODE
	 to make it acceptable to the format of insv.  */
      if (GET_CODE (xop0) == SUBREG)
	/* We can't just change the mode, because this might clobber op0,
	   and we will need the original value of op0 if insv fails.  */
	xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
      if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
	xop0 = gen_lowpart_SUBREG (op_mode, xop0);
    }

  /* If the destination is a paradoxical subreg such that we need a
     truncate to the inner mode, perform the insertion on a temporary and
     truncate the result to the original destination.  Note that we can't
     just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
     X) 0)) is (reg:N X).  */
  if (GET_CODE (xop0) == SUBREG
      && REG_P (SUBREG_REG (xop0))
      && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
					 op_mode))
    {
      rtx tem = gen_reg_rtx (op_mode);
      emit_move_insn (tem, xop0);
      xop0 = tem;
      copy_back = true;
    }

  /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
     "backwards" from the size of the unit we are inserting into.
     Otherwise, we count bits from the most significant on a
     BYTES/BITS_BIG_ENDIAN machine.  */

  if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
    bitnum = unit - bitsize - bitnum;

  /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
  value1 = value;
  if (GET_MODE (value) != op_mode)
    {
      if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
	{
	  /* Optimization: Don't bother really extending VALUE
	     if it has all the bits we will actually use.  However,
	     if we must narrow it, be sure we do it correctly.  */

	  if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
	    {
	      rtx tmp;

	      tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
	      if (! tmp)
		tmp = simplify_gen_subreg (op_mode,
					   force_reg (GET_MODE (value),
						      value1),
					   GET_MODE (value), 0);
	      value1 = tmp;
	    }
	  else
	    value1 = gen_lowpart (op_mode, value1);
	}
      else if (CONST_INT_P (value))
	value1 = gen_int_mode (INTVAL (value), op_mode);
      else
	/* Parse phase is supposed to make VALUE's data type
	   match that of the component reference, which is a type
	   at least as wide as the field; so VALUE should have
	   a mode that corresponds to that type.  */
	gcc_assert (CONSTANT_P (value));
    }

  /* Operands of the insv pattern: destination, size, position, value.  */
  create_fixed_operand (&ops[0], xop0);
  create_integer_operand (&ops[1], bitsize);
  create_integer_operand (&ops[2], bitnum);
  create_input_operand (&ops[3], value1, op_mode);
  if (maybe_expand_insn (insv->icode, 4, ops))
    {
      /* If we inserted into a temporary, copy the result back.  */
      if (copy_back)
	convert_move (op0, xop0, true);
      return true;
    }
  delete_insns_since (last);
  return false;
}
593
6d7db3c5
RS
594/* A subroutine of store_bit_field, with the same arguments. Return true
595 if the operation could be implemented.
44037a66 596
6d7db3c5
RS
597 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
598 no other way of implementing the operation. If FALLBACK_P is false,
599 return false instead. */
600
601static bool
602store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1169e45d
AH
603 unsigned HOST_WIDE_INT bitnum,
604 unsigned HOST_WIDE_INT bitregion_start,
605 unsigned HOST_WIDE_INT bitregion_end,
ef4bddc2 606 machine_mode fieldmode,
6d7db3c5 607 rtx value, bool fallback_p)
44037a66 608{
b3694847 609 rtx op0 = str_rtx;
28526e20 610 rtx orig_value;
da920570 611
44037a66
TG
612 while (GET_CODE (op0) == SUBREG)
613 {
614 /* The following line once was done only if WORDS_BIG_ENDIAN,
615 but I think that is a mistake. WORDS_BIG_ENDIAN is
616 meaningful at a much higher level; when structures are copied
617 between memory and regs, the higher-numbered regs
618 always get higher addresses. */
495db1a1
AK
619 int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
620 int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
bebf0797 621 int byte_offset = 0;
495db1a1
AK
622
623 /* Paradoxical subregs need special handling on big endian machines. */
624 if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
625 {
626 int difference = inner_mode_size - outer_mode_size;
627
628 if (WORDS_BIG_ENDIAN)
629 byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
630 if (BYTES_BIG_ENDIAN)
631 byte_offset += difference % UNITS_PER_WORD;
632 }
633 else
634 byte_offset = SUBREG_BYTE (op0);
635
636 bitnum += byte_offset * BITS_PER_UNIT;
44037a66
TG
637 op0 = SUBREG_REG (op0);
638 }
639
2c58f7dd
RS
640 /* No action is needed if the target is a register and if the field
641 lies completely outside that register. This can occur if the source
642 code contains an out-of-bounds access to a small array. */
643 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
6d7db3c5 644 return true;
2c58f7dd 645
b42271d6 646 /* Use vec_set patterns for inserting parts of vectors whenever
997404de
JH
647 available. */
648 if (VECTOR_MODE_P (GET_MODE (op0))
3c0cb5de 649 && !MEM_P (op0)
947131ba 650 && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
997404de
JH
651 && fieldmode == GET_MODE_INNER (GET_MODE (op0))
652 && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
653 && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
654 {
a5c7d693 655 struct expand_operand ops[3];
ef4bddc2
RS
656 machine_mode outermode = GET_MODE (op0);
657 machine_mode innermode = GET_MODE_INNER (outermode);
a5c7d693 658 enum insn_code icode = optab_handler (vec_set_optab, outermode);
997404de 659 int pos = bitnum / GET_MODE_BITSIZE (innermode);
997404de 660
a5c7d693
RS
661 create_fixed_operand (&ops[0], op0);
662 create_input_operand (&ops[1], value, innermode);
663 create_integer_operand (&ops[2], pos);
664 if (maybe_expand_insn (icode, 3, ops))
665 return true;
997404de
JH
666 }
667
308ecea0 668 /* If the target is a register, overwriting the entire object, or storing
bebf0797
RS
669 a full-word or multi-word field can be done with just a SUBREG. */
670 if (!MEM_P (op0)
671 && bitsize == GET_MODE_BITSIZE (fieldmode)
672 && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
673 || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
674 {
675 /* Use the subreg machinery either to narrow OP0 to the required
d8c84975
JJ
676 words or to cope with mode punning between equal-sized modes.
677 In the latter case, use subreg on the rhs side, not lhs. */
678 rtx sub;
679
680 if (bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
681 {
682 sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
683 if (sub)
684 {
685 emit_move_insn (op0, sub);
686 return true;
687 }
688 }
689 else
bebf0797 690 {
d8c84975
JJ
691 sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
692 bitnum / BITS_PER_UNIT);
693 if (sub)
694 {
695 emit_move_insn (sub, value);
696 return true;
697 }
bebf0797
RS
698 }
699 }
308ecea0 700
bebf0797 701 /* If the target is memory, storing any naturally aligned field can be
308ecea0 702 done with a simple store. For targets that support fast unaligned
0b69c29f 703 memory, any naturally sized, unit aligned field can be done directly. */
00efe3ea 704 if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode))
44037a66 705 {
bebf0797 706 op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
44037a66 707 emit_move_insn (op0, value);
6d7db3c5 708 return true;
44037a66
TG
709 }
710
a8ca7756
JW
711 /* Make sure we are playing with integral modes. Pun with subregs
712 if we aren't. This must come after the entire register case above,
713 since that case is valid for any mode. The following cases are only
714 valid for integral modes. */
715 {
ef4bddc2 716 machine_mode imode = int_mode_for_mode (GET_MODE (op0));
a8ca7756
JW
717 if (imode != GET_MODE (op0))
718 {
3c0cb5de 719 if (MEM_P (op0))
e98fc6de 720 op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
a8ca7756 721 else
5b0264cb
NS
722 {
723 gcc_assert (imode != BLKmode);
724 op0 = gen_lowpart (imode, op0);
725 }
a8ca7756
JW
726 }
727 }
728
44037a66 729 /* Storing an lsb-aligned field in a register
bebf0797 730 can be done with a movstrict instruction. */
44037a66 731
3c0cb5de 732 if (!MEM_P (op0)
bebf0797 733 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
44037a66 734 && bitsize == GET_MODE_BITSIZE (fieldmode)
947131ba 735 && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
44037a66 736 {
a5c7d693
RS
737 struct expand_operand ops[2];
738 enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
5d560619 739 rtx arg0 = op0;
19228b93 740 unsigned HOST_WIDE_INT subreg_off;
5e4900c7 741
a5c7d693 742 if (GET_CODE (arg0) == SUBREG)
44037a66 743 {
5b0264cb
NS
744 /* Else we've got some float mode source being extracted into
745 a different float mode destination -- this combination of
746 subregs results in Severe Tire Damage. */
a5c7d693 747 gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
5b0264cb
NS
748 || GET_MODE_CLASS (fieldmode) == MODE_INT
749 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
a5c7d693 750 arg0 = SUBREG_REG (arg0);
5e4900c7 751 }
470032d7 752
bebf0797 753 subreg_off = bitnum / BITS_PER_UNIT;
19228b93
JJ
754 if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
755 {
756 arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
a5c7d693 757
19228b93
JJ
758 create_fixed_operand (&ops[0], arg0);
759 /* Shrink the source operand to FIELDMODE. */
760 create_convert_operand_to (&ops[1], value, fieldmode, false);
761 if (maybe_expand_insn (icode, 2, ops))
762 return true;
763 }
44037a66
TG
764 }
765
766 /* Handle fields bigger than a word. */
767
768 if (bitsize > BITS_PER_WORD)
769 {
770 /* Here we transfer the words of the field
771 in the order least significant first.
772 This is because the most significant word is the one which may
ad83e87b
PB
773 be less than full.
774 However, only do that if the value is not BLKmode. */
775
770ae6cc
RK
776 unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
777 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
778 unsigned int i;
f3f6fb16 779 rtx_insn *last;
44037a66
TG
780
781 /* This is the mode we must force value to, so that there will be enough
782 subwords to extract. Note that fieldmode will often (always?) be
783 VOIDmode, because that is what store_field uses to indicate that this
535a42b1
NS
784 is a bit field, but passing VOIDmode to operand_subword_force
785 is not allowed. */
9f5e2e11
RS
786 fieldmode = GET_MODE (value);
787 if (fieldmode == VOIDmode)
6f83092f 788 fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
44037a66 789
6d7db3c5 790 last = get_last_insn ();
44037a66
TG
791 for (i = 0; i < nwords; i++)
792 {
ad83e87b
PB
793 /* If I is 0, use the low-order word in both field and target;
794 if I is 1, use the next to lowest word; and so on. */
00d6b19a
AB
795 unsigned int wordnum = (backwards
796 ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
797 - i - 1
798 : i);
770ae6cc 799 unsigned int bit_offset = (backwards
04050c69
RK
800 ? MAX ((int) bitsize - ((int) i + 1)
801 * BITS_PER_WORD,
802 0)
803 : (int) i * BITS_PER_WORD);
6d7db3c5 804 rtx value_word = operand_subword_force (value, wordnum, fieldmode);
3bdb97b8
AK
805 unsigned HOST_WIDE_INT new_bitsize =
806 MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
807
808 /* If the remaining chunk doesn't have full wordsize we have
809 to make sure that for big endian machines the higher order
810 bits are used. */
811 if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
812 value_word = simplify_expand_binop (word_mode, lshr_optab,
813 value_word,
814 GEN_INT (BITS_PER_WORD
815 - new_bitsize),
816 NULL_RTX, true,
817 OPTAB_LIB_WIDEN);
818
819 if (!store_bit_field_1 (op0, new_bitsize,
1169e45d
AH
820 bitnum + bit_offset,
821 bitregion_start, bitregion_end,
822 word_mode,
6d7db3c5
RS
823 value_word, fallback_p))
824 {
825 delete_insns_since (last);
826 return false;
827 }
44037a66 828 }
6d7db3c5 829 return true;
44037a66
TG
830 }
831
4f1da2e9
RS
832 /* If VALUE has a floating-point or complex mode, access it as an
833 integer of the corresponding size. This can occur on a machine
834 with 64 bit registers that uses SFmode for float. It can also
835 occur for unaligned float or complex fields. */
28526e20 836 orig_value = value;
4f1da2e9
RS
837 if (GET_MODE (value) != VOIDmode
838 && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
32b069d3 839 && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
4f1da2e9
RS
840 {
841 value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
842 emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
843 }
2305bcad 844
bebf0797
RS
845 /* If OP0 is a multi-word register, narrow it to the affected word.
846 If the region spans two words, defer to store_split_bit_field. */
847 if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
848 {
849 op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
850 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
851 gcc_assert (op0);
852 bitnum %= BITS_PER_WORD;
853 if (bitnum + bitsize > BITS_PER_WORD)
854 {
855 if (!fallback_p)
856 return false;
857
858 store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
859 bitregion_end, value);
860 return true;
861 }
862 }
863
864 /* From here on we can assume that the field to be stored in fits
865 within a word. If the destination is a register, it too fits
866 in a word. */
44037a66 867
fcdd52b7
RS
868 extraction_insn insv;
869 if (!MEM_P (op0)
870 && get_best_reg_extraction_insn (&insv, EP_insv,
871 GET_MODE_BITSIZE (GET_MODE (op0)),
872 fieldmode)
873 && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
a20556e4 874 return true;
6d7db3c5
RS
875
876 /* If OP0 is a memory, try copying it to a register and seeing if a
877 cheap register alternative is available. */
fcdd52b7 878 if (MEM_P (op0))
6d7db3c5 879 {
f5d4f18c
SL
880 if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
881 fieldmode)
fcdd52b7 882 && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
17a73ba0
RS
883 return true;
884
f3f6fb16 885 rtx_insn *last = get_last_insn ();
6d7db3c5 886
fcdd52b7
RS
887 /* Try loading part of OP0 into a register, inserting the bitfield
888 into that, and then copying the result back to OP0. */
889 unsigned HOST_WIDE_INT bitpos;
890 rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
891 bitregion_start, bitregion_end,
892 fieldmode, &bitpos);
893 if (xop0)
0fb7aeda 894 {
fcdd52b7 895 rtx tempreg = copy_to_reg (xop0);
bebf0797 896 if (store_bit_field_1 (tempreg, bitsize, bitpos,
1169e45d 897 bitregion_start, bitregion_end,
6d7db3c5
RS
898 fieldmode, orig_value, false))
899 {
900 emit_move_insn (xop0, tempreg);
901 return true;
902 }
44037a66 903 delete_insns_since (last);
44037a66
TG
904 }
905 }
6d7db3c5
RS
906
907 if (!fallback_p)
908 return false;
909
bebf0797
RS
910 store_fixed_bit_field (op0, bitsize, bitnum, bitregion_start,
911 bitregion_end, value);
6d7db3c5
RS
912 return true;
913}
914
915/* Generate code to store value from rtx VALUE
916 into a bit-field within structure STR_RTX
917 containing BITSIZE bits starting at bit BITNUM.
1169e45d
AH
918
919 BITREGION_START is bitpos of the first bitfield in this region.
920 BITREGION_END is the bitpos of the ending bitfield in this region.
921 These two fields are 0, if the C++ memory model does not apply,
922 or we are not interested in keeping track of bitfield regions.
923
6d7db3c5
RS
924 FIELDMODE is the machine-mode of the FIELD_DECL node for this field. */
925
926void
927store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1169e45d
AH
928 unsigned HOST_WIDE_INT bitnum,
929 unsigned HOST_WIDE_INT bitregion_start,
930 unsigned HOST_WIDE_INT bitregion_end,
ef4bddc2 931 machine_mode fieldmode,
6d7db3c5
RS
932 rtx value)
933{
f5d4f18c 934 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
6f4e9cf8
BE
935 if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, fieldmode,
936 bitregion_start, bitregion_end))
f5d4f18c 937 {
f5d4f18c
SL
938 /* Storing any naturally aligned field can be done with a simple
939 store. For targets that support fast unaligned memory, any
940 naturally sized, unit aligned field can be done directly. */
941 if (simple_mem_bitfield_p (str_rtx, bitsize, bitnum, fieldmode))
942 {
943 str_rtx = adjust_bitfield_address (str_rtx, fieldmode,
944 bitnum / BITS_PER_UNIT);
945 emit_move_insn (str_rtx, value);
946 }
947 else
ebb99f96
BE
948 {
949 str_rtx = narrow_bit_field_mem (str_rtx, fieldmode, bitsize, bitnum,
950 &bitnum);
951 /* Explicitly override the C/C++ memory model; ignore the
952 bit range so that we can do the access in the mode mandated
953 by -fstrict-volatile-bitfields instead. */
548cfdc2 954 store_fixed_bit_field_1 (str_rtx, bitsize, bitnum, value);
ebb99f96
BE
955 }
956
f5d4f18c
SL
957 return;
958 }
959
1169e45d
AH
960 /* Under the C++0x memory model, we must not touch bits outside the
961 bit region. Adjust the address to start at the beginning of the
962 bit region. */
a59b038c 963 if (MEM_P (str_rtx) && bitregion_start > 0)
1169e45d 964 {
ef4bddc2 965 machine_mode bestmode;
ee88e690 966 HOST_WIDE_INT offset, size;
1169e45d 967
a59b038c
EB
968 gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
969
1169e45d
AH
970 offset = bitregion_start / BITS_PER_UNIT;
971 bitnum -= bitregion_start;
ee88e690 972 size = (bitnum + bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT;
1169e45d
AH
973 bitregion_end -= bitregion_start;
974 bitregion_start = 0;
975 bestmode = get_best_mode (bitsize, bitnum,
976 bitregion_start, bitregion_end,
fcdd52b7 977 MEM_ALIGN (str_rtx), VOIDmode,
1169e45d 978 MEM_VOLATILE_P (str_rtx));
ee88e690 979 str_rtx = adjust_bitfield_address_size (str_rtx, bestmode, offset, size);
1169e45d
AH
980 }
981
982 if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
983 bitregion_start, bitregion_end,
984 fieldmode, value, true))
6d7db3c5 985 gcc_unreachable ();
44037a66
TG
986}
987\f
bebf0797
RS
988/* Use shifts and boolean operations to store VALUE into a bit field of
989 width BITSIZE in OP0, starting at bit BITNUM. */
44037a66
TG
990
991static void
bebf0797
RS
992store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
993 unsigned HOST_WIDE_INT bitnum,
1169e45d
AH
994 unsigned HOST_WIDE_INT bitregion_start,
995 unsigned HOST_WIDE_INT bitregion_end,
996 rtx value)
44037a66 997{
44037a66
TG
998 /* There is a case not handled here:
999 a structure with a known alignment of just a halfword
1000 and a field split across two aligned halfwords within the structure.
1001 Or likewise a structure with a known alignment of just a byte
1002 and a field split across two bytes.
1003 Such cases are not supposed to be able to occur. */
1004
bebf0797 1005 if (MEM_P (op0))
44037a66 1006 {
ef4bddc2 1007 machine_mode mode = GET_MODE (op0);
053a35af 1008 if (GET_MODE_BITSIZE (mode) == 0
0fb7aeda
KH
1009 || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
1010 mode = word_mode;
f5d4f18c
SL
1011 mode = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1012 MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
44037a66
TG
1013
1014 if (mode == VOIDmode)
1015 {
1016 /* The only way this should occur is if the field spans word
1017 boundaries. */
bebf0797
RS
1018 store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
1019 bitregion_end, value);
44037a66
TG
1020 return;
1021 }
1022
26f8b976 1023 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
44037a66
TG
1024 }
1025
ebb99f96 1026 store_fixed_bit_field_1 (op0, bitsize, bitnum, value);
ebb99f96
BE
1027}
1028
/* Helper function for store_fixed_bit_field, stores
   the bit field always using the MODE of OP0.

   OP0 must be a scalar-integer REG or MEM; BITSIZE/BITNUM give the
   field, and VALUE is the value to insert.  The store is expanded as
   load, AND with the inverse mask, IOR with the shifted value.  */

static void
store_fixed_bit_field_1 (rtx op0, unsigned HOST_WIDE_INT bitsize,
			 unsigned HOST_WIDE_INT bitnum,
			 rtx value)
{
  machine_mode mode;
  rtx temp;
  int all_zero = 0;	/* VALUE is known to be all-zero bits.  */
  int all_one = 0;	/* VALUE is known to be all-one bits.  */

  mode = GET_MODE (op0);
  gcc_assert (SCALAR_INT_MODE_P (mode));

  /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
     for invalid input, such as f5 from gcc.dg/pr48335-2.c.  */

  if (BYTES_BIG_ENDIAN)
    /* BITNUM is the distance between our msb
       and that of the containing datum.
       Convert it to the distance from the lsb.  */
    bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;

  /* Now BITNUM is always the distance between our lsb
     and that of OP0.  */

  /* Shift VALUE left by BITNUM bits.  If VALUE is not constant,
     we must first convert its mode to MODE.  */

  if (CONST_INT_P (value))
    {
      unsigned HOST_WIDE_INT v = UINTVAL (value);

      if (bitsize < HOST_BITS_PER_WIDE_INT)
	v &= ((unsigned HOST_WIDE_INT) 1 << bitsize) - 1;

      /* Recognize all-zero / all-one constants so the AND or the IOR
	 below can be skipped entirely.  */
      if (v == 0)
	all_zero = 1;
      else if ((bitsize < HOST_BITS_PER_WIDE_INT
		&& v == ((unsigned HOST_WIDE_INT) 1 << bitsize) - 1)
	       || (bitsize == HOST_BITS_PER_WIDE_INT
		   && v == (unsigned HOST_WIDE_INT) -1))
	all_one = 1;

      value = lshift_value (mode, v, bitnum);
    }
  else
    {
      /* Masking is needed only when VALUE might have bits set outside
	 the field and the field does not end at OP0's msb.  */
      int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
		      && bitnum + bitsize != GET_MODE_BITSIZE (mode));

      if (GET_MODE (value) != mode)
	value = convert_to_mode (mode, value, 1);

      if (must_and)
	value = expand_binop (mode, and_optab, value,
			      mask_rtx (mode, 0, bitsize, 0),
			      NULL_RTX, 1, OPTAB_LIB_WIDEN);
      if (bitnum > 0)
	value = expand_shift (LSHIFT_EXPR, mode, value,
			      bitnum, NULL_RTX, 1);
    }

  /* Now clear the chosen bits in OP0,
     except that if VALUE is -1 we need not bother.  */
  /* We keep the intermediates in registers to allow CSE to combine
     consecutive bitfield assignments.  */

  temp = force_reg (mode, op0);

  if (! all_one)
    {
      /* AND with the inverted field mask to clear the destination bits.  */
      temp = expand_binop (mode, and_optab, temp,
			   mask_rtx (mode, bitnum, bitsize, 1),
			   NULL_RTX, 1, OPTAB_LIB_WIDEN);
      temp = force_reg (mode, temp);
    }

  /* Now logical-or VALUE into OP0, unless it is zero.  */

  if (! all_zero)
    {
      temp = expand_binop (mode, ior_optab, temp, value,
			   NULL_RTX, 1, OPTAB_LIB_WIDEN);
      temp = force_reg (mode, temp);
    }

  if (op0 != temp)
    {
      op0 = copy_rtx (op0);
      emit_move_insn (op0, temp);
    }
}
1124\f
/* Store a bit field that is split across multiple accessible memory objects.

   OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
   BITSIZE is the field width; BITPOS the position of its first bit
   (within the word).
   VALUE is the value to store.
   BITREGION_START/BITREGION_END bound the bytes we may touch under the
   C++ memory model (0/0 when the model does not apply).

   This does not yet handle fields wider than BITS_PER_WORD.  */

static void
store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
		       unsigned HOST_WIDE_INT bitpos,
		       unsigned HOST_WIDE_INT bitregion_start,
		       unsigned HOST_WIDE_INT bitregion_end,
		       rtx value)
{
  unsigned int unit;		/* Access granularity in bits.  */
  unsigned int bitsdone = 0;	/* Bits of the field stored so far.  */

  /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
     much at a time.  */
  if (REG_P (op0) || GET_CODE (op0) == SUBREG)
    unit = BITS_PER_WORD;
  else
    unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);

  /* If OP0 is a memory with a mode, then UNIT must not be larger than
     OP0's mode as well.  Otherwise, store_fixed_bit_field will call us
     again, and we will mutually recurse forever.  */
  if (MEM_P (op0) && GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
    unit = MIN (unit, GET_MODE_BITSIZE (GET_MODE (op0)));

  /* If VALUE is a constant other than a CONST_INT, get it into a register in
     WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
     that VALUE might be a floating-point constant.  */
  if (CONSTANT_P (value) && !CONST_INT_P (value))
    {
      rtx word = gen_lowpart_common (word_mode, value);

      if (word && (value != word))
	value = word;
      else
	value = gen_lowpart_common (word_mode,
				    force_reg (GET_MODE (value) != VOIDmode
					       ? GET_MODE (value)
					       : word_mode, value));
    }

  /* Store the field one chunk (of at most UNIT bits) at a time.  */
  while (bitsdone < bitsize)
    {
      unsigned HOST_WIDE_INT thissize;	/* Bits stored this iteration.  */
      rtx part, word;
      unsigned HOST_WIDE_INT thispos;	/* Bit position within the unit.  */
      unsigned HOST_WIDE_INT offset;	/* Unit index of this chunk.  */

      offset = (bitpos + bitsdone) / unit;
      thispos = (bitpos + bitsdone) % unit;

      /* When region of bytes we can touch is restricted, decrease
	 UNIT close to the end of the region as needed.  If op0 is a REG
	 or SUBREG of REG, don't do this, as there can't be data races
	 on a register and we can expand shorter code in some cases.  */
      if (bitregion_end
	  && unit > BITS_PER_UNIT
	  && bitpos + bitsdone - thispos + unit > bitregion_end + 1
	  && !REG_P (op0)
	  && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
	{
	  /* Halve the unit and recompute this chunk from scratch.  */
	  unit = unit / 2;
	  continue;
	}

      /* THISSIZE must not overrun a word boundary.  Otherwise,
	 store_fixed_bit_field will call us again, and we will mutually
	 recurse forever.  */
      thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
      thissize = MIN (thissize, unit - thispos);

      if (BYTES_BIG_ENDIAN)
	{
	  /* Fetch successively less significant portions.  */
	  if (CONST_INT_P (value))
	    part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
			     >> (bitsize - bitsdone - thissize))
			    & (((HOST_WIDE_INT) 1 << thissize) - 1));
	  else
	    {
	      int total_bits = GET_MODE_BITSIZE (GET_MODE (value));
	      /* The args are chosen so that the last part includes the
		 lsb.  Give extract_bit_field the value it needs (with
		 endianness compensation) to fetch the piece we want.  */
	      part = extract_fixed_bit_field (word_mode, value, thissize,
					      total_bits - bitsize + bitsdone,
					      NULL_RTX, 1);
	    }
	}
      else
	{
	  /* Fetch successively more significant portions.  */
	  if (CONST_INT_P (value))
	    part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
			     >> bitsdone)
			    & (((HOST_WIDE_INT) 1 << thissize) - 1));
	  else
	    part = extract_fixed_bit_field (word_mode, value, thissize,
					    bitsdone, NULL_RTX, 1);
	}

      /* If OP0 is a register, then handle OFFSET here.

	 When handling multiword bitfields, extract_bit_field may pass
	 down a word_mode SUBREG of a larger REG for a bitfield that actually
	 crosses a word boundary.  Thus, for a SUBREG, we must find
	 the current word starting from the base register.  */
      if (GET_CODE (op0) == SUBREG)
	{
	  int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD)
	    + (offset * unit / BITS_PER_WORD);
	  machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
	  if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
	    word = word_offset ? const0_rtx : op0;
	  else
	    word = operand_subword_force (SUBREG_REG (op0), word_offset,
					  GET_MODE (SUBREG_REG (op0)));
	  offset &= BITS_PER_WORD / unit - 1;
	}
      else if (REG_P (op0))
	{
	  machine_mode op0_mode = GET_MODE (op0);
	  if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
	    word = offset ? const0_rtx : op0;
	  else
	    word = operand_subword_force (op0, offset * unit / BITS_PER_WORD,
					  GET_MODE (op0));
	  offset &= BITS_PER_WORD / unit - 1;
	}
      else
	word = op0;

      /* OFFSET is in UNITs, and UNIT is in bits.  If WORD is const0_rtx,
	 it is just an out-of-bounds access.  Ignore it.  */
      if (word != const0_rtx)
	store_fixed_bit_field (word, thissize, offset * unit + thispos,
			       bitregion_start, bitregion_end, part);
      bitsdone += thissize;
    }
}
1272\f
6d7db3c5
RS
1273/* A subroutine of extract_bit_field_1 that converts return value X
1274 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1275 to extract_bit_field. */
44037a66 1276
6d7db3c5 1277static rtx
ef4bddc2
RS
1278convert_extracted_bit_field (rtx x, machine_mode mode,
1279 machine_mode tmode, bool unsignedp)
6d7db3c5
RS
1280{
1281 if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1282 return x;
44037a66 1283
6d7db3c5
RS
1284 /* If the x mode is not a scalar integral, first convert to the
1285 integer mode of that size and then access it as a floating-point
1286 value via a SUBREG. */
1287 if (!SCALAR_INT_MODE_P (tmode))
1288 {
ef4bddc2 1289 machine_mode smode;
44037a66 1290
6d7db3c5
RS
1291 smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1292 x = convert_to_mode (smode, x, unsignedp);
1293 x = force_reg (smode, x);
1294 return gen_lowpart (tmode, x);
1295 }
44037a66 1296
6d7db3c5
RS
1297 return convert_to_mode (tmode, x, unsignedp);
1298}
1299
a20556e4
RS
/* Try to use an ext(z)v pattern to extract a field from OP0.
   Return the extracted value on success, otherwise return null.
   EXT_MODE is the mode of the extraction and the other arguments
   are as for extract_bit_field.  EXTV describes the instruction
   (icode plus the modes it operates on).  */

static rtx
extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
			      unsigned HOST_WIDE_INT bitsize,
			      unsigned HOST_WIDE_INT bitnum,
			      int unsignedp, rtx target,
			      machine_mode mode, machine_mode tmode)
{
  struct expand_operand ops[4];
  rtx spec_target = target;
  rtx spec_target_subreg = 0;
  machine_mode ext_mode = extv->field_mode;
  unsigned unit = GET_MODE_BITSIZE (ext_mode);

  /* The pattern cannot extract a field wider than its own mode.  */
  if (bitsize == 0 || unit < bitsize)
    return NULL_RTX;

  if (MEM_P (op0))
    /* Get a reference to the first byte of the field.  */
    op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
				&bitnum);
  else
    {
      /* Convert from counting within OP0 to counting in EXT_MODE.  */
      if (BYTES_BIG_ENDIAN)
	bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));

      /* If op0 is a register, we need it in EXT_MODE to make it
	 acceptable to the format of ext(z)v.  */
      if (GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
	return NULL_RTX;
      if (REG_P (op0) && GET_MODE (op0) != ext_mode)
	op0 = gen_lowpart_SUBREG (ext_mode, op0);
    }

  /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
     "backwards" from the size of the unit we are extracting from.
     Otherwise, we count bits from the most significant on a
     BYTES/BITS_BIG_ENDIAN machine.  */

  if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
    bitnum = unit - bitsize - bitnum;

  if (target == 0)
    target = spec_target = gen_reg_rtx (tmode);

  if (GET_MODE (target) != ext_mode)
    {
      /* Don't use LHS paradoxical subreg if explicit truncation is needed
	 between the mode of the extraction (word_mode) and the target
	 mode.  Instead, create a temporary and use convert_move to set
	 the target.  */
      if (REG_P (target)
	  && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
	{
	  target = gen_lowpart (ext_mode, target);
	  if (GET_MODE_PRECISION (ext_mode)
	      > GET_MODE_PRECISION (GET_MODE (spec_target)))
	    spec_target_subreg = target;
	}
      else
	target = gen_reg_rtx (ext_mode);
    }

  /* Operands: 0 = result, 1 = structure, 2 = width, 3 = position.  */
  create_output_operand (&ops[0], target, ext_mode);
  create_fixed_operand (&ops[1], op0);
  create_integer_operand (&ops[2], bitsize);
  create_integer_operand (&ops[3], bitnum);
  if (maybe_expand_insn (extv->icode, 4, ops))
    {
      target = ops[0].value;
      if (target == spec_target)
	return target;
      if (target == spec_target_subreg)
	return spec_target;
      return convert_extracted_bit_field (target, mode, tmode, unsignedp);
    }
  return NULL_RTX;
}
1383
6d7db3c5
RS
/* A subroutine of extract_bit_field, with the same arguments.
   If FALLBACK_P is true, fall back to extract_fixed_bit_field
   if we can find no other means of implementing the operation.
   if FALLBACK_P is false, return NULL instead.

   The strategies are tried in order: direct subreg/load, multiword
   decomposition, narrowing multi-word registers, ext(z)v patterns,
   copying memory through a register, and finally the shift/mask
   fallback.  */

static rtx
extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
		     unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
		     machine_mode mode, machine_mode tmode,
		     bool fallback_p)
{
  rtx op0 = str_rtx;
  machine_mode int_mode;
  machine_mode mode1;

  if (tmode == VOIDmode)
    tmode = mode;

  /* Strip SUBREGs, folding their byte offsets into BITNUM.  */
  while (GET_CODE (op0) == SUBREG)
    {
      bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
      op0 = SUBREG_REG (op0);
    }

  /* If we have an out-of-bounds access to a register, just return an
     uninitialized register of the required mode.  This can occur if the
     source code contains an out-of-bounds access to a small array.  */
  if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
    return gen_reg_rtx (tmode);

  if (REG_P (op0)
      && mode == GET_MODE (op0)
      && bitnum == 0
      && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
    {
      /* We're trying to extract a full register from itself.  */
      return op0;
    }

  /* See if we can get a better vector mode before extracting.  */
  if (VECTOR_MODE_P (GET_MODE (op0))
      && !MEM_P (op0)
      && GET_MODE_INNER (GET_MODE (op0)) != tmode)
    {
      machine_mode new_mode;

      /* Choose the smallest vector mode class matching TMODE's class,
	 then widen until the size matches OP0.  */
      if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
	new_mode = MIN_MODE_VECTOR_FLOAT;
      else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
	new_mode = MIN_MODE_VECTOR_FRACT;
      else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
	new_mode = MIN_MODE_VECTOR_UFRACT;
      else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
	new_mode = MIN_MODE_VECTOR_ACCUM;
      else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
	new_mode = MIN_MODE_VECTOR_UACCUM;
      else
	new_mode = MIN_MODE_VECTOR_INT;

      for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
	if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
	    && targetm.vector_mode_supported_p (new_mode))
	  break;
      if (new_mode != VOIDmode)
	op0 = gen_lowpart (new_mode, op0);
    }

  /* Use vec_extract patterns for extracting parts of vectors whenever
     available.  The field must lie entirely within one vector element.  */
  if (VECTOR_MODE_P (GET_MODE (op0))
      && !MEM_P (op0)
      && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
      && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
	  == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
    {
      struct expand_operand ops[3];
      machine_mode outermode = GET_MODE (op0);
      machine_mode innermode = GET_MODE_INNER (outermode);
      enum insn_code icode = optab_handler (vec_extract_optab, outermode);
      unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);

      create_output_operand (&ops[0], target, innermode);
      create_input_operand (&ops[1], op0, outermode);
      create_integer_operand (&ops[2], pos);
      if (maybe_expand_insn (icode, 3, ops))
	{
	  target = ops[0].value;
	  if (GET_MODE (target) != mode)
	    return gen_lowpart (tmode, target);
	  return target;
	}
    }

  /* Make sure we are playing with integral modes.  Pun with subregs
     if we aren't.  */
  {
    machine_mode imode = int_mode_for_mode (GET_MODE (op0));
    if (imode != GET_MODE (op0))
      {
	if (MEM_P (op0))
	  op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
	else if (imode != BLKmode)
	  {
	    op0 = gen_lowpart (imode, op0);

	    /* If we got a SUBREG, force it into a register since we
	       aren't going to be able to do another SUBREG on it.  */
	    if (GET_CODE (op0) == SUBREG)
	      op0 = force_reg (imode, op0);
	  }
	else if (REG_P (op0))
	  {
	    /* No integer mode of the same size exists; copy OP0 into an
	       integer register of the smallest covering size.  */
	    rtx reg, subreg;
	    imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
					    MODE_INT);
	    reg = gen_reg_rtx (imode);
	    subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
	    emit_move_insn (subreg, op0);
	    op0 = reg;
	    bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
	  }
	else
	  {
	    /* Last resort: spill OP0 to a stack temporary and access it
	       as BLKmode memory.  */
	    HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (op0));
	    rtx mem = assign_stack_temp (GET_MODE (op0), size);
	    emit_move_insn (mem, op0);
	    op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
	  }
      }
  }

  /* ??? We currently assume TARGET is at least as big as BITSIZE.
     If that's wrong, the solution is to test for it and set TARGET to 0
     if needed.  */

  /* Get the mode of the field to use for atomic access or subreg
     conversion.  */
  mode1 = mode;
  if (SCALAR_INT_MODE_P (tmode))
    {
      machine_mode try_mode = mode_for_size (bitsize,
					     GET_MODE_CLASS (tmode), 0);
      if (try_mode != BLKmode)
	mode1 = try_mode;
    }
  gcc_assert (mode1 != BLKmode);

  /* Extraction of a full MODE1 value can be done with a subreg as long
     as the least significant bit of the value is the least significant
     bit of either OP0 or a word of OP0.  */
  if (!MEM_P (op0)
      && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
      && bitsize == GET_MODE_BITSIZE (mode1)
      && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0)))
    {
      rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
				     bitnum / BITS_PER_UNIT);
      if (sub)
	return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
    }

  /* Extraction of a full MODE1 value can be done with a load as long as
     the field is on a byte boundary and is sufficiently aligned.  */
  if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1))
    {
      op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT);
      return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
    }

  /* Handle fields bigger than a word.  */

  if (bitsize > BITS_PER_WORD)
    {
      /* Here we transfer the words of the field
	 in the order least significant first.
	 This is because the most significant word is the one which may
	 be less than full.  */

      unsigned int backwards = WORDS_BIG_ENDIAN;
      unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
      unsigned int i;
      rtx_insn *last;

      if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
	target = gen_reg_rtx (mode);

      /* Indicate for flow that the entire target reg is being set.  */
      emit_clobber (target);

      last = get_last_insn ();
      for (i = 0; i < nwords; i++)
	{
	  /* If I is 0, use the low-order word in both field and target;
	     if I is 1, use the next to lowest word; and so on.  */
	  /* Word number in TARGET to use.  */
	  unsigned int wordnum
	    = (backwards
	       ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
	       : i);
	  /* Offset from start of field in OP0.  */
	  unsigned int bit_offset = (backwards
				     ? MAX ((int) bitsize - ((int) i + 1)
					    * BITS_PER_WORD,
					    0)
				     : (int) i * BITS_PER_WORD);
	  rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
	  rtx result_part
	    = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
					     bitsize - i * BITS_PER_WORD),
				   bitnum + bit_offset, 1, target_part,
				   mode, word_mode, fallback_p);

	  gcc_assert (target_part);
	  if (!result_part)
	    {
	      delete_insns_since (last);
	      return NULL;
	    }

	  if (result_part != target_part)
	    emit_move_insn (target_part, result_part);
	}

      if (unsignedp)
	{
	  /* Unless we've filled TARGET, the upper regs in a multi-reg value
	     need to be zero'd out.  */
	  if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
	    {
	      unsigned int i, total_words;

	      total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
	      for (i = nwords; i < total_words; i++)
		emit_move_insn
		  (operand_subword (target,
				    backwards ? total_words - i - 1 : i,
				    1, VOIDmode),
		   const0_rtx);
	    }
	  return target;
	}

      /* Signed bit field: sign-extend with two arithmetic shifts.  */
      target = expand_shift (LSHIFT_EXPR, mode, target,
			     GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
      return expand_shift (RSHIFT_EXPR, mode, target,
			   GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
    }

  /* If OP0 is a multi-word register, narrow it to the affected word.
     If the region spans two words, defer to extract_split_bit_field.  */
  if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
    {
      op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
				 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
      bitnum %= BITS_PER_WORD;
      if (bitnum + bitsize > BITS_PER_WORD)
	{
	  if (!fallback_p)
	    return NULL_RTX;
	  target = extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
	  return convert_extracted_bit_field (target, mode, tmode, unsignedp);
	}
    }

  /* From here on we know the desired field is smaller than a word.
     If OP0 is a register, it too fits within a word.  */
  enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
  extraction_insn extv;
  if (!MEM_P (op0)
      /* ??? We could limit the structure size to the part of OP0 that
	 contains the field, with appropriate checks for endianness
	 and TRULY_NOOP_TRUNCATION.  */
      && get_best_reg_extraction_insn (&extv, pattern,
				       GET_MODE_BITSIZE (GET_MODE (op0)),
				       tmode))
    {
      rtx result = extract_bit_field_using_extv (&extv, op0, bitsize, bitnum,
						 unsignedp, target, mode,
						 tmode);
      if (result)
	return result;
    }

  /* If OP0 is a memory, try copying it to a register and seeing if a
     cheap register alternative is available.  */
  if (MEM_P (op0))
    {
      if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
					tmode))
	{
	  rtx result = extract_bit_field_using_extv (&extv, op0, bitsize,
						     bitnum, unsignedp,
						     target, mode,
						     tmode);
	  if (result)
	    return result;
	}

      rtx_insn *last = get_last_insn ();

      /* Try loading part of OP0 into a register and extracting the
	 bitfield from that.  */
      unsigned HOST_WIDE_INT bitpos;
      rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
					       0, 0, tmode, &bitpos);
      if (xop0)
	{
	  xop0 = copy_to_reg (xop0);
	  rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
					    unsignedp, target,
					    mode, tmode, false);
	  if (result)
	    return result;
	  delete_insns_since (last);
	}
    }

  if (!fallback_p)
    return NULL;

  /* Find a correspondingly-sized integer field, so we can apply
     shifts and masks to it.  */
  int_mode = int_mode_for_mode (tmode);
  if (int_mode == BLKmode)
    int_mode = int_mode_for_mode (mode);
  /* Should probably push op0 out to memory and then do a load.  */
  gcc_assert (int_mode != BLKmode);

  target = extract_fixed_bit_field (int_mode, op0, bitsize, bitnum,
				    target, unsignedp);
  return convert_extracted_bit_field (target, mode, tmode, unsignedp);
}
1717
1718/* Generate code to extract a byte-field from STR_RTX
1719 containing BITSIZE bits, starting at BITNUM,
1720 and put it in TARGET if possible (if TARGET is nonzero).
1721 Regardless of TARGET, we return the rtx for where the value is placed.
1722
1723 STR_RTX is the structure containing the byte (a REG or MEM).
1724 UNSIGNEDP is nonzero if this is an unsigned bit field.
1725 MODE is the natural mode of the field value once extracted.
1726 TMODE is the mode the caller would like the value to have;
1727 but the value may be returned with type MODE instead.
1728
1729 If a TARGET is specified and we can store in it at no extra cost,
1730 we do so, and return TARGET.
1731 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1732 if they are equally easy. */
1733
1734rtx
1735extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
c6285bd7 1736 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
ef4bddc2 1737 machine_mode mode, machine_mode tmode)
6d7db3c5 1738{
ef4bddc2 1739 machine_mode mode1;
f5d4f18c
SL
1740
1741 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
1742 if (GET_MODE_BITSIZE (GET_MODE (str_rtx)) > 0)
1743 mode1 = GET_MODE (str_rtx);
1744 else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1745 mode1 = GET_MODE (target);
1746 else
1747 mode1 = tmode;
1748
6f4e9cf8 1749 if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, mode1, 0, 0))
f5d4f18c
SL
1750 {
1751 rtx result;
1752
1753 /* Extraction of a full MODE1 value can be done with a load as long as
1754 the field is on a byte boundary and is sufficiently aligned. */
1755 if (simple_mem_bitfield_p (str_rtx, bitsize, bitnum, mode1))
1756 result = adjust_bitfield_address (str_rtx, mode1,
1757 bitnum / BITS_PER_UNIT);
1758 else
6f4e9cf8
BE
1759 {
1760 str_rtx = narrow_bit_field_mem (str_rtx, mode1, bitsize, bitnum,
1761 &bitnum);
1762 result = extract_fixed_bit_field_1 (mode, str_rtx, bitsize, bitnum,
1763 target, unsignedp);
1764 }
1765
f5d4f18c
SL
1766 return convert_extracted_bit_field (result, mode, tmode, unsignedp);
1767 }
1768
c6285bd7 1769 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
6d7db3c5 1770 target, mode, tmode, true);
44037a66
TG
1771}
1772\f
b8ab7fc8
RS
1773/* Use shifts and boolean operations to extract a field of BITSIZE bits
1774 from bit BITNUM of OP0.
44037a66
TG
1775
1776 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1777 If TARGET is nonzero, attempts to store the value there
1778 and return TARGET, but this is not guaranteed.
04050c69 1779 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
44037a66
TG
1780
1781static rtx
ef4bddc2 1782extract_fixed_bit_field (machine_mode tmode, rtx op0,
502b8322 1783 unsigned HOST_WIDE_INT bitsize,
b8ab7fc8 1784 unsigned HOST_WIDE_INT bitnum, rtx target,
c6285bd7 1785 int unsignedp)
44037a66 1786{
b8ab7fc8 1787 if (MEM_P (op0))
44037a66 1788 {
ef4bddc2 1789 machine_mode mode
548cfdc2
EB
1790 = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0), word_mode,
1791 MEM_VOLATILE_P (op0));
44037a66
TG
1792
1793 if (mode == VOIDmode)
1794 /* The only way this should occur is if the field spans word
1795 boundaries. */
b8ab7fc8 1796 return extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
44037a66 1797
f5d4f18c 1798 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
44037a66
TG
1799 }
1800
6f4e9cf8
BE
1801 return extract_fixed_bit_field_1 (tmode, op0, bitsize, bitnum,
1802 target, unsignedp);
1803}
1804
1805/* Helper function for extract_fixed_bit_field, extracts
1806 the bit field always using the MODE of OP0. */
1807
1808static rtx
ef4bddc2 1809extract_fixed_bit_field_1 (machine_mode tmode, rtx op0,
6f4e9cf8
BE
1810 unsigned HOST_WIDE_INT bitsize,
1811 unsigned HOST_WIDE_INT bitnum, rtx target,
1812 int unsignedp)
1813{
ef4bddc2 1814 machine_mode mode = GET_MODE (op0);
b8ab7fc8
RS
1815 gcc_assert (SCALAR_INT_MODE_P (mode));
1816
1817 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1818 for invalid input, such as extract equivalent of f5 from
1819 gcc.dg/pr48335-2.c. */
37811a73 1820
f76b9db2 1821 if (BYTES_BIG_ENDIAN)
b8ab7fc8 1822 /* BITNUM is the distance between our msb and that of OP0.
04050c69 1823 Convert it to the distance from the lsb. */
b8ab7fc8 1824 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
44037a66 1825
b8ab7fc8 1826 /* Now BITNUM is always the distance between the field's lsb and that of OP0.
44037a66
TG
1827 We have reduced the big-endian case to the little-endian case. */
1828
1829 if (unsignedp)
1830 {
b8ab7fc8 1831 if (bitnum)
44037a66
TG
1832 {
1833 /* If the field does not already start at the lsb,
1834 shift it so it does. */
44037a66 1835 /* Maybe propagate the target for the shift. */
f8cfc6aa 1836 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
6399c0ab
SB
1837 if (tmode != mode)
1838 subtarget = 0;
b8ab7fc8 1839 op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
44037a66
TG
1840 }
1841 /* Convert the value to the desired mode. */
1842 if (mode != tmode)
1843 op0 = convert_to_mode (tmode, op0, 1);
1844
1845 /* Unless the msb of the field used to be the msb when we shifted,
1846 mask out the upper bits. */
1847
b8ab7fc8 1848 if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
44037a66
TG
1849 return expand_binop (GET_MODE (op0), and_optab, op0,
1850 mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1851 target, 1, OPTAB_LIB_WIDEN);
1852 return op0;
1853 }
1854
1855 /* To extract a signed bit-field, first shift its msb to the msb of the word,
1856 then arithmetic-shift its lsb to the lsb of the word. */
1857 op0 = force_reg (mode, op0);
44037a66
TG
1858
1859 /* Find the narrowest integer mode that contains the field. */
1860
1861 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1862 mode = GET_MODE_WIDER_MODE (mode))
b8ab7fc8 1863 if (GET_MODE_BITSIZE (mode) >= bitsize + bitnum)
44037a66
TG
1864 {
1865 op0 = convert_to_mode (mode, op0, 0);
1866 break;
1867 }
1868
ccb1b17b
JJ
1869 if (mode != tmode)
1870 target = 0;
1871
b8ab7fc8 1872 if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
44037a66 1873 {
b8ab7fc8 1874 int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
44037a66 1875 /* Maybe propagate the target for the shift. */
f8cfc6aa 1876 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
44037a66
TG
1877 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1878 }
1879
1880 return expand_shift (RSHIFT_EXPR, mode, op0,
eb6c3df1 1881 GET_MODE_BITSIZE (mode) - bitsize, target, 0);
44037a66 1882}
44037a66
TG
1883
1884/* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
088c5368 1885 VALUE << BITPOS. */
44037a66
TG
1886
1887static rtx
ef4bddc2 1888lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
088c5368 1889 int bitpos)
44037a66 1890{
807e902e 1891 return immed_wide_int_const (wi::lshift (value, bitpos), mode);
44037a66
TG
1892}
1893\f
1894/* Extract a bit field that is split across two words
1895 and return an RTX for the result.
1896
1897 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1898 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
04050c69 1899 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend. */
44037a66
TG
1900
1901static rtx
502b8322
AJ
1902extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1903 unsigned HOST_WIDE_INT bitpos, int unsignedp)
44037a66 1904{
770ae6cc
RK
1905 unsigned int unit;
1906 unsigned int bitsdone = 0;
c16ddde3 1907 rtx result = NULL_RTX;
06c94bce 1908 int first = 1;
44037a66 1909
4ee16841
DE
1910 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1911 much at a time. */
f8cfc6aa 1912 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
4ee16841
DE
1913 unit = BITS_PER_WORD;
1914 else
609023ff 1915 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
4ee16841 1916
06c94bce
RS
1917 while (bitsdone < bitsize)
1918 {
770ae6cc 1919 unsigned HOST_WIDE_INT thissize;
06c94bce 1920 rtx part, word;
770ae6cc
RK
1921 unsigned HOST_WIDE_INT thispos;
1922 unsigned HOST_WIDE_INT offset;
06c94bce
RS
1923
1924 offset = (bitpos + bitsdone) / unit;
1925 thispos = (bitpos + bitsdone) % unit;
1926
0eb61c19
DE
1927 /* THISSIZE must not overrun a word boundary. Otherwise,
1928 extract_fixed_bit_field will call us again, and we will mutually
1929 recurse forever. */
1930 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1931 thissize = MIN (thissize, unit - thispos);
06c94bce
RS
1932
1933 /* If OP0 is a register, then handle OFFSET here.
5f57dff0
JW
1934
1935 When handling multiword bitfields, extract_bit_field may pass
1936 down a word_mode SUBREG of a larger REG for a bitfield that actually
1937 crosses a word boundary. Thus, for a SUBREG, we must find
1938 the current word starting from the base register. */
1939 if (GET_CODE (op0) == SUBREG)
1940 {
ddef6bc7
JJ
1941 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1942 word = operand_subword_force (SUBREG_REG (op0), word_offset,
5f57dff0
JW
1943 GET_MODE (SUBREG_REG (op0)));
1944 offset = 0;
1945 }
f8cfc6aa 1946 else if (REG_P (op0))
06c94bce
RS
1947 {
1948 word = operand_subword_force (op0, offset, GET_MODE (op0));
1949 offset = 0;
1950 }
1951 else
1952 word = op0;
1953
06c94bce 1954 /* Extract the parts in bit-counting order,
0eb61c19 1955 whose meaning is determined by BYTES_PER_UNIT.
b8ab7fc8
RS
1956 OFFSET is in UNITs, and UNIT is in bits. */
1957 part = extract_fixed_bit_field (word_mode, word, thissize,
c6285bd7 1958 offset * unit + thispos, 0, 1);
06c94bce 1959 bitsdone += thissize;
44037a66 1960
06c94bce 1961 /* Shift this part into place for the result. */
f76b9db2
ILT
1962 if (BYTES_BIG_ENDIAN)
1963 {
1964 if (bitsize != bitsdone)
1965 part = expand_shift (LSHIFT_EXPR, word_mode, part,
eb6c3df1 1966 bitsize - bitsdone, 0, 1);
f76b9db2
ILT
1967 }
1968 else
1969 {
1970 if (bitsdone != thissize)
1971 part = expand_shift (LSHIFT_EXPR, word_mode, part,
eb6c3df1 1972 bitsdone - thissize, 0, 1);
f76b9db2 1973 }
44037a66 1974
06c94bce
RS
1975 if (first)
1976 result = part;
1977 else
1978 /* Combine the parts with bitwise or. This works
1979 because we extracted each part as an unsigned bit field. */
1980 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
1981 OPTAB_LIB_WIDEN);
1982
1983 first = 0;
1984 }
44037a66
TG
1985
1986 /* Unsigned bit field: we are done. */
1987 if (unsignedp)
1988 return result;
1989 /* Signed bit field: sign-extend with two arithmetic shifts. */
1990 result = expand_shift (LSHIFT_EXPR, word_mode, result,
eb6c3df1 1991 BITS_PER_WORD - bitsize, NULL_RTX, 0);
44037a66 1992 return expand_shift (RSHIFT_EXPR, word_mode, result,
eb6c3df1 1993 BITS_PER_WORD - bitsize, NULL_RTX, 0);
44037a66
TG
1994}
1995\f
18b526e8
RS
1996/* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
1997 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
1998 MODE, fill the upper bits with zeros. Fail if the layout of either
1999 mode is unknown (as for CC modes) or if the extraction would involve
2000 unprofitable mode punning. Return the value on success, otherwise
2001 return null.
2002
2003 This is different from gen_lowpart* in these respects:
2004
2005 - the returned value must always be considered an rvalue
2006
2007 - when MODE is wider than SRC_MODE, the extraction involves
2008 a zero extension
2009
2010 - when MODE is smaller than SRC_MODE, the extraction involves
2011 a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2012
2013 In other words, this routine performs a computation, whereas the
2014 gen_lowpart* routines are conceptually lvalue or rvalue subreg
2015 operations. */
2016
2017rtx
ef4bddc2 2018extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
18b526e8 2019{
ef4bddc2 2020 machine_mode int_mode, src_int_mode;
18b526e8
RS
2021
2022 if (mode == src_mode)
2023 return src;
2024
2025 if (CONSTANT_P (src))
d898d29b
JJ
2026 {
2027 /* simplify_gen_subreg can't be used here, as if simplify_subreg
2028 fails, it will happily create (subreg (symbol_ref)) or similar
2029 invalid SUBREGs. */
2030 unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2031 rtx ret = simplify_subreg (mode, src, src_mode, byte);
2032 if (ret)
2033 return ret;
2034
2035 if (GET_MODE (src) == VOIDmode
2036 || !validate_subreg (mode, src_mode, src, byte))
2037 return NULL_RTX;
2038
2039 src = force_reg (GET_MODE (src), src);
2040 return gen_rtx_SUBREG (mode, src, byte);
2041 }
18b526e8
RS
2042
2043 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2044 return NULL_RTX;
2045
2046 if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2047 && MODES_TIEABLE_P (mode, src_mode))
2048 {
2049 rtx x = gen_lowpart_common (mode, src);
2050 if (x)
2051 return x;
2052 }
2053
2054 src_int_mode = int_mode_for_mode (src_mode);
2055 int_mode = int_mode_for_mode (mode);
2056 if (src_int_mode == BLKmode || int_mode == BLKmode)
2057 return NULL_RTX;
2058
2059 if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2060 return NULL_RTX;
2061 if (!MODES_TIEABLE_P (int_mode, mode))
2062 return NULL_RTX;
2063
2064 src = gen_lowpart (src_int_mode, src);
2065 src = convert_modes (int_mode, src_int_mode, src, true);
2066 src = gen_lowpart (mode, src);
2067 return src;
2068}
2069\f
44037a66
TG
2070/* Add INC into TARGET. */
2071
2072void
502b8322 2073expand_inc (rtx target, rtx inc)
44037a66
TG
2074{
2075 rtx value = expand_binop (GET_MODE (target), add_optab,
2076 target, inc,
2077 target, 0, OPTAB_LIB_WIDEN);
2078 if (value != target)
2079 emit_move_insn (target, value);
2080}
2081
2082/* Subtract DEC from TARGET. */
2083
2084void
502b8322 2085expand_dec (rtx target, rtx dec)
44037a66
TG
2086{
2087 rtx value = expand_binop (GET_MODE (target), sub_optab,
2088 target, dec,
2089 target, 0, OPTAB_LIB_WIDEN);
2090 if (value != target)
2091 emit_move_insn (target, value);
2092}
2093\f
2094/* Output a shift instruction for expression code CODE,
2095 with SHIFTED being the rtx for the value to shift,
86529a49 2096 and AMOUNT the rtx for the amount to shift by.
44037a66
TG
2097 Store the result in the rtx TARGET, if that is convenient.
2098 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2099 Return the rtx for where the value is. */
2100
86529a49 2101static rtx
ef4bddc2 2102expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
86529a49 2103 rtx amount, rtx target, int unsignedp)
44037a66 2104{
b3694847
SS
2105 rtx op1, temp = 0;
2106 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2107 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
71d46ca5
MM
2108 optab lshift_optab = ashl_optab;
2109 optab rshift_arith_optab = ashr_optab;
2110 optab rshift_uns_optab = lshr_optab;
2111 optab lrotate_optab = rotl_optab;
2112 optab rrotate_optab = rotr_optab;
ef4bddc2
RS
2113 machine_mode op1_mode;
2114 machine_mode scalar_mode = mode;
d858f359 2115 int attempt;
f40751dd 2116 bool speed = optimize_insn_for_speed_p ();
44037a66 2117
a4ee446d
JJ
2118 if (VECTOR_MODE_P (mode))
2119 scalar_mode = GET_MODE_INNER (mode);
86529a49 2120 op1 = amount;
71d46ca5
MM
2121 op1_mode = GET_MODE (op1);
2122
2123 /* Determine whether the shift/rotate amount is a vector, or scalar. If the
2124 shift amount is a vector, use the vector/vector shift patterns. */
2125 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2126 {
2127 lshift_optab = vashl_optab;
2128 rshift_arith_optab = vashr_optab;
2129 rshift_uns_optab = vlshr_optab;
2130 lrotate_optab = vrotl_optab;
2131 rrotate_optab = vrotr_optab;
2132 }
2133
44037a66
TG
2134 /* Previously detected shift-counts computed by NEGATE_EXPR
2135 and shifted in the other direction; but that does not work
2136 on all machines. */
2137
166cdf4a
RH
2138 if (SHIFT_COUNT_TRUNCATED)
2139 {
481683e1 2140 if (CONST_INT_P (op1)
0fb7aeda 2141 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
a4ee446d 2142 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
0fb7aeda 2143 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
a4ee446d 2144 % GET_MODE_BITSIZE (scalar_mode));
166cdf4a 2145 else if (GET_CODE (op1) == SUBREG
c1cb09ad 2146 && subreg_lowpart_p (op1)
7afe2801
DM
2147 && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2148 && SCALAR_INT_MODE_P (GET_MODE (op1)))
166cdf4a
RH
2149 op1 = SUBREG_REG (op1);
2150 }
2ab0a5c4 2151
75776c6d
JJ
2152 /* Canonicalize rotates by constant amount. If op1 is bitsize / 2,
2153 prefer left rotation, if op1 is from bitsize / 2 + 1 to
2154 bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1
2155 amount instead. */
2156 if (rotate
2157 && CONST_INT_P (op1)
a4ee446d
JJ
2158 && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
2159 GET_MODE_BITSIZE (scalar_mode) - 1))
75776c6d 2160 {
a4ee446d 2161 op1 = GEN_INT (GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
75776c6d
JJ
2162 left = !left;
2163 code = left ? LROTATE_EXPR : RROTATE_EXPR;
2164 }
2165
44037a66
TG
2166 if (op1 == const0_rtx)
2167 return shifted;
2168
15bad393
RS
2169 /* Check whether its cheaper to implement a left shift by a constant
2170 bit count by a sequence of additions. */
2171 if (code == LSHIFT_EXPR
481683e1 2172 && CONST_INT_P (op1)
15bad393 2173 && INTVAL (op1) > 0
a4ee446d 2174 && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
cb2eb96f 2175 && INTVAL (op1) < MAX_BITS_PER_WORD
5322d07e
NF
2176 && (shift_cost (speed, mode, INTVAL (op1))
2177 > INTVAL (op1) * add_cost (speed, mode))
2178 && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
15bad393
RS
2179 {
2180 int i;
2181 for (i = 0; i < INTVAL (op1); i++)
2182 {
2183 temp = force_reg (mode, shifted);
2184 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2185 unsignedp, OPTAB_LIB_WIDEN);
2186 }
2187 return shifted;
2188 }
2189
d858f359 2190 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
44037a66
TG
2191 {
2192 enum optab_methods methods;
2193
d858f359 2194 if (attempt == 0)
44037a66 2195 methods = OPTAB_DIRECT;
d858f359 2196 else if (attempt == 1)
44037a66
TG
2197 methods = OPTAB_WIDEN;
2198 else
2199 methods = OPTAB_LIB_WIDEN;
2200
2201 if (rotate)
2202 {
2203 /* Widening does not work for rotation. */
2204 if (methods == OPTAB_WIDEN)
2205 continue;
2206 else if (methods == OPTAB_LIB_WIDEN)
cbec710e 2207 {
39e71615 2208 /* If we have been unable to open-code this by a rotation,
cbec710e 2209 do it as the IOR of two shifts. I.e., to rotate A
ae6fa899
JJ
2210 by N bits, compute
2211 (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
cbec710e
RK
2212 where C is the bitsize of A.
2213
2214 It is theoretically possible that the target machine might
2215 not be able to perform either shift and hence we would
2216 be making two libcalls rather than just the one for the
2217 shift (similarly if IOR could not be done). We will allow
2218 this extremely unlikely lossage to avoid complicating the
2219 code below. */
2220
39e71615 2221 rtx subtarget = target == shifted ? 0 : target;
86529a49 2222 rtx new_amount, other_amount;
39e71615 2223 rtx temp1;
86529a49
RG
2224
2225 new_amount = op1;
ae6fa899
JJ
2226 if (op1 == const0_rtx)
2227 return shifted;
2228 else if (CONST_INT_P (op1))
a4ee446d 2229 other_amount = GEN_INT (GET_MODE_BITSIZE (scalar_mode)
5c049507
RG
2230 - INTVAL (op1));
2231 else
ae6fa899
JJ
2232 {
2233 other_amount
2234 = simplify_gen_unary (NEG, GET_MODE (op1),
2235 op1, GET_MODE (op1));
a4ee446d 2236 HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
ae6fa899 2237 other_amount
69a59f0f
RS
2238 = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2239 gen_int_mode (mask, GET_MODE (op1)));
ae6fa899 2240 }
39e71615
RK
2241
2242 shifted = force_reg (mode, shifted);
2243
86529a49
RG
2244 temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2245 mode, shifted, new_amount, 0, 1);
2246 temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2247 mode, shifted, other_amount,
2248 subtarget, 1);
39e71615
RK
2249 return expand_binop (mode, ior_optab, temp, temp1, target,
2250 unsignedp, methods);
cbec710e 2251 }
44037a66
TG
2252
2253 temp = expand_binop (mode,
71d46ca5 2254 left ? lrotate_optab : rrotate_optab,
44037a66
TG
2255 shifted, op1, target, unsignedp, methods);
2256 }
2257 else if (unsignedp)
a34958c9 2258 temp = expand_binop (mode,
71d46ca5 2259 left ? lshift_optab : rshift_uns_optab,
a34958c9 2260 shifted, op1, target, unsignedp, methods);
44037a66
TG
2261
2262 /* Do arithmetic shifts.
2263 Also, if we are going to widen the operand, we can just as well
2264 use an arithmetic right-shift instead of a logical one. */
2265 if (temp == 0 && ! rotate
2266 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2267 {
2268 enum optab_methods methods1 = methods;
2269
2270 /* If trying to widen a log shift to an arithmetic shift,
2271 don't accept an arithmetic shift of the same size. */
2272 if (unsignedp)
2273 methods1 = OPTAB_MUST_WIDEN;
2274
2275 /* Arithmetic shift */
2276
2277 temp = expand_binop (mode,
71d46ca5 2278 left ? lshift_optab : rshift_arith_optab,
44037a66
TG
2279 shifted, op1, target, unsignedp, methods1);
2280 }
2281
711a5e64 2282 /* We used to try extzv here for logical right shifts, but that was
c410d49e 2283 only useful for one machine, the VAX, and caused poor code
711a5e64
RK
2284 generation there for lshrdi3, so the code was deleted and a
2285 define_expand for lshrsi3 was added to vax.md. */
44037a66
TG
2286 }
2287
5b0264cb 2288 gcc_assert (temp);
44037a66
TG
2289 return temp;
2290}
eb6c3df1
RG
2291
2292/* Output a shift instruction for expression code CODE,
2293 with SHIFTED being the rtx for the value to shift,
2294 and AMOUNT the amount to shift by.
2295 Store the result in the rtx TARGET, if that is convenient.
2296 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2297 Return the rtx for where the value is. */
2298
2299rtx
ef4bddc2 2300expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
eb6c3df1
RG
2301 int amount, rtx target, int unsignedp)
2302{
86529a49
RG
2303 return expand_shift_1 (code, mode,
2304 shifted, GEN_INT (amount), target, unsignedp);
2305}
2306
2307/* Output a shift instruction for expression code CODE,
2308 with SHIFTED being the rtx for the value to shift,
2309 and AMOUNT the tree for the amount to shift by.
2310 Store the result in the rtx TARGET, if that is convenient.
2311 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2312 Return the rtx for where the value is. */
2313
2314rtx
ef4bddc2 2315expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
86529a49
RG
2316 tree amount, rtx target, int unsignedp)
2317{
2318 return expand_shift_1 (code, mode,
2319 shifted, expand_normal (amount), target, unsignedp);
eb6c3df1 2320}
86529a49 2321
44037a66 2322\f
8efc8980
RS
2323/* Indicates the type of fixup needed after a constant multiplication.
2324 BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2325 the result should be negated, and ADD_VARIANT means that the
2326 multiplicand should be added to the result. */
2327enum mult_variant {basic_variant, negate_variant, add_variant};
2328
41c64ac0 2329static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
ef4bddc2
RS
2330 const struct mult_cost *, machine_mode mode);
2331static bool choose_mult_variant (machine_mode, HOST_WIDE_INT,
f258e38b 2332 struct algorithm *, enum mult_variant *, int);
ef4bddc2 2333static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
8efc8980 2334 const struct algorithm *, enum mult_variant);
502b8322 2335static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
ef4bddc2
RS
2336static rtx extract_high_half (machine_mode, rtx);
2337static rtx expmed_mult_highpart (machine_mode, rtx, rtx, rtx, int, int);
2338static rtx expmed_mult_highpart_optab (machine_mode, rtx, rtx, rtx,
8efc8980 2339 int, int);
44037a66 2340/* Compute and return the best algorithm for multiplying by T.
7963ac37
RK
2341 The algorithm must cost less than cost_limit
2342 If retval.cost >= COST_LIMIT, no algorithm was found and all
41c64ac0
RS
2343 other field of the returned struct are undefined.
2344 MODE is the machine mode of the multiplication. */
44037a66 2345
819126a6 2346static void
502b8322 2347synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
ef4bddc2 2348 const struct mult_cost *cost_limit, machine_mode mode)
44037a66 2349{
b2fb324c 2350 int m;
52786026 2351 struct algorithm *alg_in, *best_alg;
26276705
RS
2352 struct mult_cost best_cost;
2353 struct mult_cost new_limit;
2354 int op_cost, op_latency;
ef268d34 2355 unsigned HOST_WIDE_INT orig_t = t;
b2fb324c 2356 unsigned HOST_WIDE_INT q;
84ddb681 2357 int maxm, hash_index;
7b13ee6b
KH
2358 bool cache_hit = false;
2359 enum alg_code cache_alg = alg_zero;
f40751dd 2360 bool speed = optimize_insn_for_speed_p ();
ef4bddc2 2361 machine_mode imode;
5322d07e 2362 struct alg_hash_entry *entry_ptr;
44037a66 2363
7963ac37
RK
2364 /* Indicate that no algorithm is yet found. If no algorithm
2365 is found, this value will be returned and indicate failure. */
26276705 2366 alg_out->cost.cost = cost_limit->cost + 1;
3ab0f290 2367 alg_out->cost.latency = cost_limit->latency + 1;
44037a66 2368
26276705
RS
2369 if (cost_limit->cost < 0
2370 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
819126a6 2371 return;
44037a66 2372
84ddb681
RH
2373 /* Be prepared for vector modes. */
2374 imode = GET_MODE_INNER (mode);
2375 if (imode == VOIDmode)
2376 imode = mode;
2377
2378 maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2379
0792ab19 2380 /* Restrict the bits of "t" to the multiplication's mode. */
84ddb681 2381 t &= GET_MODE_MASK (imode);
0792ab19 2382
b385aeda
RK
2383 /* t == 1 can be done in zero cost. */
2384 if (t == 1)
b2fb324c 2385 {
819126a6 2386 alg_out->ops = 1;
26276705
RS
2387 alg_out->cost.cost = 0;
2388 alg_out->cost.latency = 0;
819126a6
RK
2389 alg_out->op[0] = alg_m;
2390 return;
b2fb324c
RK
2391 }
2392
b385aeda
RK
2393 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2394 fail now. */
819126a6 2395 if (t == 0)
b385aeda 2396 {
5322d07e 2397 if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
819126a6 2398 return;
b385aeda
RK
2399 else
2400 {
819126a6 2401 alg_out->ops = 1;
5322d07e
NF
2402 alg_out->cost.cost = zero_cost (speed);
2403 alg_out->cost.latency = zero_cost (speed);
819126a6
RK
2404 alg_out->op[0] = alg_zero;
2405 return;
b385aeda
RK
2406 }
2407 }
2408
52786026
RK
2409 /* We'll be needing a couple extra algorithm structures now. */
2410
1b4572a8
KG
2411 alg_in = XALLOCA (struct algorithm);
2412 best_alg = XALLOCA (struct algorithm);
26276705 2413 best_cost = *cost_limit;
52786026 2414
7b13ee6b 2415 /* Compute the hash index. */
f40751dd 2416 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
7b13ee6b
KH
2417
2418 /* See if we already know what to do for T. */
5322d07e
NF
2419 entry_ptr = alg_hash_entry_ptr (hash_index);
2420 if (entry_ptr->t == t
2421 && entry_ptr->mode == mode
2422 && entry_ptr->mode == mode
2423 && entry_ptr->speed == speed
2424 && entry_ptr->alg != alg_unknown)
7b13ee6b 2425 {
5322d07e 2426 cache_alg = entry_ptr->alg;
0178027c
KH
2427
2428 if (cache_alg == alg_impossible)
7b13ee6b 2429 {
0178027c 2430 /* The cache tells us that it's impossible to synthesize
5322d07e
NF
2431 multiplication by T within entry_ptr->cost. */
2432 if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
0178027c
KH
2433 /* COST_LIMIT is at least as restrictive as the one
2434 recorded in the hash table, in which case we have no
2435 hope of synthesizing a multiplication. Just
2436 return. */
2437 return;
2438
2439 /* If we get here, COST_LIMIT is less restrictive than the
2440 one recorded in the hash table, so we may be able to
2441 synthesize a multiplication. Proceed as if we didn't
2442 have the cache entry. */
2443 }
2444 else
2445 {
5322d07e 2446 if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
0178027c
KH
2447 /* The cached algorithm shows that this multiplication
2448 requires more cost than COST_LIMIT. Just return. This
2449 way, we don't clobber this cache entry with
2450 alg_impossible but retain useful information. */
2451 return;
7b13ee6b 2452
0178027c
KH
2453 cache_hit = true;
2454
2455 switch (cache_alg)
2456 {
2457 case alg_shift:
2458 goto do_alg_shift;
7b13ee6b 2459
0178027c
KH
2460 case alg_add_t_m2:
2461 case alg_sub_t_m2:
2462 goto do_alg_addsub_t_m2;
7b13ee6b 2463
0178027c
KH
2464 case alg_add_factor:
2465 case alg_sub_factor:
2466 goto do_alg_addsub_factor;
7b13ee6b 2467
0178027c
KH
2468 case alg_add_t2_m:
2469 goto do_alg_add_t2_m;
7b13ee6b 2470
0178027c
KH
2471 case alg_sub_t2_m:
2472 goto do_alg_sub_t2_m;
2473
2474 default:
2475 gcc_unreachable ();
2476 }
7b13ee6b
KH
2477 }
2478 }
2479
b385aeda
RK
2480 /* If we have a group of zero bits at the low-order part of T, try
2481 multiplying by the remaining bits and then doing a shift. */
2482
b2fb324c 2483 if ((t & 1) == 0)
44037a66 2484 {
7b13ee6b 2485 do_alg_shift:
b2fb324c 2486 m = floor_log2 (t & -t); /* m = number of low zero bits */
0792ab19 2487 if (m < maxm)
44037a66 2488 {
02a65aef 2489 q = t >> m;
15bad393
RS
2490 /* The function expand_shift will choose between a shift and
2491 a sequence of additions, so the observed cost is given as
5322d07e
NF
2492 MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */
2493 op_cost = m * add_cost (speed, mode);
2494 if (shift_cost (speed, mode, m) < op_cost)
2495 op_cost = shift_cost (speed, mode, m);
26276705
RS
2496 new_limit.cost = best_cost.cost - op_cost;
2497 new_limit.latency = best_cost.latency - op_cost;
2498 synth_mult (alg_in, q, &new_limit, mode);
2499
2500 alg_in->cost.cost += op_cost;
2501 alg_in->cost.latency += op_cost;
2502 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
02a65aef
R
2503 {
2504 struct algorithm *x;
26276705 2505 best_cost = alg_in->cost;
02a65aef
R
2506 x = alg_in, alg_in = best_alg, best_alg = x;
2507 best_alg->log[best_alg->ops] = m;
2508 best_alg->op[best_alg->ops] = alg_shift;
02a65aef 2509 }
ddc2690a
KH
2510
2511 /* See if treating ORIG_T as a signed number yields a better
2512 sequence. Try this sequence only for a negative ORIG_T
2513 as it would be useless for a non-negative ORIG_T. */
2514 if ((HOST_WIDE_INT) orig_t < 0)
2515 {
2516 /* Shift ORIG_T as follows because a right shift of a
2517 negative-valued signed type is implementation
2518 defined. */
2519 q = ~(~orig_t >> m);
2520 /* The function expand_shift will choose between a shift
2521 and a sequence of additions, so the observed cost is
5322d07e
NF
2522 given as MIN (m * add_cost(speed, mode),
2523 shift_cost(speed, mode, m)). */
2524 op_cost = m * add_cost (speed, mode);
2525 if (shift_cost (speed, mode, m) < op_cost)
2526 op_cost = shift_cost (speed, mode, m);
ddc2690a
KH
2527 new_limit.cost = best_cost.cost - op_cost;
2528 new_limit.latency = best_cost.latency - op_cost;
2529 synth_mult (alg_in, q, &new_limit, mode);
2530
2531 alg_in->cost.cost += op_cost;
2532 alg_in->cost.latency += op_cost;
2533 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2534 {
2535 struct algorithm *x;
2536 best_cost = alg_in->cost;
2537 x = alg_in, alg_in = best_alg, best_alg = x;
2538 best_alg->log[best_alg->ops] = m;
2539 best_alg->op[best_alg->ops] = alg_shift;
2540 }
2541 }
819126a6 2542 }
7b13ee6b
KH
2543 if (cache_hit)
2544 goto done;
819126a6
RK
2545 }
2546
2547 /* If we have an odd number, add or subtract one. */
2548 if ((t & 1) != 0)
2549 {
2550 unsigned HOST_WIDE_INT w;
2551
7b13ee6b 2552 do_alg_addsub_t_m2:
819126a6
RK
2553 for (w = 1; (w & t) != 0; w <<= 1)
2554 ;
31031edd 2555 /* If T was -1, then W will be zero after the loop. This is another
c410d49e 2556 case where T ends with ...111. Handling this with (T + 1) and
31031edd
JL
2557 subtract 1 produces slightly better code and results in algorithm
2558 selection much faster than treating it like the ...0111 case
2559 below. */
2560 if (w == 0
2561 || (w > 2
2562 /* Reject the case where t is 3.
2563 Thus we prefer addition in that case. */
2564 && t != 3))
819126a6
RK
2565 {
2566 /* T ends with ...111. Multiply by (T + 1) and subtract 1. */
2567
5322d07e 2568 op_cost = add_cost (speed, mode);
26276705
RS
2569 new_limit.cost = best_cost.cost - op_cost;
2570 new_limit.latency = best_cost.latency - op_cost;
2571 synth_mult (alg_in, t + 1, &new_limit, mode);
b2fb324c 2572
26276705
RS
2573 alg_in->cost.cost += op_cost;
2574 alg_in->cost.latency += op_cost;
2575 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
44037a66 2576 {
b2fb324c 2577 struct algorithm *x;
26276705 2578 best_cost = alg_in->cost;
b2fb324c 2579 x = alg_in, alg_in = best_alg, best_alg = x;
819126a6
RK
2580 best_alg->log[best_alg->ops] = 0;
2581 best_alg->op[best_alg->ops] = alg_sub_t_m2;
44037a66 2582 }
44037a66 2583 }
819126a6
RK
2584 else
2585 {
2586 /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
44037a66 2587
5322d07e 2588 op_cost = add_cost (speed, mode);
26276705
RS
2589 new_limit.cost = best_cost.cost - op_cost;
2590 new_limit.latency = best_cost.latency - op_cost;
2591 synth_mult (alg_in, t - 1, &new_limit, mode);
819126a6 2592
26276705
RS
2593 alg_in->cost.cost += op_cost;
2594 alg_in->cost.latency += op_cost;
2595 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
819126a6
RK
2596 {
2597 struct algorithm *x;
26276705 2598 best_cost = alg_in->cost;
819126a6
RK
2599 x = alg_in, alg_in = best_alg, best_alg = x;
2600 best_alg->log[best_alg->ops] = 0;
2601 best_alg->op[best_alg->ops] = alg_add_t_m2;
819126a6
RK
2602 }
2603 }
ef268d34
KH
2604
2605 /* We may be able to calculate a * -7, a * -15, a * -31, etc
2606 quickly with a - a * n for some appropriate constant n. */
2607 m = exact_log2 (-orig_t + 1);
2608 if (m >= 0 && m < maxm)
2609 {
5322d07e 2610 op_cost = shiftsub1_cost (speed, mode, m);
ef268d34
KH
2611 new_limit.cost = best_cost.cost - op_cost;
2612 new_limit.latency = best_cost.latency - op_cost;
84ddb681
RH
2613 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2614 &new_limit, mode);
ef268d34
KH
2615
2616 alg_in->cost.cost += op_cost;
2617 alg_in->cost.latency += op_cost;
2618 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2619 {
2620 struct algorithm *x;
2621 best_cost = alg_in->cost;
2622 x = alg_in, alg_in = best_alg, best_alg = x;
2623 best_alg->log[best_alg->ops] = m;
2624 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2625 }
2626 }
2627
7b13ee6b
KH
2628 if (cache_hit)
2629 goto done;
819126a6 2630 }
63610db9 2631
44037a66 2632 /* Look for factors of t of the form
7963ac37 2633 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
44037a66 2634 If we find such a factor, we can multiply by t using an algorithm that
7963ac37 2635 multiplies by q, shift the result by m and add/subtract it to itself.
44037a66 2636
7963ac37
RK
2637 We search for large factors first and loop down, even if large factors
2638 are less probable than small; if we find a large factor we will find a
2639 good sequence quickly, and therefore be able to prune (by decreasing
2640 COST_LIMIT) the search. */
2641
7b13ee6b 2642 do_alg_addsub_factor:
7963ac37 2643 for (m = floor_log2 (t - 1); m >= 2; m--)
44037a66 2644 {
7963ac37 2645 unsigned HOST_WIDE_INT d;
44037a66 2646
7963ac37 2647 d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
7b13ee6b
KH
2648 if (t % d == 0 && t > d && m < maxm
2649 && (!cache_hit || cache_alg == alg_add_factor))
44037a66 2650 {
26276705
RS
2651 /* If the target has a cheap shift-and-add instruction use
2652 that in preference to a shift insn followed by an add insn.
2653 Assume that the shift-and-add is "atomic" with a latency
a37739c1 2654 equal to its cost, otherwise assume that on superscalar
26276705
RS
2655 hardware the shift may be executed concurrently with the
2656 earlier steps in the algorithm. */
5322d07e
NF
2657 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2658 if (shiftadd_cost (speed, mode, m) < op_cost)
26276705 2659 {
5322d07e 2660 op_cost = shiftadd_cost (speed, mode, m);
26276705
RS
2661 op_latency = op_cost;
2662 }
2663 else
5322d07e 2664 op_latency = add_cost (speed, mode);
26276705
RS
2665
2666 new_limit.cost = best_cost.cost - op_cost;
2667 new_limit.latency = best_cost.latency - op_latency;
2668 synth_mult (alg_in, t / d, &new_limit, mode);
44037a66 2669
26276705
RS
2670 alg_in->cost.cost += op_cost;
2671 alg_in->cost.latency += op_latency;
2672 if (alg_in->cost.latency < op_cost)
2673 alg_in->cost.latency = op_cost;
2674 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
44037a66 2675 {
7963ac37 2676 struct algorithm *x;
26276705 2677 best_cost = alg_in->cost;
7963ac37 2678 x = alg_in, alg_in = best_alg, best_alg = x;
b385aeda 2679 best_alg->log[best_alg->ops] = m;
819126a6 2680 best_alg->op[best_alg->ops] = alg_add_factor;
44037a66 2681 }
c0b262c1
TG
2682 /* Other factors will have been taken care of in the recursion. */
2683 break;
44037a66
TG
2684 }
2685
7963ac37 2686 d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
7b13ee6b
KH
2687 if (t % d == 0 && t > d && m < maxm
2688 && (!cache_hit || cache_alg == alg_sub_factor))
44037a66 2689 {
26276705
RS
2690 /* If the target has a cheap shift-and-subtract insn use
2691 that in preference to a shift insn followed by a sub insn.
2692 Assume that the shift-and-sub is "atomic" with a latency
2693 equal to it's cost, otherwise assume that on superscalar
2694 hardware the shift may be executed concurrently with the
2695 earlier steps in the algorithm. */
5322d07e
NF
2696 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2697 if (shiftsub0_cost (speed, mode, m) < op_cost)
26276705 2698 {
5322d07e 2699 op_cost = shiftsub0_cost (speed, mode, m);
26276705
RS
2700 op_latency = op_cost;
2701 }
2702 else
5322d07e 2703 op_latency = add_cost (speed, mode);
26276705
RS
2704
2705 new_limit.cost = best_cost.cost - op_cost;
417c735c 2706 new_limit.latency = best_cost.latency - op_latency;
26276705 2707 synth_mult (alg_in, t / d, &new_limit, mode);
44037a66 2708
26276705
RS
2709 alg_in->cost.cost += op_cost;
2710 alg_in->cost.latency += op_latency;
2711 if (alg_in->cost.latency < op_cost)
2712 alg_in->cost.latency = op_cost;
2713 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
44037a66 2714 {
7963ac37 2715 struct algorithm *x;
26276705 2716 best_cost = alg_in->cost;
7963ac37 2717 x = alg_in, alg_in = best_alg, best_alg = x;
b385aeda 2718 best_alg->log[best_alg->ops] = m;
819126a6 2719 best_alg->op[best_alg->ops] = alg_sub_factor;
44037a66 2720 }
c0b262c1 2721 break;
44037a66
TG
2722 }
2723 }
7b13ee6b
KH
2724 if (cache_hit)
2725 goto done;
44037a66 2726
7963ac37
RK
2727 /* Try shift-and-add (load effective address) instructions,
2728 i.e. do a*3, a*5, a*9. */
2729 if ((t & 1) != 0)
2730 {
7b13ee6b 2731 do_alg_add_t2_m:
7963ac37
RK
2732 q = t - 1;
2733 q = q & -q;
2734 m = exact_log2 (q);
0792ab19 2735 if (m >= 0 && m < maxm)
b385aeda 2736 {
5322d07e 2737 op_cost = shiftadd_cost (speed, mode, m);
26276705
RS
2738 new_limit.cost = best_cost.cost - op_cost;
2739 new_limit.latency = best_cost.latency - op_cost;
2740 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2741
2742 alg_in->cost.cost += op_cost;
2743 alg_in->cost.latency += op_cost;
2744 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
5eebe2eb
RK
2745 {
2746 struct algorithm *x;
26276705 2747 best_cost = alg_in->cost;
5eebe2eb
RK
2748 x = alg_in, alg_in = best_alg, best_alg = x;
2749 best_alg->log[best_alg->ops] = m;
819126a6 2750 best_alg->op[best_alg->ops] = alg_add_t2_m;
5eebe2eb 2751 }
7963ac37 2752 }
7b13ee6b
KH
2753 if (cache_hit)
2754 goto done;
44037a66 2755
7b13ee6b 2756 do_alg_sub_t2_m:
7963ac37
RK
2757 q = t + 1;
2758 q = q & -q;
2759 m = exact_log2 (q);
0792ab19 2760 if (m >= 0 && m < maxm)
b385aeda 2761 {
5322d07e 2762 op_cost = shiftsub0_cost (speed, mode, m);
26276705
RS
2763 new_limit.cost = best_cost.cost - op_cost;
2764 new_limit.latency = best_cost.latency - op_cost;
2765 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2766
2767 alg_in->cost.cost += op_cost;
2768 alg_in->cost.latency += op_cost;
2769 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
5eebe2eb
RK
2770 {
2771 struct algorithm *x;
26276705 2772 best_cost = alg_in->cost;
5eebe2eb
RK
2773 x = alg_in, alg_in = best_alg, best_alg = x;
2774 best_alg->log[best_alg->ops] = m;
819126a6 2775 best_alg->op[best_alg->ops] = alg_sub_t2_m;
5eebe2eb 2776 }
7963ac37 2777 }
7b13ee6b
KH
2778 if (cache_hit)
2779 goto done;
7963ac37 2780 }
44037a66 2781
7b13ee6b 2782 done:
3ab0f290
DJ
2783 /* If best_cost has not decreased, we have not found any algorithm. */
2784 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
0178027c
KH
2785 {
2786 /* We failed to find an algorithm. Record alg_impossible for
2787 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2788 we are asked to find an algorithm for T within the same or
2789 lower COST_LIMIT, we can immediately return to the
2790 caller. */
5322d07e
NF
2791 entry_ptr->t = t;
2792 entry_ptr->mode = mode;
2793 entry_ptr->speed = speed;
2794 entry_ptr->alg = alg_impossible;
2795 entry_ptr->cost = *cost_limit;
0178027c
KH
2796 return;
2797 }
3ab0f290 2798
7b13ee6b
KH
2799 /* Cache the result. */
2800 if (!cache_hit)
2801 {
5322d07e
NF
2802 entry_ptr->t = t;
2803 entry_ptr->mode = mode;
2804 entry_ptr->speed = speed;
2805 entry_ptr->alg = best_alg->op[best_alg->ops];
2806 entry_ptr->cost.cost = best_cost.cost;
2807 entry_ptr->cost.latency = best_cost.latency;
7b13ee6b
KH
2808 }
2809
52786026
RK
2810 /* If we are getting a too long sequence for `struct algorithm'
2811 to record, make this search fail. */
2812 if (best_alg->ops == MAX_BITS_PER_WORD)
2813 return;
2814
819126a6
RK
2815 /* Copy the algorithm from temporary space to the space at alg_out.
2816 We avoid using structure assignment because the majority of
2817 best_alg is normally undefined, and this is a critical function. */
2818 alg_out->ops = best_alg->ops + 1;
26276705 2819 alg_out->cost = best_cost;
4e135bdd
KG
2820 memcpy (alg_out->op, best_alg->op,
2821 alg_out->ops * sizeof *alg_out->op);
2822 memcpy (alg_out->log, best_alg->log,
2823 alg_out->ops * sizeof *alg_out->log);
44037a66
TG
2824}
2825\f
d1a6adeb 2826/* Find the cheapest way of multiplying a value of mode MODE by VAL.
8efc8980
RS
2827 Try three variations:
2828
2829 - a shift/add sequence based on VAL itself
2830 - a shift/add sequence based on -VAL, followed by a negation
2831 - a shift/add sequence based on VAL - 1, followed by an addition.
2832
f258e38b
UW
2833 Return true if the cheapest of these cost less than MULT_COST,
2834 describing the algorithm in *ALG and final fixup in *VARIANT. */
8efc8980
RS
2835
2836static bool
ef4bddc2 2837choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
f258e38b
UW
2838 struct algorithm *alg, enum mult_variant *variant,
2839 int mult_cost)
8efc8980 2840{
8efc8980 2841 struct algorithm alg2;
26276705
RS
2842 struct mult_cost limit;
2843 int op_cost;
f40751dd 2844 bool speed = optimize_insn_for_speed_p ();
8efc8980 2845
18eaea7f
RS
2846 /* Fail quickly for impossible bounds. */
2847 if (mult_cost < 0)
2848 return false;
2849
2850 /* Ensure that mult_cost provides a reasonable upper bound.
2851 Any constant multiplication can be performed with less
2852 than 2 * bits additions. */
5322d07e 2853 op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
18eaea7f
RS
2854 if (mult_cost > op_cost)
2855 mult_cost = op_cost;
2856
8efc8980 2857 *variant = basic_variant;
26276705
RS
2858 limit.cost = mult_cost;
2859 limit.latency = mult_cost;
2860 synth_mult (alg, val, &limit, mode);
8efc8980
RS
2861
2862 /* This works only if the inverted value actually fits in an
2863 `unsigned int' */
84ddb681 2864 if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
8efc8980 2865 {
c3284718 2866 op_cost = neg_cost (speed, mode);
26276705
RS
2867 if (MULT_COST_LESS (&alg->cost, mult_cost))
2868 {
2869 limit.cost = alg->cost.cost - op_cost;
2870 limit.latency = alg->cost.latency - op_cost;
2871 }
2872 else
2873 {
2874 limit.cost = mult_cost - op_cost;
2875 limit.latency = mult_cost - op_cost;
2876 }
2877
2878 synth_mult (&alg2, -val, &limit, mode);
2879 alg2.cost.cost += op_cost;
2880 alg2.cost.latency += op_cost;
2881 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
8efc8980
RS
2882 *alg = alg2, *variant = negate_variant;
2883 }
2884
2885 /* This proves very useful for division-by-constant. */
5322d07e 2886 op_cost = add_cost (speed, mode);
26276705
RS
2887 if (MULT_COST_LESS (&alg->cost, mult_cost))
2888 {
2889 limit.cost = alg->cost.cost - op_cost;
2890 limit.latency = alg->cost.latency - op_cost;
2891 }
2892 else
2893 {
2894 limit.cost = mult_cost - op_cost;
2895 limit.latency = mult_cost - op_cost;
2896 }
2897
2898 synth_mult (&alg2, val - 1, &limit, mode);
2899 alg2.cost.cost += op_cost;
2900 alg2.cost.latency += op_cost;
2901 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
8efc8980
RS
2902 *alg = alg2, *variant = add_variant;
2903
26276705 2904 return MULT_COST_LESS (&alg->cost, mult_cost);
8efc8980
RS
2905}
2906
2907/* A subroutine of expand_mult, used for constant multiplications.
2908 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2909 convenient. Use the shift/add sequence described by ALG and apply
2910 the final fixup specified by VARIANT. */
2911
2912static rtx
ef4bddc2 2913expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
8efc8980
RS
2914 rtx target, const struct algorithm *alg,
2915 enum mult_variant variant)
2916{
2917 HOST_WIDE_INT val_so_far;
f3f6fb16
DM
2918 rtx_insn *insn;
2919 rtx accum, tem;
8efc8980 2920 int opno;
ef4bddc2 2921 machine_mode nmode;
8efc8980 2922
d448860e
JH
2923 /* Avoid referencing memory over and over and invalid sharing
2924 on SUBREGs. */
2925 op0 = force_reg (mode, op0);
8efc8980
RS
2926
2927 /* ACCUM starts out either as OP0 or as a zero, depending on
2928 the first operation. */
2929
2930 if (alg->op[0] == alg_zero)
2931 {
84ddb681 2932 accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
8efc8980
RS
2933 val_so_far = 0;
2934 }
2935 else if (alg->op[0] == alg_m)
2936 {
2937 accum = copy_to_mode_reg (mode, op0);
2938 val_so_far = 1;
2939 }
2940 else
5b0264cb 2941 gcc_unreachable ();
8efc8980
RS
2942
2943 for (opno = 1; opno < alg->ops; opno++)
2944 {
2945 int log = alg->log[opno];
7c27e184 2946 rtx shift_subtarget = optimize ? 0 : accum;
8efc8980
RS
2947 rtx add_target
2948 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
7c27e184 2949 && !optimize)
8efc8980 2950 ? target : 0;
7c27e184 2951 rtx accum_target = optimize ? 0 : accum;
7543f918 2952 rtx accum_inner;
8efc8980
RS
2953
2954 switch (alg->op[opno])
2955 {
2956 case alg_shift:
eb6c3df1 2957 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
4caa21a1
UB
2958 /* REG_EQUAL note will be attached to the following insn. */
2959 emit_move_insn (accum, tem);
8efc8980
RS
2960 val_so_far <<= log;
2961 break;
2962
2963 case alg_add_t_m2:
eb6c3df1 2964 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
8efc8980
RS
2965 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2966 add_target ? add_target : accum_target);
2967 val_so_far += (HOST_WIDE_INT) 1 << log;
2968 break;
2969
2970 case alg_sub_t_m2:
eb6c3df1 2971 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
8efc8980
RS
2972 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2973 add_target ? add_target : accum_target);
2974 val_so_far -= (HOST_WIDE_INT) 1 << log;
2975 break;
2976
2977 case alg_add_t2_m:
2978 accum = expand_shift (LSHIFT_EXPR, mode, accum,
eb6c3df1 2979 log, shift_subtarget, 0);
8efc8980
RS
2980 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2981 add_target ? add_target : accum_target);
2982 val_so_far = (val_so_far << log) + 1;
2983 break;
2984
2985 case alg_sub_t2_m:
2986 accum = expand_shift (LSHIFT_EXPR, mode, accum,
eb6c3df1 2987 log, shift_subtarget, 0);
8efc8980
RS
2988 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2989 add_target ? add_target : accum_target);
2990 val_so_far = (val_so_far << log) - 1;
2991 break;
2992
2993 case alg_add_factor:
eb6c3df1 2994 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
8efc8980
RS
2995 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2996 add_target ? add_target : accum_target);
2997 val_so_far += val_so_far << log;
2998 break;
2999
3000 case alg_sub_factor:
eb6c3df1 3001 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
8efc8980 3002 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
7c27e184
PB
3003 (add_target
3004 ? add_target : (optimize ? 0 : tem)));
8efc8980
RS
3005 val_so_far = (val_so_far << log) - val_so_far;
3006 break;
3007
3008 default:
5b0264cb 3009 gcc_unreachable ();
8efc8980
RS
3010 }
3011
84ddb681 3012 if (SCALAR_INT_MODE_P (mode))
8efc8980 3013 {
84ddb681
RH
3014 /* Write a REG_EQUAL note on the last insn so that we can cse
3015 multiplication sequences. Note that if ACCUM is a SUBREG,
3016 we've set the inner register and must properly indicate that. */
3017 tem = op0, nmode = mode;
3018 accum_inner = accum;
3019 if (GET_CODE (accum) == SUBREG)
3020 {
3021 accum_inner = SUBREG_REG (accum);
3022 nmode = GET_MODE (accum_inner);
3023 tem = gen_lowpart (nmode, op0);
3024 }
8efc8980 3025
84ddb681
RH
3026 insn = get_last_insn ();
3027 set_dst_reg_note (insn, REG_EQUAL,
4789c0ce
RS
3028 gen_rtx_MULT (nmode, tem,
3029 gen_int_mode (val_so_far, nmode)),
84ddb681
RH
3030 accum_inner);
3031 }
8efc8980
RS
3032 }
3033
3034 if (variant == negate_variant)
3035 {
3036 val_so_far = -val_so_far;
3037 accum = expand_unop (mode, neg_optab, accum, target, 0);
3038 }
3039 else if (variant == add_variant)
3040 {
3041 val_so_far = val_so_far + 1;
3042 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3043 }
3044
42eb30b5
ZW
3045 /* Compare only the bits of val and val_so_far that are significant
3046 in the result mode, to avoid sign-/zero-extension confusion. */
84ddb681
RH
3047 nmode = GET_MODE_INNER (mode);
3048 if (nmode == VOIDmode)
3049 nmode = mode;
3050 val &= GET_MODE_MASK (nmode);
3051 val_so_far &= GET_MODE_MASK (nmode);
5b0264cb 3052 gcc_assert (val == val_so_far);
8efc8980
RS
3053
3054 return accum;
3055}
3056
44037a66
TG
3057/* Perform a multiplication and return an rtx for the result.
3058 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3059 TARGET is a suggestion for where to store the result (an rtx).
3060
3061 We check specially for a constant integer as OP1.
3062 If you want this check for OP0 as well, then before calling
3063 you should swap the two operands if OP0 would be constant. */
3064
3065rtx
ef4bddc2 3066expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
f2593a66 3067 int unsignedp)
44037a66 3068{
8efc8980
RS
3069 enum mult_variant variant;
3070 struct algorithm algorithm;
84ddb681 3071 rtx scalar_op1;
65dc9350 3072 int max_cost;
f40751dd 3073 bool speed = optimize_insn_for_speed_p ();
84ddb681 3074 bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
44037a66 3075
84ddb681
RH
3076 if (CONSTANT_P (op0))
3077 {
3078 rtx temp = op0;
3079 op0 = op1;
3080 op1 = temp;
3081 }
3082
3083 /* For vectors, there are several simplifications that can be made if
3084 all elements of the vector constant are identical. */
3085 scalar_op1 = op1;
3086 if (GET_CODE (op1) == CONST_VECTOR)
3087 {
3088 int i, n = CONST_VECTOR_NUNITS (op1);
3089 scalar_op1 = CONST_VECTOR_ELT (op1, 0);
3090 for (i = 1; i < n; ++i)
3091 if (!rtx_equal_p (scalar_op1, CONST_VECTOR_ELT (op1, i)))
3092 goto skip_scalar;
3093 }
3094
3095 if (INTEGRAL_MODE_P (mode))
3096 {
3097 rtx fake_reg;
caf62455
JDA
3098 HOST_WIDE_INT coeff;
3099 bool is_neg;
84ddb681
RH
3100 int mode_bitsize;
3101
3102 if (op1 == CONST0_RTX (mode))
3103 return op1;
3104 if (op1 == CONST1_RTX (mode))
3105 return op0;
3106 if (op1 == CONSTM1_RTX (mode))
3107 return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3108 op0, target, 0);
3109
3110 if (do_trapv)
3111 goto skip_synth;
3112
66b3ed5f
JJ
3113 /* If mode is integer vector mode, check if the backend supports
3114 vector lshift (by scalar or vector) at all. If not, we can't use
3115 synthetized multiply. */
3116 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3117 && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
3118 && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
3119 goto skip_synth;
3120
84ddb681
RH
3121 /* These are the operations that are potentially turned into
3122 a sequence of shifts and additions. */
3123 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
65dc9350
RS
3124
3125 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3126 less than or equal in size to `unsigned int' this doesn't matter.
3127 If the mode is larger than `unsigned int', then synth_mult works
3128 only if the constant value exactly fits in an `unsigned int' without
3129 any truncation. This means that multiplying by negative values does
3130 not work; results are off by 2^32 on a 32 bit machine. */
84ddb681 3131 if (CONST_INT_P (scalar_op1))
58b42e19 3132 {
84ddb681
RH
3133 coeff = INTVAL (scalar_op1);
3134 is_neg = coeff < 0;
65dc9350 3135 }
807e902e
KZ
3136#if TARGET_SUPPORTS_WIDE_INT
3137 else if (CONST_WIDE_INT_P (scalar_op1))
3138#else
48175537 3139 else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
807e902e 3140#endif
65dc9350 3141 {
807e902e
KZ
3142 int shift = wi::exact_log2 (std::make_pair (scalar_op1, mode));
3143 /* Perfect power of 2 (other than 1, which is handled above). */
3144 if (shift > 0)
3145 return expand_shift (LSHIFT_EXPR, mode, op0,
3146 shift, target, unsignedp);
caf62455
JDA
3147 else
3148 goto skip_synth;
65dc9350 3149 }
84ddb681
RH
3150 else
3151 goto skip_synth;
b8698a0f 3152
65dc9350
RS
3153 /* We used to test optimize here, on the grounds that it's better to
3154 produce a smaller program when -O is not used. But this causes
3155 such a terrible slowdown sometimes that it seems better to always
3156 use synth_mult. */
65dc9350 3157
84ddb681 3158 /* Special case powers of two. */
be63b77d
JJ
3159 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3160 && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
84ddb681
RH
3161 return expand_shift (LSHIFT_EXPR, mode, op0,
3162 floor_log2 (coeff), target, unsignedp);
3163
3164 fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3165
3166 /* Attempt to handle multiplication of DImode values by negative
3167 coefficients, by performing the multiplication by a positive
3168 multiplier and then inverting the result. */
84ddb681
RH
3169 if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3170 {
3171 /* Its safe to use -coeff even for INT_MIN, as the
3172 result is interpreted as an unsigned coefficient.
3173 Exclude cost of op0 from max_cost to match the cost
5e839bc8 3174 calculation of the synth_mult. */
be63b77d 3175 coeff = -(unsigned HOST_WIDE_INT) coeff;
84ddb681 3176 max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed)
c3284718 3177 - neg_cost (speed, mode));
be63b77d
JJ
3178 if (max_cost <= 0)
3179 goto skip_synth;
3180
3181 /* Special case powers of two. */
3182 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3183 {
3184 rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3185 floor_log2 (coeff), target, unsignedp);
3186 return expand_unop (mode, neg_optab, temp, target, 0);
3187 }
3188
3189 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3190 max_cost))
84ddb681 3191 {
be63b77d 3192 rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
84ddb681
RH
3193 &algorithm, variant);
3194 return expand_unop (mode, neg_optab, temp, target, 0);
3195 }
b216b86b 3196 goto skip_synth;
58b42e19 3197 }
44037a66 3198
84ddb681
RH
3199 /* Exclude cost of op0 from max_cost to match the cost
3200 calculation of the synth_mult. */
3201 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed);
3202 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3203 return expand_mult_const (mode, op0, coeff, target,
3204 &algorithm, variant);
f2593a66 3205 }
84ddb681 3206 skip_synth:
f2593a66
RS
3207
3208 /* Expand x*2.0 as x+x. */
48175537 3209 if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1))
f2593a66
RS
3210 {
3211 REAL_VALUE_TYPE d;
84ddb681 3212 REAL_VALUE_FROM_CONST_DOUBLE (d, scalar_op1);
f2593a66
RS
3213
3214 if (REAL_VALUES_EQUAL (d, dconst2))
3215 {
3216 op0 = force_reg (GET_MODE (op0), op0);
3217 return expand_binop (mode, add_optab, op0, op0,
3218 target, unsignedp, OPTAB_LIB_WIDEN);
3219 }
3220 }
84ddb681 3221 skip_scalar:
f2593a66 3222
819126a6
RK
3223 /* This used to use umul_optab if unsigned, but for non-widening multiply
3224 there is no difference between signed and unsigned. */
84ddb681 3225 op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
44037a66 3226 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
5b0264cb 3227 gcc_assert (op0);
44037a66
TG
3228 return op0;
3229}
5b58b39b 3230
6dd8f4bb
BS
3231/* Return a cost estimate for multiplying a register by the given
3232 COEFFicient in the given MODE and SPEED. */
3233
3234int
ef4bddc2 3235mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
6dd8f4bb
BS
3236{
3237 int max_cost;
3238 struct algorithm algorithm;
3239 enum mult_variant variant;
3240
3241 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3242 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg), speed);
3243 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3244 return algorithm.cost.cost;
3245 else
3246 return max_cost;
3247}
3248
5b58b39b
BS
3249/* Perform a widening multiplication and return an rtx for the result.
3250 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3251 TARGET is a suggestion for where to store the result (an rtx).
3252 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3253 or smul_widen_optab.
3254
3255 We check specially for a constant integer as OP1, comparing the
3256 cost of a widening multiply against the cost of a sequence of shifts
3257 and adds. */
3258
3259rtx
ef4bddc2 3260expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
5b58b39b
BS
3261 int unsignedp, optab this_optab)
3262{
3263 bool speed = optimize_insn_for_speed_p ();
e7ef91dc 3264 rtx cop1;
5b58b39b
BS
3265
3266 if (CONST_INT_P (op1)
e7ef91dc
JJ
3267 && GET_MODE (op0) != VOIDmode
3268 && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3269 this_optab == umul_widen_optab))
3270 && CONST_INT_P (cop1)
3271 && (INTVAL (cop1) >= 0
46c9550f 3272 || HWI_COMPUTABLE_MODE_P (mode)))
5b58b39b 3273 {
e7ef91dc 3274 HOST_WIDE_INT coeff = INTVAL (cop1);
5b58b39b
BS
3275 int max_cost;
3276 enum mult_variant variant;
3277 struct algorithm algorithm;
3278
3279 /* Special case powers of two. */
3280 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3281 {
3282 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3283 return expand_shift (LSHIFT_EXPR, mode, op0,
eb6c3df1 3284 floor_log2 (coeff), target, unsignedp);
5b58b39b
BS
3285 }
3286
3287 /* Exclude cost of op0 from max_cost to match the cost
3288 calculation of the synth_mult. */
5322d07e 3289 max_cost = mul_widen_cost (speed, mode);
5b58b39b
BS
3290 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3291 max_cost))
3292 {
3293 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3294 return expand_mult_const (mode, op0, coeff, target,
3295 &algorithm, variant);
3296 }
3297 }
3298 return expand_binop (mode, this_optab, op0, op1, target,
3299 unsignedp, OPTAB_LIB_WIDEN);
3300}
44037a66 3301\f
55c2d311
TG
3302/* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3303 replace division by D, and put the least significant N bits of the result
3304 in *MULTIPLIER_PTR and return the most significant bit.
3305
3306 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3307 needed precision is in PRECISION (should be <= N).
3308
3309 PRECISION should be as small as possible so this function can choose
3310 multiplier more freely.
3311
3312 The rounded-up logarithm of D is placed in *lgup_ptr. A shift count that
3313 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3314
3315 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3316 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
3317
55c2d311 3318unsigned HOST_WIDE_INT
502b8322 3319choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
079c527f
JJ
3320 unsigned HOST_WIDE_INT *multiplier_ptr,
3321 int *post_shift_ptr, int *lgup_ptr)
55c2d311 3322{
55c2d311
TG
3323 int lgup, post_shift;
3324 int pow, pow2;
55c2d311
TG
3325
3326 /* lgup = ceil(log2(divisor)); */
3327 lgup = ceil_log2 (d);
3328
5b0264cb 3329 gcc_assert (lgup <= n);
55c2d311
TG
3330
3331 pow = n + lgup;
3332 pow2 = n + lgup - precision;
3333
55c2d311 3334 /* mlow = 2^(N + lgup)/d */
807e902e
KZ
3335 wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
3336 wide_int mlow = wi::udiv_trunc (val, d);
55c2d311 3337
9be0ac8c 3338 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
807e902e
KZ
3339 val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3340 wide_int mhigh = wi::udiv_trunc (val, d);
55c2d311
TG
3341
3342 /* If precision == N, then mlow, mhigh exceed 2^N
3343 (but they do not exceed 2^(N+1)). */
3344
f9da5064 3345 /* Reduce to lowest terms. */
55c2d311
TG
3346 for (post_shift = lgup; post_shift > 0; post_shift--)
3347 {
807e902e
KZ
3348 unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3349 HOST_BITS_PER_WIDE_INT);
3350 unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3351 HOST_BITS_PER_WIDE_INT);
55c2d311
TG
3352 if (ml_lo >= mh_lo)
3353 break;
3354
807e902e
KZ
3355 mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3356 mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
55c2d311
TG
3357 }
3358
3359 *post_shift_ptr = post_shift;
3360 *lgup_ptr = lgup;
3361 if (n < HOST_BITS_PER_WIDE_INT)
3362 {
3363 unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
807e902e
KZ
3364 *multiplier_ptr = mhigh.to_uhwi () & mask;
3365 return mhigh.to_uhwi () >= mask;
55c2d311
TG
3366 }
3367 else
3368 {
807e902e
KZ
3369 *multiplier_ptr = mhigh.to_uhwi ();
3370 return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
55c2d311
TG
3371 }
3372}
3373
3374/* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3375 congruent to 1 (mod 2**N). */
3376
3377static unsigned HOST_WIDE_INT
502b8322 3378invert_mod2n (unsigned HOST_WIDE_INT x, int n)
55c2d311 3379{
0f41302f 3380 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
55c2d311
TG
3381
3382 /* The algorithm notes that the choice y = x satisfies
3383 x*y == 1 mod 2^3, since x is assumed odd.
3384 Each iteration doubles the number of bits of significance in y. */
3385
3386 unsigned HOST_WIDE_INT mask;
3387 unsigned HOST_WIDE_INT y = x;
3388 int nbit = 3;
3389
3390 mask = (n == HOST_BITS_PER_WIDE_INT
3391 ? ~(unsigned HOST_WIDE_INT) 0
3392 : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3393
3394 while (nbit < n)
3395 {
3396 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3397 nbit *= 2;
3398 }
3399 return y;
3400}
3401
/* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
   flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
   product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
   to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
   become signed.

   The result is put in TARGET if that is convenient.

   MODE is the mode of operation.  */

rtx
expand_mult_highpart_adjust (machine_mode mode, rtx adj_operand, rtx op0,
			     rtx op1, rtx target, int unsignedp)
{
  rtx tem;
  /* The signed and unsigned high parts differ by (op0 < 0 ? op1 : 0)
     + (op1 < 0 ? op0 : 0); add those terms to go signed->unsigned,
     subtract them to go unsigned->signed.  */
  enum rtx_code adj_code = unsignedp ? PLUS : MINUS;

  /* tem = op0 >> (bits-1), an arithmetic shift yielding all-ones when
     OP0 is negative and zero otherwise (last arg 0 = signed shift);
     ANDing with OP1 selects OP1 exactly when OP0's sign bit is set.  */
  tem = expand_shift (RSHIFT_EXPR, mode, op0,
		      GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
  tem = expand_and (mode, tem, op1, NULL_RTX);
  adj_operand
    = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
		     adj_operand);

  /* Symmetric correction for the sign of OP1: select OP0 when OP1 < 0
     and apply the same add/subtract.  */
  tem = expand_shift (RSHIFT_EXPR, mode, op1,
		      GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
  tem = expand_and (mode, tem, op0, NULL_RTX);
  target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
			  target);

  return target;
}
3434
/* Subroutine of expmed_mult_highpart.  Return the MODE high part of OP.  */

static rtx
extract_high_half (machine_mode mode, rtx op)
{
  machine_mode wider_mode;

  /* When OP is a double-word value and MODE is word-sized, the high
     half is directly addressable as a subword.  */
  if (mode == word_mode)
    return gen_highpart (mode, op);

  gcc_assert (!SCALAR_FLOAT_MODE_P (mode));

  /* Otherwise shift the (wider-mode) value right by MODE's bit count
     (logical shift, last arg 1 = unsigned) and truncate back down.  */
  wider_mode = GET_MODE_WIDER_MODE (mode);
  op = expand_shift (RSHIFT_EXPR, wider_mode, op,
		     GET_MODE_BITSIZE (mode), 0, 1);
  return convert_modes (mode, wider_mode, op, 0);
}
55c2d311 3452
/* Like expmed_mult_highpart, but only consider using a multiplication
   optab.  OP1 is an rtx for the constant operand.

   Each strategy below is attempted only if its estimated RTL cost is
   below MAX_COST; returns 0 if no strategy is both available and cheap
   enough.  */

static rtx
expmed_mult_highpart_optab (machine_mode mode, rtx op0, rtx op1,
			    rtx target, int unsignedp, int max_cost)
{
  /* OP1 truncated/re-extended to MODE, for the non-widening optabs.  */
  rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
  machine_mode wider_mode;
  optab moptab;
  rtx tem;
  int size;
  bool speed = optimize_insn_for_speed_p ();

  gcc_assert (!SCALAR_FLOAT_MODE_P (mode));

  wider_mode = GET_MODE_WIDER_MODE (mode);
  size = GET_MODE_BITSIZE (mode);

  /* Firstly, try using a multiplication insn that only generates the needed
     high part of the product, and in the sign flavor of unsignedp.  */
  if (mul_highpart_cost (speed, mode) < max_cost)
    {
      moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
      tem = expand_binop (mode, moptab, op0, narrow_op1, target,
			  unsignedp, OPTAB_DIRECT);
      if (tem)
	return tem;
    }

  /* Secondly, same as above, but use sign flavor opposite of unsignedp.
     Need to adjust the result after the multiplication.  The cost budget
     includes the two sign-select shifts and four add/sub insns that
     expand_mult_highpart_adjust emits.  */
  if (size - 1 < BITS_PER_WORD
      && (mul_highpart_cost (speed, mode)
	  + 2 * shift_cost (speed, mode, size-1)
	  + 4 * add_cost (speed, mode) < max_cost))
    {
      moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
      tem = expand_binop (mode, moptab, op0, narrow_op1, target,
			  unsignedp, OPTAB_DIRECT);
      if (tem)
	/* We used the wrong signedness.  Adjust the result.  */
	return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
					    tem, unsignedp);
    }

  /* Try widening multiplication: a MODE x MODE -> wider MODE multiply,
     from which we keep only the high half.  */
  moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
  if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
      && mul_widen_cost (speed, wider_mode) < max_cost)
    {
      tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
			  unsignedp, OPTAB_WIDEN);
      if (tem)
	return extract_high_half (mode, tem);
    }

  /* Try widening the mode and perform a non-widening multiplication.  */
  if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
      && size - 1 < BITS_PER_WORD
      && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
	  < max_cost))
    {
      rtx_insn *insns;
      rtx wop0, wop1;

      /* We need to widen the operands, for example to ensure the
	 constant multiplier is correctly sign or zero extended.
	 Use a sequence to clean-up any instructions emitted by
	 the conversions if things don't work out.  */
      start_sequence ();
      wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
      wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
      tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
			  unsignedp, OPTAB_WIDEN);
      insns = get_insns ();
      end_sequence ();

      if (tem)
	{
	  emit_insn (insns);
	  return extract_high_half (mode, tem);
	}
    }

  /* Try widening multiplication of opposite signedness, and adjust.  */
  moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
  if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
      && size - 1 < BITS_PER_WORD
      && (mul_widen_cost (speed, wider_mode)
	  + 2 * shift_cost (speed, mode, size-1)
	  + 4 * add_cost (speed, mode) < max_cost))
    {
      tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
			  NULL_RTX, ! unsignedp, OPTAB_WIDEN);
      if (tem != 0)
	{
	  tem = extract_high_half (mode, tem);
	  /* We used the wrong signedness.  Adjust the result.  */
	  return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
					      target, unsignedp);
	}
    }

  /* No strategy fit within MAX_COST.  */
  return 0;
}
71af73bb 3559
/* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
   putting the high half of the result in TARGET if that is convenient,
   and return where the result is.  If the operation can not be performed,
   0 is returned.

   MODE is the mode of operation and result.

   UNSIGNEDP nonzero means unsigned multiply.

   MAX_COST is the total allowed cost for the expanded RTL.  */

static rtx
expmed_mult_highpart (machine_mode mode, rtx op0, rtx op1,
		      rtx target, int unsignedp, int max_cost)
{
  machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
  unsigned HOST_WIDE_INT cnst1;
  int extra_cost;
  bool sign_adjust = false;
  enum mult_variant variant;
  struct algorithm alg;
  rtx tem;
  bool speed = optimize_insn_for_speed_p ();

  gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
  /* We can't support modes wider than HOST_BITS_PER_INT.  */
  gcc_assert (HWI_COMPUTABLE_MODE_P (mode));

  cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);

  /* We can't optimize modes wider than BITS_PER_WORD.
     ??? We might be able to perform double-word arithmetic if
     mode == word_mode, however all the cost calculations in
     synth_mult etc. assume single-word operations.  */
  if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
    return expmed_mult_highpart_optab (mode, op0, op1, target,
				       unsignedp, max_cost);

  /* Cost of the final shift that extracts the high half after a
     shift/add synthesized multiply.  */
  extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);

  /* Check whether we try to multiply by a negative constant.  */
  if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
    {
      sign_adjust = true;
      extra_cost += add_cost (speed, mode);
    }

  /* See whether shift/add multiplication is cheap enough.  */
  if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
			   max_cost - extra_cost))
    {
      /* See whether the specialized multiplication optabs are
	 cheaper than the shift/add version.  */
      tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
					alg.cost.cost + extra_cost);
      if (tem)
	return tem;

      /* Perform the synthesized multiply in the wider mode, then keep
	 only the high half.  */
      tem = convert_to_mode (wider_mode, op0, unsignedp);
      tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
      tem = extract_high_half (mode, tem);

      /* Adjust result for signedness.  */
      if (sign_adjust)
	tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);

      return tem;
    }
  /* Shift/add synthesis was too costly; fall back to the optabs with
     the full budget.  */
  return expmed_mult_highpart_optab (mode, op0, op1, target,
				     unsignedp, max_cost);
}
/* Expand signed modulus of OP0 by a power of two D in mode MODE.  */

static rtx
expand_smod_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
{
  rtx result, temp, shift;
  rtx_code_label *label;
  int logd;
  int prec = GET_MODE_PRECISION (mode);

  logd = floor_log2 (d);
  result = gen_reg_rtx (mode);

  /* Avoid conditional branches when they're expensive.  */
  if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
      && optimize_insn_for_speed_p ())
    {
      /* signmask = -1 if OP0 < 0, else 0.  */
      rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
				      mode, 0, -1);
      if (signmask)
	{
	  /* masklow = d - 1, the low bits that form the remainder.  */
	  HOST_WIDE_INT masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
	  signmask = force_reg (mode, signmask);
	  shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);

	  /* Use the rtx_cost of a LSHIFTRT instruction to determine
	     which instruction sequence to use.  If logical right shifts
	     are expensive the use 2 XORs, 2 SUBs and an AND, otherwise
	     use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */

	  temp = gen_rtx_LSHIFTRT (mode, result, shift);
	  if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
	      || (set_src_cost (temp, optimize_insn_for_speed_p ())
		  > COSTS_N_INSNS (2)))
	    {
	      /* (x ^ m) - m conditionally negates x when m is -1/0, so
		 this computes sign(x) * ((|x| & masklow)):  negate,
		 mask, negate back.  */
	      temp = expand_binop (mode, xor_optab, op0, signmask,
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      temp = expand_binop (mode, sub_optab, temp, signmask,
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      temp = expand_binop (mode, and_optab, temp,
				   gen_int_mode (masklow, mode),
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      temp = expand_binop (mode, xor_optab, temp, signmask,
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      temp = expand_binop (mode, sub_optab, temp, signmask,
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	    }
	  else
	    {
	      /* Shift the all-ones mask down so only the low LOGD bits
		 remain set for negative OP0; adding it biases OP0 so the
		 masked value can be corrected by one subtraction.  */
	      signmask = expand_binop (mode, lshr_optab, signmask, shift,
				       NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      signmask = force_reg (mode, signmask);

	      temp = expand_binop (mode, add_optab, op0, signmask,
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      temp = expand_binop (mode, and_optab, temp,
				   gen_int_mode (masklow, mode),
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      temp = expand_binop (mode, sub_optab, temp, signmask,
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	    }
	  return temp;
	}
    }

  /* Branching fallback.  Mask contains the mode's signbit and the
     significant bits of the modulus.  By including the signbit in the
     operation, many targets can avoid an explicit compare operation in
     the following comparison against zero.  */
  wide_int mask = wi::mask (logd, false, prec);
  mask = wi::set_bit (mask, prec - 1);

  temp = expand_binop (mode, and_optab, op0,
		       immed_wide_int_const (mask, mode),
		       result, 1, OPTAB_LIB_WIDEN);
  if (temp != result)
    emit_move_insn (result, temp);

  /* If the masked value is non-negative it is already the remainder;
     otherwise sign-extend the low LOGD bits via dec / or-high-bits / inc.  */
  label = gen_label_rtx ();
  do_cmp_and_jump (result, const0_rtx, GE, mode, label);

  temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
		       0, OPTAB_LIB_WIDEN);

  mask = wi::mask (logd, true, prec);
  temp = expand_binop (mode, ior_optab, temp,
		       immed_wide_int_const (mask, mode),
		       result, 1, OPTAB_LIB_WIDEN);
  temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
		       0, OPTAB_LIB_WIDEN);
  if (temp != result)
    emit_move_insn (result, temp);
  emit_label (label);
  return result;
}
/* Expand signed division of OP0 by a power of two D in mode MODE.
   This routine is only called for positive values of D.  */

static rtx
expand_sdiv_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
{
  rtx temp;
  rtx_code_label *label;
  int logd;

  logd = floor_log2 (d);

  /* Division by 2: add the sign bit (as 0/1) to OP0 before the
     arithmetic shift, so negative values round toward zero.  */
  if (d == 2
      && BRANCH_COST (optimize_insn_for_speed_p (),
		      false) >= 1)
    {
      temp = gen_reg_rtx (mode);
      temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
      temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
			   0, OPTAB_LIB_WIDEN);
      return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
    }

#ifdef HAVE_conditional_move
  /* Use a conditional move to pick OP0 + (D - 1) when OP0 is negative,
     avoiding a branch entirely.  */
  if (BRANCH_COST (optimize_insn_for_speed_p (), false)
      >= 2)
    {
      rtx temp2;

      /* Emit into a sequence so the insns can be discarded if the
	 conditional move is not available after all.  */
      start_sequence ();
      temp2 = copy_to_mode_reg (mode, op0);
      temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
			   NULL_RTX, 0, OPTAB_LIB_WIDEN);
      temp = force_reg (mode, temp);

      /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
      temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
				     mode, temp, temp2, mode, 0);
      if (temp2)
	{
	  rtx_insn *seq = get_insns ();
	  end_sequence ();
	  emit_insn (seq);
	  return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
	}
      end_sequence ();
    }
#endif

  /* Branchless variant: materialize -1/0 from the sign of OP0, derive
     the rounding bias D - 1 from it (by AND with D - 1, or by a logical
     right shift when that is cheaper), add it, then shift.  */
  if (BRANCH_COST (optimize_insn_for_speed_p (),
		   false) >= 2)
    {
      int ushift = GET_MODE_BITSIZE (mode) - logd;

      temp = gen_reg_rtx (mode);
      temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
      if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
	  || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
	     > COSTS_N_INSNS (1))
	temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode),
			     NULL_RTX, 0, OPTAB_LIB_WIDEN);
      else
	temp = expand_shift (RSHIFT_EXPR, mode, temp,
			     ushift, NULL_RTX, 1);
      temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
			   0, OPTAB_LIB_WIDEN);
      return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
    }

  /* Cheap-branch fallback: skip the D - 1 bias when OP0 >= 0.  */
  label = gen_label_rtx ();
  temp = copy_to_mode_reg (mode, op0);
  do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
  expand_inc (temp, gen_int_mode (d - 1, mode));
  emit_label (label);
  return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
}
55c2d311 3805\f
44037a66
TG
3806/* Emit the code to divide OP0 by OP1, putting the result in TARGET
3807 if that is convenient, and returning where the result is.
3808 You may request either the quotient or the remainder as the result;
3809 specify REM_FLAG nonzero to get the remainder.
3810
3811 CODE is the expression code for which kind of division this is;
3812 it controls how rounding is done. MODE is the machine mode to use.
3813 UNSIGNEDP nonzero means do unsigned division. */
3814
3815/* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3816 and then correct it by or'ing in missing high bits
3817 if result of ANDI is nonzero.
3818 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3819 This could optimize to a bfexts instruction.
3820 But C doesn't use these operations, so their optimizations are
3821 left for later. */
5353610b
R
3822/* ??? For modulo, we don't actually need the highpart of the first product,
3823 the low part will do nicely. And for small divisors, the second multiply
3824 can also be a low-part only multiply or even be completely left out.
3825 E.g. to calculate the remainder of a division by 3 with a 32 bit
3826 multiply, multiply with 0x55555556 and extract the upper two bits;
3827 the result is exact for inputs up to 0x1fffffff.
3828 The input range can be reduced by using cross-sum rules.
3829 For odd divisors >= 3, the following table gives right shift counts
09da1532 3830 so that if a number is shifted by an integer multiple of the given
5353610b
R
3831 amount, the remainder stays the same:
3832 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3833 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3834 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3835 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3836 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3837
3838 Cross-sum rules for even numbers can be derived by leaving as many bits
3839 to the right alone as the divisor has zeros to the right.
3840 E.g. if x is an unsigned 32 bit number:
3841 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3842 */
44037a66
TG
3843
3844rtx
ef4bddc2 3845expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
502b8322 3846 rtx op0, rtx op1, rtx target, int unsignedp)
44037a66 3847{
ef4bddc2 3848 machine_mode compute_mode;
b3694847 3849 rtx tquotient;
55c2d311 3850 rtx quotient = 0, remainder = 0;
f3f6fb16 3851 rtx_insn *last;
2c414fba 3852 int size;
f3f6fb16 3853 rtx_insn *insn;
44037a66 3854 optab optab1, optab2;
1c4a429a 3855 int op1_is_constant, op1_is_pow2 = 0;
71af73bb 3856 int max_cost, extra_cost;
9ec36da5 3857 static HOST_WIDE_INT last_div_const = 0;
f40751dd 3858 bool speed = optimize_insn_for_speed_p ();
55c2d311 3859
481683e1 3860 op1_is_constant = CONST_INT_P (op1);
1c4a429a
JH
3861 if (op1_is_constant)
3862 {
be63b77d 3863 unsigned HOST_WIDE_INT ext_op1 = UINTVAL (op1);
1c4a429a
JH
3864 if (unsignedp)
3865 ext_op1 &= GET_MODE_MASK (mode);
3866 op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3867 || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3868 }
55c2d311
TG
3869
3870 /*
3871 This is the structure of expand_divmod:
3872
3873 First comes code to fix up the operands so we can perform the operations
3874 correctly and efficiently.
3875
3876 Second comes a switch statement with code specific for each rounding mode.
3877 For some special operands this code emits all RTL for the desired
69f61901 3878 operation, for other cases, it generates only a quotient and stores it in
55c2d311
TG
3879 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
3880 to indicate that it has not done anything.
3881
69f61901
RK
3882 Last comes code that finishes the operation. If QUOTIENT is set and
3883 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
3884 QUOTIENT is not set, it is computed using trunc rounding.
44037a66 3885
55c2d311
TG
3886 We try to generate special code for division and remainder when OP1 is a
3887 constant. If |OP1| = 2**n we can use shifts and some other fast
3888 operations. For other values of OP1, we compute a carefully selected
3889 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3890 by m.
3891
3892 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3893 half of the product. Different strategies for generating the product are
00f07b86 3894 implemented in expmed_mult_highpart.
55c2d311
TG
3895
3896 If what we actually want is the remainder, we generate that by another
3897 by-constant multiplication and a subtraction. */
3898
3899 /* We shouldn't be called with OP1 == const1_rtx, but some of the
3d32ffd1
TW
3900 code below will malfunction if we are, so check here and handle
3901 the special case if so. */
3902 if (op1 == const1_rtx)
3903 return rem_flag ? const0_rtx : op0;
3904
91ce572a
CC
3905 /* When dividing by -1, we could get an overflow.
3906 negv_optab can handle overflows. */
3907 if (! unsignedp && op1 == constm1_rtx)
3908 {
3909 if (rem_flag)
0fb7aeda 3910 return const0_rtx;
c3284718 3911 return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
0fb7aeda 3912 ? negv_optab : neg_optab, op0, target, 0);
91ce572a
CC
3913 }
3914
bc1c7e93
RK
3915 if (target
3916 /* Don't use the function value register as a target
3917 since we have to read it as well as write it,
3918 and function-inlining gets confused by this. */
3919 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3920 /* Don't clobber an operand while doing a multi-step calculation. */
515dfc7a 3921 || ((rem_flag || op1_is_constant)
bc1c7e93 3922 && (reg_mentioned_p (target, op0)
3c0cb5de 3923 || (MEM_P (op0) && MEM_P (target))))
bc1c7e93 3924 || reg_mentioned_p (target, op1)
3c0cb5de 3925 || (MEM_P (op1) && MEM_P (target))))
44037a66
TG
3926 target = 0;
3927
44037a66
TG
3928 /* Get the mode in which to perform this computation. Normally it will
3929 be MODE, but sometimes we can't do the desired operation in MODE.
3930 If so, pick a wider mode in which we can do the operation. Convert
3931 to that mode at the start to avoid repeated conversions.
3932
3933 First see what operations we need. These depend on the expression
3934 we are evaluating. (We assume that divxx3 insns exist under the
3935 same conditions that modxx3 insns and that these insns don't normally
3936 fail. If these assumptions are not correct, we may generate less
3937 efficient code in some cases.)
3938
3939 Then see if we find a mode in which we can open-code that operation
3940 (either a division, modulus, or shift). Finally, check for the smallest
3941 mode for which we can do the operation with a library call. */
3942
55c2d311 3943 /* We might want to refine this now that we have division-by-constant
00f07b86 3944 optimization. Since expmed_mult_highpart tries so many variants, it is
55c2d311
TG
3945 not straightforward to generalize this. Maybe we should make an array
3946 of possible modes in init_expmed? Save this for GCC 2.7. */
3947
556a56ac
DM
3948 optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3949 ? (unsignedp ? lshr_optab : ashr_optab)
44037a66 3950 : (unsignedp ? udiv_optab : sdiv_optab));
556a56ac
DM
3951 optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3952 ? optab1
3953 : (unsignedp ? udivmod_optab : sdivmod_optab));
44037a66
TG
3954
3955 for (compute_mode = mode; compute_mode != VOIDmode;
3956 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
947131ba
RS
3957 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
3958 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
44037a66
TG
3959 break;
3960
3961 if (compute_mode == VOIDmode)
3962 for (compute_mode = mode; compute_mode != VOIDmode;
3963 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
8a33f100
JH
3964 if (optab_libfunc (optab1, compute_mode)
3965 || optab_libfunc (optab2, compute_mode))
44037a66
TG
3966 break;
3967
535a42b1
NS
3968 /* If we still couldn't find a mode, use MODE, but expand_binop will
3969 probably die. */
44037a66
TG
3970 if (compute_mode == VOIDmode)
3971 compute_mode = mode;
3972
55c2d311
TG
3973 if (target && GET_MODE (target) == compute_mode)
3974 tquotient = target;
3975 else
3976 tquotient = gen_reg_rtx (compute_mode);
2c414fba 3977
55c2d311
TG
3978 size = GET_MODE_BITSIZE (compute_mode);
3979#if 0
3980 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
71af73bb
TG
3981 (mode), and thereby get better code when OP1 is a constant. Do that
3982 later. It will require going over all usages of SIZE below. */
55c2d311
TG
3983 size = GET_MODE_BITSIZE (mode);
3984#endif
bc1c7e93 3985
9ec36da5
JL
3986 /* Only deduct something for a REM if the last divide done was
3987 for a different constant. Then set the constant of the last
3988 divide. */
5322d07e
NF
3989 max_cost = (unsignedp
3990 ? udiv_cost (speed, compute_mode)
3991 : sdiv_cost (speed, compute_mode));
a28b2ac6
RS
3992 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
3993 && INTVAL (op1) == last_div_const))
5322d07e
NF
3994 max_cost -= (mul_cost (speed, compute_mode)
3995 + add_cost (speed, compute_mode));
9ec36da5
JL
3996
3997 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
71af73bb 3998
55c2d311 3999 /* Now convert to the best mode to use. */
44037a66
TG
4000 if (compute_mode != mode)
4001 {
55c2d311 4002 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
81722fa9 4003 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
e13a25d5 4004
e9a25f70
JL
4005 /* convert_modes may have placed op1 into a register, so we
4006 must recompute the following. */
481683e1 4007 op1_is_constant = CONST_INT_P (op1);
e13a25d5
DM
4008 op1_is_pow2 = (op1_is_constant
4009 && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4010 || (! unsignedp
be63b77d 4011 && EXACT_POWER_OF_2_OR_ZERO_P (-UINTVAL (op1))))));
44037a66
TG
4012 }
4013
55c2d311 4014 /* If one of the operands is a volatile MEM, copy it into a register. */
c2a47e48 4015
3c0cb5de 4016 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
55c2d311 4017 op0 = force_reg (compute_mode, op0);
3c0cb5de 4018 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
c2a47e48
RK
4019 op1 = force_reg (compute_mode, op1);
4020
ab0b6581
TG
4021 /* If we need the remainder or if OP1 is constant, we need to
4022 put OP0 in a register in case it has any queued subexpressions. */
4023 if (rem_flag || op1_is_constant)
4024 op0 = force_reg (compute_mode, op0);
bc1c7e93 4025
55c2d311 4026 last = get_last_insn ();
44037a66 4027
9faa82d8 4028 /* Promote floor rounding to trunc rounding for unsigned operations. */
55c2d311 4029 if (unsignedp)
44037a66 4030 {
55c2d311
TG
4031 if (code == FLOOR_DIV_EXPR)
4032 code = TRUNC_DIV_EXPR;
4033 if (code == FLOOR_MOD_EXPR)
4034 code = TRUNC_MOD_EXPR;
db7cafb0
JL
4035 if (code == EXACT_DIV_EXPR && op1_is_pow2)
4036 code = TRUNC_DIV_EXPR;
55c2d311 4037 }
bc1c7e93 4038
55c2d311
TG
4039 if (op1 != const0_rtx)
4040 switch (code)
4041 {
4042 case TRUNC_MOD_EXPR:
4043 case TRUNC_DIV_EXPR:
34f016ed 4044 if (op1_is_constant)
55c2d311 4045 {
d8f1376c 4046 if (unsignedp)
55c2d311 4047 {
079c527f 4048 unsigned HOST_WIDE_INT mh, ml;
55c2d311
TG
4049 int pre_shift, post_shift;
4050 int dummy;
1c4a429a
JH
4051 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4052 & GET_MODE_MASK (compute_mode));
55c2d311
TG
4053
4054 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4055 {
4056 pre_shift = floor_log2 (d);
4057 if (rem_flag)
4058 {
2f1cd2eb
RS
4059 unsigned HOST_WIDE_INT mask
4060 = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1;
db3cf6fb
MS
4061 remainder
4062 = expand_binop (compute_mode, and_optab, op0,
2f1cd2eb 4063 gen_int_mode (mask, compute_mode),
db3cf6fb
MS
4064 remainder, 1,
4065 OPTAB_LIB_WIDEN);
55c2d311 4066 if (remainder)
c8dbc8ca 4067 return gen_lowpart (mode, remainder);
55c2d311
TG
4068 }
4069 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
eb6c3df1 4070 pre_shift, tquotient, 1);
55c2d311 4071 }
34f016ed 4072 else if (size <= HOST_BITS_PER_WIDE_INT)
55c2d311 4073 {
dc1d6150 4074 if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
55c2d311 4075 {
dc1d6150
TG
4076 /* Most significant bit of divisor is set; emit an scc
4077 insn. */
b45f0e58
PB
4078 quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4079 compute_mode, 1, 1);
55c2d311
TG
4080 }
4081 else
4082 {
dc1d6150
TG
4083 /* Find a suitable multiplier and right shift count
4084 instead of multiplying with D. */
4085
4086 mh = choose_multiplier (d, size, size,
4087 &ml, &post_shift, &dummy);
4088
4089 /* If the suggested multiplier is more than SIZE bits,
4090 we can do better for even divisors, using an
4091 initial right shift. */
4092 if (mh != 0 && (d & 1) == 0)
4093 {
4094 pre_shift = floor_log2 (d & -d);
4095 mh = choose_multiplier (d >> pre_shift, size,
4096 size - pre_shift,
4097 &ml, &post_shift, &dummy);
5b0264cb 4098 gcc_assert (!mh);
dc1d6150
TG
4099 }
4100 else
4101 pre_shift = 0;
4102
4103 if (mh != 0)
4104 {
4105 rtx t1, t2, t3, t4;
4106
02a65aef
R
4107 if (post_shift - 1 >= BITS_PER_WORD)
4108 goto fail1;
4109
965703ed 4110 extra_cost
5322d07e
NF
4111 = (shift_cost (speed, compute_mode, post_shift - 1)
4112 + shift_cost (speed, compute_mode, 1)
4113 + 2 * add_cost (speed, compute_mode));
2f1cd2eb
RS
4114 t1 = expmed_mult_highpart
4115 (compute_mode, op0,
4116 gen_int_mode (ml, compute_mode),
4117 NULL_RTX, 1, max_cost - extra_cost);
dc1d6150
TG
4118 if (t1 == 0)
4119 goto fail1;
38a448ca
RH
4120 t2 = force_operand (gen_rtx_MINUS (compute_mode,
4121 op0, t1),
dc1d6150 4122 NULL_RTX);
eb6c3df1
RG
4123 t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4124 t2, 1, NULL_RTX, 1);
38a448ca
RH
4125 t4 = force_operand (gen_rtx_PLUS (compute_mode,
4126 t1, t3),
dc1d6150 4127 NULL_RTX);
4a90aeeb
NS
4128 quotient = expand_shift
4129 (RSHIFT_EXPR, compute_mode, t4,
eb6c3df1 4130 post_shift - 1, tquotient, 1);
dc1d6150
TG
4131 }
4132 else
4133 {
4134 rtx t1, t2;
4135
02a65aef
R
4136 if (pre_shift >= BITS_PER_WORD
4137 || post_shift >= BITS_PER_WORD)
4138 goto fail1;
4139
4a90aeeb
NS
4140 t1 = expand_shift
4141 (RSHIFT_EXPR, compute_mode, op0,
eb6c3df1 4142 pre_shift, NULL_RTX, 1);
965703ed 4143 extra_cost
5322d07e
NF
4144 = (shift_cost (speed, compute_mode, pre_shift)
4145 + shift_cost (speed, compute_mode, post_shift));
2f1cd2eb
RS
4146 t2 = expmed_mult_highpart
4147 (compute_mode, t1,
4148 gen_int_mode (ml, compute_mode),
4149 NULL_RTX, 1, max_cost - extra_cost);
dc1d6150
TG
4150 if (t2 == 0)
4151 goto fail1;
4a90aeeb
NS
4152 quotient = expand_shift
4153 (RSHIFT_EXPR, compute_mode, t2,
eb6c3df1 4154 post_shift, tquotient, 1);
dc1d6150 4155 }
55c2d311
TG
4156 }
4157 }
34f016ed
TG
4158 else /* Too wide mode to use tricky code */
4159 break;
55c2d311
TG
4160
4161 insn = get_last_insn ();
7543f918
JR
4162 if (insn != last)
4163 set_dst_reg_note (insn, REG_EQUAL,
4164 gen_rtx_UDIV (compute_mode, op0, op1),
4165 quotient);
55c2d311
TG
4166 }
4167 else /* TRUNC_DIV, signed */
4168 {
4169 unsigned HOST_WIDE_INT ml;
4170 int lgup, post_shift;
e71c0aa7 4171 rtx mlr;
55c2d311 4172 HOST_WIDE_INT d = INTVAL (op1);
e4c9f3c2
ILT
4173 unsigned HOST_WIDE_INT abs_d;
4174
093253be
ILT
4175 /* Since d might be INT_MIN, we have to cast to
4176 unsigned HOST_WIDE_INT before negating to avoid
4177 undefined signed overflow. */
6d9c91e9
ILT
4178 abs_d = (d >= 0
4179 ? (unsigned HOST_WIDE_INT) d
4180 : - (unsigned HOST_WIDE_INT) d);
55c2d311
TG
4181
4182 /* n rem d = n rem -d */
4183 if (rem_flag && d < 0)
4184 {
4185 d = abs_d;
2496c7bd 4186 op1 = gen_int_mode (abs_d, compute_mode);
55c2d311
TG
4187 }
4188
4189 if (d == 1)
4190 quotient = op0;
4191 else if (d == -1)
4192 quotient = expand_unop (compute_mode, neg_optab, op0,
4193 tquotient, 0);
f6c1336c
ILT
4194 else if (HOST_BITS_PER_WIDE_INT >= size
4195 && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
f737b132
RK
4196 {
4197 /* This case is not handled correctly below. */
4198 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4199 compute_mode, 1, 1);
4200 if (quotient == 0)
4201 goto fail1;
4202 }
55c2d311 4203 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
5322d07e
NF
4204 && (rem_flag
4205 ? smod_pow2_cheap (speed, compute_mode)
4206 : sdiv_pow2_cheap (speed, compute_mode))
0b55e932
RS
4207 /* We assume that cheap metric is true if the
4208 optab has an expander for this mode. */
166cdb08
JH
4209 && ((optab_handler ((rem_flag ? smod_optab
4210 : sdiv_optab),
947131ba 4211 compute_mode)
a8c7e72d 4212 != CODE_FOR_nothing)
947131ba
RS
4213 || (optab_handler (sdivmod_optab,
4214 compute_mode)
4215 != CODE_FOR_nothing)))
55c2d311
TG
4216 ;
4217 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4218 {
0b55e932
RS
4219 if (rem_flag)
4220 {
4221 remainder = expand_smod_pow2 (compute_mode, op0, d);
4222 if (remainder)
4223 return gen_lowpart (mode, remainder);
4224 }
3d520aaf 4225
5322d07e 4226 if (sdiv_pow2_cheap (speed, compute_mode)
947131ba 4227 && ((optab_handler (sdiv_optab, compute_mode)
3d520aaf 4228 != CODE_FOR_nothing)
947131ba 4229 || (optab_handler (sdivmod_optab, compute_mode)
3d520aaf
DE
4230 != CODE_FOR_nothing)))
4231 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4232 compute_mode, op0,
4233 gen_int_mode (abs_d,
4234 compute_mode),
4235 NULL_RTX, 0);
4236 else
4237 quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
55c2d311 4238
0b55e932
RS
4239 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4240 negate the quotient. */
55c2d311
TG
4241 if (d < 0)
4242 {
4243 insn = get_last_insn ();
4e430df8 4244 if (insn != last
c8e7fe58
DE
4245 && abs_d < ((unsigned HOST_WIDE_INT) 1
4246 << (HOST_BITS_PER_WIDE_INT - 1)))
7543f918
JR
4247 set_dst_reg_note (insn, REG_EQUAL,
4248 gen_rtx_DIV (compute_mode, op0,
6d26322f
JR
4249 gen_int_mode
4250 (abs_d,
4251 compute_mode)),
7543f918 4252 quotient);
55c2d311
TG
4253
4254 quotient = expand_unop (compute_mode, neg_optab,
4255 quotient, quotient, 0);
4256 }
4257 }
34f016ed 4258 else if (size <= HOST_BITS_PER_WIDE_INT)
55c2d311
TG
4259 {
4260 choose_multiplier (abs_d, size, size - 1,
079c527f 4261 &ml, &post_shift, &lgup);
55c2d311
TG
4262 if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4263 {
4264 rtx t1, t2, t3;
4265
02a65aef
R
4266 if (post_shift >= BITS_PER_WORD
4267 || size - 1 >= BITS_PER_WORD)
4268 goto fail1;
4269
5322d07e
NF
4270 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4271 + shift_cost (speed, compute_mode, size - 1)
4272 + add_cost (speed, compute_mode));
2f1cd2eb
RS
4273 t1 = expmed_mult_highpart
4274 (compute_mode, op0, gen_int_mode (ml, compute_mode),
4275 NULL_RTX, 0, max_cost - extra_cost);
55c2d311
TG
4276 if (t1 == 0)
4277 goto fail1;
4a90aeeb
NS
4278 t2 = expand_shift
4279 (RSHIFT_EXPR, compute_mode, t1,
eb6c3df1 4280 post_shift, NULL_RTX, 0);
4a90aeeb
NS
4281 t3 = expand_shift
4282 (RSHIFT_EXPR, compute_mode, op0,
eb6c3df1 4283 size - 1, NULL_RTX, 0);
55c2d311 4284 if (d < 0)
c5c76735
JL
4285 quotient
4286 = force_operand (gen_rtx_MINUS (compute_mode,
4287 t3, t2),
4288 tquotient);
55c2d311 4289 else
c5c76735
JL
4290 quotient
4291 = force_operand (gen_rtx_MINUS (compute_mode,
4292 t2, t3),
4293 tquotient);
55c2d311
TG
4294 }
4295 else
4296 {
4297 rtx t1, t2, t3, t4;
4298
02a65aef
R
4299 if (post_shift >= BITS_PER_WORD
4300 || size - 1 >= BITS_PER_WORD)
4301 goto fail1;
4302
55c2d311 4303 ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
e71c0aa7 4304 mlr = gen_int_mode (ml, compute_mode);
5322d07e
NF
4305 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4306 + shift_cost (speed, compute_mode, size - 1)
4307 + 2 * add_cost (speed, compute_mode));
00f07b86 4308 t1 = expmed_mult_highpart (compute_mode, op0, mlr,
71af73bb
TG
4309 NULL_RTX, 0,
4310 max_cost - extra_cost);
55c2d311
TG
4311 if (t1 == 0)
4312 goto fail1;
c5c76735
JL
4313 t2 = force_operand (gen_rtx_PLUS (compute_mode,
4314 t1, op0),
55c2d311 4315 NULL_RTX);
4a90aeeb
NS
4316 t3 = expand_shift
4317 (RSHIFT_EXPR, compute_mode, t2,
eb6c3df1 4318 post_shift, NULL_RTX, 0);
4a90aeeb
NS
4319 t4 = expand_shift
4320 (RSHIFT_EXPR, compute_mode, op0,
eb6c3df1 4321 size - 1, NULL_RTX, 0);
55c2d311 4322 if (d < 0)
c5c76735
JL
4323 quotient
4324 = force_operand (gen_rtx_MINUS (compute_mode,
4325 t4, t3),
4326 tquotient);
55c2d311 4327 else
c5c76735
JL
4328 quotient
4329 = force_operand (gen_rtx_MINUS (compute_mode,
4330 t3, t4),
4331 tquotient);
55c2d311
TG
4332 }
4333 }
34f016ed
TG
4334 else /* Too wide mode to use tricky code */
4335 break;
55c2d311 4336
4e430df8 4337 insn = get_last_insn ();
7543f918
JR
4338 if (insn != last)
4339 set_dst_reg_note (insn, REG_EQUAL,
4340 gen_rtx_DIV (compute_mode, op0, op1),
4341 quotient);
55c2d311
TG
4342 }
4343 break;
4344 }
4345 fail1:
4346 delete_insns_since (last);
4347 break;
44037a66 4348
55c2d311
TG
4349 case FLOOR_DIV_EXPR:
4350 case FLOOR_MOD_EXPR:
4351 /* We will come here only for signed operations. */
4352 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4353 {
079c527f 4354 unsigned HOST_WIDE_INT mh, ml;
55c2d311
TG
4355 int pre_shift, lgup, post_shift;
4356 HOST_WIDE_INT d = INTVAL (op1);
4357
4358 if (d > 0)
4359 {
4360 /* We could just as easily deal with negative constants here,
4361 but it does not seem worth the trouble for GCC 2.6. */
4362 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4363 {
4364 pre_shift = floor_log2 (d);
4365 if (rem_flag)
4366 {
2f1cd2eb
RS
4367 unsigned HOST_WIDE_INT mask
4368 = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1;
4369 remainder = expand_binop
4370 (compute_mode, and_optab, op0,
4371 gen_int_mode (mask, compute_mode),
4372 remainder, 0, OPTAB_LIB_WIDEN);
55c2d311 4373 if (remainder)
c8dbc8ca 4374 return gen_lowpart (mode, remainder);
55c2d311 4375 }
4a90aeeb
NS
4376 quotient = expand_shift
4377 (RSHIFT_EXPR, compute_mode, op0,
eb6c3df1 4378 pre_shift, tquotient, 0);
55c2d311
TG
4379 }
4380 else
4381 {
4382 rtx t1, t2, t3, t4;
4383
4384 mh = choose_multiplier (d, size, size - 1,
4385 &ml, &post_shift, &lgup);
5b0264cb 4386 gcc_assert (!mh);
55c2d311 4387
02a65aef
R
4388 if (post_shift < BITS_PER_WORD
4389 && size - 1 < BITS_PER_WORD)
55c2d311 4390 {
4a90aeeb
NS
4391 t1 = expand_shift
4392 (RSHIFT_EXPR, compute_mode, op0,
eb6c3df1 4393 size - 1, NULL_RTX, 0);
02a65aef
R
4394 t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4395 NULL_RTX, 0, OPTAB_WIDEN);
5322d07e
NF
4396 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4397 + shift_cost (speed, compute_mode, size - 1)
4398 + 2 * add_cost (speed, compute_mode));
2f1cd2eb
RS
4399 t3 = expmed_mult_highpart
4400 (compute_mode, t2, gen_int_mode (ml, compute_mode),
4401 NULL_RTX, 1, max_cost - extra_cost);
02a65aef
R
4402 if (t3 != 0)
4403 {
4a90aeeb
NS
4404 t4 = expand_shift
4405 (RSHIFT_EXPR, compute_mode, t3,
eb6c3df1 4406 post_shift, NULL_RTX, 1);
02a65aef
R
4407 quotient = expand_binop (compute_mode, xor_optab,
4408 t4, t1, tquotient, 0,
4409 OPTAB_WIDEN);
4410 }
55c2d311
TG
4411 }
4412 }
4413 }
4414 else
4415 {
4416 rtx nsign, t1, t2, t3, t4;
38a448ca
RH
4417 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4418 op0, constm1_rtx), NULL_RTX);
55c2d311
TG
4419 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4420 0, OPTAB_WIDEN);
4a90aeeb
NS
4421 nsign = expand_shift
4422 (RSHIFT_EXPR, compute_mode, t2,
eb6c3df1 4423 size - 1, NULL_RTX, 0);
38a448ca 4424 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
55c2d311
TG
4425 NULL_RTX);
4426 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4427 NULL_RTX, 0);
4428 if (t4)
4429 {
4430 rtx t5;
4431 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4432 NULL_RTX, 0);
38a448ca
RH
4433 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4434 t4, t5),
55c2d311
TG
4435 tquotient);
4436 }
4437 }
4438 }
4439
4440 if (quotient != 0)
4441 break;
4442 delete_insns_since (last);
4443
4444 /* Try using an instruction that produces both the quotient and
4445 remainder, using truncation. We can easily compensate the quotient
4446 or remainder to get floor rounding, once we have the remainder.
4447 Notice that we compute also the final remainder value here,
4448 and return the result right away. */
a45cf58c 4449 if (target == 0 || GET_MODE (target) != compute_mode)
55c2d311 4450 target = gen_reg_rtx (compute_mode);
668443c9 4451
55c2d311
TG
4452 if (rem_flag)
4453 {
668443c9 4454 remainder
f8cfc6aa 4455 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
55c2d311
TG
4456 quotient = gen_reg_rtx (compute_mode);
4457 }
4458 else
4459 {
668443c9 4460 quotient
f8cfc6aa 4461 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
55c2d311
TG
4462 remainder = gen_reg_rtx (compute_mode);
4463 }
4464
4465 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4466 quotient, remainder, 0))
4467 {
4468 /* This could be computed with a branch-less sequence.
4469 Save that for later. */
4470 rtx tem;
f3f6fb16 4471 rtx_code_label *label = gen_label_rtx ();
f5963e61 4472 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
55c2d311
TG
4473 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4474 NULL_RTX, 0, OPTAB_WIDEN);
f5963e61 4475 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
55c2d311
TG
4476 expand_dec (quotient, const1_rtx);
4477 expand_inc (remainder, op1);
4478 emit_label (label);
c8dbc8ca 4479 return gen_lowpart (mode, rem_flag ? remainder : quotient);
55c2d311
TG
4480 }
4481
4482 /* No luck with division elimination or divmod. Have to do it
4483 by conditionally adjusting op0 *and* the result. */
44037a66 4484 {
f3f6fb16 4485 rtx_code_label *label1, *label2, *label3, *label4, *label5;
55c2d311
TG
4486 rtx adjusted_op0;
4487 rtx tem;
4488
4489 quotient = gen_reg_rtx (compute_mode);
4490 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4491 label1 = gen_label_rtx ();
4492 label2 = gen_label_rtx ();
4493 label3 = gen_label_rtx ();
4494 label4 = gen_label_rtx ();
4495 label5 = gen_label_rtx ();
f5963e61
JL
4496 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4497 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
55c2d311
TG
4498 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4499 quotient, 0, OPTAB_LIB_WIDEN);
4500 if (tem != quotient)
4501 emit_move_insn (quotient, tem);
4502 emit_jump_insn (gen_jump (label5));
4503 emit_barrier ();
4504 emit_label (label1);
44037a66 4505 expand_inc (adjusted_op0, const1_rtx);
55c2d311
TG
4506 emit_jump_insn (gen_jump (label4));
4507 emit_barrier ();
4508 emit_label (label2);
f5963e61 4509 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
55c2d311
TG
4510 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4511 quotient, 0, OPTAB_LIB_WIDEN);
4512 if (tem != quotient)
4513 emit_move_insn (quotient, tem);
4514 emit_jump_insn (gen_jump (label5));
4515 emit_barrier ();
4516 emit_label (label3);
4517 expand_dec (adjusted_op0, const1_rtx);
4518 emit_label (label4);
4519 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4520 quotient, 0, OPTAB_LIB_WIDEN);
4521 if (tem != quotient)
4522 emit_move_insn (quotient, tem);
4523 expand_dec (quotient, const1_rtx);
4524 emit_label (label5);
44037a66 4525 }
55c2d311 4526 break;
44037a66 4527
55c2d311
TG
4528 case CEIL_DIV_EXPR:
4529 case CEIL_MOD_EXPR:
4530 if (unsignedp)
4531 {
9176af2f
TG
4532 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4533 {
4534 rtx t1, t2, t3;
4535 unsigned HOST_WIDE_INT d = INTVAL (op1);
4536 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
eb6c3df1 4537 floor_log2 (d), tquotient, 1);
9176af2f 4538 t2 = expand_binop (compute_mode, and_optab, op0,
2f1cd2eb 4539 gen_int_mode (d - 1, compute_mode),
9176af2f
TG
4540 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4541 t3 = gen_reg_rtx (compute_mode);
4542 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4543 compute_mode, 1, 1);
412381d9
TG
4544 if (t3 == 0)
4545 {
f3f6fb16 4546 rtx_code_label *lab;
412381d9 4547 lab = gen_label_rtx ();
f5963e61 4548 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
412381d9
TG
4549 expand_inc (t1, const1_rtx);
4550 emit_label (lab);
4551 quotient = t1;
4552 }
4553 else
38a448ca
RH
4554 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4555 t1, t3),
412381d9 4556 tquotient);
9176af2f
TG
4557 break;
4558 }
55c2d311
TG
4559
4560 /* Try using an instruction that produces both the quotient and
4561 remainder, using truncation. We can easily compensate the
4562 quotient or remainder to get ceiling rounding, once we have the
4563 remainder. Notice that we compute also the final remainder
4564 value here, and return the result right away. */
a45cf58c 4565 if (target == 0 || GET_MODE (target) != compute_mode)
55c2d311 4566 target = gen_reg_rtx (compute_mode);
668443c9 4567
55c2d311
TG
4568 if (rem_flag)
4569 {
f8cfc6aa 4570 remainder = (REG_P (target)
668443c9 4571 ? target : gen_reg_rtx (compute_mode));
55c2d311
TG
4572 quotient = gen_reg_rtx (compute_mode);
4573 }
4574 else
4575 {
f8cfc6aa 4576 quotient = (REG_P (target)
668443c9 4577 ? target : gen_reg_rtx (compute_mode));
55c2d311
TG
4578 remainder = gen_reg_rtx (compute_mode);
4579 }
4580
4581 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4582 remainder, 1))
4583 {
4584 /* This could be computed with a branch-less sequence.
4585 Save that for later. */
f3f6fb16 4586 rtx_code_label *label = gen_label_rtx ();
f5963e61
JL
4587 do_cmp_and_jump (remainder, const0_rtx, EQ,
4588 compute_mode, label);
55c2d311
TG
4589 expand_inc (quotient, const1_rtx);
4590 expand_dec (remainder, op1);
4591 emit_label (label);
c8dbc8ca 4592 return gen_lowpart (mode, rem_flag ? remainder : quotient);
55c2d311
TG
4593 }
4594
4595 /* No luck with division elimination or divmod. Have to do it
4596 by conditionally adjusting op0 *and* the result. */
44037a66 4597 {
f3f6fb16 4598 rtx_code_label *label1, *label2;
55c2d311
TG
4599 rtx adjusted_op0, tem;
4600
4601 quotient = gen_reg_rtx (compute_mode);
4602 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4603 label1 = gen_label_rtx ();
4604 label2 = gen_label_rtx ();
f5963e61
JL
4605 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4606 compute_mode, label1);
55c2d311
TG
4607 emit_move_insn (quotient, const0_rtx);
4608 emit_jump_insn (gen_jump (label2));
4609 emit_barrier ();
4610 emit_label (label1);
4611 expand_dec (adjusted_op0, const1_rtx);
4612 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4613 quotient, 1, OPTAB_LIB_WIDEN);
4614 if (tem != quotient)
4615 emit_move_insn (quotient, tem);
4616 expand_inc (quotient, const1_rtx);
4617 emit_label (label2);
44037a66 4618 }
55c2d311
TG
4619 }
4620 else /* signed */
4621 {
73f27728
RK
4622 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4623 && INTVAL (op1) >= 0)
4624 {
4625 /* This is extremely similar to the code for the unsigned case
4626 above. For 2.7 we should merge these variants, but for
4627 2.6.1 I don't want to touch the code for unsigned since that
4628 get used in C. The signed case will only be used by other
4629 languages (Ada). */
4630
4631 rtx t1, t2, t3;
4632 unsigned HOST_WIDE_INT d = INTVAL (op1);
4633 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
eb6c3df1 4634 floor_log2 (d), tquotient, 0);
73f27728 4635 t2 = expand_binop (compute_mode, and_optab, op0,
2f1cd2eb 4636 gen_int_mode (d - 1, compute_mode),
73f27728
RK
4637 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4638 t3 = gen_reg_rtx (compute_mode);
4639 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4640 compute_mode, 1, 1);
4641 if (t3 == 0)
4642 {
f3f6fb16 4643 rtx_code_label *lab;
73f27728 4644 lab = gen_label_rtx ();
f5963e61 4645 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
73f27728
RK
4646 expand_inc (t1, const1_rtx);
4647 emit_label (lab);
4648 quotient = t1;
4649 }
4650 else
38a448ca
RH
4651 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4652 t1, t3),
73f27728
RK
4653 tquotient);
4654 break;
4655 }
4656
55c2d311
TG
4657 /* Try using an instruction that produces both the quotient and
4658 remainder, using truncation. We can easily compensate the
4659 quotient or remainder to get ceiling rounding, once we have the
4660 remainder. Notice that we compute also the final remainder
4661 value here, and return the result right away. */
a45cf58c 4662 if (target == 0 || GET_MODE (target) != compute_mode)
55c2d311
TG
4663 target = gen_reg_rtx (compute_mode);
4664 if (rem_flag)
4665 {
f8cfc6aa 4666 remainder= (REG_P (target)
668443c9 4667 ? target : gen_reg_rtx (compute_mode));
55c2d311
TG
4668 quotient = gen_reg_rtx (compute_mode);
4669 }
4670 else
4671 {
f8cfc6aa 4672 quotient = (REG_P (target)
668443c9 4673 ? target : gen_reg_rtx (compute_mode));
55c2d311
TG
4674 remainder = gen_reg_rtx (compute_mode);
4675 }
4676
4677 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4678 remainder, 0))
4679 {
4680 /* This could be computed with a branch-less sequence.
4681 Save that for later. */
4682 rtx tem;
f3f6fb16 4683 rtx_code_label *label = gen_label_rtx ();
f5963e61
JL
4684 do_cmp_and_jump (remainder, const0_rtx, EQ,
4685 compute_mode, label);
55c2d311
TG
4686 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4687 NULL_RTX, 0, OPTAB_WIDEN);
f5963e61 4688 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
55c2d311
TG
4689 expand_inc (quotient, const1_rtx);
4690 expand_dec (remainder, op1);
4691 emit_label (label);
c8dbc8ca 4692 return gen_lowpart (mode, rem_flag ? remainder : quotient);
55c2d311
TG
4693 }
4694
4695 /* No luck with division elimination or divmod. Have to do it
4696 by conditionally adjusting op0 *and* the result. */
44037a66 4697 {
f3f6fb16 4698 rtx_code_label *label1, *label2, *label3, *label4, *label5;
55c2d311
TG
4699 rtx adjusted_op0;
4700 rtx tem;
4701
4702 quotient = gen_reg_rtx (compute_mode);
4703 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4704 label1 = gen_label_rtx ();
4705 label2 = gen_label_rtx ();
4706 label3 = gen_label_rtx ();
4707 label4 = gen_label_rtx ();
4708 label5 = gen_label_rtx ();
f5963e61
JL
4709 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4710 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4711 compute_mode, label1);
55c2d311
TG
4712 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4713 quotient, 0, OPTAB_LIB_WIDEN);
4714 if (tem != quotient)
4715 emit_move_insn (quotient, tem);
4716 emit_jump_insn (gen_jump (label5));
4717 emit_barrier ();
4718 emit_label (label1);
4719 expand_dec (adjusted_op0, const1_rtx);
4720 emit_jump_insn (gen_jump (label4));
4721 emit_barrier ();
4722 emit_label (label2);
f5963e61
JL
4723 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4724 compute_mode, label3);
55c2d311
TG
4725 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4726 quotient, 0, OPTAB_LIB_WIDEN);
4727 if (tem != quotient)
4728 emit_move_insn (quotient, tem);
4729 emit_jump_insn (gen_jump (label5));
4730 emit_barrier ();
4731 emit_label (label3);
4732 expand_inc (adjusted_op0, const1_rtx);
4733 emit_label (label4);
4734 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4735 quotient, 0, OPTAB_LIB_WIDEN);
4736 if (tem != quotient)
4737 emit_move_insn (quotient, tem);
4738 expand_inc (quotient, const1_rtx);
4739 emit_label (label5);
44037a66 4740 }
55c2d311
TG
4741 }
4742 break;
bc1c7e93 4743
55c2d311
TG
4744 case EXACT_DIV_EXPR:
4745 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4746 {
4747 HOST_WIDE_INT d = INTVAL (op1);
4748 unsigned HOST_WIDE_INT ml;
91ce572a 4749 int pre_shift;
55c2d311
TG
4750 rtx t1;
4751
91ce572a
CC
4752 pre_shift = floor_log2 (d & -d);
4753 ml = invert_mod2n (d >> pre_shift, size);
4754 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
eb6c3df1 4755 pre_shift, NULL_RTX, unsignedp);
69107307 4756 quotient = expand_mult (compute_mode, t1,
2496c7bd 4757 gen_int_mode (ml, compute_mode),
31ff3e0b 4758 NULL_RTX, 1);
55c2d311
TG
4759
4760 insn = get_last_insn ();
7543f918
JR
4761 set_dst_reg_note (insn, REG_EQUAL,
4762 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4763 compute_mode, op0, op1),
4764 quotient);
55c2d311
TG
4765 }
4766 break;
4767
4768 case ROUND_DIV_EXPR:
4769 case ROUND_MOD_EXPR:
69f61901
RK
4770 if (unsignedp)
4771 {
4772 rtx tem;
f3f6fb16 4773 rtx_code_label *label;
69f61901
RK
4774 label = gen_label_rtx ();
4775 quotient = gen_reg_rtx (compute_mode);
4776 remainder = gen_reg_rtx (compute_mode);
4777 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4778 {
4779 rtx tem;
4780 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4781 quotient, 1, OPTAB_LIB_WIDEN);
4782 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4783 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4784 remainder, 1, OPTAB_LIB_WIDEN);
4785 }
0a81f074 4786 tem = plus_constant (compute_mode, op1, -1);
eb6c3df1 4787 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
f5963e61 4788 do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
69f61901
RK
4789 expand_inc (quotient, const1_rtx);
4790 expand_dec (remainder, op1);
4791 emit_label (label);
4792 }
4793 else
4794 {
4795 rtx abs_rem, abs_op1, tem, mask;
f3f6fb16 4796 rtx_code_label *label;
69f61901
RK
4797 label = gen_label_rtx ();
4798 quotient = gen_reg_rtx (compute_mode);
4799 remainder = gen_reg_rtx (compute_mode);
4800 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4801 {
4802 rtx tem;
4803 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4804 quotient, 0, OPTAB_LIB_WIDEN);
4805 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4806 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4807 remainder, 0, OPTAB_LIB_WIDEN);
4808 }
91ce572a
CC
4809 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4810 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
69f61901 4811 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
eb6c3df1 4812 1, NULL_RTX, 1);
f5963e61 4813 do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
69f61901
RK
4814 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4815 NULL_RTX, 0, OPTAB_WIDEN);
4816 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
eb6c3df1 4817 size - 1, NULL_RTX, 0);
69f61901
RK
4818 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4819 NULL_RTX, 0, OPTAB_WIDEN);
4820 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4821 NULL_RTX, 0, OPTAB_WIDEN);
4822 expand_inc (quotient, tem);
4823 tem = expand_binop (compute_mode, xor_optab, mask, op1,
4824 NULL_RTX, 0, OPTAB_WIDEN);
4825 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4826 NULL_RTX, 0, OPTAB_WIDEN);
4827 expand_dec (remainder, tem);
4828 emit_label (label);
4829 }
4830 return gen_lowpart (mode, rem_flag ? remainder : quotient);
c410d49e 4831
e9a25f70 4832 default:
5b0264cb 4833 gcc_unreachable ();
55c2d311 4834 }
44037a66 4835
55c2d311 4836 if (quotient == 0)
44037a66 4837 {
a45cf58c
RK
4838 if (target && GET_MODE (target) != compute_mode)
4839 target = 0;
4840
55c2d311 4841 if (rem_flag)
44037a66 4842 {
32fdf36b 4843 /* Try to produce the remainder without producing the quotient.
d6a7951f 4844 If we seem to have a divmod pattern that does not require widening,
b20b352b 4845 don't try widening here. We should really have a WIDEN argument
32fdf36b
TG
4846 to expand_twoval_binop, since what we'd really like to do here is
4847 1) try a mod insn in compute_mode
4848 2) try a divmod insn in compute_mode
4849 3) try a div insn in compute_mode and multiply-subtract to get
4850 remainder
4851 4) try the same things with widening allowed. */
4852 remainder
4853 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4854 op0, op1, target,
4855 unsignedp,
947131ba 4856 ((optab_handler (optab2, compute_mode)
32fdf36b
TG
4857 != CODE_FOR_nothing)
4858 ? OPTAB_DIRECT : OPTAB_WIDEN));
55c2d311 4859 if (remainder == 0)
44037a66
TG
4860 {
4861 /* No luck there. Can we do remainder and divide at once
4862 without a library call? */
55c2d311
TG
4863 remainder = gen_reg_rtx (compute_mode);
4864 if (! expand_twoval_binop ((unsignedp
4865 ? udivmod_optab
4866 : sdivmod_optab),
4867 op0, op1,
4868 NULL_RTX, remainder, unsignedp))
4869 remainder = 0;
44037a66 4870 }
55c2d311
TG
4871
4872 if (remainder)
4873 return gen_lowpart (mode, remainder);
44037a66 4874 }
44037a66 4875
dc38b292
RK
4876 /* Produce the quotient. Try a quotient insn, but not a library call.
4877 If we have a divmod in this mode, use it in preference to widening
4878 the div (for this test we assume it will not fail). Note that optab2
4879 is set to the one of the two optabs that the call below will use. */
4880 quotient
4881 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4882 op0, op1, rem_flag ? NULL_RTX : target,
4883 unsignedp,
947131ba 4884 ((optab_handler (optab2, compute_mode)
dc38b292
RK
4885 != CODE_FOR_nothing)
4886 ? OPTAB_DIRECT : OPTAB_WIDEN));
4887
55c2d311 4888 if (quotient == 0)
44037a66
TG
4889 {
4890 /* No luck there. Try a quotient-and-remainder insn,
4891 keeping the quotient alone. */
55c2d311 4892 quotient = gen_reg_rtx (compute_mode);
44037a66 4893 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
55c2d311
TG
4894 op0, op1,
4895 quotient, NULL_RTX, unsignedp))
4896 {
4897 quotient = 0;
4898 if (! rem_flag)
4899 /* Still no luck. If we are not computing the remainder,
4900 use a library call for the quotient. */
4901 quotient = sign_expand_binop (compute_mode,
4902 udiv_optab, sdiv_optab,
4903 op0, op1, target,
4904 unsignedp, OPTAB_LIB_WIDEN);
4905 }
44037a66 4906 }
44037a66
TG
4907 }
4908
44037a66
TG
4909 if (rem_flag)
4910 {
a45cf58c
RK
4911 if (target && GET_MODE (target) != compute_mode)
4912 target = 0;
4913
55c2d311 4914 if (quotient == 0)
b3f8d95d
MM
4915 {
4916 /* No divide instruction either. Use library for remainder. */
4917 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4918 op0, op1, target,
4919 unsignedp, OPTAB_LIB_WIDEN);
4920 /* No remainder function. Try a quotient-and-remainder
4921 function, keeping the remainder. */
4922 if (!remainder)
4923 {
4924 remainder = gen_reg_rtx (compute_mode);
b8698a0f 4925 if (!expand_twoval_binop_libfunc
b3f8d95d
MM
4926 (unsignedp ? udivmod_optab : sdivmod_optab,
4927 op0, op1,
4928 NULL_RTX, remainder,
4929 unsignedp ? UMOD : MOD))
4930 remainder = NULL_RTX;
4931 }
4932 }
44037a66
TG
4933 else
4934 {
4935 /* We divided. Now finish doing X - Y * (X / Y). */
55c2d311
TG
4936 remainder = expand_mult (compute_mode, quotient, op1,
4937 NULL_RTX, unsignedp);
4938 remainder = expand_binop (compute_mode, sub_optab, op0,
4939 remainder, target, unsignedp,
4940 OPTAB_LIB_WIDEN);
44037a66
TG
4941 }
4942 }
4943
55c2d311 4944 return gen_lowpart (mode, rem_flag ? remainder : quotient);
44037a66
TG
4945}
4946\f
/* Return a tree node with data type TYPE, describing the value of X.
   Usually this is an VAR_DECL, if there is no obvious better choice.
   X may be an expression, however we only support those expressions
   generated by loop.c.  */

tree
make_tree (tree type, rtx x)
{
  tree t;

  /* Dispatch on the RTL code of X; constant and arithmetic codes map
     directly onto the corresponding tree constants/expressions, with
     recursive calls handling the operands.  */
  switch (GET_CODE (x))
    {
    case CONST_INT:
    case CONST_WIDE_INT:
      /* Integer constants: build a wide-int tree constant in TYPE's mode.  */
      t = wide_int_to_tree (type, std::make_pair (x, TYPE_MODE (type)));
      return t;

    case CONST_DOUBLE:
      STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
      /* A VOIDmode CONST_DOUBLE on a !TARGET_SUPPORTS_WIDE_INT target is a
	 two-word integer constant; otherwise it is a floating-point
	 constant.  */
      if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
	t = wide_int_to_tree (type,
			      wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
						    HOST_BITS_PER_WIDE_INT * 2));
      else
	{
	  REAL_VALUE_TYPE d;

	  REAL_VALUE_FROM_CONST_DOUBLE (d, x);
	  t = build_real (type, d);
	}

      return t;

    case CONST_VECTOR:
      {
	int units = CONST_VECTOR_NUNITS (x);
	tree itype = TREE_TYPE (type);
	tree *elts;
	int i;

	/* Build a tree with vector elements.  */
	elts = XALLOCAVEC (tree, units);
	for (i = units - 1; i >= 0; --i)
	  {
	    rtx elt = CONST_VECTOR_ELT (x, i);
	    elts[i] = make_tree (itype, elt);
	  }

	return build_vector (type, elts);
      }

    case PLUS:
      return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
			  make_tree (type, XEXP (x, 1)));

    case MINUS:
      return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
			  make_tree (type, XEXP (x, 1)));

    case NEG:
      return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));

    case MULT:
      return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
			  make_tree (type, XEXP (x, 1)));

    case ASHIFT:
      return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
			  make_tree (type, XEXP (x, 1)));

    case LSHIFTRT:
      /* Logical right shift: force the shifted operand to an unsigned type
	 so the tree-level RSHIFT_EXPR has logical (zero-fill) semantics,
	 then convert back to TYPE.  */
      t = unsigned_type_for (type);
      return fold_convert (type, build2 (RSHIFT_EXPR, t,
					 make_tree (t, XEXP (x, 0)),
					 make_tree (type, XEXP (x, 1))));

    case ASHIFTRT:
      /* Arithmetic right shift: likewise, but via the signed type so the
	 shift is sign-filling.  */
      t = signed_type_for (type);
      return fold_convert (type, build2 (RSHIFT_EXPR, t,
					 make_tree (t, XEXP (x, 0)),
					 make_tree (type, XEXP (x, 1))));

    case DIV:
      /* Signed (or real) division.  For integer types, perform the
	 division in the signed variant of TYPE.  */
      if (TREE_CODE (type) != REAL_TYPE)
	t = signed_type_for (type);
      else
	t = type;

      return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
					 make_tree (t, XEXP (x, 0)),
					 make_tree (t, XEXP (x, 1))));
    case UDIV:
      /* Unsigned division: perform it in the unsigned variant of TYPE.  */
      t = unsigned_type_for (type);
      return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
					 make_tree (t, XEXP (x, 0)),
					 make_tree (t, XEXP (x, 1))));

    case SIGN_EXTEND:
    case ZERO_EXTEND:
      /* Rebuild the inner operand in a type matching its machine mode and
	 signedness, then convert to TYPE, which models the extension.  */
      t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
					  GET_CODE (x) == ZERO_EXTEND);
      return fold_convert (type, make_tree (t, XEXP (x, 0)));

    case CONST:
      /* Strip the CONST wrapper and translate what is inside.  */
      return make_tree (type, XEXP (x, 0));

    case SYMBOL_REF:
      t = SYMBOL_REF_DECL (x);
      if (t)
	return fold_convert (type, build_fold_addr_expr (t));
      /* else fall through.  */

    default:
      /* No direct translation: wrap X in an artificial VAR_DECL whose
	 DECL_RTL is X itself.  */
      t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);

      /* If TYPE is a POINTER_TYPE, we might need to convert X from
	 address mode to pointer mode.  */
      if (POINTER_TYPE_P (type))
	x = convert_memory_address_addr_space
	  (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));

      /* Note that we do *not* use SET_DECL_RTL here, because we do not
	 want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
      t->decl_with_rtl.rtl = x;

      return t;
    }
}
44037a66
TG
5075\f
5076/* Compute the logical-and of OP0 and OP1, storing it in TARGET
5077 and returning TARGET.
5078
5079 If TARGET is 0, a pseudo-register or constant is returned. */
5080
5081rtx
ef4bddc2 5082expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
44037a66 5083{
22273300 5084 rtx tem = 0;
44037a66 5085
22273300
JJ
5086 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5087 tem = simplify_binary_operation (AND, mode, op0, op1);
5088 if (tem == 0)
44037a66 5089 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
44037a66
TG
5090
5091 if (target == 0)
5092 target = tem;
5093 else if (tem != target)
5094 emit_move_insn (target, tem);
5095 return target;
5096}
495499da 5097
a41a56b6
RE
5098/* Helper function for emit_store_flag. */
5099static rtx
ef12ae45 5100emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
ef4bddc2 5101 machine_mode mode, machine_mode compare_mode,
92355a9c 5102 int unsignedp, rtx x, rtx y, int normalizep,
ef4bddc2 5103 machine_mode target_mode)
a41a56b6 5104{
a5c7d693 5105 struct expand_operand ops[4];
f3f6fb16
DM
5106 rtx op0, comparison, subtarget;
5107 rtx_insn *last;
ef4bddc2 5108 machine_mode result_mode = targetm.cstore_mode (icode);
45475a3f
PB
5109
5110 last = get_last_insn ();
5111 x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5112 y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
a5c7d693 5113 if (!x || !y)
45475a3f
PB
5114 {
5115 delete_insns_since (last);
5116 return NULL_RTX;
5117 }
5118
92355a9c
PB
5119 if (target_mode == VOIDmode)
5120 target_mode = result_mode;
5121 if (!target)
5122 target = gen_reg_rtx (target_mode);
b8698a0f 5123
a5c7d693 5124 comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
45475a3f 5125
a5c7d693
RS
5126 create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5127 create_fixed_operand (&ops[1], comparison);
5128 create_fixed_operand (&ops[2], x);
5129 create_fixed_operand (&ops[3], y);
5130 if (!maybe_expand_insn (icode, 4, ops))
5131 {
5132 delete_insns_since (last);
5133 return NULL_RTX;
5134 }
5135 subtarget = ops[0].value;
495499da 5136
a41a56b6
RE
5137 /* If we are converting to a wider mode, first convert to
5138 TARGET_MODE, then normalize. This produces better combining
5139 opportunities on machines that have a SIGN_EXTRACT when we are
5140 testing a single bit. This mostly benefits the 68k.
5141
5142 If STORE_FLAG_VALUE does not have the sign bit set when
5143 interpreted in MODE, we can do this conversion as unsigned, which
5144 is usually more efficient. */
45475a3f 5145 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
a41a56b6
RE
5146 {
5147 convert_move (target, subtarget,
2d0c270f
BS
5148 val_signbit_known_clear_p (result_mode,
5149 STORE_FLAG_VALUE));
a41a56b6 5150 op0 = target;
45475a3f 5151 result_mode = target_mode;
a41a56b6
RE
5152 }
5153 else
5154 op0 = subtarget;
5155
5156 /* If we want to keep subexpressions around, don't reuse our last
5157 target. */
5158 if (optimize)
5159 subtarget = 0;
5160
5161 /* Now normalize to the proper value in MODE. Sometimes we don't
5162 have to do anything. */
5163 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5164 ;
5165 /* STORE_FLAG_VALUE might be the most negative number, so write
5166 the comparison this way to avoid a compiler-time warning. */
5167 else if (- normalizep == STORE_FLAG_VALUE)
45475a3f 5168 op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
a41a56b6
RE
5169
5170 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5171 it hard to use a value of just the sign bit due to ANSI integer
5172 constant typing rules. */
2d0c270f 5173 else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
45475a3f 5174 op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
eb6c3df1 5175 GET_MODE_BITSIZE (result_mode) - 1, subtarget,
a41a56b6
RE
5176 normalizep == 1);
5177 else
5178 {
5179 gcc_assert (STORE_FLAG_VALUE & 1);
5180
45475a3f 5181 op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
a41a56b6 5182 if (normalizep == -1)
45475a3f 5183 op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
a41a56b6
RE
5184 }
5185
5186 /* If we were converting to a smaller mode, do the conversion now. */
45475a3f 5187 if (target_mode != result_mode)
a41a56b6
RE
5188 {
5189 convert_move (target, op0, 0);
5190 return target;
5191 }
5192 else
5193 return op0;
5194}
5195
44037a66 5196
ef12ae45
PB
5197/* A subroutine of emit_store_flag only including "tricks" that do not
5198 need a recursive call. These are kept separate to avoid infinite
5199 loops. */
44037a66 5200
ef12ae45
PB
5201static rtx
5202emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
ef4bddc2
RS
5203 machine_mode mode, int unsignedp, int normalizep,
5204 machine_mode target_mode)
44037a66
TG
5205{
5206 rtx subtarget;
5207 enum insn_code icode;
ef4bddc2 5208 machine_mode compare_mode;
f90b7a5a 5209 enum mode_class mclass;
45475a3f 5210 enum rtx_code scode;
ef12ae45 5211 rtx tem;
44037a66 5212
b30f05db
BS
5213 if (unsignedp)
5214 code = unsigned_condition (code);
45475a3f 5215 scode = swap_condition (code);
b30f05db 5216
c2615a67
RK
5217 /* If one operand is constant, make it the second one. Only do this
5218 if the other operand is not constant as well. */
5219
8c9864f3 5220 if (swap_commutative_operands_p (op0, op1))
c2615a67
RK
5221 {
5222 tem = op0;
5223 op0 = op1;
5224 op1 = tem;
5225 code = swap_condition (code);
5226 }
5227
6405e07b
DE
5228 if (mode == VOIDmode)
5229 mode = GET_MODE (op0);
5230
c410d49e 5231 /* For some comparisons with 1 and -1, we can convert this to
44037a66 5232 comparisons with zero. This will often produce more opportunities for
0f41302f 5233 store-flag insns. */
44037a66
TG
5234
5235 switch (code)
5236 {
5237 case LT:
5238 if (op1 == const1_rtx)
5239 op1 = const0_rtx, code = LE;
5240 break;
5241 case LE:
5242 if (op1 == constm1_rtx)
5243 op1 = const0_rtx, code = LT;
5244 break;
5245 case GE:
5246 if (op1 == const1_rtx)
5247 op1 = const0_rtx, code = GT;
5248 break;
5249 case GT:
5250 if (op1 == constm1_rtx)
5251 op1 = const0_rtx, code = GE;
5252 break;
5253 case GEU:
5254 if (op1 == const1_rtx)
5255 op1 = const0_rtx, code = NE;
5256 break;
5257 case LTU:
5258 if (op1 == const1_rtx)
5259 op1 = const0_rtx, code = EQ;
5260 break;
e9a25f70
JL
5261 default:
5262 break;
44037a66
TG
5263 }
5264
884815aa
JB
5265 /* If we are comparing a double-word integer with zero or -1, we can
5266 convert the comparison into one involving a single word. */
6912b84b
RK
5267 if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5268 && GET_MODE_CLASS (mode) == MODE_INT
3c0cb5de 5269 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
6912b84b 5270 {
884815aa
JB
5271 if ((code == EQ || code == NE)
5272 && (op1 == const0_rtx || op1 == constm1_rtx))
6912b84b 5273 {
1ed20a40 5274 rtx op00, op01;
8433f113 5275
a41a56b6
RE
5276 /* Do a logical OR or AND of the two words and compare the
5277 result. */
8433f113
RH
5278 op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5279 op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
1ed20a40
PB
5280 tem = expand_binop (word_mode,
5281 op1 == const0_rtx ? ior_optab : and_optab,
5282 op00, op01, NULL_RTX, unsignedp,
5283 OPTAB_DIRECT);
884815aa 5284
1ed20a40
PB
5285 if (tem != 0)
5286 tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
92355a9c 5287 unsignedp, normalizep);
6912b84b 5288 }
884815aa 5289 else if ((code == LT || code == GE) && op1 == const0_rtx)
8433f113
RH
5290 {
5291 rtx op0h;
5292
5293 /* If testing the sign bit, can just test on high word. */
5294 op0h = simplify_gen_subreg (word_mode, op0, mode,
a41a56b6
RE
5295 subreg_highpart_offset (word_mode,
5296 mode));
1ed20a40
PB
5297 tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5298 unsignedp, normalizep);
5299 }
5300 else
5301 tem = NULL_RTX;
5302
5303 if (tem)
5304 {
92355a9c 5305 if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
1ed20a40 5306 return tem;
92355a9c
PB
5307 if (!target)
5308 target = gen_reg_rtx (target_mode);
1ed20a40
PB
5309
5310 convert_move (target, tem,
2d0c270f
BS
5311 !val_signbit_known_set_p (word_mode,
5312 (normalizep ? normalizep
5313 : STORE_FLAG_VALUE)));
1ed20a40 5314 return target;
8433f113 5315 }
6912b84b
RK
5316 }
5317
44037a66
TG
5318 /* If this is A < 0 or A >= 0, we can do this by taking the ones
5319 complement of A (for GE) and shifting the sign bit to the low bit. */
5320 if (op1 == const0_rtx && (code == LT || code == GE)
5321 && GET_MODE_CLASS (mode) == MODE_INT
5322 && (normalizep || STORE_FLAG_VALUE == 1
2d0c270f 5323 || val_signbit_p (mode, STORE_FLAG_VALUE)))
44037a66 5324 {
8deb7047 5325 subtarget = target;
44037a66 5326
495499da
PB
5327 if (!target)
5328 target_mode = mode;
5329
44037a66
TG
5330 /* If the result is to be wider than OP0, it is best to convert it
5331 first. If it is to be narrower, it is *incorrect* to convert it
5332 first. */
495499da 5333 else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
44037a66 5334 {
81722fa9 5335 op0 = convert_modes (target_mode, mode, op0, 0);
44037a66
TG
5336 mode = target_mode;
5337 }
5338
5339 if (target_mode != mode)
5340 subtarget = 0;
5341
5342 if (code == GE)
1d6eaf3d
RK
5343 op0 = expand_unop (mode, one_cmpl_optab, op0,
5344 ((STORE_FLAG_VALUE == 1 || normalizep)
5345 ? 0 : subtarget), 0);
44037a66 5346
1d6eaf3d 5347 if (STORE_FLAG_VALUE == 1 || normalizep)
44037a66
TG
5348 /* If we are supposed to produce a 0/1 value, we want to do
5349 a logical shift from the sign bit to the low-order bit; for
5350 a -1/0 value, we do an arithmetic shift. */
5351 op0 = expand_shift (RSHIFT_EXPR, mode, op0,
eb6c3df1 5352 GET_MODE_BITSIZE (mode) - 1,
44037a66
TG
5353 subtarget, normalizep != -1);
5354
5355 if (mode != target_mode)
c2ec26b8 5356 op0 = convert_modes (target_mode, mode, op0, 0);
44037a66
TG
5357
5358 return op0;
5359 }
5360
f90b7a5a
PB
5361 mclass = GET_MODE_CLASS (mode);
5362 for (compare_mode = mode; compare_mode != VOIDmode;
5363 compare_mode = GET_MODE_WIDER_MODE (compare_mode))
a41a56b6 5364 {
ef4bddc2 5365 machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
947131ba 5366 icode = optab_handler (cstore_optab, optab_mode);
f90b7a5a 5367 if (icode != CODE_FOR_nothing)
a41a56b6 5368 {
a41a56b6 5369 do_pending_stack_adjust ();
ef12ae45 5370 tem = emit_cstore (target, icode, code, mode, compare_mode,
92355a9c 5371 unsignedp, op0, op1, normalizep, target_mode);
45475a3f
PB
5372 if (tem)
5373 return tem;
44037a66 5374
45475a3f 5375 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
44037a66 5376 {
ef12ae45 5377 tem = emit_cstore (target, icode, scode, mode, compare_mode,
92355a9c 5378 unsignedp, op1, op0, normalizep, target_mode);
45475a3f
PB
5379 if (tem)
5380 return tem;
44037a66 5381 }
f90b7a5a 5382 break;
44037a66
TG
5383 }
5384 }
5385
ef12ae45
PB
5386 return 0;
5387}
5388
5389/* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5390 and storing in TARGET. Normally return TARGET.
5391 Return 0 if that cannot be done.
5392
5393 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5394 it is VOIDmode, they cannot both be CONST_INT.
5395
5396 UNSIGNEDP is for the case where we have to widen the operands
5397 to perform the operation. It says to use zero-extension.
5398
5399 NORMALIZEP is 1 if we should convert the result to be either zero
5400 or one. Normalize is -1 if we should convert the result to be
5401 either zero or -1. If NORMALIZEP is zero, the result will be left
5402 "raw" out of the scc insn. */
5403
5404rtx
5405emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
ef4bddc2 5406 machine_mode mode, int unsignedp, int normalizep)
ef12ae45 5407{
ef4bddc2 5408 machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
ef12ae45
PB
5409 enum rtx_code rcode;
5410 rtx subtarget;
f3f6fb16
DM
5411 rtx tem, trueval;
5412 rtx_insn *last;
ef12ae45 5413
b2b262e3
JR
5414 /* If we compare constants, we shouldn't use a store-flag operation,
5415 but a constant load. We can get there via the vanilla route that
5416 usually generates a compare-branch sequence, but will in this case
5417 fold the comparison to a constant, and thus elide the branch. */
5418 if (CONSTANT_P (op0) && CONSTANT_P (op1))
5419 return NULL_RTX;
5420
92355a9c
PB
5421 tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5422 target_mode);
ef12ae45
PB
5423 if (tem)
5424 return tem;
44037a66 5425
495499da
PB
5426 /* If we reached here, we can't do this with a scc insn, however there
5427 are some comparisons that can be done in other ways. Don't do any
5428 of these cases if branches are very cheap. */
5429 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5430 return 0;
5431
5432 /* See what we need to return. We can only return a 1, -1, or the
5433 sign bit. */
5434
5435 if (normalizep == 0)
5436 {
5437 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5438 normalizep = STORE_FLAG_VALUE;
5439
2d0c270f 5440 else if (val_signbit_p (mode, STORE_FLAG_VALUE))
495499da
PB
5441 ;
5442 else
5443 return 0;
5444 }
5445
ef12ae45
PB
5446 last = get_last_insn ();
5447
7c27e184
PB
5448 /* If optimizing, use different pseudo registers for each insn, instead
5449 of reusing the same pseudo. This leads to better CSE, but slows
5450 down the compiler, since there are more pseudos */
5451 subtarget = (!optimize
91e66235 5452 && (target_mode == mode)) ? target : NULL_RTX;
495499da
PB
5453 trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5454
5455 /* For floating-point comparisons, try the reverse comparison or try
5456 changing the "orderedness" of the comparison. */
5457 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5458 {
5459 enum rtx_code first_code;
5460 bool and_them;
5461
5462 rcode = reverse_condition_maybe_unordered (code);
5463 if (can_compare_p (rcode, mode, ccp_store_flag)
5464 && (code == ORDERED || code == UNORDERED
5465 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5466 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5467 {
533d4b99
PB
5468 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5469 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5470
495499da 5471 /* For the reverse comparison, use either an addition or a XOR. */
533d4b99 5472 if (want_add
68f932c4 5473 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
533d4b99 5474 optimize_insn_for_speed_p ()) == 0)
495499da 5475 {
ef12ae45 5476 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
92355a9c 5477 STORE_FLAG_VALUE, target_mode);
495499da
PB
5478 if (tem)
5479 return expand_binop (target_mode, add_optab, tem,
2f1cd2eb 5480 gen_int_mode (normalizep, target_mode),
495499da
PB
5481 target, 0, OPTAB_WIDEN);
5482 }
533d4b99 5483 else if (!want_add
68f932c4 5484 && rtx_cost (trueval, XOR, 1,
533d4b99 5485 optimize_insn_for_speed_p ()) == 0)
495499da 5486 {
ef12ae45 5487 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
92355a9c 5488 normalizep, target_mode);
495499da
PB
5489 if (tem)
5490 return expand_binop (target_mode, xor_optab, tem, trueval,
5491 target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5492 }
5493 }
5494
5495 delete_insns_since (last);
5496
5497 /* Cannot split ORDERED and UNORDERED, only try the above trick. */
5498 if (code == ORDERED || code == UNORDERED)
5499 return 0;
b8698a0f 5500
495499da
PB
5501 and_them = split_comparison (code, mode, &first_code, &code);
5502
5503 /* If there are no NaNs, the first comparison should always fall through.
5504 Effectively change the comparison to the other one. */
5505 if (!HONOR_NANS (mode))
5506 {
5507 gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
92355a9c
PB
5508 return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5509 target_mode);
495499da
PB
5510 }
5511
5512#ifdef HAVE_conditional_move
5513 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5514 conditional move. */
92355a9c
PB
5515 tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5516 normalizep, target_mode);
495499da
PB
5517 if (tem == 0)
5518 return 0;
5519
5520 if (and_them)
5521 tem = emit_conditional_move (target, code, op0, op1, mode,
5522 tem, const0_rtx, GET_MODE (tem), 0);
5523 else
5524 tem = emit_conditional_move (target, code, op0, op1, mode,
5525 trueval, tem, GET_MODE (tem), 0);
5526
5527 if (tem == 0)
5528 delete_insns_since (last);
5529 return tem;
5530#else
5531 return 0;
5532#endif
5533 }
44037a66 5534
495499da
PB
5535 /* The remaining tricks only apply to integer comparisons. */
5536
5537 if (GET_MODE_CLASS (mode) != MODE_INT)
5538 return 0;
5539
5540 /* If this is an equality comparison of integers, we can try to exclusive-or
44037a66
TG
5541 (or subtract) the two operands and use a recursive call to try the
5542 comparison with zero. Don't do any of these cases if branches are
5543 very cheap. */
5544
495499da 5545 if ((code == EQ || code == NE) && op1 != const0_rtx)
44037a66
TG
5546 {
5547 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5548 OPTAB_WIDEN);
5549
5550 if (tem == 0)
5551 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5552 OPTAB_WIDEN);
5553 if (tem != 0)
a22fb74c
AK
5554 tem = emit_store_flag (target, code, tem, const0_rtx,
5555 mode, unsignedp, normalizep);
495499da
PB
5556 if (tem != 0)
5557 return tem;
5558
5559 delete_insns_since (last);
5560 }
5561
5562 /* For integer comparisons, try the reverse comparison. However, for
5563 small X and if we'd have anyway to extend, implementing "X != 0"
5564 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
5565 rcode = reverse_condition (code);
5566 if (can_compare_p (rcode, mode, ccp_store_flag)
947131ba 5567 && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
495499da
PB
5568 && code == NE
5569 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5570 && op1 == const0_rtx))
5571 {
533d4b99
PB
5572 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5573 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5574
495499da 5575 /* Again, for the reverse comparison, use either an addition or a XOR. */
533d4b99 5576 if (want_add
68f932c4 5577 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
533d4b99 5578 optimize_insn_for_speed_p ()) == 0)
495499da 5579 {
ef12ae45 5580 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
92355a9c 5581 STORE_FLAG_VALUE, target_mode);
495499da
PB
5582 if (tem != 0)
5583 tem = expand_binop (target_mode, add_optab, tem,
2f1cd2eb
RS
5584 gen_int_mode (normalizep, target_mode),
5585 target, 0, OPTAB_WIDEN);
495499da 5586 }
533d4b99 5587 else if (!want_add
68f932c4 5588 && rtx_cost (trueval, XOR, 1,
533d4b99 5589 optimize_insn_for_speed_p ()) == 0)
495499da 5590 {
ef12ae45 5591 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
92355a9c 5592 normalizep, target_mode);
495499da
PB
5593 if (tem != 0)
5594 tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5595 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5596 }
5597
5598 if (tem != 0)
5599 return tem;
5600 delete_insns_since (last);
44037a66
TG
5601 }
5602
c410d49e 5603 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
44037a66
TG
5604 the constant zero. Reject all other comparisons at this point. Only
5605 do LE and GT if branches are expensive since they are expensive on
5606 2-operand machines. */
5607
495499da 5608 if (op1 != const0_rtx
44037a66 5609 || (code != EQ && code != NE
3a4fd356
JH
5610 && (BRANCH_COST (optimize_insn_for_speed_p (),
5611 false) <= 1 || (code != LE && code != GT))))
44037a66
TG
5612 return 0;
5613
44037a66
TG
5614 /* Try to put the result of the comparison in the sign bit. Assume we can't
5615 do the necessary operation below. */
5616
5617 tem = 0;
5618
5619 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5620 the sign bit set. */
5621
5622 if (code == LE)
5623 {
5624 /* This is destructive, so SUBTARGET can't be OP0. */
5625 if (rtx_equal_p (subtarget, op0))
5626 subtarget = 0;
5627
5628 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5629 OPTAB_WIDEN);
5630 if (tem)
5631 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5632 OPTAB_WIDEN);
5633 }
5634
5635 /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
5636 number of bits in the mode of OP0, minus one. */
5637
5638 if (code == GT)
5639 {
5640 if (rtx_equal_p (subtarget, op0))
5641 subtarget = 0;
5642
5643 tem = expand_shift (RSHIFT_EXPR, mode, op0,
eb6c3df1 5644 GET_MODE_BITSIZE (mode) - 1,
44037a66
TG
5645 subtarget, 0);
5646 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5647 OPTAB_WIDEN);
5648 }
c410d49e 5649
44037a66
TG
5650 if (code == EQ || code == NE)
5651 {
5652 /* For EQ or NE, one way to do the comparison is to apply an operation
cc2902df 5653 that converts the operand into a positive number if it is nonzero
44037a66
TG
5654 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5655 for NE we negate. This puts the result in the sign bit. Then we
c410d49e 5656 normalize with a shift, if needed.
44037a66
TG
5657
5658 Two operations that can do the above actions are ABS and FFS, so try
5659 them. If that doesn't work, and MODE is smaller than a full word,
36d747f6 5660 we can use zero-extension to the wider mode (an unsigned conversion)
44037a66
TG
5661 as the operation. */
5662
c410d49e
EC
5663 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5664 that is compensated by the subsequent overflow when subtracting
30f7a378 5665 one / negating. */
91ce572a 5666
947131ba 5667 if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
44037a66 5668 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
947131ba 5669 else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
44037a66
TG
5670 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5671 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5672 {
c2ec26b8 5673 tem = convert_modes (word_mode, mode, op0, 1);
81722fa9 5674 mode = word_mode;
44037a66
TG
5675 }
5676
5677 if (tem != 0)
5678 {
5679 if (code == EQ)
5680 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5681 0, OPTAB_WIDEN);
5682 else
5683 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5684 }
5685
5686 /* If we couldn't do it that way, for NE we can "or" the two's complement
5687 of the value with itself. For EQ, we take the one's complement of
5688 that "or", which is an extra insn, so we only handle EQ if branches
5689 are expensive. */
5690
3a4fd356
JH
5691 if (tem == 0
5692 && (code == NE
5693 || BRANCH_COST (optimize_insn_for_speed_p (),
5694 false) > 1))
44037a66 5695 {
36d747f6
RS
5696 if (rtx_equal_p (subtarget, op0))
5697 subtarget = 0;
5698
44037a66
TG
5699 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5700 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5701 OPTAB_WIDEN);
5702
5703 if (tem && code == EQ)
5704 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5705 }
5706 }
5707
5708 if (tem && normalizep)
5709 tem = expand_shift (RSHIFT_EXPR, mode, tem,
eb6c3df1 5710 GET_MODE_BITSIZE (mode) - 1,
91e66235 5711 subtarget, normalizep == 1);
44037a66 5712
91e66235 5713 if (tem)
44037a66 5714 {
495499da
PB
5715 if (!target)
5716 ;
5717 else if (GET_MODE (tem) != target_mode)
91e66235
MM
5718 {
5719 convert_move (target, tem, 0);
5720 tem = target;
5721 }
5722 else if (!subtarget)
5723 {
5724 emit_move_insn (target, tem);
5725 tem = target;
5726 }
44037a66 5727 }
91e66235 5728 else
44037a66
TG
5729 delete_insns_since (last);
5730
5731 return tem;
5732}
04a8ee2f
TG
5733
5734/* Like emit_store_flag, but always succeeds. */
5735
5736rtx
502b8322 5737emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
ef4bddc2 5738 machine_mode mode, int unsignedp, int normalizep)
04a8ee2f 5739{
f3f6fb16
DM
5740 rtx tem;
5741 rtx_code_label *label;
495499da 5742 rtx trueval, falseval;
04a8ee2f
TG
5743
5744 /* First see if emit_store_flag can do the job. */
5745 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5746 if (tem != 0)
5747 return tem;
5748
495499da
PB
5749 if (!target)
5750 target = gen_reg_rtx (word_mode);
04a8ee2f 5751
495499da
PB
5752 /* If this failed, we have to do this with set/compare/jump/set code.
5753 For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */
5754 trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
b8698a0f 5755 if (code == NE
495499da
PB
5756 && GET_MODE_CLASS (mode) == MODE_INT
5757 && REG_P (target)
5758 && op0 == target
5759 && op1 == const0_rtx)
5760 {
5761 label = gen_label_rtx ();
5762 do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp,
40e90eac 5763 mode, NULL_RTX, NULL_RTX, label, -1);
495499da
PB
5764 emit_move_insn (target, trueval);
5765 emit_label (label);
5766 return target;
5767 }
04a8ee2f 5768
f8cfc6aa 5769 if (!REG_P (target)
04a8ee2f
TG
5770 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5771 target = gen_reg_rtx (GET_MODE (target));
5772
495499da
PB
5773 /* Jump in the right direction if the target cannot implement CODE
5774 but can jump on its reverse condition. */
5775 falseval = const0_rtx;
5776 if (! can_compare_p (code, mode, ccp_jump)
5777 && (! FLOAT_MODE_P (mode)
5778 || code == ORDERED || code == UNORDERED
5779 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5780 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5781 {
5782 enum rtx_code rcode;
5783 if (FLOAT_MODE_P (mode))
5784 rcode = reverse_condition_maybe_unordered (code);
5785 else
5786 rcode = reverse_condition (code);
5787
5788 /* Canonicalize to UNORDERED for the libcall. */
5789 if (can_compare_p (rcode, mode, ccp_jump)
5790 || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5791 {
5792 falseval = trueval;
5793 trueval = const0_rtx;
5794 code = rcode;
5795 }
5796 }
5797
5798 emit_move_insn (target, trueval);
04a8ee2f 5799 label = gen_label_rtx ();
d43e0b7d 5800 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
40e90eac 5801 NULL_RTX, label, -1);
04a8ee2f 5802
495499da 5803 emit_move_insn (target, falseval);
44037a66
TG
5804 emit_label (label);
5805
5806 return target;
5807}
f5963e61
JL
5808\f
5809/* Perform possibly multi-word comparison and conditional jump to LABEL
feb04780
RS
5810 if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE. This is
5811 now a thin wrapper around do_compare_rtx_and_jump. */
f5963e61
JL
5812
5813static void
ef4bddc2 5814do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
f3f6fb16 5815 rtx_code_label *label)
f5963e61 5816{
feb04780
RS
5817 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5818 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
40e90eac 5819 NULL_RTX, NULL_RTX, label, -1);
f5963e61 5820}