1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987-2017 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "predict.h"
30 #include "memmodel.h"
31 #include "tm_p.h"
32 #include "expmed.h"
33 #include "optabs.h"
34 #include "regs.h"
35 #include "emit-rtl.h"
36 #include "diagnostic-core.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "dojump.h"
40 #include "explow.h"
41 #include "expr.h"
42 #include "langhooks.h"
43 #include "tree-vector-builder.h"
44
45 struct target_expmed default_target_expmed;
46 #if SWITCHABLE_TARGET
47 struct target_expmed *this_target_expmed = &default_target_expmed;
48 #endif
49
50 static void store_fixed_bit_field (rtx, opt_scalar_int_mode,
51 unsigned HOST_WIDE_INT,
52 unsigned HOST_WIDE_INT,
53 unsigned HOST_WIDE_INT,
54 unsigned HOST_WIDE_INT,
55 rtx, scalar_int_mode, bool);
56 static void store_fixed_bit_field_1 (rtx, scalar_int_mode,
57 unsigned HOST_WIDE_INT,
58 unsigned HOST_WIDE_INT,
59 rtx, scalar_int_mode, bool);
60 static void store_split_bit_field (rtx, opt_scalar_int_mode,
61 unsigned HOST_WIDE_INT,
62 unsigned HOST_WIDE_INT,
63 unsigned HOST_WIDE_INT,
64 unsigned HOST_WIDE_INT,
65 rtx, scalar_int_mode, bool);
66 static rtx extract_fixed_bit_field (machine_mode, rtx, opt_scalar_int_mode,
67 unsigned HOST_WIDE_INT,
68 unsigned HOST_WIDE_INT, rtx, int, bool);
69 static rtx extract_fixed_bit_field_1 (machine_mode, rtx, scalar_int_mode,
70 unsigned HOST_WIDE_INT,
71 unsigned HOST_WIDE_INT, rtx, int, bool);
72 static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
73 static rtx extract_split_bit_field (rtx, opt_scalar_int_mode,
74 unsigned HOST_WIDE_INT,
75 unsigned HOST_WIDE_INT, int, bool);
76 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
77 static rtx expand_smod_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
78 static rtx expand_sdiv_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
79
80 /* Return a constant integer mask value of mode MODE with BITSIZE ones
81 followed by BITPOS zeros, or the complement of that if COMPLEMENT.
82 The mask is truncated if necessary to the width of mode MODE. The
83 mask is zero-extended if BITSIZE+BITPOS is too small for MODE. */
84
85 static inline rtx
86 mask_rtx (scalar_int_mode mode, int bitpos, int bitsize, bool complement)
87 {
88 return immed_wide_int_const
89 (wi::shifted_mask (bitpos, bitsize, complement,
90 GET_MODE_PRECISION (mode)), mode);
91 }
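/* For example, under the rule above mask_rtx (QImode, 2, 3, false)
   yields the QImode constant 0x1c (three ones starting at bit 2), and
   mask_rtx (QImode, 2, 3, true) yields its complement 0xe3.  */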
92
93 /* Test whether a value is zero or a power of two.  */
94 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
95 (((x) & ((x) - HOST_WIDE_INT_1U)) == 0)
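/* For example, 8 & 7 == 0 and 0 & -1 == 0, so 8 and 0 pass the test,
   while 6 & 5 == 4, so 6 does not.  */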
96
97 struct init_expmed_rtl
98 {
99 rtx reg;
100 rtx plus;
101 rtx neg;
102 rtx mult;
103 rtx sdiv;
104 rtx udiv;
105 rtx sdiv_32;
106 rtx smod_32;
107 rtx wide_mult;
108 rtx wide_lshr;
109 rtx wide_trunc;
110 rtx shift;
111 rtx shift_mult;
112 rtx shift_add;
113 rtx shift_sub0;
114 rtx shift_sub1;
115 rtx zext;
116 rtx trunc;
117
118 rtx pow2[MAX_BITS_PER_WORD];
119 rtx cint[MAX_BITS_PER_WORD];
120 };
121
122 static void
123 init_expmed_one_conv (struct init_expmed_rtl *all, scalar_int_mode to_mode,
124 scalar_int_mode from_mode, bool speed)
125 {
126 int to_size, from_size;
127 rtx which;
128
129 to_size = GET_MODE_PRECISION (to_mode);
130 from_size = GET_MODE_PRECISION (from_mode);
131
132 /* Most partial integers have a precision less than the "full"
133 integer they require for storage. In case one doesn't, for
134 comparison purposes here, reduce the bit size by one in that
135 case. */
136 if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
137 && pow2p_hwi (to_size))
138 to_size --;
139 if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
140 && pow2p_hwi (from_size))
141 from_size --;
142
143 /* Assume cost of zero-extend and sign-extend is the same. */
144 which = (to_size < from_size ? all->trunc : all->zext);
145
146 PUT_MODE (all->reg, from_mode);
147 set_convert_cost (to_mode, from_mode, speed,
148 set_src_cost (which, to_mode, speed));
149 }
150
151 static void
152 init_expmed_one_mode (struct init_expmed_rtl *all,
153 machine_mode mode, int speed)
154 {
155 int m, n, mode_bitsize;
156 machine_mode mode_from;
157
158 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
159
160 PUT_MODE (all->reg, mode);
161 PUT_MODE (all->plus, mode);
162 PUT_MODE (all->neg, mode);
163 PUT_MODE (all->mult, mode);
164 PUT_MODE (all->sdiv, mode);
165 PUT_MODE (all->udiv, mode);
166 PUT_MODE (all->sdiv_32, mode);
167 PUT_MODE (all->smod_32, mode);
168 PUT_MODE (all->wide_trunc, mode);
169 PUT_MODE (all->shift, mode);
170 PUT_MODE (all->shift_mult, mode);
171 PUT_MODE (all->shift_add, mode);
172 PUT_MODE (all->shift_sub0, mode);
173 PUT_MODE (all->shift_sub1, mode);
174 PUT_MODE (all->zext, mode);
175 PUT_MODE (all->trunc, mode);
176
177 set_add_cost (speed, mode, set_src_cost (all->plus, mode, speed));
178 set_neg_cost (speed, mode, set_src_cost (all->neg, mode, speed));
179 set_mul_cost (speed, mode, set_src_cost (all->mult, mode, speed));
180 set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, mode, speed));
181 set_udiv_cost (speed, mode, set_src_cost (all->udiv, mode, speed));
182
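/* The thresholds below are heuristics: a power-of-two signed division
   or modulus is treated as cheap when the division itself costs no
   more than two or four additions respectively.  expand_divmod later
   consults these flags when choosing between a division instruction
   and a shift-based sequence.  */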
183 set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, mode, speed)
184 <= 2 * add_cost (speed, mode)));
185 set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, mode, speed)
186 <= 4 * add_cost (speed, mode)));
187
188 set_shift_cost (speed, mode, 0, 0);
189 {
190 int cost = add_cost (speed, mode);
191 set_shiftadd_cost (speed, mode, 0, cost);
192 set_shiftsub0_cost (speed, mode, 0, cost);
193 set_shiftsub1_cost (speed, mode, 0, cost);
194 }
195
196 n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
197 for (m = 1; m < n; m++)
198 {
199 XEXP (all->shift, 1) = all->cint[m];
200 XEXP (all->shift_mult, 1) = all->pow2[m];
201
202 set_shift_cost (speed, mode, m, set_src_cost (all->shift, mode, speed));
203 set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, mode,
204 speed));
205 set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, mode,
206 speed));
207 set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, mode,
208 speed));
209 }
210
211 scalar_int_mode int_mode_to;
212 if (is_a <scalar_int_mode> (mode, &int_mode_to))
213 {
214 for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
215 mode_from = (machine_mode)(mode_from + 1))
216 init_expmed_one_conv (all, int_mode_to,
217 as_a <scalar_int_mode> (mode_from), speed);
218
219 scalar_int_mode wider_mode;
220 if (GET_MODE_CLASS (int_mode_to) == MODE_INT
221 && GET_MODE_WIDER_MODE (int_mode_to).exists (&wider_mode))
222 {
223 PUT_MODE (all->zext, wider_mode);
224 PUT_MODE (all->wide_mult, wider_mode);
225 PUT_MODE (all->wide_lshr, wider_mode);
226 XEXP (all->wide_lshr, 1) = GEN_INT (mode_bitsize);
227
228 set_mul_widen_cost (speed, wider_mode,
229 set_src_cost (all->wide_mult, wider_mode, speed));
230 set_mul_highpart_cost (speed, int_mode_to,
231 set_src_cost (all->wide_trunc,
232 int_mode_to, speed));
233 }
234 }
235 }
236
237 void
238 init_expmed (void)
239 {
240 struct init_expmed_rtl all;
241 machine_mode mode = QImode;
242 int m, speed;
243
244 memset (&all, 0, sizeof all);
245 for (m = 1; m < MAX_BITS_PER_WORD; m++)
246 {
247 all.pow2[m] = GEN_INT (HOST_WIDE_INT_1 << m);
248 all.cint[m] = GEN_INT (m);
249 }
250
251 /* Avoid using hard regs in ways which may be unsupported. */
252 all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
253 all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
254 all.neg = gen_rtx_NEG (mode, all.reg);
255 all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
256 all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
257 all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
258 all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
259 all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
260 all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
261 all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
262 all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
263 all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
264 all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
265 all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
266 all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
267 all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
268 all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
269 all.trunc = gen_rtx_TRUNCATE (mode, all.reg);
270
271 for (speed = 0; speed < 2; speed++)
272 {
273 crtl->maybe_hot_insn_p = speed;
274 set_zero_cost (speed, set_src_cost (const0_rtx, mode, speed));
275
276 for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
277 mode = (machine_mode)(mode + 1))
278 init_expmed_one_mode (&all, mode, speed);
279
280 if (MIN_MODE_PARTIAL_INT != VOIDmode)
281 for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
282 mode = (machine_mode)(mode + 1))
283 init_expmed_one_mode (&all, mode, speed);
284
285 if (MIN_MODE_VECTOR_INT != VOIDmode)
286 for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
287 mode = (machine_mode)(mode + 1))
288 init_expmed_one_mode (&all, mode, speed);
289 }
290
291 if (alg_hash_used_p ())
292 {
293 struct alg_hash_entry *p = alg_hash_entry_ptr (0);
294 memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
295 }
296 else
297 set_alg_hash_used_p (true);
298 default_rtl_profile ();
299
300 ggc_free (all.trunc);
301 ggc_free (all.shift_sub1);
302 ggc_free (all.shift_sub0);
303 ggc_free (all.shift_add);
304 ggc_free (all.shift_mult);
305 ggc_free (all.shift);
306 ggc_free (all.wide_trunc);
307 ggc_free (all.wide_lshr);
308 ggc_free (all.wide_mult);
309 ggc_free (all.zext);
310 ggc_free (all.smod_32);
311 ggc_free (all.sdiv_32);
312 ggc_free (all.udiv);
313 ggc_free (all.sdiv);
314 ggc_free (all.mult);
315 ggc_free (all.neg);
316 ggc_free (all.plus);
317 ggc_free (all.reg);
318 }
319
320 /* Return an rtx representing minus the value of X.
321 MODE is the intended mode of the result,
322 useful if X is a CONST_INT. */
323
324 rtx
325 negate_rtx (machine_mode mode, rtx x)
326 {
327 rtx result = simplify_unary_operation (NEG, mode, x, mode);
328
329 if (result == 0)
330 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
331
332 return result;
333 }
334
335 /* Whether reverse storage order is supported on the target. */
336 static int reverse_storage_order_supported = -1;
337
338 /* Check whether reverse storage order is supported on the target. */
339
340 static void
341 check_reverse_storage_order_support (void)
342 {
343 if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
344 {
345 reverse_storage_order_supported = 0;
346 sorry ("reverse scalar storage order");
347 }
348 else
349 reverse_storage_order_supported = 1;
350 }
351
352 /* Whether reverse FP storage order is supported on the target. */
353 static int reverse_float_storage_order_supported = -1;
354
355 /* Check whether reverse FP storage order is supported on the target. */
356
357 static void
358 check_reverse_float_storage_order_support (void)
359 {
360 if (FLOAT_WORDS_BIG_ENDIAN != WORDS_BIG_ENDIAN)
361 {
362 reverse_float_storage_order_supported = 0;
363 sorry ("reverse floating-point scalar storage order");
364 }
365 else
366 reverse_float_storage_order_supported = 1;
367 }
368
369 /* Return an rtx representing value of X with reverse storage order.
370 MODE is the intended mode of the result,
371 useful if X is a CONST_INT. */
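/* For scalar integers this amounts to a byte swap; for example,
   flipping the SImode value 0x12345678 yields 0x78563412.  */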
372
373 rtx
374 flip_storage_order (machine_mode mode, rtx x)
375 {
376 scalar_int_mode int_mode;
377 rtx result;
378
379 if (mode == QImode)
380 return x;
381
382 if (COMPLEX_MODE_P (mode))
383 {
384 rtx real = read_complex_part (x, false);
385 rtx imag = read_complex_part (x, true);
386
387 real = flip_storage_order (GET_MODE_INNER (mode), real);
388 imag = flip_storage_order (GET_MODE_INNER (mode), imag);
389
390 return gen_rtx_CONCAT (mode, real, imag);
391 }
392
393 if (__builtin_expect (reverse_storage_order_supported < 0, 0))
394 check_reverse_storage_order_support ();
395
396 if (!is_a <scalar_int_mode> (mode, &int_mode))
397 {
398 if (FLOAT_MODE_P (mode)
399 && __builtin_expect (reverse_float_storage_order_supported < 0, 0))
400 check_reverse_float_storage_order_support ();
401
402 if (!int_mode_for_size (GET_MODE_PRECISION (mode), 0).exists (&int_mode))
403 {
404 sorry ("reverse storage order for %smode", GET_MODE_NAME (mode));
405 return x;
406 }
407 x = gen_lowpart (int_mode, x);
408 }
409
410 result = simplify_unary_operation (BSWAP, int_mode, x, int_mode);
411 if (result == 0)
412 result = expand_unop (int_mode, bswap_optab, x, NULL_RTX, 1);
413
414 if (int_mode != mode)
415 result = gen_lowpart (mode, result);
416
417 return result;
418 }
419
420 /* If MODE is set, adjust bitfield memory MEM so that it points to the
421 first unit of mode MODE that contains a bitfield of size BITSIZE at
422 bit position BITNUM. If MODE is not set, return a BLKmode reference
423 to every byte in the bitfield. Set *NEW_BITNUM to the bit position
424 of the field within the new memory. */
425
426 static rtx
427 narrow_bit_field_mem (rtx mem, opt_scalar_int_mode mode,
428 unsigned HOST_WIDE_INT bitsize,
429 unsigned HOST_WIDE_INT bitnum,
430 unsigned HOST_WIDE_INT *new_bitnum)
431 {
432 scalar_int_mode imode;
433 if (mode.exists (&imode))
434 {
435 unsigned int unit = GET_MODE_BITSIZE (imode);
436 *new_bitnum = bitnum % unit;
437 HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
438 return adjust_bitfield_address (mem, imode, offset);
439 }
440 else
441 {
442 *new_bitnum = bitnum % BITS_PER_UNIT;
443 HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
444 HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
445 / BITS_PER_UNIT);
446 return adjust_bitfield_address_size (mem, BLKmode, offset, size);
447 }
448 }
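/* For example, with a 16-bit IMODE, BITSIZE = 3 and BITNUM = 37, the
   bitfield lives in the third halfword: *NEW_BITNUM becomes 37 % 16 = 5
   and the memory reference is rebased by (37 - 5) / 8 = 4 bytes.  */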
449
450 /* The caller wants to perform insertion or extraction PATTERN on a
451 bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
452 BITREGION_START and BITREGION_END are as for store_bit_field
453 and FIELDMODE is the natural mode of the field.
454
455 Search for a mode that is compatible with the memory access
456 restrictions and (where applicable) with a register insertion or
457 extraction. Return the new memory on success, storing the adjusted
458 bit position in *NEW_BITNUM. Return null otherwise. */
459
460 static rtx
461 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
462 rtx op0, HOST_WIDE_INT bitsize,
463 HOST_WIDE_INT bitnum,
464 unsigned HOST_WIDE_INT bitregion_start,
465 unsigned HOST_WIDE_INT bitregion_end,
466 machine_mode fieldmode,
467 unsigned HOST_WIDE_INT *new_bitnum)
468 {
469 bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
470 bitregion_end, MEM_ALIGN (op0),
471 MEM_VOLATILE_P (op0));
472 scalar_int_mode best_mode;
473 if (iter.next_mode (&best_mode))
474 {
475 /* We can use a memory in BEST_MODE. See whether this is true for
476 any wider modes. All other things being equal, we prefer to
477 use the widest mode possible because it tends to expose more
478 CSE opportunities. */
479 if (!iter.prefer_smaller_modes ())
480 {
481 /* Limit the search to the mode required by the corresponding
482 register insertion or extraction instruction, if any. */
483 scalar_int_mode limit_mode = word_mode;
484 extraction_insn insn;
485 if (get_best_reg_extraction_insn (&insn, pattern,
486 GET_MODE_BITSIZE (best_mode),
487 fieldmode))
488 limit_mode = insn.field_mode;
489
490 scalar_int_mode wider_mode;
491 while (iter.next_mode (&wider_mode)
492 && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
493 best_mode = wider_mode;
494 }
495 return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
496 new_bitnum);
497 }
498 return NULL_RTX;
499 }
500
501 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
502 a structure of mode STRUCT_MODE represents a lowpart subreg. The subreg
503 offset is then BITNUM / BITS_PER_UNIT. */
504
505 static bool
506 lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
507 unsigned HOST_WIDE_INT bitsize,
508 machine_mode struct_mode)
509 {
510 unsigned HOST_WIDE_INT regsize = REGMODE_NATURAL_SIZE (struct_mode);
511 if (BYTES_BIG_ENDIAN)
512 return (bitnum % BITS_PER_UNIT == 0
513 && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
514 || (bitnum + bitsize) % (regsize * BITS_PER_UNIT) == 0));
515 else
516 return bitnum % (regsize * BITS_PER_UNIT) == 0;
517 }
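/* For example, on a little-endian target whose registers have an
   8-byte natural size, a field starting at bit 0 or bit 64 of the
   structure is a lowpart, while one starting at bit 8 is not.  */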
518
519 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
520 containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
521 Return false if the access would touch memory outside the range
522 BITREGION_START to BITREGION_END for conformance to the C++ memory
523 model. */
524
525 static bool
526 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
527 unsigned HOST_WIDE_INT bitnum,
528 scalar_int_mode fieldmode,
529 unsigned HOST_WIDE_INT bitregion_start,
530 unsigned HOST_WIDE_INT bitregion_end)
531 {
532 unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
533
534 /* -fstrict-volatile-bitfields must be enabled and we must have a
535 volatile MEM. */
536 if (!MEM_P (op0)
537 || !MEM_VOLATILE_P (op0)
538 || flag_strict_volatile_bitfields <= 0)
539 return false;
540
541 /* The bit size must not be larger than the field mode, and
542 the field mode must not be larger than a word. */
543 if (bitsize > modesize || modesize > BITS_PER_WORD)
544 return false;
545
546 /* Check for cases of unaligned fields that must be split. */
547 if (bitnum % modesize + bitsize > modesize)
548 return false;
549
550 /* The memory must be sufficiently aligned for a MODESIZE access.
551 This condition guarantees that the memory access will not
552 touch anything after the end of the structure. */
553 if (MEM_ALIGN (op0) < modesize)
554 return false;
555
556 /* Check for cases where the C++ memory model applies. */
557 if (bitregion_end != 0
558 && (bitnum - bitnum % modesize < bitregion_start
559 || bitnum - bitnum % modesize + modesize - 1 > bitregion_end))
560 return false;
561
562 return true;
563 }
564
565 /* Return true if OP is a memory and if a bitfield of size BITSIZE at
566 bit number BITNUM can be treated as a simple value of mode MODE. */
567
568 static bool
569 simple_mem_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
570 unsigned HOST_WIDE_INT bitnum, machine_mode mode)
571 {
572 return (MEM_P (op0)
573 && bitnum % BITS_PER_UNIT == 0
574 && bitsize == GET_MODE_BITSIZE (mode)
575 && (!targetm.slow_unaligned_access (mode, MEM_ALIGN (op0))
576 || (bitnum % GET_MODE_ALIGNMENT (mode) == 0
577 && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
578 }
579 \f
580 /* Try to use instruction INSV to store VALUE into a field of OP0.
581 If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is a
582 BLKmode MEM. VALUE_MODE is the mode of VALUE. BITSIZE and BITNUM
583 are as for store_bit_field. */
584
585 static bool
586 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
587 opt_scalar_int_mode op0_mode,
588 unsigned HOST_WIDE_INT bitsize,
589 unsigned HOST_WIDE_INT bitnum,
590 rtx value, scalar_int_mode value_mode)
591 {
592 struct expand_operand ops[4];
593 rtx value1;
594 rtx xop0 = op0;
595 rtx_insn *last = get_last_insn ();
596 bool copy_back = false;
597
598 scalar_int_mode op_mode = insv->field_mode;
599 unsigned int unit = GET_MODE_BITSIZE (op_mode);
600 if (bitsize == 0 || bitsize > unit)
601 return false;
602
603 if (MEM_P (xop0))
604 /* Get a reference to the first byte of the field. */
605 xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
606 &bitnum);
607 else
608 {
609 /* Convert from counting within OP0 to counting in OP_MODE. */
610 if (BYTES_BIG_ENDIAN)
611 bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());
612
613 /* If xop0 is a register, we need it in OP_MODE
614 to make it acceptable to the format of insv. */
615 if (GET_CODE (xop0) == SUBREG)
616 /* We can't just change the mode, because this might clobber op0,
617 and we will need the original value of op0 if insv fails. */
618 xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
619 if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
620 xop0 = gen_lowpart_SUBREG (op_mode, xop0);
621 }
622
623 /* If the destination is a paradoxical subreg such that we need a
624 truncate to the inner mode, perform the insertion on a temporary and
625 truncate the result to the original destination. Note that we can't
626 just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
627 X) 0)) is (reg:N X). */
628 if (GET_CODE (xop0) == SUBREG
629 && REG_P (SUBREG_REG (xop0))
630 && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
631 op_mode))
632 {
633 rtx tem = gen_reg_rtx (op_mode);
634 emit_move_insn (tem, xop0);
635 xop0 = tem;
636 copy_back = true;
637 }
638
639 /* There is a similar overflow check at the start of store_bit_field_1,
640 but that only checks the case where the field lies completely
641 outside the register.  There are also cases where the field lies
642 partially in the register, and then we need to adjust BITSIZE for
643 the partial overflow.  Without this fix, pr48335-2.c would break on
644 big-endian targets that have a bit insert instruction, such as arm
645 and aarch64.  */
646 if (bitsize + bitnum > unit && bitnum < unit)
647 {
648 warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
649 "destination object, data truncated into %wu-bit",
650 bitsize, unit - bitnum);
651 bitsize = unit - bitnum;
652 }
653
654 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
655 "backwards" from the size of the unit we are inserting into.
656 Otherwise, we count bits from the most significant on a
657 BYTES/BITS_BIG_ENDIAN machine. */
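/* For example, with a 32-bit unit, an 8-bit field at bit 0 in one
   numbering becomes an 8-bit field at bit 24 in the other.  */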
658
659 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
660 bitnum = unit - bitsize - bitnum;
661
662 /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
663 value1 = value;
664 if (value_mode != op_mode)
665 {
666 if (GET_MODE_BITSIZE (value_mode) >= bitsize)
667 {
668 rtx tmp;
669 /* Optimization: Don't bother really extending VALUE
670 if it has all the bits we will actually use. However,
671 if we must narrow it, be sure we do it correctly. */
672
673 if (GET_MODE_SIZE (value_mode) < GET_MODE_SIZE (op_mode))
674 {
675 tmp = simplify_subreg (op_mode, value1, value_mode, 0);
676 if (! tmp)
677 tmp = simplify_gen_subreg (op_mode,
678 force_reg (value_mode, value1),
679 value_mode, 0);
680 }
681 else
682 {
683 tmp = gen_lowpart_if_possible (op_mode, value1);
684 if (! tmp)
685 tmp = gen_lowpart (op_mode, force_reg (value_mode, value1));
686 }
687 value1 = tmp;
688 }
689 else if (CONST_INT_P (value))
690 value1 = gen_int_mode (INTVAL (value), op_mode);
691 else
692 /* Parse phase is supposed to make VALUE's data type
693 match that of the component reference, which is a type
694 at least as wide as the field; so VALUE should have
695 a mode that corresponds to that type. */
696 gcc_assert (CONSTANT_P (value));
697 }
698
699 create_fixed_operand (&ops[0], xop0);
700 create_integer_operand (&ops[1], bitsize);
701 create_integer_operand (&ops[2], bitnum);
702 create_input_operand (&ops[3], value1, op_mode);
703 if (maybe_expand_insn (insv->icode, 4, ops))
704 {
705 if (copy_back)
706 convert_move (op0, xop0, true);
707 return true;
708 }
709 delete_insns_since (last);
710 return false;
711 }
712
713 /* A subroutine of store_bit_field, with the same arguments. Return true
714 if the operation could be implemented.
715
716 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
717 no other way of implementing the operation. If FALLBACK_P is false,
718 return false instead. */
719
720 static bool
721 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
722 unsigned HOST_WIDE_INT bitnum,
723 unsigned HOST_WIDE_INT bitregion_start,
724 unsigned HOST_WIDE_INT bitregion_end,
725 machine_mode fieldmode,
726 rtx value, bool reverse, bool fallback_p)
727 {
728 rtx op0 = str_rtx;
729 rtx orig_value;
730
731 while (GET_CODE (op0) == SUBREG)
732 {
733 bitnum += subreg_memory_offset (op0) * BITS_PER_UNIT;
734 op0 = SUBREG_REG (op0);
735 }
736
737 /* No action is needed if the target is a register and if the field
738 lies completely outside that register. This can occur if the source
739 code contains an out-of-bounds access to a small array. */
740 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
741 return true;
742
743 /* Use vec_set patterns for inserting parts of vectors whenever
744 available. */
745 machine_mode outermode = GET_MODE (op0);
746 scalar_mode innermode = GET_MODE_INNER (outermode);
747 if (VECTOR_MODE_P (outermode)
748 && !MEM_P (op0)
749 && optab_handler (vec_set_optab, outermode) != CODE_FOR_nothing
750 && fieldmode == innermode
751 && bitsize == GET_MODE_BITSIZE (innermode)
752 && !(bitnum % GET_MODE_BITSIZE (innermode)))
753 {
754 struct expand_operand ops[3];
755 enum insn_code icode = optab_handler (vec_set_optab, outermode);
756 int pos = bitnum / GET_MODE_BITSIZE (innermode);
757
758 create_fixed_operand (&ops[0], op0);
759 create_input_operand (&ops[1], value, innermode);
760 create_integer_operand (&ops[2], pos);
761 if (maybe_expand_insn (icode, 3, ops))
762 return true;
763 }
764
765 /* If the target is a register, overwriting the entire object, or storing
766 a full-word or multi-word field can be done with just a SUBREG. */
767 if (!MEM_P (op0)
768 && bitsize == GET_MODE_BITSIZE (fieldmode)
769 && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
770 || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
771 {
772 /* Use the subreg machinery either to narrow OP0 to the required
773 words or to cope with mode punning between equal-sized modes.
774 In the latter case, use subreg on the rhs side, not lhs. */
775 rtx sub;
776
777 if (bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
778 {
779 sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
780 if (sub)
781 {
782 if (reverse)
783 sub = flip_storage_order (GET_MODE (op0), sub);
784 emit_move_insn (op0, sub);
785 return true;
786 }
787 }
788 else
789 {
790 sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
791 bitnum / BITS_PER_UNIT);
792 if (sub)
793 {
794 if (reverse)
795 value = flip_storage_order (fieldmode, value);
796 emit_move_insn (sub, value);
797 return true;
798 }
799 }
800 }
801
802 /* If the target is memory, storing any naturally aligned field can be
803 done with a simple store. For targets that support fast unaligned
804 memory, any naturally sized, unit aligned field can be done directly. */
805 if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode))
806 {
807 op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
808 if (reverse)
809 value = flip_storage_order (fieldmode, value);
810 emit_move_insn (op0, value);
811 return true;
812 }
813
814 /* Make sure we are playing with integral modes. Pun with subregs
815 if we aren't. This must come after the entire register case above,
816 since that case is valid for any mode. The following cases are only
817 valid for integral modes. */
818 opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
819 scalar_int_mode imode;
820 if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
821 {
822 if (MEM_P (op0))
823 op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
824 0, MEM_SIZE (op0));
825 else
826 op0 = gen_lowpart (op0_mode.require (), op0);
827 }
828
829 /* Storing an lsb-aligned field in a register
830 can be done with a movstrict instruction. */
831
832 if (!MEM_P (op0)
833 && !reverse
834 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
835 && bitsize == GET_MODE_BITSIZE (fieldmode)
836 && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
837 {
838 struct expand_operand ops[2];
839 enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
840 rtx arg0 = op0;
841 unsigned HOST_WIDE_INT subreg_off;
842
843 if (GET_CODE (arg0) == SUBREG)
844 {
845 /* Else we've got some float mode source being extracted into
846 a different float mode destination -- this combination of
847 subregs results in Severe Tire Damage. */
848 gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
849 || GET_MODE_CLASS (fieldmode) == MODE_INT
850 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
851 arg0 = SUBREG_REG (arg0);
852 }
853
854 subreg_off = bitnum / BITS_PER_UNIT;
855 if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
856 {
857 arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
858
859 create_fixed_operand (&ops[0], arg0);
860 /* Shrink the source operand to FIELDMODE. */
861 create_convert_operand_to (&ops[1], value, fieldmode, false);
862 if (maybe_expand_insn (icode, 2, ops))
863 return true;
864 }
865 }
866
867 /* Handle fields bigger than a word. */
868
869 if (bitsize > BITS_PER_WORD)
870 {
871 /* Here we transfer the words of the field
872 in the order least significant first.
873 This is because the most significant word is the one which may
874 be less than full.
875 However, only do that if the value is not BLKmode. */
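/* For example, on a 32-bit little-endian target a 64-bit field is
   stored by the loop below as two word-sized pieces, low word first,
   each via a recursive call to store_bit_field_1.  */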
876
877 const bool backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
878 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
879 unsigned int i;
880 rtx_insn *last;
881
882 /* This is the mode we must force value to, so that there will be enough
883 subwords to extract. Note that fieldmode will often (always?) be
884 VOIDmode, because that is what store_field uses to indicate that this
885 is a bit field, but passing VOIDmode to operand_subword_force
886 is not allowed. */
887 fieldmode = GET_MODE (value);
888 if (fieldmode == VOIDmode)
889 fieldmode = smallest_int_mode_for_size (nwords * BITS_PER_WORD);
890
891 last = get_last_insn ();
892 for (i = 0; i < nwords; i++)
893 {
894 /* If I is 0, use the low-order word in both field and target;
895 if I is 1, use the next to lowest word; and so on. */
896 unsigned int wordnum = (backwards
897 ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
898 - i - 1
899 : i);
900 unsigned int bit_offset = (backwards ^ reverse
901 ? MAX ((int) bitsize - ((int) i + 1)
902 * BITS_PER_WORD,
903 0)
904 : (int) i * BITS_PER_WORD);
905 rtx value_word = operand_subword_force (value, wordnum, fieldmode);
906 unsigned HOST_WIDE_INT new_bitsize =
907 MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
908
909 /* If the remaining chunk doesn't have full wordsize we have
910 to make sure that for big-endian machines the higher order
911 bits are used. */
912 if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
913 value_word = simplify_expand_binop (word_mode, lshr_optab,
914 value_word,
915 GEN_INT (BITS_PER_WORD
916 - new_bitsize),
917 NULL_RTX, true,
918 OPTAB_LIB_WIDEN);
919
920 if (!store_bit_field_1 (op0, new_bitsize,
921 bitnum + bit_offset,
922 bitregion_start, bitregion_end,
923 word_mode,
924 value_word, reverse, fallback_p))
925 {
926 delete_insns_since (last);
927 return false;
928 }
929 }
930 return true;
931 }
932
933 /* If VALUE has a floating-point or complex mode, access it as an
934 integer of the corresponding size. This can occur on a machine
935 with 64 bit registers that uses SFmode for float. It can also
936 occur for unaligned float or complex fields. */
937 orig_value = value;
938 scalar_int_mode value_mode;
939 if (GET_MODE (value) == VOIDmode)
940 /* By this point we've dealt with values that are bigger than a word,
941 so word_mode is a conservatively correct choice. */
942 value_mode = word_mode;
943 else if (!is_a <scalar_int_mode> (GET_MODE (value), &value_mode))
944 {
945 value_mode = int_mode_for_mode (GET_MODE (value)).require ();
946 value = gen_reg_rtx (value_mode);
947 emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
948 }
949
950 /* If OP0 is a multi-word register, narrow it to the affected word.
951 If the region spans two words, defer to store_split_bit_field.
952 Don't do this if op0 is a single hard register wider than word
953 such as a float or vector register. */
954 if (!MEM_P (op0)
955 && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD
956 && (!REG_P (op0)
957 || !HARD_REGISTER_P (op0)
958 || hard_regno_nregs (REGNO (op0), op0_mode.require ()) != 1))
959 {
960 if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
961 {
962 if (!fallback_p)
963 return false;
964
965 store_split_bit_field (op0, op0_mode, bitsize, bitnum,
966 bitregion_start, bitregion_end,
967 value, value_mode, reverse);
968 return true;
969 }
970 op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (),
971 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
972 gcc_assert (op0);
973 op0_mode = word_mode;
974 bitnum %= BITS_PER_WORD;
975 }
976
977 /* From here on we can assume that the field to be stored in fits
978 within a word. If the destination is a register, it too fits
979 in a word. */
980
981 extraction_insn insv;
982 if (!MEM_P (op0)
983 && !reverse
984 && get_best_reg_extraction_insn (&insv, EP_insv,
985 GET_MODE_BITSIZE (op0_mode.require ()),
986 fieldmode)
987 && store_bit_field_using_insv (&insv, op0, op0_mode,
988 bitsize, bitnum, value, value_mode))
989 return true;
990
991 /* If OP0 is a memory, try copying it to a register and seeing if a
992 cheap register alternative is available. */
993 if (MEM_P (op0) && !reverse)
994 {
995 if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
996 fieldmode)
997 && store_bit_field_using_insv (&insv, op0, op0_mode,
998 bitsize, bitnum, value, value_mode))
999 return true;
1000
1001 rtx_insn *last = get_last_insn ();
1002
1003 /* Try loading part of OP0 into a register, inserting the bitfield
1004 into that, and then copying the result back to OP0. */
1005 unsigned HOST_WIDE_INT bitpos;
1006 rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
1007 bitregion_start, bitregion_end,
1008 fieldmode, &bitpos);
1009 if (xop0)
1010 {
1011 rtx tempreg = copy_to_reg (xop0);
1012 if (store_bit_field_1 (tempreg, bitsize, bitpos,
1013 bitregion_start, bitregion_end,
1014 fieldmode, orig_value, reverse, false))
1015 {
1016 emit_move_insn (xop0, tempreg);
1017 return true;
1018 }
1019 delete_insns_since (last);
1020 }
1021 }
1022
1023 if (!fallback_p)
1024 return false;
1025
1026 store_fixed_bit_field (op0, op0_mode, bitsize, bitnum, bitregion_start,
1027 bitregion_end, value, value_mode, reverse);
1028 return true;
1029 }
1030
1031 /* Generate code to store value from rtx VALUE
1032 into a bit-field within structure STR_RTX
1033 containing BITSIZE bits starting at bit BITNUM.
1034
1035 BITREGION_START is bitpos of the first bitfield in this region.
1036 BITREGION_END is the bitpos of the ending bitfield in this region.
1037 These two fields are 0, if the C++ memory model does not apply,
1038 or we are not interested in keeping track of bitfield regions.
1039
1040 FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
1041
1042 If REVERSE is true, the store is to be done in reverse order. */
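/* As a rough illustration (the argument values here are made up, not
   taken from a real caller): storing a whole byte at bit offset 0 of
   STR_RTX would be store_bit_field (str_rtx, 8, 0, 0, 0, QImode,
   value, false), with the bit region arguments zero because no C++
   memory model constraint applies.  */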
1043
1044 void
1045 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1046 unsigned HOST_WIDE_INT bitnum,
1047 unsigned HOST_WIDE_INT bitregion_start,
1048 unsigned HOST_WIDE_INT bitregion_end,
1049 machine_mode fieldmode,
1050 rtx value, bool reverse)
1051 {
1052 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
1053 scalar_int_mode int_mode;
1054 if (is_a <scalar_int_mode> (fieldmode, &int_mode)
1055 && strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, int_mode,
1056 bitregion_start, bitregion_end))
1057 {
1058 /* Storing of a full word can be done with a simple store.
1059 We know here that the field can be accessed with one single
1060 instruction. For targets that support unaligned memory,
1061 an unaligned access may be necessary. */
1062 if (bitsize == GET_MODE_BITSIZE (int_mode))
1063 {
1064 str_rtx = adjust_bitfield_address (str_rtx, int_mode,
1065 bitnum / BITS_PER_UNIT);
1066 if (reverse)
1067 value = flip_storage_order (int_mode, value);
1068 gcc_assert (bitnum % BITS_PER_UNIT == 0);
1069 emit_move_insn (str_rtx, value);
1070 }
1071 else
1072 {
1073 rtx temp;
1074
1075 str_rtx = narrow_bit_field_mem (str_rtx, int_mode, bitsize, bitnum,
1076 &bitnum);
1077 gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (int_mode));
1078 temp = copy_to_reg (str_rtx);
1079 if (!store_bit_field_1 (temp, bitsize, bitnum, 0, 0,
1080 int_mode, value, reverse, true))
1081 gcc_unreachable ();
1082
1083 emit_move_insn (str_rtx, temp);
1084 }
1085
1086 return;
1087 }
1088
1089 /* Under the C++0x memory model, we must not touch bits outside the
1090 bit region. Adjust the address to start at the beginning of the
1091 bit region. */
1092 if (MEM_P (str_rtx) && bitregion_start > 0)
1093 {
1094 scalar_int_mode best_mode;
1095 machine_mode addr_mode = VOIDmode;
1096 HOST_WIDE_INT offset, size;
1097
1098 gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
1099
1100 offset = bitregion_start / BITS_PER_UNIT;
1101 bitnum -= bitregion_start;
1102 size = (bitnum + bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT;
1103 bitregion_end -= bitregion_start;
1104 bitregion_start = 0;
1105 if (get_best_mode (bitsize, bitnum,
1106 bitregion_start, bitregion_end,
1107 MEM_ALIGN (str_rtx), INT_MAX,
1108 MEM_VOLATILE_P (str_rtx), &best_mode))
1109 addr_mode = best_mode;
1110 str_rtx = adjust_bitfield_address_size (str_rtx, addr_mode,
1111 offset, size);
1112 }
1113
1114 if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
1115 bitregion_start, bitregion_end,
1116 fieldmode, value, reverse, true))
1117 gcc_unreachable ();
1118 }
1119 \f
1120 /* Use shifts and boolean operations to store VALUE into a bit field of
1121 width BITSIZE in OP0, starting at bit BITNUM. If OP0_MODE is defined,
1122 it is the mode of OP0, otherwise OP0 is a BLKmode MEM. VALUE_MODE is
1123 the mode of VALUE.
1124
1125 If REVERSE is true, the store is to be done in reverse order. */
1126
1127 static void
1128 store_fixed_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1129 unsigned HOST_WIDE_INT bitsize,
1130 unsigned HOST_WIDE_INT bitnum,
1131 unsigned HOST_WIDE_INT bitregion_start,
1132 unsigned HOST_WIDE_INT bitregion_end,
1133 rtx value, scalar_int_mode value_mode, bool reverse)
1134 {
1135 /* There is a case not handled here:
1136 a structure with a known alignment of just a halfword
1137 and a field split across two aligned halfwords within the structure.
1138 Or likewise a structure with a known alignment of just a byte
1139 and a field split across two bytes.
1140 Such cases are not supposed to be able to occur. */
1141
1142 scalar_int_mode best_mode;
1143 if (MEM_P (op0))
1144 {
1145 unsigned int max_bitsize = BITS_PER_WORD;
1146 scalar_int_mode imode;
1147 if (op0_mode.exists (&imode) && GET_MODE_BITSIZE (imode) < max_bitsize)
1148 max_bitsize = GET_MODE_BITSIZE (imode);
1149
1150 if (!get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1151 MEM_ALIGN (op0), max_bitsize, MEM_VOLATILE_P (op0),
1152 &best_mode))
1153 {
1154 /* The only way this should occur is if the field spans word
1155 boundaries. */
1156 store_split_bit_field (op0, op0_mode, bitsize, bitnum,
1157 bitregion_start, bitregion_end,
1158 value, value_mode, reverse);
1159 return;
1160 }
1161
1162 op0 = narrow_bit_field_mem (op0, best_mode, bitsize, bitnum, &bitnum);
1163 }
1164 else
1165 best_mode = op0_mode.require ();
1166
1167 store_fixed_bit_field_1 (op0, best_mode, bitsize, bitnum,
1168 value, value_mode, reverse);
1169 }
1170
1171 /* Helper function for store_fixed_bit_field, stores
1172 the bit field always using MODE, which is the mode of OP0. The other
1173 arguments are as for store_fixed_bit_field. */
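/* For example, on a little-endian target with REVERSE false, storing
   the 3-bit value 5 at BITNUM 4 of a QImode object reduces to
   op0 = (op0 & ~0x70) | 0x50: the AND below clears the field using
   mask_rtx (mode, 4, 3, 1) and the IOR inserts the value shifted left
   by BITNUM.  */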
1174
1175 static void
1176 store_fixed_bit_field_1 (rtx op0, scalar_int_mode mode,
1177 unsigned HOST_WIDE_INT bitsize,
1178 unsigned HOST_WIDE_INT bitnum,
1179 rtx value, scalar_int_mode value_mode, bool reverse)
1180 {
1181 rtx temp;
1182 int all_zero = 0;
1183 int all_one = 0;
1184
1185 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1186 for invalid input, such as f5 from gcc.dg/pr48335-2.c. */
1187
1188 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1189 /* BITNUM is the distance between our msb
1190 and that of the containing datum.
1191 Convert it to the distance from the lsb. */
1192 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1193
1194 /* Now BITNUM is always the distance between our lsb
1195 and that of OP0. */
1196
1197 /* Shift VALUE left by BITNUM bits. If VALUE is not constant,
1198 we must first convert its mode to MODE. */
1199
1200 if (CONST_INT_P (value))
1201 {
1202 unsigned HOST_WIDE_INT v = UINTVAL (value);
1203
1204 if (bitsize < HOST_BITS_PER_WIDE_INT)
1205 v &= (HOST_WIDE_INT_1U << bitsize) - 1;
1206
1207 if (v == 0)
1208 all_zero = 1;
1209 else if ((bitsize < HOST_BITS_PER_WIDE_INT
1210 && v == (HOST_WIDE_INT_1U << bitsize) - 1)
1211 || (bitsize == HOST_BITS_PER_WIDE_INT
1212 && v == HOST_WIDE_INT_M1U))
1213 all_one = 1;
1214
1215 value = lshift_value (mode, v, bitnum);
1216 }
1217 else
1218 {
1219 int must_and = (GET_MODE_BITSIZE (value_mode) != bitsize
1220 && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1221
1222 if (value_mode != mode)
1223 value = convert_to_mode (mode, value, 1);
1224
1225 if (must_and)
1226 value = expand_binop (mode, and_optab, value,
1227 mask_rtx (mode, 0, bitsize, 0),
1228 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1229 if (bitnum > 0)
1230 value = expand_shift (LSHIFT_EXPR, mode, value,
1231 bitnum, NULL_RTX, 1);
1232 }
1233
1234 if (reverse)
1235 value = flip_storage_order (mode, value);
1236
1237 /* Now clear the chosen bits in OP0,
1238 except that if VALUE is -1 we need not bother. */
1239 /* We keep the intermediates in registers to allow CSE to combine
1240 consecutive bitfield assignments. */
1241
1242 temp = force_reg (mode, op0);
1243
1244 if (! all_one)
1245 {
1246 rtx mask = mask_rtx (mode, bitnum, bitsize, 1);
1247 if (reverse)
1248 mask = flip_storage_order (mode, mask);
1249 temp = expand_binop (mode, and_optab, temp, mask,
1250 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1251 temp = force_reg (mode, temp);
1252 }
1253
1254 /* Now logical-or VALUE into OP0, unless it is zero. */
1255
1256 if (! all_zero)
1257 {
1258 temp = expand_binop (mode, ior_optab, temp, value,
1259 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1260 temp = force_reg (mode, temp);
1261 }
1262
1263 if (op0 != temp)
1264 {
1265 op0 = copy_rtx (op0);
1266 emit_move_insn (op0, temp);
1267 }
1268 }
1269 \f
1270 /* Store a bit field that is split across multiple accessible memory objects.
1271
1272 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1273 BITSIZE is the field width; BITPOS the position of its first bit
1274 (within the word).
1275 VALUE is the value to store, which has mode VALUE_MODE.
1276 If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
1277 a BLKmode MEM.
1278
1279 If REVERSE is true, the store is to be done in reverse order.
1280
1281 This does not yet handle fields wider than BITS_PER_WORD. */
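/* For example, with UNIT = 32 a 20-bit field at BITPOS 28 is stored
   as a 4-bit piece at bit 28 of the first unit followed by a 16-bit
   piece at bit 0 of the second unit, each by a separate call to
   store_fixed_bit_field.  */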
1282
1283 static void
1284 store_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1285 unsigned HOST_WIDE_INT bitsize,
1286 unsigned HOST_WIDE_INT bitpos,
1287 unsigned HOST_WIDE_INT bitregion_start,
1288 unsigned HOST_WIDE_INT bitregion_end,
1289 rtx value, scalar_int_mode value_mode, bool reverse)
1290 {
1291 unsigned int unit, total_bits, bitsdone = 0;
1292
1293 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
1294 much at a time. */
1295 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1296 unit = BITS_PER_WORD;
1297 else
1298 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1299
1300 /* If OP0 is a memory with a mode, then UNIT must not be larger than
1301 OP0's mode as well. Otherwise, store_fixed_bit_field will call us
1302 again, and we will mutually recurse forever. */
1303 if (MEM_P (op0) && op0_mode.exists ())
1304 unit = MIN (unit, GET_MODE_BITSIZE (op0_mode.require ()));
1305
1306 /* If VALUE is a constant other than a CONST_INT, get it into a register in
1307 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
1308 that VALUE might be a floating-point constant. */
1309 if (CONSTANT_P (value) && !CONST_INT_P (value))
1310 {
1311 rtx word = gen_lowpart_common (word_mode, value);
1312
1313 if (word && (value != word))
1314 value = word;
1315 else
1316 value = gen_lowpart_common (word_mode, force_reg (value_mode, value));
1317 value_mode = word_mode;
1318 }
1319
1320 total_bits = GET_MODE_BITSIZE (value_mode);
1321
1322 while (bitsdone < bitsize)
1323 {
1324 unsigned HOST_WIDE_INT thissize;
1325 unsigned HOST_WIDE_INT thispos;
1326 unsigned HOST_WIDE_INT offset;
1327 rtx part;
1328
1329 offset = (bitpos + bitsdone) / unit;
1330 thispos = (bitpos + bitsdone) % unit;
1331
1332 /* When the region of bytes we can touch is restricted, decrease
1333 UNIT close to the end of the region as needed. If op0 is a REG
1334 or SUBREG of REG, don't do this, as there can't be data races
1335 on a register and we can expand shorter code in some cases. */
1336 if (bitregion_end
1337 && unit > BITS_PER_UNIT
1338 && bitpos + bitsdone - thispos + unit > bitregion_end + 1
1339 && !REG_P (op0)
1340 && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1341 {
1342 unit = unit / 2;
1343 continue;
1344 }
1345
1346 /* THISSIZE must not overrun a word boundary. Otherwise,
1347 store_fixed_bit_field will call us again, and we will mutually
1348 recurse forever. */
1349 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1350 thissize = MIN (thissize, unit - thispos);
1351
1352 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1353 {
1354 /* Fetch successively less significant portions. */
1355 if (CONST_INT_P (value))
1356 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1357 >> (bitsize - bitsdone - thissize))
1358 & ((HOST_WIDE_INT_1 << thissize) - 1));
1359 /* Likewise, but the source is little-endian. */
1360 else if (reverse)
1361 part = extract_fixed_bit_field (word_mode, value, value_mode,
1362 thissize,
1363 bitsize - bitsdone - thissize,
1364 NULL_RTX, 1, false);
1365 else
1366 /* The args are chosen so that the last part includes the
1367 lsb. Give extract_bit_field the value it needs (with
1368 endianness compensation) to fetch the piece we want. */
1369 part = extract_fixed_bit_field (word_mode, value, value_mode,
1370 thissize,
1371 total_bits - bitsize + bitsdone,
1372 NULL_RTX, 1, false);
1373 }
1374 else
1375 {
1376 /* Fetch successively more significant portions. */
1377 if (CONST_INT_P (value))
1378 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1379 >> bitsdone)
1380 & ((HOST_WIDE_INT_1 << thissize) - 1));
1381 /* Likewise, but the source is big-endian. */
1382 else if (reverse)
1383 part = extract_fixed_bit_field (word_mode, value, value_mode,
1384 thissize,
1385 total_bits - bitsdone - thissize,
1386 NULL_RTX, 1, false);
1387 else
1388 part = extract_fixed_bit_field (word_mode, value, value_mode,
1389 thissize, bitsdone, NULL_RTX,
1390 1, false);
1391 }
1392
1393 /* If OP0 is a register, then handle OFFSET here. */
1394 rtx op0_piece = op0;
1395 opt_scalar_int_mode op0_piece_mode = op0_mode;
1396 if (SUBREG_P (op0) || REG_P (op0))
1397 {
1398 scalar_int_mode imode;
1399 if (op0_mode.exists (&imode)
1400 && GET_MODE_SIZE (imode) < UNITS_PER_WORD)
1401 {
1402 if (offset)
1403 op0_piece = const0_rtx;
1404 }
1405 else
1406 {
1407 op0_piece = operand_subword_force (op0,
1408 offset * unit / BITS_PER_WORD,
1409 GET_MODE (op0));
1410 op0_piece_mode = word_mode;
1411 }
1412 offset &= BITS_PER_WORD / unit - 1;
1413 }
1414
1415 /* OFFSET is in UNITs, and UNIT is in bits. If WORD is const0_rtx,
1416 it is just an out-of-bounds access. Ignore it. */
1417 if (op0_piece != const0_rtx)
1418 store_fixed_bit_field (op0_piece, op0_piece_mode, thissize,
1419 offset * unit + thispos, bitregion_start,
1420 bitregion_end, part, word_mode, reverse);
1421 bitsdone += thissize;
1422 }
1423 }
1424 \f
1425 /* A subroutine of extract_bit_field_1 that converts return value X
1426 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1427 to extract_bit_field. */
1428
1429 static rtx
1430 convert_extracted_bit_field (rtx x, machine_mode mode,
1431 machine_mode tmode, bool unsignedp)
1432 {
1433 if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1434 return x;
1435
1436 /* If the x mode is not a scalar integral, first convert to the
1437 integer mode of that size and then access it as a floating-point
1438 value via a SUBREG. */
1439 if (!SCALAR_INT_MODE_P (tmode))
1440 {
1441 scalar_int_mode int_mode = int_mode_for_mode (tmode).require ();
1442 x = convert_to_mode (int_mode, x, unsignedp);
1443 x = force_reg (int_mode, x);
1444 return gen_lowpart (tmode, x);
1445 }
1446
1447 return convert_to_mode (tmode, x, unsignedp);
1448 }
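/* For example, if TMODE is SFmode the value is first converted to the
   equal-width integer mode (SImode on typical targets) and the final
   SFmode view is then taken with gen_lowpart.  */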
1449
1450 /* Try to use an ext(z)v pattern to extract a field from OP0.
1451 Return the extracted value on success, otherwise return null.
1452 EXTV describes the extraction instruction to use. If OP0_MODE
1453 is defined, it is the mode of OP0, otherwise OP0 is a BLKmode MEM.
1454 The other arguments are as for extract_bit_field. */
1455
1456 static rtx
1457 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1458 opt_scalar_int_mode op0_mode,
1459 unsigned HOST_WIDE_INT bitsize,
1460 unsigned HOST_WIDE_INT bitnum,
1461 int unsignedp, rtx target,
1462 machine_mode mode, machine_mode tmode)
1463 {
1464 struct expand_operand ops[4];
1465 rtx spec_target = target;
1466 rtx spec_target_subreg = 0;
1467 scalar_int_mode ext_mode = extv->field_mode;
1468 unsigned unit = GET_MODE_BITSIZE (ext_mode);
1469
1470 if (bitsize == 0 || unit < bitsize)
1471 return NULL_RTX;
1472
1473 if (MEM_P (op0))
1474 /* Get a reference to the first byte of the field. */
1475 op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1476 &bitnum);
1477 else
1478 {
1479 /* Convert from counting within OP0 to counting in EXT_MODE. */
1480 if (BYTES_BIG_ENDIAN)
1481 bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());
1482
1483 /* If op0 is a register, we need it in EXT_MODE to make it
1484 acceptable to the format of ext(z)v. */
1485 if (GET_CODE (op0) == SUBREG && op0_mode.require () != ext_mode)
1486 return NULL_RTX;
1487 if (REG_P (op0) && op0_mode.require () != ext_mode)
1488 op0 = gen_lowpart_SUBREG (ext_mode, op0);
1489 }
1490
1491 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1492 "backwards" from the size of the unit we are extracting from.
1493 Otherwise, we count bits from the most significant on a
1494 BYTES/BITS_BIG_ENDIAN machine. */
1495
1496 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1497 bitnum = unit - bitsize - bitnum;
1498
1499 if (target == 0)
1500 target = spec_target = gen_reg_rtx (tmode);
1501
1502 if (GET_MODE (target) != ext_mode)
1503 {
1504 /* Don't use LHS paradoxical subreg if explicit truncation is needed
1505 between the mode of the extraction (word_mode) and the target
1506 mode. Instead, create a temporary and use convert_move to set
1507 the target. */
1508 if (REG_P (target)
1509 && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1510 {
1511 target = gen_lowpart (ext_mode, target);
1512 if (partial_subreg_p (GET_MODE (spec_target), ext_mode))
1513 spec_target_subreg = target;
1514 }
1515 else
1516 target = gen_reg_rtx (ext_mode);
1517 }
1518
1519 create_output_operand (&ops[0], target, ext_mode);
1520 create_fixed_operand (&ops[1], op0);
1521 create_integer_operand (&ops[2], bitsize);
1522 create_integer_operand (&ops[3], bitnum);
1523 if (maybe_expand_insn (extv->icode, 4, ops))
1524 {
1525 target = ops[0].value;
1526 if (target == spec_target)
1527 return target;
1528 if (target == spec_target_subreg)
1529 return spec_target;
1530 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1531 }
1532 return NULL_RTX;
1533 }
1534
1535 /* A subroutine of extract_bit_field, with the same arguments.
1536 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1537 if we can find no other means of implementing the operation.
1538 If FALLBACK_P is false, return NULL instead. */
1539
1540 static rtx
1541 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1542 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1543 machine_mode mode, machine_mode tmode,
1544 bool reverse, bool fallback_p, rtx *alt_rtl)
1545 {
1546 rtx op0 = str_rtx;
1547 machine_mode mode1;
1548
1549 if (tmode == VOIDmode)
1550 tmode = mode;
1551
1552 while (GET_CODE (op0) == SUBREG)
1553 {
1554 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1555 op0 = SUBREG_REG (op0);
1556 }
1557
1558 /* If we have an out-of-bounds access to a register, just return an
1559 uninitialized register of the required mode. This can occur if the
1560 source code contains an out-of-bounds access to a small array. */
1561 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1562 return gen_reg_rtx (tmode);
1563
1564 if (REG_P (op0)
1565 && mode == GET_MODE (op0)
1566 && bitnum == 0
1567 && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1568 {
1569 if (reverse)
1570 op0 = flip_storage_order (mode, op0);
1571 /* We're trying to extract a full register from itself. */
1572 return op0;
1573 }
1574
1575 /* First try to check for vector from vector extractions. */
1576 if (VECTOR_MODE_P (GET_MODE (op0))
1577 && !MEM_P (op0)
1578 && VECTOR_MODE_P (tmode)
1579 && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (tmode))
1580 {
1581 machine_mode new_mode = GET_MODE (op0);
1582 if (GET_MODE_INNER (new_mode) != GET_MODE_INNER (tmode))
1583 {
1584 scalar_mode inner_mode = GET_MODE_INNER (tmode);
1585 unsigned int nunits = (GET_MODE_BITSIZE (GET_MODE (op0))
1586 / GET_MODE_UNIT_BITSIZE (tmode));
1587 if (!mode_for_vector (inner_mode, nunits).exists (&new_mode)
1588 || !VECTOR_MODE_P (new_mode)
1589 || GET_MODE_SIZE (new_mode) != GET_MODE_SIZE (GET_MODE (op0))
1590 || GET_MODE_INNER (new_mode) != GET_MODE_INNER (tmode)
1591 || !targetm.vector_mode_supported_p (new_mode))
1592 new_mode = VOIDmode;
1593 }
1594 if (new_mode != VOIDmode
1595 && (convert_optab_handler (vec_extract_optab, new_mode, tmode)
1596 != CODE_FOR_nothing)
1597 && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (tmode)
1598 == bitnum / GET_MODE_BITSIZE (tmode)))
1599 {
1600 struct expand_operand ops[3];
1601 machine_mode outermode = new_mode;
1602 machine_mode innermode = tmode;
1603 enum insn_code icode
1604 = convert_optab_handler (vec_extract_optab, outermode, innermode);
1605 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1606
1607 if (new_mode != GET_MODE (op0))
1608 op0 = gen_lowpart (new_mode, op0);
1609 create_output_operand (&ops[0], target, innermode);
1610 ops[0].target = 1;
1611 create_input_operand (&ops[1], op0, outermode);
1612 create_integer_operand (&ops[2], pos);
1613 if (maybe_expand_insn (icode, 3, ops))
1614 {
1615 if (alt_rtl && ops[0].target)
1616 *alt_rtl = target;
1617 target = ops[0].value;
1618 if (GET_MODE (target) != mode)
1619 return gen_lowpart (tmode, target);
1620 return target;
1621 }
1622 }
1623 }
1624
1625 /* See if we can get a better vector mode before extracting. */
1626 if (VECTOR_MODE_P (GET_MODE (op0))
1627 && !MEM_P (op0)
1628 && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1629 {
1630 machine_mode new_mode;
1631
1632 if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1633 new_mode = MIN_MODE_VECTOR_FLOAT;
1634 else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1635 new_mode = MIN_MODE_VECTOR_FRACT;
1636 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1637 new_mode = MIN_MODE_VECTOR_UFRACT;
1638 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1639 new_mode = MIN_MODE_VECTOR_ACCUM;
1640 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1641 new_mode = MIN_MODE_VECTOR_UACCUM;
1642 else
1643 new_mode = MIN_MODE_VECTOR_INT;
1644
1645 FOR_EACH_MODE_FROM (new_mode, new_mode)
1646 if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1647 && GET_MODE_UNIT_SIZE (new_mode) == GET_MODE_SIZE (tmode)
1648 && targetm.vector_mode_supported_p (new_mode))
1649 break;
1650 if (new_mode != VOIDmode)
1651 op0 = gen_lowpart (new_mode, op0);
1652 }
1653
1654 /* Use vec_extract patterns for extracting parts of vectors whenever
1655 available. */
1656 machine_mode outermode = GET_MODE (op0);
1657 scalar_mode innermode = GET_MODE_INNER (outermode);
1658 if (VECTOR_MODE_P (outermode)
1659 && !MEM_P (op0)
1660 && (convert_optab_handler (vec_extract_optab, outermode, innermode)
1661 != CODE_FOR_nothing)
1662 && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (innermode)
1663 == bitnum / GET_MODE_BITSIZE (innermode)))
1664 {
1665 struct expand_operand ops[3];
1666 enum insn_code icode
1667 = convert_optab_handler (vec_extract_optab, outermode, innermode);
1668 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1669
1670 create_output_operand (&ops[0], target, innermode);
1671 ops[0].target = 1;
1672 create_input_operand (&ops[1], op0, outermode);
1673 create_integer_operand (&ops[2], pos);
1674 if (maybe_expand_insn (icode, 3, ops))
1675 {
1676 if (alt_rtl && ops[0].target)
1677 *alt_rtl = target;
1678 target = ops[0].value;
1679 if (GET_MODE (target) != mode)
1680 return gen_lowpart (tmode, target);
1681 return target;
1682 }
1683 }
1684
1685 /* Make sure we are playing with integral modes. Pun with subregs
1686 if we aren't. */
1687 opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
1688 scalar_int_mode imode;
1689 if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
1690 {
1691 if (MEM_P (op0))
1692 op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
1693 0, MEM_SIZE (op0));
1694 else if (op0_mode.exists (&imode))
1695 {
1696 op0 = gen_lowpart (imode, op0);
1697
1698 /* If we got a SUBREG, force it into a register since we
1699 aren't going to be able to do another SUBREG on it. */
1700 if (GET_CODE (op0) == SUBREG)
1701 op0 = force_reg (imode, op0);
1702 }
1703 else
1704 {
1705 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (op0));
1706 rtx mem = assign_stack_temp (GET_MODE (op0), size);
1707 emit_move_insn (mem, op0);
1708 op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1709 }
1710 }
1711
1712 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1713 If that's wrong, the solution is to test for it and set TARGET to 0
1714 if needed. */
1715
1716 /* Get the mode of the field to use for atomic access or subreg
1717 conversion. */
1718 if (!SCALAR_INT_MODE_P (tmode)
1719 || !mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0).exists (&mode1))
1720 mode1 = mode;
1721 gcc_assert (mode1 != BLKmode);
1722
1723 /* Extraction of a full MODE1 value can be done with a subreg as long
1724 as the least significant bit of the value is the least significant
1725 bit of either OP0 or a word of OP0. */
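/* For instance, on a little-endian target the low byte of an SImode
   pseudo can typically be extracted as (subreg:QI (reg:SI) 0), without
   any shifting or masking.  */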
1726 if (!MEM_P (op0)
1727 && !reverse
1728 && lowpart_bit_field_p (bitnum, bitsize, op0_mode.require ())
1729 && bitsize == GET_MODE_BITSIZE (mode1)
1730 && TRULY_NOOP_TRUNCATION_MODES_P (mode1, op0_mode.require ()))
1731 {
1732 rtx sub = simplify_gen_subreg (mode1, op0, op0_mode.require (),
1733 bitnum / BITS_PER_UNIT);
1734 if (sub)
1735 return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1736 }
1737
1738 /* Extraction of a full MODE1 value can be done with a load as long as
1739 the field is on a byte boundary and is sufficiently aligned. */
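/* E.g. a byte-aligned 16-bit field is fetched with a single HImode load,
   provided the MEM is sufficiently aligned for HImode (or unaligned
   accesses are cheap on the target).  */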
1740 if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1))
1741 {
1742 op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT);
1743 if (reverse)
1744 op0 = flip_storage_order (mode1, op0);
1745 return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1746 }
1747
1748 /* Handle fields bigger than a word. */
1749
1750 if (bitsize > BITS_PER_WORD)
1751 {
1752 /* Here we transfer the words of the field
1753 in the order least significant first.
1754 This is because the most significant word is the one which may
1755 be less than full. */
1756
1757 const bool backwards = WORDS_BIG_ENDIAN;
1758 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1759 unsigned int i;
1760 rtx_insn *last;
1761
1762 if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1763 target = gen_reg_rtx (mode);
1764
1765 /* In case we're about to clobber a base register or something
1766 (see gcc.c-torture/execute/20040625-1.c). */
1767 if (reg_mentioned_p (target, str_rtx))
1768 target = gen_reg_rtx (mode);
1769
1770 /* Indicate for flow that the entire target reg is being set. */
1771 emit_clobber (target);
1772
1773 last = get_last_insn ();
1774 for (i = 0; i < nwords; i++)
1775 {
1776 /* If I is 0, use the low-order word in both field and target;
1777 if I is 1, use the next to lowest word; and so on. */
1778 /* Word number in TARGET to use. */
1779 unsigned int wordnum
1780 = (backwards
1781 ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1782 : i);
1783 /* Offset from start of field in OP0. */
1784 unsigned int bit_offset = (backwards ^ reverse
1785 ? MAX ((int) bitsize - ((int) i + 1)
1786 * BITS_PER_WORD,
1787 0)
1788 : (int) i * BITS_PER_WORD);
1789 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1790 rtx result_part
1791 = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1792 bitsize - i * BITS_PER_WORD),
1793 bitnum + bit_offset, 1, target_part,
1794 mode, word_mode, reverse, fallback_p, NULL);
1795
1796 gcc_assert (target_part);
1797 if (!result_part)
1798 {
1799 delete_insns_since (last);
1800 return NULL;
1801 }
1802
1803 if (result_part != target_part)
1804 emit_move_insn (target_part, result_part);
1805 }
1806
1807 if (unsignedp)
1808 {
1809 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1810 need to be zero'd out. */
1811 if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1812 {
1813 unsigned int i, total_words;
1814
1815 total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1816 for (i = nwords; i < total_words; i++)
1817 emit_move_insn
1818 (operand_subword (target,
1819 backwards ? total_words - i - 1 : i,
1820 1, VOIDmode),
1821 const0_rtx);
1822 }
1823 return target;
1824 }
1825
1826 /* Signed bit field: sign-extend with two arithmetic shifts. */
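/* E.g. on a target with 32-bit words, a 40-bit signed field extracted
   into DImode is shifted left by 24 and then arithmetically right by 24
   to propagate its sign bit.  */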
1827 target = expand_shift (LSHIFT_EXPR, mode, target,
1828 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1829 return expand_shift (RSHIFT_EXPR, mode, target,
1830 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1831 }
1832
1833 /* If OP0 is a multi-word register, narrow it to the affected word.
1834 If the region spans two words, defer to extract_split_bit_field. */
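/* E.g. with 32-bit words, an 8-bit field at bit 40 of a DImode register
   lies entirely within the word at byte offset 4, so we extract from that
   subword with BITNUM reduced to 8.  */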
1835 if (!MEM_P (op0) && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD)
1836 {
1837 if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
1838 {
1839 if (!fallback_p)
1840 return NULL_RTX;
1841 target = extract_split_bit_field (op0, op0_mode, bitsize, bitnum,
1842 unsignedp, reverse);
1843 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1844 }
1845 op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (),
1846 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1847 op0_mode = word_mode;
1848 bitnum %= BITS_PER_WORD;
1849 }
1850
1851 /* From here on we know the desired field is smaller than a word.
1852 If OP0 is a register, it too fits within a word. */
1853 enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1854 extraction_insn extv;
1855 if (!MEM_P (op0)
1856 && !reverse
1857 /* ??? We could limit the structure size to the part of OP0 that
1858 contains the field, with appropriate checks for endianness
1859 and TARGET_TRULY_NOOP_TRUNCATION. */
1860 && get_best_reg_extraction_insn (&extv, pattern,
1861 GET_MODE_BITSIZE (op0_mode.require ()),
1862 tmode))
1863 {
1864 rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode,
1865 bitsize, bitnum,
1866 unsignedp, target, mode,
1867 tmode);
1868 if (result)
1869 return result;
1870 }
1871
1872 /* If OP0 is a memory, try copying it to a register and seeing if a
1873 cheap register alternative is available. */
1874 if (MEM_P (op0) && !reverse)
1875 {
1876 if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
1877 tmode))
1878 {
1879 rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode,
1880 bitsize, bitnum,
1881 unsignedp, target, mode,
1882 tmode);
1883 if (result)
1884 return result;
1885 }
1886
1887 rtx_insn *last = get_last_insn ();
1888
1889 /* Try loading part of OP0 into a register and extracting the
1890 bitfield from that. */
1891 unsigned HOST_WIDE_INT bitpos;
1892 rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
1893 0, 0, tmode, &bitpos);
1894 if (xop0)
1895 {
1896 xop0 = copy_to_reg (xop0);
1897 rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
1898 unsignedp, target,
1899 mode, tmode, reverse, false, NULL);
1900 if (result)
1901 return result;
1902 delete_insns_since (last);
1903 }
1904 }
1905
1906 if (!fallback_p)
1907 return NULL;
1908
1909 /* Find a correspondingly-sized integer field, so we can apply
1910 shifts and masks to it. */
1911 scalar_int_mode int_mode;
1912 if (!int_mode_for_mode (tmode).exists (&int_mode))
1913 /* If this fails, we should probably push op0 out to memory and then
1914 do a load. */
1915 int_mode = int_mode_for_mode (mode).require ();
1916
1917 target = extract_fixed_bit_field (int_mode, op0, op0_mode, bitsize,
1918 bitnum, target, unsignedp, reverse);
1919
1920 /* Complex values must be reversed piecewise, so we need to undo the global
1921 reversal, convert to the complex mode and reverse again. */
1922 if (reverse && COMPLEX_MODE_P (tmode))
1923 {
1924 target = flip_storage_order (int_mode, target);
1925 target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
1926 target = flip_storage_order (tmode, target);
1927 }
1928 else
1929 target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
1930
1931 return target;
1932 }
1933
1934 /* Generate code to extract a bit-field from STR_RTX
1935 containing BITSIZE bits, starting at BITNUM,
1936 and put it in TARGET if possible (if TARGET is nonzero).
1937 Regardless of TARGET, we return the rtx for where the value is placed.
1938
1939 STR_RTX is the structure containing the byte (a REG or MEM).
1940 UNSIGNEDP is nonzero if this is an unsigned bit field.
1941 MODE is the natural mode of the field value once extracted.
1942 TMODE is the mode the caller would like the value to have;
1943 but the value may be returned with type MODE instead.
1944
1945 If REVERSE is true, the extraction is to be done in reverse order.
1946
1947 If a TARGET is specified and we can store in it at no extra cost,
1948 we do so, and return TARGET.
1949 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1950 if they are equally easy. */
1951
1952 rtx
1953 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1954 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1955 machine_mode mode, machine_mode tmode, bool reverse,
1956 rtx *alt_rtl)
1957 {
1958 machine_mode mode1;
1959
1960 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
1961 if (GET_MODE_BITSIZE (GET_MODE (str_rtx)) > 0)
1962 mode1 = GET_MODE (str_rtx);
1963 else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1964 mode1 = GET_MODE (target);
1965 else
1966 mode1 = tmode;
1967
1968 scalar_int_mode int_mode;
1969 if (is_a <scalar_int_mode> (mode1, &int_mode)
1970 && strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, int_mode, 0, 0))
1971 {
1972 /* Extraction of a full INT_MODE value can be done with a simple load.
1973 We know here that the field can be accessed with one single
1974 instruction. For targets that support unaligned memory,
1975 an unaligned access may be necessary. */
1976 if (bitsize == GET_MODE_BITSIZE (int_mode))
1977 {
1978 rtx result = adjust_bitfield_address (str_rtx, int_mode,
1979 bitnum / BITS_PER_UNIT);
1980 if (reverse)
1981 result = flip_storage_order (int_mode, result);
1982 gcc_assert (bitnum % BITS_PER_UNIT == 0);
1983 return convert_extracted_bit_field (result, mode, tmode, unsignedp);
1984 }
1985
1986 str_rtx = narrow_bit_field_mem (str_rtx, int_mode, bitsize, bitnum,
1987 &bitnum);
1988 gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (int_mode));
1989 str_rtx = copy_to_reg (str_rtx);
1990 }
1991
1992 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
1993 target, mode, tmode, reverse, true, alt_rtl);
1994 }
1995 \f
1996 /* Use shifts and boolean operations to extract a field of BITSIZE bits
1997 from bit BITNUM of OP0. If OP0_MODE is defined, it is the mode of OP0,
1998 otherwise OP0 is a BLKmode MEM.
1999
2000 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
2001 If REVERSE is true, the extraction is to be done in reverse order.
2002
2003 If TARGET is nonzero, attempts to store the value there
2004 and return TARGET, but this is not guaranteed.
2005 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
2006
2007 static rtx
2008 extract_fixed_bit_field (machine_mode tmode, rtx op0,
2009 opt_scalar_int_mode op0_mode,
2010 unsigned HOST_WIDE_INT bitsize,
2011 unsigned HOST_WIDE_INT bitnum, rtx target,
2012 int unsignedp, bool reverse)
2013 {
2014 scalar_int_mode mode;
2015 if (MEM_P (op0))
2016 {
2017 if (!get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0),
2018 BITS_PER_WORD, MEM_VOLATILE_P (op0), &mode))
2019 /* The only way this should occur is if the field spans word
2020 boundaries. */
2021 return extract_split_bit_field (op0, op0_mode, bitsize, bitnum,
2022 unsignedp, reverse);
2023
2024 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
2025 }
2026 else
2027 mode = op0_mode.require ();
2028
2029 return extract_fixed_bit_field_1 (tmode, op0, mode, bitsize, bitnum,
2030 target, unsignedp, reverse);
2031 }
2032
2033 /* Helper function for extract_fixed_bit_field, extracts
2034 the bit field always using MODE, which is the mode of OP0.
2035 The other arguments are as for extract_fixed_bit_field. */
2036
2037 static rtx
2038 extract_fixed_bit_field_1 (machine_mode tmode, rtx op0, scalar_int_mode mode,
2039 unsigned HOST_WIDE_INT bitsize,
2040 unsigned HOST_WIDE_INT bitnum, rtx target,
2041 int unsignedp, bool reverse)
2042 {
2043 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
2044 for invalid input, such as extract equivalent of f5 from
2045 gcc.dg/pr48335-2.c. */
2046
2047 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2048 /* BITNUM is the distance between our msb and that of OP0.
2049 Convert it to the distance from the lsb. */
2050 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
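/* E.g. in a 32-bit MODE, an 8-bit field whose msb is 8 bits below OP0's
   msb has its lsb 16 bits above OP0's lsb: 32 - 8 - 8 = 16.  */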
2051
2052 /* Now BITNUM is always the distance between the field's lsb and that of OP0.
2053 We have reduced the big-endian case to the little-endian case. */
2054 if (reverse)
2055 op0 = flip_storage_order (mode, op0);
2056
2057 if (unsignedp)
2058 {
2059 if (bitnum)
2060 {
2061 /* If the field does not already start at the lsb,
2062 shift it so it does. */
2063 /* Maybe propagate the target for the shift. */
2064 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2065 if (tmode != mode)
2066 subtarget = 0;
2067 op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
2068 }
2069 /* Convert the value to the desired mode. TMODE must also be a
2070 scalar integer for this conversion to make sense, since we
2071 shouldn't reinterpret the bits. */
2072 scalar_int_mode new_mode = as_a <scalar_int_mode> (tmode);
2073 if (mode != new_mode)
2074 op0 = convert_to_mode (new_mode, op0, 1);
2075
2076 /* Unless the msb of the field used to be the msb when we shifted,
2077 mask out the upper bits. */
2078
2079 if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
2080 return expand_binop (new_mode, and_optab, op0,
2081 mask_rtx (new_mode, 0, bitsize, 0),
2082 target, 1, OPTAB_LIB_WIDEN);
2083 return op0;
2084 }
2085
2086 /* To extract a signed bit-field, first shift its msb to the msb of the word,
2087 then arithmetic-shift its lsb to the lsb of the word. */
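/* E.g. a 5-bit field at BITNUM 2 fits in QImode: shift left by 1 so the
   field's msb becomes bit 7, then shift right arithmetically by 3 to
   sign-extend it into the low 5 bits.  */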
2088 op0 = force_reg (mode, op0);
2089
2090 /* Find the narrowest integer mode that contains the field. */
2091
2092 opt_scalar_int_mode mode_iter;
2093 FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_INT)
2094 if (GET_MODE_BITSIZE (mode_iter.require ()) >= bitsize + bitnum)
2095 break;
2096
2097 mode = mode_iter.require ();
2098 op0 = convert_to_mode (mode, op0, 0);
2099
2100 if (mode != tmode)
2101 target = 0;
2102
2103 if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
2104 {
2105 int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
2106 /* Maybe propagate the target for the shift. */
2107 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2108 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
2109 }
2110
2111 return expand_shift (RSHIFT_EXPR, mode, op0,
2112 GET_MODE_BITSIZE (mode) - bitsize, target, 0);
2113 }
2114
2115 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
2116 VALUE << BITPOS. */
2117
2118 static rtx
2119 lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
2120 int bitpos)
2121 {
2122 return immed_wide_int_const (wi::lshift (value, bitpos), mode);
2123 }
2124 \f
2125 /* Extract a bit field that is split across two words
2126 and return an RTX for the result.
2127
2128 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2129 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2130 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.
2131 If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
2132 a BLKmode MEM.
2133
2134 If REVERSE is true, the extraction is to be done in reverse order. */
2135
2136 static rtx
2137 extract_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
2138 unsigned HOST_WIDE_INT bitsize,
2139 unsigned HOST_WIDE_INT bitpos, int unsignedp,
2140 bool reverse)
2141 {
2142 unsigned int unit;
2143 unsigned int bitsdone = 0;
2144 rtx result = NULL_RTX;
2145 int first = 1;
2146
2147 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
2148 much at a time. */
2149 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2150 unit = BITS_PER_WORD;
2151 else
2152 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
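/* E.g. with 32-bit words, a 20-bit field starting at bit 28 is read as a
   4-bit piece from the first word and a 16-bit piece from the second;
   the pieces are shifted into place and IORed together below.  */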
2153
2154 while (bitsdone < bitsize)
2155 {
2156 unsigned HOST_WIDE_INT thissize;
2157 rtx part;
2158 unsigned HOST_WIDE_INT thispos;
2159 unsigned HOST_WIDE_INT offset;
2160
2161 offset = (bitpos + bitsdone) / unit;
2162 thispos = (bitpos + bitsdone) % unit;
2163
2164 /* THISSIZE must not overrun a word boundary. Otherwise,
2165 extract_fixed_bit_field will call us again, and we will mutually
2166 recurse forever. */
2167 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2168 thissize = MIN (thissize, unit - thispos);
2169
2170 /* If OP0 is a register, then handle OFFSET here. */
2171 rtx op0_piece = op0;
2172 opt_scalar_int_mode op0_piece_mode = op0_mode;
2173 if (SUBREG_P (op0) || REG_P (op0))
2174 {
2175 op0_piece = operand_subword_force (op0, offset, op0_mode.require ());
2176 op0_piece_mode = word_mode;
2177 offset = 0;
2178 }
2179
2180 /* Extract the parts in bit-counting order,
2181 whose meaning is determined by BYTES_PER_UNIT.
2182 OFFSET is in UNITs, and UNIT is in bits. */
2183 part = extract_fixed_bit_field (word_mode, op0_piece, op0_piece_mode,
2184 thissize, offset * unit + thispos,
2185 0, 1, reverse);
2186 bitsdone += thissize;
2187
2188 /* Shift this part into place for the result. */
2189 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2190 {
2191 if (bitsize != bitsdone)
2192 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2193 bitsize - bitsdone, 0, 1);
2194 }
2195 else
2196 {
2197 if (bitsdone != thissize)
2198 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2199 bitsdone - thissize, 0, 1);
2200 }
2201
2202 if (first)
2203 result = part;
2204 else
2205 /* Combine the parts with bitwise or. This works
2206 because we extracted each part as an unsigned bit field. */
2207 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2208 OPTAB_LIB_WIDEN);
2209
2210 first = 0;
2211 }
2212
2213 /* Unsigned bit field: we are done. */
2214 if (unsignedp)
2215 return result;
2216 /* Signed bit field: sign-extend with two arithmetic shifts. */
2217 result = expand_shift (LSHIFT_EXPR, word_mode, result,
2218 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2219 return expand_shift (RSHIFT_EXPR, word_mode, result,
2220 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2221 }
2222 \f
2223 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2224 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
2225 MODE, fill the upper bits with zeros. Fail if the layout of either
2226 mode is unknown (as for CC modes) or if the extraction would involve
2227 unprofitable mode punning. Return the value on success, otherwise
2228 return null.
2229
2230 This is different from gen_lowpart* in these respects:
2231
2232 - the returned value must always be considered an rvalue
2233
2234 - when MODE is wider than SRC_MODE, the extraction involves
2235 a zero extension
2236
2237 - when MODE is smaller than SRC_MODE, the extraction involves
2238 a truncation (and is thus subject to TARGET_TRULY_NOOP_TRUNCATION).
2239
2240 In other words, this routine performs a computation, whereas the
2241 gen_lowpart* routines are conceptually lvalue or rvalue subreg
2242 operations. */
2243
2244 rtx
2245 extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
2246 {
2247 scalar_int_mode int_mode, src_int_mode;
2248
2249 if (mode == src_mode)
2250 return src;
2251
2252 if (CONSTANT_P (src))
2253 {
2254 /* simplify_gen_subreg can't be used here, as if simplify_subreg
2255 fails, it will happily create (subreg (symbol_ref)) or similar
2256 invalid SUBREGs. */
2257 unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2258 rtx ret = simplify_subreg (mode, src, src_mode, byte);
2259 if (ret)
2260 return ret;
2261
2262 if (GET_MODE (src) == VOIDmode
2263 || !validate_subreg (mode, src_mode, src, byte))
2264 return NULL_RTX;
2265
2266 src = force_reg (GET_MODE (src), src);
2267 return gen_rtx_SUBREG (mode, src, byte);
2268 }
2269
2270 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2271 return NULL_RTX;
2272
2273 if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2274 && targetm.modes_tieable_p (mode, src_mode))
2275 {
2276 rtx x = gen_lowpart_common (mode, src);
2277 if (x)
2278 return x;
2279 }
2280
2281 if (!int_mode_for_mode (src_mode).exists (&src_int_mode)
2282 || !int_mode_for_mode (mode).exists (&int_mode))
2283 return NULL_RTX;
2284
2285 if (!targetm.modes_tieable_p (src_int_mode, src_mode))
2286 return NULL_RTX;
2287 if (!targetm.modes_tieable_p (int_mode, mode))
2288 return NULL_RTX;
2289
2290 src = gen_lowpart (src_int_mode, src);
2291 src = convert_modes (int_mode, src_int_mode, src, true);
2292 src = gen_lowpart (mode, src);
2293 return src;
2294 }
2295 \f
2296 /* Add INC into TARGET. */
2297
2298 void
2299 expand_inc (rtx target, rtx inc)
2300 {
2301 rtx value = expand_binop (GET_MODE (target), add_optab,
2302 target, inc,
2303 target, 0, OPTAB_LIB_WIDEN);
2304 if (value != target)
2305 emit_move_insn (target, value);
2306 }
2307
2308 /* Subtract DEC from TARGET. */
2309
2310 void
2311 expand_dec (rtx target, rtx dec)
2312 {
2313 rtx value = expand_binop (GET_MODE (target), sub_optab,
2314 target, dec,
2315 target, 0, OPTAB_LIB_WIDEN);
2316 if (value != target)
2317 emit_move_insn (target, value);
2318 }
2319 \f
2320 /* Output a shift instruction for expression code CODE,
2321 with SHIFTED being the rtx for the value to shift,
2322 and AMOUNT the rtx for the amount to shift by.
2323 Store the result in the rtx TARGET, if that is convenient.
2324 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2325 Return the rtx for where the value is.
2326 If that cannot be done, abort the compilation unless MAY_FAIL is true,
2327 in which case 0 is returned. */
2328
2329 static rtx
2330 expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
2331 rtx amount, rtx target, int unsignedp, bool may_fail = false)
2332 {
2333 rtx op1, temp = 0;
2334 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2335 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2336 optab lshift_optab = ashl_optab;
2337 optab rshift_arith_optab = ashr_optab;
2338 optab rshift_uns_optab = lshr_optab;
2339 optab lrotate_optab = rotl_optab;
2340 optab rrotate_optab = rotr_optab;
2341 machine_mode op1_mode;
2342 scalar_mode scalar_mode = GET_MODE_INNER (mode);
2343 int attempt;
2344 bool speed = optimize_insn_for_speed_p ();
2345
2346 op1 = amount;
2347 op1_mode = GET_MODE (op1);
2348
2349 /* Determine whether the shift/rotate amount is a vector, or scalar. If the
2350 shift amount is a vector, use the vector/vector shift patterns. */
2351 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2352 {
2353 lshift_optab = vashl_optab;
2354 rshift_arith_optab = vashr_optab;
2355 rshift_uns_optab = vlshr_optab;
2356 lrotate_optab = vrotl_optab;
2357 rrotate_optab = vrotr_optab;
2358 }
2359
2360 /* Previously detected shift-counts computed by NEGATE_EXPR
2361 and shifted in the other direction; but that does not work
2362 on all machines. */
2363
2364 if (SHIFT_COUNT_TRUNCATED)
2365 {
2366 if (CONST_INT_P (op1)
2367 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2368 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2369 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2370 % GET_MODE_BITSIZE (scalar_mode));
2371 else if (GET_CODE (op1) == SUBREG
2372 && subreg_lowpart_p (op1)
2373 && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2374 && SCALAR_INT_MODE_P (GET_MODE (op1)))
2375 op1 = SUBREG_REG (op1);
2376 }
2377
2378 /* Canonicalize rotates by constant amount. If op1 is bitsize / 2,
2379 prefer left rotation; if op1 is from bitsize / 2 + 1 to
2380 bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1
2381 amount instead. */
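/* E.g. in SImode a rotate right by 25 becomes a rotate left by 7, and a
   rotate right by 16 becomes a rotate left by 16.  */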
2382 if (rotate
2383 && CONST_INT_P (op1)
2384 && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
2385 GET_MODE_BITSIZE (scalar_mode) - 1))
2386 {
2387 op1 = GEN_INT (GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2388 left = !left;
2389 code = left ? LROTATE_EXPR : RROTATE_EXPR;
2390 }
2391
2392 /* Rotation of 16-bit values by 8 bits is effectively equivalent to a bswaphi.
2393 Note that this is not the case for bigger values. For instance a rotation
2394 of 0x01020304 by 16 bits gives 0x03040102 which is different from
2395 0x04030201 (bswapsi). */
2396 if (rotate
2397 && CONST_INT_P (op1)
2398 && INTVAL (op1) == BITS_PER_UNIT
2399 && GET_MODE_SIZE (scalar_mode) == 2
2400 && optab_handler (bswap_optab, HImode) != CODE_FOR_nothing)
2401 return expand_unop (HImode, bswap_optab, shifted, NULL_RTX,
2402 unsignedp);
2403
2404 if (op1 == const0_rtx)
2405 return shifted;
2406
2407 /* Check whether it's cheaper to implement a left shift by a constant
2408 bit count by a sequence of additions. */
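/* E.g. x << 2 can be done as two self-additions (x + x, then the sum
   plus itself) when two adds are cheaper than the shift on this target.  */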
2409 if (code == LSHIFT_EXPR
2410 && CONST_INT_P (op1)
2411 && INTVAL (op1) > 0
2412 && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2413 && INTVAL (op1) < MAX_BITS_PER_WORD
2414 && (shift_cost (speed, mode, INTVAL (op1))
2415 > INTVAL (op1) * add_cost (speed, mode))
2416 && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2417 {
2418 int i;
2419 for (i = 0; i < INTVAL (op1); i++)
2420 {
2421 temp = force_reg (mode, shifted);
2422 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2423 unsignedp, OPTAB_LIB_WIDEN);
2424 }
2425 return shifted;
2426 }
2427
2428 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2429 {
2430 enum optab_methods methods;
2431
2432 if (attempt == 0)
2433 methods = OPTAB_DIRECT;
2434 else if (attempt == 1)
2435 methods = OPTAB_WIDEN;
2436 else
2437 methods = OPTAB_LIB_WIDEN;
2438
2439 if (rotate)
2440 {
2441 /* Widening does not work for rotation. */
2442 if (methods == OPTAB_WIDEN)
2443 continue;
2444 else if (methods == OPTAB_LIB_WIDEN)
2445 {
2446 /* If we have been unable to open-code this by a rotation,
2447 do it as the IOR of two shifts. I.e., to rotate A
2448 by N bits, compute
2449 (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2450 where C is the bitsize of A.
2451
2452 It is theoretically possible that the target machine might
2453 not be able to perform either shift and hence we would
2454 be making two libcalls rather than just the one for the
2455 shift (similarly if IOR could not be done). We will allow
2456 this extremely unlikely lossage to avoid complicating the
2457 code below. */
2458
2459 rtx subtarget = target == shifted ? 0 : target;
2460 rtx new_amount, other_amount;
2461 rtx temp1;
2462
2463 new_amount = op1;
2464 if (op1 == const0_rtx)
2465 return shifted;
2466 else if (CONST_INT_P (op1))
2467 other_amount = GEN_INT (GET_MODE_BITSIZE (scalar_mode)
2468 - INTVAL (op1));
2469 else
2470 {
2471 other_amount
2472 = simplify_gen_unary (NEG, GET_MODE (op1),
2473 op1, GET_MODE (op1));
2474 HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
2475 other_amount
2476 = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2477 gen_int_mode (mask, GET_MODE (op1)));
2478 }
2479
2480 shifted = force_reg (mode, shifted);
2481
2482 temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2483 mode, shifted, new_amount, 0, 1);
2484 temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2485 mode, shifted, other_amount,
2486 subtarget, 1);
2487 return expand_binop (mode, ior_optab, temp, temp1, target,
2488 unsignedp, methods);
2489 }
2490
2491 temp = expand_binop (mode,
2492 left ? lrotate_optab : rrotate_optab,
2493 shifted, op1, target, unsignedp, methods);
2494 }
2495 else if (unsignedp)
2496 temp = expand_binop (mode,
2497 left ? lshift_optab : rshift_uns_optab,
2498 shifted, op1, target, unsignedp, methods);
2499
2500 /* Do arithmetic shifts.
2501 Also, if we are going to widen the operand, we can just as well
2502 use an arithmetic right-shift instead of a logical one. */
2503 if (temp == 0 && ! rotate
2504 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2505 {
2506 enum optab_methods methods1 = methods;
2507
2508 /* If trying to widen a log shift to an arithmetic shift,
2509 don't accept an arithmetic shift of the same size. */
2510 if (unsignedp)
2511 methods1 = OPTAB_MUST_WIDEN;
2512
2513 /* Arithmetic shift */
2514
2515 temp = expand_binop (mode,
2516 left ? lshift_optab : rshift_arith_optab,
2517 shifted, op1, target, unsignedp, methods1);
2518 }
2519
2520 /* We used to try extzv here for logical right shifts, but that was
2521 only useful for one machine, the VAX, and caused poor code
2522 generation there for lshrdi3, so the code was deleted and a
2523 define_expand for lshrsi3 was added to vax.md. */
2524 }
2525
2526 gcc_assert (temp != NULL_RTX || may_fail);
2527 return temp;
2528 }
2529
2530 /* Output a shift instruction for expression code CODE,
2531 with SHIFTED being the rtx for the value to shift,
2532 and AMOUNT the amount to shift by.
2533 Store the result in the rtx TARGET, if that is convenient.
2534 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2535 Return the rtx for where the value is. */
2536
2537 rtx
2538 expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2539 int amount, rtx target, int unsignedp)
2540 {
2541 return expand_shift_1 (code, mode,
2542 shifted, GEN_INT (amount), target, unsignedp);
2543 }
2544
2545 /* Likewise, but return 0 if that cannot be done. */
2546
2547 static rtx
2548 maybe_expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2549 int amount, rtx target, int unsignedp)
2550 {
2551 return expand_shift_1 (code, mode,
2552 shifted, GEN_INT (amount), target, unsignedp, true);
2553 }
2554
2555 /* Output a shift instruction for expression code CODE,
2556 with SHIFTED being the rtx for the value to shift,
2557 and AMOUNT the tree for the amount to shift by.
2558 Store the result in the rtx TARGET, if that is convenient.
2559 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2560 Return the rtx for where the value is. */
2561
2562 rtx
2563 expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
2564 tree amount, rtx target, int unsignedp)
2565 {
2566 return expand_shift_1 (code, mode,
2567 shifted, expand_normal (amount), target, unsignedp);
2568 }
2569
2570 \f
2571 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2572 const struct mult_cost *, machine_mode mode);
2573 static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
2574 const struct algorithm *, enum mult_variant);
2575 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2576 static rtx extract_high_half (scalar_int_mode, rtx);
2577 static rtx expmed_mult_highpart (scalar_int_mode, rtx, rtx, rtx, int, int);
2578 static rtx expmed_mult_highpart_optab (scalar_int_mode, rtx, rtx, rtx,
2579 int, int);
2580 /* Compute and return the best algorithm for multiplying by T.
2581 The algorithm must cost less than cost_limit
2582 If retval.cost >= COST_LIMIT, no algorithm was found and all
2583 other field of the returned struct are undefined.
2584 MODE is the machine mode of the multiplication. */
2585
2586 static void
2587 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2588 const struct mult_cost *cost_limit, machine_mode mode)
2589 {
2590 int m;
2591 struct algorithm *alg_in, *best_alg;
2592 struct mult_cost best_cost;
2593 struct mult_cost new_limit;
2594 int op_cost, op_latency;
2595 unsigned HOST_WIDE_INT orig_t = t;
2596 unsigned HOST_WIDE_INT q;
2597 int maxm, hash_index;
2598 bool cache_hit = false;
2599 enum alg_code cache_alg = alg_zero;
2600 bool speed = optimize_insn_for_speed_p ();
2601 scalar_int_mode imode;
2602 struct alg_hash_entry *entry_ptr;
2603
2604 /* Indicate that no algorithm is yet found. If no algorithm
2605 is found, this value will be returned and indicate failure. */
2606 alg_out->cost.cost = cost_limit->cost + 1;
2607 alg_out->cost.latency = cost_limit->latency + 1;
2608
2609 if (cost_limit->cost < 0
2610 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2611 return;
2612
2613 /* Be prepared for vector modes. */
2614 imode = as_a <scalar_int_mode> (GET_MODE_INNER (mode));
2615
2616 maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2617
2618 /* Restrict the bits of "t" to the multiplication's mode. */
2619 t &= GET_MODE_MASK (imode);
2620
2621 /* t == 1 can be done in zero cost. */
2622 if (t == 1)
2623 {
2624 alg_out->ops = 1;
2625 alg_out->cost.cost = 0;
2626 alg_out->cost.latency = 0;
2627 alg_out->op[0] = alg_m;
2628 return;
2629 }
2630
2631 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2632 fail now. */
2633 if (t == 0)
2634 {
2635 if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2636 return;
2637 else
2638 {
2639 alg_out->ops = 1;
2640 alg_out->cost.cost = zero_cost (speed);
2641 alg_out->cost.latency = zero_cost (speed);
2642 alg_out->op[0] = alg_zero;
2643 return;
2644 }
2645 }
2646
2647 /* We'll be needing a couple extra algorithm structures now. */
2648
2649 alg_in = XALLOCA (struct algorithm);
2650 best_alg = XALLOCA (struct algorithm);
2651 best_cost = *cost_limit;
2652
2653 /* Compute the hash index. */
2654 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2655
2656 /* See if we already know what to do for T. */
2657 entry_ptr = alg_hash_entry_ptr (hash_index);
2658 if (entry_ptr->t == t
2659 && entry_ptr->mode == mode
2660 && entry_ptr->speed == speed
2661 && entry_ptr->alg != alg_unknown)
2662 {
2663 cache_alg = entry_ptr->alg;
2664
2665 if (cache_alg == alg_impossible)
2666 {
2667 /* The cache tells us that it's impossible to synthesize
2668 multiplication by T within entry_ptr->cost. */
2669 if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2670 /* COST_LIMIT is at least as restrictive as the one
2671 recorded in the hash table, in which case we have no
2672 hope of synthesizing a multiplication. Just
2673 return. */
2674 return;
2675
2676 /* If we get here, COST_LIMIT is less restrictive than the
2677 one recorded in the hash table, so we may be able to
2678 synthesize a multiplication. Proceed as if we didn't
2679 have the cache entry. */
2680 }
2681 else
2682 {
2683 if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2684 /* The cached algorithm shows that this multiplication
2685 requires more cost than COST_LIMIT. Just return. This
2686 way, we don't clobber this cache entry with
2687 alg_impossible but retain useful information. */
2688 return;
2689
2690 cache_hit = true;
2691
2692 switch (cache_alg)
2693 {
2694 case alg_shift:
2695 goto do_alg_shift;
2696
2697 case alg_add_t_m2:
2698 case alg_sub_t_m2:
2699 goto do_alg_addsub_t_m2;
2700
2701 case alg_add_factor:
2702 case alg_sub_factor:
2703 goto do_alg_addsub_factor;
2704
2705 case alg_add_t2_m:
2706 goto do_alg_add_t2_m;
2707
2708 case alg_sub_t2_m:
2709 goto do_alg_sub_t2_m;
2710
2711 default:
2712 gcc_unreachable ();
2713 }
2714 }
2715 }
2716
2717 /* If we have a group of zero bits at the low-order part of T, try
2718 multiplying by the remaining bits and then doing a shift. */
2719
2720 if ((t & 1) == 0)
2721 {
2722 do_alg_shift:
2723 m = ctz_or_zero (t); /* m = number of low zero bits */
2724 if (m < maxm)
2725 {
2726 q = t >> m;
2727 /* The function expand_shift will choose between a shift and
2728 a sequence of additions, so the observed cost is given as
2729 MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */
2730 op_cost = m * add_cost (speed, mode);
2731 if (shift_cost (speed, mode, m) < op_cost)
2732 op_cost = shift_cost (speed, mode, m);
2733 new_limit.cost = best_cost.cost - op_cost;
2734 new_limit.latency = best_cost.latency - op_cost;
2735 synth_mult (alg_in, q, &new_limit, mode);
2736
2737 alg_in->cost.cost += op_cost;
2738 alg_in->cost.latency += op_cost;
2739 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2740 {
2741 best_cost = alg_in->cost;
2742 std::swap (alg_in, best_alg);
2743 best_alg->log[best_alg->ops] = m;
2744 best_alg->op[best_alg->ops] = alg_shift;
2745 }
2746
2747 /* See if treating ORIG_T as a signed number yields a better
2748 sequence. Try this sequence only for a negative ORIG_T
2749 as it would be useless for a non-negative ORIG_T. */
2750 if ((HOST_WIDE_INT) orig_t < 0)
2751 {
2752 /* Shift ORIG_T as follows because a right shift of a
2753 negative-valued signed type is implementation
2754 defined. */
2755 q = ~(~orig_t >> m);
2756 /* The function expand_shift will choose between a shift
2757 and a sequence of additions, so the observed cost is
2758 given as MIN (m * add_cost(speed, mode),
2759 shift_cost(speed, mode, m)). */
2760 op_cost = m * add_cost (speed, mode);
2761 if (shift_cost (speed, mode, m) < op_cost)
2762 op_cost = shift_cost (speed, mode, m);
2763 new_limit.cost = best_cost.cost - op_cost;
2764 new_limit.latency = best_cost.latency - op_cost;
2765 synth_mult (alg_in, q, &new_limit, mode);
2766
2767 alg_in->cost.cost += op_cost;
2768 alg_in->cost.latency += op_cost;
2769 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2770 {
2771 best_cost = alg_in->cost;
2772 std::swap (alg_in, best_alg);
2773 best_alg->log[best_alg->ops] = m;
2774 best_alg->op[best_alg->ops] = alg_shift;
2775 }
2776 }
2777 }
2778 if (cache_hit)
2779 goto done;
2780 }
2781
2782 /* If we have an odd number, add or subtract one. */
2783 if ((t & 1) != 0)
2784 {
2785 unsigned HOST_WIDE_INT w;
2786
2787 do_alg_addsub_t_m2:
2788 for (w = 1; (w & t) != 0; w <<= 1)
2789 ;
2790 /* If T was -1, then W will be zero after the loop. This is another
2791 case where T ends with ...111. Handling this with (T + 1) and
2792 subtract 1 produces slightly better code and results in algorithm
2793 selection much faster than treating it like the ...0111 case
2794 below. */
2795 if (w == 0
2796 || (w > 2
2797 /* Reject the case where t is 3.
2798 Thus we prefer addition in that case. */
2799 && t != 3))
2800 {
2801 /* T ends with ...111. Multiply by (T + 1) and subtract 1. */
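/* E.g. for t == 7 we synthesize t + 1 == 8 (a single shift) and then
   subtract the multiplicand once: x*7 == (x << 3) - x.  */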
2802
2803 op_cost = add_cost (speed, mode);
2804 new_limit.cost = best_cost.cost - op_cost;
2805 new_limit.latency = best_cost.latency - op_cost;
2806 synth_mult (alg_in, t + 1, &new_limit, mode);
2807
2808 alg_in->cost.cost += op_cost;
2809 alg_in->cost.latency += op_cost;
2810 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2811 {
2812 best_cost = alg_in->cost;
2813 std::swap (alg_in, best_alg);
2814 best_alg->log[best_alg->ops] = 0;
2815 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2816 }
2817 }
2818 else
2819 {
2820 /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
2821
2822 op_cost = add_cost (speed, mode);
2823 new_limit.cost = best_cost.cost - op_cost;
2824 new_limit.latency = best_cost.latency - op_cost;
2825 synth_mult (alg_in, t - 1, &new_limit, mode);
2826
2827 alg_in->cost.cost += op_cost;
2828 alg_in->cost.latency += op_cost;
2829 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2830 {
2831 best_cost = alg_in->cost;
2832 std::swap (alg_in, best_alg);
2833 best_alg->log[best_alg->ops] = 0;
2834 best_alg->op[best_alg->ops] = alg_add_t_m2;
2835 }
2836 }
2837
2838 /* We may be able to calculate a * -7, a * -15, a * -31, etc
2839 quickly with a - a * n for some appropriate constant n. */
2840 m = exact_log2 (-orig_t + 1);
2841 if (m >= 0 && m < maxm)
2842 {
2843 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2844 /* If the target has a cheap shift-and-subtract insn, use
2845 that in preference to a shift insn followed by a sub insn.
2846 Assume that the shift-and-sub is "atomic" with a latency
2847 equal to its cost; otherwise assume that on superscalar
2848 hardware the shift may be executed concurrently with the
2849 earlier steps in the algorithm. */
2850 if (shiftsub1_cost (speed, mode, m) <= op_cost)
2851 {
2852 op_cost = shiftsub1_cost (speed, mode, m);
2853 op_latency = op_cost;
2854 }
2855 else
2856 op_latency = add_cost (speed, mode);
2857
2858 new_limit.cost = best_cost.cost - op_cost;
2859 new_limit.latency = best_cost.latency - op_latency;
2860 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2861 &new_limit, mode);
2862
2863 alg_in->cost.cost += op_cost;
2864 alg_in->cost.latency += op_latency;
2865 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2866 {
2867 best_cost = alg_in->cost;
2868 std::swap (alg_in, best_alg);
2869 best_alg->log[best_alg->ops] = m;
2870 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2871 }
2872 }
2873
2874 if (cache_hit)
2875 goto done;
2876 }
2877
2878 /* Look for factors of t of the form
2879 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2880 If we find such a factor, we can multiply by t using an algorithm that
2881 multiplies by q, shift the result by m and add/subtract it to itself.
2882
2883 We search for large factors first and loop down, even if large factors
2884 are less probable than small; if we find a large factor we will find a
2885 good sequence quickly, and therefore be able to prune (by decreasing
2886 COST_LIMIT) the search. */
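/* E.g. t == 25 is divisible by d == 5 == 2**2 + 1, so x*25 can be formed
   as (x*5 << 2) + x*5, with x*5 synthesized recursively.  */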
2887
2888 do_alg_addsub_factor:
2889 for (m = floor_log2 (t - 1); m >= 2; m--)
2890 {
2891 unsigned HOST_WIDE_INT d;
2892
2893 d = (HOST_WIDE_INT_1U << m) + 1;
2894 if (t % d == 0 && t > d && m < maxm
2895 && (!cache_hit || cache_alg == alg_add_factor))
2896 {
2897 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2898 if (shiftadd_cost (speed, mode, m) <= op_cost)
2899 op_cost = shiftadd_cost (speed, mode, m);
2900
2901 op_latency = op_cost;
2902
2903
2904 new_limit.cost = best_cost.cost - op_cost;
2905 new_limit.latency = best_cost.latency - op_latency;
2906 synth_mult (alg_in, t / d, &new_limit, mode);
2907
2908 alg_in->cost.cost += op_cost;
2909 alg_in->cost.latency += op_latency;
2910 if (alg_in->cost.latency < op_cost)
2911 alg_in->cost.latency = op_cost;
2912 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2913 {
2914 best_cost = alg_in->cost;
2915 std::swap (alg_in, best_alg);
2916 best_alg->log[best_alg->ops] = m;
2917 best_alg->op[best_alg->ops] = alg_add_factor;
2918 }
2919 /* Other factors will have been taken care of in the recursion. */
2920 break;
2921 }
2922
2923 d = (HOST_WIDE_INT_1U << m) - 1;
2924 if (t % d == 0 && t > d && m < maxm
2925 && (!cache_hit || cache_alg == alg_sub_factor))
2926 {
2927 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2928 if (shiftsub0_cost (speed, mode, m) <= op_cost)
2929 op_cost = shiftsub0_cost (speed, mode, m);
2930
2931 op_latency = op_cost;
2932
2933 new_limit.cost = best_cost.cost - op_cost;
2934 new_limit.latency = best_cost.latency - op_latency;
2935 synth_mult (alg_in, t / d, &new_limit, mode);
2936
2937 alg_in->cost.cost += op_cost;
2938 alg_in->cost.latency += op_latency;
2939 if (alg_in->cost.latency < op_cost)
2940 alg_in->cost.latency = op_cost;
2941 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2942 {
2943 best_cost = alg_in->cost;
2944 std::swap (alg_in, best_alg);
2945 best_alg->log[best_alg->ops] = m;
2946 best_alg->op[best_alg->ops] = alg_sub_factor;
2947 }
2948 break;
2949 }
2950 }
2951 if (cache_hit)
2952 goto done;
2953
2954 /* Try shift-and-add (load effective address) instructions,
2955 i.e. do a*3, a*5, a*9. */
2956 if ((t & 1) != 0)
2957 {
2958 do_alg_add_t2_m:
2959 q = t - 1;
2960 m = ctz_hwi (q);
2961 if (q && m < maxm)
2962 {
2963 op_cost = shiftadd_cost (speed, mode, m);
2964 new_limit.cost = best_cost.cost - op_cost;
2965 new_limit.latency = best_cost.latency - op_cost;
2966 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2967
2968 alg_in->cost.cost += op_cost;
2969 alg_in->cost.latency += op_cost;
2970 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2971 {
2972 best_cost = alg_in->cost;
2973 std::swap (alg_in, best_alg);
2974 best_alg->log[best_alg->ops] = m;
2975 best_alg->op[best_alg->ops] = alg_add_t2_m;
2976 }
2977 }
2978 if (cache_hit)
2979 goto done;
2980
2981 do_alg_sub_t2_m:
2982 q = t + 1;
2983 m = ctz_hwi (q);
2984 if (q && m < maxm)
2985 {
2986 op_cost = shiftsub0_cost (speed, mode, m);
2987 new_limit.cost = best_cost.cost - op_cost;
2988 new_limit.latency = best_cost.latency - op_cost;
2989 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2990
2991 alg_in->cost.cost += op_cost;
2992 alg_in->cost.latency += op_cost;
2993 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2994 {
2995 best_cost = alg_in->cost;
2996 std::swap (alg_in, best_alg);
2997 best_alg->log[best_alg->ops] = m;
2998 best_alg->op[best_alg->ops] = alg_sub_t2_m;
2999 }
3000 }
3001 if (cache_hit)
3002 goto done;
3003 }
3004
3005 done:
3006 /* If best_cost has not decreased, we have not found any algorithm. */
3007 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
3008 {
3009 /* We failed to find an algorithm. Record alg_impossible for
3010 this case (that is, <T, MODE, COST_LIMIT>) so that next time
3011 we are asked to find an algorithm for T within the same or
3012 lower COST_LIMIT, we can immediately return to the
3013 caller. */
3014 entry_ptr->t = t;
3015 entry_ptr->mode = mode;
3016 entry_ptr->speed = speed;
3017 entry_ptr->alg = alg_impossible;
3018 entry_ptr->cost = *cost_limit;
3019 return;
3020 }
3021
3022 /* Cache the result. */
3023 if (!cache_hit)
3024 {
3025 entry_ptr->t = t;
3026 entry_ptr->mode = mode;
3027 entry_ptr->speed = speed;
3028 entry_ptr->alg = best_alg->op[best_alg->ops];
3029 entry_ptr->cost.cost = best_cost.cost;
3030 entry_ptr->cost.latency = best_cost.latency;
3031 }
3032
3033 /* If the sequence is getting too long for `struct algorithm'
3034 to record, make this search fail. */
3035 if (best_alg->ops == MAX_BITS_PER_WORD)
3036 return;
3037
3038 /* Copy the algorithm from temporary space to the space at alg_out.
3039 We avoid using structure assignment because the majority of
3040 best_alg is normally undefined, and this is a critical function. */
3041 alg_out->ops = best_alg->ops + 1;
3042 alg_out->cost = best_cost;
3043 memcpy (alg_out->op, best_alg->op,
3044 alg_out->ops * sizeof *alg_out->op);
3045 memcpy (alg_out->log, best_alg->log,
3046 alg_out->ops * sizeof *alg_out->log);
3047 }
3048 \f
3049 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
3050 Try three variations:
3051
3052 - a shift/add sequence based on VAL itself
3053 - a shift/add sequence based on -VAL, followed by a negation
3054 - a shift/add sequence based on VAL - 1, followed by an addition.
3055
3056 Return true if the cheapest of these costs less than MULT_COST,
3057 describing the algorithm in *ALG and final fixup in *VARIANT. */
3058
3059 bool
3060 choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
3061 struct algorithm *alg, enum mult_variant *variant,
3062 int mult_cost)
3063 {
3064 struct algorithm alg2;
3065 struct mult_cost limit;
3066 int op_cost;
3067 bool speed = optimize_insn_for_speed_p ();
3068
3069 /* Fail quickly for impossible bounds. */
3070 if (mult_cost < 0)
3071 return false;
3072
3073 /* Ensure that mult_cost provides a reasonable upper bound.
3074 Any constant multiplication can be performed with less
3075 than 2 * bits additions. */
3076 op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
3077 if (mult_cost > op_cost)
3078 mult_cost = op_cost;
3079
3080 *variant = basic_variant;
3081 limit.cost = mult_cost;
3082 limit.latency = mult_cost;
3083 synth_mult (alg, val, &limit, mode);
3084
3085 /* This works only if the inverted value actually fits in an
3086 `unsigned int' */
3087 if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
3088 {
3089 op_cost = neg_cost (speed, mode);
3090 if (MULT_COST_LESS (&alg->cost, mult_cost))
3091 {
3092 limit.cost = alg->cost.cost - op_cost;
3093 limit.latency = alg->cost.latency - op_cost;
3094 }
3095 else
3096 {
3097 limit.cost = mult_cost - op_cost;
3098 limit.latency = mult_cost - op_cost;
3099 }
3100
3101 synth_mult (&alg2, -val, &limit, mode);
3102 alg2.cost.cost += op_cost;
3103 alg2.cost.latency += op_cost;
3104 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3105 *alg = alg2, *variant = negate_variant;
3106 }
3107
3108 /* This proves very useful for division-by-constant. */
3109 op_cost = add_cost (speed, mode);
3110 if (MULT_COST_LESS (&alg->cost, mult_cost))
3111 {
3112 limit.cost = alg->cost.cost - op_cost;
3113 limit.latency = alg->cost.latency - op_cost;
3114 }
3115 else
3116 {
3117 limit.cost = mult_cost - op_cost;
3118 limit.latency = mult_cost - op_cost;
3119 }
3120
3121 synth_mult (&alg2, val - 1, &limit, mode);
3122 alg2.cost.cost += op_cost;
3123 alg2.cost.latency += op_cost;
3124 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3125 *alg = alg2, *variant = add_variant;
3126
3127 return MULT_COST_LESS (&alg->cost, mult_cost);
3128 }
3129
3130 /* A subroutine of expand_mult, used for constant multiplications.
3131 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
3132 convenient. Use the shift/add sequence described by ALG and apply
3133 the final fixup specified by VARIANT. */
3134
3135 static rtx
3136 expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
3137 rtx target, const struct algorithm *alg,
3138 enum mult_variant variant)
3139 {
3140 unsigned HOST_WIDE_INT val_so_far;
3141 rtx_insn *insn;
3142 rtx accum, tem;
3143 int opno;
3144 machine_mode nmode;
3145
3146 /* Avoid referencing memory over and over and invalid sharing
3147 on SUBREGs. */
3148 op0 = force_reg (mode, op0);
3149
3150 /* ACCUM starts out either as OP0 or as a zero, depending on
3151 the first operation. */
3152
3153 if (alg->op[0] == alg_zero)
3154 {
3155 accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
3156 val_so_far = 0;
3157 }
3158 else if (alg->op[0] == alg_m)
3159 {
3160 accum = copy_to_mode_reg (mode, op0);
3161 val_so_far = 1;
3162 }
3163 else
3164 gcc_unreachable ();
3165
3166 for (opno = 1; opno < alg->ops; opno++)
3167 {
3168 int log = alg->log[opno];
3169 rtx shift_subtarget = optimize ? 0 : accum;
3170 rtx add_target
3171 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
3172 && !optimize)
3173 ? target : 0;
3174 rtx accum_target = optimize ? 0 : accum;
3175 rtx accum_inner;
3176
3177 switch (alg->op[opno])
3178 {
3179 case alg_shift:
3180 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3181 /* REG_EQUAL note will be attached to the following insn. */
3182 emit_move_insn (accum, tem);
3183 val_so_far <<= log;
3184 break;
3185
3186 case alg_add_t_m2:
3187 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3188 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3189 add_target ? add_target : accum_target);
3190 val_so_far += HOST_WIDE_INT_1U << log;
3191 break;
3192
3193 case alg_sub_t_m2:
3194 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3195 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3196 add_target ? add_target : accum_target);
3197 val_so_far -= HOST_WIDE_INT_1U << log;
3198 break;
3199
3200 case alg_add_t2_m:
3201 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3202 log, shift_subtarget, 0);
3203 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3204 add_target ? add_target : accum_target);
3205 val_so_far = (val_so_far << log) + 1;
3206 break;
3207
3208 case alg_sub_t2_m:
3209 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3210 log, shift_subtarget, 0);
3211 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3212 add_target ? add_target : accum_target);
3213 val_so_far = (val_so_far << log) - 1;
3214 break;
3215
3216 case alg_add_factor:
3217 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3218 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3219 add_target ? add_target : accum_target);
3220 val_so_far += val_so_far << log;
3221 break;
3222
3223 case alg_sub_factor:
3224 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3225 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3226 (add_target
3227 ? add_target : (optimize ? 0 : tem)));
3228 val_so_far = (val_so_far << log) - val_so_far;
3229 break;
3230
3231 default:
3232 gcc_unreachable ();
3233 }
3234
3235 if (SCALAR_INT_MODE_P (mode))
3236 {
3237 /* Write a REG_EQUAL note on the last insn so that we can cse
3238 multiplication sequences. Note that if ACCUM is a SUBREG,
3239 we've set the inner register and must properly indicate that. */
3240 tem = op0, nmode = mode;
3241 accum_inner = accum;
3242 if (GET_CODE (accum) == SUBREG)
3243 {
3244 accum_inner = SUBREG_REG (accum);
3245 nmode = GET_MODE (accum_inner);
3246 tem = gen_lowpart (nmode, op0);
3247 }
3248
3249 insn = get_last_insn ();
3250 set_dst_reg_note (insn, REG_EQUAL,
3251 gen_rtx_MULT (nmode, tem,
3252 gen_int_mode (val_so_far, nmode)),
3253 accum_inner);
3254 }
3255 }
3256
3257 if (variant == negate_variant)
3258 {
3259 val_so_far = -val_so_far;
3260 accum = expand_unop (mode, neg_optab, accum, target, 0);
3261 }
3262 else if (variant == add_variant)
3263 {
3264 val_so_far = val_so_far + 1;
3265 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3266 }
3267
3268 /* Compare only the bits of val and val_so_far that are significant
3269 in the result mode, to avoid sign-/zero-extension confusion. */
3270 nmode = GET_MODE_INNER (mode);
3271 val &= GET_MODE_MASK (nmode);
3272 val_so_far &= GET_MODE_MASK (nmode);
3273 gcc_assert (val == (HOST_WIDE_INT) val_so_far);
3274
3275 return accum;
3276 }
3277
3278 /* Perform a multiplication and return an rtx for the result.
3279 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3280 TARGET is a suggestion for where to store the result (an rtx).
3281
3282 We check specially for a constant integer as OP1.
3283 If you want this check for OP0 as well, then before calling
3284 you should swap the two operands if OP0 would be constant. */
3285
3286 rtx
3287 expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3288 int unsignedp, bool no_libcall)
3289 {
3290 enum mult_variant variant;
3291 struct algorithm algorithm;
3292 rtx scalar_op1;
3293 int max_cost;
3294 bool speed = optimize_insn_for_speed_p ();
3295 bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3296
3297 if (CONSTANT_P (op0))
3298 std::swap (op0, op1);
3299
3300 /* For vectors, there are several simplifications that can be made if
3301 all elements of the vector constant are identical. */
3302 scalar_op1 = unwrap_const_vec_duplicate (op1);
3303
3304 if (INTEGRAL_MODE_P (mode))
3305 {
3306 rtx fake_reg;
3307 HOST_WIDE_INT coeff;
3308 bool is_neg;
3309 int mode_bitsize;
3310
3311 if (op1 == CONST0_RTX (mode))
3312 return op1;
3313 if (op1 == CONST1_RTX (mode))
3314 return op0;
3315 if (op1 == CONSTM1_RTX (mode))
3316 return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3317 op0, target, 0);
3318
3319 if (do_trapv)
3320 goto skip_synth;
3321
3322 /* If mode is an integer vector mode, check if the backend supports
3323 vector lshift (by scalar or vector) at all. If not, we can't use
3324 a synthesized multiply. */
3325 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3326 && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
3327 && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
3328 goto skip_synth;
3329
3330 /* These are the operations that are potentially turned into
3331 a sequence of shifts and additions. */
3332 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3333
3334 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3335 less than or equal in size to `unsigned int' this doesn't matter.
3336 If the mode is larger than `unsigned int', then synth_mult works
3337 only if the constant value exactly fits in an `unsigned int' without
3338 any truncation. This means that multiplying by negative values does
3339 not work; results are off by 2^32 on a 32 bit machine. */
3340 if (CONST_INT_P (scalar_op1))
3341 {
3342 coeff = INTVAL (scalar_op1);
3343 is_neg = coeff < 0;
3344 }
3345 #if TARGET_SUPPORTS_WIDE_INT
3346 else if (CONST_WIDE_INT_P (scalar_op1))
3347 #else
3348 else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3349 #endif
3350 {
3351 int shift = wi::exact_log2 (rtx_mode_t (scalar_op1, mode));
3352 /* Perfect power of 2 (other than 1, which is handled above). */
3353 if (shift > 0)
3354 return expand_shift (LSHIFT_EXPR, mode, op0,
3355 shift, target, unsignedp);
3356 else
3357 goto skip_synth;
3358 }
3359 else
3360 goto skip_synth;
3361
3362 /* We used to test optimize here, on the grounds that it's better to
3363 produce a smaller program when -O is not used. But this causes
3364 such a terrible slowdown sometimes that it seems better to always
3365 use synth_mult. */
3366
3367 /* Special case powers of two. */
3368 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3369 && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3370 return expand_shift (LSHIFT_EXPR, mode, op0,
3371 floor_log2 (coeff), target, unsignedp);
3372
3373 fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3374
3375 /* Attempt to handle multiplication of DImode values by negative
3376 coefficients, by performing the multiplication by a positive
3377 multiplier and then inverting the result. */
3378 if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3379 {
3380 /* It's safe to use -coeff even for INT_MIN, as the
3381 result is interpreted as an unsigned coefficient.
3382 Exclude cost of op0 from max_cost to match the cost
3383 calculation of the synth_mult. */
3384 coeff = -(unsigned HOST_WIDE_INT) coeff;
3385 max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
3386 mode, speed)
3387 - neg_cost (speed, mode));
3388 if (max_cost <= 0)
3389 goto skip_synth;
3390
3391 /* Special case powers of two. */
3392 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3393 {
3394 rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3395 floor_log2 (coeff), target, unsignedp);
3396 return expand_unop (mode, neg_optab, temp, target, 0);
3397 }
3398
3399 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3400 max_cost))
3401 {
3402 rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3403 &algorithm, variant);
3404 return expand_unop (mode, neg_optab, temp, target, 0);
3405 }
3406 goto skip_synth;
3407 }
3408
3409 /* Exclude cost of op0 from max_cost to match the cost
3410 calculation of the synth_mult. */
3411 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), mode, speed);
3412 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3413 return expand_mult_const (mode, op0, coeff, target,
3414 &algorithm, variant);
3415 }
3416 skip_synth:
3417
3418 /* Expand x*2.0 as x+x. */
3419 if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1)
3420 && real_equal (CONST_DOUBLE_REAL_VALUE (scalar_op1), &dconst2))
3421 {
3422 op0 = force_reg (GET_MODE (op0), op0);
3423 return expand_binop (mode, add_optab, op0, op0,
3424 target, unsignedp,
3425 no_libcall ? OPTAB_WIDEN : OPTAB_LIB_WIDEN);
3426 }
3427
3428 /* This used to use umul_optab if unsigned, but for non-widening multiply
3429 there is no difference between signed and unsigned. */
3430 op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3431 op0, op1, target, unsignedp,
3432 no_libcall ? OPTAB_WIDEN : OPTAB_LIB_WIDEN);
3433 gcc_assert (op0 || no_libcall);
3434 return op0;
3435 }
3436
3437 /* Return a cost estimate for multiplying a register by the given
3438 COEFFicient in the given MODE and SPEED. */
3439
3440 int
3441 mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
3442 {
3443 int max_cost;
3444 struct algorithm algorithm;
3445 enum mult_variant variant;
3446
3447 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3448 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg),
3449 mode, speed);
3450 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3451 return algorithm.cost.cost;
3452 else
3453 return max_cost;
3454 }
3455
3456 /* Perform a widening multiplication and return an rtx for the result.
3457 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3458 TARGET is a suggestion for where to store the result (an rtx).
3459 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3460 or smul_widen_optab.
3461
3462 We check specially for a constant integer as OP1, comparing the
3463 cost of a widening multiply against the cost of a sequence of shifts
3464 and adds. */
3465
3466 rtx
3467 expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3468 int unsignedp, optab this_optab)
3469 {
3470 bool speed = optimize_insn_for_speed_p ();
3471 rtx cop1;
3472
3473 if (CONST_INT_P (op1)
3474 && GET_MODE (op0) != VOIDmode
3475 && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3476 this_optab == umul_widen_optab))
3477 && CONST_INT_P (cop1)
3478 && (INTVAL (cop1) >= 0
3479 || HWI_COMPUTABLE_MODE_P (mode)))
3480 {
3481 HOST_WIDE_INT coeff = INTVAL (cop1);
3482 int max_cost;
3483 enum mult_variant variant;
3484 struct algorithm algorithm;
3485
3486 if (coeff == 0)
3487 return CONST0_RTX (mode);
3488
3489 /* Special case powers of two. */
3490 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3491 {
3492 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3493 return expand_shift (LSHIFT_EXPR, mode, op0,
3494 floor_log2 (coeff), target, unsignedp);
3495 }
3496
3497 /* Exclude cost of op0 from max_cost to match the cost
3498 calculation of the synth_mult. */
3499 max_cost = mul_widen_cost (speed, mode);
3500 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3501 max_cost))
3502 {
3503 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3504 return expand_mult_const (mode, op0, coeff, target,
3505 &algorithm, variant);
3506 }
3507 }
3508 return expand_binop (mode, this_optab, op0, op1, target,
3509 unsignedp, OPTAB_LIB_WIDEN);
3510 }
3511 \f
3512 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3513 replace division by D, and put the least significant N bits of the result
3514 in *MULTIPLIER_PTR and return the most significant bit.
3515
3516 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3517 needed precision is in PRECISION (should be <= N).
3518
3519 PRECISION should be as small as possible so this function can choose
3520 the multiplier more freely.
3521
3522 The rounded-up logarithm of D is placed in *lgup_ptr. A shift count that
3523 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3524
3525 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3526 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
3527
3528 unsigned HOST_WIDE_INT
3529 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3530 unsigned HOST_WIDE_INT *multiplier_ptr,
3531 int *post_shift_ptr, int *lgup_ptr)
3532 {
3533 int lgup, post_shift;
3534 int pow, pow2;
3535
3536 /* lgup = ceil(log2(divisor)); */
3537 lgup = ceil_log2 (d);
3538
3539 gcc_assert (lgup <= n);
3540
3541 pow = n + lgup;
3542 pow2 = n + lgup - precision;
3543
3544 /* mlow = 2^(N + lgup)/d */
3545 wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
3546 wide_int mlow = wi::udiv_trunc (val, d);
3547
3548 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3549 val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3550 wide_int mhigh = wi::udiv_trunc (val, d);
3551
3552 /* If precision == N, then mlow, mhigh exceed 2^N
3553 (but they do not exceed 2^(N+1)). */
3554
3555 /* Reduce to lowest terms. */
3556 for (post_shift = lgup; post_shift > 0; post_shift--)
3557 {
3558 unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3559 HOST_BITS_PER_WIDE_INT);
3560 unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3561 HOST_BITS_PER_WIDE_INT);
3562 if (ml_lo >= mh_lo)
3563 break;
3564
3565 mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3566 mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
3567 }
3568
3569 *post_shift_ptr = post_shift;
3570 *lgup_ptr = lgup;
3571 if (n < HOST_BITS_PER_WIDE_INT)
3572 {
3573 unsigned HOST_WIDE_INT mask = (HOST_WIDE_INT_1U << n) - 1;
3574 *multiplier_ptr = mhigh.to_uhwi () & mask;
3575 return mhigh.to_uhwi () >= mask;
3576 }
3577 else
3578 {
3579 *multiplier_ptr = mhigh.to_uhwi ();
3580 return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
3581 }
3582 }
3583
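/* An illustrative sketch (not part of GCC): the round-up reciprocal
   identity that choose_multiplier is built on, checked by brute force for
   N = 16 so the arithmetic fits comfortably in 64 bits.  With
   L = ceil(log2(d)) and m = floor(2^(N+L)/d) + 1 we have
   2^(N+L) < m*d <= 2^(N+L) + 2^L, hence x/d == (x*m) >> (N+L) for all
   0 <= x < 2^N.  Plain ISO C; the function name is hypothetical.  */
#include <stdint.h>
#include <assert.h>

static void
check_reciprocal_multiplier (uint32_t d)     /* 1 <= d < 2^16 */
{
  const int N = 16;
  int L = 0;
  while (((uint32_t) 1 << L) < d)            /* L = ceil (log2 (d)) */
    L++;
  uint64_t m = (((uint64_t) 1 << (N + L)) / d) + 1;
  for (uint32_t x = 0; x < ((uint32_t) 1 << N); x++)
    assert ((uint32_t) ((x * m) >> (N + L)) == x / d);
}
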
3584 /* Compute the inverse of X mod 2**N, i.e., find Y such that X * Y is
3585 congruent to 1 (mod 2**N). */
3586
3587 static unsigned HOST_WIDE_INT
3588 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3589 {
3590 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3591
3592 /* The algorithm notes that the choice y = x satisfies
3593 x*y == 1 mod 2^3, since x is assumed odd.
3594 Each iteration doubles the number of bits of significance in y. */
3595
3596 unsigned HOST_WIDE_INT mask;
3597 unsigned HOST_WIDE_INT y = x;
3598 int nbit = 3;
3599
3600 mask = (n == HOST_BITS_PER_WIDE_INT
3601 ? HOST_WIDE_INT_M1U
3602 : (HOST_WIDE_INT_1U << n) - 1);
3603
3604 while (nbit < n)
3605 {
3606 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3607 nbit *= 2;
3608 }
3609 return y;
3610 }
3611
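/* An illustrative sketch (not part of GCC): the same doubling Newton
   iteration as invert_mod2n above, specialized to N = 64 where the masking
   is implicit in uint64_t arithmetic.  Plain ISO C; the function name is
   hypothetical.  */
#include <stdint.h>

static uint64_t
inverse_mod_2_64 (uint64_t x)            /* X must be odd.  */
{
  uint64_t y = x;                        /* x*y == 1 (mod 2^3) for odd x.  */
  for (int bits = 3; bits < 64; bits *= 2)
    y = y * (2 - x * y);                 /* Each step doubles the valid low bits.  */
  return y;
}

/* For any odd x, inverse_mod_2_64 (x) * x == 1 as a uint64_t.  */
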
3612 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3613 flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the
3614 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
3615 to become unsigned; if UNSIGNEDP is zero, adjust the unsigned product to
3616 become signed.
3617
3618 The result is put in TARGET if that is convenient.
3619
3620 MODE is the mode of operation. */
3621
3622 rtx
3623 expand_mult_highpart_adjust (scalar_int_mode mode, rtx adj_operand, rtx op0,
3624 rtx op1, rtx target, int unsignedp)
3625 {
3626 rtx tem;
3627 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3628
3629 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3630 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3631 tem = expand_and (mode, tem, op1, NULL_RTX);
3632 adj_operand
3633 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3634 adj_operand);
3635
3636 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3637 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3638 tem = expand_and (mode, tem, op0, NULL_RTX);
3639 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3640 target);
3641
3642 return target;
3643 }
3644
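/* An illustrative sketch (not part of GCC): the identity that
   expand_mult_highpart_adjust implements, shown for 16-bit operands so it
   can be checked with ordinary 32-bit arithmetic.  The signed high part is
   the unsigned high part minus B when A is negative and minus A when B is
   negative (all mod 2^16); the PLUS direction used when UNSIGNEDP is set is
   simply the inverse adjustment.  Relies on arithmetic right shift of
   negative values, as GCC provides.  Plain ISO C; the function name is
   hypothetical.  */
#include <stdint.h>
#include <assert.h>

static void
check_highpart_adjust (int16_t a, int16_t b)
{
  uint16_t hi_u = (uint16_t) (((uint32_t) (uint16_t) a * (uint16_t) b) >> 16);
  uint16_t hi_s = (uint16_t) (((int32_t) a * (int32_t) b) >> 16);
  uint16_t adj = hi_u;
  if (a < 0) adj -= (uint16_t) b;
  if (b < 0) adj -= (uint16_t) a;
  assert (adj == hi_s);
}
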
3645 /* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */
3646
3647 static rtx
3648 extract_high_half (scalar_int_mode mode, rtx op)
3649 {
3650 if (mode == word_mode)
3651 return gen_highpart (mode, op);
3652
3653 scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3654
3655 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3656 GET_MODE_BITSIZE (mode), 0, 1);
3657 return convert_modes (mode, wider_mode, op, 0);
3658 }
3659
3660 /* Like expmed_mult_highpart, but only consider using a multiplication
3661 optab. OP1 is an rtx for the constant operand. */
3662
3663 static rtx
3664 expmed_mult_highpart_optab (scalar_int_mode mode, rtx op0, rtx op1,
3665 rtx target, int unsignedp, int max_cost)
3666 {
3667 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3668 optab moptab;
3669 rtx tem;
3670 int size;
3671 bool speed = optimize_insn_for_speed_p ();
3672
3673 scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3674
3675 size = GET_MODE_BITSIZE (mode);
3676
3677 /* Firstly, try using a multiplication insn that only generates the needed
3678 high part of the product, and in the sign flavor of unsignedp. */
3679 if (mul_highpart_cost (speed, mode) < max_cost)
3680 {
3681 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3682 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3683 unsignedp, OPTAB_DIRECT);
3684 if (tem)
3685 return tem;
3686 }
3687
3688 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3689 Need to adjust the result after the multiplication. */
3690 if (size - 1 < BITS_PER_WORD
3691 && (mul_highpart_cost (speed, mode)
3692 + 2 * shift_cost (speed, mode, size-1)
3693 + 4 * add_cost (speed, mode) < max_cost))
3694 {
3695 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3696 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3697 unsignedp, OPTAB_DIRECT);
3698 if (tem)
3699 /* We used the wrong signedness. Adjust the result. */
3700 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3701 tem, unsignedp);
3702 }
3703
3704 /* Try widening multiplication. */
3705 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3706 if (convert_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3707 && mul_widen_cost (speed, wider_mode) < max_cost)
3708 {
3709 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3710 unsignedp, OPTAB_WIDEN);
3711 if (tem)
3712 return extract_high_half (mode, tem);
3713 }
3714
3715 /* Try widening the mode and perform a non-widening multiplication. */
3716 if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3717 && size - 1 < BITS_PER_WORD
3718 && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3719 < max_cost))
3720 {
3721 rtx_insn *insns;
3722 rtx wop0, wop1;
3723
3724 /* We need to widen the operands, for example to ensure the
3725 constant multiplier is correctly sign or zero extended.
3726 Use a sequence to clean up any instructions emitted by
3727 the conversions if things don't work out. */
3728 start_sequence ();
3729 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3730 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3731 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3732 unsignedp, OPTAB_WIDEN);
3733 insns = get_insns ();
3734 end_sequence ();
3735
3736 if (tem)
3737 {
3738 emit_insn (insns);
3739 return extract_high_half (mode, tem);
3740 }
3741 }
3742
3743 /* Try widening multiplication of opposite signedness, and adjust. */
3744 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3745 if (convert_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3746 && size - 1 < BITS_PER_WORD
3747 && (mul_widen_cost (speed, wider_mode)
3748 + 2 * shift_cost (speed, mode, size-1)
3749 + 4 * add_cost (speed, mode) < max_cost))
3750 {
3751 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3752 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3753 if (tem != 0)
3754 {
3755 tem = extract_high_half (mode, tem);
3756 /* We used the wrong signedness. Adjust the result. */
3757 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3758 target, unsignedp);
3759 }
3760 }
3761
3762 return 0;
3763 }
3764
3765 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3766 putting the high half of the result in TARGET if that is convenient,
3767 and return where the result is. If the operation cannot be performed,
3768 0 is returned.
3769
3770 MODE is the mode of operation and result.
3771
3772 UNSIGNEDP nonzero means unsigned multiply.
3773
3774 MAX_COST is the total allowed cost for the expanded RTL. */
3775
3776 static rtx
3777 expmed_mult_highpart (scalar_int_mode mode, rtx op0, rtx op1,
3778 rtx target, int unsignedp, int max_cost)
3779 {
3780 unsigned HOST_WIDE_INT cnst1;
3781 int extra_cost;
3782 bool sign_adjust = false;
3783 enum mult_variant variant;
3784 struct algorithm alg;
3785 rtx tem;
3786 bool speed = optimize_insn_for_speed_p ();
3787
3788 /* We can't support modes wider than HOST_BITS_PER_WIDE_INT. */
3789 gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3790
3791 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3792
3793 /* We can't optimize modes wider than BITS_PER_WORD.
3794 ??? We might be able to perform double-word arithmetic if
3795 mode == word_mode, however all the cost calculations in
3796 synth_mult etc. assume single-word operations. */
3797 scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3798 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3799 return expmed_mult_highpart_optab (mode, op0, op1, target,
3800 unsignedp, max_cost);
3801
3802 extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3803
3804 /* Check whether we try to multiply by a negative constant. */
3805 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3806 {
3807 sign_adjust = true;
3808 extra_cost += add_cost (speed, mode);
3809 }
3810
3811 /* See whether shift/add multiplication is cheap enough. */
3812 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3813 max_cost - extra_cost))
3814 {
3815 /* See whether the specialized multiplication optabs are
3816 cheaper than the shift/add version. */
3817 tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3818 alg.cost.cost + extra_cost);
3819 if (tem)
3820 return tem;
3821
3822 tem = convert_to_mode (wider_mode, op0, unsignedp);
3823 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3824 tem = extract_high_half (mode, tem);
3825
3826 /* Adjust result for signedness. */
3827 if (sign_adjust)
3828 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3829
3830 return tem;
3831 }
3832 return expmed_mult_highpart_optab (mode, op0, op1, target,
3833 unsignedp, max_cost);
3834 }
3835
3836
3837 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3838
3839 static rtx
3840 expand_smod_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
3841 {
3842 rtx result, temp, shift;
3843 rtx_code_label *label;
3844 int logd;
3845 int prec = GET_MODE_PRECISION (mode);
3846
3847 logd = floor_log2 (d);
3848 result = gen_reg_rtx (mode);
3849
3850 /* Avoid conditional branches when they're expensive. */
3851 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3852 && optimize_insn_for_speed_p ())
3853 {
3854 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3855 mode, 0, -1);
3856 if (signmask)
3857 {
3858 HOST_WIDE_INT masklow = (HOST_WIDE_INT_1 << logd) - 1;
3859 signmask = force_reg (mode, signmask);
3860 shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3861
3862 /* Use the rtx_cost of a LSHIFTRT instruction to determine
3863 which instruction sequence to use. If logical right shifts
3864 are expensive, use 2 XORs, 2 SUBs and an AND; otherwise
3865 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
3866
3867 temp = gen_rtx_LSHIFTRT (mode, result, shift);
3868 if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3869 || (set_src_cost (temp, mode, optimize_insn_for_speed_p ())
3870 > COSTS_N_INSNS (2)))
3871 {
3872 temp = expand_binop (mode, xor_optab, op0, signmask,
3873 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3874 temp = expand_binop (mode, sub_optab, temp, signmask,
3875 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3876 temp = expand_binop (mode, and_optab, temp,
3877 gen_int_mode (masklow, mode),
3878 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3879 temp = expand_binop (mode, xor_optab, temp, signmask,
3880 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3881 temp = expand_binop (mode, sub_optab, temp, signmask,
3882 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3883 }
3884 else
3885 {
3886 signmask = expand_binop (mode, lshr_optab, signmask, shift,
3887 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3888 signmask = force_reg (mode, signmask);
3889
3890 temp = expand_binop (mode, add_optab, op0, signmask,
3891 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3892 temp = expand_binop (mode, and_optab, temp,
3893 gen_int_mode (masklow, mode),
3894 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3895 temp = expand_binop (mode, sub_optab, temp, signmask,
3896 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3897 }
3898 return temp;
3899 }
3900 }
3901
3902 /* Mask contains the mode's signbit and the significant bits of the
3903 modulus. By including the signbit in the operation, many targets
3904 can avoid an explicit compare operation in the following comparison
3905 against zero. */
3906 wide_int mask = wi::mask (logd, false, prec);
3907 mask = wi::set_bit (mask, prec - 1);
3908
3909 temp = expand_binop (mode, and_optab, op0,
3910 immed_wide_int_const (mask, mode),
3911 result, 1, OPTAB_LIB_WIDEN);
3912 if (temp != result)
3913 emit_move_insn (result, temp);
3914
3915 label = gen_label_rtx ();
3916 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3917
3918 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3919 0, OPTAB_LIB_WIDEN);
3920
3921 mask = wi::mask (logd, true, prec);
3922 temp = expand_binop (mode, ior_optab, temp,
3923 immed_wide_int_const (mask, mode),
3924 result, 1, OPTAB_LIB_WIDEN);
3925 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3926 0, OPTAB_LIB_WIDEN);
3927 if (temp != result)
3928 emit_move_insn (result, temp);
3929 emit_label (label);
3930 return result;
3931 }
3932
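/* An illustrative sketch (not part of GCC): the branch-free signed
   remainder by 2^logd that the LSHIFTRT variant above emits, written for
   32-bit int.  Relies on arithmetic right shift of negative values, as GCC
   provides.  Plain ISO C; the function name is hypothetical.  */
static int
srem_pow2_sketch (int x, int logd)          /* x % (1 << logd), logd >= 1 */
{
  unsigned d_minus_1 = (1u << logd) - 1;
  /* d-1 when x is negative, 0 otherwise (signmask in the code above).  */
  unsigned bias = (unsigned) (x >> 31) >> (32 - logd);
  return (int) (((unsigned) x + bias) & d_minus_1) - (int) bias;
}

/* srem_pow2_sketch (-5, 2) == -1, matching C's -5 % 4.  */
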
3933 /* Expand signed division of OP0 by a power of two D in mode MODE.
3934 This routine is only called for positive values of D. */
3935
3936 static rtx
3937 expand_sdiv_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
3938 {
3939 rtx temp;
3940 rtx_code_label *label;
3941 int logd;
3942
3943 logd = floor_log2 (d);
3944
3945 if (d == 2
3946 && BRANCH_COST (optimize_insn_for_speed_p (),
3947 false) >= 1)
3948 {
3949 temp = gen_reg_rtx (mode);
3950 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3951 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3952 0, OPTAB_LIB_WIDEN);
3953 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3954 }
3955
3956 if (HAVE_conditional_move
3957 && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
3958 {
3959 rtx temp2;
3960
3961 start_sequence ();
3962 temp2 = copy_to_mode_reg (mode, op0);
3963 temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
3964 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3965 temp = force_reg (mode, temp);
3966
3967 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
3968 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3969 mode, temp, temp2, mode, 0);
3970 if (temp2)
3971 {
3972 rtx_insn *seq = get_insns ();
3973 end_sequence ();
3974 emit_insn (seq);
3975 return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3976 }
3977 end_sequence ();
3978 }
3979
3980 if (BRANCH_COST (optimize_insn_for_speed_p (),
3981 false) >= 2)
3982 {
3983 int ushift = GET_MODE_BITSIZE (mode) - logd;
3984
3985 temp = gen_reg_rtx (mode);
3986 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3987 if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
3988 || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
3989 > COSTS_N_INSNS (1))
3990 temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode),
3991 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3992 else
3993 temp = expand_shift (RSHIFT_EXPR, mode, temp,
3994 ushift, NULL_RTX, 1);
3995 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3996 0, OPTAB_LIB_WIDEN);
3997 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3998 }
3999
4000 label = gen_label_rtx ();
4001 temp = copy_to_mode_reg (mode, op0);
4002 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
4003 expand_inc (temp, gen_int_mode (d - 1, mode));
4004 emit_label (label);
4005 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4006 }
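
/* An illustrative sketch (not part of GCC): the branch-free form of the
   signed division above when branches are expensive: add d-1 to the
   dividend only when it is negative, then shift arithmetically.  Relies on
   arithmetic right shift of negative values, as GCC provides.  Plain ISO C;
   the function name is hypothetical.  */
static int
sdiv_pow2_sketch (int x, int logd)          /* x / (1 << logd), 1 <= logd <= 30 */
{
  int bias = (x >> 31) & ((1 << logd) - 1); /* d-1 if x < 0, else 0.  */
  return (x + bias) >> logd;                /* Rounds toward zero.    */
}

/* sdiv_pow2_sketch (-5, 2) == -1, matching C's -5 / 4.  */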
4007 \f
4008 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
4009 if that is convenient, and returning where the result is.
4010 You may request either the quotient or the remainder as the result;
4011 specify REM_FLAG nonzero to get the remainder.
4012
4013 CODE is the expression code for which kind of division this is;
4014 it controls how rounding is done. MODE is the machine mode to use.
4015 UNSIGNEDP nonzero means do unsigned division. */
4016
4017 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
4018 and then correct it by or'ing in missing high bits
4019 if result of ANDI is nonzero.
4020 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
4021 This could optimize to a bfexts instruction.
4022 But C doesn't use these operations, so their optimizations are
4023 left for later. */
4024 /* ??? For modulo, we don't actually need the highpart of the first product,
4025 the low part will do nicely. And for small divisors, the second multiply
4026 can also be a low-part only multiply or even be completely left out.
4027 E.g. to calculate the remainder of a division by 3 with a 32 bit
4028 multiply, multiply with 0x55555556 and extract the upper two bits;
4029 the result is exact for inputs up to 0x1fffffff.
4030 The input range can be reduced by using cross-sum rules.
4031 For odd divisors >= 3, the following table gives right shift counts
4032 so that if a number is shifted by an integer multiple of the given
4033 amount, the remainder stays the same:
4034 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
4035 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
4036 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
4037 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
4038 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
4039
4040 Cross-sum rules for even numbers can be derived by leaving as many bits
4041 to the right alone as the divisor has zeros to the right.
4042 E.g. if x is an unsigned 32 bit number:
4043 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
4044 */
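/* An illustrative sketch (not part of GCC): the remainder-by-3 trick from
   the comment above, verified by brute force over the stated range.  The
   top two bits of the low 32 bits of x * 0x55555556 are exactly x % 3 for
   every x <= 0x1fffffff (0x55555556 == (2^32 + 2) / 3, so the low word of
   the product holds the scaled fractional part of x / 3).  Plain ISO C;
   the function name is hypothetical and the exhaustive loop is slow.  */
#include <stdint.h>
#include <assert.h>

static void
check_mod3_by_multiply (void)
{
  for (uint32_t x = 0; x <= UINT32_C (0x1fffffff); x++)
    assert ((x * UINT32_C (0x55555556)) >> 30 == x % 3);
}
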
4045
4046 rtx
4047 expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
4048 rtx op0, rtx op1, rtx target, int unsignedp)
4049 {
4050 machine_mode compute_mode;
4051 rtx tquotient;
4052 rtx quotient = 0, remainder = 0;
4053 rtx_insn *last;
4054 rtx_insn *insn;
4055 optab optab1, optab2;
4056 int op1_is_constant, op1_is_pow2 = 0;
4057 int max_cost, extra_cost;
4058 static HOST_WIDE_INT last_div_const = 0;
4059 bool speed = optimize_insn_for_speed_p ();
4060
4061 op1_is_constant = CONST_INT_P (op1);
4062 if (op1_is_constant)
4063 {
4064 wide_int ext_op1 = rtx_mode_t (op1, mode);
4065 op1_is_pow2 = (wi::popcount (ext_op1) == 1
4066 || (! unsignedp
4067 && wi::popcount (wi::neg (ext_op1)) == 1));
4068 }
4069
4070 /*
4071 This is the structure of expand_divmod:
4072
4073 First comes code to fix up the operands so we can perform the operations
4074 correctly and efficiently.
4075
4076 Second comes a switch statement with code specific for each rounding mode.
4077 For some special operands this code emits all RTL for the desired
4078 operation, for other cases, it generates only a quotient and stores it in
4079 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
4080 to indicate that it has not done anything.
4081
4082 Last comes code that finishes the operation. If QUOTIENT is set and
4083 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
4084 QUOTIENT is not set, it is computed using trunc rounding.
4085
4086 We try to generate special code for division and remainder when OP1 is a
4087 constant. If |OP1| = 2**n we can use shifts and some other fast
4088 operations. For other values of OP1, we compute a carefully selected
4089 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
4090 by m.
4091
4092 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
4093 half of the product. Different strategies for generating the product are
4094 implemented in expmed_mult_highpart.
4095
4096 If what we actually want is the remainder, we generate that by another
4097 by-constant multiplication and a subtraction. */
4098
4099 /* We shouldn't be called with OP1 == const1_rtx, but some of the
4100 code below will malfunction if we are, so check here and handle
4101 the special case if so. */
4102 if (op1 == const1_rtx)
4103 return rem_flag ? const0_rtx : op0;
4104
4105 /* When dividing by -1, we could get an overflow.
4106 negv_optab can handle overflows. */
4107 if (! unsignedp && op1 == constm1_rtx)
4108 {
4109 if (rem_flag)
4110 return const0_rtx;
4111 return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
4112 ? negv_optab : neg_optab, op0, target, 0);
4113 }
4114
4115 if (target
4116 /* Don't use the function value register as a target
4117 since we have to read it as well as write it,
4118 and function-inlining gets confused by this. */
4119 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
4120 /* Don't clobber an operand while doing a multi-step calculation. */
4121 || ((rem_flag || op1_is_constant)
4122 && (reg_mentioned_p (target, op0)
4123 || (MEM_P (op0) && MEM_P (target))))
4124 || reg_mentioned_p (target, op1)
4125 || (MEM_P (op1) && MEM_P (target))))
4126 target = 0;
4127
4128 /* Get the mode in which to perform this computation. Normally it will
4129 be MODE, but sometimes we can't do the desired operation in MODE.
4130 If so, pick a wider mode in which we can do the operation. Convert
4131 to that mode at the start to avoid repeated conversions.
4132
4133 First see what operations we need. These depend on the expression
4134 we are evaluating. (We assume that divxx3 insns exist under the
4135 same conditions as modxx3 insns, and that these insns don't normally
4136 fail. If these assumptions are not correct, we may generate less
4137 efficient code in some cases.)
4138
4139 Then see if we find a mode in which we can open-code that operation
4140 (either a division, modulus, or shift). Finally, check for the smallest
4141 mode for which we can do the operation with a library call. */
4142
4143 /* We might want to refine this now that we have division-by-constant
4144 optimization. Since expmed_mult_highpart tries so many variants, it is
4145 not straightforward to generalize this. Maybe we should make an array
4146 of possible modes in init_expmed? Save this for GCC 2.7. */
4147
4148 optab1 = (op1_is_pow2
4149 ? (unsignedp ? lshr_optab : ashr_optab)
4150 : (unsignedp ? udiv_optab : sdiv_optab));
4151 optab2 = (op1_is_pow2 ? optab1
4152 : (unsignedp ? udivmod_optab : sdivmod_optab));
4153
4154 FOR_EACH_MODE_FROM (compute_mode, mode)
4155 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4156 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4157 break;
4158
4159 if (compute_mode == VOIDmode)
4160 FOR_EACH_MODE_FROM (compute_mode, mode)
4161 if (optab_libfunc (optab1, compute_mode)
4162 || optab_libfunc (optab2, compute_mode))
4163 break;
4164
4165 /* If we still couldn't find a mode, use MODE, but expand_binop will
4166 probably die. */
4167 if (compute_mode == VOIDmode)
4168 compute_mode = mode;
4169
4170 if (target && GET_MODE (target) == compute_mode)
4171 tquotient = target;
4172 else
4173 tquotient = gen_reg_rtx (compute_mode);
4174
4175 #if 0
4176 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4177 (mode), and thereby get better code when OP1 is a constant. Do that
4178 later. It will require going over all usages of SIZE below. */
4179 size = GET_MODE_BITSIZE (mode);
4180 #endif
4181
4182 /* Only deduct something for a REM if the last divide done was
4183 for a different constant. Then set the constant of the last
4184 divide. */
4185 max_cost = (unsignedp
4186 ? udiv_cost (speed, compute_mode)
4187 : sdiv_cost (speed, compute_mode));
4188 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4189 && INTVAL (op1) == last_div_const))
4190 max_cost -= (mul_cost (speed, compute_mode)
4191 + add_cost (speed, compute_mode));
4192
4193 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4194
4195 /* Now convert to the best mode to use. */
4196 if (compute_mode != mode)
4197 {
4198 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4199 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4200
4201 /* convert_modes may have placed op1 into a register, so we
4202 must recompute the following. */
4203 op1_is_constant = CONST_INT_P (op1);
4204 if (op1_is_constant)
4205 {
4206 wide_int ext_op1 = rtx_mode_t (op1, compute_mode);
4207 op1_is_pow2 = (wi::popcount (ext_op1) == 1
4208 || (! unsignedp
4209 && wi::popcount (wi::neg (ext_op1)) == 1));
4210 }
4211 else
4212 op1_is_pow2 = 0;
4213 }
4214
4215 /* If one of the operands is a volatile MEM, copy it into a register. */
4216
4217 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4218 op0 = force_reg (compute_mode, op0);
4219 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4220 op1 = force_reg (compute_mode, op1);
4221
4222 /* If we need the remainder or if OP1 is constant, we need to
4223 put OP0 in a register in case it has any queued subexpressions. */
4224 if (rem_flag || op1_is_constant)
4225 op0 = force_reg (compute_mode, op0);
4226
4227 last = get_last_insn ();
4228
4229 /* Promote floor rounding to trunc rounding for unsigned operations. */
4230 if (unsignedp)
4231 {
4232 if (code == FLOOR_DIV_EXPR)
4233 code = TRUNC_DIV_EXPR;
4234 if (code == FLOOR_MOD_EXPR)
4235 code = TRUNC_MOD_EXPR;
4236 if (code == EXACT_DIV_EXPR && op1_is_pow2)
4237 code = TRUNC_DIV_EXPR;
4238 }
4239
4240 if (op1 != const0_rtx)
4241 switch (code)
4242 {
4243 case TRUNC_MOD_EXPR:
4244 case TRUNC_DIV_EXPR:
4245 if (op1_is_constant)
4246 {
4247 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4248 int size = GET_MODE_BITSIZE (int_mode);
4249 if (unsignedp)
4250 {
4251 unsigned HOST_WIDE_INT mh, ml;
4252 int pre_shift, post_shift;
4253 int dummy;
4254 wide_int wd = rtx_mode_t (op1, int_mode);
4255 unsigned HOST_WIDE_INT d = wd.to_uhwi ();
4256
4257 if (wi::popcount (wd) == 1)
4258 {
4259 pre_shift = floor_log2 (d);
4260 if (rem_flag)
4261 {
4262 unsigned HOST_WIDE_INT mask
4263 = (HOST_WIDE_INT_1U << pre_shift) - 1;
4264 remainder
4265 = expand_binop (int_mode, and_optab, op0,
4266 gen_int_mode (mask, int_mode),
4267 remainder, 1,
4268 OPTAB_LIB_WIDEN);
4269 if (remainder)
4270 return gen_lowpart (mode, remainder);
4271 }
4272 quotient = expand_shift (RSHIFT_EXPR, int_mode, op0,
4273 pre_shift, tquotient, 1);
4274 }
4275 else if (size <= HOST_BITS_PER_WIDE_INT)
4276 {
4277 if (d >= (HOST_WIDE_INT_1U << (size - 1)))
4278 {
4279 /* Most significant bit of divisor is set; emit an scc
4280 insn. */
4281 quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4282 int_mode, 1, 1);
4283 }
4284 else
4285 {
4286 /* Find a suitable multiplier and right shift count
4287 instead of multiplying with D. */
4288
4289 mh = choose_multiplier (d, size, size,
4290 &ml, &post_shift, &dummy);
4291
4292 /* If the suggested multiplier is more than SIZE bits,
4293 we can do better for even divisors, using an
4294 initial right shift. */
4295 if (mh != 0 && (d & 1) == 0)
4296 {
4297 pre_shift = ctz_or_zero (d);
4298 mh = choose_multiplier (d >> pre_shift, size,
4299 size - pre_shift,
4300 &ml, &post_shift, &dummy);
4301 gcc_assert (!mh);
4302 }
4303 else
4304 pre_shift = 0;
4305
4306 if (mh != 0)
4307 {
4308 rtx t1, t2, t3, t4;
4309
4310 if (post_shift - 1 >= BITS_PER_WORD)
4311 goto fail1;
4312
4313 extra_cost
4314 = (shift_cost (speed, int_mode, post_shift - 1)
4315 + shift_cost (speed, int_mode, 1)
4316 + 2 * add_cost (speed, int_mode));
4317 t1 = expmed_mult_highpart
4318 (int_mode, op0, gen_int_mode (ml, int_mode),
4319 NULL_RTX, 1, max_cost - extra_cost);
4320 if (t1 == 0)
4321 goto fail1;
4322 t2 = force_operand (gen_rtx_MINUS (int_mode,
4323 op0, t1),
4324 NULL_RTX);
4325 t3 = expand_shift (RSHIFT_EXPR, int_mode,
4326 t2, 1, NULL_RTX, 1);
4327 t4 = force_operand (gen_rtx_PLUS (int_mode,
4328 t1, t3),
4329 NULL_RTX);
4330 quotient = expand_shift
4331 (RSHIFT_EXPR, int_mode, t4,
4332 post_shift - 1, tquotient, 1);
4333 }
4334 else
4335 {
4336 rtx t1, t2;
4337
4338 if (pre_shift >= BITS_PER_WORD
4339 || post_shift >= BITS_PER_WORD)
4340 goto fail1;
4341
4342 t1 = expand_shift
4343 (RSHIFT_EXPR, int_mode, op0,
4344 pre_shift, NULL_RTX, 1);
4345 extra_cost
4346 = (shift_cost (speed, int_mode, pre_shift)
4347 + shift_cost (speed, int_mode, post_shift));
4348 t2 = expmed_mult_highpart
4349 (int_mode, t1,
4350 gen_int_mode (ml, int_mode),
4351 NULL_RTX, 1, max_cost - extra_cost);
4352 if (t2 == 0)
4353 goto fail1;
4354 quotient = expand_shift
4355 (RSHIFT_EXPR, int_mode, t2,
4356 post_shift, tquotient, 1);
4357 }
4358 }
4359 }
4360 else /* Mode too wide to use the tricky code */
4361 break;
4362
4363 insn = get_last_insn ();
4364 if (insn != last)
4365 set_dst_reg_note (insn, REG_EQUAL,
4366 gen_rtx_UDIV (int_mode, op0, op1),
4367 quotient);
4368 }
4369 else /* TRUNC_DIV, signed */
4370 {
4371 unsigned HOST_WIDE_INT ml;
4372 int lgup, post_shift;
4373 rtx mlr;
4374 HOST_WIDE_INT d = INTVAL (op1);
4375 unsigned HOST_WIDE_INT abs_d;
4376
4377 /* Since d might be INT_MIN, we have to cast to
4378 unsigned HOST_WIDE_INT before negating to avoid
4379 undefined signed overflow. */
4380 abs_d = (d >= 0
4381 ? (unsigned HOST_WIDE_INT) d
4382 : - (unsigned HOST_WIDE_INT) d);
4383
4384 /* n rem d = n rem -d */
4385 if (rem_flag && d < 0)
4386 {
4387 d = abs_d;
4388 op1 = gen_int_mode (abs_d, int_mode);
4389 }
4390
4391 if (d == 1)
4392 quotient = op0;
4393 else if (d == -1)
4394 quotient = expand_unop (int_mode, neg_optab, op0,
4395 tquotient, 0);
4396 else if (size <= HOST_BITS_PER_WIDE_INT
4397 && abs_d == HOST_WIDE_INT_1U << (size - 1))
4398 {
4399 /* This case is not handled correctly below. */
4400 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4401 int_mode, 1, 1);
4402 if (quotient == 0)
4403 goto fail1;
4404 }
4405 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4406 && (size <= HOST_BITS_PER_WIDE_INT || d >= 0)
4407 && (rem_flag
4408 ? smod_pow2_cheap (speed, int_mode)
4409 : sdiv_pow2_cheap (speed, int_mode))
4410 /* We assume that cheap metric is true if the
4411 optab has an expander for this mode. */
4412 && ((optab_handler ((rem_flag ? smod_optab
4413 : sdiv_optab),
4414 int_mode)
4415 != CODE_FOR_nothing)
4416 || (optab_handler (sdivmod_optab, int_mode)
4417 != CODE_FOR_nothing)))
4418 ;
4419 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d)
4420 && (size <= HOST_BITS_PER_WIDE_INT
4421 || abs_d != (unsigned HOST_WIDE_INT) d))
4422 {
4423 if (rem_flag)
4424 {
4425 remainder = expand_smod_pow2 (int_mode, op0, d);
4426 if (remainder)
4427 return gen_lowpart (mode, remainder);
4428 }
4429
4430 if (sdiv_pow2_cheap (speed, int_mode)
4431 && ((optab_handler (sdiv_optab, int_mode)
4432 != CODE_FOR_nothing)
4433 || (optab_handler (sdivmod_optab, int_mode)
4434 != CODE_FOR_nothing)))
4435 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4436 int_mode, op0,
4437 gen_int_mode (abs_d,
4438 int_mode),
4439 NULL_RTX, 0);
4440 else
4441 quotient = expand_sdiv_pow2 (int_mode, op0, abs_d);
4442
4443 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4444 negate the quotient. */
4445 if (d < 0)
4446 {
4447 insn = get_last_insn ();
4448 if (insn != last
4449 && abs_d < (HOST_WIDE_INT_1U
4450 << (HOST_BITS_PER_WIDE_INT - 1)))
4451 set_dst_reg_note (insn, REG_EQUAL,
4452 gen_rtx_DIV (int_mode, op0,
4453 gen_int_mode
4454 (abs_d,
4455 int_mode)),
4456 quotient);
4457
4458 quotient = expand_unop (int_mode, neg_optab,
4459 quotient, quotient, 0);
4460 }
4461 }
4462 else if (size <= HOST_BITS_PER_WIDE_INT)
4463 {
4464 choose_multiplier (abs_d, size, size - 1,
4465 &ml, &post_shift, &lgup);
4466 if (ml < HOST_WIDE_INT_1U << (size - 1))
4467 {
4468 rtx t1, t2, t3;
4469
4470 if (post_shift >= BITS_PER_WORD
4471 || size - 1 >= BITS_PER_WORD)
4472 goto fail1;
4473
4474 extra_cost = (shift_cost (speed, int_mode, post_shift)
4475 + shift_cost (speed, int_mode, size - 1)
4476 + add_cost (speed, int_mode));
4477 t1 = expmed_mult_highpart
4478 (int_mode, op0, gen_int_mode (ml, int_mode),
4479 NULL_RTX, 0, max_cost - extra_cost);
4480 if (t1 == 0)
4481 goto fail1;
4482 t2 = expand_shift
4483 (RSHIFT_EXPR, int_mode, t1,
4484 post_shift, NULL_RTX, 0);
4485 t3 = expand_shift
4486 (RSHIFT_EXPR, int_mode, op0,
4487 size - 1, NULL_RTX, 0);
4488 if (d < 0)
4489 quotient
4490 = force_operand (gen_rtx_MINUS (int_mode, t3, t2),
4491 tquotient);
4492 else
4493 quotient
4494 = force_operand (gen_rtx_MINUS (int_mode, t2, t3),
4495 tquotient);
4496 }
4497 else
4498 {
4499 rtx t1, t2, t3, t4;
4500
4501 if (post_shift >= BITS_PER_WORD
4502 || size - 1 >= BITS_PER_WORD)
4503 goto fail1;
4504
4505 ml |= HOST_WIDE_INT_M1U << (size - 1);
4506 mlr = gen_int_mode (ml, int_mode);
4507 extra_cost = (shift_cost (speed, int_mode, post_shift)
4508 + shift_cost (speed, int_mode, size - 1)
4509 + 2 * add_cost (speed, int_mode));
4510 t1 = expmed_mult_highpart (int_mode, op0, mlr,
4511 NULL_RTX, 0,
4512 max_cost - extra_cost);
4513 if (t1 == 0)
4514 goto fail1;
4515 t2 = force_operand (gen_rtx_PLUS (int_mode, t1, op0),
4516 NULL_RTX);
4517 t3 = expand_shift
4518 (RSHIFT_EXPR, int_mode, t2,
4519 post_shift, NULL_RTX, 0);
4520 t4 = expand_shift
4521 (RSHIFT_EXPR, int_mode, op0,
4522 size - 1, NULL_RTX, 0);
4523 if (d < 0)
4524 quotient
4525 = force_operand (gen_rtx_MINUS (int_mode, t4, t3),
4526 tquotient);
4527 else
4528 quotient
4529 = force_operand (gen_rtx_MINUS (int_mode, t3, t4),
4530 tquotient);
4531 }
4532 }
4533 else /* Mode too wide to use the tricky code */
4534 break;
4535
4536 insn = get_last_insn ();
4537 if (insn != last)
4538 set_dst_reg_note (insn, REG_EQUAL,
4539 gen_rtx_DIV (int_mode, op0, op1),
4540 quotient);
4541 }
4542 break;
4543 }
4544 fail1:
4545 delete_insns_since (last);
4546 break;
4547
4548 case FLOOR_DIV_EXPR:
4549 case FLOOR_MOD_EXPR:
4550 /* We will come here only for signed operations. */
4551 if (op1_is_constant && HWI_COMPUTABLE_MODE_P (compute_mode))
4552 {
4553 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4554 int size = GET_MODE_BITSIZE (int_mode);
4555 unsigned HOST_WIDE_INT mh, ml;
4556 int pre_shift, lgup, post_shift;
4557 HOST_WIDE_INT d = INTVAL (op1);
4558
4559 if (d > 0)
4560 {
4561 /* We could just as easily deal with negative constants here,
4562 but it does not seem worth the trouble for GCC 2.6. */
4563 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4564 {
4565 pre_shift = floor_log2 (d);
4566 if (rem_flag)
4567 {
4568 unsigned HOST_WIDE_INT mask
4569 = (HOST_WIDE_INT_1U << pre_shift) - 1;
4570 remainder = expand_binop
4571 (int_mode, and_optab, op0,
4572 gen_int_mode (mask, int_mode),
4573 remainder, 0, OPTAB_LIB_WIDEN);
4574 if (remainder)
4575 return gen_lowpart (mode, remainder);
4576 }
4577 quotient = expand_shift
4578 (RSHIFT_EXPR, int_mode, op0,
4579 pre_shift, tquotient, 0);
4580 }
4581 else
4582 {
4583 rtx t1, t2, t3, t4;
4584
4585 mh = choose_multiplier (d, size, size - 1,
4586 &ml, &post_shift, &lgup);
4587 gcc_assert (!mh);
4588
4589 if (post_shift < BITS_PER_WORD
4590 && size - 1 < BITS_PER_WORD)
4591 {
4592 t1 = expand_shift
4593 (RSHIFT_EXPR, int_mode, op0,
4594 size - 1, NULL_RTX, 0);
4595 t2 = expand_binop (int_mode, xor_optab, op0, t1,
4596 NULL_RTX, 0, OPTAB_WIDEN);
4597 extra_cost = (shift_cost (speed, int_mode, post_shift)
4598 + shift_cost (speed, int_mode, size - 1)
4599 + 2 * add_cost (speed, int_mode));
4600 t3 = expmed_mult_highpart
4601 (int_mode, t2, gen_int_mode (ml, int_mode),
4602 NULL_RTX, 1, max_cost - extra_cost);
4603 if (t3 != 0)
4604 {
4605 t4 = expand_shift
4606 (RSHIFT_EXPR, int_mode, t3,
4607 post_shift, NULL_RTX, 1);
4608 quotient = expand_binop (int_mode, xor_optab,
4609 t4, t1, tquotient, 0,
4610 OPTAB_WIDEN);
4611 }
4612 }
4613 }
4614 }
4615 else
4616 {
4617 rtx nsign, t1, t2, t3, t4;
4618 t1 = force_operand (gen_rtx_PLUS (int_mode,
4619 op0, constm1_rtx), NULL_RTX);
4620 t2 = expand_binop (int_mode, ior_optab, op0, t1, NULL_RTX,
4621 0, OPTAB_WIDEN);
4622 nsign = expand_shift (RSHIFT_EXPR, int_mode, t2,
4623 size - 1, NULL_RTX, 0);
4624 t3 = force_operand (gen_rtx_MINUS (int_mode, t1, nsign),
4625 NULL_RTX);
4626 t4 = expand_divmod (0, TRUNC_DIV_EXPR, int_mode, t3, op1,
4627 NULL_RTX, 0);
4628 if (t4)
4629 {
4630 rtx t5;
4631 t5 = expand_unop (int_mode, one_cmpl_optab, nsign,
4632 NULL_RTX, 0);
4633 quotient = force_operand (gen_rtx_PLUS (int_mode, t4, t5),
4634 tquotient);
4635 }
4636 }
4637 }
4638
4639 if (quotient != 0)
4640 break;
4641 delete_insns_since (last);
4642
4643 /* Try using an instruction that produces both the quotient and
4644 remainder, using truncation. We can easily compensate the quotient
4645 or remainder to get floor rounding, once we have the remainder.
4646 Notice that we compute also the final remainder value here,
4647 and return the result right away. */
4648 if (target == 0 || GET_MODE (target) != compute_mode)
4649 target = gen_reg_rtx (compute_mode);
4650
4651 if (rem_flag)
4652 {
4653 remainder
4654 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4655 quotient = gen_reg_rtx (compute_mode);
4656 }
4657 else
4658 {
4659 quotient
4660 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4661 remainder = gen_reg_rtx (compute_mode);
4662 }
4663
4664 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4665 quotient, remainder, 0))
4666 {
4667 /* This could be computed with a branch-less sequence.
4668 Save that for later. */
4669 rtx tem;
4670 rtx_code_label *label = gen_label_rtx ();
4671 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4672 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4673 NULL_RTX, 0, OPTAB_WIDEN);
4674 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4675 expand_dec (quotient, const1_rtx);
4676 expand_inc (remainder, op1);
4677 emit_label (label);
4678 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4679 }
4680
4681 /* No luck with division elimination or divmod. Have to do it
4682 by conditionally adjusting op0 *and* the result. */
4683 {
4684 rtx_code_label *label1, *label2, *label3, *label4, *label5;
4685 rtx adjusted_op0;
4686 rtx tem;
4687
4688 quotient = gen_reg_rtx (compute_mode);
4689 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4690 label1 = gen_label_rtx ();
4691 label2 = gen_label_rtx ();
4692 label3 = gen_label_rtx ();
4693 label4 = gen_label_rtx ();
4694 label5 = gen_label_rtx ();
4695 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4696 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4697 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4698 quotient, 0, OPTAB_LIB_WIDEN);
4699 if (tem != quotient)
4700 emit_move_insn (quotient, tem);
4701 emit_jump_insn (targetm.gen_jump (label5));
4702 emit_barrier ();
4703 emit_label (label1);
4704 expand_inc (adjusted_op0, const1_rtx);
4705 emit_jump_insn (targetm.gen_jump (label4));
4706 emit_barrier ();
4707 emit_label (label2);
4708 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4709 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4710 quotient, 0, OPTAB_LIB_WIDEN);
4711 if (tem != quotient)
4712 emit_move_insn (quotient, tem);
4713 emit_jump_insn (targetm.gen_jump (label5));
4714 emit_barrier ();
4715 emit_label (label3);
4716 expand_dec (adjusted_op0, const1_rtx);
4717 emit_label (label4);
4718 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4719 quotient, 0, OPTAB_LIB_WIDEN);
4720 if (tem != quotient)
4721 emit_move_insn (quotient, tem);
4722 expand_dec (quotient, const1_rtx);
4723 emit_label (label5);
4724 }
4725 break;
4726
4727 case CEIL_DIV_EXPR:
4728 case CEIL_MOD_EXPR:
4729 if (unsignedp)
4730 {
4731 if (op1_is_constant
4732 && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4733 && (HWI_COMPUTABLE_MODE_P (compute_mode)
4734 || INTVAL (op1) >= 0))
4735 {
4736 scalar_int_mode int_mode
4737 = as_a <scalar_int_mode> (compute_mode);
4738 rtx t1, t2, t3;
4739 unsigned HOST_WIDE_INT d = INTVAL (op1);
4740 t1 = expand_shift (RSHIFT_EXPR, int_mode, op0,
4741 floor_log2 (d), tquotient, 1);
4742 t2 = expand_binop (int_mode, and_optab, op0,
4743 gen_int_mode (d - 1, int_mode),
4744 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4745 t3 = gen_reg_rtx (int_mode);
4746 t3 = emit_store_flag (t3, NE, t2, const0_rtx, int_mode, 1, 1);
4747 if (t3 == 0)
4748 {
4749 rtx_code_label *lab;
4750 lab = gen_label_rtx ();
4751 do_cmp_and_jump (t2, const0_rtx, EQ, int_mode, lab);
4752 expand_inc (t1, const1_rtx);
4753 emit_label (lab);
4754 quotient = t1;
4755 }
4756 else
4757 quotient = force_operand (gen_rtx_PLUS (int_mode, t1, t3),
4758 tquotient);
4759 break;
4760 }
4761
4762 /* Try using an instruction that produces both the quotient and
4763 remainder, using truncation. We can easily compensate the
4764 quotient or remainder to get ceiling rounding, once we have the
4765 remainder. Notice that we compute also the final remainder
4766 value here, and return the result right away. */
4767 if (target == 0 || GET_MODE (target) != compute_mode)
4768 target = gen_reg_rtx (compute_mode);
4769
4770 if (rem_flag)
4771 {
4772 remainder = (REG_P (target)
4773 ? target : gen_reg_rtx (compute_mode));
4774 quotient = gen_reg_rtx (compute_mode);
4775 }
4776 else
4777 {
4778 quotient = (REG_P (target)
4779 ? target : gen_reg_rtx (compute_mode));
4780 remainder = gen_reg_rtx (compute_mode);
4781 }
4782
4783 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4784 remainder, 1))
4785 {
4786 /* This could be computed with a branch-less sequence.
4787 Save that for later. */
4788 rtx_code_label *label = gen_label_rtx ();
4789 do_cmp_and_jump (remainder, const0_rtx, EQ,
4790 compute_mode, label);
4791 expand_inc (quotient, const1_rtx);
4792 expand_dec (remainder, op1);
4793 emit_label (label);
4794 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4795 }
4796
4797 /* No luck with division elimination or divmod. Have to do it
4798 by conditionally adjusting op0 *and* the result. */
4799 {
4800 rtx_code_label *label1, *label2;
4801 rtx adjusted_op0, tem;
4802
4803 quotient = gen_reg_rtx (compute_mode);
4804 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4805 label1 = gen_label_rtx ();
4806 label2 = gen_label_rtx ();
4807 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4808 compute_mode, label1);
4809 emit_move_insn (quotient, const0_rtx);
4810 emit_jump_insn (targetm.gen_jump (label2));
4811 emit_barrier ();
4812 emit_label (label1);
4813 expand_dec (adjusted_op0, const1_rtx);
4814 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4815 quotient, 1, OPTAB_LIB_WIDEN);
4816 if (tem != quotient)
4817 emit_move_insn (quotient, tem);
4818 expand_inc (quotient, const1_rtx);
4819 emit_label (label2);
4820 }
4821 }
4822 else /* signed */
4823 {
4824 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4825 && INTVAL (op1) >= 0)
4826 {
4827 /* This is extremely similar to the code for the unsigned case
4828 above. For 2.7 we should merge these variants, but for
4829 2.6.1 I don't want to touch the code for unsigned since that
4830 get used in C. The signed case will only be used by other
4831 languages (Ada). */
4832
4833 rtx t1, t2, t3;
4834 unsigned HOST_WIDE_INT d = INTVAL (op1);
4835 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4836 floor_log2 (d), tquotient, 0);
4837 t2 = expand_binop (compute_mode, and_optab, op0,
4838 gen_int_mode (d - 1, compute_mode),
4839 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4840 t3 = gen_reg_rtx (compute_mode);
4841 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4842 compute_mode, 1, 1);
4843 if (t3 == 0)
4844 {
4845 rtx_code_label *lab;
4846 lab = gen_label_rtx ();
4847 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4848 expand_inc (t1, const1_rtx);
4849 emit_label (lab);
4850 quotient = t1;
4851 }
4852 else
4853 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4854 t1, t3),
4855 tquotient);
4856 break;
4857 }
4858
4859 /* Try using an instruction that produces both the quotient and
4860 remainder, using truncation. We can easily compensate the
4861 quotient or remainder to get ceiling rounding, once we have the
4862 remainder. Notice that we compute also the final remainder
4863 value here, and return the result right away. */
4864 if (target == 0 || GET_MODE (target) != compute_mode)
4865 target = gen_reg_rtx (compute_mode);
4866 if (rem_flag)
4867 {
4868 remainder = (REG_P (target)
4869 ? target : gen_reg_rtx (compute_mode));
4870 quotient = gen_reg_rtx (compute_mode);
4871 }
4872 else
4873 {
4874 quotient = (REG_P (target)
4875 ? target : gen_reg_rtx (compute_mode));
4876 remainder = gen_reg_rtx (compute_mode);
4877 }
4878
4879 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4880 remainder, 0))
4881 {
4882 /* This could be computed with a branch-less sequence.
4883 Save that for later. */
4884 rtx tem;
4885 rtx_code_label *label = gen_label_rtx ();
4886 do_cmp_and_jump (remainder, const0_rtx, EQ,
4887 compute_mode, label);
4888 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4889 NULL_RTX, 0, OPTAB_WIDEN);
4890 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4891 expand_inc (quotient, const1_rtx);
4892 expand_dec (remainder, op1);
4893 emit_label (label);
4894 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4895 }
4896
4897 /* No luck with division elimination or divmod. Have to do it
4898 by conditionally adjusting op0 *and* the result. */
4899 {
4900 rtx_code_label *label1, *label2, *label3, *label4, *label5;
4901 rtx adjusted_op0;
4902 rtx tem;
4903
4904 quotient = gen_reg_rtx (compute_mode);
4905 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4906 label1 = gen_label_rtx ();
4907 label2 = gen_label_rtx ();
4908 label3 = gen_label_rtx ();
4909 label4 = gen_label_rtx ();
4910 label5 = gen_label_rtx ();
4911 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4912 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4913 compute_mode, label1);
4914 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4915 quotient, 0, OPTAB_LIB_WIDEN);
4916 if (tem != quotient)
4917 emit_move_insn (quotient, tem);
4918 emit_jump_insn (targetm.gen_jump (label5));
4919 emit_barrier ();
4920 emit_label (label1);
4921 expand_dec (adjusted_op0, const1_rtx);
4922 emit_jump_insn (targetm.gen_jump (label4));
4923 emit_barrier ();
4924 emit_label (label2);
4925 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4926 compute_mode, label3);
4927 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4928 quotient, 0, OPTAB_LIB_WIDEN);
4929 if (tem != quotient)
4930 emit_move_insn (quotient, tem);
4931 emit_jump_insn (targetm.gen_jump (label5));
4932 emit_barrier ();
4933 emit_label (label3);
4934 expand_inc (adjusted_op0, const1_rtx);
4935 emit_label (label4);
4936 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4937 quotient, 0, OPTAB_LIB_WIDEN);
4938 if (tem != quotient)
4939 emit_move_insn (quotient, tem);
4940 expand_inc (quotient, const1_rtx);
4941 emit_label (label5);
4942 }
4943 }
4944 break;
4945
4946 case EXACT_DIV_EXPR:
4947 if (op1_is_constant && HWI_COMPUTABLE_MODE_P (compute_mode))
4948 {
4949 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4950 int size = GET_MODE_BITSIZE (int_mode);
4951 HOST_WIDE_INT d = INTVAL (op1);
4952 unsigned HOST_WIDE_INT ml;
4953 int pre_shift;
4954 rtx t1;
4955
4956 pre_shift = ctz_or_zero (d);
4957 ml = invert_mod2n (d >> pre_shift, size);
4958 t1 = expand_shift (RSHIFT_EXPR, int_mode, op0,
4959 pre_shift, NULL_RTX, unsignedp);
4960 quotient = expand_mult (int_mode, t1, gen_int_mode (ml, int_mode),
4961 NULL_RTX, 1);
4962
4963 insn = get_last_insn ();
4964 set_dst_reg_note (insn, REG_EQUAL,
4965 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4966 int_mode, op0, op1),
4967 quotient);
4968 }
4969 break;
4970
4971 case ROUND_DIV_EXPR:
4972 case ROUND_MOD_EXPR:
4973 if (unsignedp)
4974 {
4975 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4976 rtx tem;
4977 rtx_code_label *label;
4978 label = gen_label_rtx ();
4979 quotient = gen_reg_rtx (int_mode);
4980 remainder = gen_reg_rtx (int_mode);
4981 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4982 {
4983 rtx tem;
4984 quotient = expand_binop (int_mode, udiv_optab, op0, op1,
4985 quotient, 1, OPTAB_LIB_WIDEN);
4986 tem = expand_mult (int_mode, quotient, op1, NULL_RTX, 1);
4987 remainder = expand_binop (int_mode, sub_optab, op0, tem,
4988 remainder, 1, OPTAB_LIB_WIDEN);
4989 }
4990 tem = plus_constant (int_mode, op1, -1);
4991 tem = expand_shift (RSHIFT_EXPR, int_mode, tem, 1, NULL_RTX, 1);
4992 do_cmp_and_jump (remainder, tem, LEU, int_mode, label);
4993 expand_inc (quotient, const1_rtx);
4994 expand_dec (remainder, op1);
4995 emit_label (label);
4996 }
4997 else
4998 {
4999 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5000 int size = GET_MODE_BITSIZE (int_mode);
5001 rtx abs_rem, abs_op1, tem, mask;
5002 rtx_code_label *label;
5003 label = gen_label_rtx ();
5004 quotient = gen_reg_rtx (int_mode);
5005 remainder = gen_reg_rtx (int_mode);
5006 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
5007 {
5008 rtx tem;
5009 quotient = expand_binop (int_mode, sdiv_optab, op0, op1,
5010 quotient, 0, OPTAB_LIB_WIDEN);
5011 tem = expand_mult (int_mode, quotient, op1, NULL_RTX, 0);
5012 remainder = expand_binop (int_mode, sub_optab, op0, tem,
5013 remainder, 0, OPTAB_LIB_WIDEN);
5014 }
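/* Explanatory note (not in the original source): adjust only when
   2 * |remainder| >= |op1|, i.e. round to nearest with ties away from zero.
   MASK is the sign of OP0 ^ OP1 (all ones when the quotient is negative,
   zero otherwise), so (MASK ^ 1) - MASK is +1 or -1 and (MASK ^ OP1) - MASK
   is OP1 or -OP1, giving the adjustment the same sign as the quotient
   without branching.  */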
5015 abs_rem = expand_abs (int_mode, remainder, NULL_RTX, 1, 0);
5016 abs_op1 = expand_abs (int_mode, op1, NULL_RTX, 1, 0);
5017 tem = expand_shift (LSHIFT_EXPR, int_mode, abs_rem,
5018 1, NULL_RTX, 1);
5019 do_cmp_and_jump (tem, abs_op1, LTU, int_mode, label);
5020 tem = expand_binop (int_mode, xor_optab, op0, op1,
5021 NULL_RTX, 0, OPTAB_WIDEN);
5022 mask = expand_shift (RSHIFT_EXPR, int_mode, tem,
5023 size - 1, NULL_RTX, 0);
5024 tem = expand_binop (int_mode, xor_optab, mask, const1_rtx,
5025 NULL_RTX, 0, OPTAB_WIDEN);
5026 tem = expand_binop (int_mode, sub_optab, tem, mask,
5027 NULL_RTX, 0, OPTAB_WIDEN);
5028 expand_inc (quotient, tem);
5029 tem = expand_binop (int_mode, xor_optab, mask, op1,
5030 NULL_RTX, 0, OPTAB_WIDEN);
5031 tem = expand_binop (int_mode, sub_optab, tem, mask,
5032 NULL_RTX, 0, OPTAB_WIDEN);
5033 expand_dec (remainder, tem);
5034 emit_label (label);
5035 }
5036 return gen_lowpart (mode, rem_flag ? remainder : quotient);
5037
5038 default:
5039 gcc_unreachable ();
5040 }
5041
5042 if (quotient == 0)
5043 {
5044 if (target && GET_MODE (target) != compute_mode)
5045 target = 0;
5046
5047 if (rem_flag)
5048 {
5049 /* Try to produce the remainder without producing the quotient.
5050 If we seem to have a divmod pattern that does not require widening,
5051 don't try widening here. We should really have a WIDEN argument
5052 to expand_twoval_binop, since what we'd really like to do here is
5053 1) try a mod insn in compute_mode
5054 2) try a divmod insn in compute_mode
5055 3) try a div insn in compute_mode and multiply-subtract to get
5056 remainder
5057 4) try the same things with widening allowed. */
5058 remainder
5059 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5060 op0, op1, target,
5061 unsignedp,
5062 ((optab_handler (optab2, compute_mode)
5063 != CODE_FOR_nothing)
5064 ? OPTAB_DIRECT : OPTAB_WIDEN));
5065 if (remainder == 0)
5066 {
5067 /* No luck there. Can we do remainder and divide at once
5068 without a library call? */
5069 remainder = gen_reg_rtx (compute_mode);
5070 if (! expand_twoval_binop ((unsignedp
5071 ? udivmod_optab
5072 : sdivmod_optab),
5073 op0, op1,
5074 NULL_RTX, remainder, unsignedp))
5075 remainder = 0;
5076 }
5077
5078 if (remainder)
5079 return gen_lowpart (mode, remainder);
5080 }
5081
5082 /* Produce the quotient. Try a quotient insn, but not a library call.
5083 If we have a divmod in this mode, use it in preference to widening
5084 the div (for this test we assume it will not fail). Note that optab2
5085 is set to the one of the two optabs that the call below will use. */
5086 quotient
5087 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
5088 op0, op1, rem_flag ? NULL_RTX : target,
5089 unsignedp,
5090 ((optab_handler (optab2, compute_mode)
5091 != CODE_FOR_nothing)
5092 ? OPTAB_DIRECT : OPTAB_WIDEN));
5093
5094 if (quotient == 0)
5095 {
5096 /* No luck there. Try a quotient-and-remainder insn,
5097 keeping the quotient alone. */
5098 quotient = gen_reg_rtx (compute_mode);
5099 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
5100 op0, op1,
5101 quotient, NULL_RTX, unsignedp))
5102 {
5103 quotient = 0;
5104 if (! rem_flag)
5105 /* Still no luck. If we are not computing the remainder,
5106 use a library call for the quotient. */
5107 quotient = sign_expand_binop (compute_mode,
5108 udiv_optab, sdiv_optab,
5109 op0, op1, target,
5110 unsignedp, OPTAB_LIB_WIDEN);
5111 }
5112 }
5113 }
5114
5115 if (rem_flag)
5116 {
5117 if (target && GET_MODE (target) != compute_mode)
5118 target = 0;
5119
5120 if (quotient == 0)
5121 {
5122 /* No divide instruction either. Use library for remainder. */
5123 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5124 op0, op1, target,
5125 unsignedp, OPTAB_LIB_WIDEN);
5126 /* No remainder function. Try a quotient-and-remainder
5127 function, keeping the remainder. */
5128 if (!remainder)
5129 {
5130 remainder = gen_reg_rtx (compute_mode);
5131 if (!expand_twoval_binop_libfunc
5132 (unsignedp ? udivmod_optab : sdivmod_optab,
5133 op0, op1,
5134 NULL_RTX, remainder,
5135 unsignedp ? UMOD : MOD))
5136 remainder = NULL_RTX;
5137 }
5138 }
5139 else
5140 {
5141 /* We divided. Now finish doing X - Y * (X / Y). */
5142 remainder = expand_mult (compute_mode, quotient, op1,
5143 NULL_RTX, unsignedp);
5144 remainder = expand_binop (compute_mode, sub_optab, op0,
5145 remainder, target, unsignedp,
5146 OPTAB_LIB_WIDEN);
5147 }
5148 }
5149
5150 return gen_lowpart (mode, rem_flag ? remainder : quotient);
5151 }
5152 \f
5153 /* Return a tree node with data type TYPE, describing the value of X.
5154 Usually this is a VAR_DECL, if there is no obvious better choice.
5155 X may be an expression, but we only support those expressions
5156 generated by loop.c. */
5157
5158 tree
5159 make_tree (tree type, rtx x)
5160 {
5161 tree t;
5162
5163 switch (GET_CODE (x))
5164 {
5165 case CONST_INT:
5166 case CONST_WIDE_INT:
5167 t = wide_int_to_tree (type, rtx_mode_t (x, TYPE_MODE (type)));
5168 return t;
5169
5170 case CONST_DOUBLE:
5171 STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
5172 if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
5173 t = wide_int_to_tree (type,
5174 wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
5175 HOST_BITS_PER_WIDE_INT * 2));
5176 else
5177 t = build_real (type, *CONST_DOUBLE_REAL_VALUE (x));
5178
5179 return t;
5180
5181 case CONST_VECTOR:
5182 {
5183 int units = CONST_VECTOR_NUNITS (x);
5184 tree itype = TREE_TYPE (type);
5185 int i;
5186
5187 /* Build a tree with vector elements. */
5188 tree_vector_builder elts (type, units, 1);
5189 for (i = 0; i < units; ++i)
5190 {
5191 rtx elt = CONST_VECTOR_ELT (x, i);
5192 elts.quick_push (make_tree (itype, elt));
5193 }
5194
5195 return elts.build ();
5196 }
5197
5198 case PLUS:
5199 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5200 make_tree (type, XEXP (x, 1)));
5201
5202 case MINUS:
5203 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5204 make_tree (type, XEXP (x, 1)));
5205
5206 case NEG:
5207 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5208
5209 case MULT:
5210 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5211 make_tree (type, XEXP (x, 1)));
5212
5213 case ASHIFT:
5214 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5215 make_tree (type, XEXP (x, 1)));
5216
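/* Explanatory note (not in the original source): a tree-level RSHIFT_EXPR
   takes its semantics from the operand type, so the logical shift below is
   built in the unsigned variant of TYPE and the arithmetic shift in the
   signed variant, then converted back to TYPE.  */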
5217 case LSHIFTRT:
5218 t = unsigned_type_for (type);
5219 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5220 make_tree (t, XEXP (x, 0)),
5221 make_tree (type, XEXP (x, 1))));
5222
5223 case ASHIFTRT:
5224 t = signed_type_for (type);
5225 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5226 make_tree (t, XEXP (x, 0)),
5227 make_tree (type, XEXP (x, 1))));
5228
5229 case DIV:
5230 if (TREE_CODE (type) != REAL_TYPE)
5231 t = signed_type_for (type);
5232 else
5233 t = type;
5234
5235 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5236 make_tree (t, XEXP (x, 0)),
5237 make_tree (t, XEXP (x, 1))));
5238 case UDIV:
5239 t = unsigned_type_for (type);
5240 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5241 make_tree (t, XEXP (x, 0)),
5242 make_tree (t, XEXP (x, 1))));
5243
5244 case SIGN_EXTEND:
5245 case ZERO_EXTEND:
5246 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5247 GET_CODE (x) == ZERO_EXTEND);
5248 return fold_convert (type, make_tree (t, XEXP (x, 0)));
5249
5250 case CONST:
5251 {
5252 rtx op = XEXP (x, 0);
5253 if (GET_CODE (op) == VEC_DUPLICATE)
5254 {
5255 tree elt_tree = make_tree (TREE_TYPE (type), XEXP (op, 0));
5256 return build_vector_from_val (type, elt_tree);
5257 }
5258 return make_tree (type, op);
5259 }
5260
5261 case SYMBOL_REF:
5262 t = SYMBOL_REF_DECL (x);
5263 if (t)
5264 return fold_convert (type, build_fold_addr_expr (t));
5265 /* fall through. */
5266
5267 default:
5268 t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5269
5270 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5271 address mode to pointer mode. */
5272 if (POINTER_TYPE_P (type))
5273 x = convert_memory_address_addr_space
5274 (SCALAR_INT_TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5275
5276 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5277 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5278 t->decl_with_rtl.rtl = x;
5279
5280 return t;
5281 }
5282 }
5283 \f
5284 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5285 and returning TARGET.
5286
5287 If TARGET is 0, a pseudo-register or constant is returned. */
5288
5289 rtx
5290 expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
5291 {
5292 rtx tem = 0;
5293
5294 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5295 tem = simplify_binary_operation (AND, mode, op0, op1);
5296 if (tem == 0)
5297 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5298
5299 if (target == 0)
5300 target = tem;
5301 else if (tem != target)
5302 emit_move_insn (target, tem);
5303 return target;
5304 }
5305
5306 /* Helper function for emit_store_flag. */
5307 rtx
5308 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5309 machine_mode mode, machine_mode compare_mode,
5310 int unsignedp, rtx x, rtx y, int normalizep,
5311 machine_mode target_mode)
5312 {
5313 struct expand_operand ops[4];
5314 rtx op0, comparison, subtarget;
5315 rtx_insn *last;
5316 scalar_int_mode result_mode = targetm.cstore_mode (icode);
5317 scalar_int_mode int_target_mode;
5318
5319 last = get_last_insn ();
5320 x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5321 y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5322 if (!x || !y)
5323 {
5324 delete_insns_since (last);
5325 return NULL_RTX;
5326 }
5327
5328 if (target_mode == VOIDmode)
5329 int_target_mode = result_mode;
5330 else
5331 int_target_mode = as_a <scalar_int_mode> (target_mode);
5332 if (!target)
5333 target = gen_reg_rtx (int_target_mode);
5334
5335 comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5336
5337 create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5338 create_fixed_operand (&ops[1], comparison);
5339 create_fixed_operand (&ops[2], x);
5340 create_fixed_operand (&ops[3], y);
5341 if (!maybe_expand_insn (icode, 4, ops))
5342 {
5343 delete_insns_since (last);
5344 return NULL_RTX;
5345 }
5346 subtarget = ops[0].value;
5347
5348 /* If we are converting to a wider mode, first convert to
5349 INT_TARGET_MODE, then normalize. This produces better combining
5350 opportunities on machines that have a SIGN_EXTRACT when we are
5351 testing a single bit. This mostly benefits the 68k.
5352
5353 If STORE_FLAG_VALUE does not have the sign bit set when
5354 interpreted in MODE, we can do this conversion as unsigned, which
5355 is usually more efficient. */
5356 if (GET_MODE_SIZE (int_target_mode) > GET_MODE_SIZE (result_mode))
5357 {
5358 convert_move (target, subtarget,
5359 val_signbit_known_clear_p (result_mode,
5360 STORE_FLAG_VALUE));
5361 op0 = target;
5362 result_mode = int_target_mode;
5363 }
5364 else
5365 op0 = subtarget;
5366
5367 /* If we want to keep subexpressions around, don't reuse our last
5368 target. */
5369 if (optimize)
5370 subtarget = 0;
5371
5372 /* Now normalize to the proper value in MODE. Sometimes we don't
5373 have to do anything. */
5374 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5375 ;
5376 /* STORE_FLAG_VALUE might be the most negative number, so write
5377 the comparison this way to avoid a compile-time warning. */
5378 else if (- normalizep == STORE_FLAG_VALUE)
5379 op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5380
5381 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5382 it hard to use a value of just the sign bit due to ANSI integer
5383 constant typing rules. */
5384 else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5385 op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5386 GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5387 normalizep == 1);
5388 else
5389 {
5390 gcc_assert (STORE_FLAG_VALUE & 1);
5391
5392 op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5393 if (normalizep == -1)
5394 op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5395 }
5396
5397 /* If we were converting to a smaller mode, do the conversion now. */
5398 if (int_target_mode != result_mode)
5399 {
5400 convert_move (target, op0, 0);
5401 return target;
5402 }
5403 else
5404 return op0;
5405 }
5406
5407
5408 /* A subroutine of emit_store_flag only including "tricks" that do not
5409 need a recursive call. These are kept separate to avoid infinite
5410 loops. */
5411
5412 static rtx
5413 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5414 machine_mode mode, int unsignedp, int normalizep,
5415 machine_mode target_mode)
5416 {
5417 rtx subtarget;
5418 enum insn_code icode;
5419 machine_mode compare_mode;
5420 enum mode_class mclass;
5421 enum rtx_code scode;
5422
5423 if (unsignedp)
5424 code = unsigned_condition (code);
5425 scode = swap_condition (code);
5426
5427 /* If one operand is constant, make it the second one. Only do this
5428 if the other operand is not constant as well. */
5429
5430 if (swap_commutative_operands_p (op0, op1))
5431 {
5432 std::swap (op0, op1);
5433 code = swap_condition (code);
5434 }
5435
5436 if (mode == VOIDmode)
5437 mode = GET_MODE (op0);
5438
5439 /* For some comparisons with 1 and -1, we can convert this to
5440 comparisons with zero. This will often produce more opportunities for
5441 store-flag insns. */
5442
5443 switch (code)
5444 {
5445 case LT:
5446 if (op1 == const1_rtx)
5447 op1 = const0_rtx, code = LE;
5448 break;
5449 case LE:
5450 if (op1 == constm1_rtx)
5451 op1 = const0_rtx, code = LT;
5452 break;
5453 case GE:
5454 if (op1 == const1_rtx)
5455 op1 = const0_rtx, code = GT;
5456 break;
5457 case GT:
5458 if (op1 == constm1_rtx)
5459 op1 = const0_rtx, code = GE;
5460 break;
5461 case GEU:
5462 if (op1 == const1_rtx)
5463 op1 = const0_rtx, code = NE;
5464 break;
5465 case LTU:
5466 if (op1 == const1_rtx)
5467 op1 = const0_rtx, code = EQ;
5468 break;
5469 default:
5470 break;
5471 }
5472
5473 /* If we are comparing a double-word integer with zero or -1, we can
5474 convert the comparison into one involving a single word. */
5475 scalar_int_mode int_mode;
5476 if (is_int_mode (mode, &int_mode)
5477 && GET_MODE_BITSIZE (int_mode) == BITS_PER_WORD * 2
5478 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5479 {
5480 rtx tem;
5481 if ((code == EQ || code == NE)
5482 && (op1 == const0_rtx || op1 == constm1_rtx))
5483 {
5484 rtx op00, op01;
5485
5486 /* Do a logical OR or AND of the two words and compare the
5487 result. */
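/* Explanatory note (not in the original source): a double-word value is
   zero exactly when the OR of its two words is zero, and is -1 exactly
   when the AND of its two words is -1, so a single word_mode comparison
   suffices.  */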
5488 op00 = simplify_gen_subreg (word_mode, op0, int_mode, 0);
5489 op01 = simplify_gen_subreg (word_mode, op0, int_mode, UNITS_PER_WORD);
5490 tem = expand_binop (word_mode,
5491 op1 == const0_rtx ? ior_optab : and_optab,
5492 op00, op01, NULL_RTX, unsignedp,
5493 OPTAB_DIRECT);
5494
5495 if (tem != 0)
5496 tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5497 unsignedp, normalizep);
5498 }
5499 else if ((code == LT || code == GE) && op1 == const0_rtx)
5500 {
5501 rtx op0h;
5502
5503 /* If testing the sign bit, can just test on high word. */
5504 op0h = simplify_gen_subreg (word_mode, op0, int_mode,
5505 subreg_highpart_offset (word_mode,
5506 int_mode));
5507 tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5508 unsignedp, normalizep);
5509 }
5510 else
5511 tem = NULL_RTX;
5512
5513 if (tem)
5514 {
5515 if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5516 return tem;
5517 if (!target)
5518 target = gen_reg_rtx (target_mode);
5519
5520 convert_move (target, tem,
5521 !val_signbit_known_set_p (word_mode,
5522 (normalizep ? normalizep
5523 : STORE_FLAG_VALUE)));
5524 return target;
5525 }
5526 }
5527
5528 /* If this is A < 0 or A >= 0, we can do this by taking the ones
5529 complement of A (for GE) and shifting the sign bit to the low bit. */
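/* Illustrative sketch (not in the original source): with a 32-bit mode and
   STORE_FLAG_VALUE == 1, "A < 0" is just the logical shift A >> 31, and
   "A >= 0" is (~A) >> 31; using an arithmetic shift instead yields the
   -1/0 form.  */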
5530 if (op1 == const0_rtx && (code == LT || code == GE)
5531 && is_int_mode (mode, &int_mode)
5532 && (normalizep || STORE_FLAG_VALUE == 1
5533 || val_signbit_p (int_mode, STORE_FLAG_VALUE)))
5534 {
5535 scalar_int_mode int_target_mode;
5536 subtarget = target;
5537
5538 if (!target)
5539 int_target_mode = int_mode;
5540 else
5541 {
5542 /* If the result is to be wider than OP0, it is best to convert it
5543 first. If it is to be narrower, it is *incorrect* to convert it
5544 first. */
5545 int_target_mode = as_a <scalar_int_mode> (target_mode);
5546 if (GET_MODE_SIZE (int_target_mode) > GET_MODE_SIZE (int_mode))
5547 {
5548 op0 = convert_modes (int_target_mode, int_mode, op0, 0);
5549 int_mode = int_target_mode;
5550 }
5551 }
5552
5553 if (int_target_mode != int_mode)
5554 subtarget = 0;
5555
5556 if (code == GE)
5557 op0 = expand_unop (int_mode, one_cmpl_optab, op0,
5558 ((STORE_FLAG_VALUE == 1 || normalizep)
5559 ? 0 : subtarget), 0);
5560
5561 if (STORE_FLAG_VALUE == 1 || normalizep)
5562 /* If we are supposed to produce a 0/1 value, we want to do
5563 a logical shift from the sign bit to the low-order bit; for
5564 a -1/0 value, we do an arithmetic shift. */
5565 op0 = expand_shift (RSHIFT_EXPR, int_mode, op0,
5566 GET_MODE_BITSIZE (int_mode) - 1,
5567 subtarget, normalizep != -1);
5568
5569 if (int_mode != int_target_mode)
5570 op0 = convert_modes (int_target_mode, int_mode, op0, 0);
5571
5572 return op0;
5573 }
5574
5575 mclass = GET_MODE_CLASS (mode);
5576 FOR_EACH_MODE_FROM (compare_mode, mode)
5577 {
5578 machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5579 icode = optab_handler (cstore_optab, optab_mode);
5580 if (icode != CODE_FOR_nothing)
5581 {
5582 do_pending_stack_adjust ();
5583 rtx tem = emit_cstore (target, icode, code, mode, compare_mode,
5584 unsignedp, op0, op1, normalizep, target_mode);
5585 if (tem)
5586 return tem;
5587
5588 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5589 {
5590 tem = emit_cstore (target, icode, scode, mode, compare_mode,
5591 unsignedp, op1, op0, normalizep, target_mode);
5592 if (tem)
5593 return tem;
5594 }
5595 break;
5596 }
5597 }
5598
5599 return 0;
5600 }
5601
5602 /* Subroutine of emit_store_flag that handles cases in which the operands
5603 are scalar integers. SUBTARGET is the target to use for temporary
5604 operations and TRUEVAL is the value to store when the condition is
5605 true. All other arguments are as for emit_store_flag. */
5606
5607 rtx
5608 emit_store_flag_int (rtx target, rtx subtarget, enum rtx_code code, rtx op0,
5609 rtx op1, scalar_int_mode mode, int unsignedp,
5610 int normalizep, rtx trueval)
5611 {
5612 machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5613 rtx_insn *last = get_last_insn ();
5614
5615 /* If this is an equality comparison of integers, we can try to exclusive-or
5616 (or subtract) the two operands and use a recursive call to try the
5617 comparison with zero. Don't do any of these cases if branches are
5618 very cheap. */
5619
5620 if ((code == EQ || code == NE) && op1 != const0_rtx)
5621 {
5622 rtx tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5623 OPTAB_WIDEN);
5624
5625 if (tem == 0)
5626 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5627 OPTAB_WIDEN);
5628 if (tem != 0)
5629 tem = emit_store_flag (target, code, tem, const0_rtx,
5630 mode, unsignedp, normalizep);
5631 if (tem != 0)
5632 return tem;
5633
5634 delete_insns_since (last);
5635 }
5636
5637 /* For integer comparisons, try the reverse comparison. However, for
5638 small X, and if we would have to extend it anyway, implementing "X != 0"
5639 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
5640 rtx_code rcode = reverse_condition (code);
5641 if (can_compare_p (rcode, mode, ccp_store_flag)
5642 && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5643 && code == NE
5644 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5645 && op1 == const0_rtx))
5646 {
5647 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5648 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5649
5650 /* Again, for the reverse comparison, use either an addition or a XOR. */
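/* Explanatory note (not in the original source): the reverse comparison
   yields STORE_FLAG_VALUE when the original condition is false and 0 when
   it is true, so adding NORMALIZEP (when it is the negation of
   STORE_FLAG_VALUE) or XORing with TRUEVAL maps those two values onto 0
   and the wanted result.  */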
5651 if (want_add
5652 && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5653 optimize_insn_for_speed_p ()) == 0)
5654 {
5655 rtx tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5656 STORE_FLAG_VALUE, target_mode);
5657 if (tem != 0)
5658 tem = expand_binop (target_mode, add_optab, tem,
5659 gen_int_mode (normalizep, target_mode),
5660 target, 0, OPTAB_WIDEN);
5661 if (tem != 0)
5662 return tem;
5663 }
5664 else if (!want_add
5665 && rtx_cost (trueval, mode, XOR, 1,
5666 optimize_insn_for_speed_p ()) == 0)
5667 {
5668 rtx tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5669 normalizep, target_mode);
5670 if (tem != 0)
5671 tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5672 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5673 if (tem != 0)
5674 return tem;
5675 }
5676
5677 delete_insns_since (last);
5678 }
5679
5680 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5681 the constant zero. Reject all other comparisons at this point. Only
5682 do LE and GT if branches are expensive since they are expensive on
5683 2-operand machines. */
5684
5685 if (op1 != const0_rtx
5686 || (code != EQ && code != NE
5687 && (BRANCH_COST (optimize_insn_for_speed_p (),
5688 false) <= 1 || (code != LE && code != GT))))
5689 return 0;
5690
5691 /* Try to put the result of the comparison in the sign bit. Assume we can't
5692 do the necessary operation below. */
5693
5694 rtx tem = 0;
5695
5696 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5697 the sign bit set. */
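/* Illustrative check (not in the original source): 0 | -1 == -1 and
   -5 | -6 == -5 both have the sign bit set, while 3 | 2 == 3 does not;
   INT_MIN works too because INT_MIN itself already has the sign bit set.  */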
5698
5699 if (code == LE)
5700 {
5701 /* This is destructive, so SUBTARGET can't be OP0. */
5702 if (rtx_equal_p (subtarget, op0))
5703 subtarget = 0;
5704
5705 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5706 OPTAB_WIDEN);
5707 if (tem)
5708 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5709 OPTAB_WIDEN);
5710 }
5711
5712 /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5713 number of bits in the mode of OP0, minus one. */
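/* Illustrative check (not in the original source), for a 32-bit mode:
   (5 >> 31) - 5 == -5 has the sign bit set, while (0 >> 31) - 0 == 0 and
   (-7 >> 31) - (-7) == -1 + 7 == 6 do not, so the sign bit of the result
   is exactly "A > 0".  */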
5714
5715 if (code == GT)
5716 {
5717 if (rtx_equal_p (subtarget, op0))
5718 subtarget = 0;
5719
5720 tem = maybe_expand_shift (RSHIFT_EXPR, mode, op0,
5721 GET_MODE_BITSIZE (mode) - 1,
5722 subtarget, 0);
5723 if (tem)
5724 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5725 OPTAB_WIDEN);
5726 }
5727
5728 if (code == EQ || code == NE)
5729 {
5730 /* For EQ or NE, one way to do the comparison is to apply an operation
5731 that converts the operand into a positive number if it is nonzero
5732 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5733 for NE we negate. This puts the result in the sign bit. Then we
5734 normalize with a shift, if needed.
5735
5736 Two operations that can do the above actions are ABS and FFS, so try
5737 them. If that doesn't work, and MODE is smaller than a full word,
5738 we can use zero-extension to the wider mode (an unsigned conversion)
5739 as the operation. */
5740
5741 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5742 that is compensated by the subsequent overflow when subtracting
5743 one / negating. */
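/* Illustrative check (not in the original source): for EQ, ABS (3) - 1 == 2
   has a clear sign bit while ABS (0) - 1 == -1 has it set; for NE,
   -ABS (3) == -3 has it set while -ABS (0) == 0 does not.  ABS (INT_MIN)
   wraps to INT_MIN, and the subsequent subtract or negate wraps again to
   give the right sign bit.  */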
5744
5745 if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5746 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5747 else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5748 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5749 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5750 {
5751 tem = convert_modes (word_mode, mode, op0, 1);
5752 mode = word_mode;
5753 }
5754
5755 if (tem != 0)
5756 {
5757 if (code == EQ)
5758 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5759 0, OPTAB_WIDEN);
5760 else
5761 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5762 }
5763
5764 /* If we couldn't do it that way, for NE we can "or" the two's complement
5765 of the value with itself. For EQ, we take the one's complement of
5766 that "or", which is an extra insn, so we only handle EQ if branches
5767 are expensive. */
5768
5769 if (tem == 0
5770 && (code == NE
5771 || BRANCH_COST (optimize_insn_for_speed_p (),
5772 false) > 1))
5773 {
5774 if (rtx_equal_p (subtarget, op0))
5775 subtarget = 0;
5776
5777 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5778 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5779 OPTAB_WIDEN);
5780
5781 if (tem && code == EQ)
5782 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5783 }
5784 }
5785
5786 if (tem && normalizep)
5787 tem = maybe_expand_shift (RSHIFT_EXPR, mode, tem,
5788 GET_MODE_BITSIZE (mode) - 1,
5789 subtarget, normalizep == 1);
5790
5791 if (tem)
5792 {
5793 if (!target)
5794 ;
5795 else if (GET_MODE (tem) != target_mode)
5796 {
5797 convert_move (target, tem, 0);
5798 tem = target;
5799 }
5800 else if (!subtarget)
5801 {
5802 emit_move_insn (target, tem);
5803 tem = target;
5804 }
5805 }
5806 else
5807 delete_insns_since (last);
5808
5809 return tem;
5810 }
5811
5812 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5813 and store the result in TARGET. Normally return TARGET.
5814 Return 0 if that cannot be done.
5815
5816 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5817 it is VOIDmode, they cannot both be CONST_INT.
5818
5819 UNSIGNEDP is for the case where we have to widen the operands
5820 to perform the operation. It says to use zero-extension.
5821
5822 NORMALIZEP is 1 if we should convert the result to be either zero
5823 or one. NORMALIZEP is -1 if we should convert the result to be
5824 either zero or -1. If NORMALIZEP is zero, the result will be left
5825 "raw" out of the scc insn. */
5826
5827 rtx
5828 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5829 machine_mode mode, int unsignedp, int normalizep)
5830 {
5831 machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5832 enum rtx_code rcode;
5833 rtx subtarget;
5834 rtx tem, trueval;
5835 rtx_insn *last;
5836
5837 /* If we compare constants, we shouldn't use a store-flag operation,
5838 but a constant load. We can get there via the vanilla route that
5839 usually generates a compare-branch sequence, but will in this case
5840 fold the comparison to a constant, and thus elide the branch. */
5841 if (CONSTANT_P (op0) && CONSTANT_P (op1))
5842 return NULL_RTX;
5843
5844 tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5845 target_mode);
5846 if (tem)
5847 return tem;
5848
5849 /* If we reached here, we can't do this with a scc insn, however there
5850 are some comparisons that can be done in other ways. Don't do any
5851 of these cases if branches are very cheap. */
5852 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5853 return 0;
5854
5855 /* See what we need to return. We can only return a 1, -1, or the
5856 sign bit. */
5857
5858 if (normalizep == 0)
5859 {
5860 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5861 normalizep = STORE_FLAG_VALUE;
5862
5863 else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5864 ;
5865 else
5866 return 0;
5867 }
5868
5869 last = get_last_insn ();
5870
5871 /* If optimizing, use different pseudo registers for each insn, instead
5872 of reusing the same pseudo. This leads to better CSE, but slows
5873 down the compiler, since there are more pseudos. */
5874 subtarget = (!optimize
5875 && (target_mode == mode)) ? target : NULL_RTX;
5876 trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5877
5878 /* For floating-point comparisons, try the reverse comparison or try
5879 changing the "orderedness" of the comparison. */
5880 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5881 {
5882 enum rtx_code first_code;
5883 bool and_them;
5884
5885 rcode = reverse_condition_maybe_unordered (code);
5886 if (can_compare_p (rcode, mode, ccp_store_flag)
5887 && (code == ORDERED || code == UNORDERED
5888 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5889 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5890 {
5891 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5892 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5893
5894 /* For the reverse comparison, use either an addition or a XOR. */
5895 if (want_add
5896 && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5897 optimize_insn_for_speed_p ()) == 0)
5898 {
5899 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5900 STORE_FLAG_VALUE, target_mode);
5901 if (tem)
5902 return expand_binop (target_mode, add_optab, tem,
5903 gen_int_mode (normalizep, target_mode),
5904 target, 0, OPTAB_WIDEN);
5905 }
5906 else if (!want_add
5907 && rtx_cost (trueval, mode, XOR, 1,
5908 optimize_insn_for_speed_p ()) == 0)
5909 {
5910 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5911 normalizep, target_mode);
5912 if (tem)
5913 return expand_binop (target_mode, xor_optab, tem, trueval,
5914 target, INTVAL (trueval) >= 0,
5915 OPTAB_WIDEN);
5916 }
5917 }
5918
5919 delete_insns_since (last);
5920
5921 /* Cannot split ORDERED and UNORDERED, only try the above trick. */
5922 if (code == ORDERED || code == UNORDERED)
5923 return 0;
5924
5925 and_them = split_comparison (code, mode, &first_code, &code);
5926
5927 /* If there are no NaNs, the first comparison should always fall through.
5928 Effectively change the comparison to the other one. */
5929 if (!HONOR_NANS (mode))
5930 {
5931 gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5932 return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5933 target_mode);
5934 }
5935
5936 if (!HAVE_conditional_move)
5937 return 0;
5938
5939 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5940 conditional move. */
5941 tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5942 normalizep, target_mode);
5943 if (tem == 0)
5944 return 0;
5945
5946 if (and_them)
5947 tem = emit_conditional_move (target, code, op0, op1, mode,
5948 tem, const0_rtx, GET_MODE (tem), 0);
5949 else
5950 tem = emit_conditional_move (target, code, op0, op1, mode,
5951 trueval, tem, GET_MODE (tem), 0);
5952
5953 if (tem == 0)
5954 delete_insns_since (last);
5955 return tem;
5956 }
5957
5958 /* The remaining tricks only apply to integer comparisons. */
5959
5960 scalar_int_mode int_mode;
5961 if (is_int_mode (mode, &int_mode))
5962 return emit_store_flag_int (target, subtarget, code, op0, op1, int_mode,
5963 unsignedp, normalizep, trueval);
5964
5965 return 0;
5966 }
5967
5968 /* Like emit_store_flag, but always succeeds. */
5969
5970 rtx
5971 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5972 machine_mode mode, int unsignedp, int normalizep)
5973 {
5974 rtx tem;
5975 rtx_code_label *label;
5976 rtx trueval, falseval;
5977
5978 /* First see if emit_store_flag can do the job. */
5979 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5980 if (tem != 0)
5981 return tem;
5982
5983 if (!target)
5984 target = gen_reg_rtx (word_mode);
5985
5986 /* If this failed, we have to do this with set/compare/jump/set code.
5987 For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */
5988 trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5989 if (code == NE
5990 && GET_MODE_CLASS (mode) == MODE_INT
5991 && REG_P (target)
5992 && op0 == target
5993 && op1 == const0_rtx)
5994 {
5995 label = gen_label_rtx ();
5996 do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, mode,
5997 NULL_RTX, NULL, label,
5998 profile_probability::uninitialized ());
5999 emit_move_insn (target, trueval);
6000 emit_label (label);
6001 return target;
6002 }
6003
6004 if (!REG_P (target)
6005 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
6006 target = gen_reg_rtx (GET_MODE (target));
6007
6008 /* Jump in the right direction if the target cannot implement CODE
6009 but can jump on its reverse condition. */
6010 falseval = const0_rtx;
6011 if (! can_compare_p (code, mode, ccp_jump)
6012 && (! FLOAT_MODE_P (mode)
6013 || code == ORDERED || code == UNORDERED
6014 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
6015 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
6016 {
6017 enum rtx_code rcode;
6018 if (FLOAT_MODE_P (mode))
6019 rcode = reverse_condition_maybe_unordered (code);
6020 else
6021 rcode = reverse_condition (code);
6022
6023 /* Canonicalize to UNORDERED for the libcall. */
6024 if (can_compare_p (rcode, mode, ccp_jump)
6025 || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
6026 {
6027 falseval = trueval;
6028 trueval = const0_rtx;
6029 code = rcode;
6030 }
6031 }
6032
6033 emit_move_insn (target, trueval);
6034 label = gen_label_rtx ();
6035 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, NULL,
6036 label, profile_probability::uninitialized ());
6037
6038 emit_move_insn (target, falseval);
6039 emit_label (label);
6040
6041 return target;
6042 }
6043 \f
6044 /* Perform possibly multi-word comparison and conditional jump to LABEL
6045 if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE. This is
6046 now a thin wrapper around do_compare_rtx_and_jump. */
6047
6048 static void
6049 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
6050 rtx_code_label *label)
6051 {
6052 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
6053 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode, NULL_RTX,
6054 NULL, label, profile_probability::uninitialized ());
6055 }