]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/expmed.cc
Fix profiledbootstrap
[thirdparty/gcc.git] / gcc / expmed.cc
1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987-2023 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 /* Work around tree-optimization/91825. */
22 #pragma GCC diagnostic warning "-Wmaybe-uninitialized"
23
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "backend.h"
28 #include "target.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "predict.h"
32 #include "memmodel.h"
33 #include "tm_p.h"
34 #include "optabs.h"
35 #include "expmed.h"
36 #include "regs.h"
37 #include "emit-rtl.h"
38 #include "diagnostic-core.h"
39 #include "fold-const.h"
40 #include "stor-layout.h"
41 #include "dojump.h"
42 #include "explow.h"
43 #include "expr.h"
44 #include "langhooks.h"
45 #include "tree-vector-builder.h"
46
47 struct target_expmed default_target_expmed;
48 #if SWITCHABLE_TARGET
49 struct target_expmed *this_target_expmed = &default_target_expmed;
50 #endif
51
52 static bool store_integral_bit_field (rtx, opt_scalar_int_mode,
53 unsigned HOST_WIDE_INT,
54 unsigned HOST_WIDE_INT,
55 poly_uint64, poly_uint64,
56 machine_mode, rtx, bool, bool);
57 static void store_fixed_bit_field (rtx, opt_scalar_int_mode,
58 unsigned HOST_WIDE_INT,
59 unsigned HOST_WIDE_INT,
60 poly_uint64, poly_uint64,
61 rtx, scalar_int_mode, bool);
62 static void store_fixed_bit_field_1 (rtx, scalar_int_mode,
63 unsigned HOST_WIDE_INT,
64 unsigned HOST_WIDE_INT,
65 rtx, scalar_int_mode, bool);
66 static void store_split_bit_field (rtx, opt_scalar_int_mode,
67 unsigned HOST_WIDE_INT,
68 unsigned HOST_WIDE_INT,
69 poly_uint64, poly_uint64,
70 rtx, scalar_int_mode, bool);
71 static rtx extract_integral_bit_field (rtx, opt_scalar_int_mode,
72 unsigned HOST_WIDE_INT,
73 unsigned HOST_WIDE_INT, int, rtx,
74 machine_mode, machine_mode, bool, bool);
75 static rtx extract_fixed_bit_field (machine_mode, rtx, opt_scalar_int_mode,
76 unsigned HOST_WIDE_INT,
77 unsigned HOST_WIDE_INT, rtx, int, bool);
78 static rtx extract_fixed_bit_field_1 (machine_mode, rtx, scalar_int_mode,
79 unsigned HOST_WIDE_INT,
80 unsigned HOST_WIDE_INT, rtx, int, bool);
81 static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
82 static rtx extract_split_bit_field (rtx, opt_scalar_int_mode,
83 unsigned HOST_WIDE_INT,
84 unsigned HOST_WIDE_INT, int, bool);
85 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
86 static rtx expand_smod_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
87 static rtx expand_sdiv_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
88
89 /* Return a constant integer mask value of mode MODE with BITSIZE ones
90 followed by BITPOS zeros, or the complement of that if COMPLEMENT.
91 The mask is truncated if necessary to the width of mode MODE. The
92 mask is zero-extended if BITSIZE+BITPOS is too small for MODE. */
93
94 static inline rtx
95 mask_rtx (scalar_int_mode mode, int bitpos, int bitsize, bool complement)
96 {
97 return immed_wide_int_const
98 (wi::shifted_mask (bitpos, bitsize, complement,
99 GET_MODE_PRECISION (mode)), mode);
100 }
101
102 /* Test whether a value is zero of a power of two. */
103 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
104 (((x) & ((x) - HOST_WIDE_INT_1U)) == 0)
105
106 struct init_expmed_rtl
107 {
108 rtx reg;
109 rtx plus;
110 rtx neg;
111 rtx mult;
112 rtx sdiv;
113 rtx udiv;
114 rtx sdiv_32;
115 rtx smod_32;
116 rtx wide_mult;
117 rtx wide_lshr;
118 rtx wide_trunc;
119 rtx shift;
120 rtx shift_mult;
121 rtx shift_add;
122 rtx shift_sub0;
123 rtx shift_sub1;
124 rtx zext;
125 rtx trunc;
126
127 rtx pow2[MAX_BITS_PER_WORD];
128 rtx cint[MAX_BITS_PER_WORD];
129 };
130
131 static void
132 init_expmed_one_conv (struct init_expmed_rtl *all, scalar_int_mode to_mode,
133 scalar_int_mode from_mode, bool speed)
134 {
135 int to_size, from_size;
136 rtx which;
137
138 to_size = GET_MODE_PRECISION (to_mode);
139 from_size = GET_MODE_PRECISION (from_mode);
140
141 /* Most partial integers have a precision less than the "full"
142 integer it requires for storage. In case one doesn't, for
143 comparison purposes here, reduce the bit size by one in that
144 case. */
145 if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
146 && pow2p_hwi (to_size))
147 to_size --;
148 if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
149 && pow2p_hwi (from_size))
150 from_size --;
151
152 /* Assume cost of zero-extend and sign-extend is the same. */
153 which = (to_size < from_size ? all->trunc : all->zext);
154
155 PUT_MODE (all->reg, from_mode);
156 set_convert_cost (to_mode, from_mode, speed,
157 set_src_cost (which, to_mode, speed));
158 /* Restore all->reg's mode. */
159 PUT_MODE (all->reg, to_mode);
160 }
161
162 static void
163 init_expmed_one_mode (struct init_expmed_rtl *all,
164 machine_mode mode, int speed)
165 {
166 int m, n, mode_bitsize;
167 machine_mode mode_from;
168
169 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
170
171 PUT_MODE (all->reg, mode);
172 PUT_MODE (all->plus, mode);
173 PUT_MODE (all->neg, mode);
174 PUT_MODE (all->mult, mode);
175 PUT_MODE (all->sdiv, mode);
176 PUT_MODE (all->udiv, mode);
177 PUT_MODE (all->sdiv_32, mode);
178 PUT_MODE (all->smod_32, mode);
179 PUT_MODE (all->wide_trunc, mode);
180 PUT_MODE (all->shift, mode);
181 PUT_MODE (all->shift_mult, mode);
182 PUT_MODE (all->shift_add, mode);
183 PUT_MODE (all->shift_sub0, mode);
184 PUT_MODE (all->shift_sub1, mode);
185 PUT_MODE (all->zext, mode);
186 PUT_MODE (all->trunc, mode);
187
188 set_add_cost (speed, mode, set_src_cost (all->plus, mode, speed));
189 set_neg_cost (speed, mode, set_src_cost (all->neg, mode, speed));
190 set_mul_cost (speed, mode, set_src_cost (all->mult, mode, speed));
191 set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, mode, speed));
192 set_udiv_cost (speed, mode, set_src_cost (all->udiv, mode, speed));
193
194 set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, mode, speed)
195 <= 2 * add_cost (speed, mode)));
196 set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, mode, speed)
197 <= 4 * add_cost (speed, mode)));
198
199 set_shift_cost (speed, mode, 0, 0);
200 {
201 int cost = add_cost (speed, mode);
202 set_shiftadd_cost (speed, mode, 0, cost);
203 set_shiftsub0_cost (speed, mode, 0, cost);
204 set_shiftsub1_cost (speed, mode, 0, cost);
205 }
206
207 n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
208 for (m = 1; m < n; m++)
209 {
210 XEXP (all->shift, 1) = all->cint[m];
211 XEXP (all->shift_mult, 1) = all->pow2[m];
212
213 set_shift_cost (speed, mode, m, set_src_cost (all->shift, mode, speed));
214 set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, mode,
215 speed));
216 set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, mode,
217 speed));
218 set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, mode,
219 speed));
220 }
221
222 scalar_int_mode int_mode_to;
223 if (is_a <scalar_int_mode> (mode, &int_mode_to))
224 {
225 for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
226 mode_from = (machine_mode)(mode_from + 1))
227 init_expmed_one_conv (all, int_mode_to,
228 as_a <scalar_int_mode> (mode_from), speed);
229
230 scalar_int_mode wider_mode;
231 if (GET_MODE_CLASS (int_mode_to) == MODE_INT
232 && GET_MODE_WIDER_MODE (int_mode_to).exists (&wider_mode))
233 {
234 PUT_MODE (all->reg, mode);
235 PUT_MODE (all->zext, wider_mode);
236 PUT_MODE (all->wide_mult, wider_mode);
237 PUT_MODE (all->wide_lshr, wider_mode);
238 XEXP (all->wide_lshr, 1)
239 = gen_int_shift_amount (wider_mode, mode_bitsize);
240
241 set_mul_widen_cost (speed, wider_mode,
242 set_src_cost (all->wide_mult, wider_mode, speed));
243 set_mul_highpart_cost (speed, int_mode_to,
244 set_src_cost (all->wide_trunc,
245 int_mode_to, speed));
246 }
247 }
248 }
249
250 void
251 init_expmed (void)
252 {
253 struct init_expmed_rtl all;
254 machine_mode mode = QImode;
255 int m, speed;
256
257 memset (&all, 0, sizeof all);
258 for (m = 1; m < MAX_BITS_PER_WORD; m++)
259 {
260 all.pow2[m] = GEN_INT (HOST_WIDE_INT_1 << m);
261 all.cint[m] = GEN_INT (m);
262 }
263
264 /* Avoid using hard regs in ways which may be unsupported. */
265 all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
266 all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
267 all.neg = gen_rtx_NEG (mode, all.reg);
268 all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
269 all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
270 all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
271 all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
272 all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
273 all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
274 all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
275 all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
276 all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
277 all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
278 all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
279 all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
280 all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
281 all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
282 all.trunc = gen_rtx_TRUNCATE (mode, all.reg);
283
284 for (speed = 0; speed < 2; speed++)
285 {
286 crtl->maybe_hot_insn_p = speed;
287 set_zero_cost (speed, set_src_cost (const0_rtx, mode, speed));
288
289 for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
290 mode = (machine_mode)(mode + 1))
291 init_expmed_one_mode (&all, mode, speed);
292
293 if (MIN_MODE_PARTIAL_INT != VOIDmode)
294 for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
295 mode = (machine_mode)(mode + 1))
296 init_expmed_one_mode (&all, mode, speed);
297
298 if (MIN_MODE_VECTOR_INT != VOIDmode)
299 for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
300 mode = (machine_mode)(mode + 1))
301 init_expmed_one_mode (&all, mode, speed);
302 }
303
304 if (alg_hash_used_p ())
305 {
306 struct alg_hash_entry *p = alg_hash_entry_ptr (0);
307 memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
308 }
309 else
310 set_alg_hash_used_p (true);
311 default_rtl_profile ();
312
313 ggc_free (all.trunc);
314 ggc_free (all.shift_sub1);
315 ggc_free (all.shift_sub0);
316 ggc_free (all.shift_add);
317 ggc_free (all.shift_mult);
318 ggc_free (all.shift);
319 ggc_free (all.wide_trunc);
320 ggc_free (all.wide_lshr);
321 ggc_free (all.wide_mult);
322 ggc_free (all.zext);
323 ggc_free (all.smod_32);
324 ggc_free (all.sdiv_32);
325 ggc_free (all.udiv);
326 ggc_free (all.sdiv);
327 ggc_free (all.mult);
328 ggc_free (all.neg);
329 ggc_free (all.plus);
330 ggc_free (all.reg);
331 }
332
333 /* Return an rtx representing minus the value of X.
334 MODE is the intended mode of the result,
335 useful if X is a CONST_INT. */
336
337 rtx
338 negate_rtx (machine_mode mode, rtx x)
339 {
340 rtx result = simplify_unary_operation (NEG, mode, x, mode);
341
342 if (result == 0)
343 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
344
345 return result;
346 }
347
348 /* Whether reverse storage order is supported on the target. */
349 static int reverse_storage_order_supported = -1;
350
351 /* Check whether reverse storage order is supported on the target. */
352
353 static void
354 check_reverse_storage_order_support (void)
355 {
356 if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
357 {
358 reverse_storage_order_supported = 0;
359 sorry ("reverse scalar storage order");
360 }
361 else
362 reverse_storage_order_supported = 1;
363 }
364
365 /* Whether reverse FP storage order is supported on the target. */
366 static int reverse_float_storage_order_supported = -1;
367
368 /* Check whether reverse FP storage order is supported on the target. */
369
370 static void
371 check_reverse_float_storage_order_support (void)
372 {
373 if (FLOAT_WORDS_BIG_ENDIAN != WORDS_BIG_ENDIAN)
374 {
375 reverse_float_storage_order_supported = 0;
376 sorry ("reverse floating-point scalar storage order");
377 }
378 else
379 reverse_float_storage_order_supported = 1;
380 }
381
382 /* Return an rtx representing value of X with reverse storage order.
383 MODE is the intended mode of the result,
384 useful if X is a CONST_INT. */
385
386 rtx
387 flip_storage_order (machine_mode mode, rtx x)
388 {
389 scalar_int_mode int_mode;
390 rtx result;
391
392 if (mode == QImode)
393 return x;
394
395 if (COMPLEX_MODE_P (mode))
396 {
397 rtx real = read_complex_part (x, false);
398 rtx imag = read_complex_part (x, true);
399
400 real = flip_storage_order (GET_MODE_INNER (mode), real);
401 imag = flip_storage_order (GET_MODE_INNER (mode), imag);
402
403 return gen_rtx_CONCAT (mode, real, imag);
404 }
405
406 if (UNLIKELY (reverse_storage_order_supported < 0))
407 check_reverse_storage_order_support ();
408
409 if (!is_a <scalar_int_mode> (mode, &int_mode))
410 {
411 if (FLOAT_MODE_P (mode)
412 && UNLIKELY (reverse_float_storage_order_supported < 0))
413 check_reverse_float_storage_order_support ();
414
415 if (!int_mode_for_size (GET_MODE_PRECISION (mode), 0).exists (&int_mode)
416 || !targetm.scalar_mode_supported_p (int_mode))
417 {
418 sorry ("reverse storage order for %smode", GET_MODE_NAME (mode));
419 return x;
420 }
421 x = gen_lowpart (int_mode, x);
422 }
423
424 result = simplify_unary_operation (BSWAP, int_mode, x, int_mode);
425 if (result == 0)
426 result = expand_unop (int_mode, bswap_optab, x, NULL_RTX, 1);
427
428 if (int_mode != mode)
429 result = gen_lowpart (mode, result);
430
431 return result;
432 }
433
434 /* If MODE is set, adjust bitfield memory MEM so that it points to the
435 first unit of mode MODE that contains a bitfield of size BITSIZE at
436 bit position BITNUM. If MODE is not set, return a BLKmode reference
437 to every byte in the bitfield. Set *NEW_BITNUM to the bit position
438 of the field within the new memory. */
439
440 static rtx
441 narrow_bit_field_mem (rtx mem, opt_scalar_int_mode mode,
442 unsigned HOST_WIDE_INT bitsize,
443 unsigned HOST_WIDE_INT bitnum,
444 unsigned HOST_WIDE_INT *new_bitnum)
445 {
446 scalar_int_mode imode;
447 if (mode.exists (&imode))
448 {
449 unsigned int unit = GET_MODE_BITSIZE (imode);
450 *new_bitnum = bitnum % unit;
451 HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
452 return adjust_bitfield_address (mem, imode, offset);
453 }
454 else
455 {
456 *new_bitnum = bitnum % BITS_PER_UNIT;
457 HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
458 HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
459 / BITS_PER_UNIT);
460 return adjust_bitfield_address_size (mem, BLKmode, offset, size);
461 }
462 }
463
464 /* The caller wants to perform insertion or extraction PATTERN on a
465 bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
466 BITREGION_START and BITREGION_END are as for store_bit_field
467 and FIELDMODE is the natural mode of the field.
468
469 Search for a mode that is compatible with the memory access
470 restrictions and (where applicable) with a register insertion or
471 extraction. Return the new memory on success, storing the adjusted
472 bit position in *NEW_BITNUM. Return null otherwise. */
473
474 static rtx
475 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
476 rtx op0, HOST_WIDE_INT bitsize,
477 HOST_WIDE_INT bitnum,
478 poly_uint64 bitregion_start,
479 poly_uint64 bitregion_end,
480 machine_mode fieldmode,
481 unsigned HOST_WIDE_INT *new_bitnum)
482 {
483 bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
484 bitregion_end, MEM_ALIGN (op0),
485 MEM_VOLATILE_P (op0));
486 scalar_int_mode best_mode;
487 if (iter.next_mode (&best_mode))
488 {
489 /* We can use a memory in BEST_MODE. See whether this is true for
490 any wider modes. All other things being equal, we prefer to
491 use the widest mode possible because it tends to expose more
492 CSE opportunities. */
493 if (!iter.prefer_smaller_modes ())
494 {
495 /* Limit the search to the mode required by the corresponding
496 register insertion or extraction instruction, if any. */
497 scalar_int_mode limit_mode = word_mode;
498 extraction_insn insn;
499 if (get_best_reg_extraction_insn (&insn, pattern,
500 GET_MODE_BITSIZE (best_mode),
501 fieldmode))
502 limit_mode = insn.field_mode;
503
504 scalar_int_mode wider_mode;
505 while (iter.next_mode (&wider_mode)
506 && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
507 best_mode = wider_mode;
508 }
509 return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
510 new_bitnum);
511 }
512 return NULL_RTX;
513 }
514
515 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
516 a structure of mode STRUCT_MODE represents a lowpart subreg. The subreg
517 offset is then BITNUM / BITS_PER_UNIT. */
518
519 static bool
520 lowpart_bit_field_p (poly_uint64 bitnum, poly_uint64 bitsize,
521 machine_mode struct_mode)
522 {
523 poly_uint64 regsize = REGMODE_NATURAL_SIZE (struct_mode);
524 if (BYTES_BIG_ENDIAN)
525 return (multiple_p (bitnum, BITS_PER_UNIT)
526 && (known_eq (bitnum + bitsize, GET_MODE_BITSIZE (struct_mode))
527 || multiple_p (bitnum + bitsize,
528 regsize * BITS_PER_UNIT)));
529 else
530 return multiple_p (bitnum, regsize * BITS_PER_UNIT);
531 }
532
533 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
534 containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
535 Return false if the access would touch memory outside the range
536 BITREGION_START to BITREGION_END for conformance to the C++ memory
537 model. */
538
539 static bool
540 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
541 unsigned HOST_WIDE_INT bitnum,
542 scalar_int_mode fieldmode,
543 poly_uint64 bitregion_start,
544 poly_uint64 bitregion_end)
545 {
546 unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
547
548 /* -fstrict-volatile-bitfields must be enabled and we must have a
549 volatile MEM. */
550 if (!MEM_P (op0)
551 || !MEM_VOLATILE_P (op0)
552 || flag_strict_volatile_bitfields <= 0)
553 return false;
554
555 /* The bit size must not be larger than the field mode, and
556 the field mode must not be larger than a word. */
557 if (bitsize > modesize || modesize > BITS_PER_WORD)
558 return false;
559
560 /* Check for cases of unaligned fields that must be split. */
561 if (bitnum % modesize + bitsize > modesize)
562 return false;
563
564 /* The memory must be sufficiently aligned for a MODESIZE access.
565 This condition guarantees, that the memory access will not
566 touch anything after the end of the structure. */
567 if (MEM_ALIGN (op0) < modesize)
568 return false;
569
570 /* Check for cases where the C++ memory model applies. */
571 if (maybe_ne (bitregion_end, 0U)
572 && (maybe_lt (bitnum - bitnum % modesize, bitregion_start)
573 || maybe_gt (bitnum - bitnum % modesize + modesize - 1,
574 bitregion_end)))
575 return false;
576
577 return true;
578 }
579
580 /* Return true if OP is a memory and if a bitfield of size BITSIZE at
581 bit number BITNUM can be treated as a simple value of mode MODE.
582 Store the byte offset in *BYTENUM if so. */
583
584 static bool
585 simple_mem_bitfield_p (rtx op0, poly_uint64 bitsize, poly_uint64 bitnum,
586 machine_mode mode, poly_uint64 *bytenum)
587 {
588 return (MEM_P (op0)
589 && multiple_p (bitnum, BITS_PER_UNIT, bytenum)
590 && known_eq (bitsize, GET_MODE_BITSIZE (mode))
591 && (!targetm.slow_unaligned_access (mode, MEM_ALIGN (op0))
592 || (multiple_p (bitnum, GET_MODE_ALIGNMENT (mode))
593 && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
594 }
595 \f
596 /* Try to use instruction INSV to store VALUE into a field of OP0.
597 If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is a
598 BLKmode MEM. VALUE_MODE is the mode of VALUE. BITSIZE and BITNUM
599 are as for store_bit_field. */
600
601 static bool
602 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
603 opt_scalar_int_mode op0_mode,
604 unsigned HOST_WIDE_INT bitsize,
605 unsigned HOST_WIDE_INT bitnum,
606 rtx value, scalar_int_mode value_mode)
607 {
608 class expand_operand ops[4];
609 rtx value1;
610 rtx xop0 = op0;
611 rtx_insn *last = get_last_insn ();
612 bool copy_back = false;
613
614 scalar_int_mode op_mode = insv->field_mode;
615 unsigned int unit = GET_MODE_BITSIZE (op_mode);
616 if (bitsize == 0 || bitsize > unit)
617 return false;
618
619 if (MEM_P (xop0))
620 /* Get a reference to the first byte of the field. */
621 xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
622 &bitnum);
623 else
624 {
625 /* Convert from counting within OP0 to counting in OP_MODE. */
626 if (BYTES_BIG_ENDIAN)
627 bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());
628
629 /* If xop0 is a register, we need it in OP_MODE
630 to make it acceptable to the format of insv. */
631 if (GET_CODE (xop0) == SUBREG)
632 {
633 /* If such a SUBREG can't be created, give up. */
634 if (!validate_subreg (op_mode, GET_MODE (SUBREG_REG (xop0)),
635 SUBREG_REG (xop0), SUBREG_BYTE (xop0)))
636 return false;
637 /* We can't just change the mode, because this might clobber op0,
638 and we will need the original value of op0 if insv fails. */
639 xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0),
640 SUBREG_BYTE (xop0));
641 }
642 if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
643 xop0 = gen_lowpart_SUBREG (op_mode, xop0);
644 }
645
646 /* If the destination is a paradoxical subreg such that we need a
647 truncate to the inner mode, perform the insertion on a temporary and
648 truncate the result to the original destination. Note that we can't
649 just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
650 X) 0)) is (reg:N X). */
651 if (GET_CODE (xop0) == SUBREG
652 && REG_P (SUBREG_REG (xop0))
653 && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
654 op_mode))
655 {
656 rtx tem = gen_reg_rtx (op_mode);
657 emit_move_insn (tem, xop0);
658 xop0 = tem;
659 copy_back = true;
660 }
661
662 /* There are similar overflow check at the start of store_bit_field_1,
663 but that only check the situation where the field lies completely
664 outside the register, while there do have situation where the field
665 lies partialy in the register, we need to adjust bitsize for this
666 partial overflow situation. Without this fix, pr48335-2.c on big-endian
667 will broken on those arch support bit insert instruction, like arm, aarch64
668 etc. */
669 if (bitsize + bitnum > unit && bitnum < unit)
670 {
671 warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
672 "destination object, data truncated into %wu-bit",
673 bitsize, unit - bitnum);
674 bitsize = unit - bitnum;
675 }
676
677 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
678 "backwards" from the size of the unit we are inserting into.
679 Otherwise, we count bits from the most significant on a
680 BYTES/BITS_BIG_ENDIAN machine. */
681
682 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
683 bitnum = unit - bitsize - bitnum;
684
685 /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
686 value1 = value;
687 if (value_mode != op_mode)
688 {
689 if (GET_MODE_BITSIZE (value_mode) >= bitsize)
690 {
691 rtx tmp;
692 /* Optimization: Don't bother really extending VALUE
693 if it has all the bits we will actually use. However,
694 if we must narrow it, be sure we do it correctly. */
695
696 if (GET_MODE_SIZE (value_mode) < GET_MODE_SIZE (op_mode))
697 {
698 tmp = simplify_subreg (op_mode, value1, value_mode, 0);
699 if (! tmp)
700 tmp = simplify_gen_subreg (op_mode,
701 force_reg (value_mode, value1),
702 value_mode, 0);
703 }
704 else
705 {
706 tmp = gen_lowpart_if_possible (op_mode, value1);
707 if (! tmp)
708 tmp = gen_lowpart (op_mode, force_reg (value_mode, value1));
709 }
710 value1 = tmp;
711 }
712 else if (CONST_INT_P (value))
713 value1 = gen_int_mode (INTVAL (value), op_mode);
714 else
715 /* Parse phase is supposed to make VALUE's data type
716 match that of the component reference, which is a type
717 at least as wide as the field; so VALUE should have
718 a mode that corresponds to that type. */
719 gcc_assert (CONSTANT_P (value));
720 }
721
722 create_fixed_operand (&ops[0], xop0);
723 create_integer_operand (&ops[1], bitsize);
724 create_integer_operand (&ops[2], bitnum);
725 create_input_operand (&ops[3], value1, op_mode);
726 if (maybe_expand_insn (insv->icode, 4, ops))
727 {
728 if (copy_back)
729 convert_move (op0, xop0, true);
730 return true;
731 }
732 delete_insns_since (last);
733 return false;
734 }
735
736 /* A subroutine of store_bit_field, with the same arguments. Return true
737 if the operation could be implemented.
738
739 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
740 no other way of implementing the operation. If FALLBACK_P is false,
741 return false instead.
742
743 if UNDEFINED_P is true then STR_RTX is undefined and may be set using
744 a subreg instead. */
745
746 static bool
747 store_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
748 poly_uint64 bitregion_start, poly_uint64 bitregion_end,
749 machine_mode fieldmode,
750 rtx value, bool reverse, bool fallback_p, bool undefined_p)
751 {
752 rtx op0 = str_rtx;
753
754 while (GET_CODE (op0) == SUBREG)
755 {
756 bitnum += subreg_memory_offset (op0) * BITS_PER_UNIT;
757 op0 = SUBREG_REG (op0);
758 }
759
760 /* No action is needed if the target is a register and if the field
761 lies completely outside that register. This can occur if the source
762 code contains an out-of-bounds access to a small array. */
763 if (REG_P (op0) && known_ge (bitnum, GET_MODE_BITSIZE (GET_MODE (op0))))
764 return true;
765
766 /* Use vec_set patterns for inserting parts of vectors whenever
767 available. */
768 machine_mode outermode = GET_MODE (op0);
769 scalar_mode innermode = GET_MODE_INNER (outermode);
770 poly_uint64 pos;
771 if (VECTOR_MODE_P (outermode)
772 && !MEM_P (op0)
773 && optab_handler (vec_set_optab, outermode) != CODE_FOR_nothing
774 && fieldmode == innermode
775 && known_eq (bitsize, GET_MODE_BITSIZE (innermode))
776 && multiple_p (bitnum, GET_MODE_BITSIZE (innermode), &pos))
777 {
778 class expand_operand ops[3];
779 enum insn_code icode = optab_handler (vec_set_optab, outermode);
780
781 create_fixed_operand (&ops[0], op0);
782 create_input_operand (&ops[1], value, innermode);
783 create_integer_operand (&ops[2], pos);
784 if (maybe_expand_insn (icode, 3, ops))
785 return true;
786 }
787
788 /* If the target is a register, overwriting the entire object, or storing
789 a full-word or multi-word field can be done with just a SUBREG. */
790 if (!MEM_P (op0)
791 && known_eq (bitsize, GET_MODE_BITSIZE (fieldmode)))
792 {
793 /* Use the subreg machinery either to narrow OP0 to the required
794 words or to cope with mode punning between equal-sized modes.
795 In the latter case, use subreg on the rhs side, not lhs. */
796 rtx sub;
797 poly_uint64 bytenum;
798 poly_uint64 regsize = REGMODE_NATURAL_SIZE (GET_MODE (op0));
799 if (known_eq (bitnum, 0U)
800 && known_eq (bitsize, GET_MODE_BITSIZE (GET_MODE (op0))))
801 {
802 sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
803 if (sub)
804 {
805 if (reverse)
806 sub = flip_storage_order (GET_MODE (op0), sub);
807 emit_move_insn (op0, sub);
808 return true;
809 }
810 }
811 else if (multiple_p (bitnum, BITS_PER_UNIT, &bytenum)
812 && (undefined_p
813 || (multiple_p (bitnum, regsize * BITS_PER_UNIT)
814 && multiple_p (bitsize, regsize * BITS_PER_UNIT)))
815 && known_ge (GET_MODE_BITSIZE (GET_MODE (op0)), bitsize))
816 {
817 sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0), bytenum);
818 if (sub)
819 {
820 if (reverse)
821 value = flip_storage_order (fieldmode, value);
822 emit_move_insn (sub, value);
823 return true;
824 }
825 }
826 }
827
828 /* If the target is memory, storing any naturally aligned field can be
829 done with a simple store. For targets that support fast unaligned
830 memory, any naturally sized, unit aligned field can be done directly. */
831 poly_uint64 bytenum;
832 if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode, &bytenum))
833 {
834 op0 = adjust_bitfield_address (op0, fieldmode, bytenum);
835 if (reverse)
836 value = flip_storage_order (fieldmode, value);
837 emit_move_insn (op0, value);
838 return true;
839 }
840
841 /* It's possible we'll need to handle other cases here for
842 polynomial bitnum and bitsize. */
843
844 /* From here on we need to be looking at a fixed-size insertion. */
845 unsigned HOST_WIDE_INT ibitsize = bitsize.to_constant ();
846 unsigned HOST_WIDE_INT ibitnum = bitnum.to_constant ();
847
848 /* Make sure we are playing with integral modes. Pun with subregs
849 if we aren't. This must come after the entire register case above,
850 since that case is valid for any mode. The following cases are only
851 valid for integral modes. */
852 opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
853 scalar_int_mode imode;
854 if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
855 {
856 if (MEM_P (op0))
857 op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
858 0, MEM_SIZE (op0));
859 else if (!op0_mode.exists ())
860 {
861 if (ibitnum == 0
862 && known_eq (ibitsize, GET_MODE_BITSIZE (GET_MODE (op0)))
863 && MEM_P (value)
864 && !reverse)
865 {
866 value = adjust_address (value, GET_MODE (op0), 0);
867 emit_move_insn (op0, value);
868 return true;
869 }
870 if (!fallback_p)
871 return false;
872 rtx temp = assign_stack_temp (GET_MODE (op0),
873 GET_MODE_SIZE (GET_MODE (op0)));
874 emit_move_insn (temp, op0);
875 store_bit_field_1 (temp, bitsize, bitnum, 0, 0, fieldmode, value,
876 reverse, fallback_p, undefined_p);
877 emit_move_insn (op0, temp);
878 return true;
879 }
880 else
881 op0 = gen_lowpart (op0_mode.require (), op0);
882 }
883
884 return store_integral_bit_field (op0, op0_mode, ibitsize, ibitnum,
885 bitregion_start, bitregion_end,
886 fieldmode, value, reverse, fallback_p);
887 }
888
889 /* Subroutine of store_bit_field_1, with the same arguments, except
890 that BITSIZE and BITNUM are constant. Handle cases specific to
891 integral modes. If OP0_MODE is defined, it is the mode of OP0,
892 otherwise OP0 is a BLKmode MEM. */
893
894 static bool
895 store_integral_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
896 unsigned HOST_WIDE_INT bitsize,
897 unsigned HOST_WIDE_INT bitnum,
898 poly_uint64 bitregion_start,
899 poly_uint64 bitregion_end,
900 machine_mode fieldmode,
901 rtx value, bool reverse, bool fallback_p)
902 {
903 /* Storing an lsb-aligned field in a register
904 can be done with a movstrict instruction. */
905
906 if (!MEM_P (op0)
907 && !reverse
908 && lowpart_bit_field_p (bitnum, bitsize, op0_mode.require ())
909 && known_eq (bitsize, GET_MODE_BITSIZE (fieldmode))
910 && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
911 {
912 class expand_operand ops[2];
913 enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
914 rtx arg0 = op0;
915 unsigned HOST_WIDE_INT subreg_off;
916
917 if (GET_CODE (arg0) == SUBREG)
918 {
919 /* Else we've got some float mode source being extracted into
920 a different float mode destination -- this combination of
921 subregs results in Severe Tire Damage. */
922 gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
923 || GET_MODE_CLASS (fieldmode) == MODE_INT
924 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
925 arg0 = SUBREG_REG (arg0);
926 }
927
928 subreg_off = bitnum / BITS_PER_UNIT;
929 if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off)
930 /* STRICT_LOW_PART must have a non-paradoxical subreg as
931 operand. */
932 && !paradoxical_subreg_p (fieldmode, GET_MODE (arg0)))
933 {
934 arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
935
936 create_fixed_operand (&ops[0], arg0);
937 /* Shrink the source operand to FIELDMODE. */
938 create_convert_operand_to (&ops[1], value, fieldmode, false);
939 if (maybe_expand_insn (icode, 2, ops))
940 return true;
941 }
942 }
943
944 /* Handle fields bigger than a word. */
945
946 if (bitsize > BITS_PER_WORD)
947 {
948 /* Here we transfer the words of the field
949 in the order least significant first.
950 This is because the most significant word is the one which may
951 be less than full.
952 However, only do that if the value is not BLKmode. */
953
954 const bool backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
955 const int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
956 rtx_insn *last;
957
958 /* This is the mode we must force value to, so that there will be enough
959 subwords to extract. Note that fieldmode will often (always?) be
960 VOIDmode, because that is what store_field uses to indicate that this
961 is a bit field, but passing VOIDmode to operand_subword_force
962 is not allowed.
963
964 The mode must be fixed-size, since insertions into variable-sized
965 objects are meant to be handled before calling this function. */
966 fixed_size_mode value_mode = as_a <fixed_size_mode> (GET_MODE (value));
967 if (value_mode == VOIDmode)
968 value_mode = smallest_int_mode_for_size (nwords * BITS_PER_WORD);
969
970 last = get_last_insn ();
971 for (int i = 0; i < nwords; i++)
972 {
973 /* Number of bits to be stored in this iteration, i.e. BITS_PER_WORD
974 except maybe for the last iteration. */
975 const unsigned HOST_WIDE_INT new_bitsize
976 = MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
977 /* Bit offset from the starting bit number in the target. */
978 const unsigned int bit_offset
979 = backwards ^ reverse
980 ? MAX ((int) bitsize - (i + 1) * BITS_PER_WORD, 0)
981 : i * BITS_PER_WORD;
982 /* Starting word number in the value. */
983 const unsigned int wordnum
984 = backwards
985 ? GET_MODE_SIZE (value_mode) / UNITS_PER_WORD - (i + 1)
986 : i;
987 /* The chunk of the value in word_mode. We use bit-field extraction
988 in BLKmode to handle unaligned memory references and to shift the
989 last chunk right on big-endian machines if need be. */
990 rtx value_word
991 = fieldmode == BLKmode
992 ? extract_bit_field (value, new_bitsize, wordnum * BITS_PER_WORD,
993 1, NULL_RTX, word_mode, word_mode, false,
994 NULL)
995 : operand_subword_force (value, wordnum, value_mode);
996
997 if (!store_bit_field_1 (op0, new_bitsize,
998 bitnum + bit_offset,
999 bitregion_start, bitregion_end,
1000 word_mode,
1001 value_word, reverse, fallback_p, false))
1002 {
1003 delete_insns_since (last);
1004 return false;
1005 }
1006 }
1007 return true;
1008 }
1009
1010 /* If VALUE has a floating-point or complex mode, access it as an
1011 integer of the corresponding size. This can occur on a machine
1012 with 64 bit registers that uses SFmode for float. It can also
1013 occur for unaligned float or complex fields. */
1014 rtx orig_value = value;
1015 scalar_int_mode value_mode;
1016 if (GET_MODE (value) == VOIDmode)
1017 /* By this point we've dealt with values that are bigger than a word,
1018 so word_mode is a conservatively correct choice. */
1019 value_mode = word_mode;
1020 else if (!is_a <scalar_int_mode> (GET_MODE (value), &value_mode))
1021 {
1022 value_mode = int_mode_for_mode (GET_MODE (value)).require ();
1023 value = gen_reg_rtx (value_mode);
1024 emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
1025 }
1026
1027 /* If OP0 is a multi-word register, narrow it to the affected word.
1028 If the region spans two words, defer to store_split_bit_field.
1029 Don't do this if op0 is a single hard register wider than word
1030 such as a float or vector register. */
1031 if (!MEM_P (op0)
1032 && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD
1033 && (!REG_P (op0)
1034 || !HARD_REGISTER_P (op0)
1035 || hard_regno_nregs (REGNO (op0), op0_mode.require ()) != 1))
1036 {
1037 if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
1038 {
1039 if (!fallback_p)
1040 return false;
1041
1042 store_split_bit_field (op0, op0_mode, bitsize, bitnum,
1043 bitregion_start, bitregion_end,
1044 value, value_mode, reverse);
1045 return true;
1046 }
1047 op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (),
1048 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1049 gcc_assert (op0);
1050 op0_mode = word_mode;
1051 bitnum %= BITS_PER_WORD;
1052 }
1053
1054 /* From here on we can assume that the field to be stored in fits
1055 within a word. If the destination is a register, it too fits
1056 in a word. */
1057
1058 extraction_insn insv;
1059 if (!MEM_P (op0)
1060 && !reverse
1061 && get_best_reg_extraction_insn (&insv, EP_insv,
1062 GET_MODE_BITSIZE (op0_mode.require ()),
1063 fieldmode)
1064 && store_bit_field_using_insv (&insv, op0, op0_mode,
1065 bitsize, bitnum, value, value_mode))
1066 return true;
1067
1068 /* If OP0 is a memory, try copying it to a register and seeing if a
1069 cheap register alternative is available. */
1070 if (MEM_P (op0) && !reverse)
1071 {
1072 if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
1073 fieldmode)
1074 && store_bit_field_using_insv (&insv, op0, op0_mode,
1075 bitsize, bitnum, value, value_mode))
1076 return true;
1077
1078 rtx_insn *last = get_last_insn ();
1079
1080 /* Try loading part of OP0 into a register, inserting the bitfield
1081 into that, and then copying the result back to OP0. */
1082 unsigned HOST_WIDE_INT bitpos;
1083 rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
1084 bitregion_start, bitregion_end,
1085 fieldmode, &bitpos);
1086 if (xop0)
1087 {
1088 rtx tempreg = copy_to_reg (xop0);
1089 if (store_bit_field_1 (tempreg, bitsize, bitpos,
1090 bitregion_start, bitregion_end,
1091 fieldmode, orig_value, reverse, false, false))
1092 {
1093 emit_move_insn (xop0, tempreg);
1094 return true;
1095 }
1096 delete_insns_since (last);
1097 }
1098 }
1099
1100 if (!fallback_p)
1101 return false;
1102
1103 store_fixed_bit_field (op0, op0_mode, bitsize, bitnum, bitregion_start,
1104 bitregion_end, value, value_mode, reverse);
1105 return true;
1106 }
1107
1108 /* Generate code to store value from rtx VALUE
1109 into a bit-field within structure STR_RTX
1110 containing BITSIZE bits starting at bit BITNUM.
1111
1112 BITREGION_START is bitpos of the first bitfield in this region.
1113 BITREGION_END is the bitpos of the ending bitfield in this region.
1114 These two fields are 0, if the C++ memory model does not apply,
1115 or we are not interested in keeping track of bitfield regions.
1116
1117 FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
1118
1119 If REVERSE is true, the store is to be done in reverse order.
1120
1121 If UNDEFINED_P is true then STR_RTX is currently undefined. */
1122
1123 void
1124 store_bit_field (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
1125 poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1126 machine_mode fieldmode,
1127 rtx value, bool reverse, bool undefined_p)
1128 {
1129 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
1130 unsigned HOST_WIDE_INT ibitsize = 0, ibitnum = 0;
1131 scalar_int_mode int_mode;
1132 if (bitsize.is_constant (&ibitsize)
1133 && bitnum.is_constant (&ibitnum)
1134 && is_a <scalar_int_mode> (fieldmode, &int_mode)
1135 && strict_volatile_bitfield_p (str_rtx, ibitsize, ibitnum, int_mode,
1136 bitregion_start, bitregion_end))
1137 {
1138 /* Storing of a full word can be done with a simple store.
1139 We know here that the field can be accessed with one single
1140 instruction. For targets that support unaligned memory,
1141 an unaligned access may be necessary. */
1142 if (ibitsize == GET_MODE_BITSIZE (int_mode))
1143 {
1144 str_rtx = adjust_bitfield_address (str_rtx, int_mode,
1145 ibitnum / BITS_PER_UNIT);
1146 if (reverse)
1147 value = flip_storage_order (int_mode, value);
1148 gcc_assert (ibitnum % BITS_PER_UNIT == 0);
1149 emit_move_insn (str_rtx, value);
1150 }
1151 else
1152 {
1153 rtx temp;
1154
1155 str_rtx = narrow_bit_field_mem (str_rtx, int_mode, ibitsize,
1156 ibitnum, &ibitnum);
1157 gcc_assert (ibitnum + ibitsize <= GET_MODE_BITSIZE (int_mode));
1158 temp = copy_to_reg (str_rtx);
1159 if (!store_bit_field_1 (temp, ibitsize, ibitnum, 0, 0,
1160 int_mode, value, reverse, true, undefined_p))
1161 gcc_unreachable ();
1162
1163 emit_move_insn (str_rtx, temp);
1164 }
1165
1166 return;
1167 }
1168
1169 /* Under the C++0x memory model, we must not touch bits outside the
1170 bit region. Adjust the address to start at the beginning of the
1171 bit region. */
1172 if (MEM_P (str_rtx) && maybe_ne (bitregion_start, 0U))
1173 {
1174 scalar_int_mode best_mode;
1175 machine_mode addr_mode = VOIDmode;
1176
1177 poly_uint64 offset = exact_div (bitregion_start, BITS_PER_UNIT);
1178 bitnum -= bitregion_start;
1179 poly_int64 size = bits_to_bytes_round_up (bitnum + bitsize);
1180 bitregion_end -= bitregion_start;
1181 bitregion_start = 0;
1182 if (bitsize.is_constant (&ibitsize)
1183 && bitnum.is_constant (&ibitnum)
1184 && get_best_mode (ibitsize, ibitnum,
1185 bitregion_start, bitregion_end,
1186 MEM_ALIGN (str_rtx), INT_MAX,
1187 MEM_VOLATILE_P (str_rtx), &best_mode))
1188 addr_mode = best_mode;
1189 str_rtx = adjust_bitfield_address_size (str_rtx, addr_mode,
1190 offset, size);
1191 }
1192
1193 if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
1194 bitregion_start, bitregion_end,
1195 fieldmode, value, reverse, true, undefined_p))
1196 gcc_unreachable ();
1197 }
1198 \f
1199 /* Use shifts and boolean operations to store VALUE into a bit field of
1200 width BITSIZE in OP0, starting at bit BITNUM. If OP0_MODE is defined,
1201 it is the mode of OP0, otherwise OP0 is a BLKmode MEM. VALUE_MODE is
1202 the mode of VALUE.
1203
1204 If REVERSE is true, the store is to be done in reverse order. */
1205
1206 static void
1207 store_fixed_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1208 unsigned HOST_WIDE_INT bitsize,
1209 unsigned HOST_WIDE_INT bitnum,
1210 poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1211 rtx value, scalar_int_mode value_mode, bool reverse)
1212 {
1213 /* There is a case not handled here:
1214 a structure with a known alignment of just a halfword
1215 and a field split across two aligned halfwords within the structure.
1216 Or likewise a structure with a known alignment of just a byte
1217 and a field split across two bytes.
1218 Such cases are not supposed to be able to occur. */
1219
1220 scalar_int_mode best_mode;
1221 if (MEM_P (op0))
1222 {
1223 unsigned int max_bitsize = BITS_PER_WORD;
1224 scalar_int_mode imode;
1225 if (op0_mode.exists (&imode) && GET_MODE_BITSIZE (imode) < max_bitsize)
1226 max_bitsize = GET_MODE_BITSIZE (imode);
1227
1228 if (!get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1229 MEM_ALIGN (op0), max_bitsize, MEM_VOLATILE_P (op0),
1230 &best_mode))
1231 {
1232 /* The only way this should occur is if the field spans word
1233 boundaries. */
1234 store_split_bit_field (op0, op0_mode, bitsize, bitnum,
1235 bitregion_start, bitregion_end,
1236 value, value_mode, reverse);
1237 return;
1238 }
1239
1240 op0 = narrow_bit_field_mem (op0, best_mode, bitsize, bitnum, &bitnum);
1241 }
1242 else
1243 best_mode = op0_mode.require ();
1244
1245 store_fixed_bit_field_1 (op0, best_mode, bitsize, bitnum,
1246 value, value_mode, reverse);
1247 }
1248
1249 /* Helper function for store_fixed_bit_field, stores
1250 the bit field always using MODE, which is the mode of OP0. The other
1251 arguments are as for store_fixed_bit_field. */
1252
1253 static void
1254 store_fixed_bit_field_1 (rtx op0, scalar_int_mode mode,
1255 unsigned HOST_WIDE_INT bitsize,
1256 unsigned HOST_WIDE_INT bitnum,
1257 rtx value, scalar_int_mode value_mode, bool reverse)
1258 {
1259 rtx temp;
1260 int all_zero = 0;
1261 int all_one = 0;
1262
1263 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1264 for invalid input, such as f5 from gcc.dg/pr48335-2.c. */
1265
1266 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1267 /* BITNUM is the distance between our msb
1268 and that of the containing datum.
1269 Convert it to the distance from the lsb. */
1270 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1271
1272 /* Now BITNUM is always the distance between our lsb
1273 and that of OP0. */
1274
1275 /* Shift VALUE left by BITNUM bits. If VALUE is not constant,
1276 we must first convert its mode to MODE. */
1277
1278 if (CONST_INT_P (value))
1279 {
1280 unsigned HOST_WIDE_INT v = UINTVAL (value);
1281
1282 if (bitsize < HOST_BITS_PER_WIDE_INT)
1283 v &= (HOST_WIDE_INT_1U << bitsize) - 1;
1284
1285 if (v == 0)
1286 all_zero = 1;
1287 else if ((bitsize < HOST_BITS_PER_WIDE_INT
1288 && v == (HOST_WIDE_INT_1U << bitsize) - 1)
1289 || (bitsize == HOST_BITS_PER_WIDE_INT
1290 && v == HOST_WIDE_INT_M1U))
1291 all_one = 1;
1292
1293 value = lshift_value (mode, v, bitnum);
1294 }
1295 else
1296 {
1297 int must_and = (GET_MODE_BITSIZE (value_mode) != bitsize
1298 && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1299
1300 if (value_mode != mode)
1301 value = convert_to_mode (mode, value, 1);
1302
1303 if (must_and)
1304 value = expand_binop (mode, and_optab, value,
1305 mask_rtx (mode, 0, bitsize, 0),
1306 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1307 if (bitnum > 0)
1308 value = expand_shift (LSHIFT_EXPR, mode, value,
1309 bitnum, NULL_RTX, 1);
1310 }
1311
1312 if (reverse)
1313 value = flip_storage_order (mode, value);
1314
1315 /* Now clear the chosen bits in OP0,
1316 except that if VALUE is -1 we need not bother. */
1317 /* We keep the intermediates in registers to allow CSE to combine
1318 consecutive bitfield assignments. */
1319
1320 temp = force_reg (mode, op0);
1321
1322 if (! all_one)
1323 {
1324 rtx mask = mask_rtx (mode, bitnum, bitsize, 1);
1325 if (reverse)
1326 mask = flip_storage_order (mode, mask);
1327 temp = expand_binop (mode, and_optab, temp, mask,
1328 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1329 temp = force_reg (mode, temp);
1330 }
1331
1332 /* Now logical-or VALUE into OP0, unless it is zero. */
1333
1334 if (! all_zero)
1335 {
1336 temp = expand_binop (mode, ior_optab, temp, value,
1337 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1338 temp = force_reg (mode, temp);
1339 }
1340
1341 if (op0 != temp)
1342 {
1343 op0 = copy_rtx (op0);
1344 emit_move_insn (op0, temp);
1345 }
1346 }
1347 \f
1348 /* Store a bit field that is split across multiple accessible memory objects.
1349
1350 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1351 BITSIZE is the field width; BITPOS the position of its first bit
1352 (within the word).
1353 VALUE is the value to store, which has mode VALUE_MODE.
1354 If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
1355 a BLKmode MEM.
1356
1357 If REVERSE is true, the store is to be done in reverse order.
1358
1359 This does not yet handle fields wider than BITS_PER_WORD. */
1360
1361 static void
1362 store_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1363 unsigned HOST_WIDE_INT bitsize,
1364 unsigned HOST_WIDE_INT bitpos,
1365 poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1366 rtx value, scalar_int_mode value_mode, bool reverse)
1367 {
1368 unsigned int unit, total_bits, bitsdone = 0;
1369
1370 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1371 much at a time. */
1372 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1373 unit = BITS_PER_WORD;
1374 else
1375 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1376
1377 /* If OP0 is a memory with a mode, then UNIT must not be larger than
1378 OP0's mode as well. Otherwise, store_fixed_bit_field will call us
1379 again, and we will mutually recurse forever. */
1380 if (MEM_P (op0) && op0_mode.exists ())
1381 unit = MIN (unit, GET_MODE_BITSIZE (op0_mode.require ()));
1382
1383 /* If VALUE is a constant other than a CONST_INT, get it into a register in
1384 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
1385 that VALUE might be a floating-point constant. */
1386 if (CONSTANT_P (value) && !CONST_INT_P (value))
1387 {
1388 rtx word = gen_lowpart_common (word_mode, value);
1389
1390 if (word && (value != word))
1391 value = word;
1392 else
1393 value = gen_lowpart_common (word_mode, force_reg (value_mode, value));
1394 value_mode = word_mode;
1395 }
1396
1397 total_bits = GET_MODE_BITSIZE (value_mode);
1398
1399 while (bitsdone < bitsize)
1400 {
1401 unsigned HOST_WIDE_INT thissize;
1402 unsigned HOST_WIDE_INT thispos;
1403 unsigned HOST_WIDE_INT offset;
1404 rtx part;
1405
1406 offset = (bitpos + bitsdone) / unit;
1407 thispos = (bitpos + bitsdone) % unit;
1408
1409 /* When region of bytes we can touch is restricted, decrease
1410 UNIT close to the end of the region as needed. If op0 is a REG
1411 or SUBREG of REG, don't do this, as there can't be data races
1412 on a register and we can expand shorter code in some cases. */
1413 if (maybe_ne (bitregion_end, 0U)
1414 && unit > BITS_PER_UNIT
1415 && maybe_gt (bitpos + bitsdone - thispos + unit, bitregion_end + 1)
1416 && !REG_P (op0)
1417 && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1418 {
1419 unit = unit / 2;
1420 continue;
1421 }
1422
1423 /* THISSIZE must not overrun a word boundary. Otherwise,
1424 store_fixed_bit_field will call us again, and we will mutually
1425 recurse forever. */
1426 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1427 thissize = MIN (thissize, unit - thispos);
1428
1429 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1430 {
1431 /* Fetch successively less significant portions. */
1432 if (CONST_INT_P (value))
1433 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1434 >> (bitsize - bitsdone - thissize))
1435 & ((HOST_WIDE_INT_1 << thissize) - 1));
1436 /* Likewise, but the source is little-endian. */
1437 else if (reverse)
1438 part = extract_fixed_bit_field (word_mode, value, value_mode,
1439 thissize,
1440 bitsize - bitsdone - thissize,
1441 NULL_RTX, 1, false);
1442 else
1443 /* The args are chosen so that the last part includes the
1444 lsb. Give extract_bit_field the value it needs (with
1445 endianness compensation) to fetch the piece we want. */
1446 part = extract_fixed_bit_field (word_mode, value, value_mode,
1447 thissize,
1448 total_bits - bitsize + bitsdone,
1449 NULL_RTX, 1, false);
1450 }
1451 else
1452 {
1453 /* Fetch successively more significant portions. */
1454 if (CONST_INT_P (value))
1455 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1456 >> bitsdone)
1457 & ((HOST_WIDE_INT_1 << thissize) - 1));
1458 /* Likewise, but the source is big-endian. */
1459 else if (reverse)
1460 part = extract_fixed_bit_field (word_mode, value, value_mode,
1461 thissize,
1462 total_bits - bitsdone - thissize,
1463 NULL_RTX, 1, false);
1464 else
1465 part = extract_fixed_bit_field (word_mode, value, value_mode,
1466 thissize, bitsdone, NULL_RTX,
1467 1, false);
1468 }
1469
1470 /* If OP0 is a register, then handle OFFSET here. */
1471 rtx op0_piece = op0;
1472 opt_scalar_int_mode op0_piece_mode = op0_mode;
1473 if (SUBREG_P (op0) || REG_P (op0))
1474 {
1475 scalar_int_mode imode;
1476 if (op0_mode.exists (&imode)
1477 && GET_MODE_SIZE (imode) < UNITS_PER_WORD)
1478 {
1479 if (offset)
1480 op0_piece = const0_rtx;
1481 }
1482 else
1483 {
1484 op0_piece = operand_subword_force (op0,
1485 offset * unit / BITS_PER_WORD,
1486 GET_MODE (op0));
1487 op0_piece_mode = word_mode;
1488 }
1489 offset &= BITS_PER_WORD / unit - 1;
1490 }
1491
1492 /* OFFSET is in UNITs, and UNIT is in bits. If WORD is const0_rtx,
1493 it is just an out-of-bounds access. Ignore it. */
1494 if (op0_piece != const0_rtx)
1495 store_fixed_bit_field (op0_piece, op0_piece_mode, thissize,
1496 offset * unit + thispos, bitregion_start,
1497 bitregion_end, part, word_mode, reverse);
1498 bitsdone += thissize;
1499 }
1500 }
1501 \f
1502 /* A subroutine of extract_bit_field_1 that converts return value X
1503 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1504 to extract_bit_field. */
1505
1506 static rtx
1507 convert_extracted_bit_field (rtx x, machine_mode mode,
1508 machine_mode tmode, bool unsignedp)
1509 {
1510 if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1511 return x;
1512
1513 /* If the x mode is not a scalar integral, first convert to the
1514 integer mode of that size and then access it as a floating-point
1515 value via a SUBREG. */
1516 if (!SCALAR_INT_MODE_P (tmode))
1517 {
1518 scalar_int_mode int_mode = int_mode_for_mode (tmode).require ();
1519 x = convert_to_mode (int_mode, x, unsignedp);
1520 x = force_reg (int_mode, x);
1521 return gen_lowpart (tmode, x);
1522 }
1523
1524 return convert_to_mode (tmode, x, unsignedp);
1525 }
1526
1527 /* Try to use an ext(z)v pattern to extract a field from OP0.
1528 Return the extracted value on success, otherwise return null.
1529 EXTV describes the extraction instruction to use. If OP0_MODE
1530 is defined, it is the mode of OP0, otherwise OP0 is a BLKmode MEM.
1531 The other arguments are as for extract_bit_field. */
1532
1533 static rtx
1534 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1535 opt_scalar_int_mode op0_mode,
1536 unsigned HOST_WIDE_INT bitsize,
1537 unsigned HOST_WIDE_INT bitnum,
1538 int unsignedp, rtx target,
1539 machine_mode mode, machine_mode tmode)
1540 {
1541 class expand_operand ops[4];
1542 rtx spec_target = target;
1543 rtx spec_target_subreg = 0;
1544 scalar_int_mode ext_mode = extv->field_mode;
1545 unsigned unit = GET_MODE_BITSIZE (ext_mode);
1546
1547 if (bitsize == 0 || unit < bitsize)
1548 return NULL_RTX;
1549
1550 if (MEM_P (op0))
1551 /* Get a reference to the first byte of the field. */
1552 op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1553 &bitnum);
1554 else
1555 {
1556 /* Convert from counting within OP0 to counting in EXT_MODE. */
1557 if (BYTES_BIG_ENDIAN)
1558 bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());
1559
1560 /* If op0 is a register, we need it in EXT_MODE to make it
1561 acceptable to the format of ext(z)v. */
1562 if (GET_CODE (op0) == SUBREG && op0_mode.require () != ext_mode)
1563 return NULL_RTX;
1564 if (REG_P (op0) && op0_mode.require () != ext_mode)
1565 op0 = gen_lowpart_SUBREG (ext_mode, op0);
1566 }
1567
1568 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1569 "backwards" from the size of the unit we are extracting from.
1570 Otherwise, we count bits from the most significant on a
1571 BYTES/BITS_BIG_ENDIAN machine. */
1572
1573 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1574 bitnum = unit - bitsize - bitnum;
1575
1576 if (target == 0)
1577 target = spec_target = gen_reg_rtx (tmode);
1578
1579 if (GET_MODE (target) != ext_mode)
1580 {
1581 rtx temp;
1582 /* Don't use LHS paradoxical subreg if explicit truncation is needed
1583 between the mode of the extraction (word_mode) and the target
1584 mode. Instead, create a temporary and use convert_move to set
1585 the target. */
1586 if (REG_P (target)
1587 && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode)
1588 && (temp = gen_lowpart_if_possible (ext_mode, target)))
1589 {
1590 target = temp;
1591 if (partial_subreg_p (GET_MODE (spec_target), ext_mode))
1592 spec_target_subreg = target;
1593 }
1594 else
1595 target = gen_reg_rtx (ext_mode);
1596 }
1597
1598 create_output_operand (&ops[0], target, ext_mode);
1599 create_fixed_operand (&ops[1], op0);
1600 create_integer_operand (&ops[2], bitsize);
1601 create_integer_operand (&ops[3], bitnum);
1602 if (maybe_expand_insn (extv->icode, 4, ops))
1603 {
1604 target = ops[0].value;
1605 if (target == spec_target)
1606 return target;
1607 if (target == spec_target_subreg)
1608 return spec_target;
1609 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1610 }
1611 return NULL_RTX;
1612 }
1613
1614 /* See whether it would be valid to extract the part of OP0 with
1615 mode OP0_MODE described by BITNUM and BITSIZE into a value of
1616 mode MODE using a subreg operation.
1617 Return the subreg if so, otherwise return null. */
1618
1619 static rtx
1620 extract_bit_field_as_subreg (machine_mode mode, rtx op0,
1621 machine_mode op0_mode,
1622 poly_uint64 bitsize, poly_uint64 bitnum)
1623 {
1624 poly_uint64 bytenum;
1625 if (multiple_p (bitnum, BITS_PER_UNIT, &bytenum)
1626 && known_eq (bitsize, GET_MODE_BITSIZE (mode))
1627 && lowpart_bit_field_p (bitnum, bitsize, op0_mode)
1628 && TRULY_NOOP_TRUNCATION_MODES_P (mode, op0_mode))
1629 return simplify_gen_subreg (mode, op0, op0_mode, bytenum);
1630 return NULL_RTX;
1631 }
1632
1633 /* A subroutine of extract_bit_field, with the same arguments.
1634 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1635 if we can find no other means of implementing the operation.
1636 If FALLBACK_P is false, return NULL instead. */
1637
1638 static rtx
1639 extract_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
1640 int unsignedp, rtx target, machine_mode mode,
1641 machine_mode tmode, bool reverse, bool fallback_p,
1642 rtx *alt_rtl)
1643 {
1644 rtx op0 = str_rtx;
1645 machine_mode mode1;
1646
1647 if (tmode == VOIDmode)
1648 tmode = mode;
1649
1650 while (GET_CODE (op0) == SUBREG)
1651 {
1652 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1653 op0 = SUBREG_REG (op0);
1654 }
1655
1656 /* If we have an out-of-bounds access to a register, just return an
1657 uninitialized register of the required mode. This can occur if the
1658 source code contains an out-of-bounds access to a small array. */
1659 if (REG_P (op0) && known_ge (bitnum, GET_MODE_BITSIZE (GET_MODE (op0))))
1660 return gen_reg_rtx (tmode);
1661
1662 if (REG_P (op0)
1663 && mode == GET_MODE (op0)
1664 && known_eq (bitnum, 0U)
1665 && known_eq (bitsize, GET_MODE_BITSIZE (GET_MODE (op0))))
1666 {
1667 if (reverse)
1668 op0 = flip_storage_order (mode, op0);
1669 /* We're trying to extract a full register from itself. */
1670 return op0;
1671 }
1672
1673 /* First try to check for vector from vector extractions. */
1674 if (VECTOR_MODE_P (GET_MODE (op0))
1675 && !MEM_P (op0)
1676 && VECTOR_MODE_P (tmode)
1677 && known_eq (bitsize, GET_MODE_BITSIZE (tmode))
1678 && maybe_gt (GET_MODE_SIZE (GET_MODE (op0)), GET_MODE_SIZE (tmode)))
1679 {
1680 machine_mode new_mode = GET_MODE (op0);
1681 if (GET_MODE_INNER (new_mode) != GET_MODE_INNER (tmode))
1682 {
1683 scalar_mode inner_mode = GET_MODE_INNER (tmode);
1684 poly_uint64 nunits;
1685 if (!multiple_p (GET_MODE_BITSIZE (GET_MODE (op0)),
1686 GET_MODE_UNIT_BITSIZE (tmode), &nunits)
1687 || !related_vector_mode (tmode, inner_mode,
1688 nunits).exists (&new_mode)
1689 || maybe_ne (GET_MODE_SIZE (new_mode),
1690 GET_MODE_SIZE (GET_MODE (op0))))
1691 new_mode = VOIDmode;
1692 }
1693 poly_uint64 pos;
1694 if (new_mode != VOIDmode
1695 && (convert_optab_handler (vec_extract_optab, new_mode, tmode)
1696 != CODE_FOR_nothing)
1697 && multiple_p (bitnum, GET_MODE_BITSIZE (tmode), &pos))
1698 {
1699 class expand_operand ops[3];
1700 machine_mode outermode = new_mode;
1701 machine_mode innermode = tmode;
1702 enum insn_code icode
1703 = convert_optab_handler (vec_extract_optab, outermode, innermode);
1704
1705 if (new_mode != GET_MODE (op0))
1706 op0 = gen_lowpart (new_mode, op0);
1707 create_output_operand (&ops[0], target, innermode);
1708 ops[0].target = 1;
1709 create_input_operand (&ops[1], op0, outermode);
1710 create_integer_operand (&ops[2], pos);
1711 if (maybe_expand_insn (icode, 3, ops))
1712 {
1713 if (alt_rtl && ops[0].target)
1714 *alt_rtl = target;
1715 target = ops[0].value;
1716 if (GET_MODE (target) != mode)
1717 return gen_lowpart (tmode, target);
1718 return target;
1719 }
1720 }
1721 }
1722
1723 /* See if we can get a better vector mode before extracting. */
1724 if (VECTOR_MODE_P (GET_MODE (op0))
1725 && !MEM_P (op0)
1726 && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1727 {
1728 machine_mode new_mode;
1729
1730 if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1731 new_mode = MIN_MODE_VECTOR_FLOAT;
1732 else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1733 new_mode = MIN_MODE_VECTOR_FRACT;
1734 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1735 new_mode = MIN_MODE_VECTOR_UFRACT;
1736 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1737 new_mode = MIN_MODE_VECTOR_ACCUM;
1738 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1739 new_mode = MIN_MODE_VECTOR_UACCUM;
1740 else
1741 new_mode = MIN_MODE_VECTOR_INT;
1742
1743 FOR_EACH_MODE_FROM (new_mode, new_mode)
1744 if (known_eq (GET_MODE_SIZE (new_mode), GET_MODE_SIZE (GET_MODE (op0)))
1745 && known_eq (GET_MODE_UNIT_SIZE (new_mode), GET_MODE_SIZE (tmode))
1746 && targetm.vector_mode_supported_p (new_mode)
1747 && targetm.modes_tieable_p (GET_MODE (op0), new_mode))
1748 break;
1749 if (new_mode != VOIDmode)
1750 op0 = gen_lowpart (new_mode, op0);
1751 }
1752
1753 /* Use vec_extract patterns for extracting parts of vectors whenever
1754 available. If that fails, see whether the current modes and bitregion
1755 give a natural subreg. */
1756 machine_mode outermode = GET_MODE (op0);
1757 if (VECTOR_MODE_P (outermode) && !MEM_P (op0))
1758 {
1759 scalar_mode innermode = GET_MODE_INNER (outermode);
1760 enum insn_code icode
1761 = convert_optab_handler (vec_extract_optab, outermode, innermode);
1762 poly_uint64 pos;
1763 if (icode != CODE_FOR_nothing
1764 && known_eq (bitsize, GET_MODE_BITSIZE (innermode))
1765 && multiple_p (bitnum, GET_MODE_BITSIZE (innermode), &pos))
1766 {
1767 class expand_operand ops[3];
1768
1769 create_output_operand (&ops[0], target, innermode);
1770 ops[0].target = 1;
1771 create_input_operand (&ops[1], op0, outermode);
1772 create_integer_operand (&ops[2], pos);
1773 if (maybe_expand_insn (icode, 3, ops))
1774 {
1775 if (alt_rtl && ops[0].target)
1776 *alt_rtl = target;
1777 target = ops[0].value;
1778 if (GET_MODE (target) != mode)
1779 return gen_lowpart (tmode, target);
1780 return target;
1781 }
1782 }
1783 /* Using subregs is useful if we're extracting one register vector
1784 from a multi-register vector. extract_bit_field_as_subreg checks
1785 for valid bitsize and bitnum, so we don't need to do that here. */
1786 if (VECTOR_MODE_P (mode))
1787 {
1788 rtx sub = extract_bit_field_as_subreg (mode, op0, outermode,
1789 bitsize, bitnum);
1790 if (sub)
1791 return sub;
1792 }
1793 }
1794
1795 /* Make sure we are playing with integral modes. Pun with subregs
1796 if we aren't. */
1797 opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
1798 scalar_int_mode imode;
1799 if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
1800 {
1801 if (MEM_P (op0))
1802 op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
1803 0, MEM_SIZE (op0));
1804 else if (op0_mode.exists (&imode))
1805 {
1806 op0 = gen_lowpart (imode, op0);
1807
1808 /* If we got a SUBREG, force it into a register since we
1809 aren't going to be able to do another SUBREG on it. */
1810 if (GET_CODE (op0) == SUBREG)
1811 op0 = force_reg (imode, op0);
1812 }
1813 else
1814 {
1815 poly_int64 size = GET_MODE_SIZE (GET_MODE (op0));
1816 rtx mem = assign_stack_temp (GET_MODE (op0), size);
1817 emit_move_insn (mem, op0);
1818 op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1819 }
1820 }
1821
1822 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1823 If that's wrong, the solution is to test for it and set TARGET to 0
1824 if needed. */
1825
1826 /* Get the mode of the field to use for atomic access or subreg
1827 conversion. */
1828 if (!SCALAR_INT_MODE_P (tmode)
1829 || !mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0).exists (&mode1))
1830 mode1 = mode;
1831 gcc_assert (mode1 != BLKmode);
1832
1833 /* Extraction of a full MODE1 value can be done with a subreg as long
1834 as the least significant bit of the value is the least significant
1835 bit of either OP0 or a word of OP0. */
1836 if (!MEM_P (op0) && !reverse && op0_mode.exists (&imode))
1837 {
1838 rtx sub = extract_bit_field_as_subreg (mode1, op0, imode,
1839 bitsize, bitnum);
1840 if (sub)
1841 return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1842 }
1843
1844 /* Extraction of a full MODE1 value can be done with a load as long as
1845 the field is on a byte boundary and is sufficiently aligned. */
1846 poly_uint64 bytenum;
1847 if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1, &bytenum))
1848 {
1849 op0 = adjust_bitfield_address (op0, mode1, bytenum);
1850 if (reverse)
1851 op0 = flip_storage_order (mode1, op0);
1852 return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1853 }
1854
1855 /* If we have a memory source and a non-constant bit offset, restrict
1856 the memory to the referenced bytes. This is a worst-case fallback
1857 but is useful for things like vector booleans. */
1858 if (MEM_P (op0) && !bitnum.is_constant ())
1859 {
1860 bytenum = bits_to_bytes_round_down (bitnum);
1861 bitnum = num_trailing_bits (bitnum);
1862 poly_uint64 bytesize = bits_to_bytes_round_up (bitnum + bitsize);
1863 op0 = adjust_bitfield_address_size (op0, BLKmode, bytenum, bytesize);
1864 op0_mode = opt_scalar_int_mode ();
1865 }
1866
1867 /* It's possible we'll need to handle other cases here for
1868 polynomial bitnum and bitsize. */
1869
1870 /* From here on we need to be looking at a fixed-size extraction. */
1871 return extract_integral_bit_field (op0, op0_mode, bitsize.to_constant (),
1872 bitnum.to_constant (), unsignedp,
1873 target, mode, tmode, reverse, fallback_p);
1874 }
1875
1876 /* Subroutine of extract_bit_field_1, with the same arguments, except
1877 that BITSIZE and BITNUM are constant. Handle cases specific to
1878 integral modes. If OP0_MODE is defined, it is the mode of OP0,
1879 otherwise OP0 is a BLKmode MEM. */
1880
1881 static rtx
1882 extract_integral_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1883 unsigned HOST_WIDE_INT bitsize,
1884 unsigned HOST_WIDE_INT bitnum, int unsignedp,
1885 rtx target, machine_mode mode, machine_mode tmode,
1886 bool reverse, bool fallback_p)
1887 {
1888 /* Handle fields bigger than a word. */
1889
1890 if (bitsize > BITS_PER_WORD)
1891 {
1892 /* Here we transfer the words of the field
1893 in the order least significant first.
1894 This is because the most significant word is the one which may
1895 be less than full. */
1896
1897 const bool backwards = WORDS_BIG_ENDIAN;
1898 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1899 unsigned int i;
1900 rtx_insn *last;
1901
1902 if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1903 target = gen_reg_rtx (mode);
1904
1905 /* In case we're about to clobber a base register or something
1906 (see gcc.c-torture/execute/20040625-1.c). */
1907 if (reg_mentioned_p (target, op0))
1908 target = gen_reg_rtx (mode);
1909
1910 /* Indicate for flow that the entire target reg is being set. */
1911 emit_clobber (target);
1912
1913 /* The mode must be fixed-size, since extract_bit_field_1 handles
1914 extractions from variable-sized objects before calling this
1915 function. */
1916 unsigned int target_size
1917 = GET_MODE_SIZE (GET_MODE (target)).to_constant ();
1918 last = get_last_insn ();
1919 for (i = 0; i < nwords; i++)
1920 {
1921 /* If I is 0, use the low-order word in both field and target;
1922 if I is 1, use the next to lowest word; and so on. */
1923 /* Word number in TARGET to use. */
1924 unsigned int wordnum
1925 = (backwards ? target_size / UNITS_PER_WORD - i - 1 : i);
1926 /* Offset from start of field in OP0. */
1927 unsigned int bit_offset = (backwards ^ reverse
1928 ? MAX ((int) bitsize - ((int) i + 1)
1929 * BITS_PER_WORD,
1930 0)
1931 : (int) i * BITS_PER_WORD);
1932 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1933 rtx result_part
1934 = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1935 bitsize - i * BITS_PER_WORD),
1936 bitnum + bit_offset, 1, target_part,
1937 mode, word_mode, reverse, fallback_p, NULL);
1938
1939 gcc_assert (target_part);
1940 if (!result_part)
1941 {
1942 delete_insns_since (last);
1943 return NULL;
1944 }
1945
1946 if (result_part != target_part)
1947 emit_move_insn (target_part, result_part);
1948 }
1949
1950 if (unsignedp)
1951 {
1952 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1953 need to be zeroed out. */
1954 if (target_size > nwords * UNITS_PER_WORD)
1955 {
1956 unsigned int i, total_words;
1957
1958 total_words = target_size / UNITS_PER_WORD;
1959 for (i = nwords; i < total_words; i++)
1960 emit_move_insn
1961 (operand_subword (target,
1962 backwards ? total_words - i - 1 : i,
1963 1, VOIDmode),
1964 const0_rtx);
1965 }
1966 return target;
1967 }
1968
1969 /* Signed bit field: sign-extend with two arithmetic shifts. */
1970 target = expand_shift (LSHIFT_EXPR, mode, target,
1971 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1972 return expand_shift (RSHIFT_EXPR, mode, target,
1973 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1974 }
1975
1976 /* If OP0 is a multi-word register, narrow it to the affected word.
1977 If the region spans two words, defer to extract_split_bit_field. */
1978 if (!MEM_P (op0) && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD)
1979 {
1980 if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
1981 {
1982 if (!fallback_p)
1983 return NULL_RTX;
1984 target = extract_split_bit_field (op0, op0_mode, bitsize, bitnum,
1985 unsignedp, reverse);
1986 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1987 }
1988 /* If OP0 is a hard register, copy it to a pseudo before calling
1989 simplify_gen_subreg. */
1990 if (REG_P (op0) && HARD_REGISTER_P (op0))
1991 op0 = copy_to_reg (op0);
1992 op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (),
1993 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1994 op0_mode = word_mode;
1995 bitnum %= BITS_PER_WORD;
1996 }
1997
1998 /* From here on we know the desired field is smaller than a word.
1999 If OP0 is a register, it too fits within a word. */
2000 enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
2001 extraction_insn extv;
2002 if (!MEM_P (op0)
2003 && !reverse
2004 /* ??? We could limit the structure size to the part of OP0 that
2005 contains the field, with appropriate checks for endianness
2006 and TARGET_TRULY_NOOP_TRUNCATION. */
2007 && get_best_reg_extraction_insn (&extv, pattern,
2008 GET_MODE_BITSIZE (op0_mode.require ()),
2009 tmode))
2010 {
2011 rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode,
2012 bitsize, bitnum,
2013 unsignedp, target, mode,
2014 tmode);
2015 if (result)
2016 return result;
2017 }
2018
2019 /* If OP0 is a memory, try copying it to a register and seeing if a
2020 cheap register alternative is available. */
2021 if (MEM_P (op0) && !reverse)
2022 {
2023 if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
2024 tmode))
2025 {
2026 rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode,
2027 bitsize, bitnum,
2028 unsignedp, target, mode,
2029 tmode);
2030 if (result)
2031 return result;
2032 }
2033
2034 rtx_insn *last = get_last_insn ();
2035
2036 /* Try loading part of OP0 into a register and extracting the
2037 bitfield from that. */
2038 unsigned HOST_WIDE_INT bitpos;
2039 rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
2040 0, 0, tmode, &bitpos);
2041 if (xop0)
2042 {
2043 xop0 = copy_to_reg (xop0);
2044 rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
2045 unsignedp, target,
2046 mode, tmode, reverse, false, NULL);
2047 if (result)
2048 return result;
2049 delete_insns_since (last);
2050 }
2051 }
2052
2053 if (!fallback_p)
2054 return NULL;
2055
2056 /* Find a correspondingly-sized integer field, so we can apply
2057 shifts and masks to it. */
2058 scalar_int_mode int_mode;
2059 if (!int_mode_for_mode (tmode).exists (&int_mode))
2060 /* If this fails, we should probably push op0 out to memory and then
2061 do a load. */
2062 int_mode = int_mode_for_mode (mode).require ();
2063
2064 target = extract_fixed_bit_field (int_mode, op0, op0_mode, bitsize,
2065 bitnum, target, unsignedp, reverse);
2066
2067 /* Complex values must be reversed piecewise, so we need to undo the global
2068 reversal, convert to the complex mode and reverse again. */
2069 if (reverse && COMPLEX_MODE_P (tmode))
2070 {
2071 target = flip_storage_order (int_mode, target);
2072 target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
2073 target = flip_storage_order (tmode, target);
2074 }
2075 else
2076 target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
2077
2078 return target;
2079 }
2080
2081 /* Generate code to extract a bit-field from STR_RTX
2082 containing BITSIZE bits, starting at BITNUM,
2083 and put it in TARGET if possible (if TARGET is nonzero).
2084 Regardless of TARGET, we return the rtx for where the value is placed.
2085
2086 STR_RTX is the structure containing the field (a REG or MEM).
2087 UNSIGNEDP is nonzero if this is an unsigned bit field.
2088 MODE is the natural mode of the field value once extracted.
2089 TMODE is the mode the caller would like the value to have;
2090 but the value may be returned with type MODE instead.
2091
2092 If REVERSE is true, the extraction is to be done in reverse order.
2093
2094 If a TARGET is specified and we can store in it at no extra cost,
2095 we do so, and return TARGET.
2096 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
2097 if they are equally easy.
2098
2099 If the result can be stored at TARGET, and ALT_RTL is non-NULL,
2100 then *ALT_RTL is set to TARGET (before legitimization). */
2101
2102 rtx
2103 extract_bit_field (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
2104 int unsignedp, rtx target, machine_mode mode,
2105 machine_mode tmode, bool reverse, rtx *alt_rtl)
2106 {
2107 machine_mode mode1;
2108
2109 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
2110 if (maybe_ne (GET_MODE_BITSIZE (GET_MODE (str_rtx)), 0))
2111 mode1 = GET_MODE (str_rtx);
2112 else if (target && maybe_ne (GET_MODE_BITSIZE (GET_MODE (target)), 0))
2113 mode1 = GET_MODE (target);
2114 else
2115 mode1 = tmode;
2116
2117 unsigned HOST_WIDE_INT ibitsize, ibitnum;
2118 scalar_int_mode int_mode;
2119 if (bitsize.is_constant (&ibitsize)
2120 && bitnum.is_constant (&ibitnum)
2121 && is_a <scalar_int_mode> (mode1, &int_mode)
2122 && strict_volatile_bitfield_p (str_rtx, ibitsize, ibitnum,
2123 int_mode, 0, 0))
2124 {
2125 /* Extraction of a full INT_MODE value can be done with a simple load.
2126 We know here that the field can be accessed with one single
2127 instruction. For targets that support unaligned memory,
2128 an unaligned access may be necessary. */
2129 if (ibitsize == GET_MODE_BITSIZE (int_mode))
2130 {
2131 rtx result = adjust_bitfield_address (str_rtx, int_mode,
2132 ibitnum / BITS_PER_UNIT);
2133 if (reverse)
2134 result = flip_storage_order (int_mode, result);
2135 gcc_assert (ibitnum % BITS_PER_UNIT == 0);
2136 return convert_extracted_bit_field (result, mode, tmode, unsignedp);
2137 }
2138
2139 str_rtx = narrow_bit_field_mem (str_rtx, int_mode, ibitsize, ibitnum,
2140 &ibitnum);
2141 gcc_assert (ibitnum + ibitsize <= GET_MODE_BITSIZE (int_mode));
2142 str_rtx = copy_to_reg (str_rtx);
2143 return extract_bit_field_1 (str_rtx, ibitsize, ibitnum, unsignedp,
2144 target, mode, tmode, reverse, true, alt_rtl);
2145 }
2146
2147 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
2148 target, mode, tmode, reverse, true, alt_rtl);
2149 }
2150 \f
2151 /* Use shifts and boolean operations to extract a field of BITSIZE bits
2152 from bit BITNUM of OP0. If OP0_MODE is defined, it is the mode of OP0,
2153 otherwise OP0 is a BLKmode MEM.
2154
2155 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
2156 If REVERSE is true, the extraction is to be done in reverse order.
2157
2158 If TARGET is nonzero, attempts to store the value there
2159 and return TARGET, but this is not guaranteed.
2160 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
2161
2162 static rtx
2163 extract_fixed_bit_field (machine_mode tmode, rtx op0,
2164 opt_scalar_int_mode op0_mode,
2165 unsigned HOST_WIDE_INT bitsize,
2166 unsigned HOST_WIDE_INT bitnum, rtx target,
2167 int unsignedp, bool reverse)
2168 {
2169 scalar_int_mode mode;
2170 if (MEM_P (op0))
2171 {
2172 if (!get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0),
2173 BITS_PER_WORD, MEM_VOLATILE_P (op0), &mode))
2174 /* The only way this should occur is if the field spans word
2175 boundaries. */
2176 return extract_split_bit_field (op0, op0_mode, bitsize, bitnum,
2177 unsignedp, reverse);
2178
2179 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
2180 }
2181 else
2182 mode = op0_mode.require ();
2183
2184 return extract_fixed_bit_field_1 (tmode, op0, mode, bitsize, bitnum,
2185 target, unsignedp, reverse);
2186 }
2187
2188 /* Helper function for extract_fixed_bit_field, extracts
2189 the bit field always using MODE, which is the mode of OP0.
2190 The other arguments are as for extract_fixed_bit_field. */
2191
2192 static rtx
2193 extract_fixed_bit_field_1 (machine_mode tmode, rtx op0, scalar_int_mode mode,
2194 unsigned HOST_WIDE_INT bitsize,
2195 unsigned HOST_WIDE_INT bitnum, rtx target,
2196 int unsignedp, bool reverse)
2197 {
2198 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
2199 for invalid input, such as extracting the equivalent of f5 from
2200 gcc.dg/pr48335-2.c. */
2201
2202 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2203 /* BITNUM is the distance between our msb and that of OP0.
2204 Convert it to the distance from the lsb. */
2205 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
2206
2207 /* Now BITNUM is always the distance between the field's lsb and that of OP0.
2208 We have reduced the big-endian case to the little-endian case. */
2209 if (reverse)
2210 op0 = flip_storage_order (mode, op0);
2211
2212 if (unsignedp)
2213 {
2214 if (bitnum)
2215 {
2216 /* If the field does not already start at the lsb,
2217 shift it so it does. */
2218 /* Maybe propagate the target for the shift. */
2219 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2220 if (tmode != mode)
2221 subtarget = 0;
2222 op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
2223 }
2224 /* Convert the value to the desired mode. TMODE must also be a
2225 scalar integer for this conversion to make sense, since we
2226 shouldn't reinterpret the bits. */
2227 scalar_int_mode new_mode = as_a <scalar_int_mode> (tmode);
2228 if (mode != new_mode)
2229 op0 = convert_to_mode (new_mode, op0, 1);
2230
2231 /* Unless the msb of the field used to be the msb when we shifted,
2232 mask out the upper bits. */
2233
2234 if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
2235 return expand_binop (new_mode, and_optab, op0,
2236 mask_rtx (new_mode, 0, bitsize, 0),
2237 target, 1, OPTAB_LIB_WIDEN);
2238 return op0;
2239 }
2240
2241 /* To extract a signed bit-field, first shift its msb to the msb of the word,
2242 then arithmetic-shift its lsb to the lsb of the word. */
2243 op0 = force_reg (mode, op0);
2244
2245 /* Find the narrowest integer mode that contains the field. */
2246
2247 opt_scalar_int_mode mode_iter;
2248 FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_INT)
2249 if (GET_MODE_BITSIZE (mode_iter.require ()) >= bitsize + bitnum)
2250 break;
2251
2252 mode = mode_iter.require ();
2253 op0 = convert_to_mode (mode, op0, 0);
2254
2255 if (mode != tmode)
2256 target = 0;
2257
2258 if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
2259 {
2260 int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
2261 /* Maybe propagate the target for the shift. */
2262 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2263 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
2264 }
2265
2266 return expand_shift (RSHIFT_EXPR, mode, op0,
2267 GET_MODE_BITSIZE (mode) - bitsize, target, 0);
2268 }
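/* Editor's note: an illustrative, self-contained sketch (not part of
   GCC) of the shift-and-mask technique used by
   extract_fixed_bit_field_1 above, written for a plain 32-bit word.
   The function names, the choice of "unsigned int" and the assumption
   that BITNUM + BITSIZE <= 32 are made up for the example only; the
   signed case relies on the common two's-complement, arithmetic
   right-shift behaviour.

     static unsigned int
     extract_unsigned_field (unsigned int word, int bitnum, int bitsize)
     {
       // Shift the field down to the lsb, then mask off the upper bits.
       unsigned int mask = bitsize < 32 ? (1u << bitsize) - 1 : ~0u;
       return (word >> bitnum) & mask;
     }

     static int
     extract_signed_field (unsigned int word, int bitnum, int bitsize)
     {
       // Left-justify the field, then arithmetic-shift it back down so
       // the field's sign bit fills the upper bits.
       int left = (int) (word << (32 - bitsize - bitnum));
       return left >> (32 - bitsize);
     }
*/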
2269
2270 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
2271 VALUE << BITPOS. */
2272
2273 static rtx
2274 lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
2275 int bitpos)
2276 {
2277 return immed_wide_int_const (wi::lshift (value, bitpos), mode);
2278 }
2279 \f
2280 /* Extract a bit field that is split across two words
2281 and return an RTX for the result.
2282
2283 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2284 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2285 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.
2286 If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
2287 a BLKmode MEM.
2288
2289 If REVERSE is true, the extraction is to be done in reverse order. */
2290
2291 static rtx
2292 extract_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
2293 unsigned HOST_WIDE_INT bitsize,
2294 unsigned HOST_WIDE_INT bitpos, int unsignedp,
2295 bool reverse)
2296 {
2297 unsigned int unit;
2298 unsigned int bitsdone = 0;
2299 rtx result = NULL_RTX;
2300 int first = 1;
2301
2302 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
2303 much at a time. */
2304 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2305 unit = BITS_PER_WORD;
2306 else
2307 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
2308
2309 while (bitsdone < bitsize)
2310 {
2311 unsigned HOST_WIDE_INT thissize;
2312 rtx part;
2313 unsigned HOST_WIDE_INT thispos;
2314 unsigned HOST_WIDE_INT offset;
2315
2316 offset = (bitpos + bitsdone) / unit;
2317 thispos = (bitpos + bitsdone) % unit;
2318
2319 /* THISSIZE must not overrun a word boundary. Otherwise,
2320 extract_fixed_bit_field will call us again, and we will mutually
2321 recurse forever. */
2322 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2323 thissize = MIN (thissize, unit - thispos);
2324
2325 /* If OP0 is a register, then handle OFFSET here. */
2326 rtx op0_piece = op0;
2327 opt_scalar_int_mode op0_piece_mode = op0_mode;
2328 if (SUBREG_P (op0) || REG_P (op0))
2329 {
2330 op0_piece = operand_subword_force (op0, offset, op0_mode.require ());
2331 op0_piece_mode = word_mode;
2332 offset = 0;
2333 }
2334
2335 /* Extract the parts in bit-counting order,
2336 whose meaning is determined by BITS_PER_UNIT.
2337 OFFSET is in UNITs, and UNIT is in bits. */
2338 part = extract_fixed_bit_field (word_mode, op0_piece, op0_piece_mode,
2339 thissize, offset * unit + thispos,
2340 0, 1, reverse);
2341 bitsdone += thissize;
2342
2343 /* Shift this part into place for the result. */
2344 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2345 {
2346 if (bitsize != bitsdone)
2347 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2348 bitsize - bitsdone, 0, 1);
2349 }
2350 else
2351 {
2352 if (bitsdone != thissize)
2353 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2354 bitsdone - thissize, 0, 1);
2355 }
2356
2357 if (first)
2358 result = part;
2359 else
2360 /* Combine the parts with bitwise or. This works
2361 because we extracted each part as an unsigned bit field. */
2362 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2363 OPTAB_LIB_WIDEN);
2364
2365 first = 0;
2366 }
2367
2368 /* Unsigned bit field: we are done. */
2369 if (unsignedp)
2370 return result;
2371 /* Signed bit field: sign-extend with two arithmetic shifts. */
2372 result = expand_shift (LSHIFT_EXPR, word_mode, result,
2373 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2374 return expand_shift (RSHIFT_EXPR, word_mode, result,
2375 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2376 }
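/* Editor's note: an illustrative sketch (not part of GCC) of how a
   field that straddles a word boundary is reassembled by OR-ing two
   partial extractions, as the loop above does. The 32-bit word size,
   little-endian bit ordering, the function name and the assumption
   0 < BITSIZE < 32 with the field actually spanning the boundary are
   all simplifications made for the example.

     static unsigned int
     extract_split (unsigned int word0, unsigned int word1,
                    int bitpos, int bitsize)
     {
       int in_first = 32 - bitpos;          // bits taken from WORD0
       int in_second = bitsize - in_first;  // bits taken from WORD1
       unsigned int low = word0 >> bitpos;
       unsigned int high = word1 & ((1u << in_second) - 1);
       // Combine the parts with inclusive or, as in the loop above.
       return low | (high << in_first);
     }
*/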
2377 \f
2378 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2379 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
2380 MODE, fill the upper bits with zeros. Fail if the layout of either
2381 mode is unknown (as for CC modes) or if the extraction would involve
2382 unprofitable mode punning. Return the value on success, otherwise
2383 return null.
2384
2385 This is different from gen_lowpart* in these respects:
2386
2387 - the returned value must always be considered an rvalue
2388
2389 - when MODE is wider than SRC_MODE, the extraction involves
2390 a zero extension
2391
2392 - when MODE is smaller than SRC_MODE, the extraction involves
2393 a truncation (and is thus subject to TARGET_TRULY_NOOP_TRUNCATION).
2394
2395 In other words, this routine performs a computation, whereas the
2396 gen_lowpart* routines are conceptually lvalue or rvalue subreg
2397 operations. */
2398
2399 rtx
2400 extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
2401 {
2402 scalar_int_mode int_mode, src_int_mode;
2403
2404 if (mode == src_mode)
2405 return src;
2406
2407 if (CONSTANT_P (src))
2408 {
2409 /* simplify_gen_subreg can't be used here, as if simplify_subreg
2410 fails, it will happily create (subreg (symbol_ref)) or similar
2411 invalid SUBREGs. */
2412 poly_uint64 byte = subreg_lowpart_offset (mode, src_mode);
2413 rtx ret = simplify_subreg (mode, src, src_mode, byte);
2414 if (ret)
2415 return ret;
2416
2417 if (GET_MODE (src) == VOIDmode
2418 || !validate_subreg (mode, src_mode, src, byte))
2419 return NULL_RTX;
2420
2421 src = force_reg (GET_MODE (src), src);
2422 return gen_rtx_SUBREG (mode, src, byte);
2423 }
2424
2425 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2426 return NULL_RTX;
2427
2428 if (known_eq (GET_MODE_BITSIZE (mode), GET_MODE_BITSIZE (src_mode))
2429 && targetm.modes_tieable_p (mode, src_mode))
2430 {
2431 rtx x = gen_lowpart_common (mode, src);
2432 if (x)
2433 return x;
2434 }
2435
2436 if (!int_mode_for_mode (src_mode).exists (&src_int_mode)
2437 || !int_mode_for_mode (mode).exists (&int_mode))
2438 return NULL_RTX;
2439
2440 if (!targetm.modes_tieable_p (src_int_mode, src_mode))
2441 return NULL_RTX;
2442 if (!targetm.modes_tieable_p (int_mode, mode))
2443 return NULL_RTX;
2444
2445 src = gen_lowpart (src_int_mode, src);
2446 if (!validate_subreg (int_mode, src_int_mode, src,
2447 subreg_lowpart_offset (int_mode, src_int_mode)))
2448 return NULL_RTX;
2449
2450 src = convert_modes (int_mode, src_int_mode, src, true);
2451 src = gen_lowpart (mode, src);
2452 return src;
2453 }
2454 \f
2455 /* Add INC into TARGET. */
2456
2457 void
2458 expand_inc (rtx target, rtx inc)
2459 {
2460 rtx value = expand_binop (GET_MODE (target), add_optab,
2461 target, inc,
2462 target, 0, OPTAB_LIB_WIDEN);
2463 if (value != target)
2464 emit_move_insn (target, value);
2465 }
2466
2467 /* Subtract DEC from TARGET. */
2468
2469 void
2470 expand_dec (rtx target, rtx dec)
2471 {
2472 rtx value = expand_binop (GET_MODE (target), sub_optab,
2473 target, dec,
2474 target, 0, OPTAB_LIB_WIDEN);
2475 if (value != target)
2476 emit_move_insn (target, value);
2477 }
2478 \f
2479 /* Output a shift instruction for expression code CODE,
2480 with SHIFTED being the rtx for the value to shift,
2481 and AMOUNT the rtx for the amount to shift by.
2482 Store the result in the rtx TARGET, if that is convenient.
2483 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2484 Return the rtx for where the value is.
2485 If that cannot be done, abort the compilation unless MAY_FAIL is true,
2486 in which case 0 is returned. */
2487
2488 static rtx
2489 expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
2490 rtx amount, rtx target, int unsignedp, bool may_fail = false)
2491 {
2492 rtx op1, temp = 0;
2493 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2494 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2495 optab lshift_optab = ashl_optab;
2496 optab rshift_arith_optab = ashr_optab;
2497 optab rshift_uns_optab = lshr_optab;
2498 optab lrotate_optab = rotl_optab;
2499 optab rrotate_optab = rotr_optab;
2500 machine_mode op1_mode;
2501 scalar_mode scalar_mode = GET_MODE_INNER (mode);
2502 int attempt;
2503 bool speed = optimize_insn_for_speed_p ();
2504
2505 op1 = amount;
2506 op1_mode = GET_MODE (op1);
2507
2508 /* Determine whether the shift/rotate amount is a vector, or scalar. If the
2509 shift amount is a vector, use the vector/vector shift patterns. */
2510 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2511 {
2512 lshift_optab = vashl_optab;
2513 rshift_arith_optab = vashr_optab;
2514 rshift_uns_optab = vlshr_optab;
2515 lrotate_optab = vrotl_optab;
2516 rrotate_optab = vrotr_optab;
2517 }
2518
2519 /* We used to detect shift counts computed by NEGATE_EXPR
2520 and shift in the other direction, but that does not work
2521 on all machines. */
2522
2523 if (SHIFT_COUNT_TRUNCATED)
2524 {
2525 if (CONST_INT_P (op1)
2526 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2527 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2528 op1 = gen_int_shift_amount (mode,
2529 (unsigned HOST_WIDE_INT) INTVAL (op1)
2530 % GET_MODE_BITSIZE (scalar_mode));
2531 else if (GET_CODE (op1) == SUBREG
2532 && subreg_lowpart_p (op1)
2533 && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2534 && SCALAR_INT_MODE_P (GET_MODE (op1)))
2535 op1 = SUBREG_REG (op1);
2536 }
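/* Editor's note: for example, with SHIFT_COUNT_TRUNCATED set and a
   32-bit scalar mode, a constant shift count of 33 is reduced above
   to 33 % 32 = 1 before the shift is expanded. */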
2537
2538 /* Canonicalize rotates by constant amount. We may canonicalize
2539 to reduce the immediate or if the ISA can rotate by constants
2540 in only one direction. */
2541 if (rotate && reverse_rotate_by_imm_p (scalar_mode, left, op1))
2542 {
2543 op1 = gen_int_shift_amount (mode, (GET_MODE_BITSIZE (scalar_mode)
2544 - INTVAL (op1)));
2545 left = !left;
2546 code = left ? LROTATE_EXPR : RROTATE_EXPR;
2547 }
2548
2549 /* Rotation of 16-bit values by 8 bits is effectively equivalent to a bswaphi.
2550 Note that this is not the case for bigger values. For instance a rotation
2551 of 0x01020304 by 16 bits gives 0x03040102 which is different from
2552 0x04030201 (bswapsi). */
2553 if (rotate
2554 && CONST_INT_P (op1)
2555 && INTVAL (op1) == BITS_PER_UNIT
2556 && GET_MODE_SIZE (scalar_mode) == 2
2557 && optab_handler (bswap_optab, mode) != CODE_FOR_nothing)
2558 return expand_unop (mode, bswap_optab, shifted, NULL_RTX, unsignedp);
2559
2560 if (op1 == const0_rtx)
2561 return shifted;
2562
2563 /* Check whether it's cheaper to implement a left shift by a constant
2564 bit count by a sequence of additions. */
2565 if (code == LSHIFT_EXPR
2566 && CONST_INT_P (op1)
2567 && INTVAL (op1) > 0
2568 && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2569 && INTVAL (op1) < MAX_BITS_PER_WORD
2570 && (shift_cost (speed, mode, INTVAL (op1))
2571 > INTVAL (op1) * add_cost (speed, mode))
2572 && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2573 {
2574 int i;
2575 for (i = 0; i < INTVAL (op1); i++)
2576 {
2577 temp = force_reg (mode, shifted);
2578 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2579 unsignedp, OPTAB_LIB_WIDEN);
2580 }
2581 return shifted;
2582 }
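/* Editor's note: as a concrete illustration, a left shift by 2 is
   expanded by the loop above as two self-additions,
   t = x + x; t = t + t; which is chosen only when the additions are
   cheaper than the shift on the target. */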
2583
2584 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2585 {
2586 enum optab_methods methods;
2587
2588 if (attempt == 0)
2589 methods = OPTAB_DIRECT;
2590 else if (attempt == 1)
2591 methods = OPTAB_WIDEN;
2592 else
2593 methods = OPTAB_LIB_WIDEN;
2594
2595 if (rotate)
2596 {
2597 /* Widening does not work for rotation. */
2598 if (methods == OPTAB_WIDEN)
2599 continue;
2600 else if (methods == OPTAB_LIB_WIDEN)
2601 {
2602 /* If we have been unable to open-code this by a rotation,
2603 do it as the IOR of two shifts. I.e., to rotate A
2604 by N bits, compute
2605 (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2606 where C is the bitsize of A.
2607
2608 It is theoretically possible that the target machine might
2609 not be able to perform either shift and hence we would
2610 be making two libcalls rather than just the one for the
2611 shift (similarly if IOR could not be done). We will allow
2612 this extremely unlikely lossage to avoid complicating the
2613 code below. */
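/* Editor's note: an illustrative, self-contained sketch (not part of
   GCC) of the same identity for a 32-bit value; the function name is
   an assumption made for the example only.

     static unsigned int
     rotl32 (unsigned int a, unsigned int n)
     {
       // (A << N) | ((unsigned) A >> ((-N) & (C - 1))) with C == 32.
       // Masking both counts keeps them in [0, 31], so a rotation by 0
       // never shifts by the full width.
       return (a << (n & 31)) | (a >> (-n & 31));
     }
*/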
2614
2615 rtx subtarget = target == shifted ? 0 : target;
2616 rtx new_amount, other_amount;
2617 rtx temp1;
2618
2619 new_amount = op1;
2620 if (op1 == const0_rtx)
2621 return shifted;
2622 else if (CONST_INT_P (op1))
2623 other_amount = gen_int_shift_amount
2624 (mode, GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2625 else
2626 {
2627 other_amount
2628 = simplify_gen_unary (NEG, GET_MODE (op1),
2629 op1, GET_MODE (op1));
2630 HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
2631 other_amount
2632 = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2633 gen_int_mode (mask, GET_MODE (op1)));
2634 }
2635
2636 shifted = force_reg (mode, shifted);
2637
2638 temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2639 mode, shifted, new_amount, 0, 1);
2640 temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2641 mode, shifted, other_amount,
2642 subtarget, 1);
2643 return expand_binop (mode, ior_optab, temp, temp1, target,
2644 unsignedp, methods);
2645 }
2646
2647 temp = expand_binop (mode,
2648 left ? lrotate_optab : rrotate_optab,
2649 shifted, op1, target, unsignedp, methods);
2650 }
2651 else if (unsignedp)
2652 temp = expand_binop (mode,
2653 left ? lshift_optab : rshift_uns_optab,
2654 shifted, op1, target, unsignedp, methods);
2655
2656 /* Do arithmetic shifts.
2657 Also, if we are going to widen the operand, we can just as well
2658 use an arithmetic right-shift instead of a logical one. */
2659 if (temp == 0 && ! rotate
2660 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2661 {
2662 enum optab_methods methods1 = methods;
2663
2664 /* If trying to widen a log shift to an arithmetic shift,
2665 don't accept an arithmetic shift of the same size. */
2666 if (unsignedp)
2667 methods1 = OPTAB_MUST_WIDEN;
2668
2669 /* Arithmetic shift */
2670
2671 temp = expand_binop (mode,
2672 left ? lshift_optab : rshift_arith_optab,
2673 shifted, op1, target, unsignedp, methods1);
2674 }
2675
2676 /* We used to try extzv here for logical right shifts, but that was
2677 only useful for one machine, the VAX, and caused poor code
2678 generation there for lshrdi3, so the code was deleted and a
2679 define_expand for lshrsi3 was added to vax.md. */
2680 }
2681
2682 gcc_assert (temp != NULL_RTX || may_fail);
2683 return temp;
2684 }
2685
2686 /* Output a shift instruction for expression code CODE,
2687 with SHIFTED being the rtx for the value to shift,
2688 and AMOUNT the amount to shift by.
2689 Store the result in the rtx TARGET, if that is convenient.
2690 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2691 Return the rtx for where the value is. */
2692
2693 rtx
2694 expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2695 poly_int64 amount, rtx target, int unsignedp)
2696 {
2697 return expand_shift_1 (code, mode, shifted,
2698 gen_int_shift_amount (mode, amount),
2699 target, unsignedp);
2700 }
2701
2702 /* Likewise, but return 0 if that cannot be done. */
2703
2704 rtx
2705 maybe_expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2706 int amount, rtx target, int unsignedp)
2707 {
2708 return expand_shift_1 (code, mode,
2709 shifted, GEN_INT (amount), target, unsignedp, true);
2710 }
2711
2712 /* Output a shift instruction for expression code CODE,
2713 with SHIFTED being the rtx for the value to shift,
2714 and AMOUNT the tree for the amount to shift by.
2715 Store the result in the rtx TARGET, if that is convenient.
2716 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2717 Return the rtx for where the value is. */
2718
2719 rtx
2720 expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
2721 tree amount, rtx target, int unsignedp)
2722 {
2723 return expand_shift_1 (code, mode,
2724 shifted, expand_normal (amount), target, unsignedp);
2725 }
2726
2727 \f
2728 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2729 const struct mult_cost *, machine_mode mode);
2730 static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
2731 const struct algorithm *, enum mult_variant);
2732 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2733 static rtx extract_high_half (scalar_int_mode, rtx);
2734 static rtx expmed_mult_highpart (scalar_int_mode, rtx, rtx, rtx, int, int);
2735 static rtx expmed_mult_highpart_optab (scalar_int_mode, rtx, rtx, rtx,
2736 int, int);
2737 /* Compute and return the best algorithm for multiplying by T.
2738 The algorithm must cost less than COST_LIMIT.
2739 If retval.cost >= COST_LIMIT, no algorithm was found and all
2740 other fields of the returned struct are undefined.
2741 MODE is the machine mode of the multiplication. */
2742
2743 static void
2744 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2745 const struct mult_cost *cost_limit, machine_mode mode)
2746 {
2747 int m;
2748 struct algorithm *alg_in, *best_alg;
2749 struct mult_cost best_cost;
2750 struct mult_cost new_limit;
2751 int op_cost, op_latency;
2752 unsigned HOST_WIDE_INT orig_t = t;
2753 unsigned HOST_WIDE_INT q;
2754 int maxm, hash_index;
2755 bool cache_hit = false;
2756 enum alg_code cache_alg = alg_zero;
2757 bool speed = optimize_insn_for_speed_p ();
2758 scalar_int_mode imode;
2759 struct alg_hash_entry *entry_ptr;
2760
2761 /* Indicate that no algorithm is yet found. If no algorithm
2762 is found, this value will be returned and indicate failure. */
2763 alg_out->cost.cost = cost_limit->cost + 1;
2764 alg_out->cost.latency = cost_limit->latency + 1;
2765
2766 if (cost_limit->cost < 0
2767 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2768 return;
2769
2770 /* Be prepared for vector modes. */
2771 imode = as_a <scalar_int_mode> (GET_MODE_INNER (mode));
2772
2773 maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2774
2775 /* Restrict the bits of "t" to the multiplication's mode. */
2776 t &= GET_MODE_MASK (imode);
2777
2778 /* t == 1 can be done in zero cost. */
2779 if (t == 1)
2780 {
2781 alg_out->ops = 1;
2782 alg_out->cost.cost = 0;
2783 alg_out->cost.latency = 0;
2784 alg_out->op[0] = alg_m;
2785 return;
2786 }
2787
2788 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2789 fail now. */
2790 if (t == 0)
2791 {
2792 if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2793 return;
2794 else
2795 {
2796 alg_out->ops = 1;
2797 alg_out->cost.cost = zero_cost (speed);
2798 alg_out->cost.latency = zero_cost (speed);
2799 alg_out->op[0] = alg_zero;
2800 return;
2801 }
2802 }
2803
2804 /* We'll be needing a couple extra algorithm structures now. */
2805
2806 alg_in = XALLOCA (struct algorithm);
2807 best_alg = XALLOCA (struct algorithm);
2808 best_cost = *cost_limit;
2809
2810 /* Compute the hash index. */
2811 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2812
2813 /* See if we already know what to do for T. */
2814 entry_ptr = alg_hash_entry_ptr (hash_index);
2815 if (entry_ptr->t == t
2816 && entry_ptr->mode == mode
2817 && entry_ptr->speed == speed
2818 && entry_ptr->alg != alg_unknown)
2819 {
2820 cache_alg = entry_ptr->alg;
2821
2822 if (cache_alg == alg_impossible)
2823 {
2824 /* The cache tells us that it's impossible to synthesize
2825 multiplication by T within entry_ptr->cost. */
2826 if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2827 /* COST_LIMIT is at least as restrictive as the one
2828 recorded in the hash table, in which case we have no
2829 hope of synthesizing a multiplication. Just
2830 return. */
2831 return;
2832
2833 /* If we get here, COST_LIMIT is less restrictive than the
2834 one recorded in the hash table, so we may be able to
2835 synthesize a multiplication. Proceed as if we didn't
2836 have the cache entry. */
2837 }
2838 else
2839 {
2840 if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2841 /* The cached algorithm shows that this multiplication
2842 requires more cost than COST_LIMIT. Just return. This
2843 way, we don't clobber this cache entry with
2844 alg_impossible but retain useful information. */
2845 return;
2846
2847 cache_hit = true;
2848
2849 switch (cache_alg)
2850 {
2851 case alg_shift:
2852 goto do_alg_shift;
2853
2854 case alg_add_t_m2:
2855 case alg_sub_t_m2:
2856 goto do_alg_addsub_t_m2;
2857
2858 case alg_add_factor:
2859 case alg_sub_factor:
2860 goto do_alg_addsub_factor;
2861
2862 case alg_add_t2_m:
2863 goto do_alg_add_t2_m;
2864
2865 case alg_sub_t2_m:
2866 goto do_alg_sub_t2_m;
2867
2868 default:
2869 gcc_unreachable ();
2870 }
2871 }
2872 }
2873
2874 /* If we have a group of zero bits at the low-order part of T, try
2875 multiplying by the remaining bits and then doing a shift. */
2876
2877 if ((t & 1) == 0)
2878 {
2879 do_alg_shift:
2880 m = ctz_or_zero (t); /* m = number of low zero bits */
2881 if (m < maxm)
2882 {
2883 q = t >> m;
2884 /* The function expand_shift will choose between a shift and
2885 a sequence of additions, so the observed cost is given as
2886 MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */
2887 op_cost = m * add_cost (speed, mode);
2888 if (shift_cost (speed, mode, m) < op_cost)
2889 op_cost = shift_cost (speed, mode, m);
2890 new_limit.cost = best_cost.cost - op_cost;
2891 new_limit.latency = best_cost.latency - op_cost;
2892 synth_mult (alg_in, q, &new_limit, mode);
2893
2894 alg_in->cost.cost += op_cost;
2895 alg_in->cost.latency += op_cost;
2896 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2897 {
2898 best_cost = alg_in->cost;
2899 std::swap (alg_in, best_alg);
2900 best_alg->log[best_alg->ops] = m;
2901 best_alg->op[best_alg->ops] = alg_shift;
2902 }
2903
2904 /* See if treating ORIG_T as a signed number yields a better
2905 sequence. Try this sequence only for a negative ORIG_T
2906 as it would be useless for a non-negative ORIG_T. */
2907 if ((HOST_WIDE_INT) orig_t < 0)
2908 {
2909 /* Shift ORIG_T as follows because a right shift of a
2910 negative-valued signed type is implementation
2911 defined. */
2912 q = ~(~orig_t >> m);
2913 /* The function expand_shift will choose between a shift
2914 and a sequence of additions, so the observed cost is
2915 given as MIN (m * add_cost(speed, mode),
2916 shift_cost(speed, mode, m)). */
2917 op_cost = m * add_cost (speed, mode);
2918 if (shift_cost (speed, mode, m) < op_cost)
2919 op_cost = shift_cost (speed, mode, m);
2920 new_limit.cost = best_cost.cost - op_cost;
2921 new_limit.latency = best_cost.latency - op_cost;
2922 synth_mult (alg_in, q, &new_limit, mode);
2923
2924 alg_in->cost.cost += op_cost;
2925 alg_in->cost.latency += op_cost;
2926 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2927 {
2928 best_cost = alg_in->cost;
2929 std::swap (alg_in, best_alg);
2930 best_alg->log[best_alg->ops] = m;
2931 best_alg->op[best_alg->ops] = alg_shift;
2932 }
2933 }
2934 }
2935 if (cache_hit)
2936 goto done;
2937 }
2938
2939 /* If we have an odd number, add or subtract one. */
2940 if ((t & 1) != 0)
2941 {
2942 unsigned HOST_WIDE_INT w;
2943
2944 do_alg_addsub_t_m2:
2945 for (w = 1; (w & t) != 0; w <<= 1)
2946 ;
2947 /* If T was -1, then W will be zero after the loop. This is another
2948 case where T ends with ...111. Handling this with (T + 1) and
2949 subtracting 1 produces slightly better code and results in much
2950 faster algorithm selection than treating it like the ...0111 case
2951 below. */
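/* Editor's note: for example, t = 7 ends in ...111, so it is typically
   cheaper to synthesize x * 8 with one shift and subtract x once,
   i.e. x * 7 = (x << 3) - x, than to build 7 out of ...0111-style
   additions. */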
2952 if (w == 0
2953 || (w > 2
2954 /* Reject the case where t is 3.
2955 Thus we prefer addition in that case. */
2956 && t != 3))
2957 {
2958 /* T ends with ...111. Multiply by (T + 1) and subtract T. */
2959
2960 op_cost = add_cost (speed, mode);
2961 new_limit.cost = best_cost.cost - op_cost;
2962 new_limit.latency = best_cost.latency - op_cost;
2963 synth_mult (alg_in, t + 1, &new_limit, mode);
2964
2965 alg_in->cost.cost += op_cost;
2966 alg_in->cost.latency += op_cost;
2967 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2968 {
2969 best_cost = alg_in->cost;
2970 std::swap (alg_in, best_alg);
2971 best_alg->log[best_alg->ops] = 0;
2972 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2973 }
2974 }
2975 else
2976 {
2977 /* T ends with ...01 or ...011. Multiply by (T - 1) and add T. */
2978
2979 op_cost = add_cost (speed, mode);
2980 new_limit.cost = best_cost.cost - op_cost;
2981 new_limit.latency = best_cost.latency - op_cost;
2982 synth_mult (alg_in, t - 1, &new_limit, mode);
2983
2984 alg_in->cost.cost += op_cost;
2985 alg_in->cost.latency += op_cost;
2986 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2987 {
2988 best_cost = alg_in->cost;
2989 std::swap (alg_in, best_alg);
2990 best_alg->log[best_alg->ops] = 0;
2991 best_alg->op[best_alg->ops] = alg_add_t_m2;
2992 }
2993 }
2994
2995 /* We may be able to calculate a * -7, a * -15, a * -31, etc.
2996 quickly with a - a * n for some appropriate constant n. */
2997 m = exact_log2 (-orig_t + 1);
2998 if (m >= 0 && m < maxm)
2999 {
3000 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3001 /* If the target has a cheap shift-and-subtract insn use
3002 that in preference to a shift insn followed by a sub insn.
3003 Assume that the shift-and-sub is "atomic" with a latency
3004 equal to its cost, otherwise assume that on superscalar
3005 hardware the shift may be executed concurrently with the
3006 earlier steps in the algorithm. */
3007 if (shiftsub1_cost (speed, mode, m) <= op_cost)
3008 {
3009 op_cost = shiftsub1_cost (speed, mode, m);
3010 op_latency = op_cost;
3011 }
3012 else
3013 op_latency = add_cost (speed, mode);
3014
3015 new_limit.cost = best_cost.cost - op_cost;
3016 new_limit.latency = best_cost.latency - op_latency;
3017 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
3018 &new_limit, mode);
3019
3020 alg_in->cost.cost += op_cost;
3021 alg_in->cost.latency += op_latency;
3022 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3023 {
3024 best_cost = alg_in->cost;
3025 std::swap (alg_in, best_alg);
3026 best_alg->log[best_alg->ops] = m;
3027 best_alg->op[best_alg->ops] = alg_sub_t_m2;
3028 }
3029 }
3030
3031 if (cache_hit)
3032 goto done;
3033 }
3034
3035 /* Look for factors of t of the form
3036 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
3037 If we find such a factor, we can multiply by t using an algorithm that
3038 multiplies by q, shift the result by m and add/subtract it to itself.
3039
3040 We search for large factors first and loop down, even if large factors
3041 are less probable than small; if we find a large factor we will find a
3042 good sequence quickly, and therefore be able to prune (by decreasing
3043 COST_LIMIT) the search. */
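/* Editor's note: a worked example of the factor search. For t = 45,
   45 = 15 * 3 with 15 = 2**4 - 1, so x * 45 can be built as
   x3 = (x << 1) + x; result = (x3 << 4) - x3, i.e. one recursive
   synth_mult call for the factor 3 plus one shift-and-subtract
   step. */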
3044
3045 do_alg_addsub_factor:
3046 for (m = floor_log2 (t - 1); m >= 2; m--)
3047 {
3048 unsigned HOST_WIDE_INT d;
3049
3050 d = (HOST_WIDE_INT_1U << m) + 1;
3051 if (t % d == 0 && t > d && m < maxm
3052 && (!cache_hit || cache_alg == alg_add_factor))
3053 {
3054 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3055 if (shiftadd_cost (speed, mode, m) <= op_cost)
3056 op_cost = shiftadd_cost (speed, mode, m);
3057
3058 op_latency = op_cost;
3059
3060
3061 new_limit.cost = best_cost.cost - op_cost;
3062 new_limit.latency = best_cost.latency - op_latency;
3063 synth_mult (alg_in, t / d, &new_limit, mode);
3064
3065 alg_in->cost.cost += op_cost;
3066 alg_in->cost.latency += op_latency;
3067 if (alg_in->cost.latency < op_cost)
3068 alg_in->cost.latency = op_cost;
3069 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3070 {
3071 best_cost = alg_in->cost;
3072 std::swap (alg_in, best_alg);
3073 best_alg->log[best_alg->ops] = m;
3074 best_alg->op[best_alg->ops] = alg_add_factor;
3075 }
3076 /* Other factors will have been taken care of in the recursion. */
3077 break;
3078 }
3079
3080 d = (HOST_WIDE_INT_1U << m) - 1;
3081 if (t % d == 0 && t > d && m < maxm
3082 && (!cache_hit || cache_alg == alg_sub_factor))
3083 {
3084 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3085 if (shiftsub0_cost (speed, mode, m) <= op_cost)
3086 op_cost = shiftsub0_cost (speed, mode, m);
3087
3088 op_latency = op_cost;
3089
3090 new_limit.cost = best_cost.cost - op_cost;
3091 new_limit.latency = best_cost.latency - op_latency;
3092 synth_mult (alg_in, t / d, &new_limit, mode);
3093
3094 alg_in->cost.cost += op_cost;
3095 alg_in->cost.latency += op_latency;
3096 if (alg_in->cost.latency < op_cost)
3097 alg_in->cost.latency = op_cost;
3098 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3099 {
3100 best_cost = alg_in->cost;
3101 std::swap (alg_in, best_alg);
3102 best_alg->log[best_alg->ops] = m;
3103 best_alg->op[best_alg->ops] = alg_sub_factor;
3104 }
3105 break;
3106 }
3107 }
3108 if (cache_hit)
3109 goto done;
3110
3111 /* Try shift-and-add (load effective address) instructions,
3112 i.e. do a*3, a*5, a*9. */
3113 if ((t & 1) != 0)
3114 {
3115 do_alg_add_t2_m:
3116 q = t - 1;
3117 m = ctz_hwi (q);
3118 if (q && m < maxm)
3119 {
3120 op_cost = shiftadd_cost (speed, mode, m);
3121 new_limit.cost = best_cost.cost - op_cost;
3122 new_limit.latency = best_cost.latency - op_cost;
3123 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
3124
3125 alg_in->cost.cost += op_cost;
3126 alg_in->cost.latency += op_cost;
3127 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3128 {
3129 best_cost = alg_in->cost;
3130 std::swap (alg_in, best_alg);
3131 best_alg->log[best_alg->ops] = m;
3132 best_alg->op[best_alg->ops] = alg_add_t2_m;
3133 }
3134 }
3135 if (cache_hit)
3136 goto done;
3137
3138 do_alg_sub_t2_m:
3139 q = t + 1;
3140 m = ctz_hwi (q);
3141 if (q && m < maxm)
3142 {
3143 op_cost = shiftsub0_cost (speed, mode, m);
3144 new_limit.cost = best_cost.cost - op_cost;
3145 new_limit.latency = best_cost.latency - op_cost;
3146 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
3147
3148 alg_in->cost.cost += op_cost;
3149 alg_in->cost.latency += op_cost;
3150 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3151 {
3152 best_cost = alg_in->cost;
3153 std::swap (alg_in, best_alg);
3154 best_alg->log[best_alg->ops] = m;
3155 best_alg->op[best_alg->ops] = alg_sub_t2_m;
3156 }
3157 }
3158 if (cache_hit)
3159 goto done;
3160 }
3161
3162 done:
3163 /* If best_cost has not decreased, we have not found any algorithm. */
3164 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
3165 {
3166 /* We failed to find an algorithm. Record alg_impossible for
3167 this case (that is, <T, MODE, COST_LIMIT>) so that next time
3168 we are asked to find an algorithm for T within the same or
3169 lower COST_LIMIT, we can immediately return to the
3170 caller. */
3171 entry_ptr->t = t;
3172 entry_ptr->mode = mode;
3173 entry_ptr->speed = speed;
3174 entry_ptr->alg = alg_impossible;
3175 entry_ptr->cost = *cost_limit;
3176 return;
3177 }
3178
3179 /* Cache the result. */
3180 if (!cache_hit)
3181 {
3182 entry_ptr->t = t;
3183 entry_ptr->mode = mode;
3184 entry_ptr->speed = speed;
3185 entry_ptr->alg = best_alg->op[best_alg->ops];
3186 entry_ptr->cost.cost = best_cost.cost;
3187 entry_ptr->cost.latency = best_cost.latency;
3188 }
3189
3190 /* If we are getting too long a sequence for `struct algorithm'
3191 to record, make this search fail. */
3192 if (best_alg->ops == MAX_BITS_PER_WORD)
3193 return;
3194
3195 /* Copy the algorithm from temporary space to the space at alg_out.
3196 We avoid using structure assignment because the majority of
3197 best_alg is normally undefined, and this is a critical function. */
3198 alg_out->ops = best_alg->ops + 1;
3199 alg_out->cost = best_cost;
3200 memcpy (alg_out->op, best_alg->op,
3201 alg_out->ops * sizeof *alg_out->op);
3202 memcpy (alg_out->log, best_alg->log,
3203 alg_out->ops * sizeof *alg_out->log);
3204 }
3205 \f
3206 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
3207 Try three variations:
3208
3209 - a shift/add sequence based on VAL itself
3210 - a shift/add sequence based on -VAL, followed by a negation
3211 - a shift/add sequence based on VAL - 1, followed by an addition.
3212
3213 Return true if the cheapest of these cost less than MULT_COST,
3214 describing the algorithm in *ALG and final fixup in *VARIANT. */
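/* Editor's note: two small illustrations of the variants, with
   hypothetical costs. For VAL = -5, synthesizing 5 as (x << 2) + x
   and negating the result (negate_variant) may beat a direct sequence
   for -5. The add_variant synthesizes VAL - 1 and then adds the
   operand once, which can win when VAL - 1 has a much simpler bit
   pattern than VAL. */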
3215
3216 bool
3217 choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
3218 struct algorithm *alg, enum mult_variant *variant,
3219 int mult_cost)
3220 {
3221 struct algorithm alg2;
3222 struct mult_cost limit;
3223 int op_cost;
3224 bool speed = optimize_insn_for_speed_p ();
3225
3226 /* Fail quickly for impossible bounds. */
3227 if (mult_cost < 0)
3228 return false;
3229
3230 /* Ensure that mult_cost provides a reasonable upper bound.
3231 Any constant multiplication can be performed with less
3232 than 2 * bits additions. */
3233 op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
3234 if (mult_cost > op_cost)
3235 mult_cost = op_cost;
3236
3237 *variant = basic_variant;
3238 limit.cost = mult_cost;
3239 limit.latency = mult_cost;
3240 synth_mult (alg, val, &limit, mode);
3241
3242 /* This works only if the inverted value actually fits in an
3243 `unsigned int' */
3244 if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
3245 {
3246 op_cost = neg_cost (speed, mode);
3247 if (MULT_COST_LESS (&alg->cost, mult_cost))
3248 {
3249 limit.cost = alg->cost.cost - op_cost;
3250 limit.latency = alg->cost.latency - op_cost;
3251 }
3252 else
3253 {
3254 limit.cost = mult_cost - op_cost;
3255 limit.latency = mult_cost - op_cost;
3256 }
3257
3258 synth_mult (&alg2, -val, &limit, mode);
3259 alg2.cost.cost += op_cost;
3260 alg2.cost.latency += op_cost;
3261 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3262 *alg = alg2, *variant = negate_variant;
3263 }
3264
3265 /* This proves very useful for division-by-constant. */
3266 op_cost = add_cost (speed, mode);
3267 if (MULT_COST_LESS (&alg->cost, mult_cost))
3268 {
3269 limit.cost = alg->cost.cost - op_cost;
3270 limit.latency = alg->cost.latency - op_cost;
3271 }
3272 else
3273 {
3274 limit.cost = mult_cost - op_cost;
3275 limit.latency = mult_cost - op_cost;
3276 }
3277
3278 synth_mult (&alg2, val - 1, &limit, mode);
3279 alg2.cost.cost += op_cost;
3280 alg2.cost.latency += op_cost;
3281 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3282 *alg = alg2, *variant = add_variant;
3283
3284 return MULT_COST_LESS (&alg->cost, mult_cost);
3285 }
3286
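/* For illustration, a hedged sketch of the three variants tried above,
   assuming plain 32-bit arithmetic (the exact shift/add sequence chosen
   depends on the target costs):

     VAL = 10:           basic_variant, e.g. x * 10 = ((x + (x << 2)) << 1).
     VAL = -(1 << 4):    negate_variant; synth_mult on 16 is a single shift,
                         so x * VAL = -(x << 4).
     VAL = (1 << 4) + 1: add_variant; synth_mult on VAL - 1 = 16 is a single
                         shift, so x * VAL = (x << 4) + x.

   The cheapest candidate is kept, provided it still beats MULT_COST.  */
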
3287 /* A subroutine of expand_mult, used for constant multiplications.
3288 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
3289 convenient. Use the shift/add sequence described by ALG and apply
3290 the final fixup specified by VARIANT. */
3291
3292 static rtx
3293 expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
3294 rtx target, const struct algorithm *alg,
3295 enum mult_variant variant)
3296 {
3297 unsigned HOST_WIDE_INT val_so_far;
3298 rtx_insn *insn;
3299 rtx accum, tem;
3300 int opno;
3301 machine_mode nmode;
3302
3303 /* Avoid referencing memory over and over, and avoid invalid sharing
3304 on SUBREGs. */
3305 op0 = force_reg (mode, op0);
3306
3307 /* ACCUM starts out either as OP0 or as a zero, depending on
3308 the first operation. */
3309
3310 if (alg->op[0] == alg_zero)
3311 {
3312 accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
3313 val_so_far = 0;
3314 }
3315 else if (alg->op[0] == alg_m)
3316 {
3317 accum = copy_to_mode_reg (mode, op0);
3318 val_so_far = 1;
3319 }
3320 else
3321 gcc_unreachable ();
3322
3323 for (opno = 1; opno < alg->ops; opno++)
3324 {
3325 int log = alg->log[opno];
3326 rtx shift_subtarget = optimize ? 0 : accum;
3327 rtx add_target
3328 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
3329 && !optimize)
3330 ? target : 0;
3331 rtx accum_target = optimize ? 0 : accum;
3332 rtx accum_inner;
3333
3334 switch (alg->op[opno])
3335 {
3336 case alg_shift:
3337 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3338 /* REG_EQUAL note will be attached to the following insn. */
3339 emit_move_insn (accum, tem);
3340 val_so_far <<= log;
3341 break;
3342
3343 case alg_add_t_m2:
3344 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3345 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3346 add_target ? add_target : accum_target);
3347 val_so_far += HOST_WIDE_INT_1U << log;
3348 break;
3349
3350 case alg_sub_t_m2:
3351 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3352 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3353 add_target ? add_target : accum_target);
3354 val_so_far -= HOST_WIDE_INT_1U << log;
3355 break;
3356
3357 case alg_add_t2_m:
3358 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3359 log, shift_subtarget, 0);
3360 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3361 add_target ? add_target : accum_target);
3362 val_so_far = (val_so_far << log) + 1;
3363 break;
3364
3365 case alg_sub_t2_m:
3366 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3367 log, shift_subtarget, 0);
3368 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3369 add_target ? add_target : accum_target);
3370 val_so_far = (val_so_far << log) - 1;
3371 break;
3372
3373 case alg_add_factor:
3374 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3375 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3376 add_target ? add_target : accum_target);
3377 val_so_far += val_so_far << log;
3378 break;
3379
3380 case alg_sub_factor:
3381 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3382 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3383 (add_target
3384 ? add_target : (optimize ? 0 : tem)));
3385 val_so_far = (val_so_far << log) - val_so_far;
3386 break;
3387
3388 default:
3389 gcc_unreachable ();
3390 }
3391
3392 if (SCALAR_INT_MODE_P (mode))
3393 {
3394 /* Write a REG_EQUAL note on the last insn so that we can cse
3395 multiplication sequences. Note that if ACCUM is a SUBREG,
3396 we've set the inner register and must properly indicate that. */
3397 tem = op0, nmode = mode;
3398 accum_inner = accum;
3399 if (GET_CODE (accum) == SUBREG)
3400 {
3401 accum_inner = SUBREG_REG (accum);
3402 nmode = GET_MODE (accum_inner);
3403 tem = gen_lowpart (nmode, op0);
3404 }
3405
3406 /* Don't add a REG_EQUAL note if tem is a paradoxical SUBREG.
3407 In that case, only the low bits of accum would be guaranteed to
3408 be equal to the content of the REG_EQUAL note; the upper bits
3409 can be anything. */
3410 if (!paradoxical_subreg_p (tem))
3411 {
3412 insn = get_last_insn ();
3413 wide_int wval_so_far
3414 = wi::uhwi (val_so_far,
3415 GET_MODE_PRECISION (as_a <scalar_mode> (nmode)));
3416 rtx c = immed_wide_int_const (wval_so_far, nmode);
3417 set_dst_reg_note (insn, REG_EQUAL, gen_rtx_MULT (nmode, tem, c),
3418 accum_inner);
3419 }
3420 }
3421 }
3422
3423 if (variant == negate_variant)
3424 {
3425 val_so_far = -val_so_far;
3426 accum = expand_unop (mode, neg_optab, accum, target, 0);
3427 }
3428 else if (variant == add_variant)
3429 {
3430 val_so_far = val_so_far + 1;
3431 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3432 }
3433
3434 /* Compare only the bits of val and val_so_far that are significant
3435 in the result mode, to avoid sign-/zero-extension confusion. */
3436 nmode = GET_MODE_INNER (mode);
3437 val &= GET_MODE_MASK (nmode);
3438 val_so_far &= GET_MODE_MASK (nmode);
3439 gcc_assert (val == (HOST_WIDE_INT) val_so_far);
3440
3441 return accum;
3442 }
3443
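/* For illustration, a walk-through of the loop above for VAL = 10, assuming
   synth_mult produced the algorithm {alg_m, alg_add_factor (log 2),
   alg_shift (log 1)}:

     opno 0: alg_m           accum = op0                   val_so_far = 1
     opno 1: alg_add_factor  accum = accum + (accum << 2)  val_so_far = 5
     opno 2: alg_shift       accum = accum << 1            val_so_far = 10

   The final assertion then checks that val_so_far, reduced to the mode's
   precision, equals VAL.  */
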
3444 /* Perform a multiplication and return an rtx for the result.
3445 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3446 TARGET is a suggestion for where to store the result (an rtx).
3447
3448 We check specially for a constant integer as OP1.
3449 If you want this check for OP0 as well, then before calling
3450 you should swap the two operands if OP0 would be constant. */
3451
3452 rtx
3453 expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3454 int unsignedp, bool no_libcall)
3455 {
3456 enum mult_variant variant;
3457 struct algorithm algorithm;
3458 rtx scalar_op1;
3459 int max_cost;
3460 bool speed = optimize_insn_for_speed_p ();
3461 bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3462
3463 if (CONSTANT_P (op0))
3464 std::swap (op0, op1);
3465
3466 /* For vectors, there are several simplifications that can be made if
3467 all elements of the vector constant are identical. */
3468 scalar_op1 = unwrap_const_vec_duplicate (op1);
3469
3470 if (INTEGRAL_MODE_P (mode))
3471 {
3472 rtx fake_reg;
3473 HOST_WIDE_INT coeff;
3474 bool is_neg;
3475 int mode_bitsize;
3476
3477 if (op1 == CONST0_RTX (mode))
3478 return op1;
3479 if (op1 == CONST1_RTX (mode))
3480 return op0;
3481 if (op1 == CONSTM1_RTX (mode))
3482 return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3483 op0, target, 0);
3484
3485 if (do_trapv)
3486 goto skip_synth;
3487
3488 /* If mode is an integer vector mode, check whether the backend supports
3489 vector lshift (by scalar or vector) at all. If not, we can't use a
3490 synthesized multiply. */
3491 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3492 && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
3493 && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
3494 goto skip_synth;
3495
3496 /* These are the operations that are potentially turned into
3497 a sequence of shifts and additions. */
3498 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3499
3500 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3501 less than or equal in size to `unsigned int' this doesn't matter.
3502 If the mode is larger than `unsigned int', then synth_mult works
3503 only if the constant value exactly fits in an `unsigned int' without
3504 any truncation. This means that multiplying by negative values does
3505 not work; results are off by 2^32 on a 32 bit machine. */
3506 if (CONST_INT_P (scalar_op1))
3507 {
3508 coeff = INTVAL (scalar_op1);
3509 is_neg = coeff < 0;
3510 }
3511 #if TARGET_SUPPORTS_WIDE_INT
3512 else if (CONST_WIDE_INT_P (scalar_op1))
3513 #else
3514 else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3515 #endif
3516 {
3517 int shift = wi::exact_log2 (rtx_mode_t (scalar_op1, mode));
3518 /* Perfect power of 2 (other than 1, which is handled above). */
3519 if (shift > 0)
3520 return expand_shift (LSHIFT_EXPR, mode, op0,
3521 shift, target, unsignedp);
3522 else
3523 goto skip_synth;
3524 }
3525 else
3526 goto skip_synth;
3527
3528 /* We used to test optimize here, on the grounds that it's better to
3529 produce a smaller program when -O is not used. But this causes
3530 such a terrible slowdown sometimes that it seems better to always
3531 use synth_mult. */
3532
3533 /* Special case powers of two. */
3534 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3535 && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3536 return expand_shift (LSHIFT_EXPR, mode, op0,
3537 floor_log2 (coeff), target, unsignedp);
3538
3539 fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3540
3541 /* Attempt to handle multiplication of DImode values by negative
3542 coefficients, by performing the multiplication by a positive
3543 multiplier and then inverting the result. */
3544 if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3545 {
3546 /* It's safe to use -coeff even for INT_MIN, as the
3547 result is interpreted as an unsigned coefficient.
3548 Exclude cost of op0 from max_cost to match the cost
3549 calculation of the synth_mult. */
3550 coeff = -(unsigned HOST_WIDE_INT) coeff;
3551 max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
3552 mode, speed)
3553 - neg_cost (speed, mode));
3554 if (max_cost <= 0)
3555 goto skip_synth;
3556
3557 /* Special case powers of two. */
3558 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3559 {
3560 rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3561 floor_log2 (coeff), target, unsignedp);
3562 return expand_unop (mode, neg_optab, temp, target, 0);
3563 }
3564
3565 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3566 max_cost))
3567 {
3568 rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3569 &algorithm, variant);
3570 return expand_unop (mode, neg_optab, temp, target, 0);
3571 }
3572 goto skip_synth;
3573 }
3574
3575 /* Exclude cost of op0 from max_cost to match the cost
3576 calculation of the synth_mult. */
3577 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), mode, speed);
3578 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3579 return expand_mult_const (mode, op0, coeff, target,
3580 &algorithm, variant);
3581 }
3582 skip_synth:
3583
3584 /* Expand x*2.0 as x+x. */
3585 if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1)
3586 && real_equal (CONST_DOUBLE_REAL_VALUE (scalar_op1), &dconst2))
3587 {
3588 op0 = force_reg (GET_MODE (op0), op0);
3589 return expand_binop (mode, add_optab, op0, op0,
3590 target, unsignedp,
3591 no_libcall ? OPTAB_WIDEN : OPTAB_LIB_WIDEN);
3592 }
3593
3594 /* This used to use umul_optab if unsigned, but for non-widening multiply
3595 there is no difference between signed and unsigned. */
3596 op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3597 op0, op1, target, unsignedp,
3598 no_libcall ? OPTAB_WIDEN : OPTAB_LIB_WIDEN);
3599 gcc_assert (op0 || no_libcall);
3600 return op0;
3601 }
3602
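/* For illustration, a sketch of the negative-coefficient path above for a
   mode wider than HOST_WIDE_INT (say TImode on a 64-bit host):

     x * -6  ==>  t = (x << 1) + (x << 2);  result = -t

   i.e. synth_mult is run on the negated (positive) coefficient and a final
   negation is appended, with neg_cost deducted from the cost budget.  */
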
3603 /* Return a cost estimate for multiplying a register by the given
3604 COEFFicient in the given MODE and SPEED. */
3605
3606 int
3607 mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
3608 {
3609 int max_cost;
3610 struct algorithm algorithm;
3611 enum mult_variant variant;
3612
3613 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3614 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg),
3615 mode, speed);
3616 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3617 return algorithm.cost.cost;
3618 else
3619 return max_cost;
3620 }
3621
3622 /* Perform a widening multiplication and return an rtx for the result.
3623 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3624 TARGET is a suggestion for where to store the result (an rtx).
3625 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3626 or smul_widen_optab.
3627
3628 We check specially for a constant integer as OP1, comparing the
3629 cost of a widening multiply against the cost of a sequence of shifts
3630 and adds. */
3631
3632 rtx
3633 expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3634 int unsignedp, optab this_optab)
3635 {
3636 bool speed = optimize_insn_for_speed_p ();
3637 rtx cop1;
3638
3639 if (CONST_INT_P (op1)
3640 && GET_MODE (op0) != VOIDmode
3641 && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3642 this_optab == umul_widen_optab))
3643 && CONST_INT_P (cop1)
3644 && (INTVAL (cop1) >= 0
3645 || HWI_COMPUTABLE_MODE_P (mode)))
3646 {
3647 HOST_WIDE_INT coeff = INTVAL (cop1);
3648 int max_cost;
3649 enum mult_variant variant;
3650 struct algorithm algorithm;
3651
3652 if (coeff == 0)
3653 return CONST0_RTX (mode);
3654
3655 /* Special case powers of two. */
3656 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3657 {
3658 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3659 return expand_shift (LSHIFT_EXPR, mode, op0,
3660 floor_log2 (coeff), target, unsignedp);
3661 }
3662
3663 /* Exclude cost of op0 from max_cost to match the cost
3664 calculation of the synth_mult. */
3665 max_cost = mul_widen_cost (speed, mode);
3666 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3667 max_cost))
3668 {
3669 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3670 return expand_mult_const (mode, op0, coeff, target,
3671 &algorithm, variant);
3672 }
3673 }
3674 return expand_binop (mode, this_optab, op0, op1, target,
3675 unsignedp, OPTAB_LIB_WIDEN);
3676 }
3677 \f
3678 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3679 replace division by D, and put the least significant N bits of the result
3680 in *MULTIPLIER_PTR and return the most significant bit.
3681
3682 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT); the
3683 needed precision is in PRECISION (should be <= N).
3684
3685 PRECISION should be as small as possible so this function can choose
3686 the multiplier more freely.
3687
3688 The rounded-up logarithm of D is placed in *lgup_ptr. A shift count that
3689 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3690
3691 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3692 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
3693
3694 unsigned HOST_WIDE_INT
3695 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3696 unsigned HOST_WIDE_INT *multiplier_ptr,
3697 int *post_shift_ptr, int *lgup_ptr)
3698 {
3699 int lgup, post_shift;
3700 int pow, pow2;
3701
3702 /* lgup = ceil(log2(divisor)); */
3703 lgup = ceil_log2 (d);
3704
3705 gcc_assert (lgup <= n);
3706
3707 pow = n + lgup;
3708 pow2 = n + lgup - precision;
3709
3710 /* mlow = 2^(N + lgup)/d */
3711 wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
3712 wide_int mlow = wi::udiv_trunc (val, d);
3713
3714 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3715 val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3716 wide_int mhigh = wi::udiv_trunc (val, d);
3717
3718 /* If precision == N, then mlow, mhigh exceed 2^N
3719 (but they do not exceed 2^(N+1)). */
3720
3721 /* Reduce to lowest terms. */
3722 for (post_shift = lgup; post_shift > 0; post_shift--)
3723 {
3724 unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3725 HOST_BITS_PER_WIDE_INT);
3726 unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3727 HOST_BITS_PER_WIDE_INT);
3728 if (ml_lo >= mh_lo)
3729 break;
3730
3731 mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3732 mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
3733 }
3734
3735 *post_shift_ptr = post_shift;
3736 *lgup_ptr = lgup;
3737 if (n < HOST_BITS_PER_WIDE_INT)
3738 {
3739 unsigned HOST_WIDE_INT mask = (HOST_WIDE_INT_1U << n) - 1;
3740 *multiplier_ptr = mhigh.to_uhwi () & mask;
3741 return mhigh.to_uhwi () > mask;
3742 }
3743 else
3744 {
3745 *multiplier_ptr = mhigh.to_uhwi ();
3746 return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
3747 }
3748 }
3749
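/* For illustration, a worked example of the computation above, assuming
   D = 3 and N = PRECISION = 32 (the caller chooses these values):

     lgup  = 2
     mlow  = 2^34 / 3         = 0x155555555
     mhigh = (2^34 + 2^2) / 3 = 0x155555556

   The reduction loop halves both values once (their halves still differ),
   leaving POST_SHIFT = 1, mlow = 0xAAAAAAAA and mhigh = 0xAAAAAAAB.  Since
   mhigh now fits in 32 bits, the function stores 0xAAAAAAAB in
   *MULTIPLIER_PTR and returns 0 (no extra high bit).  A caller can then
   compute x / 3 as the high 32 bits of x * 0xAAAAAAAB, shifted right by
   POST_SHIFT = 1.  */
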
3750 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3751 congruent to 1 (mod 2**N). */
3752
3753 static unsigned HOST_WIDE_INT
3754 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3755 {
3756 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3757
3758 /* The algorithm notes that the choice y = x satisfies
3759 x*y == 1 mod 2^3, since x is assumed odd.
3760 Each iteration doubles the number of bits of significance in y. */
3761
3762 unsigned HOST_WIDE_INT mask;
3763 unsigned HOST_WIDE_INT y = x;
3764 int nbit = 3;
3765
3766 mask = (n == HOST_BITS_PER_WIDE_INT
3767 ? HOST_WIDE_INT_M1U
3768 : (HOST_WIDE_INT_1U << n) - 1);
3769
3770 while (nbit < n)
3771 {
3772 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3773 nbit *= 2;
3774 }
3775 return y;
3776 }
3777
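/* For illustration, the iteration above applied to X = 3, N = 8
   (each step doubles the number of valid low-order bits):

     y = 3                              3 *   3 =   9 == 1  (mod 2^3)
     y = 3 * (2 - 9) & 0xff     = 235   3 * 235 = 705 == 1  (mod 2^6)
     y = 235 * (2 - 705) & 0xff = 171   3 * 171 = 513 == 1  (mod 2^8)

   so invert_mod2n (3, 8) == 171.  */
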
3778 /* Emit code to adjust ADJ_OPERAND after a multiplication of OP0 and OP1
3779 that was performed with the wrong signedness. ADJ_OPERAND is already the
3780 high half of the product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the
3781 signed product to become unsigned; if UNSIGNEDP is zero, adjust the
3782 unsigned product to become signed.
3783
3784 The result is put in TARGET if that is convenient.
3785
3786 MODE is the mode of operation. */
3787
3788 rtx
3789 expand_mult_highpart_adjust (scalar_int_mode mode, rtx adj_operand, rtx op0,
3790 rtx op1, rtx target, int unsignedp)
3791 {
3792 rtx tem;
3793 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3794
3795 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3796 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3797 tem = expand_and (mode, tem, op1, NULL_RTX);
3798 adj_operand
3799 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3800 adj_operand);
3801
3802 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3803 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3804 tem = expand_and (mode, tem, op0, NULL_RTX);
3805 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3806 target);
3807
3808 return target;
3809 }
3810
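/* For illustration, the identity implemented above for N-bit operands,
   writing [P] for 1 when P holds and 0 otherwise (all arithmetic mod 2^N):

     uhigh (x, y) == shigh (x, y) + [x < 0] * y + [y < 0] * x

   The (op >> (N - 1)) & other_op terms computed above are exactly these
   corrections: they are added to turn a signed high part into an unsigned
   one, and subtracted in the opposite direction.  */
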
3811 /* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */
3812
3813 static rtx
3814 extract_high_half (scalar_int_mode mode, rtx op)
3815 {
3816 if (mode == word_mode)
3817 return gen_highpart (mode, op);
3818
3819 scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3820
3821 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3822 GET_MODE_BITSIZE (mode), 0, 1);
3823 return convert_modes (mode, wider_mode, op, 0);
3824 }
3825
3826 /* Like expmed_mult_highpart, but only consider using a multiplication
3827 optab. OP1 is an rtx for the constant operand. */
3828
3829 static rtx
3830 expmed_mult_highpart_optab (scalar_int_mode mode, rtx op0, rtx op1,
3831 rtx target, int unsignedp, int max_cost)
3832 {
3833 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3834 optab moptab;
3835 rtx tem;
3836 int size;
3837 bool speed = optimize_insn_for_speed_p ();
3838
3839 scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3840
3841 size = GET_MODE_BITSIZE (mode);
3842
3843 /* Firstly, try using a multiplication insn that only generates the needed
3844 high part of the product, and in the sign flavor of unsignedp. */
3845 if (mul_highpart_cost (speed, mode) < max_cost)
3846 {
3847 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3848 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3849 unsignedp, OPTAB_DIRECT);
3850 if (tem)
3851 return tem;
3852 }
3853
3854 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3855 Need to adjust the result after the multiplication. */
3856 if (size - 1 < BITS_PER_WORD
3857 && (mul_highpart_cost (speed, mode)
3858 + 2 * shift_cost (speed, mode, size-1)
3859 + 4 * add_cost (speed, mode) < max_cost))
3860 {
3861 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3862 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3863 unsignedp, OPTAB_DIRECT);
3864 if (tem)
3865 /* We used the wrong signedness. Adjust the result. */
3866 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3867 tem, unsignedp);
3868 }
3869
3870 /* Try widening multiplication. */
3871 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3872 if (convert_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3873 && mul_widen_cost (speed, wider_mode) < max_cost)
3874 {
3875 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3876 unsignedp, OPTAB_WIDEN);
3877 if (tem)
3878 return extract_high_half (mode, tem);
3879 }
3880
3881 /* Try widening the mode and performing a non-widening multiplication. */
3882 if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3883 && size - 1 < BITS_PER_WORD
3884 && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3885 < max_cost))
3886 {
3887 rtx_insn *insns;
3888 rtx wop0, wop1;
3889
3890 /* We need to widen the operands, for example to ensure the
3891 constant multiplier is correctly sign or zero extended.
3892 Use a sequence to clean up any instructions emitted by
3893 the conversions if things don't work out. */
3894 start_sequence ();
3895 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3896 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3897 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3898 unsignedp, OPTAB_WIDEN);
3899 insns = get_insns ();
3900 end_sequence ();
3901
3902 if (tem)
3903 {
3904 emit_insn (insns);
3905 return extract_high_half (mode, tem);
3906 }
3907 }
3908
3909 /* Try widening multiplication of opposite signedness, and adjust. */
3910 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3911 if (convert_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3912 && size - 1 < BITS_PER_WORD
3913 && (mul_widen_cost (speed, wider_mode)
3914 + 2 * shift_cost (speed, mode, size-1)
3915 + 4 * add_cost (speed, mode) < max_cost))
3916 {
3917 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3918 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3919 if (tem != 0)
3920 {
3921 tem = extract_high_half (mode, tem);
3922 /* We used the wrong signedness. Adjust the result. */
3923 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3924 target, unsignedp);
3925 }
3926 }
3927
3928 return 0;
3929 }
3930
3931 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3932 putting the high half of the result in TARGET if that is convenient,
3933 and return where the result is. If the operation cannot be performed,
3934 0 is returned.
3935
3936 MODE is the mode of operation and result.
3937
3938 UNSIGNEDP nonzero means unsigned multiply.
3939
3940 MAX_COST is the total allowed cost for the expanded RTL. */
3941
3942 static rtx
3943 expmed_mult_highpart (scalar_int_mode mode, rtx op0, rtx op1,
3944 rtx target, int unsignedp, int max_cost)
3945 {
3946 unsigned HOST_WIDE_INT cnst1;
3947 int extra_cost;
3948 bool sign_adjust = false;
3949 enum mult_variant variant;
3950 struct algorithm alg;
3951 rtx tem;
3952 bool speed = optimize_insn_for_speed_p ();
3953
3954 /* We can't support modes wider than HOST_BITS_PER_WIDE_INT. */
3955 gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3956
3957 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3958
3959 /* We can't optimize modes wider than BITS_PER_WORD.
3960 ??? We might be able to perform double-word arithmetic if
3961 mode == word_mode, however all the cost calculations in
3962 synth_mult etc. assume single-word operations. */
3963 scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3964 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3965 return expmed_mult_highpart_optab (mode, op0, op1, target,
3966 unsignedp, max_cost);
3967
3968 extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3969
3970 /* Check whether we try to multiply by a negative constant. */
3971 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3972 {
3973 sign_adjust = true;
3974 extra_cost += add_cost (speed, mode);
3975 }
3976
3977 /* See whether shift/add multiplication is cheap enough. */
3978 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3979 max_cost - extra_cost))
3980 {
3981 /* See whether the specialized multiplication optabs are
3982 cheaper than the shift/add version. */
3983 tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3984 alg.cost.cost + extra_cost);
3985 if (tem)
3986 return tem;
3987
3988 tem = convert_to_mode (wider_mode, op0, unsignedp);
3989 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3990 tem = extract_high_half (mode, tem);
3991
3992 /* Adjust result for signedness. */
3993 if (sign_adjust)
3994 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3995
3996 return tem;
3997 }
3998 return expmed_mult_highpart_optab (mode, op0, op1, target,
3999 unsignedp, max_cost);
4000 }
4001
4002
4003 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
4004
4005 static rtx
4006 expand_smod_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
4007 {
4008 rtx result, temp, shift;
4009 rtx_code_label *label;
4010 int logd;
4011 int prec = GET_MODE_PRECISION (mode);
4012
4013 logd = floor_log2 (d);
4014 result = gen_reg_rtx (mode);
4015
4016 /* Avoid conditional branches when they're expensive. */
4017 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
4018 && optimize_insn_for_speed_p ())
4019 {
4020 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
4021 mode, 0, -1);
4022 if (signmask)
4023 {
4024 HOST_WIDE_INT masklow = (HOST_WIDE_INT_1 << logd) - 1;
4025 signmask = force_reg (mode, signmask);
4026 shift = gen_int_shift_amount (mode, GET_MODE_BITSIZE (mode) - logd);
4027
4028 /* Use the rtx_cost of a LSHIFTRT instruction to determine
4029 which instruction sequence to use. If logical right shifts
4030 are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
4031 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
4032
4033 temp = gen_rtx_LSHIFTRT (mode, result, shift);
4034 if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
4035 || (set_src_cost (temp, mode, optimize_insn_for_speed_p ())
4036 > COSTS_N_INSNS (2)))
4037 {
4038 temp = expand_binop (mode, xor_optab, op0, signmask,
4039 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4040 temp = expand_binop (mode, sub_optab, temp, signmask,
4041 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4042 temp = expand_binop (mode, and_optab, temp,
4043 gen_int_mode (masklow, mode),
4044 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4045 temp = expand_binop (mode, xor_optab, temp, signmask,
4046 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4047 temp = expand_binop (mode, sub_optab, temp, signmask,
4048 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4049 }
4050 else
4051 {
4052 signmask = expand_binop (mode, lshr_optab, signmask, shift,
4053 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4054 signmask = force_reg (mode, signmask);
4055
4056 temp = expand_binop (mode, add_optab, op0, signmask,
4057 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4058 temp = expand_binop (mode, and_optab, temp,
4059 gen_int_mode (masklow, mode),
4060 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4061 temp = expand_binop (mode, sub_optab, temp, signmask,
4062 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4063 }
4064 return temp;
4065 }
4066 }
4067
4068 /* Mask contains the mode's signbit and the significant bits of the
4069 modulus. By including the signbit in the operation, many targets
4070 can avoid an explicit compare operation in the following comparison
4071 against zero. */
4072 wide_int mask = wi::mask (logd, false, prec);
4073 mask = wi::set_bit (mask, prec - 1);
4074
4075 temp = expand_binop (mode, and_optab, op0,
4076 immed_wide_int_const (mask, mode),
4077 result, 1, OPTAB_LIB_WIDEN);
4078 if (temp != result)
4079 emit_move_insn (result, temp);
4080
4081 label = gen_label_rtx ();
4082 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
4083
4084 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
4085 0, OPTAB_LIB_WIDEN);
4086
4087 mask = wi::mask (logd, true, prec);
4088 temp = expand_binop (mode, ior_optab, temp,
4089 immed_wide_int_const (mask, mode),
4090 result, 1, OPTAB_LIB_WIDEN);
4091 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
4092 0, OPTAB_LIB_WIDEN);
4093 if (temp != result)
4094 emit_move_insn (result, temp);
4095 emit_label (label);
4096 return result;
4097 }
4098
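/* For illustration, the final masking sequence above evaluated for an
   8-bit mode with D = 4 (logd = 2) and OP0 = -5:

     result = -5 & 0x83 = 0x83 (-125), which is negative, so
     result = ((result - 1) | 0xfc) + 1 = (0x82 | 0xfc) + 1 = 0xff = -1,

   matching the truncating remainder -5 % 4 == -1.  For OP0 = 5 the result
   after the AND is 1 and the jump to LABEL skips the adjustment.  */
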
4099 /* Expand signed division of OP0 by a power of two D in mode MODE.
4100 This routine is only called for positive values of D. */
4101
4102 static rtx
4103 expand_sdiv_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
4104 {
4105 rtx temp;
4106 rtx_code_label *label;
4107 int logd;
4108
4109 logd = floor_log2 (d);
4110
4111 if (d == 2
4112 && BRANCH_COST (optimize_insn_for_speed_p (),
4113 false) >= 1)
4114 {
4115 temp = gen_reg_rtx (mode);
4116 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
4117 if (temp != NULL_RTX)
4118 {
4119 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
4120 0, OPTAB_LIB_WIDEN);
4121 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4122 }
4123 }
4124
4125 if (HAVE_conditional_move
4126 && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
4127 {
4128 rtx temp2;
4129
4130 start_sequence ();
4131 temp2 = copy_to_mode_reg (mode, op0);
4132 temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
4133 NULL_RTX, 0, OPTAB_LIB_WIDEN);
4134 temp = force_reg (mode, temp);
4135
4136 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
4137 temp2 = emit_conditional_move (temp2, { LT, temp2, const0_rtx, mode },
4138 temp, temp2, mode, 0);
4139 if (temp2)
4140 {
4141 rtx_insn *seq = get_insns ();
4142 end_sequence ();
4143 emit_insn (seq);
4144 return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
4145 }
4146 end_sequence ();
4147 }
4148
4149 if (BRANCH_COST (optimize_insn_for_speed_p (),
4150 false) >= 2)
4151 {
4152 int ushift = GET_MODE_BITSIZE (mode) - logd;
4153
4154 temp = gen_reg_rtx (mode);
4155 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
4156 if (temp != NULL_RTX)
4157 {
4158 if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
4159 || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
4160 > COSTS_N_INSNS (1))
4161 temp = expand_binop (mode, and_optab, temp,
4162 gen_int_mode (d - 1, mode),
4163 NULL_RTX, 0, OPTAB_LIB_WIDEN);
4164 else
4165 temp = expand_shift (RSHIFT_EXPR, mode, temp,
4166 ushift, NULL_RTX, 1);
4167 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
4168 0, OPTAB_LIB_WIDEN);
4169 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4170 }
4171 }
4172
4173 label = gen_label_rtx ();
4174 temp = copy_to_mode_reg (mode, op0);
4175 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
4176 expand_inc (temp, gen_int_mode (d - 1, mode));
4177 emit_label (label);
4178 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4179 }
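
/* For illustration, the branchless store-flag sequence above corresponds to
   this C sketch (a hedged example, not code used by the compiler), assuming
   32-bit int and an arithmetic right shift of negative values:

     int sdiv_pow2 (int x, int logd)
     {
       int sign = x >> 31;                   // -1 if x < 0, else 0
       int bias = sign & ((1 << logd) - 1);  // d - 1 if x < 0, else 0
       return (x + bias) >> logd;            // truncating division by 1<<logd
     }

   e.g. -7 / 4: bias = 3 and (-7 + 3) >> 2 = -1, whereas a plain arithmetic
   shift of -7 gives -2 (flooring instead of truncating).  */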
4180 \f
4181 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
4182 if that is convenient, and returning where the result is.
4183 You may request either the quotient or the remainder as the result;
4184 specify REM_FLAG nonzero to get the remainder.
4185
4186 CODE is the expression code for which kind of division this is;
4187 it controls how rounding is done. MODE is the machine mode to use.
4188 UNSIGNEDP nonzero means do unsigned division. */
4189
4190 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
4191 and then correct it by or'ing in missing high bits
4192 if result of ANDI is nonzero.
4193 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
4194 This could optimize to a bfexts instruction.
4195 But C doesn't use these operations, so their optimizations are
4196 left for later. */
4197 /* ??? For modulo, we don't actually need the highpart of the first product,
4198 the low part will do nicely. And for small divisors, the second multiply
4199 can also be a low-part only multiply or even be completely left out.
4200 E.g. to calculate the remainder of a division by 3 with a 32 bit
4201 multiply, multiply with 0x55555556 and extract the upper two bits;
4202 the result is exact for inputs up to 0x1fffffff.
4203 The input range can be reduced by using cross-sum rules.
4204 For odd divisors >= 3, the following table gives right shift counts
4205 so that if a number is shifted by an integer multiple of the given
4206 amount, the remainder stays the same:
4207 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
4208 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
4209 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
4210 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
4211 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
4212
4213 Cross-sum rules for even numbers can be derived by leaving as many bits
4214 to the right alone as the divisor has zeros to the right.
4215 E.g. if x is an unsigned 32 bit number:
4216 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
4217 */
4218
4219 rtx
4220 expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
4221 rtx op0, rtx op1, rtx target, int unsignedp,
4222 enum optab_methods methods)
4223 {
4224 machine_mode compute_mode;
4225 rtx tquotient;
4226 rtx quotient = 0, remainder = 0;
4227 rtx_insn *last;
4228 rtx_insn *insn;
4229 optab optab1, optab2;
4230 int op1_is_constant, op1_is_pow2 = 0;
4231 int max_cost, extra_cost;
4232 static HOST_WIDE_INT last_div_const = 0;
4233 bool speed = optimize_insn_for_speed_p ();
4234
4235 op1_is_constant = CONST_INT_P (op1);
4236 if (op1_is_constant)
4237 {
4238 wide_int ext_op1 = rtx_mode_t (op1, mode);
4239 op1_is_pow2 = (wi::popcount (ext_op1) == 1
4240 || (! unsignedp
4241 && wi::popcount (wi::neg (ext_op1)) == 1));
4242 }
4243
4244 /*
4245 This is the structure of expand_divmod:
4246
4247 First comes code to fix up the operands so we can perform the operations
4248 correctly and efficiently.
4249
4250 Second comes a switch statement with code specific for each rounding mode.
4251 For some special operands this code emits all RTL for the desired
4252 operation, for other cases, it generates only a quotient and stores it in
4253 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
4254 to indicate that it has not done anything.
4255
4256 Last comes code that finishes the operation. If QUOTIENT is set and
4257 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
4258 QUOTIENT is not set, it is computed using trunc rounding.
4259
4260 We try to generate special code for division and remainder when OP1 is a
4261 constant. If |OP1| = 2**n we can use shifts and some other fast
4262 operations. For other values of OP1, we compute a carefully selected
4263 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
4264 by m.
4265
4266 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
4267 half of the product. Different strategies for generating the product are
4268 implemented in expmed_mult_highpart.
4269
4270 If what we actually want is the remainder, we generate that by another
4271 by-constant multiplication and a subtraction. */
4272
4273 /* We shouldn't be called with OP1 == const1_rtx, but some of the
4274 code below will malfunction if we are, so check here and handle
4275 the special case if so. */
4276 if (op1 == const1_rtx)
4277 return rem_flag ? const0_rtx : op0;
4278
4279 /* When dividing by -1, we could get an overflow.
4280 negv_optab can handle overflows. */
4281 if (! unsignedp && op1 == constm1_rtx)
4282 {
4283 if (rem_flag)
4284 return const0_rtx;
4285 return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
4286 ? negv_optab : neg_optab, op0, target, 0);
4287 }
4288
4289 if (target
4290 /* Don't use the function value register as a target
4291 since we have to read it as well as write it,
4292 and function-inlining gets confused by this. */
4293 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
4294 /* Don't clobber an operand while doing a multi-step calculation. */
4295 || ((rem_flag || op1_is_constant)
4296 && (reg_mentioned_p (target, op0)
4297 || (MEM_P (op0) && MEM_P (target))))
4298 || reg_mentioned_p (target, op1)
4299 || (MEM_P (op1) && MEM_P (target))))
4300 target = 0;
4301
4302 /* Get the mode in which to perform this computation. Normally it will
4303 be MODE, but sometimes we can't do the desired operation in MODE.
4304 If so, pick a wider mode in which we can do the operation. Convert
4305 to that mode at the start to avoid repeated conversions.
4306
4307 First see what operations we need. These depend on the expression
4308 we are evaluating. (We assume that divxx3 insns exist under the
4309 same conditions as modxx3 insns and that these insns don't normally
4310 fail. If these assumptions are not correct, we may generate less
4311 efficient code in some cases.)
4312
4313 Then see if we find a mode in which we can open-code that operation
4314 (either a division, modulus, or shift). Finally, check for the smallest
4315 mode for which we can do the operation with a library call. */
4316
4317 /* We might want to refine this now that we have division-by-constant
4318 optimization. Since expmed_mult_highpart tries so many variants, it is
4319 not straightforward to generalize this. Maybe we should make an array
4320 of possible modes in init_expmed? Save this for GCC 2.7. */
4321
4322 optab1 = (op1_is_pow2
4323 ? (unsignedp ? lshr_optab : ashr_optab)
4324 : (unsignedp ? udiv_optab : sdiv_optab));
4325 optab2 = (op1_is_pow2 ? optab1
4326 : (unsignedp ? udivmod_optab : sdivmod_optab));
4327
4328 if (methods == OPTAB_WIDEN || methods == OPTAB_LIB_WIDEN)
4329 {
4330 FOR_EACH_MODE_FROM (compute_mode, mode)
4331 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4332 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4333 break;
4334
4335 if (compute_mode == VOIDmode && methods == OPTAB_LIB_WIDEN)
4336 FOR_EACH_MODE_FROM (compute_mode, mode)
4337 if (optab_libfunc (optab1, compute_mode)
4338 || optab_libfunc (optab2, compute_mode))
4339 break;
4340 }
4341 else
4342 compute_mode = mode;
4343
4344 /* If we still couldn't find a mode, use MODE, but expand_binop will
4345 probably die. */
4346 if (compute_mode == VOIDmode)
4347 compute_mode = mode;
4348
4349 if (target && GET_MODE (target) == compute_mode)
4350 tquotient = target;
4351 else
4352 tquotient = gen_reg_rtx (compute_mode);
4353
4354 #if 0
4355 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4356 (mode), and thereby get better code when OP1 is a constant. Do that
4357 later. It will require going over all usages of SIZE below. */
4358 size = GET_MODE_BITSIZE (mode);
4359 #endif
4360
4361 /* Only deduct something for a REM if the last divide done was
4362 for a different constant. Then set the constant of the last
4363 divide. */
4364 max_cost = (unsignedp
4365 ? udiv_cost (speed, compute_mode)
4366 : sdiv_cost (speed, compute_mode));
4367 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4368 && INTVAL (op1) == last_div_const))
4369 max_cost -= (mul_cost (speed, compute_mode)
4370 + add_cost (speed, compute_mode));
4371
4372 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4373
4374 /* Now convert to the best mode to use. */
4375 if (compute_mode != mode)
4376 {
4377 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4378 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4379
4380 /* convert_modes may have placed op1 into a register, so we
4381 must recompute the following. */
4382 op1_is_constant = CONST_INT_P (op1);
4383 if (op1_is_constant)
4384 {
4385 wide_int ext_op1 = rtx_mode_t (op1, compute_mode);
4386 op1_is_pow2 = (wi::popcount (ext_op1) == 1
4387 || (! unsignedp
4388 && wi::popcount (wi::neg (ext_op1)) == 1));
4389 }
4390 else
4391 op1_is_pow2 = 0;
4392 }
4393
4394 /* If one of the operands is a volatile MEM, copy it into a register. */
4395
4396 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4397 op0 = force_reg (compute_mode, op0);
4398 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4399 op1 = force_reg (compute_mode, op1);
4400
4401 /* If we need the remainder or if OP1 is constant, we need to
4402 put OP0 in a register in case it has any queued subexpressions. */
4403 if (rem_flag || op1_is_constant)
4404 op0 = force_reg (compute_mode, op0);
4405
4406 last = get_last_insn ();
4407
4408 /* Promote floor rounding to trunc rounding for unsigned operations. */
4409 if (unsignedp)
4410 {
4411 if (code == FLOOR_DIV_EXPR)
4412 code = TRUNC_DIV_EXPR;
4413 if (code == FLOOR_MOD_EXPR)
4414 code = TRUNC_MOD_EXPR;
4415 if (code == EXACT_DIV_EXPR && op1_is_pow2)
4416 code = TRUNC_DIV_EXPR;
4417 }
4418
4419 if (op1 != const0_rtx)
4420 switch (code)
4421 {
4422 case TRUNC_MOD_EXPR:
4423 case TRUNC_DIV_EXPR:
4424 if (op1_is_constant)
4425 {
4426 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4427 int size = GET_MODE_BITSIZE (int_mode);
4428 if (unsignedp)
4429 {
4430 unsigned HOST_WIDE_INT mh, ml;
4431 int pre_shift, post_shift;
4432 int dummy;
4433 wide_int wd = rtx_mode_t (op1, int_mode);
4434 unsigned HOST_WIDE_INT d = wd.to_uhwi ();
4435
4436 if (wi::popcount (wd) == 1)
4437 {
4438 pre_shift = floor_log2 (d);
4439 if (rem_flag)
4440 {
4441 unsigned HOST_WIDE_INT mask
4442 = (HOST_WIDE_INT_1U << pre_shift) - 1;
4443 remainder
4444 = expand_binop (int_mode, and_optab, op0,
4445 gen_int_mode (mask, int_mode),
4446 remainder, 1, methods);
4447 if (remainder)
4448 return gen_lowpart (mode, remainder);
4449 }
4450 quotient = expand_shift (RSHIFT_EXPR, int_mode, op0,
4451 pre_shift, tquotient, 1);
4452 }
4453 else if (size <= HOST_BITS_PER_WIDE_INT)
4454 {
4455 if (d >= (HOST_WIDE_INT_1U << (size - 1)))
4456 {
4457 /* Most significant bit of divisor is set; emit an scc
4458 insn. */
4459 quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4460 int_mode, 1, 1);
4461 }
4462 else
4463 {
4464 /* Find a suitable multiplier and right shift count
4465 instead of multiplying with D. */
4466
4467 mh = choose_multiplier (d, size, size,
4468 &ml, &post_shift, &dummy);
4469
4470 /* If the suggested multiplier is more than SIZE bits,
4471 we can do better for even divisors, using an
4472 initial right shift. */
4473 if (mh != 0 && (d & 1) == 0)
4474 {
4475 pre_shift = ctz_or_zero (d);
4476 mh = choose_multiplier (d >> pre_shift, size,
4477 size - pre_shift,
4478 &ml, &post_shift, &dummy);
4479 gcc_assert (!mh);
4480 }
4481 else
4482 pre_shift = 0;
4483
4484 if (mh != 0)
4485 {
4486 rtx t1, t2, t3, t4;
4487
4488 if (post_shift - 1 >= BITS_PER_WORD)
4489 goto fail1;
4490
4491 extra_cost
4492 = (shift_cost (speed, int_mode, post_shift - 1)
4493 + shift_cost (speed, int_mode, 1)
4494 + 2 * add_cost (speed, int_mode));
4495 t1 = expmed_mult_highpart
4496 (int_mode, op0, gen_int_mode (ml, int_mode),
4497 NULL_RTX, 1, max_cost - extra_cost);
4498 if (t1 == 0)
4499 goto fail1;
4500 t2 = force_operand (gen_rtx_MINUS (int_mode,
4501 op0, t1),
4502 NULL_RTX);
4503 t3 = expand_shift (RSHIFT_EXPR, int_mode,
4504 t2, 1, NULL_RTX, 1);
4505 t4 = force_operand (gen_rtx_PLUS (int_mode,
4506 t1, t3),
4507 NULL_RTX);
4508 quotient = expand_shift
4509 (RSHIFT_EXPR, int_mode, t4,
4510 post_shift - 1, tquotient, 1);
4511 }
4512 else
4513 {
4514 rtx t1, t2;
4515
4516 if (pre_shift >= BITS_PER_WORD
4517 || post_shift >= BITS_PER_WORD)
4518 goto fail1;
4519
4520 t1 = expand_shift
4521 (RSHIFT_EXPR, int_mode, op0,
4522 pre_shift, NULL_RTX, 1);
4523 extra_cost
4524 = (shift_cost (speed, int_mode, pre_shift)
4525 + shift_cost (speed, int_mode, post_shift));
4526 t2 = expmed_mult_highpart
4527 (int_mode, t1,
4528 gen_int_mode (ml, int_mode),
4529 NULL_RTX, 1, max_cost - extra_cost);
4530 if (t2 == 0)
4531 goto fail1;
4532 quotient = expand_shift
4533 (RSHIFT_EXPR, int_mode, t2,
4534 post_shift, tquotient, 1);
4535 }
4536 }
4537 }
4538 else /* Too wide mode to use tricky code */
4539 break;
4540
4541 insn = get_last_insn ();
4542 if (insn != last)
4543 set_dst_reg_note (insn, REG_EQUAL,
4544 gen_rtx_UDIV (int_mode, op0, op1),
4545 quotient);
4546 }
4547 else /* TRUNC_DIV, signed */
4548 {
4549 unsigned HOST_WIDE_INT ml;
4550 int lgup, post_shift;
4551 rtx mlr;
4552 HOST_WIDE_INT d = INTVAL (op1);
4553 unsigned HOST_WIDE_INT abs_d;
4554
4555 /* Not prepared to handle division/remainder by
4556 0xffffffffffffffff8000000000000000 etc. */
4557 if (d == HOST_WIDE_INT_MIN && size > HOST_BITS_PER_WIDE_INT)
4558 break;
4559
4560 /* Since d might be INT_MIN, we have to cast to
4561 unsigned HOST_WIDE_INT before negating to avoid
4562 undefined signed overflow. */
4563 abs_d = (d >= 0
4564 ? (unsigned HOST_WIDE_INT) d
4565 : - (unsigned HOST_WIDE_INT) d);
4566
4567 /* n rem d = n rem -d */
4568 if (rem_flag && d < 0)
4569 {
4570 d = abs_d;
4571 op1 = gen_int_mode (abs_d, int_mode);
4572 }
4573
4574 if (d == 1)
4575 quotient = op0;
4576 else if (d == -1)
4577 quotient = expand_unop (int_mode, neg_optab, op0,
4578 tquotient, 0);
4579 else if (size <= HOST_BITS_PER_WIDE_INT
4580 && abs_d == HOST_WIDE_INT_1U << (size - 1))
4581 {
4582 /* This case is not handled correctly below. */
4583 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4584 int_mode, 1, 1);
4585 if (quotient == 0)
4586 goto fail1;
4587 }
4588 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4589 && (size <= HOST_BITS_PER_WIDE_INT || d >= 0)
4590 && (rem_flag
4591 ? smod_pow2_cheap (speed, int_mode)
4592 : sdiv_pow2_cheap (speed, int_mode))
4593 /* We assume that cheap metric is true if the
4594 optab has an expander for this mode. */
4595 && ((optab_handler ((rem_flag ? smod_optab
4596 : sdiv_optab),
4597 int_mode)
4598 != CODE_FOR_nothing)
4599 || (optab_handler (sdivmod_optab, int_mode)
4600 != CODE_FOR_nothing)))
4601 ;
4602 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4603 {
4604 if (rem_flag)
4605 {
4606 remainder = expand_smod_pow2 (int_mode, op0, d);
4607 if (remainder)
4608 return gen_lowpart (mode, remainder);
4609 }
4610
4611 if (sdiv_pow2_cheap (speed, int_mode)
4612 && ((optab_handler (sdiv_optab, int_mode)
4613 != CODE_FOR_nothing)
4614 || (optab_handler (sdivmod_optab, int_mode)
4615 != CODE_FOR_nothing)))
4616 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4617 int_mode, op0,
4618 gen_int_mode (abs_d,
4619 int_mode),
4620 NULL_RTX, 0);
4621 else
4622 quotient = expand_sdiv_pow2 (int_mode, op0, abs_d);
4623
4624 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4625 negate the quotient. */
4626 if (d < 0)
4627 {
4628 insn = get_last_insn ();
4629 if (insn != last
4630 && abs_d < (HOST_WIDE_INT_1U
4631 << (HOST_BITS_PER_WIDE_INT - 1)))
4632 set_dst_reg_note (insn, REG_EQUAL,
4633 gen_rtx_DIV (int_mode, op0,
4634 gen_int_mode
4635 (abs_d,
4636 int_mode)),
4637 quotient);
4638
4639 quotient = expand_unop (int_mode, neg_optab,
4640 quotient, quotient, 0);
4641 }
4642 }
4643 else if (size <= HOST_BITS_PER_WIDE_INT)
4644 {
4645 choose_multiplier (abs_d, size, size - 1,
4646 &ml, &post_shift, &lgup);
4647 if (ml < HOST_WIDE_INT_1U << (size - 1))
4648 {
4649 rtx t1, t2, t3;
4650
4651 if (post_shift >= BITS_PER_WORD
4652 || size - 1 >= BITS_PER_WORD)
4653 goto fail1;
4654
4655 extra_cost = (shift_cost (speed, int_mode, post_shift)
4656 + shift_cost (speed, int_mode, size - 1)
4657 + add_cost (speed, int_mode));
4658 t1 = expmed_mult_highpart
4659 (int_mode, op0, gen_int_mode (ml, int_mode),
4660 NULL_RTX, 0, max_cost - extra_cost);
4661 if (t1 == 0)
4662 goto fail1;
4663 t2 = expand_shift
4664 (RSHIFT_EXPR, int_mode, t1,
4665 post_shift, NULL_RTX, 0);
4666 t3 = expand_shift
4667 (RSHIFT_EXPR, int_mode, op0,
4668 size - 1, NULL_RTX, 0);
4669 if (d < 0)
4670 quotient
4671 = force_operand (gen_rtx_MINUS (int_mode, t3, t2),
4672 tquotient);
4673 else
4674 quotient
4675 = force_operand (gen_rtx_MINUS (int_mode, t2, t3),
4676 tquotient);
4677 }
4678 else
4679 {
4680 rtx t1, t2, t3, t4;
4681
4682 if (post_shift >= BITS_PER_WORD
4683 || size - 1 >= BITS_PER_WORD)
4684 goto fail1;
4685
4686 ml |= HOST_WIDE_INT_M1U << (size - 1);
4687 mlr = gen_int_mode (ml, int_mode);
4688 extra_cost = (shift_cost (speed, int_mode, post_shift)
4689 + shift_cost (speed, int_mode, size - 1)
4690 + 2 * add_cost (speed, int_mode));
4691 t1 = expmed_mult_highpart (int_mode, op0, mlr,
4692 NULL_RTX, 0,
4693 max_cost - extra_cost);
4694 if (t1 == 0)
4695 goto fail1;
4696 t2 = force_operand (gen_rtx_PLUS (int_mode, t1, op0),
4697 NULL_RTX);
4698 t3 = expand_shift
4699 (RSHIFT_EXPR, int_mode, t2,
4700 post_shift, NULL_RTX, 0);
4701 t4 = expand_shift
4702 (RSHIFT_EXPR, int_mode, op0,
4703 size - 1, NULL_RTX, 0);
4704 if (d < 0)
4705 quotient
4706 = force_operand (gen_rtx_MINUS (int_mode, t4, t3),
4707 tquotient);
4708 else
4709 quotient
4710 = force_operand (gen_rtx_MINUS (int_mode, t3, t4),
4711 tquotient);
4712 }
4713 }
4714 else /* Too wide mode to use tricky code */
4715 break;
4716
4717 insn = get_last_insn ();
4718 if (insn != last)
4719 set_dst_reg_note (insn, REG_EQUAL,
4720 gen_rtx_DIV (int_mode, op0, op1),
4721 quotient);
4722 }
4723 break;
4724 }
4725 fail1:
4726 delete_insns_since (last);
4727 break;
4728
4729 case FLOOR_DIV_EXPR:
4730 case FLOOR_MOD_EXPR:
4731 /* We will come here only for signed operations. */
4732 if (op1_is_constant && HWI_COMPUTABLE_MODE_P (compute_mode))
4733 {
4734 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4735 int size = GET_MODE_BITSIZE (int_mode);
4736 unsigned HOST_WIDE_INT mh, ml;
4737 int pre_shift, lgup, post_shift;
4738 HOST_WIDE_INT d = INTVAL (op1);
4739
4740 if (d > 0)
4741 {
4742 /* We could just as easily deal with negative constants here,
4743 but it does not seem worth the trouble for GCC 2.6. */
4744 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4745 {
4746 pre_shift = floor_log2 (d);
4747 if (rem_flag)
4748 {
4749 unsigned HOST_WIDE_INT mask
4750 = (HOST_WIDE_INT_1U << pre_shift) - 1;
4751 remainder = expand_binop
4752 (int_mode, and_optab, op0,
4753 gen_int_mode (mask, int_mode),
4754 remainder, 0, methods);
4755 if (remainder)
4756 return gen_lowpart (mode, remainder);
4757 }
4758 quotient = expand_shift
4759 (RSHIFT_EXPR, int_mode, op0,
4760 pre_shift, tquotient, 0);
4761 }
4762 else
4763 {
4764 rtx t1, t2, t3, t4;
4765
4766 mh = choose_multiplier (d, size, size - 1,
4767 &ml, &post_shift, &lgup);
4768 gcc_assert (!mh);
4769
4770 if (post_shift < BITS_PER_WORD
4771 && size - 1 < BITS_PER_WORD)
4772 {
4773 t1 = expand_shift
4774 (RSHIFT_EXPR, int_mode, op0,
4775 size - 1, NULL_RTX, 0);
4776 t2 = expand_binop (int_mode, xor_optab, op0, t1,
4777 NULL_RTX, 0, OPTAB_WIDEN);
4778 extra_cost = (shift_cost (speed, int_mode, post_shift)
4779 + shift_cost (speed, int_mode, size - 1)
4780 + 2 * add_cost (speed, int_mode));
4781 t3 = expmed_mult_highpart
4782 (int_mode, t2, gen_int_mode (ml, int_mode),
4783 NULL_RTX, 1, max_cost - extra_cost);
4784 if (t3 != 0)
4785 {
4786 t4 = expand_shift
4787 (RSHIFT_EXPR, int_mode, t3,
4788 post_shift, NULL_RTX, 1);
4789 quotient = expand_binop (int_mode, xor_optab,
4790 t4, t1, tquotient, 0,
4791 OPTAB_WIDEN);
4792 }
4793 }
4794 }
4795 }
4796 else
4797 {
4798 rtx nsign, t1, t2, t3, t4;
4799 t1 = force_operand (gen_rtx_PLUS (int_mode,
4800 op0, constm1_rtx), NULL_RTX);
4801 t2 = expand_binop (int_mode, ior_optab, op0, t1, NULL_RTX,
4802 0, OPTAB_WIDEN);
4803 nsign = expand_shift (RSHIFT_EXPR, int_mode, t2,
4804 size - 1, NULL_RTX, 0);
4805 t3 = force_operand (gen_rtx_MINUS (int_mode, t1, nsign),
4806 NULL_RTX);
4807 t4 = expand_divmod (0, TRUNC_DIV_EXPR, int_mode, t3, op1,
4808 NULL_RTX, 0);
4809 if (t4)
4810 {
4811 rtx t5;
4812 t5 = expand_unop (int_mode, one_cmpl_optab, nsign,
4813 NULL_RTX, 0);
4814 quotient = force_operand (gen_rtx_PLUS (int_mode, t4, t5),
4815 tquotient);
4816 }
4817 }
4818 }
4819
4820 if (quotient != 0)
4821 break;
4822 delete_insns_since (last);
4823
4824 /* Try using an instruction that produces both the quotient and
4825 remainder, using truncation. We can easily compensate the quotient
4826 or remainder to get floor rounding, once we have the remainder.
4827 Notice that we also compute the final remainder value here,
4828 and return the result right away. */
4829 if (target == 0 || GET_MODE (target) != compute_mode)
4830 target = gen_reg_rtx (compute_mode);
4831
4832 if (rem_flag)
4833 {
4834 remainder
4835 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4836 quotient = gen_reg_rtx (compute_mode);
4837 }
4838 else
4839 {
4840 quotient
4841 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4842 remainder = gen_reg_rtx (compute_mode);
4843 }
4844
4845 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4846 quotient, remainder, 0))
4847 {
4848 /* This could be computed with a branch-less sequence.
4849 Save that for later. */
4850 rtx tem;
4851 rtx_code_label *label = gen_label_rtx ();
4852 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4853 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4854 NULL_RTX, 0, OPTAB_WIDEN);
4855 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4856 expand_dec (quotient, const1_rtx);
4857 expand_inc (remainder, op1);
4858 emit_label (label);
4859 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4860 }
4861
4862 /* No luck with division elimination or divmod. Have to do it
4863 by conditionally adjusting op0 *and* the result. */
4864 {
4865 rtx_code_label *label1, *label2, *label3, *label4, *label5;
4866 rtx adjusted_op0;
4867 rtx tem;
4868
4869 quotient = gen_reg_rtx (compute_mode);
4870 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4871 label1 = gen_label_rtx ();
4872 label2 = gen_label_rtx ();
4873 label3 = gen_label_rtx ();
4874 label4 = gen_label_rtx ();
4875 label5 = gen_label_rtx ();
4876 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4877 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4878 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4879 quotient, 0, methods);
4880 if (tem != quotient)
4881 emit_move_insn (quotient, tem);
4882 emit_jump_insn (targetm.gen_jump (label5));
4883 emit_barrier ();
4884 emit_label (label1);
4885 expand_inc (adjusted_op0, const1_rtx);
4886 emit_jump_insn (targetm.gen_jump (label4));
4887 emit_barrier ();
4888 emit_label (label2);
4889 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4890 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4891 quotient, 0, methods);
4892 if (tem != quotient)
4893 emit_move_insn (quotient, tem);
4894 emit_jump_insn (targetm.gen_jump (label5));
4895 emit_barrier ();
4896 emit_label (label3);
4897 expand_dec (adjusted_op0, const1_rtx);
4898 emit_label (label4);
4899 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4900 quotient, 0, methods);
4901 if (tem != quotient)
4902 emit_move_insn (quotient, tem);
4903 expand_dec (quotient, const1_rtx);
4904 emit_label (label5);
4905 }
4906 break;
4907
4908 case CEIL_DIV_EXPR:
4909 case CEIL_MOD_EXPR:
4910 if (unsignedp)
4911 {
4912 if (op1_is_constant
4913 && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4914 && (HWI_COMPUTABLE_MODE_P (compute_mode)
4915 || INTVAL (op1) >= 0))
4916 {
4917 scalar_int_mode int_mode
4918 = as_a <scalar_int_mode> (compute_mode);
4919 rtx t1, t2, t3;
4920 unsigned HOST_WIDE_INT d = INTVAL (op1);
4921 t1 = expand_shift (RSHIFT_EXPR, int_mode, op0,
4922 floor_log2 (d), tquotient, 1);
4923 t2 = expand_binop (int_mode, and_optab, op0,
4924 gen_int_mode (d - 1, int_mode),
4925 NULL_RTX, 1, methods);
4926 t3 = gen_reg_rtx (int_mode);
4927 t3 = emit_store_flag (t3, NE, t2, const0_rtx, int_mode, 1, 1);
4928 if (t3 == 0)
4929 {
4930 rtx_code_label *lab;
4931 lab = gen_label_rtx ();
4932 do_cmp_and_jump (t2, const0_rtx, EQ, int_mode, lab);
4933 expand_inc (t1, const1_rtx);
4934 emit_label (lab);
4935 quotient = t1;
4936 }
4937 else
4938 quotient = force_operand (gen_rtx_PLUS (int_mode, t1, t3),
4939 tquotient);
4940 break;
4941 }
4942
4943 /* Try using an instruction that produces both the quotient and
4944 remainder, using truncation. We can easily compensate the
4945 quotient or remainder to get ceiling rounding, once we have the
4946 remainder. Notice that we also compute the final remainder
4947 value here, and return the result right away. */
4948 if (target == 0 || GET_MODE (target) != compute_mode)
4949 target = gen_reg_rtx (compute_mode);
4950
4951 if (rem_flag)
4952 {
4953 remainder = (REG_P (target)
4954 ? target : gen_reg_rtx (compute_mode));
4955 quotient = gen_reg_rtx (compute_mode);
4956 }
4957 else
4958 {
4959 quotient = (REG_P (target)
4960 ? target : gen_reg_rtx (compute_mode));
4961 remainder = gen_reg_rtx (compute_mode);
4962 }
4963
4964 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4965 remainder, 1))
4966 {
4967 /* This could be computed with a branch-less sequence.
4968 Save that for later. */
4969 rtx_code_label *label = gen_label_rtx ();
4970 do_cmp_and_jump (remainder, const0_rtx, EQ,
4971 compute_mode, label);
4972 expand_inc (quotient, const1_rtx);
4973 expand_dec (remainder, op1);
4974 emit_label (label);
4975 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4976 }
4977
4978 /* No luck with division elimination or divmod. Have to do it
4979 by conditionally adjusting op0 *and* the result. */
4980 {
4981 rtx_code_label *label1, *label2;
4982 rtx adjusted_op0, tem;
4983
4984 quotient = gen_reg_rtx (compute_mode);
4985 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4986 label1 = gen_label_rtx ();
4987 label2 = gen_label_rtx ();
4988 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4989 compute_mode, label1);
4990 emit_move_insn (quotient, const0_rtx);
4991 emit_jump_insn (targetm.gen_jump (label2));
4992 emit_barrier ();
4993 emit_label (label1);
4994 expand_dec (adjusted_op0, const1_rtx);
4995 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4996 quotient, 1, methods);
4997 if (tem != quotient)
4998 emit_move_insn (quotient, tem);
4999 expand_inc (quotient, const1_rtx);
5000 emit_label (label2);
5001 }
5002 }
5003 else /* signed */
5004 {
5005 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
5006 && INTVAL (op1) >= 0)
5007 {
5008 /* This is extremely similar to the code for the unsigned case
5009 above. For 2.7 we should merge these variants, but for
5010 2.6.1 I don't want to touch the code for unsigned since that
5011 gets used in C. The signed case will only be used by other
5012 languages (Ada). */
5013
5014 rtx t1, t2, t3;
5015 unsigned HOST_WIDE_INT d = INTVAL (op1);
5016 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
5017 floor_log2 (d), tquotient, 0);
5018 t2 = expand_binop (compute_mode, and_optab, op0,
5019 gen_int_mode (d - 1, compute_mode),
5020 NULL_RTX, 1, methods);
5021 t3 = gen_reg_rtx (compute_mode);
5022 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
5023 compute_mode, 1, 1);
5024 if (t3 == 0)
5025 {
5026 rtx_code_label *lab;
5027 lab = gen_label_rtx ();
5028 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
5029 expand_inc (t1, const1_rtx);
5030 emit_label (lab);
5031 quotient = t1;
5032 }
5033 else
5034 quotient = force_operand (gen_rtx_PLUS (compute_mode,
5035 t1, t3),
5036 tquotient);
5037 break;
5038 }
5039
5040 /* Try using an instruction that produces both the quotient and
5041 remainder, using truncation. We can easily compensate the
5042 quotient or remainder to get ceiling rounding, once we have the
5043 remainder. Notice that we also compute the final remainder
5044 value here, and return the result right away. */
5045 if (target == 0 || GET_MODE (target) != compute_mode)
5046 target = gen_reg_rtx (compute_mode);
5047 if (rem_flag)
5048 {
5049 remainder = (REG_P (target)
5050 ? target : gen_reg_rtx (compute_mode));
5051 quotient = gen_reg_rtx (compute_mode);
5052 }
5053 else
5054 {
5055 quotient = (REG_P (target)
5056 ? target : gen_reg_rtx (compute_mode));
5057 remainder = gen_reg_rtx (compute_mode);
5058 }
5059
5060 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
5061 remainder, 0))
5062 {
5063 /* This could be computed with a branch-less sequence.
5064 Save that for later. */
5065 rtx tem;
5066 rtx_code_label *label = gen_label_rtx ();
5067 do_cmp_and_jump (remainder, const0_rtx, EQ,
5068 compute_mode, label);
5069 tem = expand_binop (compute_mode, xor_optab, op0, op1,
5070 NULL_RTX, 0, OPTAB_WIDEN);
5071 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
5072 expand_inc (quotient, const1_rtx);
5073 expand_dec (remainder, op1);
5074 emit_label (label);
5075 return gen_lowpart (mode, rem_flag ? remainder : quotient);
5076 }
5077
5078 /* No luck with division elimination or divmod. Have to do it
5079 by conditionally adjusting op0 *and* the result. */
5080 {
5081 rtx_code_label *label1, *label2, *label3, *label4, *label5;
5082 rtx adjusted_op0;
5083 rtx tem;
5084
5085 quotient = gen_reg_rtx (compute_mode);
5086 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
5087 label1 = gen_label_rtx ();
5088 label2 = gen_label_rtx ();
5089 label3 = gen_label_rtx ();
5090 label4 = gen_label_rtx ();
5091 label5 = gen_label_rtx ();
5092 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
5093 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
5094 compute_mode, label1);
5095 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5096 quotient, 0, methods);
5097 if (tem != quotient)
5098 emit_move_insn (quotient, tem);
5099 emit_jump_insn (targetm.gen_jump (label5));
5100 emit_barrier ();
5101 emit_label (label1);
5102 expand_dec (adjusted_op0, const1_rtx);
5103 emit_jump_insn (targetm.gen_jump (label4));
5104 emit_barrier ();
5105 emit_label (label2);
5106 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
5107 compute_mode, label3);
5108 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5109 quotient, 0, methods);
5110 if (tem != quotient)
5111 emit_move_insn (quotient, tem);
5112 emit_jump_insn (targetm.gen_jump (label5));
5113 emit_barrier ();
5114 emit_label (label3);
5115 expand_inc (adjusted_op0, const1_rtx);
5116 emit_label (label4);
5117 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5118 quotient, 0, methods);
5119 if (tem != quotient)
5120 emit_move_insn (quotient, tem);
5121 expand_inc (quotient, const1_rtx);
5122 emit_label (label5);
5123 }
5124 }
5125 break;
5126
5127 case EXACT_DIV_EXPR:
5128 if (op1_is_constant && HWI_COMPUTABLE_MODE_P (compute_mode))
5129 {
5130 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5131 int size = GET_MODE_BITSIZE (int_mode);
5132 HOST_WIDE_INT d = INTVAL (op1);
5133 unsigned HOST_WIDE_INT ml;
5134 int pre_shift;
5135 rtx t1;
5136
5137 pre_shift = ctz_or_zero (d);
5138 ml = invert_mod2n (d >> pre_shift, size);
5139 t1 = expand_shift (RSHIFT_EXPR, int_mode, op0,
5140 pre_shift, NULL_RTX, unsignedp);
5141 quotient = expand_mult (int_mode, t1, gen_int_mode (ml, int_mode),
5142 NULL_RTX, 1);
5143
5144 insn = get_last_insn ();
5145 set_dst_reg_note (insn, REG_EQUAL,
5146 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
5147 int_mode, op0, op1),
5148 quotient);
5149 }
5150 break;
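/* Editorial sketch (not part of the GCC sources): an exact division by a
   constant D reduces to a multiplication by the inverse of D modulo 2**N,
   which is what invert_mod2n supplies above.  For 32-bit D = 5 the inverse
   is 0xcccccccd, since 5 * 0xcccccccd == 1 (mod 2**32), so

     uint32_t exact_div5 (uint32_t x)   // x assumed a multiple of 5
     {
       return x * 0xcccccccdu;          // == x / 5 for exact multiples
     }

   An even divisor first has its trailing zero bits shifted out
   (pre_shift above).  */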
5151
5152 case ROUND_DIV_EXPR:
5153 case ROUND_MOD_EXPR:
5154 if (unsignedp)
5155 {
5156 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5157 rtx tem;
5158 rtx_code_label *label;
5159 label = gen_label_rtx ();
5160 quotient = gen_reg_rtx (int_mode);
5161 remainder = gen_reg_rtx (int_mode);
5162 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
5163 {
5164 rtx tem;
5165 quotient = expand_binop (int_mode, udiv_optab, op0, op1,
5166 quotient, 1, methods);
5167 tem = expand_mult (int_mode, quotient, op1, NULL_RTX, 1);
5168 remainder = expand_binop (int_mode, sub_optab, op0, tem,
5169 remainder, 1, methods);
5170 }
5171 tem = plus_constant (int_mode, op1, -1);
5172 tem = expand_shift (RSHIFT_EXPR, int_mode, tem, 1, NULL_RTX, 1);
5173 do_cmp_and_jump (remainder, tem, LEU, int_mode, label);
5174 expand_inc (quotient, const1_rtx);
5175 expand_dec (remainder, op1);
5176 emit_label (label);
5177 }
5178 else
5179 {
5180 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5181 int size = GET_MODE_BITSIZE (int_mode);
5182 rtx abs_rem, abs_op1, tem, mask;
5183 rtx_code_label *label;
5184 label = gen_label_rtx ();
5185 quotient = gen_reg_rtx (int_mode);
5186 remainder = gen_reg_rtx (int_mode);
5187 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
5188 {
5189 rtx tem;
5190 quotient = expand_binop (int_mode, sdiv_optab, op0, op1,
5191 quotient, 0, methods);
5192 tem = expand_mult (int_mode, quotient, op1, NULL_RTX, 0);
5193 remainder = expand_binop (int_mode, sub_optab, op0, tem,
5194 remainder, 0, methods);
5195 }
5196 abs_rem = expand_abs (int_mode, remainder, NULL_RTX, 1, 0);
5197 abs_op1 = expand_abs (int_mode, op1, NULL_RTX, 1, 0);
5198 tem = expand_shift (LSHIFT_EXPR, int_mode, abs_rem,
5199 1, NULL_RTX, 1);
5200 do_cmp_and_jump (tem, abs_op1, LTU, int_mode, label);
5201 tem = expand_binop (int_mode, xor_optab, op0, op1,
5202 NULL_RTX, 0, OPTAB_WIDEN);
5203 mask = expand_shift (RSHIFT_EXPR, int_mode, tem,
5204 size - 1, NULL_RTX, 0);
5205 tem = expand_binop (int_mode, xor_optab, mask, const1_rtx,
5206 NULL_RTX, 0, OPTAB_WIDEN);
5207 tem = expand_binop (int_mode, sub_optab, tem, mask,
5208 NULL_RTX, 0, OPTAB_WIDEN);
5209 expand_inc (quotient, tem);
5210 tem = expand_binop (int_mode, xor_optab, mask, op1,
5211 NULL_RTX, 0, OPTAB_WIDEN);
5212 tem = expand_binop (int_mode, sub_optab, tem, mask,
5213 NULL_RTX, 0, OPTAB_WIDEN);
5214 expand_dec (remainder, tem);
5215 emit_label (label);
5216 }
5217 return gen_lowpart (mode, rem_flag ? remainder : quotient);
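/* Editorial sketch (not part of the GCC sources): the ROUND_DIV/ROUND_MOD
   fixups above start from a truncating division and move the quotient one
   step away from zero whenever twice the remainder is at least as large in
   magnitude as the divisor (ties round away from zero), e.g.

     round (7 / 2):  q = 3, r = 1,  2*|r| >= |2|  ->  q = 4, r = -1
     round (7 / 3):  q = 2, r = 1,  2*|r| <  |3|  ->  unchanged           */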
5218
5219 default:
5220 gcc_unreachable ();
5221 }
5222
5223 if (quotient == 0)
5224 {
5225 if (target && GET_MODE (target) != compute_mode)
5226 target = 0;
5227
5228 if (rem_flag)
5229 {
5230 /* Try to produce the remainder without producing the quotient.
5231 If we seem to have a divmod pattern that does not require widening,
5232 don't try widening here. We should really have a WIDEN argument
5233 to expand_twoval_binop, since what we'd really like to do here is
5234 1) try a mod insn in compute_mode
5235 2) try a divmod insn in compute_mode
5236 3) try a div insn in compute_mode and multiply-subtract to get
5237 remainder
5238 4) try the same things with widening allowed. */
5239 remainder
5240 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5241 op0, op1, target,
5242 unsignedp,
5243 ((optab_handler (optab2, compute_mode)
5244 != CODE_FOR_nothing)
5245 ? OPTAB_DIRECT : OPTAB_WIDEN));
5246 if (remainder == 0)
5247 {
5248 /* No luck there. Can we do remainder and divide at once
5249 without a library call? */
5250 remainder = gen_reg_rtx (compute_mode);
5251 if (! expand_twoval_binop ((unsignedp
5252 ? udivmod_optab
5253 : sdivmod_optab),
5254 op0, op1,
5255 NULL_RTX, remainder, unsignedp))
5256 remainder = 0;
5257 }
5258
5259 if (remainder)
5260 return gen_lowpart (mode, remainder);
5261 }
5262
5263 /* Produce the quotient. Try a quotient insn, but not a library call.
5264 If we have a divmod in this mode, use it in preference to widening
5265 the div (for this test we assume it will not fail). Note that optab2
5266 is set to one of the two optabs that the call below will use. */
5267 quotient
5268 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
5269 op0, op1, rem_flag ? NULL_RTX : target,
5270 unsignedp,
5271 ((optab_handler (optab2, compute_mode)
5272 != CODE_FOR_nothing)
5273 ? OPTAB_DIRECT : OPTAB_WIDEN));
5274
5275 if (quotient == 0)
5276 {
5277 /* No luck there. Try a quotient-and-remainder insn,
5278 keeping the quotient alone. */
5279 quotient = gen_reg_rtx (compute_mode);
5280 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
5281 op0, op1,
5282 quotient, NULL_RTX, unsignedp))
5283 {
5284 quotient = 0;
5285 if (! rem_flag)
5286 /* Still no luck. If we are not computing the remainder,
5287 use a library call for the quotient. */
5288 quotient = sign_expand_binop (compute_mode,
5289 udiv_optab, sdiv_optab,
5290 op0, op1, target,
5291 unsignedp, methods);
5292 }
5293 }
5294 }
5295
5296 if (rem_flag)
5297 {
5298 if (target && GET_MODE (target) != compute_mode)
5299 target = 0;
5300
5301 if (quotient == 0)
5302 {
5303 /* No divide instruction either. Use library for remainder. */
5304 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5305 op0, op1, target,
5306 unsignedp, methods);
5307 /* No remainder function. Try a quotient-and-remainder
5308 function, keeping the remainder. */
5309 if (!remainder
5310 && (methods == OPTAB_LIB || methods == OPTAB_LIB_WIDEN))
5311 {
5312 remainder = gen_reg_rtx (compute_mode);
5313 if (!expand_twoval_binop_libfunc
5314 (unsignedp ? udivmod_optab : sdivmod_optab,
5315 op0, op1,
5316 NULL_RTX, remainder,
5317 unsignedp ? UMOD : MOD))
5318 remainder = NULL_RTX;
5319 }
5320 }
5321 else
5322 {
5323 /* We divided. Now finish doing X - Y * (X / Y). */
5324 remainder = expand_mult (compute_mode, quotient, op1,
5325 NULL_RTX, unsignedp);
5326 remainder = expand_binop (compute_mode, sub_optab, op0,
5327 remainder, target, unsignedp,
5328 methods);
5329 }
5330 }
5331
5332 if (methods != OPTAB_LIB_WIDEN
5333 && (rem_flag ? remainder : quotient) == NULL_RTX)
5334 return NULL_RTX;
5335
5336 return gen_lowpart (mode, rem_flag ? remainder : quotient);
5337 }
5338 \f
5339 /* Return a tree node with data type TYPE, describing the value of X.
5340 Usually this is a VAR_DECL, if there is no obvious better choice.
5341 X may be an expression; however, we only support those expressions
5342 generated by loop.c. */
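/* Editorial example (not part of the GCC sources): make_tree maps RTL back
   onto trees, recursing on the operands of arithmetic codes.  For a 32-bit
   integer TYPE,

     (plus:SI (reg:SI 100) (const_int 4))

   would become PLUS_EXPR <tmp, 4>, where tmp is a synthetic VAR_DECL whose
   rtl is (reg:SI 100), as built by the default case below.  */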
5343
5344 tree
5345 make_tree (tree type, rtx x)
5346 {
5347 tree t;
5348
5349 switch (GET_CODE (x))
5350 {
5351 case CONST_INT:
5352 case CONST_WIDE_INT:
5353 t = wide_int_to_tree (type, rtx_mode_t (x, TYPE_MODE (type)));
5354 return t;
5355
5356 case CONST_DOUBLE:
5357 STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
5358 if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
5359 t = wide_int_to_tree (type,
5360 wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
5361 HOST_BITS_PER_WIDE_INT * 2));
5362 else
5363 t = build_real (type, *CONST_DOUBLE_REAL_VALUE (x));
5364
5365 return t;
5366
5367 case CONST_VECTOR:
5368 {
5369 unsigned int npatterns = CONST_VECTOR_NPATTERNS (x);
5370 unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (x);
5371 tree itype = TREE_TYPE (type);
5372
5373 /* Build a tree with vector elements. */
5374 tree_vector_builder elts (type, npatterns, nelts_per_pattern);
5375 unsigned int count = elts.encoded_nelts ();
5376 for (unsigned int i = 0; i < count; ++i)
5377 {
5378 rtx elt = CONST_VECTOR_ELT (x, i);
5379 elts.quick_push (make_tree (itype, elt));
5380 }
5381
5382 return elts.build ();
5383 }
5384
5385 case PLUS:
5386 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5387 make_tree (type, XEXP (x, 1)));
5388
5389 case MINUS:
5390 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5391 make_tree (type, XEXP (x, 1)));
5392
5393 case NEG:
5394 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5395
5396 case MULT:
5397 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5398 make_tree (type, XEXP (x, 1)));
5399
5400 case ASHIFT:
5401 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5402 make_tree (type, XEXP (x, 1)));
5403
5404 case LSHIFTRT:
5405 t = unsigned_type_for (type);
5406 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5407 make_tree (t, XEXP (x, 0)),
5408 make_tree (type, XEXP (x, 1))));
5409
5410 case ASHIFTRT:
5411 t = signed_type_for (type);
5412 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5413 make_tree (t, XEXP (x, 0)),
5414 make_tree (type, XEXP (x, 1))));
5415
5416 case DIV:
5417 if (TREE_CODE (type) != REAL_TYPE)
5418 t = signed_type_for (type);
5419 else
5420 t = type;
5421
5422 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5423 make_tree (t, XEXP (x, 0)),
5424 make_tree (t, XEXP (x, 1))));
5425 case UDIV:
5426 t = unsigned_type_for (type);
5427 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5428 make_tree (t, XEXP (x, 0)),
5429 make_tree (t, XEXP (x, 1))));
5430
5431 case SIGN_EXTEND:
5432 case ZERO_EXTEND:
5433 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5434 GET_CODE (x) == ZERO_EXTEND);
5435 return fold_convert (type, make_tree (t, XEXP (x, 0)));
5436
5437 case CONST:
5438 return make_tree (type, XEXP (x, 0));
5439
5440 case SYMBOL_REF:
5441 t = SYMBOL_REF_DECL (x);
5442 if (t)
5443 return fold_convert (type, build_fold_addr_expr (t));
5444 /* fall through. */
5445
5446 default:
5447 if (CONST_POLY_INT_P (x))
5448 return wide_int_to_tree (type, const_poly_int_value (x));
5449
5450 t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5451
5452 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5453 address mode to pointer mode. */
5454 if (POINTER_TYPE_P (type))
5455 x = convert_memory_address_addr_space
5456 (SCALAR_INT_TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5457
5458 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5459 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5460 t->decl_with_rtl.rtl = x;
5461
5462 return t;
5463 }
5464 }
5465 \f
5466 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5467 and returning TARGET.
5468
5469 If TARGET is 0, a pseudo-register or constant is returned. */
5470
5471 rtx
5472 expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
5473 {
5474 rtx tem = 0;
5475
5476 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5477 tem = simplify_binary_operation (AND, mode, op0, op1);
5478 if (tem == 0)
5479 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5480
5481 if (target == 0)
5482 target = tem;
5483 else if (tem != target)
5484 emit_move_insn (target, tem);
5485 return target;
5486 }
5487
5488 /* Helper function for emit_store_flag. */
5489 rtx
5490 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5491 machine_mode mode, machine_mode compare_mode,
5492 int unsignedp, rtx x, rtx y, int normalizep,
5493 machine_mode target_mode)
5494 {
5495 class expand_operand ops[4];
5496 rtx op0, comparison, subtarget;
5497 rtx_insn *last;
5498 scalar_int_mode result_mode = targetm.cstore_mode (icode);
5499 scalar_int_mode int_target_mode;
5500
5501 last = get_last_insn ();
5502 x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5503 y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5504 if (!x || !y)
5505 {
5506 delete_insns_since (last);
5507 return NULL_RTX;
5508 }
5509
5510 if (target_mode == VOIDmode)
5511 int_target_mode = result_mode;
5512 else
5513 int_target_mode = as_a <scalar_int_mode> (target_mode);
5514 if (!target)
5515 target = gen_reg_rtx (int_target_mode);
5516
5517 comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5518
5519 create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5520 create_fixed_operand (&ops[1], comparison);
5521 create_fixed_operand (&ops[2], x);
5522 create_fixed_operand (&ops[3], y);
5523 if (!maybe_expand_insn (icode, 4, ops))
5524 {
5525 delete_insns_since (last);
5526 return NULL_RTX;
5527 }
5528 subtarget = ops[0].value;
5529
5530 /* If we are converting to a wider mode, first convert to
5531 INT_TARGET_MODE, then normalize. This produces better combining
5532 opportunities on machines that have a SIGN_EXTRACT when we are
5533 testing a single bit. This mostly benefits the 68k.
5534
5535 If STORE_FLAG_VALUE does not have the sign bit set when
5536 interpreted in MODE, we can do this conversion as unsigned, which
5537 is usually more efficient. */
5538 if (GET_MODE_PRECISION (int_target_mode) > GET_MODE_PRECISION (result_mode))
5539 {
5540 gcc_assert (GET_MODE_PRECISION (result_mode) != 1
5541 || STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1);
5542
5543 bool unsignedp = (STORE_FLAG_VALUE >= 0);
5544 convert_move (target, subtarget, unsignedp);
5545
5546 op0 = target;
5547 result_mode = int_target_mode;
5548 }
5549 else
5550 op0 = subtarget;
5551
5552 /* If we want to keep subexpressions around, don't reuse our last
5553 target. */
5554 if (optimize)
5555 subtarget = 0;
5556
5557 /* Now normalize to the proper value in MODE. Sometimes we don't
5558 have to do anything. */
5559 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5560 ;
5561 /* STORE_FLAG_VALUE might be the most negative number, so write
5562 the comparison this way to avoid a compile-time warning. */
5563 else if (- normalizep == STORE_FLAG_VALUE)
5564 op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5565
5566 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5567 it hard to use a value of just the sign bit due to ANSI integer
5568 constant typing rules. */
5569 else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5570 op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5571 GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5572 normalizep == 1);
5573 else
5574 {
5575 gcc_assert (STORE_FLAG_VALUE & 1);
5576
5577 op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5578 if (normalizep == -1)
5579 op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5580 }
5581
5582 /* If we were converting to a smaller mode, do the conversion now. */
5583 if (int_target_mode != result_mode)
5584 {
5585 convert_move (target, op0, 0);
5586 return target;
5587 }
5588 else
5589 return op0;
5590 }
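/* Editorial sketch (not part of the GCC sources): the normalization at the
   end of emit_cstore above, restated for a 32-bit mode, where R is the raw
   cstore result (STORE_FLAG_VALUE when the condition holds, 0 otherwise):

     if (normalizep == 0 || normalizep == store_flag_value)
       result = r;                             // already in the right form
     else if (-normalizep == store_flag_value)
       result = -r;                            // just negate
     else if (store_flag_value < 0)            // sign bit known set
       result = normalizep == 1 ? (uint32_t) r >> 31 : r >> 31;
     else                                      // STORE_FLAG_VALUE is odd
       result = normalizep == -1 ? -(r & 1) : (r & 1);                    */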
5591
5592
5593 /* A subroutine of emit_store_flag only including "tricks" that do not
5594 need a recursive call. These are kept separate to avoid infinite
5595 loops. */
5596
5597 static rtx
5598 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5599 machine_mode mode, int unsignedp, int normalizep,
5600 machine_mode target_mode)
5601 {
5602 rtx subtarget;
5603 enum insn_code icode;
5604 machine_mode compare_mode;
5605 enum mode_class mclass;
5606 enum rtx_code scode;
5607
5608 if (unsignedp)
5609 code = unsigned_condition (code);
5610 scode = swap_condition (code);
5611
5612 /* If one operand is constant, make it the second one. Only do this
5613 if the other operand is not constant as well. */
5614
5615 if (swap_commutative_operands_p (op0, op1))
5616 {
5617 std::swap (op0, op1);
5618 code = swap_condition (code);
5619 }
5620
5621 if (mode == VOIDmode)
5622 mode = GET_MODE (op0);
5623
5624 if (CONST_SCALAR_INT_P (op1))
5625 canonicalize_comparison (mode, &code, &op1);
5626
5627 /* For some comparisons with 1 and -1, we can convert this to
5628 comparisons with zero. This will often produce more opportunities for
5629 store-flag insns. */
5630
5631 switch (code)
5632 {
5633 case LT:
5634 if (op1 == const1_rtx)
5635 op1 = const0_rtx, code = LE;
5636 break;
5637 case LE:
5638 if (op1 == constm1_rtx)
5639 op1 = const0_rtx, code = LT;
5640 break;
5641 case GE:
5642 if (op1 == const1_rtx)
5643 op1 = const0_rtx, code = GT;
5644 break;
5645 case GT:
5646 if (op1 == constm1_rtx)
5647 op1 = const0_rtx, code = GE;
5648 break;
5649 case GEU:
5650 if (op1 == const1_rtx)
5651 op1 = const0_rtx, code = NE;
5652 break;
5653 case LTU:
5654 if (op1 == const1_rtx)
5655 op1 = const0_rtx, code = EQ;
5656 break;
5657 default:
5658 break;
5659 }
5660
5661 /* If this is A < 0 or A >= 0, we can do this by taking the ones
5662 complement of A (for GE) and shifting the sign bit to the low bit. */
5663 scalar_int_mode int_mode;
5664 if (op1 == const0_rtx && (code == LT || code == GE)
5665 && is_int_mode (mode, &int_mode)
5666 && (normalizep || STORE_FLAG_VALUE == 1
5667 || val_signbit_p (int_mode, STORE_FLAG_VALUE)))
5668 {
5669 scalar_int_mode int_target_mode;
5670 subtarget = target;
5671
5672 if (!target)
5673 int_target_mode = int_mode;
5674 else
5675 {
5676 /* If the result is to be wider than OP0, it is best to convert it
5677 first. If it is to be narrower, it is *incorrect* to convert it
5678 first. */
5679 int_target_mode = as_a <scalar_int_mode> (target_mode);
5680 if (GET_MODE_SIZE (int_target_mode) > GET_MODE_SIZE (int_mode))
5681 {
5682 op0 = convert_modes (int_target_mode, int_mode, op0, 0);
5683 int_mode = int_target_mode;
5684 }
5685 }
5686
5687 if (int_target_mode != int_mode)
5688 subtarget = 0;
5689
5690 if (code == GE)
5691 op0 = expand_unop (int_mode, one_cmpl_optab, op0,
5692 ((STORE_FLAG_VALUE == 1 || normalizep)
5693 ? 0 : subtarget), 0);
5694
5695 if (STORE_FLAG_VALUE == 1 || normalizep)
5696 /* If we are supposed to produce a 0/1 value, we want to do
5697 a logical shift from the sign bit to the low-order bit; for
5698 a -1/0 value, we do an arithmetic shift. */
5699 op0 = expand_shift (RSHIFT_EXPR, int_mode, op0,
5700 GET_MODE_BITSIZE (int_mode) - 1,
5701 subtarget, normalizep != -1);
5702
5703 if (int_mode != int_target_mode)
5704 op0 = convert_modes (int_target_mode, int_mode, op0, 0);
5705
5706 return op0;
5707 }
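/* Editorial sketch (not part of the GCC sources): for 32-bit A the block
   above yields, for a 0/1 result,

     a <  0   ->   (uint32_t)  a >> 31
     a >= 0   ->   (uint32_t) ~a >> 31

   and, for a 0/-1 result, the same with an arithmetic right shift.  */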
5708
5709 /* Next try expanding this via the backend's cstore<mode>4. */
5710 mclass = GET_MODE_CLASS (mode);
5711 FOR_EACH_WIDER_MODE_FROM (compare_mode, mode)
5712 {
5713 machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5714 icode = optab_handler (cstore_optab, optab_mode);
5715 if (icode != CODE_FOR_nothing)
5716 {
5717 do_pending_stack_adjust ();
5718 rtx tem = emit_cstore (target, icode, code, mode, compare_mode,
5719 unsignedp, op0, op1, normalizep, target_mode);
5720 if (tem)
5721 return tem;
5722
5723 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5724 {
5725 tem = emit_cstore (target, icode, scode, mode, compare_mode,
5726 unsignedp, op1, op0, normalizep, target_mode);
5727 if (tem)
5728 return tem;
5729 }
5730 break;
5731 }
5732 }
5733
5734 /* If we are comparing a double-word integer with zero or -1, we can
5735 convert the comparison into one involving a single word. */
5736 if (is_int_mode (mode, &int_mode)
5737 && GET_MODE_BITSIZE (int_mode) == BITS_PER_WORD * 2
5738 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5739 {
5740 rtx tem;
5741 if ((code == EQ || code == NE)
5742 && (op1 == const0_rtx || op1 == constm1_rtx))
5743 {
5744 rtx op00, op01;
5745
5746 /* Do a logical OR or AND of the two words and compare the
5747 result. */
5748 op00 = simplify_gen_subreg (word_mode, op0, int_mode, 0);
5749 op01 = simplify_gen_subreg (word_mode, op0, int_mode, UNITS_PER_WORD);
5750 tem = expand_binop (word_mode,
5751 op1 == const0_rtx ? ior_optab : and_optab,
5752 op00, op01, NULL_RTX, unsignedp,
5753 OPTAB_DIRECT);
5754
5755 if (tem != 0)
5756 tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5757 unsignedp, normalizep);
5758 }
5759 else if ((code == LT || code == GE) && op1 == const0_rtx)
5760 {
5761 rtx op0h;
5762
5763 /* If testing the sign bit, can just test on high word. */
5764 op0h = simplify_gen_subreg (word_mode, op0, int_mode,
5765 subreg_highpart_offset (word_mode,
5766 int_mode));
5767 tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5768 unsignedp, normalizep);
5769 }
5770 else
5771 tem = NULL_RTX;
5772
5773 if (tem)
5774 {
5775 if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5776 return tem;
5777 if (!target)
5778 target = gen_reg_rtx (target_mode);
5779
5780 convert_move (target, tem,
5781 !val_signbit_known_set_p (word_mode,
5782 (normalizep ? normalizep
5783 : STORE_FLAG_VALUE)));
5784 return target;
5785 }
5786 }
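/* Editorial sketch (not part of the GCC sources): writing a double-word X
   as word-sized halves LO and HI, the block above rewrites

     x == 0   ->   (lo | hi) == 0        x == -1   ->   (lo & hi) == -1
     x != 0   ->   (lo | hi) != 0        x <  0    ->   hi < 0
                                         x >= 0    ->   hi >= 0          */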
5787
5788 return 0;
5789 }
5790
5791 /* Subroutine of emit_store_flag that handles cases in which the operands
5792 are scalar integers. SUBTARGET is the target to use for temporary
5793 operations and TRUEVAL is the value to store when the condition is
5794 true. All other arguments are as for emit_store_flag. */
5795
5796 rtx
5797 emit_store_flag_int (rtx target, rtx subtarget, enum rtx_code code, rtx op0,
5798 rtx op1, scalar_int_mode mode, int unsignedp,
5799 int normalizep, rtx trueval)
5800 {
5801 machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5802 rtx_insn *last = get_last_insn ();
5803
5804 /* If this is an equality comparison of integers, we can try to exclusive-or
5805 (or subtract) the two operands and use a recursive call to try the
5806 comparison with zero. Don't do any of these cases if branches are
5807 very cheap. */
5808
5809 if ((code == EQ || code == NE) && op1 != const0_rtx)
5810 {
5811 rtx tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5812 OPTAB_WIDEN);
5813
5814 if (tem == 0)
5815 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5816 OPTAB_WIDEN);
5817 if (tem != 0)
5818 tem = emit_store_flag (target, code, tem, const0_rtx,
5819 mode, unsignedp, normalizep);
5820 if (tem != 0)
5821 return tem;
5822
5823 delete_insns_since (last);
5824 }
5825
5826 /* For integer comparisons, try the reverse comparison. However, for
5827 small X, if we'd have to extend anyway, implementing "X != 0"
5828 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
5829 rtx_code rcode = reverse_condition (code);
5830 if (can_compare_p (rcode, mode, ccp_store_flag)
5831 && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5832 && code == NE
5833 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5834 && op1 == const0_rtx))
5835 {
5836 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5837 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5838
5839 /* Again, for the reverse comparison, use either an addition or a XOR. */
5840 if (want_add
5841 && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5842 optimize_insn_for_speed_p ()) == 0)
5843 {
5844 rtx tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5845 STORE_FLAG_VALUE, target_mode);
5846 if (tem != 0)
5847 tem = expand_binop (target_mode, add_optab, tem,
5848 gen_int_mode (normalizep, target_mode),
5849 target, 0, OPTAB_WIDEN);
5850 if (tem != 0)
5851 return tem;
5852 }
5853 else if (!want_add
5854 && rtx_cost (trueval, mode, XOR, 1,
5855 optimize_insn_for_speed_p ()) == 0)
5856 {
5857 rtx tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5858 normalizep, target_mode);
5859 if (tem != 0)
5860 tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5861 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5862 if (tem != 0)
5863 return tem;
5864 }
5865
5866 delete_insns_since (last);
5867 }
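/* Editorial sketch (not part of the GCC sources): the reverse-comparison
   path above rebuilds the wanted value with one extra ADD or XOR, e.g.
   with STORE_FLAG_VALUE == 1:

     (x != y) as 0/-1   ->   (x == y) + (-1)
     (x != y) as 0/1    ->   (x == y) ^ 1                                 */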
5868
5869 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5870 the constant zero. Reject all other comparisons at this point. Only
5871 do LE and GT if branches are expensive since they are expensive on
5872 2-operand machines. */
5873
5874 if (op1 != const0_rtx
5875 || (code != EQ && code != NE
5876 && (BRANCH_COST (optimize_insn_for_speed_p (),
5877 false) <= 1 || (code != LE && code != GT))))
5878 return 0;
5879
5880 /* Try to put the result of the comparison in the sign bit. Assume we can't
5881 do the necessary operation below. */
5882
5883 rtx tem = 0;
5884
5885 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5886 the sign bit set. */
5887
5888 if (code == LE)
5889 {
5890 /* This is destructive, so SUBTARGET can't be OP0. */
5891 if (rtx_equal_p (subtarget, op0))
5892 subtarget = 0;
5893
5894 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5895 OPTAB_WIDEN);
5896 if (tem)
5897 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5898 OPTAB_WIDEN);
5899 }
5900
5901 /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5902 number of bits in the mode of OP0, minus one. */
5903
5904 if (code == GT)
5905 {
5906 if (rtx_equal_p (subtarget, op0))
5907 subtarget = 0;
5908
5909 tem = maybe_expand_shift (RSHIFT_EXPR, mode, op0,
5910 GET_MODE_BITSIZE (mode) - 1,
5911 subtarget, 0);
5912 if (tem)
5913 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5914 OPTAB_WIDEN);
5915 }
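/* Editorial sketch (not part of the GCC sources): the two sign-bit
   encodings built above, for 32-bit A:

     a <= 0   iff   (a | (a - 1)) has the sign bit set
     a >  0   iff   ((a >> 31) - a) has the sign bit set  (arithmetic >>)

   e.g. a = 3:  3 | 2 = 3 (sign clear, so not <= 0), and 0 - 3 = -3
   (sign set, so > 0).  */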
5916
5917 if (code == EQ || code == NE)
5918 {
5919 /* For EQ or NE, one way to do the comparison is to apply an operation
5920 that converts the operand into a positive number if it is nonzero
5921 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5922 for NE we negate. This puts the result in the sign bit. Then we
5923 normalize with a shift, if needed.
5924
5925 Two operations that can do the above actions are ABS and FFS, so try
5926 them. If that doesn't work, and MODE is smaller than a full word,
5927 we can use zero-extension to the wider mode (an unsigned conversion)
5928 as the operation. */
5929
5930 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5931 that is compensated by the subsequent overflow when subtracting
5932 one / negating. */
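/* Editorial sketch (not part of the GCC sources): for 32-bit X the
   ABS-based variant computes

     x != 0   ->   (uint32_t) (-abs (x))    >> 31   // sign bit of -|x|
     x == 0   ->   (uint32_t) (abs (x) - 1) >> 31   // sign bit of |x| - 1

   For x == INT_MIN, abs overflows back to INT_MIN, but the later negation
   or decrement wraps again and the sign bit still comes out right, so
   every input gives the correct answer, as noted above.  */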
5933
5934 if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5935 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5936 else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5937 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5938 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5939 {
5940 tem = convert_modes (word_mode, mode, op0, 1);
5941 mode = word_mode;
5942 }
5943
5944 if (tem != 0)
5945 {
5946 if (code == EQ)
5947 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5948 0, OPTAB_WIDEN);
5949 else
5950 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5951 }
5952
5953 /* If we couldn't do it that way, for NE we can "or" the two's complement
5954 of the value with itself. For EQ, we take the one's complement of
5955 that "or", which is an extra insn, so we only handle EQ if branches
5956 are expensive. */
5957
5958 if (tem == 0
5959 && (code == NE
5960 || BRANCH_COST (optimize_insn_for_speed_p (),
5961 false) > 1))
5962 {
5963 if (rtx_equal_p (subtarget, op0))
5964 subtarget = 0;
5965
5966 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5967 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5968 OPTAB_WIDEN);
5969
5970 if (tem && code == EQ)
5971 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5972 }
5973 }
5974
5975 if (tem && normalizep)
5976 tem = maybe_expand_shift (RSHIFT_EXPR, mode, tem,
5977 GET_MODE_BITSIZE (mode) - 1,
5978 subtarget, normalizep == 1);
5979
5980 if (tem)
5981 {
5982 if (!target)
5983 ;
5984 else if (GET_MODE (tem) != target_mode)
5985 {
5986 convert_move (target, tem, 0);
5987 tem = target;
5988 }
5989 else if (!subtarget)
5990 {
5991 emit_move_insn (target, tem);
5992 tem = target;
5993 }
5994 }
5995 else
5996 delete_insns_since (last);
5997
5998 return tem;
5999 }
6000
6001 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
6002 and storing in TARGET. Normally return TARGET.
6003 Return 0 if that cannot be done.
6004
6005 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
6006 it is VOIDmode, they cannot both be CONST_INT.
6007
6008 UNSIGNEDP is for the case where we have to widen the operands
6009 to perform the operation. It says to use zero-extension.
6010
6011 NORMALIZEP is 1 if we should convert the result to be either zero
6012 or one. NORMALIZEP is -1 if we should convert the result to be
6013 either zero or -1. If NORMALIZEP is zero, the result will be left
6014 "raw" out of the scc insn. */
6015
6016 rtx
6017 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
6018 machine_mode mode, int unsignedp, int normalizep)
6019 {
6020 machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
6021 enum rtx_code rcode;
6022 rtx subtarget;
6023 rtx tem, trueval;
6024 rtx_insn *last;
6025
6026 /* If we compare constants, we shouldn't use a store-flag operation,
6027 but a constant load. We can get there via the vanilla route that
6028 usually generates a compare-branch sequence, but will in this case
6029 fold the comparison to a constant, and thus elide the branch. */
6030 if (CONSTANT_P (op0) && CONSTANT_P (op1))
6031 return NULL_RTX;
6032
6033 tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
6034 target_mode);
6035 if (tem)
6036 return tem;
6037
6038 /* If we reached here, we can't do this with a scc insn, however there
6039 are some comparisons that can be done in other ways. Don't do any
6040 of these cases if branches are very cheap. */
6041 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
6042 return 0;
6043
6044 /* See what we need to return. We can only return a 1, -1, or the
6045 sign bit. */
6046
6047 if (normalizep == 0)
6048 {
6049 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
6050 normalizep = STORE_FLAG_VALUE;
6051
6052 else if (val_signbit_p (mode, STORE_FLAG_VALUE))
6053 ;
6054 else
6055 return 0;
6056 }
6057
6058 last = get_last_insn ();
6059
6060 /* If optimizing, use different pseudo registers for each insn, instead
6061 of reusing the same pseudo. This leads to better CSE, but slows
6062 down the compiler, since there are more pseudos. */
6063 subtarget = (!optimize
6064 && (target_mode == mode)) ? target : NULL_RTX;
6065 trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
6066
6067 /* For floating-point comparisons, try the reverse comparison or try
6068 changing the "orderedness" of the comparison. */
6069 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6070 {
6071 enum rtx_code first_code;
6072 bool and_them;
6073
6074 rcode = reverse_condition_maybe_unordered (code);
6075 if (can_compare_p (rcode, mode, ccp_store_flag)
6076 && (code == ORDERED || code == UNORDERED
6077 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
6078 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
6079 {
6080 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
6081 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
6082
6083 /* For the reverse comparison, use either an addition or a XOR. */
6084 if (want_add
6085 && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
6086 optimize_insn_for_speed_p ()) == 0)
6087 {
6088 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
6089 STORE_FLAG_VALUE, target_mode);
6090 if (tem)
6091 return expand_binop (target_mode, add_optab, tem,
6092 gen_int_mode (normalizep, target_mode),
6093 target, 0, OPTAB_WIDEN);
6094 }
6095 else if (!want_add
6096 && rtx_cost (trueval, mode, XOR, 1,
6097 optimize_insn_for_speed_p ()) == 0)
6098 {
6099 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
6100 normalizep, target_mode);
6101 if (tem)
6102 return expand_binop (target_mode, xor_optab, tem, trueval,
6103 target, INTVAL (trueval) >= 0,
6104 OPTAB_WIDEN);
6105 }
6106 }
6107
6108 delete_insns_since (last);
6109
6110 /* Cannot split ORDERED and UNORDERED, only try the above trick. */
6111 if (code == ORDERED || code == UNORDERED)
6112 return 0;
6113
6114 and_them = split_comparison (code, mode, &first_code, &code);
6115
6116 /* If there are no NaNs, the first comparison should always fall through.
6117 Effectively change the comparison to the other one. */
6118 if (!HONOR_NANS (mode))
6119 {
6120 gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
6121 return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
6122 target_mode);
6123 }
6124
6125 if (!HAVE_conditional_move)
6126 return 0;
6127
6128 /* Do not turn a trapping comparison into a non-trapping one. */
6129 if ((code != EQ && code != NE && code != UNEQ && code != LTGT)
6130 && flag_trapping_math)
6131 return 0;
6132
6133 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
6134 conditional move. */
6135 tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
6136 normalizep, target_mode);
6137 if (tem == 0)
6138 return 0;
6139
6140 if (and_them)
6141 tem = emit_conditional_move (target, { code, op0, op1, mode },
6142 tem, const0_rtx, GET_MODE (tem), 0);
6143 else
6144 tem = emit_conditional_move (target, { code, op0, op1, mode },
6145 trueval, tem, GET_MODE (tem), 0);
6146
6147 if (tem == 0)
6148 delete_insns_since (last);
6149 return tem;
6150 }
6151
6152 /* The remaining tricks only apply to integer comparisons. */
6153
6154 scalar_int_mode int_mode;
6155 if (is_int_mode (mode, &int_mode))
6156 return emit_store_flag_int (target, subtarget, code, op0, op1, int_mode,
6157 unsignedp, normalizep, trueval);
6158
6159 return 0;
6160 }
6161
6162 /* Like emit_store_flag, but always succeeds. */
6163
6164 rtx
6165 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
6166 machine_mode mode, int unsignedp, int normalizep)
6167 {
6168 rtx tem;
6169 rtx_code_label *label;
6170 rtx trueval, falseval;
6171
6172 /* First see if emit_store_flag can do the job. */
6173 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
6174 if (tem != 0)
6175 return tem;
6176
6177 /* If one operand is constant, make it the second one. Only do this
6178 if the other operand is not constant as well. */
6179 if (swap_commutative_operands_p (op0, op1))
6180 {
6181 std::swap (op0, op1);
6182 code = swap_condition (code);
6183 }
6184
6185 if (mode == VOIDmode)
6186 mode = GET_MODE (op0);
6187
6188 if (!target)
6189 target = gen_reg_rtx (word_mode);
6190
6191 /* If this failed, we have to do this with set/compare/jump/set code.
6192 For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */
6193 trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
6194 if (code == NE
6195 && GET_MODE_CLASS (mode) == MODE_INT
6196 && REG_P (target)
6197 && op0 == target
6198 && op1 == const0_rtx)
6199 {
6200 label = gen_label_rtx ();
6201 do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, mode,
6202 NULL_RTX, NULL, label,
6203 profile_probability::uninitialized ());
6204 emit_move_insn (target, trueval);
6205 emit_label (label);
6206 return target;
6207 }
6208
6209 if (!REG_P (target)
6210 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
6211 target = gen_reg_rtx (GET_MODE (target));
6212
6213 /* Jump in the right direction if the target cannot implement CODE
6214 but can jump on its reverse condition. */
6215 falseval = const0_rtx;
6216 if (! can_compare_p (code, mode, ccp_jump)
6217 && (! FLOAT_MODE_P (mode)
6218 || code == ORDERED || code == UNORDERED
6219 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
6220 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
6221 {
6222 enum rtx_code rcode;
6223 if (FLOAT_MODE_P (mode))
6224 rcode = reverse_condition_maybe_unordered (code);
6225 else
6226 rcode = reverse_condition (code);
6227
6228 /* Canonicalize to UNORDERED for the libcall. */
6229 if (can_compare_p (rcode, mode, ccp_jump)
6230 || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
6231 {
6232 falseval = trueval;
6233 trueval = const0_rtx;
6234 code = rcode;
6235 }
6236 }
6237
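/* Editorial sketch (not part of the GCC sources) of the fallback emitted
   below:

     target = TRUEVAL;
     if (op0 CODE op1)
       goto done;          // condition holds, keep TRUEVAL
     target = FALSEVAL;
   done:                                                                  */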
6238 emit_move_insn (target, trueval);
6239 label = gen_label_rtx ();
6240 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, NULL,
6241 label, profile_probability::uninitialized ());
6242
6243 emit_move_insn (target, falseval);
6244 emit_label (label);
6245
6246 return target;
6247 }
6248
6249 /* Helper function for canonicalize_cmp_for_target. Swap between inclusive
6250 and exclusive ranges in order to create an equivalent comparison. See
6251 canonicalize_cmp_for_target for the possible cases. */
6252
6253 static enum rtx_code
6254 equivalent_cmp_code (enum rtx_code code)
6255 {
6256 switch (code)
6257 {
6258 case GT:
6259 return GE;
6260 case GE:
6261 return GT;
6262 case LT:
6263 return LE;
6264 case LE:
6265 return LT;
6266 case GTU:
6267 return GEU;
6268 case GEU:
6269 return GTU;
6270 case LTU:
6271 return LEU;
6272 case LEU:
6273 return LTU;
6274
6275 default:
6276 return code;
6277 }
6278 }
6279
6280 /* Choose the more appropriate immediate in scalar integer comparisons. The
6281 purpose of this is to end up with an immediate which can be loaded into a
6282 register in fewer moves, if possible.
6283
6284 For each integer comparison there exists an equivalent choice:
6285 i) a > b or a >= b + 1
6286 ii) a <= b or a < b + 1
6287 iii) a >= b or a > b - 1
6288 iv) a < b or a <= b - 1
6289
6290 MODE is the mode of the first operand.
6291 CODE points to the comparison code.
6292 IMM points to the rtx containing the immediate. *IMM must satisfy
6293 CONST_SCALAR_INT_P on entry and continues to satisfy CONST_SCALAR_INT_P
6294 on exit. */
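/* Editorial example (not part of the GCC sources): on a target whose
   compare instruction accepts only a limited range of immediates,
   "a > 0xffff" needs 0xffff loaded into a register first, while the
   equivalent "a >= 0x10000" may be encodable directly (or vice versa);
   the insn_cost comparison below picks whichever immediate is cheaper
   to materialize.  */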
6295
6296 void
6297 canonicalize_comparison (machine_mode mode, enum rtx_code *code, rtx *imm)
6298 {
6299 if (!SCALAR_INT_MODE_P (mode))
6300 return;
6301
6302 int to_add = 0;
6303 enum signop sgn = unsigned_condition_p (*code) ? UNSIGNED : SIGNED;
6304
6305 /* Extract the immediate value from the rtx. */
6306 wide_int imm_val = rtx_mode_t (*imm, mode);
6307
6308 if (*code == GT || *code == GTU || *code == LE || *code == LEU)
6309 to_add = 1;
6310 else if (*code == GE || *code == GEU || *code == LT || *code == LTU)
6311 to_add = -1;
6312 else
6313 return;
6314
6315 /* Check for overflow/underflow in the case of signed values and
6316 wrapping around in the case of unsigned values. If any occur,
6317 cancel the optimization. */
6318 wi::overflow_type overflow = wi::OVF_NONE;
6319 wide_int imm_modif;
6320
6321 if (to_add == 1)
6322 imm_modif = wi::add (imm_val, 1, sgn, &overflow);
6323 else
6324 imm_modif = wi::sub (imm_val, 1, sgn, &overflow);
6325
6326 if (overflow)
6327 return;
6328
6329 /* The following creates a pseudo; if we cannot do that, bail out. */
6330 if (!can_create_pseudo_p ())
6331 return;
6332
6333 rtx reg = gen_rtx_REG (mode, LAST_VIRTUAL_REGISTER + 1);
6334 rtx new_imm = immed_wide_int_const (imm_modif, mode);
6335
6336 rtx_insn *old_rtx = gen_move_insn (reg, *imm);
6337 rtx_insn *new_rtx = gen_move_insn (reg, new_imm);
6338
6339 /* Update the immediate and the code. */
6340 if (insn_cost (old_rtx, true) > insn_cost (new_rtx, true))
6341 {
6342 *code = equivalent_cmp_code (*code);
6343 *imm = new_imm;
6344 }
6345 }
6346
6347
6348 \f
6349 /* Perform possibly multi-word comparison and conditional jump to LABEL
6350 if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE. This is
6351 now a thin wrapper around do_compare_rtx_and_jump. */
6352
6353 static void
6354 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
6355 rtx_code_label *label)
6356 {
6357 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
6358 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode, NULL_RTX,
6359 NULL, label, profile_probability::uninitialized ());
6360 }