gcc/expmed.c, as of commit "[18/77] Make int_mode_for_mode return an opt_scalar_int_mode"

/* Medium-level subroutines: convert bit-field store and extract
   and shifts, multiplies and divides to rtl instructions.
   Copyright (C) 1987-2017 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "predict.h"
#include "memmodel.h"
#include "tm_p.h"
#include "expmed.h"
#include "optabs.h"
#include "emit-rtl.h"
#include "diagnostic-core.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "dojump.h"
#include "explow.h"
#include "expr.h"
#include "langhooks.h"

struct target_expmed default_target_expmed;
#if SWITCHABLE_TARGET
struct target_expmed *this_target_expmed = &default_target_expmed;
#endif

static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   rtx, bool);
static void store_fixed_bit_field_1 (rtx, unsigned HOST_WIDE_INT,
                                     unsigned HOST_WIDE_INT,
                                     rtx, bool);
static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   rtx, bool);
static rtx extract_fixed_bit_field (machine_mode, rtx,
                                    unsigned HOST_WIDE_INT,
                                    unsigned HOST_WIDE_INT, rtx, int, bool);
static rtx extract_fixed_bit_field_1 (machine_mode, rtx,
                                      unsigned HOST_WIDE_INT,
                                      unsigned HOST_WIDE_INT, rtx, int, bool);
static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
                                    unsigned HOST_WIDE_INT, int, bool);
static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
static rtx expand_smod_pow2 (machine_mode, rtx, HOST_WIDE_INT);
static rtx expand_sdiv_pow2 (machine_mode, rtx, HOST_WIDE_INT);

/* Return a constant integer mask value of mode MODE with BITSIZE ones
   followed by BITPOS zeros, or the complement of that if COMPLEMENT.
   The mask is truncated if necessary to the width of mode MODE.  The
   mask is zero-extended if BITSIZE+BITPOS is too small for MODE.  */

static inline rtx
mask_rtx (machine_mode mode, int bitpos, int bitsize, bool complement)
{
  return immed_wide_int_const
    (wi::shifted_mask (bitpos, bitsize, complement,
                       GET_MODE_PRECISION (mode)), mode);
}

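/* Illustrative example (editorial note, not in the original source):
   with the conventions above, mask_rtx (SImode, 8, 4, false) is the
   constant 0xf00 (four ones shifted up past eight zeros), and passing
   complement = true yields 0xfffff0ff instead.  */
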
/* Test whether a value is zero or a power of two.  */
#define EXACT_POWER_OF_2_OR_ZERO_P(x) \
  (((x) & ((x) - HOST_WIDE_INT_1U)) == 0)

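/* Illustration (editorial note, not in the original source): for x = 8
   (binary 1000), x & (x - 1) is 1000 & 0111 == 0, so the predicate is
   true; for x = 6 (binary 110), x & (x - 1) is 110 & 101 == 100, so it
   is false.  x = 0 trivially satisfies it, hence the name.  */
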
struct init_expmed_rtl
{
  rtx reg;
  rtx plus;
  rtx neg;
  rtx mult;
  rtx sdiv;
  rtx udiv;
  rtx sdiv_32;
  rtx smod_32;
  rtx wide_mult;
  rtx wide_lshr;
  rtx wide_trunc;
  rtx shift;
  rtx shift_mult;
  rtx shift_add;
  rtx shift_sub0;
  rtx shift_sub1;
  rtx zext;
  rtx trunc;

  rtx pow2[MAX_BITS_PER_WORD];
  rtx cint[MAX_BITS_PER_WORD];
};

static void
init_expmed_one_conv (struct init_expmed_rtl *all, machine_mode to_mode,
                      machine_mode from_mode, bool speed)
{
  int to_size, from_size;
  rtx which;

  to_size = GET_MODE_PRECISION (to_mode);
  from_size = GET_MODE_PRECISION (from_mode);

  /* Most partial integers have a precision less than that of the "full"
     integer mode they require for storage.  If one does not, reduce
     its bit size by one here for comparison purposes.  */
  if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
      && pow2p_hwi (to_size))
    to_size --;
  if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
      && pow2p_hwi (from_size))
    from_size --;

  /* Assume cost of zero-extend and sign-extend is the same.  */
  which = (to_size < from_size ? all->trunc : all->zext);

  PUT_MODE (all->reg, from_mode);
  set_convert_cost (to_mode, from_mode, speed,
                    set_src_cost (which, to_mode, speed));
}

static void
init_expmed_one_mode (struct init_expmed_rtl *all,
                      machine_mode mode, int speed)
{
  int m, n, mode_bitsize;
  machine_mode mode_from;

  mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);

  PUT_MODE (all->reg, mode);
  PUT_MODE (all->plus, mode);
  PUT_MODE (all->neg, mode);
  PUT_MODE (all->mult, mode);
  PUT_MODE (all->sdiv, mode);
  PUT_MODE (all->udiv, mode);
  PUT_MODE (all->sdiv_32, mode);
  PUT_MODE (all->smod_32, mode);
  PUT_MODE (all->wide_trunc, mode);
  PUT_MODE (all->shift, mode);
  PUT_MODE (all->shift_mult, mode);
  PUT_MODE (all->shift_add, mode);
  PUT_MODE (all->shift_sub0, mode);
  PUT_MODE (all->shift_sub1, mode);
  PUT_MODE (all->zext, mode);
  PUT_MODE (all->trunc, mode);

  set_add_cost (speed, mode, set_src_cost (all->plus, mode, speed));
  set_neg_cost (speed, mode, set_src_cost (all->neg, mode, speed));
  set_mul_cost (speed, mode, set_src_cost (all->mult, mode, speed));
  set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, mode, speed));
  set_udiv_cost (speed, mode, set_src_cost (all->udiv, mode, speed));

  set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, mode, speed)
                                     <= 2 * add_cost (speed, mode)));
  set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, mode, speed)
                                     <= 4 * add_cost (speed, mode)));

  set_shift_cost (speed, mode, 0, 0);
  {
    int cost = add_cost (speed, mode);
    set_shiftadd_cost (speed, mode, 0, cost);
    set_shiftsub0_cost (speed, mode, 0, cost);
    set_shiftsub1_cost (speed, mode, 0, cost);
  }

  n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
  for (m = 1; m < n; m++)
    {
      XEXP (all->shift, 1) = all->cint[m];
      XEXP (all->shift_mult, 1) = all->pow2[m];

      set_shift_cost (speed, mode, m, set_src_cost (all->shift, mode, speed));
      set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, mode,
                                                       speed));
      set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, mode,
                                                        speed));
      set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, mode,
                                                        speed));
    }

  if (SCALAR_INT_MODE_P (mode))
    {
      for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
           mode_from = (machine_mode)(mode_from + 1))
        init_expmed_one_conv (all, mode, mode_from, speed);

      machine_mode wider_mode;
      if (GET_MODE_CLASS (mode) == MODE_INT
          && GET_MODE_WIDER_MODE (mode).exists (&wider_mode))
        {
          PUT_MODE (all->zext, wider_mode);
          PUT_MODE (all->wide_mult, wider_mode);
          PUT_MODE (all->wide_lshr, wider_mode);
          XEXP (all->wide_lshr, 1) = GEN_INT (mode_bitsize);

          set_mul_widen_cost (speed, wider_mode,
                              set_src_cost (all->wide_mult, wider_mode, speed));
          set_mul_highpart_cost (speed, mode,
                                 set_src_cost (all->wide_trunc, mode, speed));
        }
    }
}

void
init_expmed (void)
{
  struct init_expmed_rtl all;
  machine_mode mode = QImode;
  int m, speed;

  memset (&all, 0, sizeof all);
  for (m = 1; m < MAX_BITS_PER_WORD; m++)
    {
      all.pow2[m] = GEN_INT (HOST_WIDE_INT_1 << m);
      all.cint[m] = GEN_INT (m);
    }

  /* Avoid using hard regs in ways which may be unsupported.  */
  all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
  all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
  all.neg = gen_rtx_NEG (mode, all.reg);
  all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
  all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
  all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
  all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
  all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
  all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
  all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
  all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
  all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
  all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
  all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
  all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
  all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
  all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
  all.trunc = gen_rtx_TRUNCATE (mode, all.reg);

  for (speed = 0; speed < 2; speed++)
    {
      crtl->maybe_hot_insn_p = speed;
      set_zero_cost (speed, set_src_cost (const0_rtx, mode, speed));

      for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
           mode = (machine_mode)(mode + 1))
        init_expmed_one_mode (&all, mode, speed);

      if (MIN_MODE_PARTIAL_INT != VOIDmode)
        for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
             mode = (machine_mode)(mode + 1))
          init_expmed_one_mode (&all, mode, speed);

      if (MIN_MODE_VECTOR_INT != VOIDmode)
        for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
             mode = (machine_mode)(mode + 1))
          init_expmed_one_mode (&all, mode, speed);
    }

  if (alg_hash_used_p ())
    {
      struct alg_hash_entry *p = alg_hash_entry_ptr (0);
      memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
    }
  else
    set_alg_hash_used_p (true);
  default_rtl_profile ();

  ggc_free (all.trunc);
  ggc_free (all.shift_sub1);
  ggc_free (all.shift_sub0);
  ggc_free (all.shift_add);
  ggc_free (all.shift_mult);
  ggc_free (all.shift);
  ggc_free (all.wide_trunc);
  ggc_free (all.wide_lshr);
  ggc_free (all.wide_mult);
  ggc_free (all.zext);
  ggc_free (all.smod_32);
  ggc_free (all.sdiv_32);
  ggc_free (all.udiv);
  ggc_free (all.sdiv);
  ggc_free (all.mult);
  ggc_free (all.neg);
  ggc_free (all.plus);
  ggc_free (all.reg);
}

/* Return an rtx representing minus the value of X.
   MODE is the intended mode of the result,
   useful if X is a CONST_INT.  */

rtx
negate_rtx (machine_mode mode, rtx x)
{
  rtx result = simplify_unary_operation (NEG, mode, x, mode);

  if (result == 0)
    result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);

  return result;
}

/* Whether reverse storage order is supported on the target.  */
static int reverse_storage_order_supported = -1;

/* Check whether reverse storage order is supported on the target.  */

static void
check_reverse_storage_order_support (void)
{
  if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
    {
      reverse_storage_order_supported = 0;
      sorry ("reverse scalar storage order");
    }
  else
    reverse_storage_order_supported = 1;
}

/* Whether reverse FP storage order is supported on the target.  */
static int reverse_float_storage_order_supported = -1;

/* Check whether reverse FP storage order is supported on the target.  */

static void
check_reverse_float_storage_order_support (void)
{
  if (FLOAT_WORDS_BIG_ENDIAN != WORDS_BIG_ENDIAN)
    {
      reverse_float_storage_order_supported = 0;
      sorry ("reverse floating-point scalar storage order");
    }
  else
    reverse_float_storage_order_supported = 1;
}

/* Return an rtx representing value of X with reverse storage order.
   MODE is the intended mode of the result,
   useful if X is a CONST_INT.  */

rtx
flip_storage_order (machine_mode mode, rtx x)
{
  scalar_int_mode int_mode;
  rtx result;

  if (mode == QImode)
    return x;

  if (COMPLEX_MODE_P (mode))
    {
      rtx real = read_complex_part (x, false);
      rtx imag = read_complex_part (x, true);

      real = flip_storage_order (GET_MODE_INNER (mode), real);
      imag = flip_storage_order (GET_MODE_INNER (mode), imag);

      return gen_rtx_CONCAT (mode, real, imag);
    }

  if (__builtin_expect (reverse_storage_order_supported < 0, 0))
    check_reverse_storage_order_support ();

  if (!is_a <scalar_int_mode> (mode, &int_mode))
    {
      if (FLOAT_MODE_P (mode)
          && __builtin_expect (reverse_float_storage_order_supported < 0, 0))
        check_reverse_float_storage_order_support ();

      if (!int_mode_for_size (GET_MODE_PRECISION (mode), 0).exists (&int_mode))
        {
          sorry ("reverse storage order for %smode", GET_MODE_NAME (mode));
          return x;
        }
      x = gen_lowpart (int_mode, x);
    }

  result = simplify_unary_operation (BSWAP, int_mode, x, int_mode);
  if (result == 0)
    result = expand_unop (int_mode, bswap_optab, x, NULL_RTX, 1);

  if (int_mode != mode)
    result = gen_lowpart (mode, result);

  return result;
}

/* Adjust bitfield memory MEM so that it points to the first unit of mode
   MODE that contains a bitfield of size BITSIZE at bit position BITNUM.
   If MODE is BLKmode, return a reference to every byte in the bitfield.
   Set *NEW_BITNUM to the bit position of the field within the new memory.  */

static rtx
narrow_bit_field_mem (rtx mem, machine_mode mode,
                      unsigned HOST_WIDE_INT bitsize,
                      unsigned HOST_WIDE_INT bitnum,
                      unsigned HOST_WIDE_INT *new_bitnum)
{
  if (mode == BLKmode)
    {
      *new_bitnum = bitnum % BITS_PER_UNIT;
      HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
      HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
                            / BITS_PER_UNIT);
      return adjust_bitfield_address_size (mem, mode, offset, size);
    }
  else
    {
      unsigned int unit = GET_MODE_BITSIZE (mode);
      *new_bitnum = bitnum % unit;
      HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
      return adjust_bitfield_address (mem, mode, offset);
    }
}

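/* Worked example (editorial note, not in the original source): for a
   3-bit field at BITNUM 37 accessed in HImode, the unit is 16 bits, so
   *NEW_BITNUM becomes 37 % 16 = 5 and the address is advanced by
   (37 - 5) / 8 = 4 bytes.  */
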
/* The caller wants to perform insertion or extraction PATTERN on a
   bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
   BITREGION_START and BITREGION_END are as for store_bit_field
   and FIELDMODE is the natural mode of the field.

   Search for a mode that is compatible with the memory access
   restrictions and (where applicable) with a register insertion or
   extraction.  Return the new memory on success, storing the adjusted
   bit position in *NEW_BITNUM.  Return null otherwise.  */

static rtx
adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
                              rtx op0, HOST_WIDE_INT bitsize,
                              HOST_WIDE_INT bitnum,
                              unsigned HOST_WIDE_INT bitregion_start,
                              unsigned HOST_WIDE_INT bitregion_end,
                              machine_mode fieldmode,
                              unsigned HOST_WIDE_INT *new_bitnum)
{
  bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
                                bitregion_end, MEM_ALIGN (op0),
                                MEM_VOLATILE_P (op0));
  machine_mode best_mode;
  if (iter.next_mode (&best_mode))
    {
      /* We can use a memory in BEST_MODE.  See whether this is true for
         any wider modes.  All other things being equal, we prefer to
         use the widest mode possible because it tends to expose more
         CSE opportunities.  */
      if (!iter.prefer_smaller_modes ())
        {
          /* Limit the search to the mode required by the corresponding
             register insertion or extraction instruction, if any.  */
          machine_mode limit_mode = word_mode;
          extraction_insn insn;
          if (get_best_reg_extraction_insn (&insn, pattern,
                                            GET_MODE_BITSIZE (best_mode),
                                            fieldmode))
            limit_mode = insn.field_mode;

          machine_mode wider_mode;
          while (iter.next_mode (&wider_mode)
                 && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
            best_mode = wider_mode;
        }
      return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
                                   new_bitnum);
    }
  return NULL_RTX;
}

/* Return true if a bitfield of size BITSIZE at bit number BITNUM within
   a structure of mode STRUCT_MODE represents a lowpart subreg.  The subreg
   offset is then BITNUM / BITS_PER_UNIT.  */

static bool
lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
                     unsigned HOST_WIDE_INT bitsize,
                     machine_mode struct_mode)
{
  if (BYTES_BIG_ENDIAN)
    return (bitnum % BITS_PER_UNIT == 0
            && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
                || (bitnum + bitsize) % BITS_PER_WORD == 0));
  else
    return bitnum % BITS_PER_WORD == 0;
}

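/* Example (editorial note, not in the original source): on a
   little-endian target with 32-bit words, a field starting at bit 0,
   32, 64, ... is a lowpart subreg regardless of its size, whereas on a
   big-endian target the field must instead end exactly at a word
   boundary or at the end of the structure.  */
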
/* Return true if -fstrict-volatile-bitfields applies to an access of OP0
   containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
   Return false if the access would touch memory outside the range
   BITREGION_START to BITREGION_END for conformance to the C++ memory
   model.  */

static bool
strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
                            unsigned HOST_WIDE_INT bitnum,
                            machine_mode fieldmode,
                            unsigned HOST_WIDE_INT bitregion_start,
                            unsigned HOST_WIDE_INT bitregion_end)
{
  unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);

  /* -fstrict-volatile-bitfields must be enabled and we must have a
     volatile MEM.  */
  if (!MEM_P (op0)
      || !MEM_VOLATILE_P (op0)
      || flag_strict_volatile_bitfields <= 0)
    return false;

  /* Non-integral modes likely only happen with packed structures.
     Punt.  */
  if (!SCALAR_INT_MODE_P (fieldmode))
    return false;

  /* The bit size must not be larger than the field mode, and
     the field mode must not be larger than a word.  */
  if (bitsize > modesize || modesize > BITS_PER_WORD)
    return false;

  /* Check for cases of unaligned fields that must be split.  */
  if (bitnum % modesize + bitsize > modesize)
    return false;

  /* The memory must be sufficiently aligned for a MODESIZE access.
     This condition guarantees that the memory access will not
     touch anything after the end of the structure.  */
  if (MEM_ALIGN (op0) < modesize)
    return false;

  /* Check for cases where the C++ memory model applies.  */
  if (bitregion_end != 0
      && (bitnum - bitnum % modesize < bitregion_start
          || bitnum - bitnum % modesize + modesize - 1 > bitregion_end))
    return false;

  return true;
}

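/* Example of the split check above (editorial note, not in the original
   source): with a 32-bit FIELDMODE, a field at BITNUM 28 of size 8 has
   bitnum % modesize + bitsize = 28 + 8 = 36 > 32, so it straddles two
   SImode units and the function returns false.  */
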
/* Return true if OP0 is a memory and if a bitfield of size BITSIZE at
   bit number BITNUM can be treated as a simple value of mode MODE.  */

static bool
simple_mem_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
                       unsigned HOST_WIDE_INT bitnum, machine_mode mode)
{
  return (MEM_P (op0)
          && bitnum % BITS_PER_UNIT == 0
          && bitsize == GET_MODE_BITSIZE (mode)
          && (!SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
              || (bitnum % GET_MODE_ALIGNMENT (mode) == 0
                  && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
}

/* Try to use instruction INSV to store VALUE into a field of OP0.
   BITSIZE and BITNUM are as for store_bit_field.  */

static bool
store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
                            unsigned HOST_WIDE_INT bitsize,
                            unsigned HOST_WIDE_INT bitnum,
                            rtx value)
{
  struct expand_operand ops[4];
  rtx value1;
  rtx xop0 = op0;
  rtx_insn *last = get_last_insn ();
  bool copy_back = false;

  machine_mode op_mode = insv->field_mode;
  unsigned int unit = GET_MODE_BITSIZE (op_mode);
  if (bitsize == 0 || bitsize > unit)
    return false;

  if (MEM_P (xop0))
    /* Get a reference to the first byte of the field.  */
    xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
                                 &bitnum);
  else
    {
      /* Convert from counting within OP0 to counting in OP_MODE.  */
      if (BYTES_BIG_ENDIAN)
        bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));

      /* If xop0 is a register, we need it in OP_MODE
         to make it acceptable to the format of insv.  */
      if (GET_CODE (xop0) == SUBREG)
        /* We can't just change the mode, because this might clobber op0,
           and we will need the original value of op0 if insv fails.  */
        xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
      if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
        xop0 = gen_lowpart_SUBREG (op_mode, xop0);
    }

  /* If the destination is a paradoxical subreg such that we need a
     truncate to the inner mode, perform the insertion on a temporary and
     truncate the result to the original destination.  Note that we can't
     just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
     X) 0)) is (reg:N X).  */
  if (GET_CODE (xop0) == SUBREG
      && REG_P (SUBREG_REG (xop0))
      && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
                                         op_mode))
    {
      rtx tem = gen_reg_rtx (op_mode);
      emit_move_insn (tem, xop0);
      xop0 = tem;
      copy_back = true;
    }

  /* There is a similar overflow check at the start of store_bit_field_1,
     but that only handles the case where the field lies completely
     outside the register.  Here the field may lie partly in the
     register, and we need to adjust BITSIZE for that partial overflow.
     Without this fix, pr48335-2.c would be broken on big-endian targets
     that have a bit-insert instruction, such as arm and aarch64.  */
  if (bitsize + bitnum > unit && bitnum < unit)
    {
      warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
               "destination object, data truncated into %wu-bit",
               bitsize, unit - bitnum);
      bitsize = unit - bitnum;
    }

  /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
     "backwards" from the size of the unit we are inserting into.
     Otherwise, we count bits from the most significant on a
     BYTES/BITS_BIG_ENDIAN machine.  */

  if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
    bitnum = unit - bitsize - bitnum;

  /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
  value1 = value;
  if (GET_MODE (value) != op_mode)
    {
      if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
        {
          rtx tmp;
          /* Optimization: Don't bother really extending VALUE
             if it has all the bits we will actually use.  However,
             if we must narrow it, be sure we do it correctly.  */

          if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
            {
              tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
              if (! tmp)
                tmp = simplify_gen_subreg (op_mode,
                                           force_reg (GET_MODE (value),
                                                      value1),
                                           GET_MODE (value), 0);
            }
          else
            {
              tmp = gen_lowpart_if_possible (op_mode, value1);
              if (! tmp)
                tmp = gen_lowpart (op_mode, force_reg (GET_MODE (value),
                                                       value1));
            }
          value1 = tmp;
        }
      else if (CONST_INT_P (value))
        value1 = gen_int_mode (INTVAL (value), op_mode);
      else
        /* Parse phase is supposed to make VALUE's data type
           match that of the component reference, which is a type
           at least as wide as the field; so VALUE should have
           a mode that corresponds to that type.  */
        gcc_assert (CONSTANT_P (value));
    }

  create_fixed_operand (&ops[0], xop0);
  create_integer_operand (&ops[1], bitsize);
  create_integer_operand (&ops[2], bitnum);
  create_input_operand (&ops[3], value1, op_mode);
  if (maybe_expand_insn (insv->icode, 4, ops))
    {
      if (copy_back)
        convert_move (op0, xop0, true);
      return true;
    }
  delete_insns_since (last);
  return false;
}

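/* Numeric illustration of the renumbering above (editorial note, not in
   the original source): inserting an 8-bit field at BITNUM 4 into a
   32-bit unit on a target where BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN
   rewrites BITNUM as 32 - 8 - 4 = 20 before the operands are created.  */
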
/* A subroutine of store_bit_field, with the same arguments.  Return true
   if the operation could be implemented.

   If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
   no other way of implementing the operation.  If FALLBACK_P is false,
   return false instead.  */

static bool
store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
                   unsigned HOST_WIDE_INT bitnum,
                   unsigned HOST_WIDE_INT bitregion_start,
                   unsigned HOST_WIDE_INT bitregion_end,
                   machine_mode fieldmode,
                   rtx value, bool reverse, bool fallback_p)
{
  rtx op0 = str_rtx;
  rtx orig_value;

  while (GET_CODE (op0) == SUBREG)
    {
      /* The following line once was done only if WORDS_BIG_ENDIAN,
         but I think that is a mistake.  WORDS_BIG_ENDIAN is
         meaningful at a much higher level; when structures are copied
         between memory and regs, the higher-numbered regs
         always get higher addresses.  */
      int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
      int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
      int byte_offset = 0;

      /* Paradoxical subregs need special handling on big-endian machines.  */
      if (paradoxical_subreg_p (op0))
        {
          int difference = inner_mode_size - outer_mode_size;

          if (WORDS_BIG_ENDIAN)
            byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
          if (BYTES_BIG_ENDIAN)
            byte_offset += difference % UNITS_PER_WORD;
        }
      else
        byte_offset = SUBREG_BYTE (op0);

      bitnum += byte_offset * BITS_PER_UNIT;
      op0 = SUBREG_REG (op0);
    }

  /* No action is needed if the target is a register and if the field
     lies completely outside that register.  This can occur if the source
     code contains an out-of-bounds access to a small array.  */
  if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
    return true;

  /* Use vec_set patterns for inserting parts of vectors whenever
     available.  */
  if (VECTOR_MODE_P (GET_MODE (op0))
      && !MEM_P (op0)
      && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
      && fieldmode == GET_MODE_INNER (GET_MODE (op0))
      && bitsize == GET_MODE_UNIT_BITSIZE (GET_MODE (op0))
      && !(bitnum % GET_MODE_UNIT_BITSIZE (GET_MODE (op0))))
    {
      struct expand_operand ops[3];
      machine_mode outermode = GET_MODE (op0);
      machine_mode innermode = GET_MODE_INNER (outermode);
      enum insn_code icode = optab_handler (vec_set_optab, outermode);
      int pos = bitnum / GET_MODE_BITSIZE (innermode);

      create_fixed_operand (&ops[0], op0);
      create_input_operand (&ops[1], value, innermode);
      create_integer_operand (&ops[2], pos);
      if (maybe_expand_insn (icode, 3, ops))
        return true;
    }

  /* If the target is a register, overwriting the entire object, or storing
     a full-word or multi-word field can be done with just a SUBREG.  */
  if (!MEM_P (op0)
      && bitsize == GET_MODE_BITSIZE (fieldmode)
      && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
          || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
    {
      /* Use the subreg machinery either to narrow OP0 to the required
         words or to cope with mode punning between equal-sized modes.
         In the latter case, use subreg on the rhs side, not lhs.  */
      rtx sub;

      if (bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
        {
          sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
          if (sub)
            {
              if (reverse)
                sub = flip_storage_order (GET_MODE (op0), sub);
              emit_move_insn (op0, sub);
              return true;
            }
        }
      else
        {
          sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
                                     bitnum / BITS_PER_UNIT);
          if (sub)
            {
              if (reverse)
                value = flip_storage_order (fieldmode, value);
              emit_move_insn (sub, value);
              return true;
            }
        }
    }

  /* If the target is memory, storing any naturally aligned field can be
     done with a simple store.  For targets that support fast unaligned
     memory, any naturally sized, unit aligned field can be done directly.  */
  if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode))
    {
      op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
      if (reverse)
        value = flip_storage_order (fieldmode, value);
      emit_move_insn (op0, value);
      return true;
    }

  /* Make sure we are playing with integral modes.  Pun with subregs
     if we aren't.  This must come after the entire register case above,
     since that case is valid for any mode.  The following cases are only
     valid for integral modes.  */
  opt_scalar_int_mode opt_imode = int_mode_for_mode (GET_MODE (op0));
  scalar_int_mode imode;
  if (!opt_imode.exists (&imode) || imode != GET_MODE (op0))
    {
      if (MEM_P (op0))
        op0 = adjust_bitfield_address_size (op0, opt_imode.else_blk (),
                                            0, MEM_SIZE (op0));
      else
        op0 = gen_lowpart (opt_imode.require (), op0);
    }

  /* Storing an lsb-aligned field in a register
     can be done with a movstrict instruction.  */

  if (!MEM_P (op0)
      && !reverse
      && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
      && bitsize == GET_MODE_BITSIZE (fieldmode)
      && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
    {
      struct expand_operand ops[2];
      enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
      rtx arg0 = op0;
      unsigned HOST_WIDE_INT subreg_off;

      if (GET_CODE (arg0) == SUBREG)
        {
          /* Else we've got some float mode source being extracted into
             a different float mode destination -- this combination of
             subregs results in Severe Tire Damage.  */
          gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
                      || GET_MODE_CLASS (fieldmode) == MODE_INT
                      || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
          arg0 = SUBREG_REG (arg0);
        }

      subreg_off = bitnum / BITS_PER_UNIT;
      if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
        {
          arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);

          create_fixed_operand (&ops[0], arg0);
          /* Shrink the source operand to FIELDMODE.  */
          create_convert_operand_to (&ops[1], value, fieldmode, false);
          if (maybe_expand_insn (icode, 2, ops))
            return true;
        }
    }

  /* Handle fields bigger than a word.  */

  if (bitsize > BITS_PER_WORD)
    {
      /* Here we transfer the words of the field
         in the order least significant first.
         This is because the most significant word is the one which may
         be less than full.
         However, only do that if the value is not BLKmode.  */

      const bool backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
      unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
      unsigned int i;
      rtx_insn *last;

      /* This is the mode we must force value to, so that there will be enough
         subwords to extract.  Note that fieldmode will often (always?) be
         VOIDmode, because that is what store_field uses to indicate that this
         is a bit field, but passing VOIDmode to operand_subword_force
         is not allowed.  */
      fieldmode = GET_MODE (value);
      if (fieldmode == VOIDmode)
        fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);

      last = get_last_insn ();
      for (i = 0; i < nwords; i++)
        {
          /* If I is 0, use the low-order word in both field and target;
             if I is 1, use the next to lowest word; and so on.  */
          unsigned int wordnum = (backwards
                                  ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
                                  - i - 1
                                  : i);
          unsigned int bit_offset = (backwards ^ reverse
                                     ? MAX ((int) bitsize - ((int) i + 1)
                                            * BITS_PER_WORD,
                                            0)
                                     : (int) i * BITS_PER_WORD);
          rtx value_word = operand_subword_force (value, wordnum, fieldmode);
          unsigned HOST_WIDE_INT new_bitsize =
            MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);

          /* If the remaining chunk doesn't have full wordsize we have
             to make sure that for big-endian machines the higher order
             bits are used.  */
          if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
            value_word = simplify_expand_binop (word_mode, lshr_optab,
                                                value_word,
                                                GEN_INT (BITS_PER_WORD
                                                         - new_bitsize),
                                                NULL_RTX, true,
                                                OPTAB_LIB_WIDEN);

          if (!store_bit_field_1 (op0, new_bitsize,
                                  bitnum + bit_offset,
                                  bitregion_start, bitregion_end,
                                  word_mode,
                                  value_word, reverse, fallback_p))
            {
              delete_insns_since (last);
              return false;
            }
        }
      return true;
    }

  /* If VALUE has a floating-point or complex mode, access it as an
     integer of the corresponding size.  This can occur on a machine
     with 64 bit registers that uses SFmode for float.  It can also
     occur for unaligned float or complex fields.  */
  orig_value = value;
  if (GET_MODE (value) != VOIDmode
      && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
      && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
    {
      value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)).require ());
      emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
    }

  /* If OP0 is a multi-word register, narrow it to the affected word.
     If the region spans two words, defer to store_split_bit_field.
     Don't do this if op0 is a single hard register wider than word
     such as a float or vector register.  */
  if (!MEM_P (op0)
      && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD
      && (!REG_P (op0)
          || !HARD_REGISTER_P (op0)
          || HARD_REGNO_NREGS (REGNO (op0), GET_MODE (op0)) != 1))
    {
      if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
        {
          if (!fallback_p)
            return false;

          store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
                                 bitregion_end, value, reverse);
          return true;
        }
      op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
                                 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
      gcc_assert (op0);
      bitnum %= BITS_PER_WORD;
    }

  /* From here on we can assume that the field to be stored in fits
     within a word.  If the destination is a register, it too fits
     in a word.  */

  extraction_insn insv;
  if (!MEM_P (op0)
      && !reverse
      && get_best_reg_extraction_insn (&insv, EP_insv,
                                       GET_MODE_BITSIZE (GET_MODE (op0)),
                                       fieldmode)
      && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
    return true;

  /* If OP0 is a memory, try copying it to a register and seeing if a
     cheap register alternative is available.  */
  if (MEM_P (op0) && !reverse)
    {
      if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
                                        fieldmode)
          && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
        return true;

      rtx_insn *last = get_last_insn ();

      /* Try loading part of OP0 into a register, inserting the bitfield
         into that, and then copying the result back to OP0.  */
      unsigned HOST_WIDE_INT bitpos;
      rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
                                               bitregion_start, bitregion_end,
                                               fieldmode, &bitpos);
      if (xop0)
        {
          rtx tempreg = copy_to_reg (xop0);
          if (store_bit_field_1 (tempreg, bitsize, bitpos,
                                 bitregion_start, bitregion_end,
                                 fieldmode, orig_value, reverse, false))
            {
              emit_move_insn (xop0, tempreg);
              return true;
            }
          delete_insns_since (last);
        }
    }

  if (!fallback_p)
    return false;

  store_fixed_bit_field (op0, bitsize, bitnum, bitregion_start,
                         bitregion_end, value, reverse);
  return true;
}

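/* Worked example of the multi-word path above (editorial note, not in
   the original source): storing a 64-bit field with 32-bit words splits
   into nwords = 2 recursive stores; on a little-endian target iteration
   i = 0 stores source word 0 at BITNUM and i = 1 stores word 1 at
   BITNUM + 32, while WORDS_BIG_ENDIAN reverses which source word goes
   first.  */
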
/* Generate code to store value from rtx VALUE
   into a bit-field within structure STR_RTX
   containing BITSIZE bits starting at bit BITNUM.

   BITREGION_START is bitpos of the first bitfield in this region.
   BITREGION_END is the bitpos of the ending bitfield in this region.
   These two fields are 0 if the C++ memory model does not apply,
   or we are not interested in keeping track of bitfield regions.

   FIELDMODE is the machine-mode of the FIELD_DECL node for this field.

   If REVERSE is true, the store is to be done in reverse order.  */

void
store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
                 unsigned HOST_WIDE_INT bitnum,
                 unsigned HOST_WIDE_INT bitregion_start,
                 unsigned HOST_WIDE_INT bitregion_end,
                 machine_mode fieldmode,
                 rtx value, bool reverse)
{
  /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
  if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, fieldmode,
                                  bitregion_start, bitregion_end))
    {
      /* Storing of a full word can be done with a simple store.
         We know here that the field can be accessed with one single
         instruction.  For targets that support unaligned memory,
         an unaligned access may be necessary.  */
      if (bitsize == GET_MODE_BITSIZE (fieldmode))
        {
          str_rtx = adjust_bitfield_address (str_rtx, fieldmode,
                                             bitnum / BITS_PER_UNIT);
          if (reverse)
            value = flip_storage_order (fieldmode, value);
          gcc_assert (bitnum % BITS_PER_UNIT == 0);
          emit_move_insn (str_rtx, value);
        }
      else
        {
          rtx temp;

          str_rtx = narrow_bit_field_mem (str_rtx, fieldmode, bitsize, bitnum,
                                          &bitnum);
          gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (fieldmode));
          temp = copy_to_reg (str_rtx);
          if (!store_bit_field_1 (temp, bitsize, bitnum, 0, 0,
                                  fieldmode, value, reverse, true))
            gcc_unreachable ();

          emit_move_insn (str_rtx, temp);
        }

      return;
    }

  /* Under the C++0x memory model, we must not touch bits outside the
     bit region.  Adjust the address to start at the beginning of the
     bit region.  */
  if (MEM_P (str_rtx) && bitregion_start > 0)
    {
      machine_mode bestmode;
      HOST_WIDE_INT offset, size;

      gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);

      offset = bitregion_start / BITS_PER_UNIT;
      bitnum -= bitregion_start;
      size = (bitnum + bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT;
      bitregion_end -= bitregion_start;
      bitregion_start = 0;
      bestmode = get_best_mode (bitsize, bitnum,
                                bitregion_start, bitregion_end,
                                MEM_ALIGN (str_rtx), VOIDmode,
                                MEM_VOLATILE_P (str_rtx));
      str_rtx = adjust_bitfield_address_size (str_rtx, bestmode, offset, size);
    }

  if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
                          bitregion_start, bitregion_end,
                          fieldmode, value, reverse, true))
    gcc_unreachable ();
}

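/* Example of the bit-region adjustment above (editorial note, not in
   the original source): for BITREGION_START = 32 and BITNUM = 41, the
   address is advanced by 32 / 8 = 4 bytes, BITNUM becomes 41 - 32 = 9,
   and the region is renumbered to start at bit 0, so later accesses
   cannot touch bytes below the region.  */
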
/* Use shifts and boolean operations to store VALUE into a bit field of
   width BITSIZE in OP0, starting at bit BITNUM.

   If REVERSE is true, the store is to be done in reverse order.  */

static void
store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
                       unsigned HOST_WIDE_INT bitnum,
                       unsigned HOST_WIDE_INT bitregion_start,
                       unsigned HOST_WIDE_INT bitregion_end,
                       rtx value, bool reverse)
{
  /* There is a case not handled here:
     a structure with a known alignment of just a halfword
     and a field split across two aligned halfwords within the structure.
     Or likewise a structure with a known alignment of just a byte
     and a field split across two bytes.
     Such cases are not supposed to be able to occur.  */

  if (MEM_P (op0))
    {
      machine_mode mode = GET_MODE (op0);
      if (GET_MODE_BITSIZE (mode) == 0
          || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
        mode = word_mode;
      mode = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
                            MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));

      if (mode == VOIDmode)
        {
          /* The only way this should occur is if the field spans word
             boundaries.  */
          store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
                                 bitregion_end, value, reverse);
          return;
        }

      op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
    }

  store_fixed_bit_field_1 (op0, bitsize, bitnum, value, reverse);
}

/* Helper function for store_fixed_bit_field, stores
   the bit field always using the MODE of OP0.  */

static void
store_fixed_bit_field_1 (rtx op0, unsigned HOST_WIDE_INT bitsize,
                         unsigned HOST_WIDE_INT bitnum,
                         rtx value, bool reverse)
{
  machine_mode mode;
  rtx temp;
  int all_zero = 0;
  int all_one = 0;

  mode = GET_MODE (op0);
  gcc_assert (SCALAR_INT_MODE_P (mode));

  /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
     for invalid input, such as f5 from gcc.dg/pr48335-2.c.  */

  if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
    /* BITNUM is the distance between our msb
       and that of the containing datum.
       Convert it to the distance from the lsb.  */
    bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;

  /* Now BITNUM is always the distance between our lsb
     and that of OP0.  */

  /* Shift VALUE left by BITNUM bits.  If VALUE is not constant,
     we must first convert its mode to MODE.  */

  if (CONST_INT_P (value))
    {
      unsigned HOST_WIDE_INT v = UINTVAL (value);

      if (bitsize < HOST_BITS_PER_WIDE_INT)
        v &= (HOST_WIDE_INT_1U << bitsize) - 1;

      if (v == 0)
        all_zero = 1;
      else if ((bitsize < HOST_BITS_PER_WIDE_INT
                && v == (HOST_WIDE_INT_1U << bitsize) - 1)
               || (bitsize == HOST_BITS_PER_WIDE_INT
                   && v == HOST_WIDE_INT_M1U))
        all_one = 1;

      value = lshift_value (mode, v, bitnum);
    }
  else
    {
      int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
                      && bitnum + bitsize != GET_MODE_BITSIZE (mode));

      if (GET_MODE (value) != mode)
        value = convert_to_mode (mode, value, 1);

      if (must_and)
        value = expand_binop (mode, and_optab, value,
                              mask_rtx (mode, 0, bitsize, 0),
                              NULL_RTX, 1, OPTAB_LIB_WIDEN);
      if (bitnum > 0)
        value = expand_shift (LSHIFT_EXPR, mode, value,
                              bitnum, NULL_RTX, 1);
    }

  if (reverse)
    value = flip_storage_order (mode, value);

  /* Now clear the chosen bits in OP0,
     except that if VALUE is -1 we need not bother.  */
  /* We keep the intermediates in registers to allow CSE to combine
     consecutive bitfield assignments.  */

  temp = force_reg (mode, op0);

  if (! all_one)
    {
      rtx mask = mask_rtx (mode, bitnum, bitsize, 1);
      if (reverse)
        mask = flip_storage_order (mode, mask);
      temp = expand_binop (mode, and_optab, temp, mask,
                           NULL_RTX, 1, OPTAB_LIB_WIDEN);
      temp = force_reg (mode, temp);
    }

  /* Now logical-or VALUE into OP0, unless it is zero.  */

  if (! all_zero)
    {
      temp = expand_binop (mode, ior_optab, temp, value,
                           NULL_RTX, 1, OPTAB_LIB_WIDEN);
      temp = force_reg (mode, temp);
    }

  if (op0 != temp)
    {
      op0 = copy_rtx (op0);
      emit_move_insn (op0, temp);
    }
}

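/* Worked example of the shift-and-mask sequence above (editorial note,
   not in the original source): storing the constant 5 into a 4-bit
   field at BITNUM 8 of an SImode value computes value = 5 << 8 = 0x500
   and mask = ~0xf00, so OP0 becomes (OP0 & 0xfffff0ff) | 0x500; the
   all-zero and all-one shortcuts skip the IOR or the AND
   respectively.  */
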
/* Store a bit field that is split across multiple accessible memory objects.

   OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
   BITSIZE is the field width; BITPOS the position of its first bit
   (within the word).
   VALUE is the value to store.

   If REVERSE is true, the store is to be done in reverse order.

   This does not yet handle fields wider than BITS_PER_WORD.  */

static void
store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
                       unsigned HOST_WIDE_INT bitpos,
                       unsigned HOST_WIDE_INT bitregion_start,
                       unsigned HOST_WIDE_INT bitregion_end,
                       rtx value, bool reverse)
{
  unsigned int unit, total_bits, bitsdone = 0;

  /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
     much at a time.  */
  if (REG_P (op0) || GET_CODE (op0) == SUBREG)
    unit = BITS_PER_WORD;
  else
    unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);

  /* If OP0 is a memory with a mode, then UNIT must not be larger than
     OP0's mode as well.  Otherwise, store_fixed_bit_field will call us
     again, and we will mutually recurse forever.  */
  if (MEM_P (op0) && GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
    unit = MIN (unit, GET_MODE_BITSIZE (GET_MODE (op0)));

  /* If VALUE is a constant other than a CONST_INT, get it into a register in
     WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
     that VALUE might be a floating-point constant.  */
  if (CONSTANT_P (value) && !CONST_INT_P (value))
    {
      rtx word = gen_lowpart_common (word_mode, value);

      if (word && (value != word))
        value = word;
      else
        value = gen_lowpart_common (word_mode,
                                    force_reg (GET_MODE (value) != VOIDmode
                                               ? GET_MODE (value)
                                               : word_mode, value));
    }

  total_bits = GET_MODE_BITSIZE (GET_MODE (value));

  while (bitsdone < bitsize)
    {
      unsigned HOST_WIDE_INT thissize;
      unsigned HOST_WIDE_INT thispos;
      unsigned HOST_WIDE_INT offset;
      rtx part, word;

      offset = (bitpos + bitsdone) / unit;
      thispos = (bitpos + bitsdone) % unit;

      /* When the region of bytes we can touch is restricted, decrease
         UNIT close to the end of the region as needed.  If op0 is a REG
         or SUBREG of REG, don't do this, as there can't be data races
         on a register and we can expand shorter code in some cases.  */
      if (bitregion_end
          && unit > BITS_PER_UNIT
          && bitpos + bitsdone - thispos + unit > bitregion_end + 1
          && !REG_P (op0)
          && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
        {
          unit = unit / 2;
          continue;
        }

      /* THISSIZE must not overrun a word boundary.  Otherwise,
         store_fixed_bit_field will call us again, and we will mutually
         recurse forever.  */
      thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
      thissize = MIN (thissize, unit - thispos);

      if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
        {
          /* Fetch successively less significant portions.  */
          if (CONST_INT_P (value))
            part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
                             >> (bitsize - bitsdone - thissize))
                            & ((HOST_WIDE_INT_1 << thissize) - 1));
          /* Likewise, but the source is little-endian.  */
          else if (reverse)
            part = extract_fixed_bit_field (word_mode, value, thissize,
                                            bitsize - bitsdone - thissize,
                                            NULL_RTX, 1, false);
          else
            {
              int total_bits = GET_MODE_BITSIZE (GET_MODE (value));
              /* The args are chosen so that the last part includes the
                 lsb.  Give extract_bit_field the value it needs (with
                 endianness compensation) to fetch the piece we want.  */
              part = extract_fixed_bit_field (word_mode, value, thissize,
                                              total_bits - bitsize + bitsdone,
                                              NULL_RTX, 1, false);
            }
        }
      else
        {
          /* Fetch successively more significant portions.  */
          if (CONST_INT_P (value))
            part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
                             >> bitsdone)
                            & ((HOST_WIDE_INT_1 << thissize) - 1));
          /* Likewise, but the source is big-endian.  */
          else if (reverse)
            part = extract_fixed_bit_field (word_mode, value, thissize,
                                            total_bits - bitsdone - thissize,
                                            NULL_RTX, 1, false);
          else
            part = extract_fixed_bit_field (word_mode, value, thissize,
                                            bitsdone, NULL_RTX, 1, false);
        }

      /* If OP0 is a register, then handle OFFSET here.  */
      if (SUBREG_P (op0) || REG_P (op0))
        {
          machine_mode op0_mode = GET_MODE (op0);
          if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
            word = offset ? const0_rtx : op0;
          else
            word = operand_subword_force (op0, offset * unit / BITS_PER_WORD,
                                          GET_MODE (op0));
          offset &= BITS_PER_WORD / unit - 1;
        }
      else
        word = op0;

      /* OFFSET is in UNITs, and UNIT is in bits.  If WORD is const0_rtx,
         it is just an out-of-bounds access.  Ignore it.  */
      if (word != const0_rtx)
        store_fixed_bit_field (word, thissize, offset * unit + thispos,
                               bitregion_start, bitregion_end, part,
                               reverse);
      bitsdone += thissize;
    }
}

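/* Worked example (editorial note, not in the original source): a 10-bit
   field at BITPOS 28 with UNIT 32 is stored in two pieces: first
   THISSIZE = 4 bits at THISPOS 28 of word 0, then the remaining 6 bits
   at THISPOS 0 of word 1.  */
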
/* A subroutine of extract_bit_field_1 that converts return value X
   to either MODE or TMODE.  MODE, TMODE and UNSIGNEDP are arguments
   to extract_bit_field.  */

static rtx
convert_extracted_bit_field (rtx x, machine_mode mode,
                             machine_mode tmode, bool unsignedp)
{
  if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
    return x;

  /* If the x mode is not a scalar integral, first convert to the
     integer mode of that size and then access it as a floating-point
     value via a SUBREG.  */
  if (!SCALAR_INT_MODE_P (tmode))
    {
      scalar_int_mode int_mode = int_mode_for_mode (tmode).require ();
      x = convert_to_mode (int_mode, x, unsignedp);
      x = force_reg (int_mode, x);
      return gen_lowpart (tmode, x);
    }

  return convert_to_mode (tmode, x, unsignedp);
}

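/* Example (editorial note, not in the original source): if TMODE is
   SFmode, the result is first converted to the equally sized SImode and
   then punned to SFmode with gen_lowpart, since a direct integer-to-float
   conversion would change the bit pattern rather than reinterpret it.  */
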
/* Try to use an ext(z)v pattern to extract a field from OP0.
   Return the extracted value on success, otherwise return null.
   EXT_MODE is the mode of the extraction and the other arguments
   are as for extract_bit_field.  */

static rtx
extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
                              unsigned HOST_WIDE_INT bitsize,
                              unsigned HOST_WIDE_INT bitnum,
                              int unsignedp, rtx target,
                              machine_mode mode, machine_mode tmode)
{
  struct expand_operand ops[4];
  rtx spec_target = target;
  rtx spec_target_subreg = 0;
  machine_mode ext_mode = extv->field_mode;
  unsigned unit = GET_MODE_BITSIZE (ext_mode);

  if (bitsize == 0 || unit < bitsize)
    return NULL_RTX;

  if (MEM_P (op0))
    /* Get a reference to the first byte of the field.  */
    op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
                                &bitnum);
  else
    {
      /* Convert from counting within OP0 to counting in EXT_MODE.  */
      if (BYTES_BIG_ENDIAN)
        bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));

      /* If op0 is a register, we need it in EXT_MODE to make it
         acceptable to the format of ext(z)v.  */
      if (GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
        return NULL_RTX;
      if (REG_P (op0) && GET_MODE (op0) != ext_mode)
        op0 = gen_lowpart_SUBREG (ext_mode, op0);
    }

  /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
     "backwards" from the size of the unit we are extracting from.
     Otherwise, we count bits from the most significant on a
     BYTES/BITS_BIG_ENDIAN machine.  */

  if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
    bitnum = unit - bitsize - bitnum;

  if (target == 0)
    target = spec_target = gen_reg_rtx (tmode);

  if (GET_MODE (target) != ext_mode)
    {
      /* Don't use LHS paradoxical subreg if explicit truncation is needed
         between the mode of the extraction (word_mode) and the target
         mode.  Instead, create a temporary and use convert_move to set
         the target.  */
      if (REG_P (target)
          && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
        {
          target = gen_lowpart (ext_mode, target);
          if (GET_MODE_PRECISION (ext_mode)
              > GET_MODE_PRECISION (GET_MODE (spec_target)))
            spec_target_subreg = target;
        }
      else
        target = gen_reg_rtx (ext_mode);
    }

  create_output_operand (&ops[0], target, ext_mode);
  create_fixed_operand (&ops[1], op0);
  create_integer_operand (&ops[2], bitsize);
  create_integer_operand (&ops[3], bitnum);
  if (maybe_expand_insn (extv->icode, 4, ops))
    {
      target = ops[0].value;
      if (target == spec_target)
        return target;
      if (target == spec_target_subreg)
        return spec_target;
      return convert_extracted_bit_field (target, mode, tmode, unsignedp);
    }
  return NULL_RTX;
}

6d7db3c5
RS
1519/* A subroutine of extract_bit_field, with the same arguments.
1520 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1521 if we can find no other means of implementing the operation.
1522 If FALLBACK_P is false, return NULL instead. */
1523
1524static rtx
1525extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
c6285bd7 1526 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
ef4bddc2 1527 machine_mode mode, machine_mode tmode,
f96bf49a 1528 bool reverse, bool fallback_p, rtx *alt_rtl)
44037a66 1529{
b3694847 1530 rtx op0 = str_rtx;
ef4bddc2 1531 machine_mode mode1;
44037a66 1532
44037a66
TG
1533 if (tmode == VOIDmode)
1534 tmode = mode;
6ca6193b 1535
44037a66
TG
1536 while (GET_CODE (op0) == SUBREG)
1537 {
2c58f7dd 1538 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
44037a66
TG
1539 op0 = SUBREG_REG (op0);
1540 }
77295dec 1541
2c58f7dd 1542 /* If we have an out-of-bounds access to a register, just return an
647eea9d 1543 uninitialized register of the required mode. This can occur if the
2c58f7dd
RS
1544 source code contains an out-of-bounds access to a small array. */
1545 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1546 return gen_reg_rtx (tmode);
1547
f8cfc6aa 1548 if (REG_P (op0)
aac280fb
DD
1549 && mode == GET_MODE (op0)
1550 && bitnum == 0
0b69c29f 1551 && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
aac280fb 1552 {
ee45a32d
EB
1553 if (reverse)
1554 op0 = flip_storage_order (mode, op0);
0b69c29f 1555 /* We're trying to extract a full register from itself. */
aac280fb
DD
1556 return op0;
1557 }
1558
ff03930a
JJ
1559 /* First try to check for vector from vector extractions. */
1560 if (VECTOR_MODE_P (GET_MODE (op0))
1561 && !MEM_P (op0)
1562 && VECTOR_MODE_P (tmode)
1563 && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (tmode))
1564 {
1565 machine_mode new_mode = GET_MODE (op0);
1566 if (GET_MODE_INNER (new_mode) != GET_MODE_INNER (tmode))
1567 {
1568 new_mode = mode_for_vector (GET_MODE_INNER (tmode),
1569 GET_MODE_BITSIZE (GET_MODE (op0))
1570 / GET_MODE_UNIT_BITSIZE (tmode));
1571 if (!VECTOR_MODE_P (new_mode)
1572 || GET_MODE_SIZE (new_mode) != GET_MODE_SIZE (GET_MODE (op0))
1573 || GET_MODE_INNER (new_mode) != GET_MODE_INNER (tmode)
1574 || !targetm.vector_mode_supported_p (new_mode))
1575 new_mode = VOIDmode;
1576 }
1577 if (new_mode != VOIDmode
1578 && (convert_optab_handler (vec_extract_optab, new_mode, tmode)
1579 != CODE_FOR_nothing)
1580 && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (tmode)
1581 == bitnum / GET_MODE_BITSIZE (tmode)))
1582 {
1583 struct expand_operand ops[3];
1584 machine_mode outermode = new_mode;
1585 machine_mode innermode = tmode;
1586 enum insn_code icode
1587 = convert_optab_handler (vec_extract_optab, outermode, innermode);
1588 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1589
1590 if (new_mode != GET_MODE (op0))
1591 op0 = gen_lowpart (new_mode, op0);
1592 create_output_operand (&ops[0], target, innermode);
1593 ops[0].target = 1;
1594 create_input_operand (&ops[1], op0, outermode);
1595 create_integer_operand (&ops[2], pos);
1596 if (maybe_expand_insn (icode, 3, ops))
1597 {
1598 if (alt_rtl && ops[0].target)
1599 *alt_rtl = target;
1600 target = ops[0].value;
1601 if (GET_MODE (target) != mode)
1602 return gen_lowpart (tmode, target);
1603 return target;
1604 }
1605 }
1606 }
1607
0890b981
AP
1608 /* See if we can get a better vector mode before extracting. */
1609 if (VECTOR_MODE_P (GET_MODE (op0))
1610 && !MEM_P (op0)
1611 && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1612 {
ef4bddc2 1613 machine_mode new_mode;
0890b981
AP
1614
1615 if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1616 new_mode = MIN_MODE_VECTOR_FLOAT;
325217ed
CF
1617 else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1618 new_mode = MIN_MODE_VECTOR_FRACT;
1619 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1620 new_mode = MIN_MODE_VECTOR_UFRACT;
1621 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1622 new_mode = MIN_MODE_VECTOR_ACCUM;
1623 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1624 new_mode = MIN_MODE_VECTOR_UACCUM;
0890b981
AP
1625 else
1626 new_mode = MIN_MODE_VECTOR_INT;
1627
c94843d2 1628 FOR_EACH_MODE_FROM (new_mode, new_mode)
b147c5b9 1629 if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
ab2b55c1 1630 && GET_MODE_UNIT_SIZE (new_mode) == GET_MODE_SIZE (tmode)
0890b981
AP
1631 && targetm.vector_mode_supported_p (new_mode))
1632 break;
1633 if (new_mode != VOIDmode)
1634 op0 = gen_lowpart (new_mode, op0);
1635 }
1636
997404de
JH
1637 /* Use vec_extract patterns for extracting parts of vectors whenever
1638 available. */
1639 if (VECTOR_MODE_P (GET_MODE (op0))
3c0cb5de 1640 && !MEM_P (op0)
ff03930a
JJ
1641 && (convert_optab_handler (vec_extract_optab, GET_MODE (op0),
1642 GET_MODE_INNER (GET_MODE (op0)))
1643 != CODE_FOR_nothing)
6c825cd4
DS
1644 && ((bitnum + bitsize - 1) / GET_MODE_UNIT_BITSIZE (GET_MODE (op0))
1645 == bitnum / GET_MODE_UNIT_BITSIZE (GET_MODE (op0))))
997404de 1646 {
a5c7d693 1647 struct expand_operand ops[3];
ef4bddc2
RS
1648 machine_mode outermode = GET_MODE (op0);
1649 machine_mode innermode = GET_MODE_INNER (outermode);
ff03930a
JJ
1650 enum insn_code icode
1651 = convert_optab_handler (vec_extract_optab, outermode, innermode);
b42271d6 1652 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
997404de 1653
a5c7d693 1654 create_output_operand (&ops[0], target, innermode);
f96bf49a 1655 ops[0].target = 1;
a5c7d693
RS
1656 create_input_operand (&ops[1], op0, outermode);
1657 create_integer_operand (&ops[2], pos);
1658 if (maybe_expand_insn (icode, 3, ops))
997404de 1659 {
f96bf49a
JW
1660 if (alt_rtl && ops[0].target)
1661 *alt_rtl = target;
a5c7d693
RS
1662 target = ops[0].value;
1663 if (GET_MODE (target) != mode)
1664 return gen_lowpart (tmode, target);
1665 return target;
997404de
JH
1666 }
1667 }
1668
d006aa54
RH
1669 /* Make sure we are playing with integral modes. Pun with subregs
1670 if we aren't. */
304b9962
RS
1671 opt_scalar_int_mode opt_imode = int_mode_for_mode (GET_MODE (op0));
1672 scalar_int_mode imode;
1673 if (!opt_imode.exists (&imode) || imode != GET_MODE (op0))
1674 {
1675 if (MEM_P (op0))
1676 op0 = adjust_bitfield_address_size (op0, opt_imode.else_blk (),
1677 0, MEM_SIZE (op0));
1678 else if (opt_imode.exists (&imode))
1679 {
1680 op0 = gen_lowpart (imode, op0);
1681
1682 /* If we got a SUBREG, force it into a register since we
1683 aren't going to be able to do another SUBREG on it. */
1684 if (GET_CODE (op0) == SUBREG)
1685 op0 = force_reg (imode, op0);
1686 }
1687 else
1688 {
1689 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (op0));
1690 rtx mem = assign_stack_temp (GET_MODE (op0), size);
1691 emit_move_insn (mem, op0);
1692 op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1693 }
1694 }
d006aa54 1695
6ca6193b
JDA
1696 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1697 If that's wrong, the solution is to test for it and set TARGET to 0
1698 if needed. */
e98f90d3 1699
f5d4f18c
SL
1700 /* Get the mode of the field to use for atomic access or subreg
1701 conversion. */
b8ab7fc8
RS
1702 mode1 = mode;
1703 if (SCALAR_INT_MODE_P (tmode))
44037a66 1704 {
ef4bddc2 1705 machine_mode try_mode = mode_for_size (bitsize,
b8ab7fc8
RS
1706 GET_MODE_CLASS (tmode), 0);
1707 if (try_mode != BLKmode)
1708 mode1 = try_mode;
1709 }
1710 gcc_assert (mode1 != BLKmode);
1711
1712 /* Extraction of a full MODE1 value can be done with a subreg as long
1713 as the least significant bit of the value is the least significant
1714 bit of either OP0 or a word of OP0. */
1715 if (!MEM_P (op0)
ee45a32d 1716 && !reverse
b8ab7fc8
RS
1717 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1718 && bitsize == GET_MODE_BITSIZE (mode1)
1719 && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0)))
1720 {
1721 rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1722 bitnum / BITS_PER_UNIT);
1723 if (sub)
1724 return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1725 }
1726
1727 /* Extraction of a full MODE1 value can be done with a load as long as
1728 the field is on a byte boundary and is sufficiently aligned. */
00efe3ea 1729 if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1))
b8ab7fc8
RS
1730 {
1731 op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT);
ee45a32d
EB
1732 if (reverse)
1733 op0 = flip_storage_order (mode1, op0);
b8ab7fc8 1734 return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
44037a66 1735 }
b8ab7fc8 1736
44037a66 1737 /* Handle fields bigger than a word. */
c410d49e 1738
44037a66
TG
1739 if (bitsize > BITS_PER_WORD)
1740 {
1741 /* Here we transfer the words of the field
1742 in the order least significant first.
1743 This is because the most significant word is the one which may
1744 be less than full. */
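 /* E.g. extracting a 40-bit field on a 32-bit target copies one
 full word plus an 8-bit piece, and the sign- or zero-extension
 below fills the rest of TARGET. */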
1745
ee45a32d 1746 const bool backwards = WORDS_BIG_ENDIAN;
770ae6cc
RK
1747 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1748 unsigned int i;
f3f6fb16 1749 rtx_insn *last;
44037a66 1750
02972eaf 1751 if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
44037a66
TG
1752 target = gen_reg_rtx (mode);
1753
7d21a61e
DD
1754 /* In case we're about to clobber a base register or something
1755 (see gcc.c-torture/execute/20040625-1.c). */
1756 if (reg_mentioned_p (target, str_rtx))
1757 target = gen_reg_rtx (mode);
1758
34ea783b 1759 /* Indicate for flow that the entire target reg is being set. */
c41c1387 1760 emit_clobber (target);
34ea783b 1761
5ef0b50d 1762 last = get_last_insn ();
44037a66
TG
1763 for (i = 0; i < nwords; i++)
1764 {
1765 /* If I is 0, use the low-order word in both field and target;
1766 if I is 1, use the next to lowest word; and so on. */
77295dec 1767 /* Word number in TARGET to use. */
770ae6cc 1768 unsigned int wordnum
0cd9e9ee 1769 = (backwards
770ae6cc
RK
1770 ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1771 : i);
77295dec 1772 /* Offset from start of field in OP0. */
ee45a32d 1773 unsigned int bit_offset = (backwards ^ reverse
0cd9e9ee
EB
1774 ? MAX ((int) bitsize - ((int) i + 1)
1775 * BITS_PER_WORD,
1776 0)
770ae6cc 1777 : (int) i * BITS_PER_WORD);
44037a66
TG
1778 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1779 rtx result_part
5ef0b50d
EB
1780 = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1781 bitsize - i * BITS_PER_WORD),
c6285bd7 1782 bitnum + bit_offset, 1, target_part,
f96bf49a 1783 mode, word_mode, reverse, fallback_p, NULL);
44037a66 1784
5b0264cb 1785 gcc_assert (target_part);
5ef0b50d
EB
1786 if (!result_part)
1787 {
1788 delete_insns_since (last);
1789 return NULL;
1790 }
44037a66
TG
1791
1792 if (result_part != target_part)
1793 emit_move_insn (target_part, result_part);
1794 }
1795
5f57dff0 1796 if (unsignedp)
77295dec
DE
1797 {
1798 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1799 need to be zeroed out. */
1800 if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1801 {
770ae6cc 1802 unsigned int i, total_words;
77295dec
DE
1803
1804 total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1805 for (i = nwords; i < total_words; i++)
04050c69
RK
1806 emit_move_insn
1807 (operand_subword (target,
0cd9e9ee 1808 backwards ? total_words - i - 1 : i,
04050c69
RK
1809 1, VOIDmode),
1810 const0_rtx);
77295dec
DE
1811 }
1812 return target;
1813 }
1814
5f57dff0
JW
1815 /* Signed bit field: sign-extend with two arithmetic shifts. */
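 /* E.g. for a 24-bit signed field in a 32-bit MODE: shifting left
 by 8 moves the field's sign bit into the mode's sign bit, and the
 arithmetic right shift by 8 then replicates it:
 0x00800000 -> 0x80000000 -> 0xff800000 == -8388608. */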
1816 target = expand_shift (LSHIFT_EXPR, mode, target,
eb6c3df1 1817 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
5f57dff0 1818 return expand_shift (RSHIFT_EXPR, mode, target,
eb6c3df1 1819 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
44037a66 1820 }
c410d49e 1821
b8ab7fc8
RS
1822 /* If OP0 is a multi-word register, narrow it to the affected word.
1823 If the region spans two words, defer to extract_split_bit_field. */
1824 if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
44037a66 1825 {
867a0126 1826 if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
470032d7 1827 {
b8ab7fc8
RS
1828 if (!fallback_p)
1829 return NULL_RTX;
ee45a32d
EB
1830 target = extract_split_bit_field (op0, bitsize, bitnum, unsignedp,
1831 reverse);
b8ab7fc8 1832 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
470032d7 1833 }
867a0126
RS
1834 op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
1835 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1836 bitnum %= BITS_PER_WORD;
44037a66 1837 }
44037a66 1838
b8ab7fc8
RS
1839 /* From here on we know the desired field is smaller than a word.
1840 If OP0 is a register, it too fits within a word. */
fcdd52b7
RS
1841 enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1842 extraction_insn extv;
1843 if (!MEM_P (op0)
ee45a32d 1844 && !reverse
c0a8a3e6
RS
1845 /* ??? We could limit the structure size to the part of OP0 that
1846 contains the field, with appropriate checks for endianness
1847 and TRULY_NOOP_TRUNCATION. */
1848 && get_best_reg_extraction_insn (&extv, pattern,
1849 GET_MODE_BITSIZE (GET_MODE (op0)),
fcdd52b7 1850 tmode))
44037a66 1851 {
fcdd52b7 1852 rtx result = extract_bit_field_using_extv (&extv, op0, bitsize, bitnum,
a20556e4 1853 unsignedp, target, mode,
fcdd52b7 1854 tmode);
a20556e4
RS
1855 if (result)
1856 return result;
44037a66 1857 }
f76b9db2 1858
6d7db3c5
RS
1859 /* If OP0 is a memory, try copying it to a register and seeing if a
1860 cheap register alternative is available. */
ee45a32d 1861 if (MEM_P (op0) && !reverse)
6d7db3c5 1862 {
f5d4f18c
SL
1863 if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
1864 tmode))
17a73ba0 1865 {
fcdd52b7
RS
1866 rtx result = extract_bit_field_using_extv (&extv, op0, bitsize,
1867 bitnum, unsignedp,
1868 target, mode,
1869 tmode);
17a73ba0
RS
1870 if (result)
1871 return result;
1872 }
1873
f3f6fb16 1874 rtx_insn *last = get_last_insn ();
f76b9db2 1875
fcdd52b7
RS
1876 /* Try loading part of OP0 into a register and extracting the
1877 bitfield from that. */
1878 unsigned HOST_WIDE_INT bitpos;
1879 rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
1880 0, 0, tmode, &bitpos);
1881 if (xop0)
6d7db3c5 1882 {
fcdd52b7
RS
1883 xop0 = copy_to_reg (xop0);
1884 rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
c6285bd7 1885 unsignedp, target,
f96bf49a 1886 mode, tmode, reverse, false, NULL);
fcdd52b7
RS
1887 if (result)
1888 return result;
1889 delete_insns_since (last);
c410d49e 1890 }
44037a66 1891 }
562fc702 1892
6d7db3c5
RS
1893 if (!fallback_p)
1894 return NULL;
1895
b8ab7fc8
RS
1896 /* Find a correspondingly-sized integer field, so we can apply
1897 shifts and masks to it. */
304b9962
RS
1898 scalar_int_mode int_mode;
1899 if (!int_mode_for_mode (tmode).exists (&int_mode))
1900 /* If this fails, we should probably push op0 out to memory and then
1901 do a load. */
1902 int_mode = int_mode_for_mode (mode).require ();
b8ab7fc8 1903
ee45a32d
EB
1904 target = extract_fixed_bit_field (int_mode, op0, bitsize, bitnum, target,
1905 unsignedp, reverse);
1906
1907 /* Complex values must be reversed piecewise, so we need to undo the global
1908 reversal, convert to the complex mode and reverse again. */
1909 if (reverse && COMPLEX_MODE_P (tmode))
1910 {
1911 target = flip_storage_order (int_mode, target);
1912 target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
1913 target = flip_storage_order (tmode, target);
1914 }
1915 else
1916 target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
1917
1918 return target;
6d7db3c5
RS
1919}
1920
1921/* Generate code to extract a bit-field from STR_RTX
1922 containing BITSIZE bits, starting at BITNUM,
1923 and put it in TARGET if possible (if TARGET is nonzero).
1924 Regardless of TARGET, we return the rtx for where the value is placed.
1925
1926 STR_RTX is the structure containing the field (a REG or MEM).
1927 UNSIGNEDP is nonzero if this is an unsigned bit field.
1928 MODE is the natural mode of the field value once extracted.
1929 TMODE is the mode the caller would like the value to have;
1930 but the value may be returned with type MODE instead.
1931
ee45a32d
EB
1932 If REVERSE is true, the extraction is to be done in reverse order.
1933
6d7db3c5
RS
1934 If a TARGET is specified and we can store in it at no extra cost,
1935 we do so, and return TARGET.
1936 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1937 if they are equally easy. */
1938
1939rtx
1940extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
c6285bd7 1941 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
f96bf49a
JW
1942 machine_mode mode, machine_mode tmode, bool reverse,
1943 rtx *alt_rtl)
6d7db3c5 1944{
ef4bddc2 1945 machine_mode mode1;
f5d4f18c
SL
1946
1947 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
1948 if (GET_MODE_BITSIZE (GET_MODE (str_rtx)) > 0)
1949 mode1 = GET_MODE (str_rtx);
1950 else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1951 mode1 = GET_MODE (target);
1952 else
1953 mode1 = tmode;
1954
6f4e9cf8 1955 if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, mode1, 0, 0))
f5d4f18c 1956 {
b6dd42a9
BE
1957 /* Extraction of a full MODE1 value can be done with a simple load.
1958 We know here that the field can be accessed with one single
1959 instruction. For targets that support unaligned memory,
1960 an unaligned access may be necessary. */
1961 if (bitsize == GET_MODE_BITSIZE (mode1))
6f4e9cf8 1962 {
53c615a2
BE
1963 rtx result = adjust_bitfield_address (str_rtx, mode1,
1964 bitnum / BITS_PER_UNIT);
ee45a32d
EB
1965 if (reverse)
1966 result = flip_storage_order (mode1, result);
b6dd42a9 1967 gcc_assert (bitnum % BITS_PER_UNIT == 0);
53c615a2 1968 return convert_extracted_bit_field (result, mode, tmode, unsignedp);
6f4e9cf8
BE
1969 }
1970
53c615a2
BE
1971 str_rtx = narrow_bit_field_mem (str_rtx, mode1, bitsize, bitnum,
1972 &bitnum);
b6dd42a9 1973 gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (mode1));
53c615a2 1974 str_rtx = copy_to_reg (str_rtx);
f5d4f18c 1975 }
53c615a2 1976
c6285bd7 1977 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
f96bf49a 1978 target, mode, tmode, reverse, true, alt_rtl);
44037a66
TG
1979}
1980\f
b8ab7fc8
RS
1981/* Use shifts and boolean operations to extract a field of BITSIZE bits
1982 from bit BITNUM of OP0.
44037a66
TG
1983
1984 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
ee45a32d
EB
1985 If REVERSE is true, the extraction is to be done in reverse order.
1986
44037a66
TG
1987 If TARGET is nonzero, attempts to store the value there
1988 and return TARGET, but this is not guaranteed.
04050c69 1989 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
44037a66
TG
1990
1991static rtx
ef4bddc2 1992extract_fixed_bit_field (machine_mode tmode, rtx op0,
502b8322 1993 unsigned HOST_WIDE_INT bitsize,
b8ab7fc8 1994 unsigned HOST_WIDE_INT bitnum, rtx target,
ee45a32d 1995 int unsignedp, bool reverse)
44037a66 1996{
b8ab7fc8 1997 if (MEM_P (op0))
44037a66 1998 {
ef4bddc2 1999 machine_mode mode
548cfdc2
EB
2000 = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0), word_mode,
2001 MEM_VOLATILE_P (op0));
44037a66
TG
2002
2003 if (mode == VOIDmode)
2004 /* The only way this should occur is if the field spans word
2005 boundaries. */
ee45a32d
EB
2006 return extract_split_bit_field (op0, bitsize, bitnum, unsignedp,
2007 reverse);
44037a66 2008
f5d4f18c 2009 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
44037a66
TG
2010 }
2011
6f4e9cf8 2012 return extract_fixed_bit_field_1 (tmode, op0, bitsize, bitnum,
ee45a32d 2013 target, unsignedp, reverse);
6f4e9cf8
BE
2014}
2015
2016/* Helper function for extract_fixed_bit_field, extracts
2017 the bit field always using the MODE of OP0. */
2018
2019static rtx
ef4bddc2 2020extract_fixed_bit_field_1 (machine_mode tmode, rtx op0,
6f4e9cf8
BE
2021 unsigned HOST_WIDE_INT bitsize,
2022 unsigned HOST_WIDE_INT bitnum, rtx target,
ee45a32d 2023 int unsignedp, bool reverse)
6f4e9cf8 2024{
ef4bddc2 2025 machine_mode mode = GET_MODE (op0);
b8ab7fc8
RS
2026 gcc_assert (SCALAR_INT_MODE_P (mode));
2027
2028 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
2029 for invalid input, such as extracting the equivalent of f5 from
2030 gcc.dg/pr48335-2.c. */
37811a73 2031
ee45a32d 2032 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
b8ab7fc8 2033 /* BITNUM is the distance between our msb and that of OP0.
04050c69 2034 Convert it to the distance from the lsb. */
b8ab7fc8 2035 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
44037a66 2036
b8ab7fc8 2037 /* Now BITNUM is always the distance between the field's lsb and that of OP0.
44037a66 2038 We have reduced the big-endian case to the little-endian case. */
ee45a32d
EB
2039 if (reverse)
2040 op0 = flip_storage_order (mode, op0);
44037a66
TG
2041
2042 if (unsignedp)
2043 {
b8ab7fc8 2044 if (bitnum)
44037a66
TG
2045 {
2046 /* If the field does not already start at the lsb,
2047 shift it so it does. */
44037a66 2048 /* Maybe propagate the target for the shift. */
f8cfc6aa 2049 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
6399c0ab
SB
2050 if (tmode != mode)
2051 subtarget = 0;
b8ab7fc8 2052 op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
44037a66
TG
2053 }
2054 /* Convert the value to the desired mode. */
2055 if (mode != tmode)
2056 op0 = convert_to_mode (tmode, op0, 1);
2057
2058 /* Unless the msb of the field used to be the msb when we shifted,
2059 mask out the upper bits. */
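 /* E.g. an 8-bit field that started at bit 4 of a 32-bit word
 still has OP0's former bits 12..31 above it after the shift,
 so AND with the mask 0xff. */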
2060
b8ab7fc8 2061 if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
44037a66
TG
2062 return expand_binop (GET_MODE (op0), and_optab, op0,
2063 mask_rtx (GET_MODE (op0), 0, bitsize, 0),
2064 target, 1, OPTAB_LIB_WIDEN);
2065 return op0;
2066 }
2067
2068 /* To extract a signed bit-field, first shift its msb to the msb of the word,
2069 then arithmetic-shift its lsb to the lsb of the word. */
2070 op0 = force_reg (mode, op0);
44037a66
TG
2071
2072 /* Find the narrowest integer mode that contains the field. */
2073
c94843d2 2074 FOR_EACH_MODE_IN_CLASS (mode, MODE_INT)
b8ab7fc8 2075 if (GET_MODE_BITSIZE (mode) >= bitsize + bitnum)
44037a66
TG
2076 {
2077 op0 = convert_to_mode (mode, op0, 0);
2078 break;
2079 }
2080
ccb1b17b
JJ
2081 if (mode != tmode)
2082 target = 0;
2083
b8ab7fc8 2084 if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
44037a66 2085 {
b8ab7fc8 2086 int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
44037a66 2087 /* Maybe propagate the target for the shift. */
f8cfc6aa 2088 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
44037a66
TG
2089 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
2090 }
2091
2092 return expand_shift (RSHIFT_EXPR, mode, op0,
eb6c3df1 2093 GET_MODE_BITSIZE (mode) - bitsize, target, 0);
44037a66 2094}
44037a66
TG
2095
2096/* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
088c5368 2097 VALUE << BITPOS. */
44037a66
TG
2098
2099static rtx
ef4bddc2 2100lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
088c5368 2101 int bitpos)
44037a66 2102{
807e902e 2103 return immed_wide_int_const (wi::lshift (value, bitpos), mode);
44037a66
TG
2104}
2105\f
2106/* Extract a bit field that is split across two words
2107 and return an RTX for the result.
2108
2109 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2110 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
ee45a32d
EB
2111 UNSIGNEDP is 1 if we should zero-extend the contents; else sign-extend.
2112
2113 If REVERSE is true, the extraction is to be done in reverse order. */
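 /* E.g. with 32-bit words, a 16-bit field starting at bit 24 is
 fetched as bits 24..31 of word 0 and bits 0..7 of word 1; the two
 unsigned parts are then shifted into place and IOR'ed together. */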
44037a66
TG
2114
2115static rtx
502b8322 2116extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
ee45a32d
EB
2117 unsigned HOST_WIDE_INT bitpos, int unsignedp,
2118 bool reverse)
44037a66 2119{
770ae6cc
RK
2120 unsigned int unit;
2121 unsigned int bitsdone = 0;
c16ddde3 2122 rtx result = NULL_RTX;
06c94bce 2123 int first = 1;
44037a66 2124
4ee16841
DE
2125 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
2126 much at a time. */
f8cfc6aa 2127 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
4ee16841
DE
2128 unit = BITS_PER_WORD;
2129 else
609023ff 2130 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
4ee16841 2131
06c94bce
RS
2132 while (bitsdone < bitsize)
2133 {
770ae6cc 2134 unsigned HOST_WIDE_INT thissize;
06c94bce 2135 rtx part, word;
770ae6cc
RK
2136 unsigned HOST_WIDE_INT thispos;
2137 unsigned HOST_WIDE_INT offset;
06c94bce
RS
2138
2139 offset = (bitpos + bitsdone) / unit;
2140 thispos = (bitpos + bitsdone) % unit;
2141
0eb61c19
DE
2142 /* THISSIZE must not overrun a word boundary. Otherwise,
2143 extract_fixed_bit_field will call us again, and we will mutually
2144 recurse forever. */
2145 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2146 thissize = MIN (thissize, unit - thispos);
06c94bce 2147
867a0126
RS
2148 /* If OP0 is a register, then handle OFFSET here. */
2149 if (SUBREG_P (op0) || REG_P (op0))
06c94bce
RS
2150 {
2151 word = operand_subword_force (op0, offset, GET_MODE (op0));
2152 offset = 0;
2153 }
2154 else
2155 word = op0;
2156
06c94bce 2157 /* Extract the parts in bit-counting order,
0eb61c19 2158 whose meaning is determined by BYTES_BIG_ENDIAN.
b8ab7fc8
RS
2159 OFFSET is in UNITs, and UNIT is in bits. */
2160 part = extract_fixed_bit_field (word_mode, word, thissize,
ee45a32d 2161 offset * unit + thispos, 0, 1, reverse);
06c94bce 2162 bitsdone += thissize;
44037a66 2163
06c94bce 2164 /* Shift this part into place for the result. */
ee45a32d 2165 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
f76b9db2
ILT
2166 {
2167 if (bitsize != bitsdone)
2168 part = expand_shift (LSHIFT_EXPR, word_mode, part,
eb6c3df1 2169 bitsize - bitsdone, 0, 1);
f76b9db2
ILT
2170 }
2171 else
2172 {
2173 if (bitsdone != thissize)
2174 part = expand_shift (LSHIFT_EXPR, word_mode, part,
eb6c3df1 2175 bitsdone - thissize, 0, 1);
f76b9db2 2176 }
44037a66 2177
06c94bce
RS
2178 if (first)
2179 result = part;
2180 else
2181 /* Combine the parts with bitwise or. This works
2182 because we extracted each part as an unsigned bit field. */
2183 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2184 OPTAB_LIB_WIDEN);
2185
2186 first = 0;
2187 }
44037a66
TG
2188
2189 /* Unsigned bit field: we are done. */
2190 if (unsignedp)
2191 return result;
2192 /* Signed bit field: sign-extend with two arithmetic shifts. */
2193 result = expand_shift (LSHIFT_EXPR, word_mode, result,
eb6c3df1 2194 BITS_PER_WORD - bitsize, NULL_RTX, 0);
44037a66 2195 return expand_shift (RSHIFT_EXPR, word_mode, result,
eb6c3df1 2196 BITS_PER_WORD - bitsize, NULL_RTX, 0);
44037a66
TG
2197}
2198\f
18b526e8
RS
2199/* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2200 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
2201 MODE, fill the upper bits with zeros. Fail if the layout of either
2202 mode is unknown (as for CC modes) or if the extraction would involve
2203 unprofitable mode punning. Return the value on success, otherwise
2204 return null.
2205
2206 This is different from gen_lowpart* in these respects:
2207
2208 - the returned value must always be considered an rvalue
2209
2210 - when MODE is wider than SRC_MODE, the extraction involves
2211 a zero extension
2212
2213 - when MODE is smaller than SRC_MODE, the extraction involves
2214 a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2215
2216 In other words, this routine performs a computation, whereas the
2217 gen_lowpart* routines are conceptually lvalue or rvalue subreg
2218 operations. */
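/* E.g. extract_low_bits (HImode, SImode, x) yields the low 16 bits
 of X as an HImode rvalue, while extract_low_bits (DImode, SImode, x)
 zero-extends X to DImode. */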
2219
2220rtx
ef4bddc2 2221extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
18b526e8 2222{
ef4bddc2 2223 machine_mode int_mode, src_int_mode;
18b526e8
RS
2224
2225 if (mode == src_mode)
2226 return src;
2227
2228 if (CONSTANT_P (src))
d898d29b
JJ
2229 {
2230 /* simplify_gen_subreg can't be used here, as if simplify_subreg
2231 fails, it will happily create (subreg (symbol_ref)) or similar
2232 invalid SUBREGs. */
2233 unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2234 rtx ret = simplify_subreg (mode, src, src_mode, byte);
2235 if (ret)
2236 return ret;
2237
2238 if (GET_MODE (src) == VOIDmode
2239 || !validate_subreg (mode, src_mode, src, byte))
2240 return NULL_RTX;
2241
2242 src = force_reg (GET_MODE (src), src);
2243 return gen_rtx_SUBREG (mode, src, byte);
2244 }
18b526e8
RS
2245
2246 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2247 return NULL_RTX;
2248
2249 if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2250 && MODES_TIEABLE_P (mode, src_mode))
2251 {
2252 rtx x = gen_lowpart_common (mode, src);
2253 if (x)
2254 return x;
2255 }
2256
304b9962
RS
2257 if (!int_mode_for_mode (src_mode).exists (&src_int_mode)
2258 || !int_mode_for_mode (mode).exists (&int_mode))
18b526e8
RS
2259 return NULL_RTX;
2260
2261 if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2262 return NULL_RTX;
2263 if (!MODES_TIEABLE_P (int_mode, mode))
2264 return NULL_RTX;
2265
2266 src = gen_lowpart (src_int_mode, src);
2267 src = convert_modes (int_mode, src_int_mode, src, true);
2268 src = gen_lowpart (mode, src);
2269 return src;
2270}
2271\f
44037a66
TG
2272/* Add INC into TARGET. */
2273
2274void
502b8322 2275expand_inc (rtx target, rtx inc)
44037a66
TG
2276{
2277 rtx value = expand_binop (GET_MODE (target), add_optab,
2278 target, inc,
2279 target, 0, OPTAB_LIB_WIDEN);
2280 if (value != target)
2281 emit_move_insn (target, value);
2282}
2283
2284/* Subtract DEC from TARGET. */
2285
2286void
502b8322 2287expand_dec (rtx target, rtx dec)
44037a66
TG
2288{
2289 rtx value = expand_binop (GET_MODE (target), sub_optab,
2290 target, dec,
2291 target, 0, OPTAB_LIB_WIDEN);
2292 if (value != target)
2293 emit_move_insn (target, value);
2294}
2295\f
2296/* Output a shift instruction for expression code CODE,
2297 with SHIFTED being the rtx for the value to shift,
86529a49 2298 and AMOUNT the rtx for the amount to shift by.
44037a66
TG
2299 Store the result in the rtx TARGET, if that is convenient.
2300 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
ea000c3f
EB
2301 Return the rtx for where the value is.
2302 If that cannot be done, abort the compilation unless MAY_FAIL is true,
2303 in which case 0 is returned. */
44037a66 2304
86529a49 2305static rtx
ef4bddc2 2306expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
ea000c3f 2307 rtx amount, rtx target, int unsignedp, bool may_fail = false)
44037a66 2308{
b3694847
SS
2309 rtx op1, temp = 0;
2310 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2311 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
71d46ca5
MM
2312 optab lshift_optab = ashl_optab;
2313 optab rshift_arith_optab = ashr_optab;
2314 optab rshift_uns_optab = lshr_optab;
2315 optab lrotate_optab = rotl_optab;
2316 optab rrotate_optab = rotr_optab;
ef4bddc2
RS
2317 machine_mode op1_mode;
2318 machine_mode scalar_mode = mode;
d858f359 2319 int attempt;
f40751dd 2320 bool speed = optimize_insn_for_speed_p ();
44037a66 2321
a4ee446d
JJ
2322 if (VECTOR_MODE_P (mode))
2323 scalar_mode = GET_MODE_INNER (mode);
86529a49 2324 op1 = amount;
71d46ca5
MM
2325 op1_mode = GET_MODE (op1);
2326
2327 /* Determine whether the shift/rotate amount is a vector, or scalar. If the
2328 shift amount is a vector, use the vector/vector shift patterns. */
2329 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2330 {
2331 lshift_optab = vashl_optab;
2332 rshift_arith_optab = vashr_optab;
2333 rshift_uns_optab = vlshr_optab;
2334 lrotate_optab = vrotl_optab;
2335 rrotate_optab = vrotr_optab;
2336 }
2337
44037a66
TG
2338 /* Previously detected shift-counts computed by NEGATE_EXPR
2339 and shifted in the other direction; but that does not work
2340 on all machines. */
2341
166cdf4a
RH
2342 if (SHIFT_COUNT_TRUNCATED)
2343 {
481683e1 2344 if (CONST_INT_P (op1)
0fb7aeda 2345 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
a4ee446d 2346 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
0fb7aeda 2347 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
a4ee446d 2348 % GET_MODE_BITSIZE (scalar_mode));
166cdf4a 2349 else if (GET_CODE (op1) == SUBREG
c1cb09ad 2350 && subreg_lowpart_p (op1)
7afe2801
DM
2351 && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2352 && SCALAR_INT_MODE_P (GET_MODE (op1)))
166cdf4a
RH
2353 op1 = SUBREG_REG (op1);
2354 }
2ab0a5c4 2355
75776c6d
JJ
2356 /* Canonicalize rotates by constant amount. If op1 is bitsize / 2,
2357 prefer left rotation; if op1 is from bitsize / 2 + 1 to
2358 bitsize - 1, use the other direction of rotate, with an amount
2359 of 1 .. bitsize / 2 - 1, instead. */
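 /* E.g. in a 32-bit mode, a rotate right by 31 becomes a rotate
 left by 1, while a rotate right by 16 becomes a rotate left
 by 16. */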
2360 if (rotate
2361 && CONST_INT_P (op1)
a4ee446d
JJ
2362 && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
2363 GET_MODE_BITSIZE (scalar_mode) - 1))
75776c6d 2364 {
a4ee446d 2365 op1 = GEN_INT (GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
75776c6d
JJ
2366 left = !left;
2367 code = left ? LROTATE_EXPR : RROTATE_EXPR;
2368 }
2369
a25efea0
TP
2370 /* Rotation of 16-bit values by 8 bits is effectively equivalent to a bswaphi.
2371 Note that this is not the case for bigger values. For instance a rotation
2372 of 0x01020304 by 16 bits gives 0x03040102 which is different from
2373 0x04030201 (bswapsi). */
2374 if (rotate
2375 && CONST_INT_P (op1)
2376 && INTVAL (op1) == BITS_PER_UNIT
2377 && GET_MODE_SIZE (scalar_mode) == 2
2378 && optab_handler (bswap_optab, HImode) != CODE_FOR_nothing)
2379 return expand_unop (HImode, bswap_optab, shifted, NULL_RTX,
2380 unsignedp);
2381
44037a66
TG
2382 if (op1 == const0_rtx)
2383 return shifted;
2384
15bad393
RS
2385 /* Check whether it's cheaper to implement a left shift by a constant
2386 bit count by a sequence of additions. */
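 /* E.g. x << 2 can be synthesized as two self-additions:
 t = x + x; t = t + t; which wins whenever
 2 * add_cost (speed, mode) < shift_cost (speed, mode, 2). */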
2387 if (code == LSHIFT_EXPR
481683e1 2388 && CONST_INT_P (op1)
15bad393 2389 && INTVAL (op1) > 0
a4ee446d 2390 && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
cb2eb96f 2391 && INTVAL (op1) < MAX_BITS_PER_WORD
5322d07e
NF
2392 && (shift_cost (speed, mode, INTVAL (op1))
2393 > INTVAL (op1) * add_cost (speed, mode))
2394 && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
15bad393
RS
2395 {
2396 int i;
2397 for (i = 0; i < INTVAL (op1); i++)
2398 {
2399 temp = force_reg (mode, shifted);
2400 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2401 unsignedp, OPTAB_LIB_WIDEN);
2402 }
2403 return shifted;
2404 }
2405
d858f359 2406 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
44037a66
TG
2407 {
2408 enum optab_methods methods;
2409
d858f359 2410 if (attempt == 0)
44037a66 2411 methods = OPTAB_DIRECT;
d858f359 2412 else if (attempt == 1)
44037a66
TG
2413 methods = OPTAB_WIDEN;
2414 else
2415 methods = OPTAB_LIB_WIDEN;
2416
2417 if (rotate)
2418 {
2419 /* Widening does not work for rotation. */
2420 if (methods == OPTAB_WIDEN)
2421 continue;
2422 else if (methods == OPTAB_LIB_WIDEN)
cbec710e 2423 {
39e71615 2424 /* If we have been unable to open-code this by a rotation,
cbec710e 2425 do it as the IOR of two shifts. I.e., to rotate A
ae6fa899
JJ
2426 by N bits, compute
2427 (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
cbec710e
RK
2428 where C is the bitsize of A.
2429
2430 It is theoretically possible that the target machine might
2431 not be able to perform either shift and hence we would
2432 be making two libcalls rather than just the one for the
2433 shift (similarly if IOR could not be done). We will allow
2434 this extremely unlikely lossage to avoid complicating the
2435 code below. */
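 /* For example, with C == 32 and N == 8 the expression is
 (A << 8) | ((unsigned) A >> 24), so A == 0x12345678 rotates
 to 0x34567812; the (-N) & (C - 1) form also keeps the right
 shift amount in range when N == 0. */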
2436
39e71615 2437 rtx subtarget = target == shifted ? 0 : target;
86529a49 2438 rtx new_amount, other_amount;
39e71615 2439 rtx temp1;
86529a49
RG
2440
2441 new_amount = op1;
ae6fa899
JJ
2442 if (op1 == const0_rtx)
2443 return shifted;
2444 else if (CONST_INT_P (op1))
a4ee446d 2445 other_amount = GEN_INT (GET_MODE_BITSIZE (scalar_mode)
5c049507
RG
2446 - INTVAL (op1));
2447 else
ae6fa899
JJ
2448 {
2449 other_amount
2450 = simplify_gen_unary (NEG, GET_MODE (op1),
2451 op1, GET_MODE (op1));
a4ee446d 2452 HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
ae6fa899 2453 other_amount
69a59f0f
RS
2454 = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2455 gen_int_mode (mask, GET_MODE (op1)));
ae6fa899 2456 }
39e71615
RK
2457
2458 shifted = force_reg (mode, shifted);
2459
86529a49
RG
2460 temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2461 mode, shifted, new_amount, 0, 1);
2462 temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2463 mode, shifted, other_amount,
2464 subtarget, 1);
39e71615
RK
2465 return expand_binop (mode, ior_optab, temp, temp1, target,
2466 unsignedp, methods);
cbec710e 2467 }
44037a66
TG
2468
2469 temp = expand_binop (mode,
71d46ca5 2470 left ? lrotate_optab : rrotate_optab,
44037a66
TG
2471 shifted, op1, target, unsignedp, methods);
2472 }
2473 else if (unsignedp)
a34958c9 2474 temp = expand_binop (mode,
71d46ca5 2475 left ? lshift_optab : rshift_uns_optab,
a34958c9 2476 shifted, op1, target, unsignedp, methods);
44037a66
TG
2477
2478 /* Do arithmetic shifts.
2479 Also, if we are going to widen the operand, we can just as well
2480 use an arithmetic right-shift instead of a logical one. */
2481 if (temp == 0 && ! rotate
2482 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2483 {
2484 enum optab_methods methods1 = methods;
2485
2486 /* If trying to widen a log shift to an arithmetic shift,
2487 don't accept an arithmetic shift of the same size. */
2488 if (unsignedp)
2489 methods1 = OPTAB_MUST_WIDEN;
2490
2491 /* Arithmetic shift */
2492
2493 temp = expand_binop (mode,
71d46ca5 2494 left ? lshift_optab : rshift_arith_optab,
44037a66
TG
2495 shifted, op1, target, unsignedp, methods1);
2496 }
2497
711a5e64 2498 /* We used to try extzv here for logical right shifts, but that was
c410d49e 2499 only useful for one machine, the VAX, and caused poor code
711a5e64
RK
2500 generation there for lshrdi3, so the code was deleted and a
2501 define_expand for lshrsi3 was added to vax.md. */
44037a66
TG
2502 }
2503
ea000c3f 2504 gcc_assert (temp != NULL_RTX || may_fail);
44037a66
TG
2505 return temp;
2506}
eb6c3df1
RG
2507
2508/* Output a shift instruction for expression code CODE,
2509 with SHIFTED being the rtx for the value to shift,
2510 and AMOUNT the amount to shift by.
2511 Store the result in the rtx TARGET, if that is convenient.
2512 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2513 Return the rtx for where the value is. */
2514
2515rtx
ef4bddc2 2516expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
eb6c3df1
RG
2517 int amount, rtx target, int unsignedp)
2518{
86529a49
RG
2519 return expand_shift_1 (code, mode,
2520 shifted, GEN_INT (amount), target, unsignedp);
2521}
2522
ea000c3f
EB
2523/* Likewise, but return 0 if that cannot be done. */
2524
2525static rtx
2526maybe_expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2527 int amount, rtx target, int unsignedp)
2528{
2529 return expand_shift_1 (code, mode,
2530 shifted, GEN_INT (amount), target, unsignedp, true);
2531}
2532
86529a49
RG
2533/* Output a shift instruction for expression code CODE,
2534 with SHIFTED being the rtx for the value to shift,
2535 and AMOUNT the tree for the amount to shift by.
2536 Store the result in the rtx TARGET, if that is convenient.
2537 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2538 Return the rtx for where the value is. */
2539
2540rtx
ef4bddc2 2541expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
86529a49
RG
2542 tree amount, rtx target, int unsignedp)
2543{
2544 return expand_shift_1 (code, mode,
2545 shifted, expand_normal (amount), target, unsignedp);
eb6c3df1 2546}
86529a49 2547
44037a66 2548\f
41c64ac0 2549static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
ef4bddc2 2550 const struct mult_cost *, machine_mode mode);
ef4bddc2 2551static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
8efc8980 2552 const struct algorithm *, enum mult_variant);
502b8322 2553static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
ef4bddc2
RS
2554static rtx extract_high_half (machine_mode, rtx);
2555static rtx expmed_mult_highpart (machine_mode, rtx, rtx, rtx, int, int);
2556static rtx expmed_mult_highpart_optab (machine_mode, rtx, rtx, rtx,
8efc8980 2557 int, int);
44037a66 2558/* Compute and return the best algorithm for multiplying by T.
7963ac37
RK
2559 The algorithm must cost less than COST_LIMIT.
2560 If retval.cost >= COST_LIMIT, no algorithm was found and all
41c64ac0
RS
2561 other fields of the returned struct are undefined.
2562 MODE is the machine mode of the multiplication. */
44037a66 2563
819126a6 2564static void
502b8322 2565synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
ef4bddc2 2566 const struct mult_cost *cost_limit, machine_mode mode)
44037a66 2567{
b2fb324c 2568 int m;
52786026 2569 struct algorithm *alg_in, *best_alg;
26276705
RS
2570 struct mult_cost best_cost;
2571 struct mult_cost new_limit;
2572 int op_cost, op_latency;
ef268d34 2573 unsigned HOST_WIDE_INT orig_t = t;
b2fb324c 2574 unsigned HOST_WIDE_INT q;
84ddb681 2575 int maxm, hash_index;
7b13ee6b
KH
2576 bool cache_hit = false;
2577 enum alg_code cache_alg = alg_zero;
f40751dd 2578 bool speed = optimize_insn_for_speed_p ();
ef4bddc2 2579 machine_mode imode;
5322d07e 2580 struct alg_hash_entry *entry_ptr;
44037a66 2581
7963ac37
RK
2582 /* Indicate that no algorithm is yet found. If no algorithm
2583 is found, this value will be returned and indicate failure. */
26276705 2584 alg_out->cost.cost = cost_limit->cost + 1;
3ab0f290 2585 alg_out->cost.latency = cost_limit->latency + 1;
44037a66 2586
26276705
RS
2587 if (cost_limit->cost < 0
2588 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
819126a6 2589 return;
44037a66 2590
84ddb681
RH
2591 /* Be prepared for vector modes. */
2592 imode = GET_MODE_INNER (mode);
84ddb681
RH
2593
2594 maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2595
0792ab19 2596 /* Restrict the bits of "t" to the multiplication's mode. */
84ddb681 2597 t &= GET_MODE_MASK (imode);
0792ab19 2598
b385aeda
RK
2599 /* t == 1 can be done in zero cost. */
2600 if (t == 1)
b2fb324c 2601 {
819126a6 2602 alg_out->ops = 1;
26276705
RS
2603 alg_out->cost.cost = 0;
2604 alg_out->cost.latency = 0;
819126a6
RK
2605 alg_out->op[0] = alg_m;
2606 return;
b2fb324c
RK
2607 }
2608
b385aeda
RK
2609 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2610 fail now. */
819126a6 2611 if (t == 0)
b385aeda 2612 {
5322d07e 2613 if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
819126a6 2614 return;
b385aeda
RK
2615 else
2616 {
819126a6 2617 alg_out->ops = 1;
5322d07e
NF
2618 alg_out->cost.cost = zero_cost (speed);
2619 alg_out->cost.latency = zero_cost (speed);
819126a6
RK
2620 alg_out->op[0] = alg_zero;
2621 return;
b385aeda
RK
2622 }
2623 }
2624
52786026
RK
2625 /* We'll be needing a couple extra algorithm structures now. */
2626
1b4572a8
KG
2627 alg_in = XALLOCA (struct algorithm);
2628 best_alg = XALLOCA (struct algorithm);
26276705 2629 best_cost = *cost_limit;
52786026 2630
7b13ee6b 2631 /* Compute the hash index. */
f40751dd 2632 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
7b13ee6b
KH
2633
2634 /* See if we already know what to do for T. */
5322d07e
NF
2635 entry_ptr = alg_hash_entry_ptr (hash_index);
2636 if (entry_ptr->t == t
5322d07e
NF
2637 && entry_ptr->mode == mode
2638 && entry_ptr->speed == speed
2639 && entry_ptr->alg != alg_unknown)
7b13ee6b 2640 {
5322d07e 2641 cache_alg = entry_ptr->alg;
0178027c
KH
2642
2643 if (cache_alg == alg_impossible)
7b13ee6b 2644 {
0178027c 2645 /* The cache tells us that it's impossible to synthesize
5322d07e
NF
2646 multiplication by T within entry_ptr->cost. */
2647 if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
0178027c
KH
2648 /* COST_LIMIT is at least as restrictive as the one
2649 recorded in the hash table, in which case we have no
2650 hope of synthesizing a multiplication. Just
2651 return. */
2652 return;
2653
2654 /* If we get here, COST_LIMIT is less restrictive than the
2655 one recorded in the hash table, so we may be able to
2656 synthesize a multiplication. Proceed as if we didn't
2657 have the cache entry. */
2658 }
2659 else
2660 {
5322d07e 2661 if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
0178027c
KH
2662 /* The cached algorithm shows that this multiplication
2663 requires more cost than COST_LIMIT. Just return. This
2664 way, we don't clobber this cache entry with
2665 alg_impossible but retain useful information. */
2666 return;
7b13ee6b 2667
0178027c
KH
2668 cache_hit = true;
2669
2670 switch (cache_alg)
2671 {
2672 case alg_shift:
2673 goto do_alg_shift;
7b13ee6b 2674
0178027c
KH
2675 case alg_add_t_m2:
2676 case alg_sub_t_m2:
2677 goto do_alg_addsub_t_m2;
7b13ee6b 2678
0178027c
KH
2679 case alg_add_factor:
2680 case alg_sub_factor:
2681 goto do_alg_addsub_factor;
7b13ee6b 2682
0178027c
KH
2683 case alg_add_t2_m:
2684 goto do_alg_add_t2_m;
7b13ee6b 2685
0178027c
KH
2686 case alg_sub_t2_m:
2687 goto do_alg_sub_t2_m;
2688
2689 default:
2690 gcc_unreachable ();
2691 }
7b13ee6b
KH
2692 }
2693 }
2694
b385aeda
RK
2695 /* If we have a group of zero bits at the low-order part of T, try
2696 multiplying by the remaining bits and then doing a shift. */
2697
b2fb324c 2698 if ((t & 1) == 0)
44037a66 2699 {
7b13ee6b 2700 do_alg_shift:
146ec50f 2701 m = ctz_or_zero (t); /* m = number of low zero bits */
0792ab19 2702 if (m < maxm)
44037a66 2703 {
02a65aef 2704 q = t >> m;
15bad393
RS
2705 /* The function expand_shift will choose between a shift and
2706 a sequence of additions, so the observed cost is given as
5322d07e
NF
2707 MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */
2708 op_cost = m * add_cost (speed, mode);
2709 if (shift_cost (speed, mode, m) < op_cost)
2710 op_cost = shift_cost (speed, mode, m);
26276705
RS
2711 new_limit.cost = best_cost.cost - op_cost;
2712 new_limit.latency = best_cost.latency - op_cost;
2713 synth_mult (alg_in, q, &new_limit, mode);
2714
2715 alg_in->cost.cost += op_cost;
2716 alg_in->cost.latency += op_cost;
2717 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
02a65aef 2718 {
26276705 2719 best_cost = alg_in->cost;
076701b6 2720 std::swap (alg_in, best_alg);
02a65aef
R
2721 best_alg->log[best_alg->ops] = m;
2722 best_alg->op[best_alg->ops] = alg_shift;
02a65aef 2723 }
ddc2690a
KH
2724
2725 /* See if treating ORIG_T as a signed number yields a better
2726 sequence. Try this sequence only for a negative ORIG_T
2727 as it would be useless for a non-negative ORIG_T. */
2728 if ((HOST_WIDE_INT) orig_t < 0)
2729 {
2730 /* Shift ORIG_T as follows because a right shift of a
2731 negative-valued signed type is implementation
2732 defined. */
2733 q = ~(~orig_t >> m);
2734 /* The function expand_shift will choose between a shift
2735 and a sequence of additions, so the observed cost is
5322d07e
NF
2736 given as MIN (m * add_cost(speed, mode),
2737 shift_cost(speed, mode, m)). */
2738 op_cost = m * add_cost (speed, mode);
2739 if (shift_cost (speed, mode, m) < op_cost)
2740 op_cost = shift_cost (speed, mode, m);
ddc2690a
KH
2741 new_limit.cost = best_cost.cost - op_cost;
2742 new_limit.latency = best_cost.latency - op_cost;
2743 synth_mult (alg_in, q, &new_limit, mode);
2744
2745 alg_in->cost.cost += op_cost;
2746 alg_in->cost.latency += op_cost;
2747 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2748 {
ddc2690a 2749 best_cost = alg_in->cost;
076701b6 2750 std::swap (alg_in, best_alg);
ddc2690a
KH
2751 best_alg->log[best_alg->ops] = m;
2752 best_alg->op[best_alg->ops] = alg_shift;
2753 }
2754 }
819126a6 2755 }
7b13ee6b
KH
2756 if (cache_hit)
2757 goto done;
819126a6
RK
2758 }
2759
2760 /* If we have an odd number, add or subtract one. */
2761 if ((t & 1) != 0)
2762 {
2763 unsigned HOST_WIDE_INT w;
2764
7b13ee6b 2765 do_alg_addsub_t_m2:
819126a6
RK
2766 for (w = 1; (w & t) != 0; w <<= 1)
2767 ;
31031edd 2768 /* If T was -1, then W will be zero after the loop. This is another
c410d49e 2769 case where T ends with ...111. Handling this with (T + 1) and
31031edd
JL
2770 subtracting 1 produces slightly better code and results in much
2771 faster algorithm selection than treating it like the ...0111 case
2772 below. */
2773 if (w == 0
2774 || (w > 2
2775 /* Reject the case where t is 3.
2776 Thus we prefer addition in that case. */
2777 && t != 3))
819126a6 2778 {
c61928d0 2779 /* T ends with ...111. Multiply by (T + 1) and subtract 1. */
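 /* E.g. t == 7: synthesize x * 8 == x << 3, then
 x * 7 == (x << 3) - x. */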
819126a6 2780
5322d07e 2781 op_cost = add_cost (speed, mode);
26276705
RS
2782 new_limit.cost = best_cost.cost - op_cost;
2783 new_limit.latency = best_cost.latency - op_cost;
2784 synth_mult (alg_in, t + 1, &new_limit, mode);
b2fb324c 2785
26276705
RS
2786 alg_in->cost.cost += op_cost;
2787 alg_in->cost.latency += op_cost;
2788 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
44037a66 2789 {
26276705 2790 best_cost = alg_in->cost;
076701b6 2791 std::swap (alg_in, best_alg);
819126a6
RK
2792 best_alg->log[best_alg->ops] = 0;
2793 best_alg->op[best_alg->ops] = alg_sub_t_m2;
44037a66 2794 }
44037a66 2795 }
819126a6
RK
2796 else
2797 {
c61928d0 2798 /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
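 /* E.g. t == 5: synthesize x * 4 == x << 2, then
 x * 5 == (x << 2) + x. */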
44037a66 2799
5322d07e 2800 op_cost = add_cost (speed, mode);
26276705
RS
2801 new_limit.cost = best_cost.cost - op_cost;
2802 new_limit.latency = best_cost.latency - op_cost;
2803 synth_mult (alg_in, t - 1, &new_limit, mode);
819126a6 2804
26276705
RS
2805 alg_in->cost.cost += op_cost;
2806 alg_in->cost.latency += op_cost;
2807 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
819126a6 2808 {
26276705 2809 best_cost = alg_in->cost;
076701b6 2810 std::swap (alg_in, best_alg);
819126a6
RK
2811 best_alg->log[best_alg->ops] = 0;
2812 best_alg->op[best_alg->ops] = alg_add_t_m2;
819126a6
RK
2813 }
2814 }
ef268d34
KH
2815
2816 /* We may be able to calculate a * -7, a * -15, a * -31, etc
2817 quickly with a - a * n for some appropriate constant n. */
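 /* E.g. a * -7: -orig_t + 1 == 8, so m == 3 and
 a * -7 == a - (a << 3). */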
2818 m = exact_log2 (-orig_t + 1);
2819 if (m >= 0 && m < maxm)
2820 {
35430ca0
KT
2821 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2822 /* If the target has a cheap shift-and-subtract insn use
2823 that in preference to a shift insn followed by a sub insn.
2824 Assume that the shift-and-sub is "atomic" with a latency
2825 equal to its cost, otherwise assume that on superscalar
2826 hardware the shift may be executed concurrently with the
2827 earlier steps in the algorithm. */
2828 if (shiftsub1_cost (speed, mode, m) <= op_cost)
2829 {
2830 op_cost = shiftsub1_cost (speed, mode, m);
2831 op_latency = op_cost;
2832 }
2833 else
2834 op_latency = add_cost (speed, mode);
2835
ef268d34 2836 new_limit.cost = best_cost.cost - op_cost;
35430ca0 2837 new_limit.latency = best_cost.latency - op_latency;
84ddb681
RH
2838 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2839 &new_limit, mode);
ef268d34
KH
2840
2841 alg_in->cost.cost += op_cost;
35430ca0 2842 alg_in->cost.latency += op_latency;
ef268d34
KH
2843 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2844 {
ef268d34 2845 best_cost = alg_in->cost;
076701b6 2846 std::swap (alg_in, best_alg);
ef268d34
KH
2847 best_alg->log[best_alg->ops] = m;
2848 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2849 }
2850 }
2851
7b13ee6b
KH
2852 if (cache_hit)
2853 goto done;
819126a6 2854 }
63610db9 2855
44037a66 2856 /* Look for factors of t of the form
7963ac37 2857 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
44037a66 2858 If we find such a factor, we can multiply by t using an algorithm that
7963ac37 2859 multiplies by q, shifts the result by m and adds/subtracts it to itself.
44037a66 2860
7963ac37
RK
2861 We search for large factors first and loop down, even if large factors
2862 are less probable than small; if we find a large factor we will find a
2863 good sequence quickly, and therefore be able to prune (by decreasing
2864 COST_LIMIT) the search. */
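 /* E.g. t == 45 == 5 * (2**3 + 1): first synthesize y = x * 5
 == (x << 2) + x, then x * 45 == (y << 3) + y, two shift-add
 steps in all. */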
2865
7b13ee6b 2866 do_alg_addsub_factor:
7963ac37 2867 for (m = floor_log2 (t - 1); m >= 2; m--)
44037a66 2868 {
7963ac37 2869 unsigned HOST_WIDE_INT d;
44037a66 2870
fecfbfa4 2871 d = (HOST_WIDE_INT_1U << m) + 1;
7b13ee6b
KH
2872 if (t % d == 0 && t > d && m < maxm
2873 && (!cache_hit || cache_alg == alg_add_factor))
44037a66 2874 {
5322d07e 2875 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
35430ca0
KT
2876 if (shiftadd_cost (speed, mode, m) <= op_cost)
2877 op_cost = shiftadd_cost (speed, mode, m);
2878
2879 op_latency = op_cost;
2880
26276705
RS
2881
2882 new_limit.cost = best_cost.cost - op_cost;
2883 new_limit.latency = best_cost.latency - op_latency;
2884 synth_mult (alg_in, t / d, &new_limit, mode);
44037a66 2885
26276705
RS
2886 alg_in->cost.cost += op_cost;
2887 alg_in->cost.latency += op_latency;
2888 if (alg_in->cost.latency < op_cost)
2889 alg_in->cost.latency = op_cost;
2890 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
44037a66 2891 {
26276705 2892 best_cost = alg_in->cost;
076701b6 2893 std::swap (alg_in, best_alg);
b385aeda 2894 best_alg->log[best_alg->ops] = m;
819126a6 2895 best_alg->op[best_alg->ops] = alg_add_factor;
44037a66 2896 }
c0b262c1
TG
2897 /* Other factors will have been taken care of in the recursion. */
2898 break;
44037a66
TG
2899 }
2900
fecfbfa4 2901 d = (HOST_WIDE_INT_1U << m) - 1;
7b13ee6b
KH
2902 if (t % d == 0 && t > d && m < maxm
2903 && (!cache_hit || cache_alg == alg_sub_factor))
44037a66 2904 {
5322d07e 2905 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
35430ca0
KT
2906 if (shiftsub0_cost (speed, mode, m) <= op_cost)
2907 op_cost = shiftsub0_cost (speed, mode, m);
2908
2909 op_latency = op_cost;
26276705
RS
2910
2911 new_limit.cost = best_cost.cost - op_cost;
417c735c 2912 new_limit.latency = best_cost.latency - op_latency;
26276705 2913 synth_mult (alg_in, t / d, &new_limit, mode);
44037a66 2914
26276705
RS
2915 alg_in->cost.cost += op_cost;
2916 alg_in->cost.latency += op_latency;
2917 if (alg_in->cost.latency < op_cost)
2918 alg_in->cost.latency = op_cost;
2919 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
44037a66 2920 {
26276705 2921 best_cost = alg_in->cost;
076701b6 2922 std::swap (alg_in, best_alg);
b385aeda 2923 best_alg->log[best_alg->ops] = m;
819126a6 2924 best_alg->op[best_alg->ops] = alg_sub_factor;
44037a66 2925 }
c0b262c1 2926 break;
44037a66
TG
2927 }
2928 }
7b13ee6b
KH
2929 if (cache_hit)
2930 goto done;
44037a66 2931
7963ac37
RK
2932 /* Try shift-and-add (load effective address) instructions,
2933 i.e. do a*3, a*5, a*9. */
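 /* E.g. t == 9 gives q == 8 and m == 3, so x * 9 is synthesized
 as (x << 3) + x -- a single shift-add (one LEA on x86). */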
2934 if ((t & 1) != 0)
2935 {
7b13ee6b 2936 do_alg_add_t2_m:
7963ac37 2937 q = t - 1;
146ec50f
JM
2938 m = ctz_hwi (q);
2939 if (q && m < maxm)
b385aeda 2940 {
5322d07e 2941 op_cost = shiftadd_cost (speed, mode, m);
26276705
RS
2942 new_limit.cost = best_cost.cost - op_cost;
2943 new_limit.latency = best_cost.latency - op_cost;
2944 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2945
2946 alg_in->cost.cost += op_cost;
2947 alg_in->cost.latency += op_cost;
2948 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
5eebe2eb 2949 {
26276705 2950 best_cost = alg_in->cost;
076701b6 2951 std::swap (alg_in, best_alg);
5eebe2eb 2952 best_alg->log[best_alg->ops] = m;
819126a6 2953 best_alg->op[best_alg->ops] = alg_add_t2_m;
5eebe2eb 2954 }
7963ac37 2955 }
7b13ee6b
KH
2956 if (cache_hit)
2957 goto done;
44037a66 2958
7b13ee6b 2959 do_alg_sub_t2_m:
7963ac37 2960 q = t + 1;
146ec50f
JM
2961 m = ctz_hwi (q);
2962 if (q && m < maxm)
b385aeda 2963 {
5322d07e 2964 op_cost = shiftsub0_cost (speed, mode, m);
26276705
RS
2965 new_limit.cost = best_cost.cost - op_cost;
2966 new_limit.latency = best_cost.latency - op_cost;
2967 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2968
2969 alg_in->cost.cost += op_cost;
2970 alg_in->cost.latency += op_cost;
2971 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
5eebe2eb 2972 {
26276705 2973 best_cost = alg_in->cost;
076701b6 2974 std::swap (alg_in, best_alg);
5eebe2eb 2975 best_alg->log[best_alg->ops] = m;
819126a6 2976 best_alg->op[best_alg->ops] = alg_sub_t2_m;
5eebe2eb 2977 }
7963ac37 2978 }
7b13ee6b
KH
2979 if (cache_hit)
2980 goto done;
7963ac37 2981 }
44037a66 2982
7b13ee6b 2983 done:
3ab0f290
DJ
2984 /* If best_cost has not decreased, we have not found any algorithm. */
2985 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
0178027c
KH
2986 {
2987 /* We failed to find an algorithm. Record alg_impossible for
2988 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2989 we are asked to find an algorithm for T within the same or
2990 lower COST_LIMIT, we can immediately return to the
2991 caller. */
5322d07e
NF
2992 entry_ptr->t = t;
2993 entry_ptr->mode = mode;
2994 entry_ptr->speed = speed;
2995 entry_ptr->alg = alg_impossible;
2996 entry_ptr->cost = *cost_limit;
0178027c
KH
2997 return;
2998 }
3ab0f290 2999
7b13ee6b
KH
3000 /* Cache the result. */
3001 if (!cache_hit)
3002 {
5322d07e
NF
3003 entry_ptr->t = t;
3004 entry_ptr->mode = mode;
3005 entry_ptr->speed = speed;
3006 entry_ptr->alg = best_alg->op[best_alg->ops];
3007 entry_ptr->cost.cost = best_cost.cost;
3008 entry_ptr->cost.latency = best_cost.latency;
7b13ee6b
KH
3009 }
3010
52786026
RK
3011 /* If the sequence we found is too long for `struct algorithm'
3012 to record, make this search fail. */
3013 if (best_alg->ops == MAX_BITS_PER_WORD)
3014 return;
3015
819126a6
RK
3016 /* Copy the algorithm from temporary space to the space at alg_out.
3017 We avoid using structure assignment because the majority of
3018 best_alg is normally undefined, and this is a critical function. */
3019 alg_out->ops = best_alg->ops + 1;
26276705 3020 alg_out->cost = best_cost;
4e135bdd
KG
3021 memcpy (alg_out->op, best_alg->op,
3022 alg_out->ops * sizeof *alg_out->op);
3023 memcpy (alg_out->log, best_alg->log,
3024 alg_out->ops * sizeof *alg_out->log);
44037a66
TG
3025}
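
A standalone sketch (editorial; the function name and the chosen recipe are illustrative, not necessarily what synth_mult picks on a given target) of the kind of shift-and-add recipe the search above discovers, here for t = 45 = 5 * 9, i.e. two alg_add_factor steps:

#include <assert.h>

static unsigned int
mult45 (unsigned int a)
{
  unsigned int t = (a << 2) + a;	/* t = a * 5      (alg_add_factor) */
  return (t << 3) + t;			/* t * 9 = a * 45 (alg_add_factor) */
}

int
main (void)
{
  for (unsigned int a = 0; a < 100000; a++)
    assert (mult45 (a) == a * 45);
  return 0;
}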
3026\f
d1a6adeb 3027/* Find the cheapest way of multiplying a value of mode MODE by VAL.
8efc8980
RS
3028 Try three variations:
3029
3030 - a shift/add sequence based on VAL itself
3031 - a shift/add sequence based on -VAL, followed by a negation
3032 - a shift/add sequence based on VAL - 1, followed by an addition.
3033
f258e38b
UW
3034 Return true if the cheapest of these costs less than MULT_COST,
3035 describing the algorithm in *ALG and final fixup in *VARIANT. */
8efc8980 3036
ec573d17 3037bool
ef4bddc2 3038choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
f258e38b
UW
3039 struct algorithm *alg, enum mult_variant *variant,
3040 int mult_cost)
8efc8980 3041{
8efc8980 3042 struct algorithm alg2;
26276705
RS
3043 struct mult_cost limit;
3044 int op_cost;
f40751dd 3045 bool speed = optimize_insn_for_speed_p ();
8efc8980 3046
18eaea7f
RS
3047 /* Fail quickly for impossible bounds. */
3048 if (mult_cost < 0)
3049 return false;
3050
3051 /* Ensure that mult_cost provides a reasonable upper bound.
3052 Any constant multiplication can be performed with fewer
3053 than 2 * bits additions. */
5322d07e 3054 op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
18eaea7f
RS
3055 if (mult_cost > op_cost)
3056 mult_cost = op_cost;
3057
8efc8980 3058 *variant = basic_variant;
26276705
RS
3059 limit.cost = mult_cost;
3060 limit.latency = mult_cost;
3061 synth_mult (alg, val, &limit, mode);
8efc8980
RS
3062
3063 /* This works only if the inverted value actually fits in an
3064 `unsigned int'. */
84ddb681 3065 if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
8efc8980 3066 {
c3284718 3067 op_cost = neg_cost (speed, mode);
26276705
RS
3068 if (MULT_COST_LESS (&alg->cost, mult_cost))
3069 {
3070 limit.cost = alg->cost.cost - op_cost;
3071 limit.latency = alg->cost.latency - op_cost;
3072 }
3073 else
3074 {
3075 limit.cost = mult_cost - op_cost;
3076 limit.latency = mult_cost - op_cost;
3077 }
3078
3079 synth_mult (&alg2, -val, &limit, mode);
3080 alg2.cost.cost += op_cost;
3081 alg2.cost.latency += op_cost;
3082 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
8efc8980
RS
3083 *alg = alg2, *variant = negate_variant;
3084 }
3085
3086 /* This proves very useful for division-by-constant. */
5322d07e 3087 op_cost = add_cost (speed, mode);
26276705
RS
3088 if (MULT_COST_LESS (&alg->cost, mult_cost))
3089 {
3090 limit.cost = alg->cost.cost - op_cost;
3091 limit.latency = alg->cost.latency - op_cost;
3092 }
3093 else
3094 {
3095 limit.cost = mult_cost - op_cost;
3096 limit.latency = mult_cost - op_cost;
3097 }
3098
3099 synth_mult (&alg2, val - 1, &limit, mode);
3100 alg2.cost.cost += op_cost;
3101 alg2.cost.latency += op_cost;
3102 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
8efc8980
RS
3103 *alg = alg2, *variant = add_variant;
3104
26276705 3105 return MULT_COST_LESS (&alg->cost, mult_cost);
8efc8980
RS
3106}
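
A standalone illustration (editorial; these recipes are plausible examples, not guaranteed to be the ones synth_mult returns) of two of the variants weighed above, for VAL = 7:

#include <assert.h>

static unsigned int
mul7_basic (unsigned int a)
{
  return (a << 3) - a;			/* VAL itself: 7 = 8 - 1 */
}

static unsigned int
mul7_add_variant (unsigned int a)
{
  unsigned int t = ((a << 1) + a) << 1;	/* a * (VAL - 1) = a * 6 */
  return t + a;				/* final fixup: one addition */
}

int
main (void)
{
  for (unsigned int a = 0; a < 100000; a++)
    assert (mul7_basic (a) == a * 7 && mul7_add_variant (a) == a * 7);
  return 0;
}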
3107
3108/* A subroutine of expand_mult, used for constant multiplications.
3109 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
3110 convenient. Use the shift/add sequence described by ALG and apply
3111 the final fixup specified by VARIANT. */
3112
3113static rtx
ef4bddc2 3114expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
8efc8980
RS
3115 rtx target, const struct algorithm *alg,
3116 enum mult_variant variant)
3117{
1b13411a 3118 unsigned HOST_WIDE_INT val_so_far;
f3f6fb16
DM
3119 rtx_insn *insn;
3120 rtx accum, tem;
8efc8980 3121 int opno;
ef4bddc2 3122 machine_mode nmode;
8efc8980 3123
d448860e
JH
3124 /* Avoid referencing memory over and over and invalid sharing
3125 on SUBREGs. */
3126 op0 = force_reg (mode, op0);
8efc8980
RS
3127
3128 /* ACCUM starts out either as OP0 or as a zero, depending on
3129 the first operation. */
3130
3131 if (alg->op[0] == alg_zero)
3132 {
84ddb681 3133 accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
8efc8980
RS
3134 val_so_far = 0;
3135 }
3136 else if (alg->op[0] == alg_m)
3137 {
3138 accum = copy_to_mode_reg (mode, op0);
3139 val_so_far = 1;
3140 }
3141 else
5b0264cb 3142 gcc_unreachable ();
8efc8980
RS
3143
3144 for (opno = 1; opno < alg->ops; opno++)
3145 {
3146 int log = alg->log[opno];
7c27e184 3147 rtx shift_subtarget = optimize ? 0 : accum;
8efc8980
RS
3148 rtx add_target
3149 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
7c27e184 3150 && !optimize)
8efc8980 3151 ? target : 0;
7c27e184 3152 rtx accum_target = optimize ? 0 : accum;
7543f918 3153 rtx accum_inner;
8efc8980
RS
3154
3155 switch (alg->op[opno])
3156 {
3157 case alg_shift:
eb6c3df1 3158 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
4caa21a1
UB
3159 /* REG_EQUAL note will be attached to the following insn. */
3160 emit_move_insn (accum, tem);
8efc8980
RS
3161 val_so_far <<= log;
3162 break;
3163
3164 case alg_add_t_m2:
eb6c3df1 3165 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
8efc8980
RS
3166 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3167 add_target ? add_target : accum_target);
1b13411a 3168 val_so_far += HOST_WIDE_INT_1U << log;
8efc8980
RS
3169 break;
3170
3171 case alg_sub_t_m2:
eb6c3df1 3172 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
8efc8980
RS
3173 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3174 add_target ? add_target : accum_target);
1b13411a 3175 val_so_far -= HOST_WIDE_INT_1U << log;
8efc8980
RS
3176 break;
3177
3178 case alg_add_t2_m:
3179 accum = expand_shift (LSHIFT_EXPR, mode, accum,
eb6c3df1 3180 log, shift_subtarget, 0);
8efc8980
RS
3181 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3182 add_target ? add_target : accum_target);
3183 val_so_far = (val_so_far << log) + 1;
3184 break;
3185
3186 case alg_sub_t2_m:
3187 accum = expand_shift (LSHIFT_EXPR, mode, accum,
eb6c3df1 3188 log, shift_subtarget, 0);
8efc8980
RS
3189 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3190 add_target ? add_target : accum_target);
3191 val_so_far = (val_so_far << log) - 1;
3192 break;
3193
3194 case alg_add_factor:
eb6c3df1 3195 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
8efc8980
RS
3196 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3197 add_target ? add_target : accum_target);
3198 val_so_far += val_so_far << log;
3199 break;
3200
3201 case alg_sub_factor:
eb6c3df1 3202 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
8efc8980 3203 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
7c27e184
PB
3204 (add_target
3205 ? add_target : (optimize ? 0 : tem)));
8efc8980
RS
3206 val_so_far = (val_so_far << log) - val_so_far;
3207 break;
3208
3209 default:
5b0264cb 3210 gcc_unreachable ();
8efc8980
RS
3211 }
3212
84ddb681 3213 if (SCALAR_INT_MODE_P (mode))
8efc8980 3214 {
84ddb681
RH
3215 /* Write a REG_EQUAL note on the last insn so that we can cse
3216 multiplication sequences. Note that if ACCUM is a SUBREG,
3217 we've set the inner register and must properly indicate that. */
3218 tem = op0, nmode = mode;
3219 accum_inner = accum;
3220 if (GET_CODE (accum) == SUBREG)
3221 {
3222 accum_inner = SUBREG_REG (accum);
3223 nmode = GET_MODE (accum_inner);
3224 tem = gen_lowpart (nmode, op0);
3225 }
8efc8980 3226
84ddb681
RH
3227 insn = get_last_insn ();
3228 set_dst_reg_note (insn, REG_EQUAL,
4789c0ce
RS
3229 gen_rtx_MULT (nmode, tem,
3230 gen_int_mode (val_so_far, nmode)),
84ddb681
RH
3231 accum_inner);
3232 }
8efc8980
RS
3233 }
3234
3235 if (variant == negate_variant)
3236 {
3237 val_so_far = -val_so_far;
3238 accum = expand_unop (mode, neg_optab, accum, target, 0);
3239 }
3240 else if (variant == add_variant)
3241 {
3242 val_so_far = val_so_far + 1;
3243 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3244 }
3245
42eb30b5
ZW
3246 /* Compare only the bits of val and val_so_far that are significant
3247 in the result mode, to avoid sign-/zero-extension confusion. */
84ddb681 3248 nmode = GET_MODE_INNER (mode);
84ddb681
RH
3249 val &= GET_MODE_MASK (nmode);
3250 val_so_far &= GET_MODE_MASK (nmode);
1b13411a 3251 gcc_assert (val == (HOST_WIDE_INT) val_so_far);
8efc8980
RS
3252
3253 return accum;
3254}
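
A worked, standalone trace (editorial; this is one valid recipe for VAL = 10, not necessarily the cheapest one) of how the loop above advances val_so_far as each operation is emitted:

#include <assert.h>

static unsigned int
mul10 (unsigned int op0)
{
  unsigned int accum = op0;	/* alg_m:        val_so_far = 1  */
  accum += op0 << 2;		/* alg_add_t_m2: val_so_far = 5  */
  accum <<= 1;			/* alg_shift:    val_so_far = 10 */
  return accum;
}

int
main (void)
{
  for (unsigned int x = 0; x < 100000; x++)
    assert (mul10 (x) == x * 10);
  return 0;
}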
3255
44037a66
TG
3256/* Perform a multiplication and return an rtx for the result.
3257 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3258 TARGET is a suggestion for where to store the result (an rtx).
3259
3260 We check specially for a constant integer as OP1.
3261 If you want this check for OP0 as well, then before calling
3262 you should swap the two operands if OP0 would be constant. */
3263
3264rtx
ef4bddc2 3265expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
f2593a66 3266 int unsignedp)
44037a66 3267{
8efc8980
RS
3268 enum mult_variant variant;
3269 struct algorithm algorithm;
84ddb681 3270 rtx scalar_op1;
65dc9350 3271 int max_cost;
f40751dd 3272 bool speed = optimize_insn_for_speed_p ();
84ddb681 3273 bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
44037a66 3274
84ddb681 3275 if (CONSTANT_P (op0))
4c278134 3276 std::swap (op0, op1);
84ddb681
RH
3277
3278 /* For vectors, there are several simplifications that can be made if
3279 all elements of the vector constant are identical. */
92695fbb 3280 scalar_op1 = unwrap_const_vec_duplicate (op1);
84ddb681
RH
3281
3282 if (INTEGRAL_MODE_P (mode))
3283 {
3284 rtx fake_reg;
caf62455
JDA
3285 HOST_WIDE_INT coeff;
3286 bool is_neg;
84ddb681
RH
3287 int mode_bitsize;
3288
3289 if (op1 == CONST0_RTX (mode))
3290 return op1;
3291 if (op1 == CONST1_RTX (mode))
3292 return op0;
3293 if (op1 == CONSTM1_RTX (mode))
3294 return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3295 op0, target, 0);
3296
3297 if (do_trapv)
3298 goto skip_synth;
3299
66b3ed5f
JJ
3300 /* If mode is an integer vector mode, check whether the backend supports
3301 vector lshift (by scalar or vector) at all. If not, we can't use
3302 a synthesized multiply. */
3303 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3304 && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
3305 && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
3306 goto skip_synth;
3307
84ddb681
RH
3308 /* These are the operations that are potentially turned into
3309 a sequence of shifts and additions. */
3310 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
65dc9350
RS
3311
3312 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3313 less than or equal in size to `unsigned int' this doesn't matter.
3314 If the mode is larger than `unsigned int', then synth_mult works
3315 only if the constant value exactly fits in an `unsigned int' without
3316 any truncation. This means that multiplying by negative values does
3317 not work; results are off by 2^32 on a 32-bit machine. */
84ddb681 3318 if (CONST_INT_P (scalar_op1))
58b42e19 3319 {
84ddb681
RH
3320 coeff = INTVAL (scalar_op1);
3321 is_neg = coeff < 0;
65dc9350 3322 }
807e902e
KZ
3323#if TARGET_SUPPORTS_WIDE_INT
3324 else if (CONST_WIDE_INT_P (scalar_op1))
3325#else
48175537 3326 else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
807e902e 3327#endif
65dc9350 3328 {
f079167a 3329 int shift = wi::exact_log2 (rtx_mode_t (scalar_op1, mode));
807e902e
KZ
3330 /* Perfect power of 2 (other than 1, which is handled above). */
3331 if (shift > 0)
3332 return expand_shift (LSHIFT_EXPR, mode, op0,
3333 shift, target, unsignedp);
caf62455
JDA
3334 else
3335 goto skip_synth;
65dc9350 3336 }
84ddb681
RH
3337 else
3338 goto skip_synth;
b8698a0f 3339
65dc9350
RS
3340 /* We used to test optimize here, on the grounds that it's better to
3341 produce a smaller program when -O is not used. But this causes
3342 such a terrible slowdown sometimes that it seems better to always
3343 use synth_mult. */
65dc9350 3344
84ddb681 3345 /* Special case powers of two. */
be63b77d
JJ
3346 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3347 && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
84ddb681
RH
3348 return expand_shift (LSHIFT_EXPR, mode, op0,
3349 floor_log2 (coeff), target, unsignedp);
3350
3351 fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3352
3353 /* Attempt to handle multiplication of DImode values by negative
3354 coefficients, by performing the multiplication by a positive
3355 multiplier and then inverting the result. */
84ddb681
RH
3356 if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3357 {
3358 /* It's safe to use -coeff even for INT_MIN, as the
3359 result is interpreted as an unsigned coefficient.
3360 Exclude cost of op0 from max_cost to match the cost
5e839bc8 3361 calculation of synth_mult. */
be63b77d 3362 coeff = -(unsigned HOST_WIDE_INT) coeff;
e548c9df
AM
3363 max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
3364 mode, speed)
c3284718 3365 - neg_cost (speed, mode));
be63b77d
JJ
3366 if (max_cost <= 0)
3367 goto skip_synth;
3368
3369 /* Special case powers of two. */
3370 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3371 {
3372 rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3373 floor_log2 (coeff), target, unsignedp);
3374 return expand_unop (mode, neg_optab, temp, target, 0);
3375 }
3376
3377 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3378 max_cost))
84ddb681 3379 {
be63b77d 3380 rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
84ddb681
RH
3381 &algorithm, variant);
3382 return expand_unop (mode, neg_optab, temp, target, 0);
3383 }
b216b86b 3384 goto skip_synth;
58b42e19 3385 }
44037a66 3386
84ddb681
RH
3387 /* Exclude cost of op0 from max_cost to match the cost
3388 calculation of synth_mult. */
e548c9df 3389 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), mode, speed);
84ddb681
RH
3390 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3391 return expand_mult_const (mode, op0, coeff, target,
3392 &algorithm, variant);
f2593a66 3393 }
84ddb681 3394 skip_synth:
f2593a66
RS
3395
3396 /* Expand x*2.0 as x+x. */
34a72c33
RS
3397 if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1)
3398 && real_equal (CONST_DOUBLE_REAL_VALUE (scalar_op1), &dconst2))
f2593a66 3399 {
34a72c33
RS
3400 op0 = force_reg (GET_MODE (op0), op0);
3401 return expand_binop (mode, add_optab, op0, op0,
3402 target, unsignedp, OPTAB_LIB_WIDEN);
f2593a66
RS
3403 }
3404
819126a6
RK
3405 /* This used to use umul_optab if unsigned, but for non-widening multiply
3406 there is no difference between signed and unsigned. */
84ddb681 3407 op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
44037a66 3408 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
5b0264cb 3409 gcc_assert (op0);
44037a66
TG
3410 return op0;
3411}
5b58b39b 3412
6dd8f4bb
BS
3413/* Return a cost estimate for multiplying a register by the given
3414 COEFFicient in the given MODE and SPEED. */
3415
3416int
ef4bddc2 3417mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
6dd8f4bb
BS
3418{
3419 int max_cost;
3420 struct algorithm algorithm;
3421 enum mult_variant variant;
3422
3423 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
e548c9df
AM
3424 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg),
3425 mode, speed);
6dd8f4bb
BS
3426 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3427 return algorithm.cost.cost;
3428 else
3429 return max_cost;
3430}
3431
5b58b39b
BS
3432/* Perform a widening multiplication and return an rtx for the result.
3433 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3434 TARGET is a suggestion for where to store the result (an rtx).
3435 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3436 or smul_widen_optab.
3437
3438 We check specially for a constant integer as OP1, comparing the
3439 cost of a widening multiply against the cost of a sequence of shifts
3440 and adds. */
3441
3442rtx
ef4bddc2 3443expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
5b58b39b
BS
3444 int unsignedp, optab this_optab)
3445{
3446 bool speed = optimize_insn_for_speed_p ();
e7ef91dc 3447 rtx cop1;
5b58b39b
BS
3448
3449 if (CONST_INT_P (op1)
e7ef91dc
JJ
3450 && GET_MODE (op0) != VOIDmode
3451 && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3452 this_optab == umul_widen_optab))
3453 && CONST_INT_P (cop1)
3454 && (INTVAL (cop1) >= 0
46c9550f 3455 || HWI_COMPUTABLE_MODE_P (mode)))
5b58b39b 3456 {
e7ef91dc 3457 HOST_WIDE_INT coeff = INTVAL (cop1);
5b58b39b
BS
3458 int max_cost;
3459 enum mult_variant variant;
3460 struct algorithm algorithm;
3461
e9082138
JJ
3462 if (coeff == 0)
3463 return CONST0_RTX (mode);
3464
5b58b39b
BS
3465 /* Special case powers of two. */
3466 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3467 {
3468 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3469 return expand_shift (LSHIFT_EXPR, mode, op0,
eb6c3df1 3470 floor_log2 (coeff), target, unsignedp);
5b58b39b
BS
3471 }
3472
3473 /* Exclude cost of op0 from max_cost to match the cost
3474 calculation of synth_mult. */
5322d07e 3475 max_cost = mul_widen_cost (speed, mode);
5b58b39b
BS
3476 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3477 max_cost))
3478 {
3479 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3480 return expand_mult_const (mode, op0, coeff, target,
3481 &algorithm, variant);
3482 }
3483 }
3484 return expand_binop (mode, this_optab, op0, op1, target,
3485 unsignedp, OPTAB_LIB_WIDEN);
3486}
44037a66 3487\f
55c2d311
TG
3488/* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3489 replace division by D, and put the least significant N bits of the result
3490 in *MULTIPLIER_PTR and return the most significant bit.
3491
3492 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT); the
3493 needed precision is in PRECISION (should be <= N).
3494
3495 PRECISION should be as small as possible so this function can choose
3496 the multiplier more freely.
3497
3498 The rounded-up logarithm of D is placed in *LGUP_PTR. A shift count that
3499 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3500
3501 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3502 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
3503
55c2d311 3504unsigned HOST_WIDE_INT
502b8322 3505choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
079c527f
JJ
3506 unsigned HOST_WIDE_INT *multiplier_ptr,
3507 int *post_shift_ptr, int *lgup_ptr)
55c2d311 3508{
55c2d311
TG
3509 int lgup, post_shift;
3510 int pow, pow2;
55c2d311
TG
3511
3512 /* lgup = ceil(log2(divisor)); */
3513 lgup = ceil_log2 (d);
3514
5b0264cb 3515 gcc_assert (lgup <= n);
55c2d311
TG
3516
3517 pow = n + lgup;
3518 pow2 = n + lgup - precision;
3519
55c2d311 3520 /* mlow = 2^(N + lgup)/d */
807e902e
KZ
3521 wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
3522 wide_int mlow = wi::udiv_trunc (val, d);
55c2d311 3523
9be0ac8c 3524 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
807e902e
KZ
3525 val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3526 wide_int mhigh = wi::udiv_trunc (val, d);
55c2d311
TG
3527
3528 /* If precision == N, then mlow, mhigh exceed 2^N
3529 (but they do not exceed 2^(N+1)). */
3530
f9da5064 3531 /* Reduce to lowest terms. */
55c2d311
TG
3532 for (post_shift = lgup; post_shift > 0; post_shift--)
3533 {
807e902e
KZ
3534 unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3535 HOST_BITS_PER_WIDE_INT);
3536 unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3537 HOST_BITS_PER_WIDE_INT);
55c2d311
TG
3538 if (ml_lo >= mh_lo)
3539 break;
3540
807e902e
KZ
3541 mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3542 mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
55c2d311
TG
3543 }
3544
3545 *post_shift_ptr = post_shift;
3546 *lgup_ptr = lgup;
3547 if (n < HOST_BITS_PER_WIDE_INT)
3548 {
fecfbfa4 3549 unsigned HOST_WIDE_INT mask = (HOST_WIDE_INT_1U << n) - 1;
807e902e
KZ
3550 *multiplier_ptr = mhigh.to_uhwi () & mask;
3551 return mhigh.to_uhwi () >= mask;
55c2d311
TG
3552 }
3553 else
3554 {
807e902e
KZ
3555 *multiplier_ptr = mhigh.to_uhwi ();
3556 return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
55c2d311
TG
3557 }
3558}
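
A standalone check (editorial) of the result this function should produce for d = 3 with n = precision = 32: the multiplier is 0xAAAAAAAB = ceil(2^33 / 3), *POST_SHIFT_PTR is 1, and the returned most significant bit is 0, so x / 3 == (x * m) >> 33 for every 32-bit x:

#include <assert.h>
#include <stdint.h>

static uint32_t
udiv3 (uint32_t x)
{
  uint32_t hi = (uint32_t) (((uint64_t) x * 0xAAAAAAABu) >> 32);
  return hi >> 1;			/* post_shift = 1 */
}

int
main (void)
{
  uint32_t tests[] = { 0, 1, 2, 3, 4, 5, 100, 0x7FFFFFFF,
		       0xFFFFFFFE, 0xFFFFFFFF };
  for (unsigned int i = 0; i < sizeof tests / sizeof tests[0]; i++)
    assert (udiv3 (tests[i]) == tests[i] / 3);
  return 0;
}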
3559
3560 /* Compute the inverse of X mod 2**N, i.e., find Y such that X * Y is
3561 congruent to 1 (mod 2**N). */
3562
3563static unsigned HOST_WIDE_INT
502b8322 3564invert_mod2n (unsigned HOST_WIDE_INT x, int n)
55c2d311 3565{
0f41302f 3566 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
55c2d311
TG
3567
3568 /* The algorithm notes that the choice y = x satisfies
3569 x*y == 1 mod 2^3, since x is assumed odd.
3570 Each iteration doubles the number of bits of significance in y. */
3571
3572 unsigned HOST_WIDE_INT mask;
3573 unsigned HOST_WIDE_INT y = x;
3574 int nbit = 3;
3575
3576 mask = (n == HOST_BITS_PER_WIDE_INT
dd4786fe 3577 ? HOST_WIDE_INT_M1U
fecfbfa4 3578 : (HOST_WIDE_INT_1U << n) - 1);
55c2d311
TG
3579
3580 while (nbit < n)
3581 {
3582 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3583 nbit *= 2;
3584 }
3585 return y;
3586}
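
The same Newton iteration as a standalone sketch (editorial), for N = 32: y = x is already the inverse mod 2^3 for odd x, and each pass through y = y * (2 - x*y) doubles the number of correct low-order bits:

#include <assert.h>
#include <stdint.h>

static uint32_t
invert_mod2_32 (uint32_t x)		/* X must be odd.  */
{
  uint32_t y = x;			/* correct mod 2^3 */
  for (int nbit = 3; nbit < 32; nbit *= 2)
    y = y * (2 - x * y);		/* now correct mod 2^(2*nbit) */
  return y;
}

int
main (void)
{
  for (uint32_t x = 1; x < 100000; x += 2)
    assert ((uint32_t) (x * invert_mod2_32 (x)) == 1);
  return 0;
}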
3587
3588 /* Emit code to adjust ADJ_OPERAND after a multiplication of OP0 and OP1
3589 done with the wrong signedness. ADJ_OPERAND is already the high half
3590 of the product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed
3591 product to become unsigned; if UNSIGNEDP is zero, adjust the unsigned
3592 product to become signed.
3593
3594 The result is put in TARGET if that is convenient.
3595
3596 MODE is the mode of operation. */
3597
3598rtx
ef4bddc2 3599expand_mult_highpart_adjust (machine_mode mode, rtx adj_operand, rtx op0,
502b8322 3600 rtx op1, rtx target, int unsignedp)
55c2d311
TG
3601{
3602 rtx tem;
3603 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3604
3605 tem = expand_shift (RSHIFT_EXPR, mode, op0,
eb6c3df1 3606 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
22273300 3607 tem = expand_and (mode, tem, op1, NULL_RTX);
38a448ca
RH
3608 adj_operand
3609 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3610 adj_operand);
55c2d311
TG
3611
3612 tem = expand_shift (RSHIFT_EXPR, mode, op1,
eb6c3df1 3613 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
22273300 3614 tem = expand_and (mode, tem, op0, NULL_RTX);
38a448ca
RH
3615 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3616 target);
55c2d311
TG
3617
3618 return target;
3619}
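
A standalone check (editorial; it relies on the usual arithmetic right shift of negative signed values) of the identity this routine implements, shown for a 32-bit mode: the signed high part equals the unsigned high part minus op1 when op0 < 0 and minus op0 when op1 < 0, and symmetrically with additions in the other direction:

#include <assert.h>
#include <stdint.h>

static int32_t
smul_hi_via_umul (int32_t op0, int32_t op1)
{
  uint32_t uhi = (uint32_t) (((uint64_t) (uint32_t) op0
			      * (uint32_t) op1) >> 32);
  uhi -= (uint32_t) ((op0 >> 31) & op1);	/* subtract op1 if op0 < 0 */
  uhi -= (uint32_t) ((op1 >> 31) & op0);	/* subtract op0 if op1 < 0 */
  return (int32_t) uhi;
}

int
main (void)
{
  int32_t v[] = { 0, 1, -1, 3, -3, 123456789, -123456789,
		  INT32_MAX, INT32_MIN };
  for (unsigned int i = 0; i < 9; i++)
    for (unsigned int j = 0; j < 9; j++)
      assert (smul_hi_via_umul (v[i], v[j])
	      == (int32_t) (((int64_t) v[i] * v[j]) >> 32));
  return 0;
}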
3620
00f07b86 3621/* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */
55c2d311 3622
8efc8980 3623static rtx
ef4bddc2 3624extract_high_half (machine_mode mode, rtx op)
8efc8980 3625{
ef4bddc2 3626 machine_mode wider_mode;
55c2d311 3627
8efc8980
RS
3628 if (mode == word_mode)
3629 return gen_highpart (mode, op);
71af73bb 3630
15ed7b52
JG
3631 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3632
490d0f6c 3633 wider_mode = GET_MODE_WIDER_MODE (mode).require ();
8efc8980 3634 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
eb6c3df1 3635 GET_MODE_BITSIZE (mode), 0, 1);
8efc8980
RS
3636 return convert_modes (mode, wider_mode, op, 0);
3637}
55c2d311 3638
00f07b86 3639/* Like expmed_mult_highpart, but only consider using a multiplication
8efc8980
RS
3640 optab. OP1 is an rtx for the constant operand. */
3641
3642static rtx
ef4bddc2 3643expmed_mult_highpart_optab (machine_mode mode, rtx op0, rtx op1,
8efc8980 3644 rtx target, int unsignedp, int max_cost)
55c2d311 3645{
665acd1e 3646 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
ef4bddc2 3647 machine_mode wider_mode;
55c2d311
TG
3648 optab moptab;
3649 rtx tem;
8efc8980 3650 int size;
f40751dd 3651 bool speed = optimize_insn_for_speed_p ();
55c2d311 3652
15ed7b52
JG
3653 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3654
490d0f6c 3655 wider_mode = GET_MODE_WIDER_MODE (mode).require ();
8efc8980 3656 size = GET_MODE_BITSIZE (mode);
55c2d311
TG
3657
3658 /* Firstly, try using a multiplication insn that only generates the needed
3659 high part of the product, and in the sign flavor of unsignedp. */
5322d07e 3660 if (mul_highpart_cost (speed, mode) < max_cost)
71af73bb 3661 {
8efc8980 3662 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
665acd1e 3663 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
8efc8980
RS
3664 unsignedp, OPTAB_DIRECT);
3665 if (tem)
3666 return tem;
71af73bb 3667 }
55c2d311
TG
3668
3669 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3670 Need to adjust the result after the multiplication. */
02a65aef 3671 if (size - 1 < BITS_PER_WORD
5322d07e
NF
3672 && (mul_highpart_cost (speed, mode)
3673 + 2 * shift_cost (speed, mode, size-1)
3674 + 4 * add_cost (speed, mode) < max_cost))
71af73bb 3675 {
8efc8980 3676 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
665acd1e 3677 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
8efc8980
RS
3678 unsignedp, OPTAB_DIRECT);
3679 if (tem)
71af73bb 3680 /* We used the wrong signedness. Adjust the result. */
77278891 3681 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
8efc8980 3682 tem, unsignedp);
71af73bb 3683 }
55c2d311 3684
71af73bb 3685 /* Try widening multiplication. */
55c2d311 3686 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
4d8752f0 3687 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
5322d07e 3688 && mul_widen_cost (speed, wider_mode) < max_cost)
a295d331 3689 {
665acd1e 3690 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
8efc8980
RS
3691 unsignedp, OPTAB_WIDEN);
3692 if (tem)
3693 return extract_high_half (mode, tem);
c410d49e 3694 }
71af73bb
TG
3695
3696 /* Try widening the mode and perform a non-widening multiplication. */
947131ba 3697 if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
02a65aef 3698 && size - 1 < BITS_PER_WORD
5322d07e
NF
3699 && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3700 < max_cost))
a295d331 3701 {
f3f6fb16
DM
3702 rtx_insn *insns;
3703 rtx wop0, wop1;
82dfb9a5
RS
3704
3705 /* We need to widen the operands, for example to ensure the
3706 constant multiplier is correctly sign or zero extended.
3707 Use a sequence to clean-up any instructions emitted by
3708 the conversions if things don't work out. */
3709 start_sequence ();
3710 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3711 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3712 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
8efc8980 3713 unsignedp, OPTAB_WIDEN);
82dfb9a5
RS
3714 insns = get_insns ();
3715 end_sequence ();
3716
8efc8980 3717 if (tem)
82dfb9a5
RS
3718 {
3719 emit_insn (insns);
3720 return extract_high_half (mode, tem);
3721 }
a295d331 3722 }
71af73bb
TG
3723
3724 /* Try widening multiplication of opposite signedness, and adjust. */
3725 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
4d8752f0 3726 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
02a65aef 3727 && size - 1 < BITS_PER_WORD
5322d07e
NF
3728 && (mul_widen_cost (speed, wider_mode)
3729 + 2 * shift_cost (speed, mode, size-1)
3730 + 4 * add_cost (speed, mode) < max_cost))
55c2d311 3731 {
665acd1e 3732 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
71af73bb
TG
3733 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3734 if (tem != 0)
55c2d311 3735 {
8efc8980 3736 tem = extract_high_half (mode, tem);
71af73bb 3737 /* We used the wrong signedness. Adjust the result. */
77278891 3738 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
71af73bb 3739 target, unsignedp);
55c2d311 3740 }
55c2d311
TG
3741 }
3742
71af73bb 3743 return 0;
8efc8980 3744}
71af73bb 3745
0d282692
RS
3746/* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3747 putting the high half of the result in TARGET if that is convenient,
3748 and return where the result is. If the operation cannot be performed,
3749 0 is returned.
55c2d311 3750
8efc8980
RS
3751 MODE is the mode of operation and result.
3752
3753 UNSIGNEDP nonzero means unsigned multiply.
3754
3755 MAX_COST is the total allowed cost for the expanded RTL. */
3756
0d282692 3757static rtx
ef4bddc2 3758expmed_mult_highpart (machine_mode mode, rtx op0, rtx op1,
0d282692 3759 rtx target, int unsignedp, int max_cost)
8efc8980 3760{
490d0f6c 3761 machine_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
0d282692 3762 unsigned HOST_WIDE_INT cnst1;
f258e38b
UW
3763 int extra_cost;
3764 bool sign_adjust = false;
8efc8980
RS
3765 enum mult_variant variant;
3766 struct algorithm alg;
0d282692 3767 rtx tem;
f40751dd 3768 bool speed = optimize_insn_for_speed_p ();
8efc8980 3769
15ed7b52 3770 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
8efc8980 3771 /* We can't support modes wider than HOST_BITS_PER_INT. */
46c9550f 3772 gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
8efc8980 3773
0d282692 3774 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
f258e38b 3775
b8698a0f
L
3776 /* We can't optimize modes wider than BITS_PER_WORD.
3777 ??? We might be able to perform double-word arithmetic if
f258e38b
UW
3778 mode == word_mode; however, all the cost calculations in
3779 synth_mult etc. assume single-word operations. */
3780 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
00f07b86 3781 return expmed_mult_highpart_optab (mode, op0, op1, target,
f258e38b
UW
3782 unsignedp, max_cost);
3783
5322d07e 3784 extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
f258e38b
UW
3785
3786 /* Check whether we try to multiply by a negative constant. */
3787 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3788 {
3789 sign_adjust = true;
5322d07e 3790 extra_cost += add_cost (speed, mode);
f258e38b 3791 }
8efc8980
RS
3792
3793 /* See whether shift/add multiplication is cheap enough. */
f258e38b
UW
3794 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3795 max_cost - extra_cost))
a295d331 3796 {
8efc8980
RS
3797 /* See whether the specialized multiplication optabs are
3798 cheaper than the shift/add version. */
00f07b86 3799 tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
26276705 3800 alg.cost.cost + extra_cost);
8efc8980
RS
3801 if (tem)
3802 return tem;
3803
f258e38b
UW
3804 tem = convert_to_mode (wider_mode, op0, unsignedp);
3805 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3806 tem = extract_high_half (mode, tem);
3807
9cf737f8 3808 /* Adjust result for signedness. */
f258e38b
UW
3809 if (sign_adjust)
3810 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3811
3812 return tem;
a295d331 3813 }
00f07b86 3814 return expmed_mult_highpart_optab (mode, op0, op1, target,
8efc8980 3815 unsignedp, max_cost);
55c2d311 3816}
0b55e932
RS
3817
3818
3819/* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3820
3821static rtx
ef4bddc2 3822expand_smod_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
0b55e932 3823{
f3f6fb16
DM
3824 rtx result, temp, shift;
3825 rtx_code_label *label;
0b55e932 3826 int logd;
807e902e 3827 int prec = GET_MODE_PRECISION (mode);
0b55e932
RS
3828
3829 logd = floor_log2 (d);
3830 result = gen_reg_rtx (mode);
3831
3832 /* Avoid conditional branches when they're expensive. */
3a4fd356 3833 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
22660666 3834 && optimize_insn_for_speed_p ())
0b55e932
RS
3835 {
3836 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3837 mode, 0, -1);
3838 if (signmask)
3839 {
fecfbfa4 3840 HOST_WIDE_INT masklow = (HOST_WIDE_INT_1 << logd) - 1;
0b55e932 3841 signmask = force_reg (mode, signmask);
1c234fcb
RS
3842 shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3843
3844 /* Use the rtx_cost of a LSHIFTRT instruction to determine
3845 which instruction sequence to use. If logical right shifts
3846 are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
3847 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
6e7a355c 3848
1c234fcb 3849 temp = gen_rtx_LSHIFTRT (mode, result, shift);
947131ba 3850 if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
e548c9df 3851 || (set_src_cost (temp, mode, optimize_insn_for_speed_p ())
5e8f01f4 3852 > COSTS_N_INSNS (2)))
1c234fcb
RS
3853 {
3854 temp = expand_binop (mode, xor_optab, op0, signmask,
3855 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3856 temp = expand_binop (mode, sub_optab, temp, signmask,
3857 NULL_RTX, 1, OPTAB_LIB_WIDEN);
2f1cd2eb
RS
3858 temp = expand_binop (mode, and_optab, temp,
3859 gen_int_mode (masklow, mode),
1c234fcb
RS
3860 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3861 temp = expand_binop (mode, xor_optab, temp, signmask,
3862 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3863 temp = expand_binop (mode, sub_optab, temp, signmask,
3864 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3865 }
3866 else
3867 {
3868 signmask = expand_binop (mode, lshr_optab, signmask, shift,
3869 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3870 signmask = force_reg (mode, signmask);
3871
3872 temp = expand_binop (mode, add_optab, op0, signmask,
3873 NULL_RTX, 1, OPTAB_LIB_WIDEN);
2f1cd2eb
RS
3874 temp = expand_binop (mode, and_optab, temp,
3875 gen_int_mode (masklow, mode),
1c234fcb
RS
3876 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3877 temp = expand_binop (mode, sub_optab, temp, signmask,
3878 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3879 }
0b55e932
RS
3880 return temp;
3881 }
3882 }
3883
3884 /* Mask contains the mode's signbit and the significant bits of the
3885 modulus. By including the signbit in the operation, many targets
3886 can avoid an explicit compare operation in the following comparison
3887 against zero. */
807e902e
KZ
3888 wide_int mask = wi::mask (logd, false, prec);
3889 mask = wi::set_bit (mask, prec - 1);
0b55e932 3890
6e7a355c 3891 temp = expand_binop (mode, and_optab, op0,
807e902e 3892 immed_wide_int_const (mask, mode),
6e7a355c 3893 result, 1, OPTAB_LIB_WIDEN);
0b55e932
RS
3894 if (temp != result)
3895 emit_move_insn (result, temp);
3896
3897 label = gen_label_rtx ();
3898 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3899
3900 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3901 0, OPTAB_LIB_WIDEN);
807e902e
KZ
3902
3903 mask = wi::mask (logd, true, prec);
6e7a355c 3904 temp = expand_binop (mode, ior_optab, temp,
807e902e 3905 immed_wide_int_const (mask, mode),
6e7a355c 3906 result, 1, OPTAB_LIB_WIDEN);
0b55e932
RS
3907 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3908 0, OPTAB_LIB_WIDEN);
3909 if (temp != result)
3910 emit_move_insn (result, temp);
3911 emit_label (label);
3912 return result;
3913}
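
A branchless, standalone rendering (editorial) of the first strategy above for SImode and d = 8: build the signmask with an arithmetic shift, fold the sign in, mask with d - 1, and fold the sign back out (the 2-XOR/2-SUB/1-AND sequence):

#include <assert.h>
#include <stdint.h>

static int32_t
smod8 (int32_t x)
{
  uint32_t s = (uint32_t) (x >> 31);	/* signmask: 0 or ~0 */
  uint32_t u = (uint32_t) x;
  return (int32_t) (((((u ^ s) - s) & 7) ^ s) - s);
}

int
main (void)
{
  for (int32_t x = -100000; x <= 100000; x++)
    assert (smod8 (x) == x % 8);
  return 0;
}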
39cab019
RS
3914
3915/* Expand signed division of OP0 by a power of two D in mode MODE.
3916 This routine is only called for positive values of D. */
3917
3918static rtx
ef4bddc2 3919expand_sdiv_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
39cab019 3920{
f3f6fb16
DM
3921 rtx temp;
3922 rtx_code_label *label;
39cab019
RS
3923 int logd;
3924
3925 logd = floor_log2 (d);
39cab019 3926
3a4fd356
JH
3927 if (d == 2
3928 && BRANCH_COST (optimize_insn_for_speed_p (),
3929 false) >= 1)
39cab019
RS
3930 {
3931 temp = gen_reg_rtx (mode);
3932 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3933 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3934 0, OPTAB_LIB_WIDEN);
eb6c3df1 3935 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
39cab019
RS
3936 }
3937
692e0312
TS
3938 if (HAVE_conditional_move
3939 && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
fdded401
RS
3940 {
3941 rtx temp2;
3942
3943 start_sequence ();
3944 temp2 = copy_to_mode_reg (mode, op0);
2f1cd2eb 3945 temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
fdded401
RS
3946 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3947 temp = force_reg (mode, temp);
3948
3949 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
3950 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3951 mode, temp, temp2, mode, 0);
3952 if (temp2)
3953 {
f3f6fb16 3954 rtx_insn *seq = get_insns ();
fdded401
RS
3955 end_sequence ();
3956 emit_insn (seq);
eb6c3df1 3957 return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
fdded401
RS
3958 }
3959 end_sequence ();
3960 }
fdded401 3961
3a4fd356
JH
3962 if (BRANCH_COST (optimize_insn_for_speed_p (),
3963 false) >= 2)
39cab019
RS
3964 {
3965 int ushift = GET_MODE_BITSIZE (mode) - logd;
3966
3967 temp = gen_reg_rtx (mode);
3968 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
00a7ba58
JJ
3969 if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
3970 || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
3971 > COSTS_N_INSNS (1))
2f1cd2eb 3972 temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode),
39cab019
RS
3973 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3974 else
3975 temp = expand_shift (RSHIFT_EXPR, mode, temp,
eb6c3df1 3976 ushift, NULL_RTX, 1);
39cab019
RS
3977 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3978 0, OPTAB_LIB_WIDEN);
eb6c3df1 3979 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
39cab019
RS
3980 }
3981
3982 label = gen_label_rtx ();
3983 temp = copy_to_mode_reg (mode, op0);
3984 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
2f1cd2eb 3985 expand_inc (temp, gen_int_mode (d - 1, mode));
39cab019 3986 emit_label (label);
eb6c3df1 3987 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
39cab019 3988}
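
A standalone rendering (editorial; assumes the usual arithmetic right shift of negative signed values) of the BRANCH_COST >= 2 strategy above for d = 8: bias negative numerators by d - 1 so the final arithmetic shift rounds toward zero:

#include <assert.h>
#include <stdint.h>

static int32_t
sdiv8 (int32_t x)
{
  int32_t bias = (x >> 31) & 7;		/* 7 if x < 0, else 0 */
  return (x + bias) >> 3;
}

int
main (void)
{
  for (int32_t x = -100000; x <= 100000; x++)
    assert (sdiv8 (x) == x / 8);
  return 0;
}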
55c2d311 3989\f
44037a66
TG
3990/* Emit the code to divide OP0 by OP1, putting the result in TARGET
3991 if that is convenient, and returning where the result is.
3992 You may request either the quotient or the remainder as the result;
3993 specify REM_FLAG nonzero to get the remainder.
3994
3995 CODE is the expression code for which kind of division this is;
3996 it controls how rounding is done. MODE is the machine mode to use.
3997 UNSIGNEDP nonzero means do unsigned division. */
3998
3999/* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
4000 and then correct it by or'ing in missing high bits
4001 if result of ANDI is nonzero.
4002 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
4003 This could optimize to a bfexts instruction.
4004 But C doesn't use these operations, so their optimizations are
4005 left for later. */
5353610b
R
4006/* ??? For modulo, we don't actually need the highpart of the first product,
4007 the low part will do nicely. And for small divisors, the second multiply
4008 can also be a low-part only multiply or even be completely left out.
4009 E.g. to calculate the remainder of a division by 3 with a 32 bit
4010 multiply, multiply with 0x55555556 and extract the upper two bits;
4011 the result is exact for inputs up to 0x1fffffff.
4012 The input range can be reduced by using cross-sum rules.
4013 For odd divisors >= 3, the following table gives right shift counts
09da1532 4014 so that if a number is shifted by an integer multiple of the given
5353610b
R
4015 amount, the remainder stays the same:
4016 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
4017 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
4018 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
4019 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
4020 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
4021
4022 Cross-sum rules for even numbers can be derived by leaving as many bits
4023 to the right alone as the divisor has zeros to the right.
4024 E.g. if x is an unsigned 32 bit number:
4025 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
4026 */
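
A standalone check (editorial) of the remainder-by-3 example in the note above: one 32-bit multiply by 0x55555556, keeping only the top two bits of the low product word, is exact for inputs up to 0x1fffffff:

#include <assert.h>
#include <stdint.h>

static uint32_t
mod3 (uint32_t x)			/* exact for x <= 0x1fffffff */
{
  return (x * 0x55555556u) >> 30;	/* upper two bits of the product */
}

int
main (void)
{
  for (uint32_t x = 0; x <= 0x1fffffff; x += 12347)	/* sample the range */
    assert (mod3 (x) == x % 3);
  assert (mod3 (0x1fffffff) == 0x1fffffff % 3);
  return 0;
}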
44037a66
TG
4027
4028rtx
ef4bddc2 4029expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
502b8322 4030 rtx op0, rtx op1, rtx target, int unsignedp)
44037a66 4031{
ef4bddc2 4032 machine_mode compute_mode;
b3694847 4033 rtx tquotient;
55c2d311 4034 rtx quotient = 0, remainder = 0;
f3f6fb16 4035 rtx_insn *last;
2c414fba 4036 int size;
f3f6fb16 4037 rtx_insn *insn;
44037a66 4038 optab optab1, optab2;
1c4a429a 4039 int op1_is_constant, op1_is_pow2 = 0;
71af73bb 4040 int max_cost, extra_cost;
9ec36da5 4041 static HOST_WIDE_INT last_div_const = 0;
f40751dd 4042 bool speed = optimize_insn_for_speed_p ();
55c2d311 4043
481683e1 4044 op1_is_constant = CONST_INT_P (op1);
1c4a429a
JH
4045 if (op1_is_constant)
4046 {
76a7314d
JJ
4047 wide_int ext_op1 = rtx_mode_t (op1, mode);
4048 op1_is_pow2 = (wi::popcount (ext_op1) == 1
4049 || (! unsignedp
4050 && wi::popcount (wi::neg (ext_op1)) == 1));
1c4a429a 4051 }
55c2d311
TG
4052
4053 /*
4054 This is the structure of expand_divmod:
4055
4056 First comes code to fix up the operands so we can perform the operations
4057 correctly and efficiently.
4058
4059 Second comes a switch statement with code specific for each rounding mode.
4060 For some special operands this code emits all RTL for the desired
69f61901 4061 operation; for other cases, it generates only a quotient and stores it in
55c2d311
TG
4062 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
4063 to indicate that it has not done anything.
4064
69f61901
RK
4065 Last comes code that finishes the operation. If QUOTIENT is set and
4066 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
4067 QUOTIENT is not set, it is computed using trunc rounding.
44037a66 4068
55c2d311
TG
4069 We try to generate special code for division and remainder when OP1 is a
4070 constant. If |OP1| = 2**n we can use shifts and some other fast
4071 operations. For other values of OP1, we compute a carefully selected
4072 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
4073 by m.
4074
4075 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
4076 half of the product. Different strategies for generating the product are
00f07b86 4077 implemented in expmed_mult_highpart.
55c2d311
TG
4078
4079 If what we actually want is the remainder, we generate that by another
4080 by-constant multiplication and a subtraction. */
4081
4082 /* We shouldn't be called with OP1 == const1_rtx, but some of the
3d32ffd1
TW
4083 code below will malfunction if we are, so check here and handle
4084 the special case if so. */
4085 if (op1 == const1_rtx)
4086 return rem_flag ? const0_rtx : op0;
4087
91ce572a
CC
4088 /* When dividing by -1, we could get an overflow.
4089 negv_optab can handle overflows. */
4090 if (! unsignedp && op1 == constm1_rtx)
4091 {
4092 if (rem_flag)
0fb7aeda 4093 return const0_rtx;
c3284718 4094 return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
0fb7aeda 4095 ? negv_optab : neg_optab, op0, target, 0);
91ce572a
CC
4096 }
4097
bc1c7e93
RK
4098 if (target
4099 /* Don't use the function value register as a target
4100 since we have to read it as well as write it,
4101 and function-inlining gets confused by this. */
4102 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
4103 /* Don't clobber an operand while doing a multi-step calculation. */
515dfc7a 4104 || ((rem_flag || op1_is_constant)
bc1c7e93 4105 && (reg_mentioned_p (target, op0)
3c0cb5de 4106 || (MEM_P (op0) && MEM_P (target))))
bc1c7e93 4107 || reg_mentioned_p (target, op1)
3c0cb5de 4108 || (MEM_P (op1) && MEM_P (target))))
44037a66
TG
4109 target = 0;
4110
44037a66
TG
4111 /* Get the mode in which to perform this computation. Normally it will
4112 be MODE, but sometimes we can't do the desired operation in MODE.
4113 If so, pick a wider mode in which we can do the operation. Convert
4114 to that mode at the start to avoid repeated conversions.
4115
4116 First see what operations we need. These depend on the expression
4117 we are evaluating. (We assume that divxx3 insns exist under the
4118 same conditions that modxx3 insns and that these insns don't normally
4119 fail. If these assumptions are not correct, we may generate less
4120 efficient code in some cases.)
4121
4122 Then see if we find a mode in which we can open-code that operation
4123 (either a division, modulus, or shift). Finally, check for the smallest
4124 mode for which we can do the operation with a library call. */
4125
55c2d311 4126 /* We might want to refine this now that we have division-by-constant
00f07b86 4127 optimization. Since expmed_mult_highpart tries so many variants, it is
55c2d311
TG
4128 not straightforward to generalize this. Maybe we should make an array
4129 of possible modes in init_expmed? Save this for GCC 2.7. */
4130
76a7314d 4131 optab1 = (op1_is_pow2
556a56ac 4132 ? (unsignedp ? lshr_optab : ashr_optab)
44037a66 4133 : (unsignedp ? udiv_optab : sdiv_optab));
76a7314d 4134 optab2 = (op1_is_pow2 ? optab1
556a56ac 4135 : (unsignedp ? udivmod_optab : sdivmod_optab));
44037a66 4136
c94843d2 4137 FOR_EACH_MODE_FROM (compute_mode, mode)
947131ba
RS
4138 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4139 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
44037a66
TG
4140 break;
4141
4142 if (compute_mode == VOIDmode)
c94843d2 4143 FOR_EACH_MODE_FROM (compute_mode, mode)
8a33f100
JH
4144 if (optab_libfunc (optab1, compute_mode)
4145 || optab_libfunc (optab2, compute_mode))
44037a66
TG
4146 break;
4147
535a42b1
NS
4148 /* If we still couldn't find a mode, use MODE, but expand_binop will
4149 probably die. */
44037a66
TG
4150 if (compute_mode == VOIDmode)
4151 compute_mode = mode;
4152
55c2d311
TG
4153 if (target && GET_MODE (target) == compute_mode)
4154 tquotient = target;
4155 else
4156 tquotient = gen_reg_rtx (compute_mode);
2c414fba 4157
55c2d311
TG
4158 size = GET_MODE_BITSIZE (compute_mode);
4159#if 0
4160 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
71af73bb
TG
4161 (mode), and thereby get better code when OP1 is a constant. Do that
4162 later. It will require going over all usages of SIZE below. */
55c2d311
TG
4163 size = GET_MODE_BITSIZE (mode);
4164#endif
bc1c7e93 4165
9ec36da5
JL
4166 /* Only deduct something for a REM if the last divide done was
4167 for a different constant. Then set the constant of the last
4168 divide. */
5322d07e
NF
4169 max_cost = (unsignedp
4170 ? udiv_cost (speed, compute_mode)
4171 : sdiv_cost (speed, compute_mode));
a28b2ac6
RS
4172 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4173 && INTVAL (op1) == last_div_const))
5322d07e
NF
4174 max_cost -= (mul_cost (speed, compute_mode)
4175 + add_cost (speed, compute_mode));
9ec36da5
JL
4176
4177 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
71af73bb 4178
55c2d311 4179 /* Now convert to the best mode to use. */
44037a66
TG
4180 if (compute_mode != mode)
4181 {
55c2d311 4182 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
81722fa9 4183 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
e13a25d5 4184
e9a25f70
JL
4185 /* convert_modes may have placed op1 into a register, so we
4186 must recompute the following. */
481683e1 4187 op1_is_constant = CONST_INT_P (op1);
76a7314d
JJ
4188 if (op1_is_constant)
4189 {
4190 wide_int ext_op1 = rtx_mode_t (op1, compute_mode);
4191 op1_is_pow2 = (wi::popcount (ext_op1) == 1
4192 || (! unsignedp
4193 && wi::popcount (wi::neg (ext_op1)) == 1));
4194 }
4195 else
4196 op1_is_pow2 = 0;
44037a66
TG
4197 }
4198
55c2d311 4199 /* If one of the operands is a volatile MEM, copy it into a register. */
c2a47e48 4200
3c0cb5de 4201 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
55c2d311 4202 op0 = force_reg (compute_mode, op0);
3c0cb5de 4203 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
c2a47e48
RK
4204 op1 = force_reg (compute_mode, op1);
4205
ab0b6581
TG
4206 /* If we need the remainder or if OP1 is constant, we need to
4207 put OP0 in a register in case it has any queued subexpressions. */
4208 if (rem_flag || op1_is_constant)
4209 op0 = force_reg (compute_mode, op0);
bc1c7e93 4210
55c2d311 4211 last = get_last_insn ();
44037a66 4212
9faa82d8 4213 /* Promote floor rounding to trunc rounding for unsigned operations. */
55c2d311 4214 if (unsignedp)
44037a66 4215 {
55c2d311
TG
4216 if (code == FLOOR_DIV_EXPR)
4217 code = TRUNC_DIV_EXPR;
4218 if (code == FLOOR_MOD_EXPR)
4219 code = TRUNC_MOD_EXPR;
db7cafb0
JL
4220 if (code == EXACT_DIV_EXPR && op1_is_pow2)
4221 code = TRUNC_DIV_EXPR;
55c2d311 4222 }
bc1c7e93 4223
55c2d311
TG
4224 if (op1 != const0_rtx)
4225 switch (code)
4226 {
4227 case TRUNC_MOD_EXPR:
4228 case TRUNC_DIV_EXPR:
34f016ed 4229 if (op1_is_constant)
55c2d311 4230 {
d8f1376c 4231 if (unsignedp)
55c2d311 4232 {
079c527f 4233 unsigned HOST_WIDE_INT mh, ml;
55c2d311
TG
4234 int pre_shift, post_shift;
4235 int dummy;
76a7314d
JJ
4236 wide_int wd = rtx_mode_t (op1, compute_mode);
4237 unsigned HOST_WIDE_INT d = wd.to_uhwi ();
55c2d311 4238
76a7314d 4239 if (wi::popcount (wd) == 1)
55c2d311
TG
4240 {
4241 pre_shift = floor_log2 (d);
4242 if (rem_flag)
4243 {
2f1cd2eb 4244 unsigned HOST_WIDE_INT mask
fecfbfa4 4245 = (HOST_WIDE_INT_1U << pre_shift) - 1;
db3cf6fb
MS
4246 remainder
4247 = expand_binop (compute_mode, and_optab, op0,
2f1cd2eb 4248 gen_int_mode (mask, compute_mode),
db3cf6fb
MS
4249 remainder, 1,
4250 OPTAB_LIB_WIDEN);
55c2d311 4251 if (remainder)
c8dbc8ca 4252 return gen_lowpart (mode, remainder);
55c2d311
TG
4253 }
4254 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
eb6c3df1 4255 pre_shift, tquotient, 1);
55c2d311 4256 }
34f016ed 4257 else if (size <= HOST_BITS_PER_WIDE_INT)
55c2d311 4258 {
fecfbfa4 4259 if (d >= (HOST_WIDE_INT_1U << (size - 1)))
55c2d311 4260 {
dc1d6150
TG
4261 /* Most significant bit of divisor is set; emit an scc
4262 insn. */
b45f0e58
PB
4263 quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4264 compute_mode, 1, 1);
55c2d311
TG
4265 }
4266 else
4267 {
dc1d6150
TG
4268 /* Find a suitable multiplier and right shift count
4269 instead of dividing directly by D. */
4270
4271 mh = choose_multiplier (d, size, size,
4272 &ml, &post_shift, &dummy);
4273
4274 /* If the suggested multiplier is more than SIZE bits,
4275 we can do better for even divisors, using an
4276 initial right shift. */
4277 if (mh != 0 && (d & 1) == 0)
4278 {
146ec50f 4279 pre_shift = ctz_or_zero (d);
dc1d6150
TG
4280 mh = choose_multiplier (d >> pre_shift, size,
4281 size - pre_shift,
4282 &ml, &post_shift, &dummy);
5b0264cb 4283 gcc_assert (!mh);
dc1d6150
TG
4284 }
4285 else
4286 pre_shift = 0;
4287
4288 if (mh != 0)
4289 {
4290 rtx t1, t2, t3, t4;
4291
02a65aef
R
4292 if (post_shift - 1 >= BITS_PER_WORD)
4293 goto fail1;
4294
965703ed 4295 extra_cost
5322d07e
NF
4296 = (shift_cost (speed, compute_mode, post_shift - 1)
4297 + shift_cost (speed, compute_mode, 1)
4298 + 2 * add_cost (speed, compute_mode));
2f1cd2eb
RS
4299 t1 = expmed_mult_highpart
4300 (compute_mode, op0,
4301 gen_int_mode (ml, compute_mode),
4302 NULL_RTX, 1, max_cost - extra_cost);
dc1d6150
TG
4303 if (t1 == 0)
4304 goto fail1;
38a448ca
RH
4305 t2 = force_operand (gen_rtx_MINUS (compute_mode,
4306 op0, t1),
dc1d6150 4307 NULL_RTX);
eb6c3df1
RG
4308 t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4309 t2, 1, NULL_RTX, 1);
38a448ca
RH
4310 t4 = force_operand (gen_rtx_PLUS (compute_mode,
4311 t1, t3),
dc1d6150 4312 NULL_RTX);
4a90aeeb
NS
4313 quotient = expand_shift
4314 (RSHIFT_EXPR, compute_mode, t4,
eb6c3df1 4315 post_shift - 1, tquotient, 1);
dc1d6150
TG
4316 }
4317 else
4318 {
4319 rtx t1, t2;
4320
02a65aef
R
4321 if (pre_shift >= BITS_PER_WORD
4322 || post_shift >= BITS_PER_WORD)
4323 goto fail1;
4324
4a90aeeb
NS
4325 t1 = expand_shift
4326 (RSHIFT_EXPR, compute_mode, op0,
eb6c3df1 4327 pre_shift, NULL_RTX, 1);
965703ed 4328 extra_cost
5322d07e
NF
4329 = (shift_cost (speed, compute_mode, pre_shift)
4330 + shift_cost (speed, compute_mode, post_shift));
2f1cd2eb
RS
4331 t2 = expmed_mult_highpart
4332 (compute_mode, t1,
4333 gen_int_mode (ml, compute_mode),
4334 NULL_RTX, 1, max_cost - extra_cost);
dc1d6150
TG
4335 if (t2 == 0)
4336 goto fail1;
4a90aeeb
NS
4337 quotient = expand_shift
4338 (RSHIFT_EXPR, compute_mode, t2,
eb6c3df1 4339 post_shift, tquotient, 1);
dc1d6150 4340 }
55c2d311
TG
4341 }
4342 }
34f016ed
TG
4343 else /* Mode too wide to use the tricky code. */
4344 break;
55c2d311
TG
4345
4346 insn = get_last_insn ();
7543f918
JR
4347 if (insn != last)
4348 set_dst_reg_note (insn, REG_EQUAL,
4349 gen_rtx_UDIV (compute_mode, op0, op1),
4350 quotient);
55c2d311
TG
4351 }
4352 else /* TRUNC_DIV, signed */
4353 {
4354 unsigned HOST_WIDE_INT ml;
4355 int lgup, post_shift;
e71c0aa7 4356 rtx mlr;
55c2d311 4357 HOST_WIDE_INT d = INTVAL (op1);
e4c9f3c2
ILT
4358 unsigned HOST_WIDE_INT abs_d;
4359
093253be
ILT
4360 /* Since d might be INT_MIN, we have to cast to
4361 unsigned HOST_WIDE_INT before negating to avoid
4362 undefined signed overflow. */
6d9c91e9
ILT
4363 abs_d = (d >= 0
4364 ? (unsigned HOST_WIDE_INT) d
4365 : - (unsigned HOST_WIDE_INT) d);
55c2d311
TG
4366
4367 /* n rem d = n rem -d */
4368 if (rem_flag && d < 0)
4369 {
4370 d = abs_d;
2496c7bd 4371 op1 = gen_int_mode (abs_d, compute_mode);
55c2d311
TG
4372 }
4373
4374 if (d == 1)
4375 quotient = op0;
4376 else if (d == -1)
4377 quotient = expand_unop (compute_mode, neg_optab, op0,
4378 tquotient, 0);
76a7314d 4379 else if (size <= HOST_BITS_PER_WIDE_INT
fecfbfa4 4380 && abs_d == HOST_WIDE_INT_1U << (size - 1))
f737b132
RK
4381 {
4382 /* This case is not handled correctly below. */
4383 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4384 compute_mode, 1, 1);
4385 if (quotient == 0)
4386 goto fail1;
4387 }
55c2d311 4388 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
76a7314d 4389 && (size <= HOST_BITS_PER_WIDE_INT || d >= 0)
5322d07e
NF
4390 && (rem_flag
4391 ? smod_pow2_cheap (speed, compute_mode)
4392 : sdiv_pow2_cheap (speed, compute_mode))
0b55e932
RS
4393 /* We assume that cheap metric is true if the
4394 optab has an expander for this mode. */
166cdb08
JH
4395 && ((optab_handler ((rem_flag ? smod_optab
4396 : sdiv_optab),
947131ba 4397 compute_mode)
a8c7e72d 4398 != CODE_FOR_nothing)
947131ba
RS
4399 || (optab_handler (sdivmod_optab,
4400 compute_mode)
4401 != CODE_FOR_nothing)))
55c2d311 4402 ;
76a7314d
JJ
4403 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d)
4404 && (size <= HOST_BITS_PER_WIDE_INT
4405 || abs_d != (unsigned HOST_WIDE_INT) d))
55c2d311 4406 {
0b55e932
RS
4407 if (rem_flag)
4408 {
4409 remainder = expand_smod_pow2 (compute_mode, op0, d);
4410 if (remainder)
4411 return gen_lowpart (mode, remainder);
4412 }
3d520aaf 4413
5322d07e 4414 if (sdiv_pow2_cheap (speed, compute_mode)
947131ba 4415 && ((optab_handler (sdiv_optab, compute_mode)
3d520aaf 4416 != CODE_FOR_nothing)
947131ba 4417 || (optab_handler (sdivmod_optab, compute_mode)
3d520aaf
DE
4418 != CODE_FOR_nothing)))
4419 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4420 compute_mode, op0,
4421 gen_int_mode (abs_d,
4422 compute_mode),
4423 NULL_RTX, 0);
4424 else
4425 quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
55c2d311 4426
0b55e932
RS
4427 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4428 negate the quotient. */
55c2d311
TG
4429 if (d < 0)
4430 {
4431 insn = get_last_insn ();
4e430df8 4432 if (insn != last
fecfbfa4 4433 && abs_d < (HOST_WIDE_INT_1U
c8e7fe58 4434 << (HOST_BITS_PER_WIDE_INT - 1)))
7543f918
JR
4435 set_dst_reg_note (insn, REG_EQUAL,
4436 gen_rtx_DIV (compute_mode, op0,
6d26322f
JR
4437 gen_int_mode
4438 (abs_d,
4439 compute_mode)),
7543f918 4440 quotient);
55c2d311
TG
4441
4442 quotient = expand_unop (compute_mode, neg_optab,
4443 quotient, quotient, 0);
4444 }
4445 }
34f016ed 4446 else if (size <= HOST_BITS_PER_WIDE_INT)
55c2d311
TG
4447 {
4448 choose_multiplier (abs_d, size, size - 1,
079c527f 4449 &ml, &post_shift, &lgup);
fecfbfa4 4450 if (ml < HOST_WIDE_INT_1U << (size - 1))
55c2d311
TG
4451 {
4452 rtx t1, t2, t3;
4453
02a65aef
R
4454 if (post_shift >= BITS_PER_WORD
4455 || size - 1 >= BITS_PER_WORD)
4456 goto fail1;
4457
5322d07e
NF
4458 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4459 + shift_cost (speed, compute_mode, size - 1)
4460 + add_cost (speed, compute_mode));
2f1cd2eb
RS
4461 t1 = expmed_mult_highpart
4462 (compute_mode, op0, gen_int_mode (ml, compute_mode),
4463 NULL_RTX, 0, max_cost - extra_cost);
55c2d311
TG
4464 if (t1 == 0)
4465 goto fail1;
4a90aeeb
NS
4466 t2 = expand_shift
4467 (RSHIFT_EXPR, compute_mode, t1,
eb6c3df1 4468 post_shift, NULL_RTX, 0);
4a90aeeb
NS
4469 t3 = expand_shift
4470 (RSHIFT_EXPR, compute_mode, op0,
eb6c3df1 4471 size - 1, NULL_RTX, 0);
55c2d311 4472 if (d < 0)
c5c76735
JL
4473 quotient
4474 = force_operand (gen_rtx_MINUS (compute_mode,
4475 t3, t2),
4476 tquotient);
55c2d311 4477 else
c5c76735
JL
4478 quotient
4479 = force_operand (gen_rtx_MINUS (compute_mode,
4480 t2, t3),
4481 tquotient);
55c2d311
TG
4482 }
4483 else
4484 {
4485 rtx t1, t2, t3, t4;
4486
02a65aef
R
4487 if (post_shift >= BITS_PER_WORD
4488 || size - 1 >= BITS_PER_WORD)
4489 goto fail1;
4490
dd4786fe 4491 ml |= HOST_WIDE_INT_M1U << (size - 1);
e71c0aa7 4492 mlr = gen_int_mode (ml, compute_mode);
5322d07e
NF
4493 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4494 + shift_cost (speed, compute_mode, size - 1)
4495 + 2 * add_cost (speed, compute_mode));
00f07b86 4496 t1 = expmed_mult_highpart (compute_mode, op0, mlr,
71af73bb
TG
4497 NULL_RTX, 0,
4498 max_cost - extra_cost);
55c2d311
TG
4499 if (t1 == 0)
4500 goto fail1;
c5c76735
JL
4501 t2 = force_operand (gen_rtx_PLUS (compute_mode,
4502 t1, op0),
55c2d311 4503 NULL_RTX);
4a90aeeb
NS
4504 t3 = expand_shift
4505 (RSHIFT_EXPR, compute_mode, t2,
eb6c3df1 4506 post_shift, NULL_RTX, 0);
4a90aeeb
NS
4507 t4 = expand_shift
4508 (RSHIFT_EXPR, compute_mode, op0,
eb6c3df1 4509 size - 1, NULL_RTX, 0);
55c2d311 4510 if (d < 0)
c5c76735
JL
4511 quotient
4512 = force_operand (gen_rtx_MINUS (compute_mode,
4513 t4, t3),
4514 tquotient);
55c2d311 4515 else
c5c76735
JL
4516 quotient
4517 = force_operand (gen_rtx_MINUS (compute_mode,
4518 t3, t4),
4519 tquotient);
55c2d311
TG
4520 }
4521 }
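/* Illustration, assuming a 32-bit mode: for d = 7, choose_multiplier
   yields ml = 0x92492493 with post_shift = 2.  That ml has its top
   bit set, so the second variant above applies:
   t1 = highpart (op0 * ml) (signed), t2 = t1 + op0, t3 = t2 >> 2
   (arithmetic), and the quotient is t3 - (op0 >> 31).  For op0 = 100:
   t1 = -43, t2 = 57, t3 = 14, quotient 14; for op0 = -100: t1 = 42,
   t2 = -58, t3 = -15, quotient -15 - (-1) = -14.  */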
34f016ed
TG
4522 else /* Mode too wide to use the tricky code */
4523 break;
55c2d311 4524
4e430df8 4525 insn = get_last_insn ();
7543f918
JR
4526 if (insn != last)
4527 set_dst_reg_note (insn, REG_EQUAL,
4528 gen_rtx_DIV (compute_mode, op0, op1),
4529 quotient);
55c2d311
TG
4530 }
4531 break;
4532 }
4533 fail1:
4534 delete_insns_since (last);
4535 break;
44037a66 4536
55c2d311
TG
4537 case FLOOR_DIV_EXPR:
4538 case FLOOR_MOD_EXPR:
4539 /* We will come here only for signed operations. */
76a7314d 4540 if (op1_is_constant && size <= HOST_BITS_PER_WIDE_INT)
55c2d311 4541 {
079c527f 4542 unsigned HOST_WIDE_INT mh, ml;
55c2d311
TG
4543 int pre_shift, lgup, post_shift;
4544 HOST_WIDE_INT d = INTVAL (op1);
4545
4546 if (d > 0)
4547 {
4548 /* We could just as easily deal with negative constants here,
4549 but it does not seem worth the trouble for GCC 2.6. */
4550 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4551 {
4552 pre_shift = floor_log2 (d);
4553 if (rem_flag)
4554 {
2f1cd2eb 4555 unsigned HOST_WIDE_INT mask
fecfbfa4 4556 = (HOST_WIDE_INT_1U << pre_shift) - 1;
2f1cd2eb
RS
4557 remainder = expand_binop
4558 (compute_mode, and_optab, op0,
4559 gen_int_mode (mask, compute_mode),
4560 remainder, 0, OPTAB_LIB_WIDEN);
55c2d311 4561 if (remainder)
c8dbc8ca 4562 return gen_lowpart (mode, remainder);
55c2d311 4563 }
4a90aeeb
NS
4564 quotient = expand_shift
4565 (RSHIFT_EXPR, compute_mode, op0,
eb6c3df1 4566 pre_shift, tquotient, 0);
55c2d311
TG
4567 }
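/* Illustration: for FLOOR_DIV by a positive power of 2 the arithmetic
   shift alone is exact, since it rounds toward minus infinity: with
   d = 4, op0 = -9 gives quotient -9 >> 2 = -3 and remainder
   -9 & 3 = 3, and -3 * 4 + 3 = -9.  No sign fixup is needed, unlike
   the TRUNC_DIV case.  */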
4568 else
4569 {
4570 rtx t1, t2, t3, t4;
4571
4572 mh = choose_multiplier (d, size, size - 1,
4573 &ml, &post_shift, &lgup);
5b0264cb 4574 gcc_assert (!mh);
55c2d311 4575
02a65aef
R
4576 if (post_shift < BITS_PER_WORD
4577 && size - 1 < BITS_PER_WORD)
55c2d311 4578 {
4a90aeeb
NS
4579 t1 = expand_shift
4580 (RSHIFT_EXPR, compute_mode, op0,
eb6c3df1 4581 size - 1, NULL_RTX, 0);
02a65aef
R
4582 t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4583 NULL_RTX, 0, OPTAB_WIDEN);
5322d07e
NF
4584 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4585 + shift_cost (speed, compute_mode, size - 1)
4586 + 2 * add_cost (speed, compute_mode));
2f1cd2eb
RS
4587 t3 = expmed_mult_highpart
4588 (compute_mode, t2, gen_int_mode (ml, compute_mode),
4589 NULL_RTX, 1, max_cost - extra_cost);
02a65aef
R
4590 if (t3 != 0)
4591 {
4a90aeeb
NS
4592 t4 = expand_shift
4593 (RSHIFT_EXPR, compute_mode, t3,
eb6c3df1 4594 post_shift, NULL_RTX, 1);
02a65aef
R
4595 quotient = expand_binop (compute_mode, xor_optab,
4596 t4, t1, tquotient, 0,
4597 OPTAB_WIDEN);
4598 }
55c2d311
TG
4599 }
4600 }
4601 }
4602 else
4603 {
4604 rtx nsign, t1, t2, t3, t4;
38a448ca
RH
4605 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4606 op0, constm1_rtx), NULL_RTX);
55c2d311
TG
4607 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4608 0, OPTAB_WIDEN);
76a7314d
JJ
4609 nsign = expand_shift (RSHIFT_EXPR, compute_mode, t2,
4610 size - 1, NULL_RTX, 0);
38a448ca 4611 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
55c2d311
TG
4612 NULL_RTX);
4613 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4614 NULL_RTX, 0);
4615 if (t4)
4616 {
4617 rtx t5;
4618 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4619 NULL_RTX, 0);
38a448ca
RH
4620 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4621 t4, t5),
55c2d311
TG
4622 tquotient);
4623 }
4624 }
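/* Illustration of the branch above for a negative constant divisor:
   nsign is -1 exactly when op0 <= 0 (op0 | (op0 - 1) is negative
   there), so t3 is op0 - 1 for positive op0 and op0 otherwise, and
   one is subtracted from t3 / op1 only in the positive case.  E.g.
   op0 = 7, op1 = -2: t3 = 6, t4 = -3, quotient -4 = floor(-3.5).  */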
4625 }
4626
4627 if (quotient != 0)
4628 break;
4629 delete_insns_since (last);
4630
4631 /* Try using an instruction that produces both the quotient and
4632 remainder, using truncation. We can easily compensate the quotient
4633 or remainder to get floor rounding, once we have the remainder.
4634 Notice that we compute also the final remainder value here,
4635 and return the result right away. */
a45cf58c 4636 if (target == 0 || GET_MODE (target) != compute_mode)
55c2d311 4637 target = gen_reg_rtx (compute_mode);
668443c9 4638
55c2d311
TG
4639 if (rem_flag)
4640 {
668443c9 4641 remainder
f8cfc6aa 4642 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
55c2d311
TG
4643 quotient = gen_reg_rtx (compute_mode);
4644 }
4645 else
4646 {
668443c9 4647 quotient
f8cfc6aa 4648 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
55c2d311
TG
4649 remainder = gen_reg_rtx (compute_mode);
4650 }
4651
4652 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4653 quotient, remainder, 0))
4654 {
4655 /* This could be computed with a branch-less sequence.
4656 Save that for later. */
4657 rtx tem;
f3f6fb16 4658 rtx_code_label *label = gen_label_rtx ();
f5963e61 4659 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
55c2d311
TG
4660 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4661 NULL_RTX, 0, OPTAB_WIDEN);
f5963e61 4662 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
55c2d311
TG
4663 expand_dec (quotient, const1_rtx);
4664 expand_inc (remainder, op1);
4665 emit_label (label);
c8dbc8ca 4666 return gen_lowpart (mode, rem_flag ? remainder : quotient);
55c2d311
TG
4667 }
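/* Illustration of the compensation above: truncating and floor
   division differ only when the remainder is nonzero and op0, op1
   have opposite signs (op0 ^ op1 < 0).  E.g. 7 / -2 truncates to
   quotient -3, remainder 1; the two adjustments give quotient -4 and
   remainder -1, and -4 * -2 + -1 = 7 as FLOOR_DIV/FLOOR_MOD
   require.  */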
4668
4669 /* No luck with division elimination or divmod. Have to do it
4670 by conditionally adjusting op0 *and* the result. */
44037a66 4671 {
f3f6fb16 4672 rtx_code_label *label1, *label2, *label3, *label4, *label5;
55c2d311
TG
4673 rtx adjusted_op0;
4674 rtx tem;
4675
4676 quotient = gen_reg_rtx (compute_mode);
4677 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4678 label1 = gen_label_rtx ();
4679 label2 = gen_label_rtx ();
4680 label3 = gen_label_rtx ();
4681 label4 = gen_label_rtx ();
4682 label5 = gen_label_rtx ();
f5963e61
JL
4683 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4684 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
55c2d311
TG
4685 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4686 quotient, 0, OPTAB_LIB_WIDEN);
4687 if (tem != quotient)
4688 emit_move_insn (quotient, tem);
ec4a505f 4689 emit_jump_insn (targetm.gen_jump (label5));
55c2d311
TG
4690 emit_barrier ();
4691 emit_label (label1);
44037a66 4692 expand_inc (adjusted_op0, const1_rtx);
ec4a505f 4693 emit_jump_insn (targetm.gen_jump (label4));
55c2d311
TG
4694 emit_barrier ();
4695 emit_label (label2);
f5963e61 4696 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
55c2d311
TG
4697 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4698 quotient, 0, OPTAB_LIB_WIDEN);
4699 if (tem != quotient)
4700 emit_move_insn (quotient, tem);
ec4a505f 4701 emit_jump_insn (targetm.gen_jump (label5));
55c2d311
TG
4702 emit_barrier ();
4703 emit_label (label3);
4704 expand_dec (adjusted_op0, const1_rtx);
4705 emit_label (label4);
4706 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4707 quotient, 0, OPTAB_LIB_WIDEN);
4708 if (tem != quotient)
4709 emit_move_insn (quotient, tem);
4710 expand_dec (quotient, const1_rtx);
4711 emit_label (label5);
44037a66 4712 }
55c2d311 4713 break;
44037a66 4714
55c2d311
TG
4715 case CEIL_DIV_EXPR:
4716 case CEIL_MOD_EXPR:
4717 if (unsignedp)
4718 {
76a7314d
JJ
4719 if (op1_is_constant
4720 && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4721 && (size <= HOST_BITS_PER_WIDE_INT
4722 || INTVAL (op1) >= 0))
9176af2f
TG
4723 {
4724 rtx t1, t2, t3;
4725 unsigned HOST_WIDE_INT d = INTVAL (op1);
4726 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
eb6c3df1 4727 floor_log2 (d), tquotient, 1);
9176af2f 4728 t2 = expand_binop (compute_mode, and_optab, op0,
2f1cd2eb 4729 gen_int_mode (d - 1, compute_mode),
9176af2f
TG
4730 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4731 t3 = gen_reg_rtx (compute_mode);
4732 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4733 compute_mode, 1, 1);
412381d9
TG
4734 if (t3 == 0)
4735 {
f3f6fb16 4736 rtx_code_label *lab;
412381d9 4737 lab = gen_label_rtx ();
f5963e61 4738 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
412381d9
TG
4739 expand_inc (t1, const1_rtx);
4740 emit_label (lab);
4741 quotient = t1;
4742 }
4743 else
38a448ca
RH
4744 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4745 t1, t3),
412381d9 4746 tquotient);
9176af2f
TG
4747 break;
4748 }
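/* Illustration: for unsigned op0 and d = 4 the sequence above
   computes (op0 >> 2) + ((op0 & 3) != 0), rounding up exactly when
   the division is inexact: op0 = 13 gives (13 >> 2) + 1 = 4 =
   ceil(13/4).  */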
55c2d311
TG
4749
4750 /* Try using an instruction that produces both the quotient and
4751 remainder, using truncation. We can easily compensate the
4752 quotient or remainder to get ceiling rounding, once we have the
4753 remainder. Notice that we compute also the final remainder
4754 value here, and return the result right away. */
a45cf58c 4755 if (target == 0 || GET_MODE (target) != compute_mode)
55c2d311 4756 target = gen_reg_rtx (compute_mode);
668443c9 4757
55c2d311
TG
4758 if (rem_flag)
4759 {
f8cfc6aa 4760 remainder = (REG_P (target)
668443c9 4761 ? target : gen_reg_rtx (compute_mode));
55c2d311
TG
4762 quotient = gen_reg_rtx (compute_mode);
4763 }
4764 else
4765 {
f8cfc6aa 4766 quotient = (REG_P (target)
668443c9 4767 ? target : gen_reg_rtx (compute_mode));
55c2d311
TG
4768 remainder = gen_reg_rtx (compute_mode);
4769 }
4770
4771 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4772 remainder, 1))
4773 {
4774 /* This could be computed with a branch-less sequence.
4775 Save that for later. */
f3f6fb16 4776 rtx_code_label *label = gen_label_rtx ();
f5963e61
JL
4777 do_cmp_and_jump (remainder, const0_rtx, EQ,
4778 compute_mode, label);
55c2d311
TG
4779 expand_inc (quotient, const1_rtx);
4780 expand_dec (remainder, op1);
4781 emit_label (label);
c8dbc8ca 4782 return gen_lowpart (mode, rem_flag ? remainder : quotient);
55c2d311
TG
4783 }
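/* Illustration of the compensation above: from truncating results q
   and r with r != 0, ceiling rounding needs q + 1 and r - op1.
   op0 = 13, op1 = 4: q = 3, r = 1 become q = 4, r = -3, and
   4 * 4 + -3 = 13, the CEIL_DIV/CEIL_MOD pair.  */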
4784
4785 /* No luck with division elimination or divmod. Have to do it
4786 by conditionally adjusting op0 *and* the result. */
44037a66 4787 {
f3f6fb16 4788 rtx_code_label *label1, *label2;
55c2d311
TG
4789 rtx adjusted_op0, tem;
4790
4791 quotient = gen_reg_rtx (compute_mode);
4792 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4793 label1 = gen_label_rtx ();
4794 label2 = gen_label_rtx ();
f5963e61
JL
4795 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4796 compute_mode, label1);
55c2d311 4797 emit_move_insn (quotient, const0_rtx);
ec4a505f 4798 emit_jump_insn (targetm.gen_jump (label2));
55c2d311
TG
4799 emit_barrier ();
4800 emit_label (label1);
4801 expand_dec (adjusted_op0, const1_rtx);
4802 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4803 quotient, 1, OPTAB_LIB_WIDEN);
4804 if (tem != quotient)
4805 emit_move_insn (quotient, tem);
4806 expand_inc (quotient, const1_rtx);
4807 emit_label (label2);
44037a66 4808 }
55c2d311
TG
4809 }
4810 else /* signed */
4811 {
73f27728
RK
4812 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4813 && INTVAL (op1) >= 0)
4814 {
4815 /* This is extremely similar to the code for the unsigned case
4816 above. For 2.7 we should merge these variants, but for
4817 2.6.1 I don't want to touch the code for unsigned since that
4818 gets used in C. The signed case will only be used by other
4819 languages (Ada). */
4820
4821 rtx t1, t2, t3;
4822 unsigned HOST_WIDE_INT d = INTVAL (op1);
4823 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
eb6c3df1 4824 floor_log2 (d), tquotient, 0);
73f27728 4825 t2 = expand_binop (compute_mode, and_optab, op0,
2f1cd2eb 4826 gen_int_mode (d - 1, compute_mode),
73f27728
RK
4827 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4828 t3 = gen_reg_rtx (compute_mode);
4829 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4830 compute_mode, 1, 1);
4831 if (t3 == 0)
4832 {
f3f6fb16 4833 rtx_code_label *lab;
73f27728 4834 lab = gen_label_rtx ();
f5963e61 4835 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
73f27728
RK
4836 expand_inc (t1, const1_rtx);
4837 emit_label (lab);
4838 quotient = t1;
4839 }
4840 else
38a448ca
RH
4841 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4842 t1, t3),
73f27728
RK
4843 tquotient);
4844 break;
4845 }
4846
55c2d311
TG
4847 /* Try using an instruction that produces both the quotient and
4848 remainder, using truncation. We can easily compensate the
4849 quotient or remainder to get ceiling rounding, once we have the
4850 remainder. Notice that we compute also the final remainder
4851 value here, and return the result right away. */
a45cf58c 4852 if (target == 0 || GET_MODE (target) != compute_mode)
55c2d311
TG
4853 target = gen_reg_rtx (compute_mode);
4854 if (rem_flag)
4855 {
f8cfc6aa 4856 remainder = (REG_P (target)
668443c9 4857 ? target : gen_reg_rtx (compute_mode));
55c2d311
TG
4858 quotient = gen_reg_rtx (compute_mode);
4859 }
4860 else
4861 {
f8cfc6aa 4862 quotient = (REG_P (target)
668443c9 4863 ? target : gen_reg_rtx (compute_mode));
55c2d311
TG
4864 remainder = gen_reg_rtx (compute_mode);
4865 }
4866
4867 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4868 remainder, 0))
4869 {
4870 /* This could be computed with a branch-less sequence.
4871 Save that for later. */
4872 rtx tem;
f3f6fb16 4873 rtx_code_label *label = gen_label_rtx ();
f5963e61
JL
4874 do_cmp_and_jump (remainder, const0_rtx, EQ,
4875 compute_mode, label);
55c2d311
TG
4876 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4877 NULL_RTX, 0, OPTAB_WIDEN);
f5963e61 4878 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
55c2d311
TG
4879 expand_inc (quotient, const1_rtx);
4880 expand_dec (remainder, op1);
4881 emit_label (label);
c8dbc8ca 4882 return gen_lowpart (mode, rem_flag ? remainder : quotient);
55c2d311
TG
4883 }
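/* Illustration: for signed operands the quotient rounds up only when
   the remainder is nonzero and op0, op1 have the same sign, which is
   what the XOR test above checks.  13 / 4 truncates to q = 3, r = 1
   and becomes q = 4, r = -3; -13 / 4 truncates to q = -3, r = -1,
   which already is the ceiling, so the adjustment is skipped.  */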
4884
4885 /* No luck with division elimination or divmod. Have to do it
4886 by conditionally adjusting op0 *and* the result. */
44037a66 4887 {
f3f6fb16 4888 rtx_code_label *label1, *label2, *label3, *label4, *label5;
55c2d311
TG
4889 rtx adjusted_op0;
4890 rtx tem;
4891
4892 quotient = gen_reg_rtx (compute_mode);
4893 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4894 label1 = gen_label_rtx ();
4895 label2 = gen_label_rtx ();
4896 label3 = gen_label_rtx ();
4897 label4 = gen_label_rtx ();
4898 label5 = gen_label_rtx ();
f5963e61
JL
4899 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4900 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4901 compute_mode, label1);
55c2d311
TG
4902 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4903 quotient, 0, OPTAB_LIB_WIDEN);
4904 if (tem != quotient)
4905 emit_move_insn (quotient, tem);
ec4a505f 4906 emit_jump_insn (targetm.gen_jump (label5));
55c2d311
TG
4907 emit_barrier ();
4908 emit_label (label1);
4909 expand_dec (adjusted_op0, const1_rtx);
ec4a505f 4910 emit_jump_insn (targetm.gen_jump (label4));
55c2d311
TG
4911 emit_barrier ();
4912 emit_label (label2);
f5963e61
JL
4913 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4914 compute_mode, label3);
55c2d311
TG
4915 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4916 quotient, 0, OPTAB_LIB_WIDEN);
4917 if (tem != quotient)
4918 emit_move_insn (quotient, tem);
ec4a505f 4919 emit_jump_insn (targetm.gen_jump (label5));
55c2d311
TG
4920 emit_barrier ();
4921 emit_label (label3);
4922 expand_inc (adjusted_op0, const1_rtx);
4923 emit_label (label4);
4924 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4925 quotient, 0, OPTAB_LIB_WIDEN);
4926 if (tem != quotient)
4927 emit_move_insn (quotient, tem);
4928 expand_inc (quotient, const1_rtx);
4929 emit_label (label5);
44037a66 4930 }
55c2d311
TG
4931 }
4932 break;
bc1c7e93 4933
55c2d311 4934 case EXACT_DIV_EXPR:
76a7314d 4935 if (op1_is_constant && size <= HOST_BITS_PER_WIDE_INT)
55c2d311
TG
4936 {
4937 HOST_WIDE_INT d = INTVAL (op1);
4938 unsigned HOST_WIDE_INT ml;
91ce572a 4939 int pre_shift;
55c2d311
TG
4940 rtx t1;
4941
146ec50f 4942 pre_shift = ctz_or_zero (d);
91ce572a
CC
4943 ml = invert_mod2n (d >> pre_shift, size);
4944 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
eb6c3df1 4945 pre_shift, NULL_RTX, unsignedp);
69107307 4946 quotient = expand_mult (compute_mode, t1,
2496c7bd 4947 gen_int_mode (ml, compute_mode),
31ff3e0b 4948 NULL_RTX, 1);
55c2d311
TG
4949
4950 insn = get_last_insn ();
7543f918
JR
4951 set_dst_reg_note (insn, REG_EQUAL,
4952 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4953 compute_mode, op0, op1),
4954 quotient);
55c2d311
TG
4955 }
4956 break;
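/* Illustration: EXACT_DIV_EXPR promises that op0 is a multiple of
   op1, so after shifting out the power-of-2 factor the division is a
   multiplication by the inverse of the odd part modulo 2^size.
   Assuming a 32-bit mode and d = 3, invert_mod2n gives 0xAAAAAAAB,
   since 3 * 0xAAAAAAAB == 1 (mod 2^32); then e.g. 21 * 0xAAAAAAAB
   == 7 (mod 2^32), with no correction step.  */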
4957
4958 case ROUND_DIV_EXPR:
4959 case ROUND_MOD_EXPR:
69f61901
RK
4960 if (unsignedp)
4961 {
4962 rtx tem;
f3f6fb16 4963 rtx_code_label *label;
69f61901
RK
4964 label = gen_label_rtx ();
4965 quotient = gen_reg_rtx (compute_mode);
4966 remainder = gen_reg_rtx (compute_mode);
4967 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4968 {
4969 rtx tem;
4970 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4971 quotient, 1, OPTAB_LIB_WIDEN);
4972 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4973 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4974 remainder, 1, OPTAB_LIB_WIDEN);
4975 }
0a81f074 4976 tem = plus_constant (compute_mode, op1, -1);
eb6c3df1 4977 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
f5963e61 4978 do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
69f61901
RK
4979 expand_inc (quotient, const1_rtx);
4980 expand_dec (remainder, op1);
4981 emit_label (label);
4982 }
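/* Illustration of the rounding test above: after truncation the
   quotient is bumped when r > (d - 1) / 2, i.e. when the discarded
   fraction is at least one half.  13 / 4 gives q = 3, r = 1 <= 1, so
   3 stands; 14 / 4 gives r = 2 > 1, so q becomes 4 (exact halves
   round away from zero).  */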
4983 else
4984 {
4985 rtx abs_rem, abs_op1, tem, mask;
f3f6fb16 4986 rtx_code_label *label;
69f61901
RK
4987 label = gen_label_rtx ();
4988 quotient = gen_reg_rtx (compute_mode);
4989 remainder = gen_reg_rtx (compute_mode);
4990 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4991 {
4992 rtx tem;
4993 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4994 quotient, 0, OPTAB_LIB_WIDEN);
4995 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4996 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4997 remainder, 0, OPTAB_LIB_WIDEN);
4998 }
91ce572a
CC
4999 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
5000 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
69f61901 5001 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
eb6c3df1 5002 1, NULL_RTX, 1);
f5963e61 5003 do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
69f61901
RK
5004 tem = expand_binop (compute_mode, xor_optab, op0, op1,
5005 NULL_RTX, 0, OPTAB_WIDEN);
5006 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
eb6c3df1 5007 size - 1, NULL_RTX, 0);
69f61901
RK
5008 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
5009 NULL_RTX, 0, OPTAB_WIDEN);
5010 tem = expand_binop (compute_mode, sub_optab, tem, mask,
5011 NULL_RTX, 0, OPTAB_WIDEN);
5012 expand_inc (quotient, tem);
5013 tem = expand_binop (compute_mode, xor_optab, mask, op1,
5014 NULL_RTX, 0, OPTAB_WIDEN);
5015 tem = expand_binop (compute_mode, sub_optab, tem, mask,
5016 NULL_RTX, 0, OPTAB_WIDEN);
5017 expand_dec (remainder, tem);
5018 emit_label (label);
5019 }
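/* Illustration: mask is (op0 ^ op1) >> (size - 1), i.e. all ones when
   the operands have opposite signs and zero otherwise, and
   (x ^ mask) - mask conditionally negates x.  So whenever
   2 * |r| >= |op1| the quotient is adjusted by +1 or -1 and the
   remainder by -op1 or +op1, matching the sign of the exact
   quotient, all without a second branch.  */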
5020 return gen_lowpart (mode, rem_flag ? remainder : quotient);
c410d49e 5021
e9a25f70 5022 default:
5b0264cb 5023 gcc_unreachable ();
55c2d311 5024 }
44037a66 5025
55c2d311 5026 if (quotient == 0)
44037a66 5027 {
a45cf58c
RK
5028 if (target && GET_MODE (target) != compute_mode)
5029 target = 0;
5030
55c2d311 5031 if (rem_flag)
44037a66 5032 {
32fdf36b 5033 /* Try to produce the remainder without producing the quotient.
d6a7951f 5034 If we seem to have a divmod pattern that does not require widening,
b20b352b 5035 don't try widening here. We should really have a WIDEN argument
32fdf36b
TG
5036 to expand_twoval_binop, since what we'd really like to do here is
5037 1) try a mod insn in compute_mode
5038 2) try a divmod insn in compute_mode
5039 3) try a div insn in compute_mode and multiply-subtract to get
5040 remainder
5041 4) try the same things with widening allowed. */
5042 remainder
5043 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5044 op0, op1, target,
5045 unsignedp,
947131ba 5046 ((optab_handler (optab2, compute_mode)
32fdf36b
TG
5047 != CODE_FOR_nothing)
5048 ? OPTAB_DIRECT : OPTAB_WIDEN));
55c2d311 5049 if (remainder == 0)
44037a66
TG
5050 {
5051 /* No luck there. Can we do remainder and divide at once
5052 without a library call? */
55c2d311
TG
5053 remainder = gen_reg_rtx (compute_mode);
5054 if (! expand_twoval_binop ((unsignedp
5055 ? udivmod_optab
5056 : sdivmod_optab),
5057 op0, op1,
5058 NULL_RTX, remainder, unsignedp))
5059 remainder = 0;
44037a66 5060 }
55c2d311
TG
5061
5062 if (remainder)
5063 return gen_lowpart (mode, remainder);
44037a66 5064 }
44037a66 5065
dc38b292
RK
5066 /* Produce the quotient. Try a quotient insn, but not a library call.
5067 If we have a divmod in this mode, use it in preference to widening
5068 the div (for this test we assume it will not fail). Note that optab2
5069 is set to the one of the two optabs that the call below will use. */
5070 quotient
5071 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
5072 op0, op1, rem_flag ? NULL_RTX : target,
5073 unsignedp,
947131ba 5074 ((optab_handler (optab2, compute_mode)
dc38b292
RK
5075 != CODE_FOR_nothing)
5076 ? OPTAB_DIRECT : OPTAB_WIDEN));
5077
55c2d311 5078 if (quotient == 0)
44037a66
TG
5079 {
5080 /* No luck there. Try a quotient-and-remainder insn,
5081 keeping the quotient alone. */
55c2d311 5082 quotient = gen_reg_rtx (compute_mode);
44037a66 5083 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
55c2d311
TG
5084 op0, op1,
5085 quotient, NULL_RTX, unsignedp))
5086 {
5087 quotient = 0;
5088 if (! rem_flag)
5089 /* Still no luck. If we are not computing the remainder,
5090 use a library call for the quotient. */
5091 quotient = sign_expand_binop (compute_mode,
5092 udiv_optab, sdiv_optab,
5093 op0, op1, target,
5094 unsignedp, OPTAB_LIB_WIDEN);
5095 }
44037a66 5096 }
44037a66
TG
5097 }
5098
44037a66
TG
5099 if (rem_flag)
5100 {
a45cf58c
RK
5101 if (target && GET_MODE (target) != compute_mode)
5102 target = 0;
5103
55c2d311 5104 if (quotient == 0)
b3f8d95d
MM
5105 {
5106 /* No divide instruction either. Use library for remainder. */
5107 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5108 op0, op1, target,
5109 unsignedp, OPTAB_LIB_WIDEN);
5110 /* No remainder function. Try a quotient-and-remainder
5111 function, keeping the remainder. */
5112 if (!remainder)
5113 {
5114 remainder = gen_reg_rtx (compute_mode);
b8698a0f 5115 if (!expand_twoval_binop_libfunc
b3f8d95d
MM
5116 (unsignedp ? udivmod_optab : sdivmod_optab,
5117 op0, op1,
5118 NULL_RTX, remainder,
5119 unsignedp ? UMOD : MOD))
5120 remainder = NULL_RTX;
5121 }
5122 }
44037a66
TG
5123 else
5124 {
5125 /* We divided. Now finish doing X - Y * (X / Y). */
55c2d311
TG
5126 remainder = expand_mult (compute_mode, quotient, op1,
5127 NULL_RTX, unsignedp);
5128 remainder = expand_binop (compute_mode, sub_optab, op0,
5129 remainder, target, unsignedp,
5130 OPTAB_LIB_WIDEN);
44037a66
TG
5131 }
5132 }
5133
55c2d311 5134 return gen_lowpart (mode, rem_flag ? remainder : quotient);
44037a66
TG
5135}
5136\f
5137/* Return a tree node with data type TYPE, describing the value of X.
4dfa0342 5138 Usually this is a VAR_DECL, if there is no obvious better choice.
44037a66 5139 X may be an expression; however, we only support those expressions
6d2f8887 5140 generated by loop.c. */
44037a66
TG
5141
5142tree
502b8322 5143make_tree (tree type, rtx x)
44037a66
TG
5144{
5145 tree t;
5146
5147 switch (GET_CODE (x))
5148 {
5149 case CONST_INT:
807e902e 5150 case CONST_WIDE_INT:
f079167a 5151 t = wide_int_to_tree (type, rtx_mode_t (x, TYPE_MODE (type)));
807e902e 5152 return t;
b8698a0f 5153
44037a66 5154 case CONST_DOUBLE:
807e902e
KZ
5155 STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
5156 if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
5157 t = wide_int_to_tree (type,
5158 wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
5159 HOST_BITS_PER_WIDE_INT * 2));
44037a66 5160 else
34a72c33 5161 t = build_real (type, *CONST_DOUBLE_REAL_VALUE (x));
44037a66
TG
5162
5163 return t;
69ef87e2
AH
5164
5165 case CONST_VECTOR:
5166 {
b8b7f162
RS
5167 int units = CONST_VECTOR_NUNITS (x);
5168 tree itype = TREE_TYPE (type);
d2a12ae7 5169 tree *elts;
b8b7f162 5170 int i;
69ef87e2 5171
69ef87e2 5172 /* Build a tree with vector elements. */
d2a12ae7 5173 elts = XALLOCAVEC (tree, units);
69ef87e2
AH
5174 for (i = units - 1; i >= 0; --i)
5175 {
b8b7f162 5176 rtx elt = CONST_VECTOR_ELT (x, i);
d2a12ae7 5177 elts[i] = make_tree (itype, elt);
69ef87e2 5178 }
c410d49e 5179
d2a12ae7 5180 return build_vector (type, elts);
69ef87e2
AH
5181 }
5182
44037a66 5183 case PLUS:
4845b383
KH
5184 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5185 make_tree (type, XEXP (x, 1)));
c410d49e 5186
44037a66 5187 case MINUS:
4845b383
KH
5188 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5189 make_tree (type, XEXP (x, 1)));
c410d49e 5190
44037a66 5191 case NEG:
4845b383 5192 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
44037a66
TG
5193
5194 case MULT:
4845b383
KH
5195 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5196 make_tree (type, XEXP (x, 1)));
c410d49e 5197
44037a66 5198 case ASHIFT:
4845b383
KH
5199 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5200 make_tree (type, XEXP (x, 1)));
c410d49e 5201
44037a66 5202 case LSHIFTRT:
ca5ba2a3 5203 t = unsigned_type_for (type);
aeba6c28
JM
5204 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5205 make_tree (t, XEXP (x, 0)),
5206 make_tree (type, XEXP (x, 1))));
c410d49e 5207
44037a66 5208 case ASHIFTRT:
12753674 5209 t = signed_type_for (type);
aeba6c28
JM
5210 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5211 make_tree (t, XEXP (x, 0)),
5212 make_tree (type, XEXP (x, 1))));
c410d49e 5213
44037a66
TG
5214 case DIV:
5215 if (TREE_CODE (type) != REAL_TYPE)
12753674 5216 t = signed_type_for (type);
44037a66
TG
5217 else
5218 t = type;
5219
aeba6c28
JM
5220 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5221 make_tree (t, XEXP (x, 0)),
5222 make_tree (t, XEXP (x, 1))));
44037a66 5223 case UDIV:
ca5ba2a3 5224 t = unsigned_type_for (type);
aeba6c28
JM
5225 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5226 make_tree (t, XEXP (x, 0)),
5227 make_tree (t, XEXP (x, 1))));
5c45425b
RH
5228
5229 case SIGN_EXTEND:
5230 case ZERO_EXTEND:
ae2bcd98
RS
5231 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5232 GET_CODE (x) == ZERO_EXTEND);
aeba6c28 5233 return fold_convert (type, make_tree (t, XEXP (x, 0)));
5c45425b 5234
84816907
JM
5235 case CONST:
5236 return make_tree (type, XEXP (x, 0));
5237
5238 case SYMBOL_REF:
5239 t = SYMBOL_REF_DECL (x);
5240 if (t)
5241 return fold_convert (type, build_fold_addr_expr (t));
191816a3 5242 /* fall through. */
84816907 5243
4dfa0342 5244 default:
c2255bc4 5245 t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
d1608933 5246
d4ebfa65
BE
5247 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5248 address mode to pointer mode. */
5ae6cd0d 5249 if (POINTER_TYPE_P (type))
d4ebfa65
BE
5250 x = convert_memory_address_addr_space
5251 (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
d1608933 5252
8a0aa06e
RH
5253 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5254 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
820cc88f 5255 t->decl_with_rtl.rtl = x;
4dfa0342 5256
44037a66
TG
5257 return t;
5258 }
5259}
44037a66
TG
5260\f
5261/* Compute the logical-and of OP0 and OP1, storing it in TARGET
5262 and returning TARGET.
5263
5264 If TARGET is 0, a pseudo-register or constant is returned. */
5265
5266rtx
ef4bddc2 5267expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
44037a66 5268{
22273300 5269 rtx tem = 0;
44037a66 5270
22273300
JJ
5271 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5272 tem = simplify_binary_operation (AND, mode, op0, op1);
5273 if (tem == 0)
44037a66 5274 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
44037a66
TG
5275
5276 if (target == 0)
5277 target = tem;
5278 else if (tem != target)
5279 emit_move_insn (target, tem);
5280 return target;
5281}
495499da 5282
a41a56b6 5283/* Helper function for emit_store_flag. */
2d52a3a1 5284rtx
ef12ae45 5285emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
ef4bddc2 5286 machine_mode mode, machine_mode compare_mode,
92355a9c 5287 int unsignedp, rtx x, rtx y, int normalizep,
ef4bddc2 5288 machine_mode target_mode)
a41a56b6 5289{
a5c7d693 5290 struct expand_operand ops[4];
f3f6fb16
DM
5291 rtx op0, comparison, subtarget;
5292 rtx_insn *last;
ef4bddc2 5293 machine_mode result_mode = targetm.cstore_mode (icode);
45475a3f
PB
5294
5295 last = get_last_insn ();
5296 x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5297 y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
a5c7d693 5298 if (!x || !y)
45475a3f
PB
5299 {
5300 delete_insns_since (last);
5301 return NULL_RTX;
5302 }
5303
92355a9c
PB
5304 if (target_mode == VOIDmode)
5305 target_mode = result_mode;
5306 if (!target)
5307 target = gen_reg_rtx (target_mode);
b8698a0f 5308
a5c7d693 5309 comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
45475a3f 5310
a5c7d693
RS
5311 create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5312 create_fixed_operand (&ops[1], comparison);
5313 create_fixed_operand (&ops[2], x);
5314 create_fixed_operand (&ops[3], y);
5315 if (!maybe_expand_insn (icode, 4, ops))
5316 {
5317 delete_insns_since (last);
5318 return NULL_RTX;
5319 }
5320 subtarget = ops[0].value;
495499da 5321
a41a56b6
RE
5322 /* If we are converting to a wider mode, first convert to
5323 TARGET_MODE, then normalize. This produces better combining
5324 opportunities on machines that have a SIGN_EXTRACT when we are
5325 testing a single bit. This mostly benefits the 68k.
5326
5327 If STORE_FLAG_VALUE does not have the sign bit set when
5328 interpreted in MODE, we can do this conversion as unsigned, which
5329 is usually more efficient. */
45475a3f 5330 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
a41a56b6
RE
5331 {
5332 convert_move (target, subtarget,
2d0c270f
BS
5333 val_signbit_known_clear_p (result_mode,
5334 STORE_FLAG_VALUE));
a41a56b6 5335 op0 = target;
45475a3f 5336 result_mode = target_mode;
a41a56b6
RE
5337 }
5338 else
5339 op0 = subtarget;
5340
5341 /* If we want to keep subexpressions around, don't reuse our last
5342 target. */
5343 if (optimize)
5344 subtarget = 0;
5345
5346 /* Now normalize to the proper value in MODE. Sometimes we don't
5347 have to do anything. */
5348 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5349 ;
5350 /* STORE_FLAG_VALUE might be the most negative number, so write
5351 the comparison this way to avoid a compile-time warning. */
5352 else if (- normalizep == STORE_FLAG_VALUE)
45475a3f 5353 op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
a41a56b6
RE
5354
5355 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5356 it hard to use a value of just the sign bit due to ANSI integer
5357 constant typing rules. */
2d0c270f 5358 else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
45475a3f 5359 op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
eb6c3df1 5360 GET_MODE_BITSIZE (result_mode) - 1, subtarget,
a41a56b6
RE
5361 normalizep == 1);
5362 else
5363 {
5364 gcc_assert (STORE_FLAG_VALUE & 1);
5365
45475a3f 5366 op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
a41a56b6 5367 if (normalizep == -1)
45475a3f 5368 op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
a41a56b6
RE
5369 }
5370
5371 /* If we were converting to a smaller mode, do the conversion now. */
45475a3f 5372 if (target_mode != result_mode)
a41a56b6
RE
5373 {
5374 convert_move (target, op0, 0);
5375 return target;
5376 }
5377 else
5378 return op0;
5379}
5380
44037a66 5381
ef12ae45
PB
5382/* A subroutine of emit_store_flag only including "tricks" that do not
5383 need a recursive call. These are kept separate to avoid infinite
5384 loops. */
44037a66 5385
ef12ae45
PB
5386static rtx
5387emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
ef4bddc2
RS
5388 machine_mode mode, int unsignedp, int normalizep,
5389 machine_mode target_mode)
44037a66
TG
5390{
5391 rtx subtarget;
5392 enum insn_code icode;
ef4bddc2 5393 machine_mode compare_mode;
f90b7a5a 5394 enum mode_class mclass;
45475a3f 5395 enum rtx_code scode;
44037a66 5396
b30f05db
BS
5397 if (unsignedp)
5398 code = unsigned_condition (code);
45475a3f 5399 scode = swap_condition (code);
b30f05db 5400
c2615a67
RK
5401 /* If one operand is constant, make it the second one. Only do this
5402 if the other operand is not constant as well. */
5403
8c9864f3 5404 if (swap_commutative_operands_p (op0, op1))
c2615a67 5405 {
fab27f52 5406 std::swap (op0, op1);
c2615a67
RK
5407 code = swap_condition (code);
5408 }
5409
6405e07b
DE
5410 if (mode == VOIDmode)
5411 mode = GET_MODE (op0);
5412
c410d49e 5413 /* For some comparisons with 1 and -1, we can convert this to
44037a66 5414 comparisons with zero. This will often produce more opportunities for
0f41302f 5415 store-flag insns. */
44037a66
TG
5416
5417 switch (code)
5418 {
5419 case LT:
5420 if (op1 == const1_rtx)
5421 op1 = const0_rtx, code = LE;
5422 break;
5423 case LE:
5424 if (op1 == constm1_rtx)
5425 op1 = const0_rtx, code = LT;
5426 break;
5427 case GE:
5428 if (op1 == const1_rtx)
5429 op1 = const0_rtx, code = GT;
5430 break;
5431 case GT:
5432 if (op1 == constm1_rtx)
5433 op1 = const0_rtx, code = GE;
5434 break;
5435 case GEU:
5436 if (op1 == const1_rtx)
5437 op1 = const0_rtx, code = NE;
5438 break;
5439 case LTU:
5440 if (op1 == const1_rtx)
5441 op1 = const0_rtx, code = EQ;
5442 break;
e9a25f70
JL
5443 default:
5444 break;
44037a66
TG
5445 }
5446
884815aa
JB
5447 /* If we are comparing a double-word integer with zero or -1, we can
5448 convert the comparison into one involving a single word. */
6912b84b
RK
5449 if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5450 && GET_MODE_CLASS (mode) == MODE_INT
3c0cb5de 5451 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
6912b84b 5452 {
fab27f52 5453 rtx tem;
884815aa
JB
5454 if ((code == EQ || code == NE)
5455 && (op1 == const0_rtx || op1 == constm1_rtx))
6912b84b 5456 {
1ed20a40 5457 rtx op00, op01;
8433f113 5458
a41a56b6
RE
5459 /* Do a logical OR or AND of the two words and compare the
5460 result. */
8433f113
RH
5461 op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5462 op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
1ed20a40
PB
5463 tem = expand_binop (word_mode,
5464 op1 == const0_rtx ? ior_optab : and_optab,
5465 op00, op01, NULL_RTX, unsignedp,
5466 OPTAB_DIRECT);
884815aa 5467
1ed20a40
PB
5468 if (tem != 0)
5469 tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
92355a9c 5470 unsignedp, normalizep);
6912b84b 5471 }
884815aa 5472 else if ((code == LT || code == GE) && op1 == const0_rtx)
8433f113
RH
5473 {
5474 rtx op0h;
5475
5476 /* If testing the sign bit, can just test on high word. */
5477 op0h = simplify_gen_subreg (word_mode, op0, mode,
a41a56b6
RE
5478 subreg_highpart_offset (word_mode,
5479 mode));
1ed20a40
PB
5480 tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5481 unsignedp, normalizep);
5482 }
5483 else
5484 tem = NULL_RTX;
5485
5486 if (tem)
5487 {
92355a9c 5488 if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
1ed20a40 5489 return tem;
92355a9c
PB
5490 if (!target)
5491 target = gen_reg_rtx (target_mode);
1ed20a40
PB
5492
5493 convert_move (target, tem,
2d0c270f
BS
5494 !val_signbit_known_set_p (word_mode,
5495 (normalizep ? normalizep
5496 : STORE_FLAG_VALUE)));
1ed20a40 5497 return target;
8433f113 5498 }
6912b84b
RK
5499 }
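/* Illustration: on a 32-bit target a DImode "x == 0" becomes
   "(lo | hi) == 0" in word_mode, "x == -1" becomes
   "(lo & hi) == -1", and "x < 0" needs only the sign bit of the high
   word, so each case reduces to a single word-mode store-flag.  */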
5500
44037a66
TG
5501 /* If this is A < 0 or A >= 0, we can do this by taking the ones
5502 complement of A (for GE) and shifting the sign bit to the low bit. */
5503 if (op1 == const0_rtx && (code == LT || code == GE)
5504 && GET_MODE_CLASS (mode) == MODE_INT
5505 && (normalizep || STORE_FLAG_VALUE == 1
2d0c270f 5506 || val_signbit_p (mode, STORE_FLAG_VALUE)))
44037a66 5507 {
8deb7047 5508 subtarget = target;
44037a66 5509
495499da
PB
5510 if (!target)
5511 target_mode = mode;
5512
44037a66
TG
5513 /* If the result is to be wider than OP0, it is best to convert it
5514 first. If it is to be narrower, it is *incorrect* to convert it
5515 first. */
495499da 5516 else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
44037a66 5517 {
81722fa9 5518 op0 = convert_modes (target_mode, mode, op0, 0);
44037a66
TG
5519 mode = target_mode;
5520 }
5521
5522 if (target_mode != mode)
5523 subtarget = 0;
5524
5525 if (code == GE)
1d6eaf3d
RK
5526 op0 = expand_unop (mode, one_cmpl_optab, op0,
5527 ((STORE_FLAG_VALUE == 1 || normalizep)
5528 ? 0 : subtarget), 0);
44037a66 5529
1d6eaf3d 5530 if (STORE_FLAG_VALUE == 1 || normalizep)
44037a66
TG
5531 /* If we are supposed to produce a 0/1 value, we want to do
5532 a logical shift from the sign bit to the low-order bit; for
5533 a -1/0 value, we do an arithmetic shift. */
5534 op0 = expand_shift (RSHIFT_EXPR, mode, op0,
eb6c3df1 5535 GET_MODE_BITSIZE (mode) - 1,
44037a66
TG
5536 subtarget, normalizep != -1);
5537
5538 if (mode != target_mode)
c2ec26b8 5539 op0 = convert_modes (target_mode, mode, op0, 0);
44037a66
TG
5540
5541 return op0;
5542 }
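/* Illustration: with normalizep == 1 this computes "A < 0" as the
   logical shift (unsigned) A >> (size - 1) and "A >= 0" as
   (unsigned) ~A >> (size - 1); for a -1/0 result the arithmetic
   shift is used instead.  */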
5543
f90b7a5a 5544 mclass = GET_MODE_CLASS (mode);
c94843d2 5545 FOR_EACH_MODE_FROM (compare_mode, mode)
a41a56b6 5546 {
ef4bddc2 5547 machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
947131ba 5548 icode = optab_handler (cstore_optab, optab_mode);
f90b7a5a 5549 if (icode != CODE_FOR_nothing)
a41a56b6 5550 {
a41a56b6 5551 do_pending_stack_adjust ();
fab27f52
MM
5552 rtx tem = emit_cstore (target, icode, code, mode, compare_mode,
5553 unsignedp, op0, op1, normalizep, target_mode);
45475a3f
PB
5554 if (tem)
5555 return tem;
44037a66 5556
45475a3f 5557 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
44037a66 5558 {
ef12ae45 5559 tem = emit_cstore (target, icode, scode, mode, compare_mode,
92355a9c 5560 unsignedp, op1, op0, normalizep, target_mode);
45475a3f
PB
5561 if (tem)
5562 return tem;
44037a66 5563 }
f90b7a5a 5564 break;
44037a66
TG
5565 }
5566 }
5567
ef12ae45
PB
5568 return 0;
5569}
5570
8afacf2c
RS
5571/* Subroutine of emit_store_flag that handles cases in which the operands
5572 are scalar integers. SUBTARGET is the target to use for temporary
5573 operations and TRUEVAL is the value to store when the condition is
5574 true. All other arguments are as for emit_store_flag. */
ef12ae45
PB
5575
5576rtx
8afacf2c
RS
5577emit_store_flag_int (rtx target, rtx subtarget, enum rtx_code code, rtx op0,
5578 rtx op1, machine_mode mode, int unsignedp,
5579 int normalizep, rtx trueval)
ef12ae45 5580{
ef4bddc2 5581 machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
8afacf2c
RS
5582 rtx_insn *last = get_last_insn ();
5583 rtx tem;
495499da
PB
5584
5585 /* If this is an equality comparison of integers, we can try to exclusive-or
44037a66
TG
5586 (or subtract) the two operands and use a recursive call to try the
5587 comparison with zero. Don't do any of these cases if branches are
5588 very cheap. */
5589
495499da 5590 if ((code == EQ || code == NE) && op1 != const0_rtx)
44037a66
TG
5591 {
5592 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5593 OPTAB_WIDEN);
5594
5595 if (tem == 0)
5596 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5597 OPTAB_WIDEN);
5598 if (tem != 0)
a22fb74c
AK
5599 tem = emit_store_flag (target, code, tem, const0_rtx,
5600 mode, unsignedp, normalizep);
495499da
PB
5601 if (tem != 0)
5602 return tem;
5603
5604 delete_insns_since (last);
5605 }
5606
5607 /* For integer comparisons, try the reverse comparison. However, for
5608 small X and if we'd have anyway to extend, implementing "X != 0"
5609 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
8afacf2c 5610 rtx_code rcode = reverse_condition (code);
495499da 5611 if (can_compare_p (rcode, mode, ccp_store_flag)
947131ba 5612 && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
495499da
PB
5613 && code == NE
5614 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5615 && op1 == const0_rtx))
5616 {
533d4b99
PB
5617 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5618 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5619
495499da 5620 /* Again, for the reverse comparison, use either an addition or a XOR. */
533d4b99 5621 if (want_add
e548c9df 5622 && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
533d4b99 5623 optimize_insn_for_speed_p ()) == 0)
495499da 5624 {
ef12ae45 5625 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
92355a9c 5626 STORE_FLAG_VALUE, target_mode);
495499da 5627 if (tem != 0)
8afacf2c 5628 tem = expand_binop (target_mode, add_optab, tem,
2f1cd2eb
RS
5629 gen_int_mode (normalizep, target_mode),
5630 target, 0, OPTAB_WIDEN);
495499da 5631 }
533d4b99 5632 else if (!want_add
e548c9df 5633 && rtx_cost (trueval, mode, XOR, 1,
533d4b99 5634 optimize_insn_for_speed_p ()) == 0)
495499da 5635 {
ef12ae45 5636 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
92355a9c 5637 normalizep, target_mode);
495499da 5638 if (tem != 0)
8afacf2c 5639 tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
495499da
PB
5640 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5641 }
5642
5643 if (tem != 0)
5644 return tem;
5645 delete_insns_since (last);
44037a66
TG
5646 }
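/* Illustration: when only the reverse comparison R' is available, the
   wanted result is one cheap operation away.  With STORE_FLAG_VALUE
   == 1 and normalizep == -1, R = R' + -1 (the add_optab path); in
   the XOR path R' is computed with the same normalization, so
   R = R' ^ trueval flips it between 0 and trueval.  */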
5647
c410d49e 5648 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
44037a66
TG
5649 the constant zero. Reject all other comparisons at this point. Only
5650 do LE and GT if branches are expensive since they are expensive on
5651 2-operand machines. */
5652
495499da 5653 if (op1 != const0_rtx
44037a66 5654 || (code != EQ && code != NE
3a4fd356
JH
5655 && (BRANCH_COST (optimize_insn_for_speed_p (),
5656 false) <= 1 || (code != LE && code != GT))))
44037a66
TG
5657 return 0;
5658
44037a66
TG
5659 /* Try to put the result of the comparison in the sign bit. Assume we can't
5660 do the necessary operation below. */
5661
5662 tem = 0;
5663
5664 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5665 the sign bit set. */
5666
5667 if (code == LE)
5668 {
5669 /* This is destructive, so SUBTARGET can't be OP0. */
5670 if (rtx_equal_p (subtarget, op0))
5671 subtarget = 0;
5672
5673 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5674 OPTAB_WIDEN);
5675 if (tem)
5676 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5677 OPTAB_WIDEN);
5678 }
5679
5680 /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5681 number of bits in the mode of OP0, minus one. */
5682
5683 if (code == GT)
5684 {
5685 if (rtx_equal_p (subtarget, op0))
5686 subtarget = 0;
5687
ea000c3f
EB
5688 tem = maybe_expand_shift (RSHIFT_EXPR, mode, op0,
5689 GET_MODE_BITSIZE (mode) - 1,
5690 subtarget, 0);
5691 if (tem)
5692 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5693 OPTAB_WIDEN);
44037a66 5694 }
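/* Illustration of the two sign-bit tricks above: A | (A - 1) is
   0 | -1 = -1 for A = 0 and 3 | 2 = 3 for A = 3, so its sign bit is
   exactly "A <= 0".  Likewise the sign bit of (A >> (size - 1)) - A
   is "A > 0": the value is -A < 0 for positive A, -(A + 1) >= 0 for
   negative A, and 0 for A = 0.  */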
c410d49e 5695
44037a66
TG
5696 if (code == EQ || code == NE)
5697 {
5698 /* For EQ or NE, one way to do the comparison is to apply an operation
cc2902df 5699 that converts the operand into a positive number if it is nonzero
44037a66
TG
5700 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5701 for NE we negate. This puts the result in the sign bit. Then we
c410d49e 5702 normalize with a shift, if needed.
44037a66
TG
5703
5704 Two operations that can do the above actions are ABS and FFS, so try
5705 them. If that doesn't work, and MODE is smaller than a full word,
36d747f6 5706 we can use zero-extension to the wider mode (an unsigned conversion)
44037a66
TG
5707 as the operation. */
5708
c410d49e
EC
5709 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5710 that is compensated by the subsequent overflow when subtracting
30f7a378 5711 one / negating. */
91ce572a 5712
947131ba 5713 if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
44037a66 5714 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
947131ba 5715 else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
44037a66
TG
5716 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5717 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5718 {
c2ec26b8 5719 tem = convert_modes (word_mode, mode, op0, 1);
81722fa9 5720 mode = word_mode;
44037a66
TG
5721 }
5722
5723 if (tem != 0)
5724 {
5725 if (code == EQ)
5726 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5727 0, OPTAB_WIDEN);
5728 else
5729 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5730 }
5731
5732 /* If we couldn't do it that way, for NE we can "or" the two's complement
5733 of the value with itself. For EQ, we take the one's complement of
5734 that "or", which is an extra insn, so we only handle EQ if branches
5735 are expensive. */
5736
3a4fd356
JH
5737 if (tem == 0
5738 && (code == NE
5739 || BRANCH_COST (optimize_insn_for_speed_p (),
8afacf2c 5740 false) > 1))
44037a66 5741 {
36d747f6
RS
5742 if (rtx_equal_p (subtarget, op0))
5743 subtarget = 0;
5744
44037a66
TG
5745 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5746 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5747 OPTAB_WIDEN);
5748
5749 if (tem && code == EQ)
5750 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5751 }
5752 }
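/* Illustration: -A | A has its sign bit set exactly when A != 0
   (A = 5 gives -5 | 5, sign set; A = 0 gives 0), so after the
   normalizing shift below it is the NE result, and one extra
   complement turns it into EQ.  */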
5753
5754 if (tem && normalizep)
ea000c3f
EB
5755 tem = maybe_expand_shift (RSHIFT_EXPR, mode, tem,
5756 GET_MODE_BITSIZE (mode) - 1,
5757 subtarget, normalizep == 1);
44037a66 5758
91e66235 5759 if (tem)
44037a66 5760 {
495499da 5761 if (!target)
8afacf2c 5762 ;
495499da 5763 else if (GET_MODE (tem) != target_mode)
91e66235
MM
5764 {
5765 convert_move (target, tem, 0);
5766 tem = target;
5767 }
5768 else if (!subtarget)
5769 {
5770 emit_move_insn (target, tem);
5771 tem = target;
5772 }
44037a66 5773 }
91e66235 5774 else
44037a66
TG
5775 delete_insns_since (last);
5776
5777 return tem;
5778}
04a8ee2f 5779
8afacf2c
RS
5780/* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5781 and storing in TARGET. Normally return TARGET.
5782 Return 0 if that cannot be done.
5783
5784 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5785 it is VOIDmode, they cannot both be CONST_INT.
5786
5787 UNSIGNEDP is for the case where we have to widen the operands
5788 to perform the operation. It says to use zero-extension.
5789
5790 NORMALIZEP is 1 if we should convert the result to be either zero
5791 or one. NORMALIZEP is -1 if we should convert the result to be
5792 either zero or -1. If NORMALIZEP is zero, the result will be left
5793 "raw" out of the scc insn. */
5794
5795rtx
5796emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5797 machine_mode mode, int unsignedp, int normalizep)
5798{
5799 machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5800 enum rtx_code rcode;
5801 rtx subtarget;
5802 rtx tem, trueval;
5803 rtx_insn *last;
5804
5805 /* If we compare constants, we shouldn't use a store-flag operation,
5806 but a constant load. We can get there via the vanilla route that
5807 usually generates a compare-branch sequence, but will in this case
5808 fold the comparison to a constant, and thus elide the branch. */
5809 if (CONSTANT_P (op0) && CONSTANT_P (op1))
5810 return NULL_RTX;
5811
5812 tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5813 target_mode);
5814 if (tem)
5815 return tem;
5816
5817 /* If we reached here, we can't do this with a scc insn, however there
5818 are some comparisons that can be done in other ways. Don't do any
5819 of these cases if branches are very cheap. */
5820 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5821 return 0;
5822
5823 /* See what we need to return. We can only return a 1, -1, or the
5824 sign bit. */
5825
5826 if (normalizep == 0)
5827 {
5828 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5829 normalizep = STORE_FLAG_VALUE;
5830
5831 else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5832 ;
5833 else
5834 return 0;
5835 }
5836
5837 last = get_last_insn ();
5838
5839 /* If optimizing, use different pseudo registers for each insn, instead
5840 of reusing the same pseudo. This leads to better CSE, but slows
5841 down the compiler, since there are more pseudos. */
5842 subtarget = (!optimize
5843 && (target_mode == mode)) ? target : NULL_RTX;
5844 trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5845
5846 /* For floating-point comparisons, try the reverse comparison or try
5847 changing the "orderedness" of the comparison. */
5848 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5849 {
5850 enum rtx_code first_code;
5851 bool and_them;
5852
5853 rcode = reverse_condition_maybe_unordered (code);
5854 if (can_compare_p (rcode, mode, ccp_store_flag)
5855 && (code == ORDERED || code == UNORDERED
5856 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5857 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5858 {
5859 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5860 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5861
5862 /* For the reverse comparison, use either an addition or a XOR. */
5863 if (want_add
5864 && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5865 optimize_insn_for_speed_p ()) == 0)
5866 {
5867 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5868 STORE_FLAG_VALUE, target_mode);
5869 if (tem)
5870 return expand_binop (target_mode, add_optab, tem,
5871 gen_int_mode (normalizep, target_mode),
5872 target, 0, OPTAB_WIDEN);
5873 }
5874 else if (!want_add
5875 && rtx_cost (trueval, mode, XOR, 1,
5876 optimize_insn_for_speed_p ()) == 0)
5877 {
5878 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5879 normalizep, target_mode);
5880 if (tem)
5881 return expand_binop (target_mode, xor_optab, tem, trueval,
5882 target, INTVAL (trueval) >= 0,
5883 OPTAB_WIDEN);
5884 }
5885 }
5886
5887 delete_insns_since (last);
5888
5889 /* Cannot split ORDERED and UNORDERED, only try the above trick. */
5890 if (code == ORDERED || code == UNORDERED)
5891 return 0;
5892
5893 and_them = split_comparison (code, mode, &first_code, &code);
5894
5895 /* If there are no NaNs, the first comparison should always fall through.
5896 Effectively change the comparison to the other one. */
5897 if (!HONOR_NANS (mode))
5898 {
5899 gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5900 return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5901 target_mode);
5902 }
5903
5904 if (!HAVE_conditional_move)
5905 return 0;
5906
5907 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5908 conditional move. */
5909 tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5910 normalizep, target_mode);
5911 if (tem == 0)
5912 return 0;
5913
5914 if (and_them)
5915 tem = emit_conditional_move (target, code, op0, op1, mode,
5916 tem, const0_rtx, GET_MODE (tem), 0);
5917 else
5918 tem = emit_conditional_move (target, code, op0, op1, mode,
5919 trueval, tem, GET_MODE (tem), 0);
5920
5921 if (tem == 0)
5922 delete_insns_since (last);
5923 return tem;
5924 }
5925
5926 /* The remaining tricks only apply to integer comparisons. */
5927
5928 if (GET_MODE_CLASS (mode) == MODE_INT)
5929 return emit_store_flag_int (target, subtarget, code, op0, op1, mode,
5930 unsignedp, normalizep, trueval);
5931
5932 return 0;
5933}
5934
04a8ee2f
TG
5935/* Like emit_store_flag, but always succeeds. */
5936
5937rtx
502b8322 5938emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
ef4bddc2 5939 machine_mode mode, int unsignedp, int normalizep)
04a8ee2f 5940{
f3f6fb16
DM
5941 rtx tem;
5942 rtx_code_label *label;
495499da 5943 rtx trueval, falseval;
04a8ee2f
TG
5944
5945 /* First see if emit_store_flag can do the job. */
5946 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5947 if (tem != 0)
5948 return tem;
5949
495499da
PB
5950 if (!target)
5951 target = gen_reg_rtx (word_mode);
04a8ee2f 5952
495499da
PB
5953 /* If this failed, we have to do this with set/compare/jump/set code.
5954 For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */
5955 trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
b8698a0f 5956 if (code == NE
495499da
PB
5957 && GET_MODE_CLASS (mode) == MODE_INT
5958 && REG_P (target)
5959 && op0 == target
5960 && op1 == const0_rtx)
5961 {
5962 label = gen_label_rtx ();
1476d1bd 5963 do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, mode,
357067f2
JH
5964 NULL_RTX, NULL, label,
5965 profile_probability::uninitialized ());
495499da
PB
5966 emit_move_insn (target, trueval);
5967 emit_label (label);
5968 return target;
5969 }
04a8ee2f 5970
f8cfc6aa 5971 if (!REG_P (target)
04a8ee2f
TG
5972 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5973 target = gen_reg_rtx (GET_MODE (target));
5974
495499da
PB
5975 /* Jump in the right direction if the target cannot implement CODE
5976 but can jump on its reverse condition. */
5977 falseval = const0_rtx;
5978 if (! can_compare_p (code, mode, ccp_jump)
5979 && (! FLOAT_MODE_P (mode)
5980 || code == ORDERED || code == UNORDERED
5981 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5982 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5983 {
5984 enum rtx_code rcode;
5985 if (FLOAT_MODE_P (mode))
5986 rcode = reverse_condition_maybe_unordered (code);
5987 else
5988 rcode = reverse_condition (code);
5989
5990 /* Canonicalize to UNORDERED for the libcall. */
5991 if (can_compare_p (rcode, mode, ccp_jump)
5992 || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5993 {
5994 falseval = trueval;
5995 trueval = const0_rtx;
5996 code = rcode;
5997 }
5998 }
5999
6000 emit_move_insn (target, trueval);
04a8ee2f 6001 label = gen_label_rtx ();
1476d1bd 6002 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, NULL,
357067f2 6003 label, profile_probability::uninitialized ());
04a8ee2f 6004
495499da 6005 emit_move_insn (target, falseval);
44037a66
TG
6006 emit_label (label);
6007
6008 return target;
6009}
f5963e61
JL
6010\f
6011/* Perform possibly multi-word comparison and conditional jump to LABEL
feb04780
RS
6012 if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE. This is
6013 now a thin wrapper around do_compare_rtx_and_jump. */
f5963e61
JL
6014
6015static void
ef4bddc2 6016do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
f3f6fb16 6017 rtx_code_label *label)
f5963e61 6018{
feb04780 6019 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
1476d1bd 6020 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode, NULL_RTX,
357067f2 6021 NULL, label, profile_probability::uninitialized ());
f5963e61 6022}