/* Medium-level subroutines: convert bit-field store and extract
   and shifts, multiplies and divides to rtl instructions.
   Copyright (C) 1987-2015 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */


#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "rtl.h"
#include "df.h"
#include "diagnostic-core.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "tm_p.h"
#include "flags.h"
#include "insn-config.h"
#include "expmed.h"
#include "dojump.h"
#include "explow.h"
#include "calls.h"
#include "emit-rtl.h"
#include "varasm.h"
#include "stmt.h"
#include "expr.h"
#include "insn-codes.h"
#include "optabs.h"
#include "recog.h"
#include "langhooks.h"
#include "target.h"

struct target_expmed default_target_expmed;
#if SWITCHABLE_TARGET
struct target_expmed *this_target_expmed = &default_target_expmed;
#endif

static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   rtx);
static void store_fixed_bit_field_1 (rtx, unsigned HOST_WIDE_INT,
                                     unsigned HOST_WIDE_INT,
                                     rtx);
static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   rtx);
static rtx extract_fixed_bit_field (machine_mode, rtx,
                                    unsigned HOST_WIDE_INT,
                                    unsigned HOST_WIDE_INT, rtx, int);
static rtx extract_fixed_bit_field_1 (machine_mode, rtx,
                                      unsigned HOST_WIDE_INT,
                                      unsigned HOST_WIDE_INT, rtx, int);
static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
                                    unsigned HOST_WIDE_INT, int);
static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
static rtx expand_smod_pow2 (machine_mode, rtx, HOST_WIDE_INT);
static rtx expand_sdiv_pow2 (machine_mode, rtx, HOST_WIDE_INT);

/* Return a constant integer mask value of mode MODE with BITSIZE ones
   followed by BITPOS zeros, or the complement of that if COMPLEMENT.
   The mask is truncated if necessary to the width of mode MODE.  The
   mask is zero-extended if BITSIZE+BITPOS is too small for MODE.  */

static inline rtx
mask_rtx (machine_mode mode, int bitpos, int bitsize, bool complement)
{
  return immed_wide_int_const
    (wi::shifted_mask (bitpos, bitsize, complement,
                       GET_MODE_PRECISION (mode)), mode);
}

/* Test whether a value is zero or a power of two.  */
#define EXACT_POWER_OF_2_OR_ZERO_P(x) \
  (((x) & ((x) - (unsigned HOST_WIDE_INT) 1)) == 0)

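/* Worked examples (illustrative, not part of the original source):
   with 32-bit SImode, mask_rtx (SImode, 4, 8, false) builds the
   constant 0x00000ff0 (eight ones shifted up by four bits) and
   mask_rtx (SImode, 4, 8, true) its complement 0xfffff00f, while
   EXACT_POWER_OF_2_OR_ZERO_P holds for 0, 1, 2, 4, 8, ... and is
   false for values such as 3, 6 or 12.  */
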
struct init_expmed_rtl
{
  rtx reg;
  rtx plus;
  rtx neg;
  rtx mult;
  rtx sdiv;
  rtx udiv;
  rtx sdiv_32;
  rtx smod_32;
  rtx wide_mult;
  rtx wide_lshr;
  rtx wide_trunc;
  rtx shift;
  rtx shift_mult;
  rtx shift_add;
  rtx shift_sub0;
  rtx shift_sub1;
  rtx zext;
  rtx trunc;

  rtx pow2[MAX_BITS_PER_WORD];
  rtx cint[MAX_BITS_PER_WORD];
};

static void
init_expmed_one_conv (struct init_expmed_rtl *all, machine_mode to_mode,
                      machine_mode from_mode, bool speed)
{
  int to_size, from_size;
  rtx which;

  to_size = GET_MODE_PRECISION (to_mode);
  from_size = GET_MODE_PRECISION (from_mode);

  /* Most partial integer modes have a precision smaller than that of
     the smallest "full" integer mode large enough to store them.  If
     one does not, reduce its bit size by one here for comparison
     purposes.  */
  if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
      && exact_log2 (to_size) != -1)
    to_size --;
  if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
      && exact_log2 (from_size) != -1)
    from_size --;

  /* Assume cost of zero-extend and sign-extend is the same.  */
  which = (to_size < from_size ? all->trunc : all->zext);

  PUT_MODE (all->reg, from_mode);
  set_convert_cost (to_mode, from_mode, speed, set_src_cost (which, speed));
}

static void
init_expmed_one_mode (struct init_expmed_rtl *all,
                      machine_mode mode, int speed)
{
  int m, n, mode_bitsize;
  machine_mode mode_from;

  mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);

  PUT_MODE (all->reg, mode);
  PUT_MODE (all->plus, mode);
  PUT_MODE (all->neg, mode);
  PUT_MODE (all->mult, mode);
  PUT_MODE (all->sdiv, mode);
  PUT_MODE (all->udiv, mode);
  PUT_MODE (all->sdiv_32, mode);
  PUT_MODE (all->smod_32, mode);
  PUT_MODE (all->wide_trunc, mode);
  PUT_MODE (all->shift, mode);
  PUT_MODE (all->shift_mult, mode);
  PUT_MODE (all->shift_add, mode);
  PUT_MODE (all->shift_sub0, mode);
  PUT_MODE (all->shift_sub1, mode);
  PUT_MODE (all->zext, mode);
  PUT_MODE (all->trunc, mode);

  set_add_cost (speed, mode, set_src_cost (all->plus, speed));
  set_neg_cost (speed, mode, set_src_cost (all->neg, speed));
  set_mul_cost (speed, mode, set_src_cost (all->mult, speed));
  set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, speed));
  set_udiv_cost (speed, mode, set_src_cost (all->udiv, speed));

  set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, speed)
                                     <= 2 * add_cost (speed, mode)));
  set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, speed)
                                     <= 4 * add_cost (speed, mode)));

  set_shift_cost (speed, mode, 0, 0);
  {
    int cost = add_cost (speed, mode);
    set_shiftadd_cost (speed, mode, 0, cost);
    set_shiftsub0_cost (speed, mode, 0, cost);
    set_shiftsub1_cost (speed, mode, 0, cost);
  }

  n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
  for (m = 1; m < n; m++)
    {
      XEXP (all->shift, 1) = all->cint[m];
      XEXP (all->shift_mult, 1) = all->pow2[m];

      set_shift_cost (speed, mode, m, set_src_cost (all->shift, speed));
      set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, speed));
      set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, speed));
      set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, speed));
    }

  if (SCALAR_INT_MODE_P (mode))
    {
      for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
           mode_from = (machine_mode)(mode_from + 1))
        init_expmed_one_conv (all, mode, mode_from, speed);
    }
  if (GET_MODE_CLASS (mode) == MODE_INT)
    {
      machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
      if (wider_mode != VOIDmode)
        {
          PUT_MODE (all->zext, wider_mode);
          PUT_MODE (all->wide_mult, wider_mode);
          PUT_MODE (all->wide_lshr, wider_mode);
          XEXP (all->wide_lshr, 1) = GEN_INT (mode_bitsize);

          set_mul_widen_cost (speed, wider_mode,
                              set_src_cost (all->wide_mult, speed));
          set_mul_highpart_cost (speed, mode,
                                 set_src_cost (all->wide_trunc, speed));
        }
    }
}

void
init_expmed (void)
{
  struct init_expmed_rtl all;
  machine_mode mode = QImode;
  int m, speed;

  memset (&all, 0, sizeof all);
  for (m = 1; m < MAX_BITS_PER_WORD; m++)
    {
      all.pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
      all.cint[m] = GEN_INT (m);
    }

  /* Avoid using hard regs in ways which may be unsupported.  */
  all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
  all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
  all.neg = gen_rtx_NEG (mode, all.reg);
  all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
  all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
  all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
  all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
  all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
  all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
  all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
  all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
  all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
  all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
  all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
  all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
  all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
  all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
  all.trunc = gen_rtx_TRUNCATE (mode, all.reg);

  for (speed = 0; speed < 2; speed++)
    {
      crtl->maybe_hot_insn_p = speed;
      set_zero_cost (speed, set_src_cost (const0_rtx, speed));

      for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
           mode = (machine_mode)(mode + 1))
        init_expmed_one_mode (&all, mode, speed);

      if (MIN_MODE_PARTIAL_INT != VOIDmode)
        for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
             mode = (machine_mode)(mode + 1))
          init_expmed_one_mode (&all, mode, speed);

      if (MIN_MODE_VECTOR_INT != VOIDmode)
        for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
             mode = (machine_mode)(mode + 1))
          init_expmed_one_mode (&all, mode, speed);
    }

  if (alg_hash_used_p ())
    {
      struct alg_hash_entry *p = alg_hash_entry_ptr (0);
      memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
    }
  else
    set_alg_hash_used_p (true);
  default_rtl_profile ();

  ggc_free (all.trunc);
  ggc_free (all.shift_sub1);
  ggc_free (all.shift_sub0);
  ggc_free (all.shift_add);
  ggc_free (all.shift_mult);
  ggc_free (all.shift);
  ggc_free (all.wide_trunc);
  ggc_free (all.wide_lshr);
  ggc_free (all.wide_mult);
  ggc_free (all.zext);
  ggc_free (all.smod_32);
  ggc_free (all.sdiv_32);
  ggc_free (all.udiv);
  ggc_free (all.sdiv);
  ggc_free (all.mult);
  ggc_free (all.neg);
  ggc_free (all.plus);
  ggc_free (all.reg);
}

/* Return an rtx representing minus the value of X.
   MODE is the intended mode of the result,
   useful if X is a CONST_INT.  */

rtx
negate_rtx (machine_mode mode, rtx x)
{
  rtx result = simplify_unary_operation (NEG, mode, x, mode);

  if (result == 0)
    result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);

  return result;
}

/* Adjust bitfield memory MEM so that it points to the first unit of mode
   MODE that contains a bitfield of size BITSIZE at bit position BITNUM.
   If MODE is BLKmode, return a reference to every byte in the bitfield.
   Set *NEW_BITNUM to the bit position of the field within the new memory.  */

static rtx
narrow_bit_field_mem (rtx mem, machine_mode mode,
                      unsigned HOST_WIDE_INT bitsize,
                      unsigned HOST_WIDE_INT bitnum,
                      unsigned HOST_WIDE_INT *new_bitnum)
{
  if (mode == BLKmode)
    {
      *new_bitnum = bitnum % BITS_PER_UNIT;
      HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
      HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
                            / BITS_PER_UNIT);
      return adjust_bitfield_address_size (mem, mode, offset, size);
    }
  else
    {
      unsigned int unit = GET_MODE_BITSIZE (mode);
      *new_bitnum = bitnum % unit;
      HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
      return adjust_bitfield_address (mem, mode, offset);
    }
}

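/* Worked example (illustrative, not part of the original source):
   narrowing a reference to an 8-bit field at bit 70 of MEM to QImode
   gives *NEW_BITNUM = 70 % 8 = 6 and a byte offset of (70 - 6) / 8 = 8,
   i.e. the field starts 6 bits into the ninth byte of the structure.  */
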
/* The caller wants to perform insertion or extraction PATTERN on a
   bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
   BITREGION_START and BITREGION_END are as for store_bit_field
   and FIELDMODE is the natural mode of the field.

   Search for a mode that is compatible with the memory access
   restrictions and (where applicable) with a register insertion or
   extraction.  Return the new memory on success, storing the adjusted
   bit position in *NEW_BITNUM.  Return null otherwise.  */

static rtx
adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
                              rtx op0, HOST_WIDE_INT bitsize,
                              HOST_WIDE_INT bitnum,
                              unsigned HOST_WIDE_INT bitregion_start,
                              unsigned HOST_WIDE_INT bitregion_end,
                              machine_mode fieldmode,
                              unsigned HOST_WIDE_INT *new_bitnum)
{
  bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
                                bitregion_end, MEM_ALIGN (op0),
                                MEM_VOLATILE_P (op0));
  machine_mode best_mode;
  if (iter.next_mode (&best_mode))
    {
      /* We can use a memory in BEST_MODE.  See whether this is true for
         any wider modes.  All other things being equal, we prefer to
         use the widest mode possible because it tends to expose more
         CSE opportunities.  */
      if (!iter.prefer_smaller_modes ())
        {
          /* Limit the search to the mode required by the corresponding
             register insertion or extraction instruction, if any.  */
          machine_mode limit_mode = word_mode;
          extraction_insn insn;
          if (get_best_reg_extraction_insn (&insn, pattern,
                                            GET_MODE_BITSIZE (best_mode),
                                            fieldmode))
            limit_mode = insn.field_mode;

          machine_mode wider_mode;
          while (iter.next_mode (&wider_mode)
                 && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
            best_mode = wider_mode;
        }
      return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
                                   new_bitnum);
    }
  return NULL_RTX;
}

/* Return true if a bitfield of size BITSIZE at bit number BITNUM within
   a structure of mode STRUCT_MODE represents a lowpart subreg.  The subreg
   offset is then BITNUM / BITS_PER_UNIT.  */

static bool
lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
                     unsigned HOST_WIDE_INT bitsize,
                     machine_mode struct_mode)
{
  if (BYTES_BIG_ENDIAN)
    return (bitnum % BITS_PER_UNIT == 0
            && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
                || (bitnum + bitsize) % BITS_PER_WORD == 0));
  else
    return bitnum % BITS_PER_WORD == 0;
}

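/* Worked example (illustrative, not part of the original source): on a
   little-endian target with BITS_PER_WORD == 32, any field starting at
   bit 0, 32, 64, ... is a lowpart; on a big-endian target an 8-bit
   field of an SImode structure is a lowpart only at bitnum 24, where
   bitnum + bitsize reaches GET_MODE_BITSIZE (SImode).  */
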
/* Return true if -fstrict-volatile-bitfields applies to an access of OP0
   containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
   Return false if the access would touch memory outside the range
   BITREGION_START to BITREGION_END for conformance to the C++ memory
   model.  */

static bool
strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
                            unsigned HOST_WIDE_INT bitnum,
                            machine_mode fieldmode,
                            unsigned HOST_WIDE_INT bitregion_start,
                            unsigned HOST_WIDE_INT bitregion_end)
{
  unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);

  /* -fstrict-volatile-bitfields must be enabled and we must have a
     volatile MEM.  */
  if (!MEM_P (op0)
      || !MEM_VOLATILE_P (op0)
      || flag_strict_volatile_bitfields <= 0)
    return false;

  /* Non-integral modes likely only happen with packed structures.
     Punt.  */
  if (!SCALAR_INT_MODE_P (fieldmode))
    return false;

  /* The bit size must not be larger than the field mode, and
     the field mode must not be larger than a word.  */
  if (bitsize > modesize || modesize > BITS_PER_WORD)
    return false;

  /* Check for cases of unaligned fields that must be split.  */
  if (bitnum % modesize + bitsize > modesize)
    return false;

  /* The memory must be sufficiently aligned for a MODESIZE access.
     This condition guarantees that the memory access will not
     touch anything after the end of the structure.  */
  if (MEM_ALIGN (op0) < modesize)
    return false;

  /* Check for cases where the C++ memory model applies.  */
  if (bitregion_end != 0
      && (bitnum - bitnum % modesize < bitregion_start
          || bitnum - bitnum % modesize + modesize - 1 > bitregion_end))
    return false;

  return true;
}

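/* Worked example (illustrative, not part of the original source): for a
   volatile SImode bitfield of 8 bits at bitnum 8 in a MEM aligned to
   32 bits, the checks above all pass (8 % 32 + 8 <= 32), so the access
   is performed as a single SImode load/store; widening bitsize to 30
   would fail the split check because 8 + 30 > 32.  */
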
/* Return true if OP0 is a memory and if a bitfield of size BITSIZE at
   bit number BITNUM can be treated as a simple value of mode MODE.  */

static bool
simple_mem_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
                       unsigned HOST_WIDE_INT bitnum, machine_mode mode)
{
  return (MEM_P (op0)
          && bitnum % BITS_PER_UNIT == 0
          && bitsize == GET_MODE_BITSIZE (mode)
          && (!SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
              || (bitnum % GET_MODE_ALIGNMENT (mode) == 0
                  && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
}
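
/* Worked example (illustrative, not part of the original source): a
   32-bit field at bit 32 of a MEM aligned to 32 bits is "simple" in
   SImode (byte-aligned, exactly one mode-sized unit, sufficiently
   aligned), so it can be moved with a single SImode load or store
   rather than with shifts and masks.  */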
\f
/* Try to use instruction INSV to store VALUE into a field of OP0.
   BITSIZE and BITNUM are as for store_bit_field.  */

static bool
store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
                            unsigned HOST_WIDE_INT bitsize,
                            unsigned HOST_WIDE_INT bitnum,
                            rtx value)
{
  struct expand_operand ops[4];
  rtx value1;
  rtx xop0 = op0;
  rtx_insn *last = get_last_insn ();
  bool copy_back = false;

  machine_mode op_mode = insv->field_mode;
  unsigned int unit = GET_MODE_BITSIZE (op_mode);
  if (bitsize == 0 || bitsize > unit)
    return false;

  if (MEM_P (xop0))
    /* Get a reference to the first byte of the field.  */
    xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
                                 &bitnum);
  else
    {
      /* Convert from counting within OP0 to counting in OP_MODE.  */
      if (BYTES_BIG_ENDIAN)
        bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));

      /* If xop0 is a register, we need it in OP_MODE
         to make it acceptable to the format of insv.  */
      if (GET_CODE (xop0) == SUBREG)
        /* We can't just change the mode, because this might clobber op0,
           and we will need the original value of op0 if insv fails.  */
        xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
      if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
        xop0 = gen_lowpart_SUBREG (op_mode, xop0);
    }

  /* If the destination is a paradoxical subreg such that we need a
     truncate to the inner mode, perform the insertion on a temporary and
     truncate the result to the original destination.  Note that we can't
     just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
     X) 0)) is (reg:N X).  */
  if (GET_CODE (xop0) == SUBREG
      && REG_P (SUBREG_REG (xop0))
      && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
                                         op_mode))
    {
      rtx tem = gen_reg_rtx (op_mode);
      emit_move_insn (tem, xop0);
      xop0 = tem;
      copy_back = true;
    }

  /* There is a similar overflow check at the start of store_bit_field_1,
     but that only handles the case in which the field lies completely
     outside the register.  The field can also lie partially in the
     register, in which case we need to adjust BITSIZE for the partial
     overflow.  Without this fix, pr48335-2.c is broken on big-endian
     targets that have a bit-insert instruction, such as arm and
     aarch64.  */
  if (bitsize + bitnum > unit && bitnum < unit)
    {
      warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
               "destination object, data truncated into %wu-bit",
               bitsize, unit - bitnum);
      bitsize = unit - bitnum;
    }

  /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
     "backwards" from the size of the unit we are inserting into.
     Otherwise, we count bits from the most significant on a
     BYTES/BITS_BIG_ENDIAN machine.  */

  if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
    bitnum = unit - bitsize - bitnum;

  /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
  value1 = value;
  if (GET_MODE (value) != op_mode)
    {
      if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
        {
          /* Optimization: Don't bother really extending VALUE
             if it has all the bits we will actually use.  However,
             if we must narrow it, be sure we do it correctly.  */

          if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
            {
              rtx tmp;

              tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
              if (! tmp)
                tmp = simplify_gen_subreg (op_mode,
                                           force_reg (GET_MODE (value),
                                                      value1),
                                           GET_MODE (value), 0);
              value1 = tmp;
            }
          else
            value1 = gen_lowpart (op_mode, value1);
        }
      else if (CONST_INT_P (value))
        value1 = gen_int_mode (INTVAL (value), op_mode);
      else
        /* Parse phase is supposed to make VALUE's data type
           match that of the component reference, which is a type
           at least as wide as the field; so VALUE should have
           a mode that corresponds to that type.  */
        gcc_assert (CONSTANT_P (value));
    }

  create_fixed_operand (&ops[0], xop0);
  create_integer_operand (&ops[1], bitsize);
  create_integer_operand (&ops[2], bitnum);
  create_input_operand (&ops[3], value1, op_mode);
  if (maybe_expand_insn (insv->icode, 4, ops))
    {
      if (copy_back)
        convert_move (op0, xop0, true);
      return true;
    }
  delete_insns_since (last);
  return false;
}

/* A subroutine of store_bit_field, with the same arguments.  Return true
   if the operation could be implemented.

   If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
   no other way of implementing the operation.  If FALLBACK_P is false,
   return false instead.  */

static bool
store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
                   unsigned HOST_WIDE_INT bitnum,
                   unsigned HOST_WIDE_INT bitregion_start,
                   unsigned HOST_WIDE_INT bitregion_end,
                   machine_mode fieldmode,
                   rtx value, bool fallback_p)
{
  rtx op0 = str_rtx;
  rtx orig_value;

  while (GET_CODE (op0) == SUBREG)
    {
      /* The following line once was done only if WORDS_BIG_ENDIAN,
         but I think that is a mistake.  WORDS_BIG_ENDIAN is
         meaningful at a much higher level; when structures are copied
         between memory and regs, the higher-numbered regs
         always get higher addresses.  */
      int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
      int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
      int byte_offset = 0;

      /* Paradoxical subregs need special handling on big endian machines.  */
      if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
        {
          int difference = inner_mode_size - outer_mode_size;

          if (WORDS_BIG_ENDIAN)
            byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
          if (BYTES_BIG_ENDIAN)
            byte_offset += difference % UNITS_PER_WORD;
        }
      else
        byte_offset = SUBREG_BYTE (op0);

      bitnum += byte_offset * BITS_PER_UNIT;
      op0 = SUBREG_REG (op0);
    }

  /* No action is needed if the target is a register and if the field
     lies completely outside that register.  This can occur if the source
     code contains an out-of-bounds access to a small array.  */
  if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
    return true;

  /* Use vec_set patterns for inserting parts of vectors whenever
     available.  */
  if (VECTOR_MODE_P (GET_MODE (op0))
      && !MEM_P (op0)
      && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
      && fieldmode == GET_MODE_INNER (GET_MODE (op0))
      && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
      && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
    {
      struct expand_operand ops[3];
      machine_mode outermode = GET_MODE (op0);
      machine_mode innermode = GET_MODE_INNER (outermode);
      enum insn_code icode = optab_handler (vec_set_optab, outermode);
      int pos = bitnum / GET_MODE_BITSIZE (innermode);

      create_fixed_operand (&ops[0], op0);
      create_input_operand (&ops[1], value, innermode);
      create_integer_operand (&ops[2], pos);
      if (maybe_expand_insn (icode, 3, ops))
        return true;
    }

  /* If the target is a register, overwriting the entire object, or storing
     a full-word or multi-word field can be done with just a SUBREG.  */
  if (!MEM_P (op0)
      && bitsize == GET_MODE_BITSIZE (fieldmode)
      && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
          || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
    {
      /* Use the subreg machinery either to narrow OP0 to the required
         words or to cope with mode punning between equal-sized modes.
         In the latter case, use subreg on the rhs side, not lhs.  */
      rtx sub;

      if (bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
        {
          sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
          if (sub)
            {
              emit_move_insn (op0, sub);
              return true;
            }
        }
      else
        {
          sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
                                     bitnum / BITS_PER_UNIT);
          if (sub)
            {
              emit_move_insn (sub, value);
              return true;
            }
        }
    }

  /* If the target is memory, storing any naturally aligned field can be
     done with a simple store.  For targets that support fast unaligned
     memory, any naturally sized, unit aligned field can be done directly.  */
  if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode))
    {
      op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
      emit_move_insn (op0, value);
      return true;
    }

  /* Make sure we are playing with integral modes.  Pun with subregs
     if we aren't.  This must come after the entire register case above,
     since that case is valid for any mode.  The following cases are only
     valid for integral modes.  */
  {
    machine_mode imode = int_mode_for_mode (GET_MODE (op0));
    if (imode != GET_MODE (op0))
      {
        if (MEM_P (op0))
          op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
        else
          {
            gcc_assert (imode != BLKmode);
            op0 = gen_lowpart (imode, op0);
          }
      }
  }

  /* Storing an lsb-aligned field in a register
     can be done with a movstrict instruction.  */

  if (!MEM_P (op0)
      && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
      && bitsize == GET_MODE_BITSIZE (fieldmode)
      && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
    {
      struct expand_operand ops[2];
      enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
      rtx arg0 = op0;
      unsigned HOST_WIDE_INT subreg_off;

      if (GET_CODE (arg0) == SUBREG)
        {
          /* Else we've got some float mode source being extracted into
             a different float mode destination -- this combination of
             subregs results in Severe Tire Damage.  */
          gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
                      || GET_MODE_CLASS (fieldmode) == MODE_INT
                      || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
          arg0 = SUBREG_REG (arg0);
        }

      subreg_off = bitnum / BITS_PER_UNIT;
      if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
        {
          arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);

          create_fixed_operand (&ops[0], arg0);
          /* Shrink the source operand to FIELDMODE.  */
          create_convert_operand_to (&ops[1], value, fieldmode, false);
          if (maybe_expand_insn (icode, 2, ops))
            return true;
        }
    }

  /* Handle fields bigger than a word.  */

  if (bitsize > BITS_PER_WORD)
    {
      /* Here we transfer the words of the field
         in the order least significant first.
         This is because the most significant word is the one which may
         be less than full.
         However, only do that if the value is not BLKmode.  */

      unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
      unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
      unsigned int i;
      rtx_insn *last;

      /* This is the mode we must force value to, so that there will be enough
         subwords to extract.  Note that fieldmode will often (always?) be
         VOIDmode, because that is what store_field uses to indicate that this
         is a bit field, but passing VOIDmode to operand_subword_force
         is not allowed.  */
      fieldmode = GET_MODE (value);
      if (fieldmode == VOIDmode)
        fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);

      last = get_last_insn ();
      for (i = 0; i < nwords; i++)
        {
          /* If I is 0, use the low-order word in both field and target;
             if I is 1, use the next to lowest word; and so on.  */
          unsigned int wordnum = (backwards
                                  ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
                                  - i - 1
                                  : i);
          unsigned int bit_offset = (backwards
                                     ? MAX ((int) bitsize - ((int) i + 1)
                                            * BITS_PER_WORD,
                                            0)
                                     : (int) i * BITS_PER_WORD);
          rtx value_word = operand_subword_force (value, wordnum, fieldmode);
          unsigned HOST_WIDE_INT new_bitsize =
            MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);

          /* If the remaining chunk doesn't have full wordsize we have
             to make sure that for big endian machines the higher order
             bits are used.  */
          if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
            value_word = simplify_expand_binop (word_mode, lshr_optab,
                                                value_word,
                                                GEN_INT (BITS_PER_WORD
                                                         - new_bitsize),
                                                NULL_RTX, true,
                                                OPTAB_LIB_WIDEN);

          if (!store_bit_field_1 (op0, new_bitsize,
                                  bitnum + bit_offset,
                                  bitregion_start, bitregion_end,
                                  word_mode,
                                  value_word, fallback_p))
            {
              delete_insns_since (last);
              return false;
            }
        }
      return true;
    }

  /* If VALUE has a floating-point or complex mode, access it as an
     integer of the corresponding size.  This can occur on a machine
     with 64 bit registers that uses SFmode for float.  It can also
     occur for unaligned float or complex fields.  */
  orig_value = value;
  if (GET_MODE (value) != VOIDmode
      && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
      && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
    {
      value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
      emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
    }

  /* If OP0 is a multi-word register, narrow it to the affected word.
     If the region spans two words, defer to store_split_bit_field.  */
  if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
    {
      op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
                                 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
      gcc_assert (op0);
      bitnum %= BITS_PER_WORD;
      if (bitnum + bitsize > BITS_PER_WORD)
        {
          if (!fallback_p)
            return false;

          store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
                                 bitregion_end, value);
          return true;
        }
    }

  /* From here on we can assume that the field to be stored in fits
     within a word.  If the destination is a register, it too fits
     in a word.  */

  extraction_insn insv;
  if (!MEM_P (op0)
      && get_best_reg_extraction_insn (&insv, EP_insv,
                                       GET_MODE_BITSIZE (GET_MODE (op0)),
                                       fieldmode)
      && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
    return true;

  /* If OP0 is a memory, try copying it to a register and seeing if a
     cheap register alternative is available.  */
  if (MEM_P (op0))
    {
      if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
                                        fieldmode)
          && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
        return true;

      rtx_insn *last = get_last_insn ();

      /* Try loading part of OP0 into a register, inserting the bitfield
         into that, and then copying the result back to OP0.  */
      unsigned HOST_WIDE_INT bitpos;
      rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
                                               bitregion_start, bitregion_end,
                                               fieldmode, &bitpos);
      if (xop0)
        {
          rtx tempreg = copy_to_reg (xop0);
          if (store_bit_field_1 (tempreg, bitsize, bitpos,
                                 bitregion_start, bitregion_end,
                                 fieldmode, orig_value, false))
            {
              emit_move_insn (xop0, tempreg);
              return true;
            }
          delete_insns_since (last);
        }
    }

  if (!fallback_p)
    return false;

  store_fixed_bit_field (op0, bitsize, bitnum, bitregion_start,
                         bitregion_end, value);
  return true;
}

/* Generate code to store value from rtx VALUE
   into a bit-field within structure STR_RTX
   containing BITSIZE bits starting at bit BITNUM.

   BITREGION_START is bitpos of the first bitfield in this region.
   BITREGION_END is the bitpos of the ending bitfield in this region.
   These two fields are 0, if the C++ memory model does not apply,
   or we are not interested in keeping track of bitfield regions.

   FIELDMODE is the machine-mode of the FIELD_DECL node for this field.  */

void
store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
                 unsigned HOST_WIDE_INT bitnum,
                 unsigned HOST_WIDE_INT bitregion_start,
                 unsigned HOST_WIDE_INT bitregion_end,
                 machine_mode fieldmode,
                 rtx value)
{
  /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
  if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, fieldmode,
                                  bitregion_start, bitregion_end))
    {
      /* Storing of a full word can be done with a simple store.
         We know here that the field can be accessed with one single
         instruction.  For targets that support unaligned memory,
         an unaligned access may be necessary.  */
      if (bitsize == GET_MODE_BITSIZE (fieldmode))
        {
          str_rtx = adjust_bitfield_address (str_rtx, fieldmode,
                                             bitnum / BITS_PER_UNIT);
          gcc_assert (bitnum % BITS_PER_UNIT == 0);
          emit_move_insn (str_rtx, value);
        }
      else
        {
          rtx temp;

          str_rtx = narrow_bit_field_mem (str_rtx, fieldmode, bitsize, bitnum,
                                          &bitnum);
          gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (fieldmode));
          temp = copy_to_reg (str_rtx);
          if (!store_bit_field_1 (temp, bitsize, bitnum, 0, 0,
                                  fieldmode, value, true))
            gcc_unreachable ();

          emit_move_insn (str_rtx, temp);
        }

      return;
    }

  /* Under the C++0x memory model, we must not touch bits outside the
     bit region.  Adjust the address to start at the beginning of the
     bit region.  */
  if (MEM_P (str_rtx) && bitregion_start > 0)
    {
      machine_mode bestmode;
      HOST_WIDE_INT offset, size;

      gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);

      offset = bitregion_start / BITS_PER_UNIT;
      bitnum -= bitregion_start;
      size = (bitnum + bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT;
      bitregion_end -= bitregion_start;
      bitregion_start = 0;
      bestmode = get_best_mode (bitsize, bitnum,
                                bitregion_start, bitregion_end,
                                MEM_ALIGN (str_rtx), VOIDmode,
                                MEM_VOLATILE_P (str_rtx));
      str_rtx = adjust_bitfield_address_size (str_rtx, bestmode, offset, size);
    }

  if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
                          bitregion_start, bitregion_end,
                          fieldmode, value, true))
    gcc_unreachable ();
}
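
/* Usage sketch (illustrative, not part of the original source): a caller
   that wants to store the QImode value VAL into bits 5..12 of a 32-bit
   pseudo REG, outside any C++ memory model bit region, would write

     store_bit_field (reg, 8, 5, 0, 0, QImode, val);

   which falls through to store_bit_field_1 and, on targets without a
   usable insv pattern, is expanded by store_fixed_bit_field as a
   mask-and-ior sequence on the containing word.  */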
\f
/* Use shifts and boolean operations to store VALUE into a bit field of
   width BITSIZE in OP0, starting at bit BITNUM.  */

static void
store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
                       unsigned HOST_WIDE_INT bitnum,
                       unsigned HOST_WIDE_INT bitregion_start,
                       unsigned HOST_WIDE_INT bitregion_end,
                       rtx value)
{
  /* There is a case not handled here:
     a structure with a known alignment of just a halfword
     and a field split across two aligned halfwords within the structure.
     Or likewise a structure with a known alignment of just a byte
     and a field split across two bytes.
     Such cases are not supposed to be able to occur.  */

  if (MEM_P (op0))
    {
      machine_mode mode = GET_MODE (op0);
      if (GET_MODE_BITSIZE (mode) == 0
          || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
        mode = word_mode;
      mode = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
                            MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));

      if (mode == VOIDmode)
        {
          /* The only way this should occur is if the field spans word
             boundaries.  */
          store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
                                 bitregion_end, value);
          return;
        }

      op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
    }

  store_fixed_bit_field_1 (op0, bitsize, bitnum, value);
}

/* Helper function for store_fixed_bit_field, stores
   the bit field always using the MODE of OP0.  */

static void
store_fixed_bit_field_1 (rtx op0, unsigned HOST_WIDE_INT bitsize,
                         unsigned HOST_WIDE_INT bitnum,
                         rtx value)
{
  machine_mode mode;
  rtx temp;
  int all_zero = 0;
  int all_one = 0;

  mode = GET_MODE (op0);
  gcc_assert (SCALAR_INT_MODE_P (mode));

  /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
     for invalid input, such as f5 from gcc.dg/pr48335-2.c.  */

  if (BYTES_BIG_ENDIAN)
    /* BITNUM is the distance between our msb
       and that of the containing datum.
       Convert it to the distance from the lsb.  */
    bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;

  /* Now BITNUM is always the distance between our lsb
     and that of OP0.  */

  /* Shift VALUE left by BITNUM bits.  If VALUE is not constant,
     we must first convert its mode to MODE.  */

  if (CONST_INT_P (value))
    {
      unsigned HOST_WIDE_INT v = UINTVAL (value);

      if (bitsize < HOST_BITS_PER_WIDE_INT)
        v &= ((unsigned HOST_WIDE_INT) 1 << bitsize) - 1;

      if (v == 0)
        all_zero = 1;
      else if ((bitsize < HOST_BITS_PER_WIDE_INT
                && v == ((unsigned HOST_WIDE_INT) 1 << bitsize) - 1)
               || (bitsize == HOST_BITS_PER_WIDE_INT
                   && v == (unsigned HOST_WIDE_INT) -1))
        all_one = 1;

      value = lshift_value (mode, v, bitnum);
    }
  else
    {
      int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
                      && bitnum + bitsize != GET_MODE_BITSIZE (mode));

      if (GET_MODE (value) != mode)
        value = convert_to_mode (mode, value, 1);

      if (must_and)
        value = expand_binop (mode, and_optab, value,
                              mask_rtx (mode, 0, bitsize, 0),
                              NULL_RTX, 1, OPTAB_LIB_WIDEN);
      if (bitnum > 0)
        value = expand_shift (LSHIFT_EXPR, mode, value,
                              bitnum, NULL_RTX, 1);
    }

  /* Now clear the chosen bits in OP0,
     except that if VALUE is -1 we need not bother.  */
  /* We keep the intermediates in registers to allow CSE to combine
     consecutive bitfield assignments.  */

  temp = force_reg (mode, op0);

  if (! all_one)
    {
      temp = expand_binop (mode, and_optab, temp,
                           mask_rtx (mode, bitnum, bitsize, 1),
                           NULL_RTX, 1, OPTAB_LIB_WIDEN);
      temp = force_reg (mode, temp);
    }

  /* Now logical-or VALUE into OP0, unless it is zero.  */

  if (! all_zero)
    {
      temp = expand_binop (mode, ior_optab, temp, value,
                           NULL_RTX, 1, OPTAB_LIB_WIDEN);
      temp = force_reg (mode, temp);
    }

  if (op0 != temp)
    {
      op0 = copy_rtx (op0);
      emit_move_insn (op0, temp);
    }
}
\f
/* Store a bit field that is split across multiple accessible memory objects.

   OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
   BITSIZE is the field width; BITPOS the position of its first bit
   (within the word).
   VALUE is the value to store.

   This does not yet handle fields wider than BITS_PER_WORD.  */

static void
store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
                       unsigned HOST_WIDE_INT bitpos,
                       unsigned HOST_WIDE_INT bitregion_start,
                       unsigned HOST_WIDE_INT bitregion_end,
                       rtx value)
{
  unsigned int unit;
  unsigned int bitsdone = 0;

  /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
     much at a time.  */
  if (REG_P (op0) || GET_CODE (op0) == SUBREG)
    unit = BITS_PER_WORD;
  else
    unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);

  /* If OP0 is a memory with a mode, then UNIT must not be larger than
     OP0's mode as well.  Otherwise, store_fixed_bit_field will call us
     again, and we will mutually recurse forever.  */
  if (MEM_P (op0) && GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
    unit = MIN (unit, GET_MODE_BITSIZE (GET_MODE (op0)));

  /* If VALUE is a constant other than a CONST_INT, get it into a register in
     WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
     that VALUE might be a floating-point constant.  */
  if (CONSTANT_P (value) && !CONST_INT_P (value))
    {
      rtx word = gen_lowpart_common (word_mode, value);

      if (word && (value != word))
        value = word;
      else
        value = gen_lowpart_common (word_mode,
                                    force_reg (GET_MODE (value) != VOIDmode
                                               ? GET_MODE (value)
                                               : word_mode, value));
    }

  while (bitsdone < bitsize)
    {
      unsigned HOST_WIDE_INT thissize;
      rtx part, word;
      unsigned HOST_WIDE_INT thispos;
      unsigned HOST_WIDE_INT offset;

      offset = (bitpos + bitsdone) / unit;
      thispos = (bitpos + bitsdone) % unit;

      /* When the region of bytes we can touch is restricted, decrease
         UNIT close to the end of the region as needed.  If op0 is a REG
         or SUBREG of REG, don't do this, as there can't be data races
         on a register and we can expand shorter code in some cases.  */
      if (bitregion_end
          && unit > BITS_PER_UNIT
          && bitpos + bitsdone - thispos + unit > bitregion_end + 1
          && !REG_P (op0)
          && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
        {
          unit = unit / 2;
          continue;
        }

      /* THISSIZE must not overrun a word boundary.  Otherwise,
         store_fixed_bit_field will call us again, and we will mutually
         recurse forever.  */
      thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
      thissize = MIN (thissize, unit - thispos);

      if (BYTES_BIG_ENDIAN)
        {
          /* Fetch successively less significant portions.  */
          if (CONST_INT_P (value))
            part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
                             >> (bitsize - bitsdone - thissize))
                            & (((HOST_WIDE_INT) 1 << thissize) - 1));
          else
            {
              int total_bits = GET_MODE_BITSIZE (GET_MODE (value));
              /* The args are chosen so that the last part includes the
                 lsb.  Give extract_bit_field the value it needs (with
                 endianness compensation) to fetch the piece we want.  */
              part = extract_fixed_bit_field (word_mode, value, thissize,
                                              total_bits - bitsize + bitsdone,
                                              NULL_RTX, 1);
            }
        }
      else
        {
          /* Fetch successively more significant portions.  */
          if (CONST_INT_P (value))
            part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
                             >> bitsdone)
                            & (((HOST_WIDE_INT) 1 << thissize) - 1));
          else
            part = extract_fixed_bit_field (word_mode, value, thissize,
                                            bitsdone, NULL_RTX, 1);
        }

      /* If OP0 is a register, then handle OFFSET here.

         When handling multiword bitfields, extract_bit_field may pass
         down a word_mode SUBREG of a larger REG for a bitfield that actually
         crosses a word boundary.  Thus, for a SUBREG, we must find
         the current word starting from the base register.  */
      if (GET_CODE (op0) == SUBREG)
        {
          int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD)
                            + (offset * unit / BITS_PER_WORD);
          machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
          if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
            word = word_offset ? const0_rtx : op0;
          else
            word = operand_subword_force (SUBREG_REG (op0), word_offset,
                                          GET_MODE (SUBREG_REG (op0)));
          offset &= BITS_PER_WORD / unit - 1;
        }
      else if (REG_P (op0))
        {
          machine_mode op0_mode = GET_MODE (op0);
          if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
            word = offset ? const0_rtx : op0;
          else
            word = operand_subword_force (op0, offset * unit / BITS_PER_WORD,
                                          GET_MODE (op0));
          offset &= BITS_PER_WORD / unit - 1;
        }
      else
        word = op0;

      /* OFFSET is in UNITs, and UNIT is in bits.  If WORD is const0_rtx,
         it is just an out-of-bounds access.  Ignore it.  */
      if (word != const0_rtx)
        store_fixed_bit_field (word, thissize, offset * unit + thispos,
                               bitregion_start, bitregion_end, part);
      bitsdone += thissize;
    }
}
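
/* Worked example (illustrative, not part of the original source): with
   BITS_PER_WORD == 32, storing a 16-bit field at bitpos 24 of a REG is
   split into two pieces: bits 24..31 of the first word and bits 0..7
   of the second, each piece being stored by store_fixed_bit_field.  */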
\f
/* A subroutine of extract_bit_field_1 that converts return value X
   to either MODE or TMODE.  MODE, TMODE and UNSIGNEDP are arguments
   to extract_bit_field.  */

static rtx
convert_extracted_bit_field (rtx x, machine_mode mode,
                             machine_mode tmode, bool unsignedp)
{
  if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
    return x;

  /* If the x mode is not a scalar integral, first convert to the
     integer mode of that size and then access it as a floating-point
     value via a SUBREG.  */
  if (!SCALAR_INT_MODE_P (tmode))
    {
      machine_mode smode;

      smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
      x = convert_to_mode (smode, x, unsignedp);
      x = force_reg (smode, x);
      return gen_lowpart (tmode, x);
    }

  return convert_to_mode (tmode, x, unsignedp);
}

/* Try to use an ext(z)v pattern to extract a field from OP0.
   Return the extracted value on success, otherwise return null.
   EXT_MODE is the mode of the extraction and the other arguments
   are as for extract_bit_field.  */

static rtx
extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
                              unsigned HOST_WIDE_INT bitsize,
                              unsigned HOST_WIDE_INT bitnum,
                              int unsignedp, rtx target,
                              machine_mode mode, machine_mode tmode)
{
  struct expand_operand ops[4];
  rtx spec_target = target;
  rtx spec_target_subreg = 0;
  machine_mode ext_mode = extv->field_mode;
  unsigned unit = GET_MODE_BITSIZE (ext_mode);

  if (bitsize == 0 || unit < bitsize)
    return NULL_RTX;

  if (MEM_P (op0))
    /* Get a reference to the first byte of the field.  */
    op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
                                &bitnum);
  else
    {
      /* Convert from counting within OP0 to counting in EXT_MODE.  */
      if (BYTES_BIG_ENDIAN)
        bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));

      /* If op0 is a register, we need it in EXT_MODE to make it
         acceptable to the format of ext(z)v.  */
      if (GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
        return NULL_RTX;
      if (REG_P (op0) && GET_MODE (op0) != ext_mode)
        op0 = gen_lowpart_SUBREG (ext_mode, op0);
    }

  /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
     "backwards" from the size of the unit we are extracting from.
     Otherwise, we count bits from the most significant on a
     BYTES/BITS_BIG_ENDIAN machine.  */

  if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
    bitnum = unit - bitsize - bitnum;

  if (target == 0)
    target = spec_target = gen_reg_rtx (tmode);

  if (GET_MODE (target) != ext_mode)
    {
      /* Don't use LHS paradoxical subreg if explicit truncation is needed
         between the mode of the extraction (word_mode) and the target
         mode.  Instead, create a temporary and use convert_move to set
         the target.  */
      if (REG_P (target)
          && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
        {
          target = gen_lowpart (ext_mode, target);
          if (GET_MODE_PRECISION (ext_mode)
              > GET_MODE_PRECISION (GET_MODE (spec_target)))
            spec_target_subreg = target;
        }
      else
        target = gen_reg_rtx (ext_mode);
    }

  create_output_operand (&ops[0], target, ext_mode);
  create_fixed_operand (&ops[1], op0);
  create_integer_operand (&ops[2], bitsize);
  create_integer_operand (&ops[3], bitnum);
  if (maybe_expand_insn (extv->icode, 4, ops))
    {
      target = ops[0].value;
      if (target == spec_target)
        return target;
      if (target == spec_target_subreg)
        return spec_target;
      return convert_extracted_bit_field (target, mode, tmode, unsignedp);
    }
  return NULL_RTX;
}

6d7db3c5
RS
1417/* A subroutine of extract_bit_field, with the same arguments.
1418 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1419 if we can find no other means of implementing the operation.
1420 if FALLBACK_P is false, return NULL instead. */
1421
1422static rtx
1423extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
c6285bd7 1424 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
ef4bddc2 1425 machine_mode mode, machine_mode tmode,
6d7db3c5 1426 bool fallback_p)
44037a66 1427{
b3694847 1428 rtx op0 = str_rtx;
ef4bddc2
RS
1429 machine_mode int_mode;
1430 machine_mode mode1;
44037a66 1431
44037a66
TG
1432 if (tmode == VOIDmode)
1433 tmode = mode;
6ca6193b 1434
44037a66
TG
1435 while (GET_CODE (op0) == SUBREG)
1436 {
2c58f7dd 1437 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
44037a66
TG
1438 op0 = SUBREG_REG (op0);
1439 }
77295dec 1440
2c58f7dd 1441 /* If we have an out-of-bounds access to a register, just return an
647eea9d 1442 uninitialized register of the required mode. This can occur if the
2c58f7dd
RS
1443 source code contains an out-of-bounds access to a small array. */
1444 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1445 return gen_reg_rtx (tmode);
1446
f8cfc6aa 1447 if (REG_P (op0)
aac280fb
DD
1448 && mode == GET_MODE (op0)
1449 && bitnum == 0
0b69c29f 1450 && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
aac280fb 1451 {
0b69c29f 1452 /* We're trying to extract a full register from itself. */
aac280fb
DD
1453 return op0;
1454 }
1455
0890b981
AP
1456 /* See if we can get a better vector mode before extracting. */
1457 if (VECTOR_MODE_P (GET_MODE (op0))
1458 && !MEM_P (op0)
1459 && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1460 {
ef4bddc2 1461 machine_mode new_mode;
0890b981
AP
1462
1463 if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1464 new_mode = MIN_MODE_VECTOR_FLOAT;
325217ed
CF
1465 else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1466 new_mode = MIN_MODE_VECTOR_FRACT;
1467 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1468 new_mode = MIN_MODE_VECTOR_UFRACT;
1469 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1470 new_mode = MIN_MODE_VECTOR_ACCUM;
1471 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1472 new_mode = MIN_MODE_VECTOR_UACCUM;
0890b981
AP
1473 else
1474 new_mode = MIN_MODE_VECTOR_INT;
1475
1476 for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
b147c5b9 1477 if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
0890b981
AP
1478 && targetm.vector_mode_supported_p (new_mode))
1479 break;
1480 if (new_mode != VOIDmode)
1481 op0 = gen_lowpart (new_mode, op0);
1482 }
1483
997404de
JH
1484 /* Use vec_extract patterns for extracting parts of vectors whenever
1485 available. */
1486 if (VECTOR_MODE_P (GET_MODE (op0))
3c0cb5de 1487 && !MEM_P (op0)
947131ba 1488 && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
b42271d6
JB
1489 && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1490 == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
997404de 1491 {
a5c7d693 1492 struct expand_operand ops[3];
ef4bddc2
RS
1493 machine_mode outermode = GET_MODE (op0);
1494 machine_mode innermode = GET_MODE_INNER (outermode);
a5c7d693 1495 enum insn_code icode = optab_handler (vec_extract_optab, outermode);
b42271d6 1496 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
997404de 1497
a5c7d693
RS
1498 create_output_operand (&ops[0], target, innermode);
1499 create_input_operand (&ops[1], op0, outermode);
1500 create_integer_operand (&ops[2], pos);
1501 if (maybe_expand_insn (icode, 3, ops))
997404de 1502 {
a5c7d693
RS
1503 target = ops[0].value;
1504 if (GET_MODE (target) != mode)
1505 return gen_lowpart (tmode, target);
1506 return target;
997404de
JH
1507 }
1508 }
1509
d006aa54
RH
1510 /* Make sure we are playing with integral modes. Pun with subregs
1511 if we aren't. */
1512 {
ef4bddc2 1513 machine_mode imode = int_mode_for_mode (GET_MODE (op0));
d006aa54
RH
1514 if (imode != GET_MODE (op0))
1515 {
a6d2976a 1516 if (MEM_P (op0))
e98fc6de 1517 op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
7d293b58 1518 else if (imode != BLKmode)
a6d2976a 1519 {
a6d2976a 1520 op0 = gen_lowpart (imode, op0);
360e3535 1521
a6d2976a
JDA
1522 /* If we got a SUBREG, force it into a register since we
1523 aren't going to be able to do another SUBREG on it. */
1524 if (GET_CODE (op0) == SUBREG)
1525 op0 = force_reg (imode, op0);
1526 }
7d293b58
JJ
1527 else if (REG_P (op0))
1528 {
1529 rtx reg, subreg;
1530 imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
1531 MODE_INT);
1532 reg = gen_reg_rtx (imode);
1533 subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
1534 emit_move_insn (subreg, op0);
1535 op0 = reg;
1536 bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
1537 }
1538 else
1539 {
e98fc6de
RS
1540 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (op0));
1541 rtx mem = assign_stack_temp (GET_MODE (op0), size);
7d293b58 1542 emit_move_insn (mem, op0);
e98fc6de 1543 op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
7d293b58 1544 }
d006aa54
RH
1545 }
1546 }
1547
6ca6193b
JDA
1548 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1549 If that's wrong, the solution is to test for it and set TARGET to 0
1550 if needed. */
e98f90d3 1551
f5d4f18c
SL
1552 /* Get the mode of the field to use for atomic access or subreg
1553 conversion. */
b8ab7fc8
RS
1554 mode1 = mode;
1555 if (SCALAR_INT_MODE_P (tmode))
44037a66 1556 {
ef4bddc2 1557 machine_mode try_mode = mode_for_size (bitsize,
b8ab7fc8
RS
1558 GET_MODE_CLASS (tmode), 0);
1559 if (try_mode != BLKmode)
1560 mode1 = try_mode;
1561 }
1562 gcc_assert (mode1 != BLKmode);
1563
1564 /* Extraction of a full MODE1 value can be done with a subreg as long
1565 as the least significant bit of the value is the least significant
1566 bit of either OP0 or a word of OP0. */
1567 if (!MEM_P (op0)
1568 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1569 && bitsize == GET_MODE_BITSIZE (mode1)
1570 && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0)))
1571 {
1572 rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1573 bitnum / BITS_PER_UNIT);
1574 if (sub)
1575 return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1576 }
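 /* Editorial illustration (not part of the original source): on a
 little-endian target, extracting the low 32 bits of a DImode pseudo
 meets the conditions above and folds to (subreg:SI (reg:DI ...) 0),
 so no shift or mask instructions are emitted. */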
1577
1578 /* Extraction of a full MODE1 value can be done with a load as long as
1579 the field is on a byte boundary and is sufficiently aligned. */
00efe3ea 1580 if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1))
b8ab7fc8
RS
1581 {
1582 op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT);
1583 return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
44037a66 1584 }
b8ab7fc8 1585
44037a66 1586 /* Handle fields bigger than a word. */
c410d49e 1587
44037a66
TG
1588 if (bitsize > BITS_PER_WORD)
1589 {
1590 /* Here we transfer the words of the field
1591 in the order least significant first.
1592 This is because the most significant word is the one which may
1593 be less than full. */
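 /* Editorial example: with 32-bit words, a 72-bit field gives
 nwords == 3; words 0 and 1 are full 32-bit pieces, while word 2
 (the most significant) carries only the remaining 8 bits. */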
1594
0cd9e9ee 1595 unsigned int backwards = WORDS_BIG_ENDIAN;
770ae6cc
RK
1596 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1597 unsigned int i;
f3f6fb16 1598 rtx_insn *last;
44037a66 1599
02972eaf 1600 if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
44037a66
TG
1601 target = gen_reg_rtx (mode);
1602
7d21a61e
DD
1603 /* In case we're about to clobber a base register or something
1604 (see gcc.c-torture/execute/20040625-1.c). */
1605 if (reg_mentioned_p (target, str_rtx))
1606 target = gen_reg_rtx (mode);
1607
34ea783b 1608 /* Indicate for flow that the entire target reg is being set. */
c41c1387 1609 emit_clobber (target);
34ea783b 1610
5ef0b50d 1611 last = get_last_insn ();
44037a66
TG
1612 for (i = 0; i < nwords; i++)
1613 {
1614 /* If I is 0, use the low-order word in both field and target;
1615 if I is 1, use the next to lowest word; and so on. */
77295dec 1616 /* Word number in TARGET to use. */
770ae6cc 1617 unsigned int wordnum
0cd9e9ee 1618 = (backwards
770ae6cc
RK
1619 ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1620 : i);
77295dec 1621 /* Offset from start of field in OP0. */
0cd9e9ee
EB
1622 unsigned int bit_offset = (backwards
1623 ? MAX ((int) bitsize - ((int) i + 1)
1624 * BITS_PER_WORD,
1625 0)
770ae6cc 1626 : (int) i * BITS_PER_WORD);
44037a66
TG
1627 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1628 rtx result_part
5ef0b50d
EB
1629 = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1630 bitsize - i * BITS_PER_WORD),
c6285bd7 1631 bitnum + bit_offset, 1, target_part,
5ef0b50d 1632 mode, word_mode, fallback_p);
44037a66 1633
5b0264cb 1634 gcc_assert (target_part);
5ef0b50d
EB
1635 if (!result_part)
1636 {
1637 delete_insns_since (last);
1638 return NULL;
1639 }
44037a66
TG
1640
1641 if (result_part != target_part)
1642 emit_move_insn (target_part, result_part);
1643 }
1644
5f57dff0 1645 if (unsignedp)
77295dec
DE
1646 {
1647 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1648 need to be zeroed out. */
1649 if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1650 {
770ae6cc 1651 unsigned int i, total_words;
77295dec
DE
1652
1653 total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1654 for (i = nwords; i < total_words; i++)
04050c69
RK
1655 emit_move_insn
1656 (operand_subword (target,
0cd9e9ee 1657 backwards ? total_words - i - 1 : i,
04050c69
RK
1658 1, VOIDmode),
1659 const0_rtx);
77295dec
DE
1660 }
1661 return target;
1662 }
1663
5f57dff0
JW
1664 /* Signed bit field: sign-extend with two arithmetic shifts. */
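 /* Editorial example: for a 5-bit signed field in a 32-bit MODE this
 computes (target << 27) >> 27, where the arithmetic right shift
 replicates bit 4 of the field into bits 5..31. */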
1665 target = expand_shift (LSHIFT_EXPR, mode, target,
eb6c3df1 1666 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
5f57dff0 1667 return expand_shift (RSHIFT_EXPR, mode, target,
eb6c3df1 1668 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
44037a66 1669 }
c410d49e 1670
b8ab7fc8
RS
1671 /* If OP0 is a multi-word register, narrow it to the affected word.
1672 If the region spans two words, defer to extract_split_bit_field. */
1673 if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
44037a66 1674 {
b8ab7fc8
RS
1675 op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
1676 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1677 bitnum %= BITS_PER_WORD;
1678 if (bitnum + bitsize > BITS_PER_WORD)
470032d7 1679 {
b8ab7fc8
RS
1680 if (!fallback_p)
1681 return NULL_RTX;
1682 target = extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1683 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
470032d7 1684 }
44037a66 1685 }
44037a66 1686
b8ab7fc8
RS
1687 /* From here on we know the desired field is smaller than a word.
1688 If OP0 is a register, it too fits within a word. */
fcdd52b7
RS
1689 enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1690 extraction_insn extv;
1691 if (!MEM_P (op0)
c0a8a3e6
RS
1692 /* ??? We could limit the structure size to the part of OP0 that
1693 contains the field, with appropriate checks for endianness
1694 and TRULY_NOOP_TRUNCATION. */
1695 && get_best_reg_extraction_insn (&extv, pattern,
1696 GET_MODE_BITSIZE (GET_MODE (op0)),
fcdd52b7 1697 tmode))
44037a66 1698 {
fcdd52b7 1699 rtx result = extract_bit_field_using_extv (&extv, op0, bitsize, bitnum,
a20556e4 1700 unsignedp, target, mode,
fcdd52b7 1701 tmode);
a20556e4
RS
1702 if (result)
1703 return result;
44037a66 1704 }
f76b9db2 1705
6d7db3c5
RS
1706 /* If OP0 is a memory, try copying it to a register and seeing if a
1707 cheap register alternative is available. */
fcdd52b7 1708 if (MEM_P (op0))
6d7db3c5 1709 {
f5d4f18c
SL
1710 if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
1711 tmode))
17a73ba0 1712 {
fcdd52b7
RS
1713 rtx result = extract_bit_field_using_extv (&extv, op0, bitsize,
1714 bitnum, unsignedp,
1715 target, mode,
1716 tmode);
17a73ba0
RS
1717 if (result)
1718 return result;
1719 }
1720
f3f6fb16 1721 rtx_insn *last = get_last_insn ();
f76b9db2 1722
fcdd52b7
RS
1723 /* Try loading part of OP0 into a register and extracting the
1724 bitfield from that. */
1725 unsigned HOST_WIDE_INT bitpos;
1726 rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
1727 0, 0, tmode, &bitpos);
1728 if (xop0)
6d7db3c5 1729 {
fcdd52b7
RS
1730 xop0 = copy_to_reg (xop0);
1731 rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
c6285bd7 1732 unsignedp, target,
6d7db3c5 1733 mode, tmode, false);
fcdd52b7
RS
1734 if (result)
1735 return result;
1736 delete_insns_since (last);
c410d49e 1737 }
44037a66 1738 }
562fc702 1739
6d7db3c5
RS
1740 if (!fallback_p)
1741 return NULL;
1742
b8ab7fc8
RS
1743 /* Find a correspondingly-sized integer field, so we can apply
1744 shifts and masks to it. */
1745 int_mode = int_mode_for_mode (tmode);
1746 if (int_mode == BLKmode)
1747 int_mode = int_mode_for_mode (mode);
1748 /* Should probably push op0 out to memory and then do a load. */
1749 gcc_assert (int_mode != BLKmode);
1750
1751 target = extract_fixed_bit_field (int_mode, op0, bitsize, bitnum,
c6285bd7 1752 target, unsignedp);
6d7db3c5
RS
1753 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1754}
1755
1756/* Generate code to extract a bit-field from STR_RTX
1757 containing BITSIZE bits, starting at BITNUM,
1758 and put it in TARGET if possible (if TARGET is nonzero).
1759 Regardless of TARGET, we return the rtx for where the value is placed.
1760
1761 STR_RTX is the structure containing the byte (a REG or MEM).
1762 UNSIGNEDP is nonzero if this is an unsigned bit field.
1763 MODE is the natural mode of the field value once extracted.
1764 TMODE is the mode the caller would like the value to have;
1765 but the value may be returned with type MODE instead.
1766
1767 If a TARGET is specified and we can store in it at no extra cost,
1768 we do so, and return TARGET.
1769 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1770 if they are equally easy. */
1771
1772rtx
1773extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
c6285bd7 1774 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
ef4bddc2 1775 machine_mode mode, machine_mode tmode)
6d7db3c5 1776{
ef4bddc2 1777 machine_mode mode1;
f5d4f18c
SL
1778
1779 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
1780 if (GET_MODE_BITSIZE (GET_MODE (str_rtx)) > 0)
1781 mode1 = GET_MODE (str_rtx);
1782 else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1783 mode1 = GET_MODE (target);
1784 else
1785 mode1 = tmode;
1786
6f4e9cf8 1787 if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, mode1, 0, 0))
f5d4f18c 1788 {
b6dd42a9
BE
1789 /* Extraction of a full MODE1 value can be done with a simple load.
1790 We know here that the field can be accessed with one single
1791 instruction. For targets that support unaligned memory,
1792 an unaligned access may be necessary. */
1793 if (bitsize == GET_MODE_BITSIZE (mode1))
6f4e9cf8 1794 {
53c615a2
BE
1795 rtx result = adjust_bitfield_address (str_rtx, mode1,
1796 bitnum / BITS_PER_UNIT);
b6dd42a9 1797 gcc_assert (bitnum % BITS_PER_UNIT == 0);
53c615a2 1798 return convert_extracted_bit_field (result, mode, tmode, unsignedp);
6f4e9cf8
BE
1799 }
1800
53c615a2
BE
1801 str_rtx = narrow_bit_field_mem (str_rtx, mode1, bitsize, bitnum,
1802 &bitnum);
b6dd42a9 1803 gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (mode1));
53c615a2 1804 str_rtx = copy_to_reg (str_rtx);
f5d4f18c 1805 }
53c615a2 1806
c6285bd7 1807 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
6d7db3c5 1808 target, mode, tmode, true);
44037a66
TG
1809}
1810\f
b8ab7fc8
RS
1811/* Use shifts and boolean operations to extract a field of BITSIZE bits
1812 from bit BITNUM of OP0.
44037a66
TG
1813
1814 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1815 If TARGET is nonzero, attempts to store the value there
1816 and to return TARGET, but this is not guaranteed.
04050c69 1817 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
44037a66
TG
1818
1819static rtx
ef4bddc2 1820extract_fixed_bit_field (machine_mode tmode, rtx op0,
502b8322 1821 unsigned HOST_WIDE_INT bitsize,
b8ab7fc8 1822 unsigned HOST_WIDE_INT bitnum, rtx target,
c6285bd7 1823 int unsignedp)
44037a66 1824{
b8ab7fc8 1825 if (MEM_P (op0))
44037a66 1826 {
ef4bddc2 1827 machine_mode mode
548cfdc2
EB
1828 = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0), word_mode,
1829 MEM_VOLATILE_P (op0));
44037a66
TG
1830
1831 if (mode == VOIDmode)
1832 /* The only way this should occur is if the field spans word
1833 boundaries. */
b8ab7fc8 1834 return extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
44037a66 1835
f5d4f18c 1836 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
44037a66
TG
1837 }
1838
6f4e9cf8
BE
1839 return extract_fixed_bit_field_1 (tmode, op0, bitsize, bitnum,
1840 target, unsignedp);
1841}
1842
1843/* Helper function for extract_fixed_bit_field, extracts
1844 the bit field always using the MODE of OP0. */
1845
1846static rtx
ef4bddc2 1847extract_fixed_bit_field_1 (machine_mode tmode, rtx op0,
6f4e9cf8
BE
1848 unsigned HOST_WIDE_INT bitsize,
1849 unsigned HOST_WIDE_INT bitnum, rtx target,
1850 int unsignedp)
1851{
ef4bddc2 1852 machine_mode mode = GET_MODE (op0);
b8ab7fc8
RS
1853 gcc_assert (SCALAR_INT_MODE_P (mode));
1854
1855 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1856 for invalid input, such as extracting the equivalent of f5 from
1857 gcc.dg/pr48335-2.c. */
37811a73 1858
f76b9db2 1859 if (BYTES_BIG_ENDIAN)
b8ab7fc8 1860 /* BITNUM is the distance between our msb and that of OP0.
04050c69 1861 Convert it to the distance from the lsb. */
b8ab7fc8 1862 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
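 /* Editorial example: for mode == SImode, bitsize == 8 and a big-endian
 BITNUM of 4, the field's lsb sits 32 - 8 - 4 == 20 bits above OP0's
 lsb, so BITNUM becomes 20. */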
44037a66 1863
b8ab7fc8 1864 /* Now BITNUM is always the distance between the field's lsb and that of OP0.
44037a66
TG
1865 We have reduced the big-endian case to the little-endian case. */
1866
1867 if (unsignedp)
1868 {
b8ab7fc8 1869 if (bitnum)
44037a66
TG
1870 {
1871 /* If the field does not already start at the lsb,
1872 shift it so it does. */
44037a66 1873 /* Maybe propagate the target for the shift. */
f8cfc6aa 1874 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
6399c0ab
SB
1875 if (tmode != mode)
1876 subtarget = 0;
b8ab7fc8 1877 op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
44037a66
TG
1878 }
1879 /* Convert the value to the desired mode. */
1880 if (mode != tmode)
1881 op0 = convert_to_mode (tmode, op0, 1);
1882
1883 /* Unless the msb of the field used to be the msb when we shifted,
1884 mask out the upper bits. */
1885
b8ab7fc8 1886 if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
44037a66
TG
1887 return expand_binop (GET_MODE (op0), and_optab, op0,
1888 mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1889 target, 1, OPTAB_LIB_WIDEN);
1890 return op0;
1891 }
1892
1893 /* To extract a signed bit-field, first shift its msb to the msb of the word,
1894 then arithmetic-shift its lsb to the lsb of the word. */
1895 op0 = force_reg (mode, op0);
44037a66
TG
1896
1897 /* Find the narrowest integer mode that contains the field. */
1898
1899 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1900 mode = GET_MODE_WIDER_MODE (mode))
b8ab7fc8 1901 if (GET_MODE_BITSIZE (mode) >= bitsize + bitnum)
44037a66
TG
1902 {
1903 op0 = convert_to_mode (mode, op0, 0);
1904 break;
1905 }
1906
ccb1b17b
JJ
1907 if (mode != tmode)
1908 target = 0;
1909
b8ab7fc8 1910 if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
44037a66 1911 {
b8ab7fc8 1912 int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
44037a66 1913 /* Maybe propagate the target for the shift. */
f8cfc6aa 1914 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
44037a66
TG
1915 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1916 }
1917
1918 return expand_shift (RSHIFT_EXPR, mode, op0,
eb6c3df1 1919 GET_MODE_BITSIZE (mode) - bitsize, target, 0);
44037a66 1920}
44037a66
TG
1921
1922/* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
088c5368 1923 VALUE << BITPOS. */
44037a66
TG
1924
1925static rtx
ef4bddc2 1926lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
088c5368 1927 int bitpos)
44037a66 1928{
807e902e 1929 return immed_wide_int_const (wi::lshift (value, bitpos), mode);
44037a66
TG
1930}
1931\f
1932/* Extract a bit field that is split across two words
1933 and return an RTX for the result.
1934
1935 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1936 BITSIZE is the field width; BITPOS is the position of its first bit within the word.
04050c69 1937 UNSIGNEDP is 1 if we should zero-extend the contents; else sign-extend. */
44037a66
TG
1938
1939static rtx
502b8322
AJ
1940extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1941 unsigned HOST_WIDE_INT bitpos, int unsignedp)
44037a66 1942{
770ae6cc
RK
1943 unsigned int unit;
1944 unsigned int bitsdone = 0;
c16ddde3 1945 rtx result = NULL_RTX;
06c94bce 1946 int first = 1;
44037a66 1947
4ee16841
DE
1948 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1949 much at a time. */
f8cfc6aa 1950 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
4ee16841
DE
1951 unit = BITS_PER_WORD;
1952 else
609023ff 1953 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
4ee16841 1954
06c94bce
RS
1955 while (bitsdone < bitsize)
1956 {
770ae6cc 1957 unsigned HOST_WIDE_INT thissize;
06c94bce 1958 rtx part, word;
770ae6cc
RK
1959 unsigned HOST_WIDE_INT thispos;
1960 unsigned HOST_WIDE_INT offset;
06c94bce
RS
1961
1962 offset = (bitpos + bitsdone) / unit;
1963 thispos = (bitpos + bitsdone) % unit;
1964
0eb61c19
DE
1965 /* THISSIZE must not overrun a word boundary. Otherwise,
1966 extract_fixed_bit_field will call us again, and we will mutually
1967 recurse forever. */
1968 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1969 thissize = MIN (thissize, unit - thispos);
06c94bce
RS
1970
1971 /* If OP0 is a register, then handle OFFSET here.
5f57dff0
JW
1972
1973 When handling multiword bitfields, extract_bit_field may pass
1974 down a word_mode SUBREG of a larger REG for a bitfield that actually
1975 crosses a word boundary. Thus, for a SUBREG, we must find
1976 the current word starting from the base register. */
1977 if (GET_CODE (op0) == SUBREG)
1978 {
ddef6bc7
JJ
1979 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1980 word = operand_subword_force (SUBREG_REG (op0), word_offset,
5f57dff0
JW
1981 GET_MODE (SUBREG_REG (op0)));
1982 offset = 0;
1983 }
f8cfc6aa 1984 else if (REG_P (op0))
06c94bce
RS
1985 {
1986 word = operand_subword_force (op0, offset, GET_MODE (op0));
1987 offset = 0;
1988 }
1989 else
1990 word = op0;
1991
06c94bce 1992 /* Extract the parts in bit-counting order,
0eb61c19 1993 whose meaning is determined by BYTES_BIG_ENDIAN.
b8ab7fc8
RS
1994 OFFSET is in UNITs, and UNIT is in bits. */
1995 part = extract_fixed_bit_field (word_mode, word, thissize,
c6285bd7 1996 offset * unit + thispos, 0, 1);
06c94bce 1997 bitsdone += thissize;
44037a66 1998
06c94bce 1999 /* Shift this part into place for the result. */
f76b9db2
ILT
2000 if (BYTES_BIG_ENDIAN)
2001 {
2002 if (bitsize != bitsdone)
2003 part = expand_shift (LSHIFT_EXPR, word_mode, part,
eb6c3df1 2004 bitsize - bitsdone, 0, 1);
f76b9db2
ILT
2005 }
2006 else
2007 {
2008 if (bitsdone != thissize)
2009 part = expand_shift (LSHIFT_EXPR, word_mode, part,
eb6c3df1 2010 bitsdone - thissize, 0, 1);
f76b9db2 2011 }
44037a66 2012
06c94bce
RS
2013 if (first)
2014 result = part;
2015 else
2016 /* Combine the parts with bitwise or. This works
2017 because we extracted each part as an unsigned bit field. */
2018 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2019 OPTAB_LIB_WIDEN);
2020
2021 first = 0;
2022 }
44037a66
TG
2023
2024 /* Unsigned bit field: we are done. */
2025 if (unsignedp)
2026 return result;
2027 /* Signed bit field: sign-extend with two arithmetic shifts. */
2028 result = expand_shift (LSHIFT_EXPR, word_mode, result,
eb6c3df1 2029 BITS_PER_WORD - bitsize, NULL_RTX, 0);
44037a66 2030 return expand_shift (RSHIFT_EXPR, word_mode, result,
eb6c3df1 2031 BITS_PER_WORD - bitsize, NULL_RTX, 0);
44037a66
TG
2032}
2033\f
18b526e8
RS
2034/* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2035 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
2036 MODE, fill the upper bits with zeros. Fail if the layout of either
2037 mode is unknown (as for CC modes) or if the extraction would involve
2038 unprofitable mode punning. Return the value on success, otherwise
2039 return null.
2040
2041 This is different from gen_lowpart* in these respects:
2042
2043 - the returned value must always be considered an rvalue
2044
2045 - when MODE is wider than SRC_MODE, the extraction involves
2046 a zero extension
2047
2048 - when MODE is smaller than SRC_MODE, the extraction involves
2049 a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2050
2051 In other words, this routine performs a computation, whereas the
2052 gen_lowpart* routines are conceptually lvalue or rvalue subreg
2053 operations. */
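/* Editorial examples (not part of the original source):
   extract_low_bits (HImode, SImode, x) yields the low 16 bits of X as
   an rvalue (a truncation), while extract_low_bits (DImode, SImode, x)
   zero-extends X to 64 bits; either call returns null if the modes
   cannot profitably be tied. */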
2054
2055rtx
ef4bddc2 2056extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
18b526e8 2057{
ef4bddc2 2058 machine_mode int_mode, src_int_mode;
18b526e8
RS
2059
2060 if (mode == src_mode)
2061 return src;
2062
2063 if (CONSTANT_P (src))
d898d29b
JJ
2064 {
2065 /* simplify_gen_subreg can't be used here, as if simplify_subreg
2066 fails, it will happily create (subreg (symbol_ref)) or similar
2067 invalid SUBREGs. */
2068 unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2069 rtx ret = simplify_subreg (mode, src, src_mode, byte);
2070 if (ret)
2071 return ret;
2072
2073 if (GET_MODE (src) == VOIDmode
2074 || !validate_subreg (mode, src_mode, src, byte))
2075 return NULL_RTX;
2076
2077 src = force_reg (GET_MODE (src), src);
2078 return gen_rtx_SUBREG (mode, src, byte);
2079 }
18b526e8
RS
2080
2081 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2082 return NULL_RTX;
2083
2084 if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2085 && MODES_TIEABLE_P (mode, src_mode))
2086 {
2087 rtx x = gen_lowpart_common (mode, src);
2088 if (x)
2089 return x;
2090 }
2091
2092 src_int_mode = int_mode_for_mode (src_mode);
2093 int_mode = int_mode_for_mode (mode);
2094 if (src_int_mode == BLKmode || int_mode == BLKmode)
2095 return NULL_RTX;
2096
2097 if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2098 return NULL_RTX;
2099 if (!MODES_TIEABLE_P (int_mode, mode))
2100 return NULL_RTX;
2101
2102 src = gen_lowpart (src_int_mode, src);
2103 src = convert_modes (int_mode, src_int_mode, src, true);
2104 src = gen_lowpart (mode, src);
2105 return src;
2106}
2107\f
44037a66
TG
2108/* Add INC into TARGET. */
2109
2110void
502b8322 2111expand_inc (rtx target, rtx inc)
44037a66
TG
2112{
2113 rtx value = expand_binop (GET_MODE (target), add_optab,
2114 target, inc,
2115 target, 0, OPTAB_LIB_WIDEN);
2116 if (value != target)
2117 emit_move_insn (target, value);
2118}
2119
2120/* Subtract DEC from TARGET. */
2121
2122void
502b8322 2123expand_dec (rtx target, rtx dec)
44037a66
TG
2124{
2125 rtx value = expand_binop (GET_MODE (target), sub_optab,
2126 target, dec,
2127 target, 0, OPTAB_LIB_WIDEN);
2128 if (value != target)
2129 emit_move_insn (target, value);
2130}
2131\f
2132/* Output a shift instruction for expression code CODE,
2133 with SHIFTED being the rtx for the value to shift,
86529a49 2134 and AMOUNT the rtx for the amount to shift by.
44037a66
TG
2135 Store the result in the rtx TARGET, if that is convenient.
2136 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2137 Return the rtx for where the value is. */
2138
86529a49 2139static rtx
ef4bddc2 2140expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
86529a49 2141 rtx amount, rtx target, int unsignedp)
44037a66 2142{
b3694847
SS
2143 rtx op1, temp = 0;
2144 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2145 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
71d46ca5
MM
2146 optab lshift_optab = ashl_optab;
2147 optab rshift_arith_optab = ashr_optab;
2148 optab rshift_uns_optab = lshr_optab;
2149 optab lrotate_optab = rotl_optab;
2150 optab rrotate_optab = rotr_optab;
ef4bddc2
RS
2151 machine_mode op1_mode;
2152 machine_mode scalar_mode = mode;
d858f359 2153 int attempt;
f40751dd 2154 bool speed = optimize_insn_for_speed_p ();
44037a66 2155
a4ee446d
JJ
2156 if (VECTOR_MODE_P (mode))
2157 scalar_mode = GET_MODE_INNER (mode);
86529a49 2158 op1 = amount;
71d46ca5
MM
2159 op1_mode = GET_MODE (op1);
2160
2161 /* Determine whether the shift/rotate amount is a vector, or scalar. If the
2162 shift amount is a vector, use the vector/vector shift patterns. */
2163 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2164 {
2165 lshift_optab = vashl_optab;
2166 rshift_arith_optab = vashr_optab;
2167 rshift_uns_optab = vlshr_optab;
2168 lrotate_optab = vrotl_optab;
2169 rrotate_optab = vrotr_optab;
2170 }
2171
44037a66
TG
2172 /* Previously detected shift-counts computed by NEGATE_EXPR
2173 and shifted in the other direction; but that does not work
2174 on all machines. */
2175
166cdf4a
RH
2176 if (SHIFT_COUNT_TRUNCATED)
2177 {
481683e1 2178 if (CONST_INT_P (op1)
0fb7aeda 2179 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
a4ee446d 2180 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
0fb7aeda 2181 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
a4ee446d 2182 % GET_MODE_BITSIZE (scalar_mode));
166cdf4a 2183 else if (GET_CODE (op1) == SUBREG
c1cb09ad 2184 && subreg_lowpart_p (op1)
7afe2801
DM
2185 && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2186 && SCALAR_INT_MODE_P (GET_MODE (op1)))
166cdf4a
RH
2187 op1 = SUBREG_REG (op1);
2188 }
2ab0a5c4 2189
75776c6d
JJ
2190 /* Canonicalize rotates by constant amount. If op1 is bitsize / 2,
2191 prefer left rotation; if op1 is from bitsize / 2 + 1 to
2192 bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1
2193 amount instead. */
2194 if (rotate
2195 && CONST_INT_P (op1)
a4ee446d
JJ
2196 && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
2197 GET_MODE_BITSIZE (scalar_mode) - 1))
75776c6d 2198 {
a4ee446d 2199 op1 = GEN_INT (GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
75776c6d
JJ
2200 left = !left;
2201 code = left ? LROTATE_EXPR : RROTATE_EXPR;
2202 }
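 /* Editorial example: in a 32-bit scalar mode, a left rotate by 24
 falls in the range [17, 31] and is rewritten above as a right rotate
 by 32 - 24 == 8; a rotate by exactly bitsize / 2 == 16 is
 canonicalized to the left direction. */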
2203
a25efea0
TP
2204 /* Rotation of 16-bit values by 8 bits is effectively equivalent to a bswaphi.
2205 Note that this is not the case for bigger values. For instance, a rotation
2206 of 0x01020304 by 16 bits gives 0x03040102 which is different from
2207 0x04030201 (bswapsi). */
2208 if (rotate
2209 && CONST_INT_P (op1)
2210 && INTVAL (op1) == BITS_PER_UNIT
2211 && GET_MODE_SIZE (scalar_mode) == 2
2212 && optab_handler (bswap_optab, HImode) != CODE_FOR_nothing)
2213 return expand_unop (HImode, bswap_optab, shifted, NULL_RTX,
2214 unsignedp);
2215
44037a66
TG
2216 if (op1 == const0_rtx)
2217 return shifted;
2218
15bad393
RS
2219 /* Check whether it's cheaper to implement a left shift by a constant
2220 bit count by a sequence of additions. */
2221 if (code == LSHIFT_EXPR
481683e1 2222 && CONST_INT_P (op1)
15bad393 2223 && INTVAL (op1) > 0
a4ee446d 2224 && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
cb2eb96f 2225 && INTVAL (op1) < MAX_BITS_PER_WORD
5322d07e
NF
2226 && (shift_cost (speed, mode, INTVAL (op1))
2227 > INTVAL (op1) * add_cost (speed, mode))
2228 && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
15bad393
RS
2229 {
2230 int i;
2231 for (i = 0; i < INTVAL (op1); i++)
2232 {
2233 temp = force_reg (mode, shifted);
2234 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2235 unsignedp, OPTAB_LIB_WIDEN);
2236 }
2237 return shifted;
2238 }
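 /* Editorial example: with INTVAL (op1) == 2 the loop above emits
 SHIFTED = SHIFTED + SHIFTED twice, doubling the value once per
 iteration; this path is chosen only when the additions are cheaper
 than one shift in this mode. */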
2239
d858f359 2240 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
44037a66
TG
2241 {
2242 enum optab_methods methods;
2243
d858f359 2244 if (attempt == 0)
44037a66 2245 methods = OPTAB_DIRECT;
d858f359 2246 else if (attempt == 1)
44037a66
TG
2247 methods = OPTAB_WIDEN;
2248 else
2249 methods = OPTAB_LIB_WIDEN;
2250
2251 if (rotate)
2252 {
2253 /* Widening does not work for rotation. */
2254 if (methods == OPTAB_WIDEN)
2255 continue;
2256 else if (methods == OPTAB_LIB_WIDEN)
cbec710e 2257 {
39e71615 2258 /* If we have been unable to open-code this by a rotation,
cbec710e 2259 do it as the IOR of two shifts. I.e., to rotate A
ae6fa899
JJ
2260 by N bits, compute
2261 (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
cbec710e
RK
2262 where C is the bitsize of A.
2263
2264 It is theoretically possible that the target machine might
2265 not be able to perform either shift and hence we would
2266 be making two libcalls rather than just the one for the
2267 shift (similarly if IOR could not be done). We will allow
2268 this extremely unlikely lossage to avoid complicating the
2269 code below. */
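 /* Editorial worked example: for C == 32 and N == 8 the expansion is
 (A << 8) | ((unsigned) A >> ((-8) & 31)), i.e. a logical right
 shift by 24; the & (C - 1) mask keeps a variable shift count
 within [0, C - 1]. */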
2270
39e71615 2271 rtx subtarget = target == shifted ? 0 : target;
86529a49 2272 rtx new_amount, other_amount;
39e71615 2273 rtx temp1;
86529a49
RG
2274
2275 new_amount = op1;
ae6fa899
JJ
2276 if (op1 == const0_rtx)
2277 return shifted;
2278 else if (CONST_INT_P (op1))
a4ee446d 2279 other_amount = GEN_INT (GET_MODE_BITSIZE (scalar_mode)
5c049507
RG
2280 - INTVAL (op1));
2281 else
ae6fa899
JJ
2282 {
2283 other_amount
2284 = simplify_gen_unary (NEG, GET_MODE (op1),
2285 op1, GET_MODE (op1));
a4ee446d 2286 HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
ae6fa899 2287 other_amount
69a59f0f
RS
2288 = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2289 gen_int_mode (mask, GET_MODE (op1)));
ae6fa899 2290 }
39e71615
RK
2291
2292 shifted = force_reg (mode, shifted);
2293
86529a49
RG
2294 temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2295 mode, shifted, new_amount, 0, 1);
2296 temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2297 mode, shifted, other_amount,
2298 subtarget, 1);
39e71615
RK
2299 return expand_binop (mode, ior_optab, temp, temp1, target,
2300 unsignedp, methods);
cbec710e 2301 }
44037a66
TG
2302
2303 temp = expand_binop (mode,
71d46ca5 2304 left ? lrotate_optab : rrotate_optab,
44037a66
TG
2305 shifted, op1, target, unsignedp, methods);
2306 }
2307 else if (unsignedp)
a34958c9 2308 temp = expand_binop (mode,
71d46ca5 2309 left ? lshift_optab : rshift_uns_optab,
a34958c9 2310 shifted, op1, target, unsignedp, methods);
44037a66
TG
2311
2312 /* Do arithmetic shifts.
2313 Also, if we are going to widen the operand, we can just as well
2314 use an arithmetic right-shift instead of a logical one. */
2315 if (temp == 0 && ! rotate
2316 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2317 {
2318 enum optab_methods methods1 = methods;
2319
2320 /* If trying to widen a log shift to an arithmetic shift,
2321 don't accept an arithmetic shift of the same size. */
2322 if (unsignedp)
2323 methods1 = OPTAB_MUST_WIDEN;
2324
2325 /* Arithmetic shift */
2326
2327 temp = expand_binop (mode,
71d46ca5 2328 left ? lshift_optab : rshift_arith_optab,
44037a66
TG
2329 shifted, op1, target, unsignedp, methods1);
2330 }
2331
711a5e64 2332 /* We used to try extzv here for logical right shifts, but that was
c410d49e 2333 only useful for one machine, the VAX, and caused poor code
711a5e64
RK
2334 generation there for lshrdi3, so the code was deleted and a
2335 define_expand for lshrsi3 was added to vax.md. */
44037a66
TG
2336 }
2337
5b0264cb 2338 gcc_assert (temp);
44037a66
TG
2339 return temp;
2340}
eb6c3df1
RG
2341
2342/* Output a shift instruction for expression code CODE,
2343 with SHIFTED being the rtx for the value to shift,
2344 and AMOUNT the amount to shift by.
2345 Store the result in the rtx TARGET, if that is convenient.
2346 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2347 Return the rtx for where the value is. */
2348
2349rtx
ef4bddc2 2350expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
eb6c3df1
RG
2351 int amount, rtx target, int unsignedp)
2352{
86529a49
RG
2353 return expand_shift_1 (code, mode,
2354 shifted, GEN_INT (amount), target, unsignedp);
2355}
2356
2357/* Output a shift instruction for expression code CODE,
2358 with SHIFTED being the rtx for the value to shift,
2359 and AMOUNT the tree for the amount to shift by.
2360 Store the result in the rtx TARGET, if that is convenient.
2361 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2362 Return the rtx for where the value is. */
2363
2364rtx
ef4bddc2 2365expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
86529a49
RG
2366 tree amount, rtx target, int unsignedp)
2367{
2368 return expand_shift_1 (code, mode,
2369 shifted, expand_normal (amount), target, unsignedp);
eb6c3df1 2370}
86529a49 2371
44037a66 2372\f
8efc8980
RS
2373/* Indicates the type of fixup needed after a constant multiplication.
2374 BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2375 the result should be negated, and ADD_VARIANT means that the
2376 multiplicand should be added to the result. */
2377enum mult_variant {basic_variant, negate_variant, add_variant};
2378
41c64ac0 2379static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
ef4bddc2
RS
2380 const struct mult_cost *, machine_mode mode);
2381static bool choose_mult_variant (machine_mode, HOST_WIDE_INT,
f258e38b 2382 struct algorithm *, enum mult_variant *, int);
ef4bddc2 2383static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
8efc8980 2384 const struct algorithm *, enum mult_variant);
502b8322 2385static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
ef4bddc2
RS
2386static rtx extract_high_half (machine_mode, rtx);
2387static rtx expmed_mult_highpart (machine_mode, rtx, rtx, rtx, int, int);
2388static rtx expmed_mult_highpart_optab (machine_mode, rtx, rtx, rtx,
8efc8980 2389 int, int);
44037a66 2390/* Compute and return the best algorithm for multiplying by T.
7963ac37
RK
2391 The algorithm must cost less than cost_limit.
2392 If retval.cost >= COST_LIMIT, no algorithm was found and all
41c64ac0
RS
2393 other fields of the returned struct are undefined.
2394 MODE is the machine mode of the multiplication. */
44037a66 2395
819126a6 2396static void
502b8322 2397synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
ef4bddc2 2398 const struct mult_cost *cost_limit, machine_mode mode)
44037a66 2399{
b2fb324c 2400 int m;
52786026 2401 struct algorithm *alg_in, *best_alg;
26276705
RS
2402 struct mult_cost best_cost;
2403 struct mult_cost new_limit;
2404 int op_cost, op_latency;
ef268d34 2405 unsigned HOST_WIDE_INT orig_t = t;
b2fb324c 2406 unsigned HOST_WIDE_INT q;
84ddb681 2407 int maxm, hash_index;
7b13ee6b
KH
2408 bool cache_hit = false;
2409 enum alg_code cache_alg = alg_zero;
f40751dd 2410 bool speed = optimize_insn_for_speed_p ();
ef4bddc2 2411 machine_mode imode;
5322d07e 2412 struct alg_hash_entry *entry_ptr;
44037a66 2413
7963ac37
RK
2414 /* Indicate that no algorithm is yet found. If no algorithm
2415 is found, this value will be returned and will indicate failure. */
26276705 2416 alg_out->cost.cost = cost_limit->cost + 1;
3ab0f290 2417 alg_out->cost.latency = cost_limit->latency + 1;
44037a66 2418
26276705
RS
2419 if (cost_limit->cost < 0
2420 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
819126a6 2421 return;
44037a66 2422
84ddb681
RH
2423 /* Be prepared for vector modes. */
2424 imode = GET_MODE_INNER (mode);
2425 if (imode == VOIDmode)
2426 imode = mode;
2427
2428 maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2429
0792ab19 2430 /* Restrict the bits of "t" to the multiplication's mode. */
84ddb681 2431 t &= GET_MODE_MASK (imode);
0792ab19 2432
b385aeda
RK
2433 /* t == 1 can be done in zero cost. */
2434 if (t == 1)
b2fb324c 2435 {
819126a6 2436 alg_out->ops = 1;
26276705
RS
2437 alg_out->cost.cost = 0;
2438 alg_out->cost.latency = 0;
819126a6
RK
2439 alg_out->op[0] = alg_m;
2440 return;
b2fb324c
RK
2441 }
2442
b385aeda
RK
2443 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2444 fail now. */
819126a6 2445 if (t == 0)
b385aeda 2446 {
5322d07e 2447 if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
819126a6 2448 return;
b385aeda
RK
2449 else
2450 {
819126a6 2451 alg_out->ops = 1;
5322d07e
NF
2452 alg_out->cost.cost = zero_cost (speed);
2453 alg_out->cost.latency = zero_cost (speed);
819126a6
RK
2454 alg_out->op[0] = alg_zero;
2455 return;
b385aeda
RK
2456 }
2457 }
2458
52786026
RK
2459 /* We'll be needing a couple extra algorithm structures now. */
2460
1b4572a8
KG
2461 alg_in = XALLOCA (struct algorithm);
2462 best_alg = XALLOCA (struct algorithm);
26276705 2463 best_cost = *cost_limit;
52786026 2464
7b13ee6b 2465 /* Compute the hash index. */
f40751dd 2466 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
7b13ee6b
KH
2467
2468 /* See if we already know what to do for T. */
5322d07e
NF
2469 entry_ptr = alg_hash_entry_ptr (hash_index);
2470 if (entry_ptr->t == t
2471 && entry_ptr->mode == mode
2473 && entry_ptr->speed == speed
2474 && entry_ptr->alg != alg_unknown)
7b13ee6b 2475 {
5322d07e 2476 cache_alg = entry_ptr->alg;
0178027c
KH
2477
2478 if (cache_alg == alg_impossible)
7b13ee6b 2479 {
0178027c 2480 /* The cache tells us that it's impossible to synthesize
5322d07e
NF
2481 multiplication by T within entry_ptr->cost. */
2482 if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
0178027c
KH
2483 /* COST_LIMIT is at least as restrictive as the one
2484 recorded in the hash table, in which case we have no
2485 hope of synthesizing a multiplication. Just
2486 return. */
2487 return;
2488
2489 /* If we get here, COST_LIMIT is less restrictive than the
2490 one recorded in the hash table, so we may be able to
2491 synthesize a multiplication. Proceed as if we didn't
2492 have the cache entry. */
2493 }
2494 else
2495 {
5322d07e 2496 if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
0178027c
KH
2497 /* The cached algorithm shows that this multiplication
2498 requires more cost than COST_LIMIT. Just return. This
2499 way, we don't clobber this cache entry with
2500 alg_impossible but retain useful information. */
2501 return;
7b13ee6b 2502
0178027c
KH
2503 cache_hit = true;
2504
2505 switch (cache_alg)
2506 {
2507 case alg_shift:
2508 goto do_alg_shift;
7b13ee6b 2509
0178027c
KH
2510 case alg_add_t_m2:
2511 case alg_sub_t_m2:
2512 goto do_alg_addsub_t_m2;
7b13ee6b 2513
0178027c
KH
2514 case alg_add_factor:
2515 case alg_sub_factor:
2516 goto do_alg_addsub_factor;
7b13ee6b 2517
0178027c
KH
2518 case alg_add_t2_m:
2519 goto do_alg_add_t2_m;
7b13ee6b 2520
0178027c
KH
2521 case alg_sub_t2_m:
2522 goto do_alg_sub_t2_m;
2523
2524 default:
2525 gcc_unreachable ();
2526 }
7b13ee6b
KH
2527 }
2528 }
2529
b385aeda
RK
2530 /* If we have a group of zero bits at the low-order part of T, try
2531 multiplying by the remaining bits and then doing a shift. */
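 /* Editorial example: t == 20 (binary 10100) has m == 2 low zero bits,
 so the multiplication is synthesized as q == 5 followed by a left
 shift of 2. */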
2532
b2fb324c 2533 if ((t & 1) == 0)
44037a66 2534 {
7b13ee6b 2535 do_alg_shift:
b2fb324c 2536 m = floor_log2 (t & -t); /* m = number of low zero bits */
0792ab19 2537 if (m < maxm)
44037a66 2538 {
02a65aef 2539 q = t >> m;
15bad393
RS
2540 /* The function expand_shift will choose between a shift and
2541 a sequence of additions, so the observed cost is given as
5322d07e
NF
2542 MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */
2543 op_cost = m * add_cost (speed, mode);
2544 if (shift_cost (speed, mode, m) < op_cost)
2545 op_cost = shift_cost (speed, mode, m);
26276705
RS
2546 new_limit.cost = best_cost.cost - op_cost;
2547 new_limit.latency = best_cost.latency - op_cost;
2548 synth_mult (alg_in, q, &new_limit, mode);
2549
2550 alg_in->cost.cost += op_cost;
2551 alg_in->cost.latency += op_cost;
2552 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
02a65aef 2553 {
26276705 2554 best_cost = alg_in->cost;
076701b6 2555 std::swap (alg_in, best_alg);
02a65aef
R
2556 best_alg->log[best_alg->ops] = m;
2557 best_alg->op[best_alg->ops] = alg_shift;
02a65aef 2558 }
ddc2690a
KH
2559
2560 /* See if treating ORIG_T as a signed number yields a better
2561 sequence. Try this sequence only for a negative ORIG_T
2562 as it would be useless for a non-negative ORIG_T. */
2563 if ((HOST_WIDE_INT) orig_t < 0)
2564 {
2565 /* Shift ORIG_T as follows because a right shift of a
2566 negative-valued signed type is implementation
2567 defined. */
2568 q = ~(~orig_t >> m);
2569 /* The function expand_shift will choose between a shift
2570 and a sequence of additions, so the observed cost is
5322d07e
NF
2571 given as MIN (m * add_cost(speed, mode),
2572 shift_cost(speed, mode, m)). */
2573 op_cost = m * add_cost (speed, mode);
2574 if (shift_cost (speed, mode, m) < op_cost)
2575 op_cost = shift_cost (speed, mode, m);
ddc2690a
KH
2576 new_limit.cost = best_cost.cost - op_cost;
2577 new_limit.latency = best_cost.latency - op_cost;
2578 synth_mult (alg_in, q, &new_limit, mode);
2579
2580 alg_in->cost.cost += op_cost;
2581 alg_in->cost.latency += op_cost;
2582 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2583 {
ddc2690a 2584 best_cost = alg_in->cost;
076701b6 2585 std::swap (alg_in, best_alg);
ddc2690a
KH
2586 best_alg->log[best_alg->ops] = m;
2587 best_alg->op[best_alg->ops] = alg_shift;
2588 }
2589 }
819126a6 2590 }
7b13ee6b
KH
2591 if (cache_hit)
2592 goto done;
819126a6
RK
2593 }
2594
2595 /* If we have an odd number, add or subtract one. */
2596 if ((t & 1) != 0)
2597 {
2598 unsigned HOST_WIDE_INT w;
2599
7b13ee6b 2600 do_alg_addsub_t_m2:
819126a6
RK
2601 for (w = 1; (w & t) != 0; w <<= 1)
2602 ;
31031edd 2603 /* If T was -1, then W will be zero after the loop. This is another
c410d49e 2604 case where T ends with ...111. Handling this with (T + 1) and
31031edd
JL
2605 subtracting 1 produces slightly better code and makes algorithm
2606 selection much faster than treating it like the ...0111 case
2607 below. */
2608 if (w == 0
2609 || (w > 2
2610 /* Reject the case where t is 3.
2611 Thus we prefer addition in that case. */
2612 && t != 3))
819126a6 2613 {
c61928d0 2614 /* T ends with ...111. Multiply by (T + 1) and subtract T. */
819126a6 2615
5322d07e 2616 op_cost = add_cost (speed, mode);
26276705
RS
2617 new_limit.cost = best_cost.cost - op_cost;
2618 new_limit.latency = best_cost.latency - op_cost;
2619 synth_mult (alg_in, t + 1, &new_limit, mode);
b2fb324c 2620
26276705
RS
2621 alg_in->cost.cost += op_cost;
2622 alg_in->cost.latency += op_cost;
2623 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
44037a66 2624 {
26276705 2625 best_cost = alg_in->cost;
076701b6 2626 std::swap (alg_in, best_alg);
819126a6
RK
2627 best_alg->log[best_alg->ops] = 0;
2628 best_alg->op[best_alg->ops] = alg_sub_t_m2;
44037a66 2629 }
44037a66 2630 }
819126a6
RK
2631 else
2632 {
c61928d0 2633 /* T ends with ...01 or ...011. Multiply by (T - 1) and add T. */
44037a66 2634
5322d07e 2635 op_cost = add_cost (speed, mode);
26276705
RS
2636 new_limit.cost = best_cost.cost - op_cost;
2637 new_limit.latency = best_cost.latency - op_cost;
2638 synth_mult (alg_in, t - 1, &new_limit, mode);
819126a6 2639
26276705
RS
2640 alg_in->cost.cost += op_cost;
2641 alg_in->cost.latency += op_cost;
2642 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
819126a6 2643 {
26276705 2644 best_cost = alg_in->cost;
076701b6 2645 std::swap (alg_in, best_alg);
819126a6
RK
2646 best_alg->log[best_alg->ops] = 0;
2647 best_alg->op[best_alg->ops] = alg_add_t_m2;
819126a6
RK
2648 }
2649 }
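 /* Editorial examples: t == 15 (...1111) is synthesized as 16*a - a
 via the (T + 1) branch above, while t == 5 (...101) becomes
 4*a + a via the (T - 1) branch. */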
ef268d34
KH
2650
2651 /* We may be able to calculate a * -7, a * -15, a * -31, etc
2652 quickly with a - a * n for some appropriate constant n. */
2653 m = exact_log2 (-orig_t + 1);
2654 if (m >= 0 && m < maxm)
2655 {
35430ca0
KT
2656 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2657 /* If the target has a cheap shift-and-subtract insn use
2658 that in preference to a shift insn followed by a sub insn.
2659 Assume that the shift-and-sub is "atomic" with a latency
2660 equal to it's cost, otherwise assume that on superscalar
2661 hardware the shift may be executed concurrently with the
2662 earlier steps in the algorithm. */
2663 if (shiftsub1_cost (speed, mode, m) <= op_cost)
2664 {
2665 op_cost = shiftsub1_cost (speed, mode, m);
2666 op_latency = op_cost;
2667 }
2668 else
2669 op_latency = add_cost (speed, mode);
2670
ef268d34 2671 new_limit.cost = best_cost.cost - op_cost;
35430ca0 2672 new_limit.latency = best_cost.latency - op_latency;
84ddb681
RH
2673 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2674 &new_limit, mode);
ef268d34
KH
2675
2676 alg_in->cost.cost += op_cost;
35430ca0 2677 alg_in->cost.latency += op_latency;
ef268d34
KH
2678 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2679 {
ef268d34 2680 best_cost = alg_in->cost;
076701b6 2681 std::swap (alg_in, best_alg);
ef268d34
KH
2682 best_alg->log[best_alg->ops] = m;
2683 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2684 }
2685 }
2686
7b13ee6b
KH
2687 if (cache_hit)
2688 goto done;
819126a6 2689 }
63610db9 2690
44037a66 2691 /* Look for factors of t of the form
7963ac37 2692 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
44037a66 2693 If we find such a factor, we can multiply by t using an algorithm that
7963ac37 2694 multiplies by q, shifts the result by m, and adds or subtracts the unshifted value.
44037a66 2695
7963ac37
RK
2696 We search for large factors first and loop down, even if large factors
2697 are less probable than small; if we find a large factor we will find a
2698 good sequence quickly, and therefore be able to prune (by decreasing
2699 COST_LIMIT) the search. */
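 /* Editorial example: t == 45 factors as 5 * (2**3 + 1), so we
 multiply by q == 5 and then use alg_add_factor to form
 (x << 3) + x, i.e. x * 9. */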
2700
7b13ee6b 2701 do_alg_addsub_factor:
7963ac37 2702 for (m = floor_log2 (t - 1); m >= 2; m--)
44037a66 2703 {
7963ac37 2704 unsigned HOST_WIDE_INT d;
44037a66 2705
7963ac37 2706 d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
7b13ee6b
KH
2707 if (t % d == 0 && t > d && m < maxm
2708 && (!cache_hit || cache_alg == alg_add_factor))
44037a66 2709 {
5322d07e 2710 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
35430ca0
KT
2711 if (shiftadd_cost (speed, mode, m) <= op_cost)
2712 op_cost = shiftadd_cost (speed, mode, m);
2713
2714 op_latency = op_cost;
2715
26276705
RS
2716
2717 new_limit.cost = best_cost.cost - op_cost;
2718 new_limit.latency = best_cost.latency - op_latency;
2719 synth_mult (alg_in, t / d, &new_limit, mode);
44037a66 2720
26276705
RS
2721 alg_in->cost.cost += op_cost;
2722 alg_in->cost.latency += op_latency;
2723 if (alg_in->cost.latency < op_cost)
2724 alg_in->cost.latency = op_cost;
2725 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
44037a66 2726 {
26276705 2727 best_cost = alg_in->cost;
076701b6 2728 std::swap (alg_in, best_alg);
b385aeda 2729 best_alg->log[best_alg->ops] = m;
819126a6 2730 best_alg->op[best_alg->ops] = alg_add_factor;
44037a66 2731 }
c0b262c1
TG
2732 /* Other factors will have been taken care of in the recursion. */
2733 break;
44037a66
TG
2734 }
2735
7963ac37 2736 d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
7b13ee6b
KH
2737 if (t % d == 0 && t > d && m < maxm
2738 && (!cache_hit || cache_alg == alg_sub_factor))
44037a66 2739 {
5322d07e 2740 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
35430ca0
KT
2741 if (shiftsub0_cost (speed, mode, m) <= op_cost)
2742 op_cost = shiftsub0_cost (speed, mode, m);
2743
2744 op_latency = op_cost;
26276705
RS
2745
2746 new_limit.cost = best_cost.cost - op_cost;
417c735c 2747 new_limit.latency = best_cost.latency - op_latency;
26276705 2748 synth_mult (alg_in, t / d, &new_limit, mode);
44037a66 2749
26276705
RS
2750 alg_in->cost.cost += op_cost;
2751 alg_in->cost.latency += op_latency;
2752 if (alg_in->cost.latency < op_cost)
2753 alg_in->cost.latency = op_cost;
2754 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
44037a66 2755 {
26276705 2756 best_cost = alg_in->cost;
076701b6 2757 std::swap (alg_in, best_alg);
b385aeda 2758 best_alg->log[best_alg->ops] = m;
819126a6 2759 best_alg->op[best_alg->ops] = alg_sub_factor;
44037a66 2760 }
c0b262c1 2761 break;
44037a66
TG
2762 }
2763 }
7b13ee6b
KH
2764 if (cache_hit)
2765 goto done;
44037a66 2766
7963ac37
RK
2767 /* Try shift-and-add (load effective address) instructions,
2768 i.e. do a*3, a*5, a*9. */
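 /* Editorial note: where the target has a combined shift-and-add
 pattern (e.g. an lea-style address calculation), a*9 is a single
 (a << 3) + a step whose price is the shiftadd_cost used below. */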
2769 if ((t & 1) != 0)
2770 {
7b13ee6b 2771 do_alg_add_t2_m:
7963ac37
RK
2772 q = t - 1;
2773 q = q & -q;
2774 m = exact_log2 (q);
0792ab19 2775 if (m >= 0 && m < maxm)
b385aeda 2776 {
5322d07e 2777 op_cost = shiftadd_cost (speed, mode, m);
26276705
RS
2778 new_limit.cost = best_cost.cost - op_cost;
2779 new_limit.latency = best_cost.latency - op_cost;
2780 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2781
2782 alg_in->cost.cost += op_cost;
2783 alg_in->cost.latency += op_cost;
2784 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
5eebe2eb 2785 {
26276705 2786 best_cost = alg_in->cost;
076701b6 2787 std::swap (alg_in, best_alg);
5eebe2eb 2788 best_alg->log[best_alg->ops] = m;
819126a6 2789 best_alg->op[best_alg->ops] = alg_add_t2_m;
5eebe2eb 2790 }
7963ac37 2791 }
7b13ee6b
KH
2792 if (cache_hit)
2793 goto done;
44037a66 2794
7b13ee6b 2795 do_alg_sub_t2_m:
7963ac37
RK
2796 q = t + 1;
2797 q = q & -q;
2798 m = exact_log2 (q);
0792ab19 2799 if (m >= 0 && m < maxm)
b385aeda 2800 {
5322d07e 2801 op_cost = shiftsub0_cost (speed, mode, m);
26276705
RS
2802 new_limit.cost = best_cost.cost - op_cost;
2803 new_limit.latency = best_cost.latency - op_cost;
2804 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2805
2806 alg_in->cost.cost += op_cost;
2807 alg_in->cost.latency += op_cost;
2808 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
5eebe2eb 2809 {
26276705 2810 best_cost = alg_in->cost;
076701b6 2811 std::swap (alg_in, best_alg);
5eebe2eb 2812 best_alg->log[best_alg->ops] = m;
819126a6 2813 best_alg->op[best_alg->ops] = alg_sub_t2_m;
5eebe2eb 2814 }
7963ac37 2815 }
7b13ee6b
KH
2816 if (cache_hit)
2817 goto done;
7963ac37 2818 }
44037a66 2819
7b13ee6b 2820 done:
3ab0f290
DJ
2821 /* If best_cost has not decreased, we have not found any algorithm. */
2822 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
0178027c
KH
2823 {
2824 /* We failed to find an algorithm. Record alg_impossible for
2825 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2826 we are asked to find an algorithm for T within the same or
2827 lower COST_LIMIT, we can immediately return to the
2828 caller. */
5322d07e
NF
2829 entry_ptr->t = t;
2830 entry_ptr->mode = mode;
2831 entry_ptr->speed = speed;
2832 entry_ptr->alg = alg_impossible;
2833 entry_ptr->cost = *cost_limit;
0178027c
KH
2834 return;
2835 }
3ab0f290 2836
7b13ee6b
KH
2837 /* Cache the result. */
2838 if (!cache_hit)
2839 {
5322d07e
NF
2840 entry_ptr->t = t;
2841 entry_ptr->mode = mode;
2842 entry_ptr->speed = speed;
2843 entry_ptr->alg = best_alg->op[best_alg->ops];
2844 entry_ptr->cost.cost = best_cost.cost;
2845 entry_ptr->cost.latency = best_cost.latency;
7b13ee6b
KH
2846 }
2847
52786026
RK
2848 /* If we are getting a too long sequence for `struct algorithm'
2849 to record, make this search fail. */
2850 if (best_alg->ops == MAX_BITS_PER_WORD)
2851 return;
2852
819126a6
RK
2853 /* Copy the algorithm from temporary space to the space at alg_out.
2854 We avoid using structure assignment because the majority of
2855 best_alg is normally undefined, and this is a critical function. */
2856 alg_out->ops = best_alg->ops + 1;
26276705 2857 alg_out->cost = best_cost;
4e135bdd
KG
2858 memcpy (alg_out->op, best_alg->op,
2859 alg_out->ops * sizeof *alg_out->op);
2860 memcpy (alg_out->log, best_alg->log,
2861 alg_out->ops * sizeof *alg_out->log);
44037a66
TG
2862}
2863\f
d1a6adeb 2864/* Find the cheapest way of multiplying a value of mode MODE by VAL.
8efc8980
RS
2865 Try three variations:
2866
2867 - a shift/add sequence based on VAL itself
2868 - a shift/add sequence based on -VAL, followed by a negation
2869 - a shift/add sequence based on VAL - 1, followed by an addition.
2870
f258e38b
UW
2871 Return true if the cheapest of these costs less than MULT_COST,
2872 describing the algorithm in *ALG and final fixup in *VARIANT. */
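/* Editorial example: for VAL == -7, the -VAL sequence 8*a - a followed
   by a negation (negate_variant) is typically cheaper than a direct
   shift/add sequence for -7 itself. */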
8efc8980
RS
2873
2874static bool
ef4bddc2 2875choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
f258e38b
UW
2876 struct algorithm *alg, enum mult_variant *variant,
2877 int mult_cost)
8efc8980 2878{
8efc8980 2879 struct algorithm alg2;
26276705
RS
2880 struct mult_cost limit;
2881 int op_cost;
f40751dd 2882 bool speed = optimize_insn_for_speed_p ();
8efc8980 2883
18eaea7f
RS
2884 /* Fail quickly for impossible bounds. */
2885 if (mult_cost < 0)
2886 return false;
2887
2888 /* Ensure that mult_cost provides a reasonable upper bound.
2889 Any constant multiplication can be performed with less
2890 than 2 * bits additions. */
5322d07e 2891 op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
18eaea7f
RS
2892 if (mult_cost > op_cost)
2893 mult_cost = op_cost;
2894
8efc8980 2895 *variant = basic_variant;
26276705
RS
2896 limit.cost = mult_cost;
2897 limit.latency = mult_cost;
2898 synth_mult (alg, val, &limit, mode);
8efc8980
RS
2899
2900 /* This works only if the inverted value actually fits in an
2901 `unsigned int' */
84ddb681 2902 if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
8efc8980 2903 {
c3284718 2904 op_cost = neg_cost (speed, mode);
26276705
RS
2905 if (MULT_COST_LESS (&alg->cost, mult_cost))
2906 {
2907 limit.cost = alg->cost.cost - op_cost;
2908 limit.latency = alg->cost.latency - op_cost;
2909 }
2910 else
2911 {
2912 limit.cost = mult_cost - op_cost;
2913 limit.latency = mult_cost - op_cost;
2914 }
2915
2916 synth_mult (&alg2, -val, &limit, mode);
2917 alg2.cost.cost += op_cost;
2918 alg2.cost.latency += op_cost;
2919 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
8efc8980
RS
2920 *alg = alg2, *variant = negate_variant;
2921 }
2922
2923 /* This proves very useful for division-by-constant. */
5322d07e 2924 op_cost = add_cost (speed, mode);
26276705
RS
2925 if (MULT_COST_LESS (&alg->cost, mult_cost))
2926 {
2927 limit.cost = alg->cost.cost - op_cost;
2928 limit.latency = alg->cost.latency - op_cost;
2929 }
2930 else
2931 {
2932 limit.cost = mult_cost - op_cost;
2933 limit.latency = mult_cost - op_cost;
2934 }
2935
2936 synth_mult (&alg2, val - 1, &limit, mode);
2937 alg2.cost.cost += op_cost;
2938 alg2.cost.latency += op_cost;
2939 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
8efc8980
RS
2940 *alg = alg2, *variant = add_variant;
2941
26276705 2942 return MULT_COST_LESS (&alg->cost, mult_cost);
8efc8980
RS
2943}
2944
2945/* A subroutine of expand_mult, used for constant multiplications.
2946 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2947 convenient. Use the shift/add sequence described by ALG and apply
2948 the final fixup specified by VARIANT. */
2949
2950static rtx
ef4bddc2 2951expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
8efc8980
RS
2952 rtx target, const struct algorithm *alg,
2953 enum mult_variant variant)
2954{
2955 HOST_WIDE_INT val_so_far;
f3f6fb16
DM
2956 rtx_insn *insn;
2957 rtx accum, tem;
8efc8980 2958 int opno;
ef4bddc2 2959 machine_mode nmode;
8efc8980 2960
d448860e
JH
2961 /* Avoid referencing memory over and over and invalid sharing
2962 on SUBREGs. */
2963 op0 = force_reg (mode, op0);
8efc8980
RS
2964
2965 /* ACCUM starts out either as OP0 or as a zero, depending on
2966 the first operation. */
2967
2968 if (alg->op[0] == alg_zero)
2969 {
84ddb681 2970 accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
8efc8980
RS
2971 val_so_far = 0;
2972 }
2973 else if (alg->op[0] == alg_m)
2974 {
2975 accum = copy_to_mode_reg (mode, op0);
2976 val_so_far = 1;
2977 }
2978 else
5b0264cb 2979 gcc_unreachable ();
8efc8980
RS
2980
2981 for (opno = 1; opno < alg->ops; opno++)
2982 {
2983 int log = alg->log[opno];
7c27e184 2984 rtx shift_subtarget = optimize ? 0 : accum;
8efc8980
RS
2985 rtx add_target
2986 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
7c27e184 2987 && !optimize)
8efc8980 2988 ? target : 0;
7c27e184 2989 rtx accum_target = optimize ? 0 : accum;
7543f918 2990 rtx accum_inner;
8efc8980
RS
2991
2992 switch (alg->op[opno])
2993 {
2994 case alg_shift:
eb6c3df1 2995 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
4caa21a1
UB
2996 /* REG_EQUAL note will be attached to the following insn. */
2997 emit_move_insn (accum, tem);
8efc8980
RS
2998 val_so_far <<= log;
2999 break;
3000
3001 case alg_add_t_m2:
eb6c3df1 3002 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
8efc8980
RS
3003 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3004 add_target ? add_target : accum_target);
3005 val_so_far += (HOST_WIDE_INT) 1 << log;
3006 break;
3007
3008 case alg_sub_t_m2:
eb6c3df1 3009 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
8efc8980
RS
3010 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3011 add_target ? add_target : accum_target);
3012 val_so_far -= (HOST_WIDE_INT) 1 << log;
3013 break;
3014
3015 case alg_add_t2_m:
3016 accum = expand_shift (LSHIFT_EXPR, mode, accum,
eb6c3df1 3017 log, shift_subtarget, 0);
8efc8980
RS
3018 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3019 add_target ? add_target : accum_target);
3020 val_so_far = (val_so_far << log) + 1;
3021 break;
3022
3023 case alg_sub_t2_m:
3024 accum = expand_shift (LSHIFT_EXPR, mode, accum,
eb6c3df1 3025 log, shift_subtarget, 0);
8efc8980
RS
3026 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3027 add_target ? add_target : accum_target);
3028 val_so_far = (val_so_far << log) - 1;
3029 break;
3030
3031 case alg_add_factor:
eb6c3df1 3032 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
8efc8980
RS
3033 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3034 add_target ? add_target : accum_target);
3035 val_so_far += val_so_far << log;
3036 break;
3037
3038 case alg_sub_factor:
eb6c3df1 3039 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
8efc8980 3040 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
7c27e184
PB
3041 (add_target
3042 ? add_target : (optimize ? 0 : tem)));
8efc8980
RS
3043 val_so_far = (val_so_far << log) - val_so_far;
3044 break;
3045
3046 default:
5b0264cb 3047 gcc_unreachable ();
8efc8980
RS
3048 }
3049
84ddb681 3050 if (SCALAR_INT_MODE_P (mode))
8efc8980 3051 {
84ddb681
RH
3052 /* Write a REG_EQUAL note on the last insn so that we can cse
3053 multiplication sequences. Note that if ACCUM is a SUBREG,
3054 we've set the inner register and must properly indicate that. */
3055 tem = op0, nmode = mode;
3056 accum_inner = accum;
3057 if (GET_CODE (accum) == SUBREG)
3058 {
3059 accum_inner = SUBREG_REG (accum);
3060 nmode = GET_MODE (accum_inner);
3061 tem = gen_lowpart (nmode, op0);
3062 }
8efc8980 3063
84ddb681
RH
3064 insn = get_last_insn ();
3065 set_dst_reg_note (insn, REG_EQUAL,
4789c0ce
RS
3066 gen_rtx_MULT (nmode, tem,
3067 gen_int_mode (val_so_far, nmode)),
84ddb681
RH
3068 accum_inner);
3069 }
8efc8980
RS
3070 }
3071
3072 if (variant == negate_variant)
3073 {
3074 val_so_far = -val_so_far;
3075 accum = expand_unop (mode, neg_optab, accum, target, 0);
3076 }
3077 else if (variant == add_variant)
3078 {
3079 val_so_far = val_so_far + 1;
3080 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3081 }
3082
42eb30b5
ZW
3083 /* Compare only the bits of val and val_so_far that are significant
3084 in the result mode, to avoid sign-/zero-extension confusion. */
84ddb681
RH
3085 nmode = GET_MODE_INNER (mode);
3086 if (nmode == VOIDmode)
3087 nmode = mode;
3088 val &= GET_MODE_MASK (nmode);
3089 val_so_far &= GET_MODE_MASK (nmode);
5b0264cb 3090 gcc_assert (val == val_so_far);
8efc8980
RS
3091
3092 return accum;
3093}
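/* Illustrative sketch only, not part of this file: the same shift/add
   decomposition that expand_mult_const emits as RTL, written as plain C
   for one constant.  The hypothetical mul_by_10 tracks val_so_far in
   comments exactly as the gcc_assert at the end of the function does.  */

#include <assert.h>
#include <stdint.h>

static uint32_t
mul_by_10 (uint32_t x)
{
  uint32_t accum = x;           /* alg_m: val_so_far = 1 */
  accum = (accum << 2) + x;     /* alg_add_t2_m: val_so_far = (1 << 2) + 1 = 5 */
  accum <<= 1;                  /* alg_shift: val_so_far = 10 */
  return accum;
}

int
main (void)
{
  for (uint32_t x = 0; x < 100000; x++)
    assert (mul_by_10 (x) == x * 10);
  return 0;
}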
3094
44037a66
TG
3095/* Perform a multiplication and return an rtx for the result.
3096 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3097 TARGET is a suggestion for where to store the result (an rtx).
3098
3099 We check specially for a constant integer as OP1.
3100 If you want this check for OP0 as well, then before calling
3101 you should swap the two operands if OP0 would be constant. */
3102
3103rtx
ef4bddc2 3104expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
f2593a66 3105 int unsignedp)
44037a66 3106{
8efc8980
RS
3107 enum mult_variant variant;
3108 struct algorithm algorithm;
84ddb681 3109 rtx scalar_op1;
65dc9350 3110 int max_cost;
f40751dd 3111 bool speed = optimize_insn_for_speed_p ();
84ddb681 3112 bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
44037a66 3113
84ddb681 3114 if (CONSTANT_P (op0))
4c278134 3115 std::swap (op0, op1);
84ddb681
RH
3116
3117 /* For vectors, there are several simplifications that can be made if
3118 all elements of the vector constant are identical. */
3119 scalar_op1 = op1;
3120 if (GET_CODE (op1) == CONST_VECTOR)
3121 {
3122 int i, n = CONST_VECTOR_NUNITS (op1);
3123 scalar_op1 = CONST_VECTOR_ELT (op1, 0);
3124 for (i = 1; i < n; ++i)
3125 if (!rtx_equal_p (scalar_op1, CONST_VECTOR_ELT (op1, i)))
3126 goto skip_scalar;
3127 }
3128
3129 if (INTEGRAL_MODE_P (mode))
3130 {
3131 rtx fake_reg;
caf62455
JDA
3132 HOST_WIDE_INT coeff;
3133 bool is_neg;
84ddb681
RH
3134 int mode_bitsize;
3135
3136 if (op1 == CONST0_RTX (mode))
3137 return op1;
3138 if (op1 == CONST1_RTX (mode))
3139 return op0;
3140 if (op1 == CONSTM1_RTX (mode))
3141 return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3142 op0, target, 0);
3143
3144 if (do_trapv)
3145 goto skip_synth;
3146
66b3ed5f
JJ
3147 /* If mode is an integer vector mode, check if the backend supports
3148 vector lshift (by scalar or vector) at all. If not, we can't use
3149 a synthesized multiply. */
3150 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3151 && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
3152 && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
3153 goto skip_synth;
3154
84ddb681
RH
3155 /* These are the operations that are potentially turned into
3156 a sequence of shifts and additions. */
3157 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
65dc9350
RS
3158
3159 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3160 less than or equal in size to `unsigned int' this doesn't matter.
3161 If the mode is larger than `unsigned int', then synth_mult works
3162 only if the constant value exactly fits in an `unsigned int' without
3163 any truncation. This means that multiplying by negative values does
3164 not work; results are off by 2^32 on a 32-bit machine. */
84ddb681 3165 if (CONST_INT_P (scalar_op1))
58b42e19 3166 {
84ddb681
RH
3167 coeff = INTVAL (scalar_op1);
3168 is_neg = coeff < 0;
65dc9350 3169 }
807e902e
KZ
3170#if TARGET_SUPPORTS_WIDE_INT
3171 else if (CONST_WIDE_INT_P (scalar_op1))
3172#else
48175537 3173 else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
807e902e 3174#endif
65dc9350 3175 {
807e902e
KZ
3176 int shift = wi::exact_log2 (std::make_pair (scalar_op1, mode));
3177 /* Perfect power of 2 (other than 1, which is handled above). */
3178 if (shift > 0)
3179 return expand_shift (LSHIFT_EXPR, mode, op0,
3180 shift, target, unsignedp);
caf62455
JDA
3181 else
3182 goto skip_synth;
65dc9350 3183 }
84ddb681
RH
3184 else
3185 goto skip_synth;
b8698a0f 3186
65dc9350
RS
3187 /* We used to test optimize here, on the grounds that it's better to
3188 produce a smaller program when -O is not used. But this causes
3189 such a terrible slowdown sometimes that it seems better to always
3190 use synth_mult. */
65dc9350 3191
84ddb681 3192 /* Special case powers of two. */
be63b77d
JJ
3193 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3194 && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
84ddb681
RH
3195 return expand_shift (LSHIFT_EXPR, mode, op0,
3196 floor_log2 (coeff), target, unsignedp);
3197
3198 fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3199
3200 /* Attempt to handle multiplication of DImode values by negative
3201 coefficients, by performing the multiplication by a positive
3202 multiplier and then inverting the result. */
84ddb681
RH
3203 if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3204 {
3205 /* It's safe to use -coeff even for INT_MIN, as the
3206 result is interpreted as an unsigned coefficient.
3207 Exclude cost of op0 from max_cost to match the cost
5e839bc8 3208 calculation of the synth_mult. */
be63b77d 3209 coeff = -(unsigned HOST_WIDE_INT) coeff;
84ddb681 3210 max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed)
c3284718 3211 - neg_cost (speed, mode));
be63b77d
JJ
3212 if (max_cost <= 0)
3213 goto skip_synth;
3214
3215 /* Special case powers of two. */
3216 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3217 {
3218 rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3219 floor_log2 (coeff), target, unsignedp);
3220 return expand_unop (mode, neg_optab, temp, target, 0);
3221 }
3222
3223 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3224 max_cost))
84ddb681 3225 {
be63b77d 3226 rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
84ddb681
RH
3227 &algorithm, variant);
3228 return expand_unop (mode, neg_optab, temp, target, 0);
3229 }
b216b86b 3230 goto skip_synth;
58b42e19 3231 }
44037a66 3232
84ddb681
RH
3233 /* Exclude cost of op0 from max_cost to match the cost
3234 calculation of the synth_mult. */
3235 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed);
3236 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3237 return expand_mult_const (mode, op0, coeff, target,
3238 &algorithm, variant);
f2593a66 3239 }
84ddb681 3240 skip_synth:
f2593a66
RS
3241
3242 /* Expand x*2.0 as x+x. */
48175537 3243 if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1))
f2593a66
RS
3244 {
3245 REAL_VALUE_TYPE d;
84ddb681 3246 REAL_VALUE_FROM_CONST_DOUBLE (d, scalar_op1);
f2593a66
RS
3247
3248 if (REAL_VALUES_EQUAL (d, dconst2))
3249 {
3250 op0 = force_reg (GET_MODE (op0), op0);
3251 return expand_binop (mode, add_optab, op0, op0,
3252 target, unsignedp, OPTAB_LIB_WIDEN);
3253 }
3254 }
84ddb681 3255 skip_scalar:
f2593a66 3256
819126a6
RK
3257 /* This used to use umul_optab if unsigned, but for non-widening multiply
3258 there is no difference between signed and unsigned. */
84ddb681 3259 op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
44037a66 3260 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
5b0264cb 3261 gcc_assert (op0);
44037a66
TG
3262 return op0;
3263}
5b58b39b 3264
6dd8f4bb
BS
3265/* Return a cost estimate for multiplying a register by the given
3266 COEFFicient in the given MODE and SPEED. */
3267
3268int
ef4bddc2 3269mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
6dd8f4bb
BS
3270{
3271 int max_cost;
3272 struct algorithm algorithm;
3273 enum mult_variant variant;
3274
3275 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3276 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg), speed);
3277 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3278 return algorithm.cost.cost;
3279 else
3280 return max_cost;
3281}
3282
5b58b39b
BS
3283/* Perform a widening multiplication and return an rtx for the result.
3284 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3285 TARGET is a suggestion for where to store the result (an rtx).
3286 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3287 or smul_widen_optab.
3288
3289 We check specially for a constant integer as OP1, comparing the
3290 cost of a widening multiply against the cost of a sequence of shifts
3291 and adds. */
3292
3293rtx
ef4bddc2 3294expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
5b58b39b
BS
3295 int unsignedp, optab this_optab)
3296{
3297 bool speed = optimize_insn_for_speed_p ();
e7ef91dc 3298 rtx cop1;
5b58b39b
BS
3299
3300 if (CONST_INT_P (op1)
e7ef91dc
JJ
3301 && GET_MODE (op0) != VOIDmode
3302 && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3303 this_optab == umul_widen_optab))
3304 && CONST_INT_P (cop1)
3305 && (INTVAL (cop1) >= 0
46c9550f 3306 || HWI_COMPUTABLE_MODE_P (mode)))
5b58b39b 3307 {
e7ef91dc 3308 HOST_WIDE_INT coeff = INTVAL (cop1);
5b58b39b
BS
3309 int max_cost;
3310 enum mult_variant variant;
3311 struct algorithm algorithm;
3312
e9082138
JJ
3313 if (coeff == 0)
3314 return CONST0_RTX (mode);
3315
5b58b39b
BS
3316 /* Special case powers of two. */
3317 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3318 {
3319 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3320 return expand_shift (LSHIFT_EXPR, mode, op0,
eb6c3df1 3321 floor_log2 (coeff), target, unsignedp);
5b58b39b
BS
3322 }
3323
3324 /* Exclude cost of op0 from max_cost to match the cost
3325 calculation of the synth_mult. */
5322d07e 3326 max_cost = mul_widen_cost (speed, mode);
5b58b39b
BS
3327 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3328 max_cost))
3329 {
3330 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3331 return expand_mult_const (mode, op0, coeff, target,
3332 &algorithm, variant);
3333 }
3334 }
3335 return expand_binop (mode, this_optab, op0, op1, target,
3336 unsignedp, OPTAB_LIB_WIDEN);
3337}
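/* Illustrative sketch only, not part of this file: the two facts
   expand_widening_mult relies on for constant operands, checked in
   plain C for a 32-to-64-bit widening.  A power-of-two coefficient
   reduces to extend-then-shift (the special case above), and the
   widening form keeps high product bits that a truncating multiply
   discards.  */

#include <assert.h>
#include <stdint.h>

int
main (void)
{
  uint32_t x = 0x90000001u;

  /* Widening multiply by 8 == zero-extend, then shift left by 3.  */
  assert ((uint64_t) x * 8 == (uint64_t) x << 3);

  /* The widening product keeps a high half that a plain 32-bit
     multiply would lose.  */
  uint64_t wide = (uint64_t) x * 0x12345u;
  assert ((uint32_t) wide == x * 0x12345u);     /* low halves agree */
  assert (wide >> 32 != 0);                     /* high half preserved */
  return 0;
}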
44037a66 3338\f
55c2d311
TG
3339/* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3340 replace division by D, and put the least significant N bits of the result
3341 in *MULTIPLIER_PTR and return the most significant bit.
3342
3343 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT); the
3344 needed precision is in PRECISION (should be <= N).
3345
3346 PRECISION should be as small as possible so this function can choose
3347 the multiplier more freely.
3348
3349 The rounded-up logarithm of D is placed in *LGUP_PTR. A shift count that
3350 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3351
3352 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3353 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
3354
55c2d311 3355unsigned HOST_WIDE_INT
502b8322 3356choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
079c527f
JJ
3357 unsigned HOST_WIDE_INT *multiplier_ptr,
3358 int *post_shift_ptr, int *lgup_ptr)
55c2d311 3359{
55c2d311
TG
3360 int lgup, post_shift;
3361 int pow, pow2;
55c2d311
TG
3362
3363 /* lgup = ceil(log2(divisor)); */
3364 lgup = ceil_log2 (d);
3365
5b0264cb 3366 gcc_assert (lgup <= n);
55c2d311
TG
3367
3368 pow = n + lgup;
3369 pow2 = n + lgup - precision;
3370
55c2d311 3371 /* mlow = 2^(N + lgup)/d */
807e902e
KZ
3372 wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
3373 wide_int mlow = wi::udiv_trunc (val, d);
55c2d311 3374
9be0ac8c 3375 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
807e902e
KZ
3376 val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3377 wide_int mhigh = wi::udiv_trunc (val, d);
55c2d311
TG
3378
3379 /* If precision == N, then mlow, mhigh exceed 2^N
3380 (but they do not exceed 2^(N+1)). */
3381
f9da5064 3382 /* Reduce to lowest terms. */
55c2d311
TG
3383 for (post_shift = lgup; post_shift > 0; post_shift--)
3384 {
807e902e
KZ
3385 unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3386 HOST_BITS_PER_WIDE_INT);
3387 unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3388 HOST_BITS_PER_WIDE_INT);
55c2d311
TG
3389 if (ml_lo >= mh_lo)
3390 break;
3391
807e902e
KZ
3392 mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3393 mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
55c2d311
TG
3394 }
3395
3396 *post_shift_ptr = post_shift;
3397 *lgup_ptr = lgup;
3398 if (n < HOST_BITS_PER_WIDE_INT)
3399 {
3400 unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
807e902e
KZ
3401 *multiplier_ptr = mhigh.to_uhwi () & mask;
3402 return mhigh.to_uhwi () >= mask;
55c2d311
TG
3403 }
3404 else
3405 {
807e902e
KZ
3406 *multiplier_ptr = mhigh.to_uhwi ();
3407 return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
55c2d311
TG
3408 }
3409}
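/* Illustrative sketch only, not part of this file: what choose_multiplier
   computes for d = 7 with N = precision = 32.  Here lgup = 3 and
   mhigh = floor((2^35 + 2^3)/7) needs 33 bits, so the function returns
   most-significant-bit 1, low 32 bits 0x24924925, and post_shift 3;
   expand_divmod then uses a highpart multiply plus the add/shift fixup
   shown below.  udiv7 is a hypothetical name.  */

#include <assert.h>
#include <stdint.h>

static uint32_t
udiv7 (uint32_t x)
{
  uint32_t t1 = (uint32_t) (((uint64_t) x * 0x24924925u) >> 32);
  uint32_t t2 = (x - t1) >> 1;    /* fold in the multiplier's 33rd bit */
  return (t1 + t2) >> 2;          /* remaining post_shift - 1 bits */
}

int
main (void)
{
  uint32_t probes[] = { 0, 1, 6, 7, 8, 1000000, 0x7fffffffu, 0xffffffffu };
  for (unsigned i = 0; i < sizeof probes / sizeof probes[0]; i++)
    assert (udiv7 (probes[i]) == probes[i] / 7);
  return 0;
}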
3410
3411/* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3412 congruent to 1 (mod 2**N). */
3413
3414static unsigned HOST_WIDE_INT
502b8322 3415invert_mod2n (unsigned HOST_WIDE_INT x, int n)
55c2d311 3416{
0f41302f 3417 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
55c2d311
TG
3418
3419 /* The algorithm notes that the choice y = x satisfies
3420 x*y == 1 mod 2^3, since x is assumed odd.
3421 Each iteration doubles the number of bits of significance in y. */
3422
3423 unsigned HOST_WIDE_INT mask;
3424 unsigned HOST_WIDE_INT y = x;
3425 int nbit = 3;
3426
3427 mask = (n == HOST_BITS_PER_WIDE_INT
3428 ? ~(unsigned HOST_WIDE_INT) 0
3429 : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3430
3431 while (nbit < n)
3432 {
3433 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3434 nbit *= 2;
3435 }
3436 return y;
3437}
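/* Illustrative sketch only, not part of this file: the same Newton/Hensel
   doubling in plain C for n = 32 (the name invert_mod2n_u32 is
   hypothetical).  Unsigned arithmetic wraps, so the explicit masking
   above becomes implicit here.  */

#include <assert.h>
#include <stdint.h>

static uint32_t
invert_mod2n_u32 (uint32_t x)   /* X must be odd.  */
{
  uint32_t y = x;                   /* correct to 3 low bits */
  for (int nbit = 3; nbit < 32; nbit *= 2)
    y = y * (2 - x * y);            /* doubles the number of correct bits */
  return y;
}

int
main (void)
{
  for (uint32_t x = 1; x < 200000; x += 2)
    assert (x * invert_mod2n_u32 (x) == 1u);
  return 0;
}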
3438
3439/* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3440 flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the
3441 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
3443 to become unsigned; if UNSIGNEDP is zero, adjust the unsigned product to
3443 become signed.
3444
3445 The result is put in TARGET if that is convenient.
3446
3447 MODE is the mode of operation. */
3448
3449rtx
ef4bddc2 3450expand_mult_highpart_adjust (machine_mode mode, rtx adj_operand, rtx op0,
502b8322 3451 rtx op1, rtx target, int unsignedp)
55c2d311
TG
3452{
3453 rtx tem;
3454 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3455
3456 tem = expand_shift (RSHIFT_EXPR, mode, op0,
eb6c3df1 3457 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
22273300 3458 tem = expand_and (mode, tem, op1, NULL_RTX);
38a448ca
RH
3459 adj_operand
3460 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3461 adj_operand);
55c2d311
TG
3462
3463 tem = expand_shift (RSHIFT_EXPR, mode, op1,
eb6c3df1 3464 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
22273300 3465 tem = expand_and (mode, tem, op0, NULL_RTX);
38a448ca
RH
3466 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3467 target);
55c2d311
TG
3468
3469 return target;
3470}
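/* Illustrative sketch only, not part of this file: the identity behind
   the two shift/and/subtract steps above, checked in plain C for 32-bit
   operands.  The signed high part differs from the unsigned one by OP1
   when OP0 is negative and by OP0 when OP1 is negative; (a >> 31) & b
   selects exactly those terms.  Assumes GCC's arithmetic right shift of
   negative values and modular narrowing conversions.  */

#include <assert.h>
#include <stdint.h>

static int32_t
signed_high_from_unsigned (int32_t a, int32_t b)
{
  uint32_t uhigh = (uint32_t) (((uint64_t) (uint32_t) a * (uint32_t) b) >> 32);
  uint32_t adj = uhigh
                 - (((uint32_t) (a >> 31)) & (uint32_t) b)
                 - (((uint32_t) (b >> 31)) & (uint32_t) a);
  return (int32_t) adj;
}

int
main (void)
{
  int32_t probes[] = { 0, 1, -1, 123456789, -123456789,
                       INT32_MIN, INT32_MAX };
  for (unsigned i = 0; i < sizeof probes / sizeof probes[0]; i++)
    for (unsigned j = 0; j < sizeof probes / sizeof probes[0]; j++)
      {
        int64_t full = (int64_t) probes[i] * probes[j];
        assert (signed_high_from_unsigned (probes[i], probes[j])
                == (int32_t) (full >> 32));
      }
  return 0;
}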
3471
00f07b86 3472/* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */
55c2d311 3473
8efc8980 3474static rtx
ef4bddc2 3475extract_high_half (machine_mode mode, rtx op)
8efc8980 3476{
ef4bddc2 3477 machine_mode wider_mode;
55c2d311 3478
8efc8980
RS
3479 if (mode == word_mode)
3480 return gen_highpart (mode, op);
71af73bb 3481
15ed7b52
JG
3482 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3483
8efc8980
RS
3484 wider_mode = GET_MODE_WIDER_MODE (mode);
3485 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
eb6c3df1 3486 GET_MODE_BITSIZE (mode), 0, 1);
8efc8980
RS
3487 return convert_modes (mode, wider_mode, op, 0);
3488}
55c2d311 3489
00f07b86 3490/* Like expmed_mult_highpart, but only consider using a multiplication
8efc8980
RS
3491 optab. OP1 is an rtx for the constant operand. */
3492
3493static rtx
ef4bddc2 3494expmed_mult_highpart_optab (machine_mode mode, rtx op0, rtx op1,
8efc8980 3495 rtx target, int unsignedp, int max_cost)
55c2d311 3496{
665acd1e 3497 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
ef4bddc2 3498 machine_mode wider_mode;
55c2d311
TG
3499 optab moptab;
3500 rtx tem;
8efc8980 3501 int size;
f40751dd 3502 bool speed = optimize_insn_for_speed_p ();
55c2d311 3503
15ed7b52
JG
3504 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3505
8efc8980
RS
3506 wider_mode = GET_MODE_WIDER_MODE (mode);
3507 size = GET_MODE_BITSIZE (mode);
55c2d311
TG
3508
3509 /* Firstly, try using a multiplication insn that only generates the needed
3510 high part of the product, and in the sign flavor of unsignedp. */
5322d07e 3511 if (mul_highpart_cost (speed, mode) < max_cost)
71af73bb 3512 {
8efc8980 3513 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
665acd1e 3514 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
8efc8980
RS
3515 unsignedp, OPTAB_DIRECT);
3516 if (tem)
3517 return tem;
71af73bb 3518 }
55c2d311
TG
3519
3520 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3521 Need to adjust the result after the multiplication. */
02a65aef 3522 if (size - 1 < BITS_PER_WORD
5322d07e
NF
3523 && (mul_highpart_cost (speed, mode)
3524 + 2 * shift_cost (speed, mode, size-1)
3525 + 4 * add_cost (speed, mode) < max_cost))
71af73bb 3526 {
8efc8980 3527 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
665acd1e 3528 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
8efc8980
RS
3529 unsignedp, OPTAB_DIRECT);
3530 if (tem)
71af73bb 3531 /* We used the wrong signedness. Adjust the result. */
77278891 3532 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
8efc8980 3533 tem, unsignedp);
71af73bb 3534 }
55c2d311 3535
71af73bb 3536 /* Try widening multiplication. */
55c2d311 3537 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
4d8752f0 3538 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
5322d07e 3539 && mul_widen_cost (speed, wider_mode) < max_cost)
a295d331 3540 {
665acd1e 3541 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
8efc8980
RS
3542 unsignedp, OPTAB_WIDEN);
3543 if (tem)
3544 return extract_high_half (mode, tem);
c410d49e 3545 }
71af73bb
TG
3546
3547 /* Try widening the mode and perform a non-widening multiplication. */
947131ba 3548 if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
02a65aef 3549 && size - 1 < BITS_PER_WORD
5322d07e
NF
3550 && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3551 < max_cost))
a295d331 3552 {
f3f6fb16
DM
3553 rtx_insn *insns;
3554 rtx wop0, wop1;
82dfb9a5
RS
3555
3556 /* We need to widen the operands, for example to ensure the
3557 constant multiplier is correctly sign or zero extended.
3558 Use a sequence to clean-up any instructions emitted by
3559 the conversions if things don't work out. */
3560 start_sequence ();
3561 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3562 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3563 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
8efc8980 3564 unsignedp, OPTAB_WIDEN);
82dfb9a5
RS
3565 insns = get_insns ();
3566 end_sequence ();
3567
8efc8980 3568 if (tem)
82dfb9a5
RS
3569 {
3570 emit_insn (insns);
3571 return extract_high_half (mode, tem);
3572 }
a295d331 3573 }
71af73bb
TG
3574
3575 /* Try widening multiplication of opposite signedness, and adjust. */
3576 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
4d8752f0 3577 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
02a65aef 3578 && size - 1 < BITS_PER_WORD
5322d07e
NF
3579 && (mul_widen_cost (speed, wider_mode)
3580 + 2 * shift_cost (speed, mode, size-1)
3581 + 4 * add_cost (speed, mode) < max_cost))
55c2d311 3582 {
665acd1e 3583 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
71af73bb
TG
3584 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3585 if (tem != 0)
55c2d311 3586 {
8efc8980 3587 tem = extract_high_half (mode, tem);
71af73bb 3588 /* We used the wrong signedness. Adjust the result. */
77278891 3589 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
71af73bb 3590 target, unsignedp);
55c2d311 3591 }
55c2d311
TG
3592 }
3593
71af73bb 3594 return 0;
8efc8980 3595}
71af73bb 3596
0d282692
RS
3597/* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3598 putting the high half of the result in TARGET if that is convenient,
3599 and return where the result is. If the operation can not be performed,
3600 0 is returned.
55c2d311 3601
8efc8980
RS
3602 MODE is the mode of operation and result.
3603
3604 UNSIGNEDP nonzero means unsigned multiply.
3605
3606 MAX_COST is the total allowed cost for the expanded RTL. */
3607
0d282692 3608static rtx
ef4bddc2 3609expmed_mult_highpart (machine_mode mode, rtx op0, rtx op1,
0d282692 3610 rtx target, int unsignedp, int max_cost)
8efc8980 3611{
ef4bddc2 3612 machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
0d282692 3613 unsigned HOST_WIDE_INT cnst1;
f258e38b
UW
3614 int extra_cost;
3615 bool sign_adjust = false;
8efc8980
RS
3616 enum mult_variant variant;
3617 struct algorithm alg;
0d282692 3618 rtx tem;
f40751dd 3619 bool speed = optimize_insn_for_speed_p ();
8efc8980 3620
15ed7b52 3621 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
8efc8980 3622 /* We can't support modes wider than HOST_BITS_PER_INT. */
46c9550f 3623 gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
8efc8980 3624
0d282692 3625 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
f258e38b 3626
b8698a0f
L
3627 /* We can't optimize modes wider than BITS_PER_WORD.
3628 ??? We might be able to perform double-word arithmetic if
f258e38b
UW
3629 mode == word_mode, however all the cost calculations in
3630 synth_mult etc. assume single-word operations. */
3631 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
00f07b86 3632 return expmed_mult_highpart_optab (mode, op0, op1, target,
f258e38b
UW
3633 unsignedp, max_cost);
3634
5322d07e 3635 extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
f258e38b
UW
3636
3637 /* Check whether we try to multiply by a negative constant. */
3638 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3639 {
3640 sign_adjust = true;
5322d07e 3641 extra_cost += add_cost (speed, mode);
f258e38b 3642 }
8efc8980
RS
3643
3644 /* See whether shift/add multiplication is cheap enough. */
f258e38b
UW
3645 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3646 max_cost - extra_cost))
a295d331 3647 {
8efc8980
RS
3648 /* See whether the specialized multiplication optabs are
3649 cheaper than the shift/add version. */
00f07b86 3650 tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
26276705 3651 alg.cost.cost + extra_cost);
8efc8980
RS
3652 if (tem)
3653 return tem;
3654
f258e38b
UW
3655 tem = convert_to_mode (wider_mode, op0, unsignedp);
3656 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3657 tem = extract_high_half (mode, tem);
3658
9cf737f8 3659 /* Adjust result for signedness. */
f258e38b
UW
3660 if (sign_adjust)
3661 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3662
3663 return tem;
a295d331 3664 }
00f07b86 3665 return expmed_mult_highpart_optab (mode, op0, op1, target,
8efc8980 3666 unsignedp, max_cost);
55c2d311 3667}
0b55e932
RS
3668
3669
3670/* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3671
3672static rtx
ef4bddc2 3673expand_smod_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
0b55e932 3674{
f3f6fb16
DM
3675 rtx result, temp, shift;
3676 rtx_code_label *label;
0b55e932 3677 int logd;
807e902e 3678 int prec = GET_MODE_PRECISION (mode);
0b55e932
RS
3679
3680 logd = floor_log2 (d);
3681 result = gen_reg_rtx (mode);
3682
3683 /* Avoid conditional branches when they're expensive. */
3a4fd356 3684 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
22660666 3685 && optimize_insn_for_speed_p ())
0b55e932
RS
3686 {
3687 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3688 mode, 0, -1);
3689 if (signmask)
3690 {
807e902e 3691 HOST_WIDE_INT masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
0b55e932 3692 signmask = force_reg (mode, signmask);
1c234fcb
RS
3693 shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3694
3695 /* Use the rtx_cost of a LSHIFTRT instruction to determine
3696 which instruction sequence to use. If logical right shifts
3697 are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
3698 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
6e7a355c 3699
1c234fcb 3700 temp = gen_rtx_LSHIFTRT (mode, result, shift);
947131ba 3701 if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
5e8f01f4
RS
3702 || (set_src_cost (temp, optimize_insn_for_speed_p ())
3703 > COSTS_N_INSNS (2)))
1c234fcb
RS
3704 {
3705 temp = expand_binop (mode, xor_optab, op0, signmask,
3706 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3707 temp = expand_binop (mode, sub_optab, temp, signmask,
3708 NULL_RTX, 1, OPTAB_LIB_WIDEN);
2f1cd2eb
RS
3709 temp = expand_binop (mode, and_optab, temp,
3710 gen_int_mode (masklow, mode),
1c234fcb
RS
3711 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3712 temp = expand_binop (mode, xor_optab, temp, signmask,
3713 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3714 temp = expand_binop (mode, sub_optab, temp, signmask,
3715 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3716 }
3717 else
3718 {
3719 signmask = expand_binop (mode, lshr_optab, signmask, shift,
3720 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3721 signmask = force_reg (mode, signmask);
3722
3723 temp = expand_binop (mode, add_optab, op0, signmask,
3724 NULL_RTX, 1, OPTAB_LIB_WIDEN);
2f1cd2eb
RS
3725 temp = expand_binop (mode, and_optab, temp,
3726 gen_int_mode (masklow, mode),
1c234fcb
RS
3727 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3728 temp = expand_binop (mode, sub_optab, temp, signmask,
3729 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3730 }
0b55e932
RS
3731 return temp;
3732 }
3733 }
3734
3735 /* Mask contains the mode's signbit and the significant bits of the
3736 modulus. By including the signbit in the operation, many targets
3737 can avoid an explicit compare operation in the following comparison
3738 against zero. */
807e902e
KZ
3739 wide_int mask = wi::mask (logd, false, prec);
3740 mask = wi::set_bit (mask, prec - 1);
0b55e932 3741
6e7a355c 3742 temp = expand_binop (mode, and_optab, op0,
807e902e 3743 immed_wide_int_const (mask, mode),
6e7a355c 3744 result, 1, OPTAB_LIB_WIDEN);
0b55e932
RS
3745 if (temp != result)
3746 emit_move_insn (result, temp);
3747
3748 label = gen_label_rtx ();
3749 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3750
3751 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3752 0, OPTAB_LIB_WIDEN);
807e902e
KZ
3753
3754 mask = wi::mask (logd, true, prec);
6e7a355c 3755 temp = expand_binop (mode, ior_optab, temp,
807e902e 3756 immed_wide_int_const (mask, mode),
6e7a355c 3757 result, 1, OPTAB_LIB_WIDEN);
0b55e932
RS
3758 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3759 0, OPTAB_LIB_WIDEN);
3760 if (temp != result)
3761 emit_move_insn (result, temp);
3762 emit_label (label);
3763 return result;
3764}
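/* Illustrative sketch only, not part of this file: the cheap-shift
   variant of the branch-free sequence above, in plain C (the name
   smod_pow2_c is hypothetical).  The logical right shift turns the sign
   mask into d - 1 for negative OP0 and 0 otherwise, and add/and/subtract
   then yields the truncating C remainder.  Assumes GCC's arithmetic
   right shift of negative values.  */

#include <assert.h>
#include <stdint.h>

static int32_t
smod_pow2_c (int32_t x, int logd)       /* d = 1 << logd, 0 < logd < 31 */
{
  uint32_t masklow = (1u << logd) - 1;
  uint32_t signmask = (uint32_t) (x >> 31) >> (32 - logd);  /* d-1 or 0 */
  return (int32_t) ((((uint32_t) x + signmask) & masklow) - signmask);
}

int
main (void)
{
  for (int32_t x = -10000; x <= 10000; x++)
    for (int logd = 1; logd <= 8; logd++)
      assert (smod_pow2_c (x, logd) == x % (1 << logd));
  return 0;
}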
39cab019
RS
3765
3766/* Expand signed division of OP0 by a power of two D in mode MODE.
3767 This routine is only called for positive values of D. */
3768
3769static rtx
ef4bddc2 3770expand_sdiv_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
39cab019 3771{
f3f6fb16
DM
3772 rtx temp;
3773 rtx_code_label *label;
39cab019
RS
3774 int logd;
3775
3776 logd = floor_log2 (d);
39cab019 3777
3a4fd356
JH
3778 if (d == 2
3779 && BRANCH_COST (optimize_insn_for_speed_p (),
3780 false) >= 1)
39cab019
RS
3781 {
3782 temp = gen_reg_rtx (mode);
3783 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3784 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3785 0, OPTAB_LIB_WIDEN);
eb6c3df1 3786 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
39cab019
RS
3787 }
3788
692e0312
TS
3789 if (HAVE_conditional_move
3790 && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
fdded401
RS
3791 {
3792 rtx temp2;
3793
3794 start_sequence ();
3795 temp2 = copy_to_mode_reg (mode, op0);
2f1cd2eb 3796 temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
fdded401
RS
3797 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3798 temp = force_reg (mode, temp);
3799
3800 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
3801 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3802 mode, temp, temp2, mode, 0);
3803 if (temp2)
3804 {
f3f6fb16 3805 rtx_insn *seq = get_insns ();
fdded401
RS
3806 end_sequence ();
3807 emit_insn (seq);
eb6c3df1 3808 return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
fdded401
RS
3809 }
3810 end_sequence ();
3811 }
fdded401 3812
3a4fd356
JH
3813 if (BRANCH_COST (optimize_insn_for_speed_p (),
3814 false) >= 2)
39cab019
RS
3815 {
3816 int ushift = GET_MODE_BITSIZE (mode) - logd;
3817
3818 temp = gen_reg_rtx (mode);
3819 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
00a7ba58
JJ
3820 if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
3821 || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
3822 > COSTS_N_INSNS (1))
2f1cd2eb 3823 temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode),
39cab019
RS
3824 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3825 else
3826 temp = expand_shift (RSHIFT_EXPR, mode, temp,
eb6c3df1 3827 ushift, NULL_RTX, 1);
39cab019
RS
3828 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3829 0, OPTAB_LIB_WIDEN);
eb6c3df1 3830 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
39cab019
RS
3831 }
3832
3833 label = gen_label_rtx ();
3834 temp = copy_to_mode_reg (mode, op0);
3835 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
2f1cd2eb 3836 expand_inc (temp, gen_int_mode (d - 1, mode));
39cab019 3837 emit_label (label);
eb6c3df1 3838 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
39cab019 3839}
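/* Illustrative sketch only, not part of this file: the BRANCH_COST >= 2
   sequence above in plain C (the name sdiv_pow2_c is hypothetical):
   bias negative dividends by d - 1, then shift arithmetically, giving
   the truncating C quotient.  Assumes GCC's arithmetic right shift of
   negative values.  */

#include <assert.h>
#include <stdint.h>

static int32_t
sdiv_pow2_c (int32_t x, int logd)       /* d = 1 << logd, 0 < logd < 31 */
{
  uint32_t bias = (uint32_t) (x >> 31) >> (32 - logd);  /* d-1 if x < 0 */
  return (int32_t) ((uint32_t) x + bias) >> logd;
}

int
main (void)
{
  for (int32_t x = -10000; x <= 10000; x++)
    for (int logd = 1; logd <= 8; logd++)
      assert (sdiv_pow2_c (x, logd) == x / (1 << logd));
  return 0;
}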
55c2d311 3840\f
44037a66
TG
3841/* Emit the code to divide OP0 by OP1, putting the result in TARGET
3842 if that is convenient, and returning where the result is.
3843 You may request either the quotient or the remainder as the result;
3844 specify REM_FLAG nonzero to get the remainder.
3845
3846 CODE is the expression code for which kind of division this is;
3847 it controls how rounding is done. MODE is the machine mode to use.
3848 UNSIGNEDP nonzero means do unsigned division. */
3849
3850/* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3851 and then correct it by or'ing in missing high bits
3852 if result of ANDI is nonzero.
3853 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3854 This could optimize to a bfexts instruction.
3855 But C doesn't use these operations, so their optimizations are
3856 left for later. */
5353610b
R
3857/* ??? For modulo, we don't actually need the highpart of the first product,
3858 the low part will do nicely. And for small divisors, the second multiply
3859 can also be a low-part only multiply or even be completely left out.
3860 E.g. to calculate the remainder of a division by 3 with a 32-bit
3861 multiply, multiply with 0x55555556 and extract the upper two bits;
3862 the result is exact for inputs up to 0x1fffffff.
3863 The input range can be reduced by using cross-sum rules.
3864 For odd divisors >= 3, the following table gives right shift counts
09da1532 3865 so that if a number is shifted by an integer multiple of the given
5353610b
R
3866 amount, the remainder stays the same:
3867 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3868 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3869 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3870 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3871 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3872
3873 Cross-sum rules for even numbers can be derived by leaving as many bits
3874 to the right alone as the divisor has zeros to the right.
3875 E.g. if x is an unsigned 32-bit number:
3876 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3877 */
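/* Illustrative check, not part of this file, of the divide-by-3 claim in
   the comment above: one truncating 32-bit multiply by 0x55555556 puts
   x mod 3 in the top two bits, exact for x <= 0x1fffffff.  The name
   rem3 is hypothetical; the probe loop samples the valid range.  */

#include <assert.h>
#include <stdint.h>

static uint32_t
rem3 (uint32_t x)       /* valid for x <= 0x1fffffff */
{
  return (x * 0x55555556u) >> 30;   /* low-part multiply only */
}

int
main (void)
{
  for (uint32_t x = 0; x <= 0x1fffffffu; x += 12347)
    assert (rem3 (x) == x % 3);
  assert (rem3 (0x1fffffffu) == 0x1fffffffu % 3);
  return 0;
}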
44037a66
TG
3878
3879rtx
ef4bddc2 3880expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
502b8322 3881 rtx op0, rtx op1, rtx target, int unsignedp)
44037a66 3882{
ef4bddc2 3883 machine_mode compute_mode;
b3694847 3884 rtx tquotient;
55c2d311 3885 rtx quotient = 0, remainder = 0;
f3f6fb16 3886 rtx_insn *last;
2c414fba 3887 int size;
f3f6fb16 3888 rtx_insn *insn;
44037a66 3889 optab optab1, optab2;
1c4a429a 3890 int op1_is_constant, op1_is_pow2 = 0;
71af73bb 3891 int max_cost, extra_cost;
9ec36da5 3892 static HOST_WIDE_INT last_div_const = 0;
f40751dd 3893 bool speed = optimize_insn_for_speed_p ();
55c2d311 3894
481683e1 3895 op1_is_constant = CONST_INT_P (op1);
1c4a429a
JH
3896 if (op1_is_constant)
3897 {
be63b77d 3898 unsigned HOST_WIDE_INT ext_op1 = UINTVAL (op1);
1c4a429a
JH
3899 if (unsignedp)
3900 ext_op1 &= GET_MODE_MASK (mode);
3901 op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3902 || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3903 }
55c2d311
TG
3904
3905 /*
3906 This is the structure of expand_divmod:
3907
3908 First comes code to fix up the operands so we can perform the operations
3909 correctly and efficiently.
3910
3911 Second comes a switch statement with code specific for each rounding mode.
3912 For some special operands this code emits all RTL for the desired
69f61901 3913 operation, for other cases, it generates only a quotient and stores it in
55c2d311
TG
3914 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
3915 to indicate that it has not done anything.
3916
69f61901
RK
3917 Last comes code that finishes the operation. If QUOTIENT is set and
3918 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
3919 QUOTIENT is not set, it is computed using trunc rounding.
44037a66 3920
55c2d311
TG
3921 We try to generate special code for division and remainder when OP1 is a
3922 constant. If |OP1| = 2**n we can use shifts and some other fast
3923 operations. For other values of OP1, we compute a carefully selected
3924 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3925 by m.
3926
3927 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3928 half of the product. Different strategies for generating the product are
00f07b86 3929 implemented in expmed_mult_highpart.
55c2d311
TG
3930
3931 If what we actually want is the remainder, we generate that by another
3932 by-constant multiplication and a subtraction. */
3933
3934 /* We shouldn't be called with OP1 == const1_rtx, but some of the
3d32ffd1
TW
3935 code below will malfunction if we are, so check here and handle
3936 the special case if so. */
3937 if (op1 == const1_rtx)
3938 return rem_flag ? const0_rtx : op0;
3939
91ce572a
CC
3940 /* When dividing by -1, we could get an overflow.
3941 negv_optab can handle overflows. */
3942 if (! unsignedp && op1 == constm1_rtx)
3943 {
3944 if (rem_flag)
0fb7aeda 3945 return const0_rtx;
c3284718 3946 return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
0fb7aeda 3947 ? negv_optab : neg_optab, op0, target, 0);
91ce572a
CC
3948 }
3949
bc1c7e93
RK
3950 if (target
3951 /* Don't use the function value register as a target
3952 since we have to read it as well as write it,
3953 and function-inlining gets confused by this. */
3954 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3955 /* Don't clobber an operand while doing a multi-step calculation. */
515dfc7a 3956 || ((rem_flag || op1_is_constant)
bc1c7e93 3957 && (reg_mentioned_p (target, op0)
3c0cb5de 3958 || (MEM_P (op0) && MEM_P (target))))
bc1c7e93 3959 || reg_mentioned_p (target, op1)
3c0cb5de 3960 || (MEM_P (op1) && MEM_P (target))))
44037a66
TG
3961 target = 0;
3962
44037a66
TG
3963 /* Get the mode in which to perform this computation. Normally it will
3964 be MODE, but sometimes we can't do the desired operation in MODE.
3965 If so, pick a wider mode in which we can do the operation. Convert
3966 to that mode at the start to avoid repeated conversions.
3967
3968 First see what operations we need. These depend on the expression
3969 we are evaluating. (We assume that divxx3 insns exist under the
3970 same conditions that modxx3 insns and that these insns don't normally
3971 fail. If these assumptions are not correct, we may generate less
3972 efficient code in some cases.)
3973
3974 Then see if we find a mode in which we can open-code that operation
3975 (either a division, modulus, or shift). Finally, check for the smallest
3976 mode for which we can do the operation with a library call. */
3977
55c2d311 3978 /* We might want to refine this now that we have division-by-constant
00f07b86 3979 optimization. Since expmed_mult_highpart tries so many variants, it is
55c2d311
TG
3980 not straightforward to generalize this. Maybe we should make an array
3981 of possible modes in init_expmed? Save this for GCC 2.7. */
3982
556a56ac
DM
3983 optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3984 ? (unsignedp ? lshr_optab : ashr_optab)
44037a66 3985 : (unsignedp ? udiv_optab : sdiv_optab));
556a56ac
DM
3986 optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3987 ? optab1
3988 : (unsignedp ? udivmod_optab : sdivmod_optab));
44037a66
TG
3989
3990 for (compute_mode = mode; compute_mode != VOIDmode;
3991 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
947131ba
RS
3992 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
3993 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
44037a66
TG
3994 break;
3995
3996 if (compute_mode == VOIDmode)
3997 for (compute_mode = mode; compute_mode != VOIDmode;
3998 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
8a33f100
JH
3999 if (optab_libfunc (optab1, compute_mode)
4000 || optab_libfunc (optab2, compute_mode))
44037a66
TG
4001 break;
4002
535a42b1
NS
4003 /* If we still couldn't find a mode, use MODE, but expand_binop will
4004 probably die. */
44037a66
TG
4005 if (compute_mode == VOIDmode)
4006 compute_mode = mode;
4007
55c2d311
TG
4008 if (target && GET_MODE (target) == compute_mode)
4009 tquotient = target;
4010 else
4011 tquotient = gen_reg_rtx (compute_mode);
2c414fba 4012
55c2d311
TG
4013 size = GET_MODE_BITSIZE (compute_mode);
4014#if 0
4015 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
71af73bb
TG
4016 (mode), and thereby get better code when OP1 is a constant. Do that
4017 later. It will require going over all usages of SIZE below. */
55c2d311
TG
4018 size = GET_MODE_BITSIZE (mode);
4019#endif
bc1c7e93 4020
9ec36da5
JL
4021 /* Only deduct something for a REM if the last divide done was
4022 for a different constant. Then set the constant of the last
4023 divide. */
5322d07e
NF
4024 max_cost = (unsignedp
4025 ? udiv_cost (speed, compute_mode)
4026 : sdiv_cost (speed, compute_mode));
a28b2ac6
RS
4027 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4028 && INTVAL (op1) == last_div_const))
5322d07e
NF
4029 max_cost -= (mul_cost (speed, compute_mode)
4030 + add_cost (speed, compute_mode));
9ec36da5
JL
4031
4032 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
71af73bb 4033
55c2d311 4034 /* Now convert to the best mode to use. */
44037a66
TG
4035 if (compute_mode != mode)
4036 {
55c2d311 4037 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
81722fa9 4038 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
e13a25d5 4039
e9a25f70
JL
4040 /* convert_modes may have placed op1 into a register, so we
4041 must recompute the following. */
481683e1 4042 op1_is_constant = CONST_INT_P (op1);
e13a25d5
DM
4043 op1_is_pow2 = (op1_is_constant
4044 && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4045 || (! unsignedp
be63b77d 4046 && EXACT_POWER_OF_2_OR_ZERO_P (-UINTVAL (op1))))));
44037a66
TG
4047 }
4048
55c2d311 4049 /* If one of the operands is a volatile MEM, copy it into a register. */
c2a47e48 4050
3c0cb5de 4051 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
55c2d311 4052 op0 = force_reg (compute_mode, op0);
3c0cb5de 4053 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
c2a47e48
RK
4054 op1 = force_reg (compute_mode, op1);
4055
ab0b6581
TG
4056 /* If we need the remainder or if OP1 is constant, we need to
4057 put OP0 in a register in case it has any queued subexpressions. */
4058 if (rem_flag || op1_is_constant)
4059 op0 = force_reg (compute_mode, op0);
bc1c7e93 4060
55c2d311 4061 last = get_last_insn ();
44037a66 4062
9faa82d8 4063 /* Promote floor rounding to trunc rounding for unsigned operations. */
55c2d311 4064 if (unsignedp)
44037a66 4065 {
55c2d311
TG
4066 if (code == FLOOR_DIV_EXPR)
4067 code = TRUNC_DIV_EXPR;
4068 if (code == FLOOR_MOD_EXPR)
4069 code = TRUNC_MOD_EXPR;
db7cafb0
JL
4070 if (code == EXACT_DIV_EXPR && op1_is_pow2)
4071 code = TRUNC_DIV_EXPR;
55c2d311 4072 }
bc1c7e93 4073
55c2d311
TG
4074 if (op1 != const0_rtx)
4075 switch (code)
4076 {
4077 case TRUNC_MOD_EXPR:
4078 case TRUNC_DIV_EXPR:
34f016ed 4079 if (op1_is_constant)
55c2d311 4080 {
d8f1376c 4081 if (unsignedp)
55c2d311 4082 {
079c527f 4083 unsigned HOST_WIDE_INT mh, ml;
55c2d311
TG
4084 int pre_shift, post_shift;
4085 int dummy;
1c4a429a
JH
4086 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4087 & GET_MODE_MASK (compute_mode));
55c2d311
TG
4088
4089 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4090 {
4091 pre_shift = floor_log2 (d);
4092 if (rem_flag)
4093 {
2f1cd2eb
RS
4094 unsigned HOST_WIDE_INT mask
4095 = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1;
db3cf6fb
MS
4096 remainder
4097 = expand_binop (compute_mode, and_optab, op0,
2f1cd2eb 4098 gen_int_mode (mask, compute_mode),
db3cf6fb
MS
4099 remainder, 1,
4100 OPTAB_LIB_WIDEN);
55c2d311 4101 if (remainder)
c8dbc8ca 4102 return gen_lowpart (mode, remainder);
55c2d311
TG
4103 }
4104 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
eb6c3df1 4105 pre_shift, tquotient, 1);
55c2d311 4106 }
34f016ed 4107 else if (size <= HOST_BITS_PER_WIDE_INT)
55c2d311 4108 {
dc1d6150 4109 if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
55c2d311 4110 {
dc1d6150
TG
4111 /* Most significant bit of divisor is set; emit an scc
4112 insn. */
b45f0e58
PB
4113 quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4114 compute_mode, 1, 1);
55c2d311
TG
4115 }
4116 else
4117 {
dc1d6150
TG
4118 /* Find a suitable multiplier and right shift count
4119 instead of multiplying by D. */
4120
4121 mh = choose_multiplier (d, size, size,
4122 &ml, &post_shift, &dummy);
4123
4124 /* If the suggested multiplier is more than SIZE bits,
4125 we can do better for even divisors, using an
4126 initial right shift. */
4127 if (mh != 0 && (d & 1) == 0)
4128 {
4129 pre_shift = floor_log2 (d & -d);
4130 mh = choose_multiplier (d >> pre_shift, size,
4131 size - pre_shift,
4132 &ml, &post_shift, &dummy);
5b0264cb 4133 gcc_assert (!mh);
dc1d6150
TG
4134 }
4135 else
4136 pre_shift = 0;
4137
4138 if (mh != 0)
4139 {
4140 rtx t1, t2, t3, t4;
4141
02a65aef
R
4142 if (post_shift - 1 >= BITS_PER_WORD)
4143 goto fail1;
4144
965703ed 4145 extra_cost
5322d07e
NF
4146 = (shift_cost (speed, compute_mode, post_shift - 1)
4147 + shift_cost (speed, compute_mode, 1)
4148 + 2 * add_cost (speed, compute_mode));
2f1cd2eb
RS
4149 t1 = expmed_mult_highpart
4150 (compute_mode, op0,
4151 gen_int_mode (ml, compute_mode),
4152 NULL_RTX, 1, max_cost - extra_cost);
dc1d6150
TG
4153 if (t1 == 0)
4154 goto fail1;
38a448ca
RH
4155 t2 = force_operand (gen_rtx_MINUS (compute_mode,
4156 op0, t1),
dc1d6150 4157 NULL_RTX);
eb6c3df1
RG
4158 t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4159 t2, 1, NULL_RTX, 1);
38a448ca
RH
4160 t4 = force_operand (gen_rtx_PLUS (compute_mode,
4161 t1, t3),
dc1d6150 4162 NULL_RTX);
4a90aeeb
NS
4163 quotient = expand_shift
4164 (RSHIFT_EXPR, compute_mode, t4,
eb6c3df1 4165 post_shift - 1, tquotient, 1);
dc1d6150
TG
4166 }
4167 else
4168 {
4169 rtx t1, t2;
4170
02a65aef
R
4171 if (pre_shift >= BITS_PER_WORD
4172 || post_shift >= BITS_PER_WORD)
4173 goto fail1;
4174
4a90aeeb
NS
4175 t1 = expand_shift
4176 (RSHIFT_EXPR, compute_mode, op0,
eb6c3df1 4177 pre_shift, NULL_RTX, 1);
965703ed 4178 extra_cost
5322d07e
NF
4179 = (shift_cost (speed, compute_mode, pre_shift)
4180 + shift_cost (speed, compute_mode, post_shift));
2f1cd2eb
RS
4181 t2 = expmed_mult_highpart
4182 (compute_mode, t1,
4183 gen_int_mode (ml, compute_mode),
4184 NULL_RTX, 1, max_cost - extra_cost);
dc1d6150
TG
4185 if (t2 == 0)
4186 goto fail1;
4a90aeeb
NS
4187 quotient = expand_shift
4188 (RSHIFT_EXPR, compute_mode, t2,
eb6c3df1 4189 post_shift, tquotient, 1);
dc1d6150 4190 }
55c2d311
TG
4191 }
4192 }
34f016ed
TG
4193 else /* Mode too wide to use the tricky code */
4194 break;
55c2d311
TG
4195
4196 insn = get_last_insn ();
7543f918
JR
4197 if (insn != last)
4198 set_dst_reg_note (insn, REG_EQUAL,
4199 gen_rtx_UDIV (compute_mode, op0, op1),
4200 quotient);
55c2d311
TG
4201 }
4202 else /* TRUNC_DIV, signed */
4203 {
4204 unsigned HOST_WIDE_INT ml;
4205 int lgup, post_shift;
e71c0aa7 4206 rtx mlr;
55c2d311 4207 HOST_WIDE_INT d = INTVAL (op1);
e4c9f3c2
ILT
4208 unsigned HOST_WIDE_INT abs_d;
4209
093253be
ILT
4210 /* Since d might be INT_MIN, we have to cast to
4211 unsigned HOST_WIDE_INT before negating to avoid
4212 undefined signed overflow. */
6d9c91e9
ILT
4213 abs_d = (d >= 0
4214 ? (unsigned HOST_WIDE_INT) d
4215 : - (unsigned HOST_WIDE_INT) d);
55c2d311
TG
4216
4217 /* n rem d = n rem -d */
4218 if (rem_flag && d < 0)
4219 {
4220 d = abs_d;
2496c7bd 4221 op1 = gen_int_mode (abs_d, compute_mode);
55c2d311
TG
4222 }
4223
4224 if (d == 1)
4225 quotient = op0;
4226 else if (d == -1)
4227 quotient = expand_unop (compute_mode, neg_optab, op0,
4228 tquotient, 0);
f6c1336c
ILT
4229 else if (HOST_BITS_PER_WIDE_INT >= size
4230 && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
f737b132
RK
4231 {
4232 /* This case is not handled correctly below. */
4233 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4234 compute_mode, 1, 1);
4235 if (quotient == 0)
4236 goto fail1;
4237 }
55c2d311 4238 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
5322d07e
NF
4239 && (rem_flag
4240 ? smod_pow2_cheap (speed, compute_mode)
4241 : sdiv_pow2_cheap (speed, compute_mode))
0b55e932
RS
4242 /* We assume that cheap metric is true if the
4243 optab has an expander for this mode. */
166cdb08
JH
4244 && ((optab_handler ((rem_flag ? smod_optab
4245 : sdiv_optab),
947131ba 4246 compute_mode)
a8c7e72d 4247 != CODE_FOR_nothing)
947131ba
RS
4248 || (optab_handler (sdivmod_optab,
4249 compute_mode)
4250 != CODE_FOR_nothing)))
55c2d311
TG
4251 ;
4252 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4253 {
0b55e932
RS
4254 if (rem_flag)
4255 {
4256 remainder = expand_smod_pow2 (compute_mode, op0, d);
4257 if (remainder)
4258 return gen_lowpart (mode, remainder);
4259 }
3d520aaf 4260
5322d07e 4261 if (sdiv_pow2_cheap (speed, compute_mode)
947131ba 4262 && ((optab_handler (sdiv_optab, compute_mode)
3d520aaf 4263 != CODE_FOR_nothing)
947131ba 4264 || (optab_handler (sdivmod_optab, compute_mode)
3d520aaf
DE
4265 != CODE_FOR_nothing)))
4266 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4267 compute_mode, op0,
4268 gen_int_mode (abs_d,
4269 compute_mode),
4270 NULL_RTX, 0);
4271 else
4272 quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
55c2d311 4273
0b55e932
RS
4274 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4275 negate the quotient. */
55c2d311
TG
4276 if (d < 0)
4277 {
4278 insn = get_last_insn ();
4e430df8 4279 if (insn != last
c8e7fe58
DE
4280 && abs_d < ((unsigned HOST_WIDE_INT) 1
4281 << (HOST_BITS_PER_WIDE_INT - 1)))
7543f918
JR
4282 set_dst_reg_note (insn, REG_EQUAL,
4283 gen_rtx_DIV (compute_mode, op0,
6d26322f
JR
4284 gen_int_mode
4285 (abs_d,
4286 compute_mode)),
7543f918 4287 quotient);
55c2d311
TG
4288
4289 quotient = expand_unop (compute_mode, neg_optab,
4290 quotient, quotient, 0);
4291 }
4292 }
34f016ed 4293 else if (size <= HOST_BITS_PER_WIDE_INT)
55c2d311
TG
4294 {
4295 choose_multiplier (abs_d, size, size - 1,
079c527f 4296 &ml, &post_shift, &lgup);
55c2d311
TG
4297 if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4298 {
4299 rtx t1, t2, t3;
4300
02a65aef
R
4301 if (post_shift >= BITS_PER_WORD
4302 || size - 1 >= BITS_PER_WORD)
4303 goto fail1;
4304
5322d07e
NF
4305 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4306 + shift_cost (speed, compute_mode, size - 1)
4307 + add_cost (speed, compute_mode));
2f1cd2eb
RS
4308 t1 = expmed_mult_highpart
4309 (compute_mode, op0, gen_int_mode (ml, compute_mode),
4310 NULL_RTX, 0, max_cost - extra_cost);
55c2d311
TG
4311 if (t1 == 0)
4312 goto fail1;
4a90aeeb
NS
4313 t2 = expand_shift
4314 (RSHIFT_EXPR, compute_mode, t1,
eb6c3df1 4315 post_shift, NULL_RTX, 0);
4a90aeeb
NS
4316 t3 = expand_shift
4317 (RSHIFT_EXPR, compute_mode, op0,
eb6c3df1 4318 size - 1, NULL_RTX, 0);
55c2d311 4319 if (d < 0)
c5c76735
JL
4320 quotient
4321 = force_operand (gen_rtx_MINUS (compute_mode,
4322 t3, t2),
4323 tquotient);
55c2d311 4324 else
c5c76735
JL
4325 quotient
4326 = force_operand (gen_rtx_MINUS (compute_mode,
4327 t2, t3),
4328 tquotient);
55c2d311
TG
4329 }
4330 else
4331 {
4332 rtx t1, t2, t3, t4;
4333
02a65aef
R
4334 if (post_shift >= BITS_PER_WORD
4335 || size - 1 >= BITS_PER_WORD)
4336 goto fail1;
4337
55c2d311 4338 ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
e71c0aa7 4339 mlr = gen_int_mode (ml, compute_mode);
5322d07e
NF
4340 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4341 + shift_cost (speed, compute_mode, size - 1)
4342 + 2 * add_cost (speed, compute_mode));
00f07b86 4343 t1 = expmed_mult_highpart (compute_mode, op0, mlr,
71af73bb
TG
4344 NULL_RTX, 0,
4345 max_cost - extra_cost);
55c2d311
TG
4346 if (t1 == 0)
4347 goto fail1;
c5c76735
JL
4348 t2 = force_operand (gen_rtx_PLUS (compute_mode,
4349 t1, op0),
55c2d311 4350 NULL_RTX);
4a90aeeb
NS
4351 t3 = expand_shift
4352 (RSHIFT_EXPR, compute_mode, t2,
eb6c3df1 4353 post_shift, NULL_RTX, 0);
4a90aeeb
NS
4354 t4 = expand_shift
4355 (RSHIFT_EXPR, compute_mode, op0,
eb6c3df1 4356 size - 1, NULL_RTX, 0);
55c2d311 4357 if (d < 0)
c5c76735
JL
4358 quotient
4359 = force_operand (gen_rtx_MINUS (compute_mode,
4360 t4, t3),
4361 tquotient);
55c2d311 4362 else
c5c76735
JL
4363 quotient
4364 = force_operand (gen_rtx_MINUS (compute_mode,
4365 t3, t4),
4366 tquotient);
55c2d311
TG
4367 }
4368 }
34f016ed
TG
4369 else /* Mode too wide to use the tricky code */
4370 break;
55c2d311 4371
4e430df8 4372 insn = get_last_insn ();
7543f918
JR
4373 if (insn != last)
4374 set_dst_reg_note (insn, REG_EQUAL,
4375 gen_rtx_DIV (compute_mode, op0, op1),
4376 quotient);
55c2d311
TG
4377 }
4378 break;
4379 }
4380 fail1:
4381 delete_insns_since (last);
4382 break;
44037a66 4383
55c2d311
TG
4384 case FLOOR_DIV_EXPR:
4385 case FLOOR_MOD_EXPR:
4386 /* We will come here only for signed operations. */
4387 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4388 {
079c527f 4389 unsigned HOST_WIDE_INT mh, ml;
55c2d311
TG
4390 int pre_shift, lgup, post_shift;
4391 HOST_WIDE_INT d = INTVAL (op1);
4392
4393 if (d > 0)
4394 {
4395 /* We could just as easily deal with negative constants here,
4396 but it does not seem worth the trouble for GCC 2.6. */
4397 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4398 {
4399 pre_shift = floor_log2 (d);
4400 if (rem_flag)
4401 {
2f1cd2eb
RS
4402 unsigned HOST_WIDE_INT mask
4403 = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1;
4404 remainder = expand_binop
4405 (compute_mode, and_optab, op0,
4406 gen_int_mode (mask, compute_mode),
4407 remainder, 0, OPTAB_LIB_WIDEN);
55c2d311 4408 if (remainder)
c8dbc8ca 4409 return gen_lowpart (mode, remainder);
55c2d311 4410 }
4a90aeeb
NS
4411 quotient = expand_shift
4412 (RSHIFT_EXPR, compute_mode, op0,
eb6c3df1 4413 pre_shift, tquotient, 0);
55c2d311
TG
4414 }
4415 else
4416 {
4417 rtx t1, t2, t3, t4;
4418
4419 mh = choose_multiplier (d, size, size - 1,
4420 &ml, &post_shift, &lgup);
5b0264cb 4421 gcc_assert (!mh);
55c2d311 4422
02a65aef
R
4423 if (post_shift < BITS_PER_WORD
4424 && size - 1 < BITS_PER_WORD)
55c2d311 4425 {
4a90aeeb
NS
4426 t1 = expand_shift
4427 (RSHIFT_EXPR, compute_mode, op0,
eb6c3df1 4428 size - 1, NULL_RTX, 0);
02a65aef
R
4429 t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4430 NULL_RTX, 0, OPTAB_WIDEN);
5322d07e
NF
4431 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4432 + shift_cost (speed, compute_mode, size - 1)
4433 + 2 * add_cost (speed, compute_mode));
2f1cd2eb
RS
4434 t3 = expmed_mult_highpart
4435 (compute_mode, t2, gen_int_mode (ml, compute_mode),
4436 NULL_RTX, 1, max_cost - extra_cost);
02a65aef
R
4437 if (t3 != 0)
4438 {
4a90aeeb
NS
4439 t4 = expand_shift
4440 (RSHIFT_EXPR, compute_mode, t3,
eb6c3df1 4441 post_shift, NULL_RTX, 1);
02a65aef
R
4442 quotient = expand_binop (compute_mode, xor_optab,
4443 t4, t1, tquotient, 0,
4444 OPTAB_WIDEN);
4445 }
55c2d311
TG
4446 }
4447 }
4448 }
4449 else
4450 {
4451 rtx nsign, t1, t2, t3, t4;
38a448ca
RH
4452 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4453 op0, constm1_rtx), NULL_RTX);
55c2d311
TG
4454 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4455 0, OPTAB_WIDEN);
4a90aeeb
NS
4456 nsign = expand_shift
4457 (RSHIFT_EXPR, compute_mode, t2,
eb6c3df1 4458 size - 1, NULL_RTX, 0);
38a448ca 4459 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
55c2d311
TG
4460 NULL_RTX);
4461 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4462 NULL_RTX, 0);
4463 if (t4)
4464 {
4465 rtx t5;
4466 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4467 NULL_RTX, 0);
38a448ca
RH
4468 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4469 t4, t5),
55c2d311
TG
4470 tquotient);
4471 }
4472 }
4473 }
4474
4475 if (quotient != 0)
4476 break;
4477 delete_insns_since (last);
4478
4479 /* Try using an instruction that produces both the quotient and
4480 remainder, using truncation. We can easily compensate the quotient
4481 or remainder to get floor rounding, once we have the remainder.
4482 Notice that we also compute the final remainder value here,
4483 and return the result right away. */
a45cf58c 4484 if (target == 0 || GET_MODE (target) != compute_mode)
55c2d311 4485 target = gen_reg_rtx (compute_mode);
668443c9 4486
55c2d311
TG
4487 if (rem_flag)
4488 {
668443c9 4489 remainder
f8cfc6aa 4490 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
55c2d311
TG
4491 quotient = gen_reg_rtx (compute_mode);
4492 }
4493 else
4494 {
668443c9 4495 quotient
f8cfc6aa 4496 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
55c2d311
TG
4497 remainder = gen_reg_rtx (compute_mode);
4498 }
4499
4500 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4501 quotient, remainder, 0))
4502 {
4503 /* This could be computed with a branch-less sequence.
4504 Save that for later. */
4505 rtx tem;
f3f6fb16 4506 rtx_code_label *label = gen_label_rtx ();
f5963e61 4507 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
55c2d311
TG
4508 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4509 NULL_RTX, 0, OPTAB_WIDEN);
f5963e61 4510 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
55c2d311
TG
4511 expand_dec (quotient, const1_rtx);
4512 expand_inc (remainder, op1);
4513 emit_label (label);
c8dbc8ca 4514 return gen_lowpart (mode, rem_flag ? remainder : quotient);
55c2d311
TG
4515 }
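	  /* Illustrative note, not in the original source: the fixup
	     above turns a truncating divmod into a flooring one.  When
	     the remainder is nonzero and op0 and op1 have opposite signs
	     (op0 ^ op1 < 0), decrement the quotient and add op1 back
	     into the remainder.  Example: -7 / 2 truncates to q = -3,
	     r = -1; the fixup gives q = -4, r = 1, i.e. floor (-7 / 2)
	     with a remainder that has the divisor's sign.  */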
4516
4517 /* No luck with division elimination or divmod. Have to do it
4518 by conditionally adjusting op0 *and* the result. */
44037a66 4519 {
f3f6fb16 4520 rtx_code_label *label1, *label2, *label3, *label4, *label5;
55c2d311
TG
4521 rtx adjusted_op0;
4522 rtx tem;
4523
4524 quotient = gen_reg_rtx (compute_mode);
4525 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4526 label1 = gen_label_rtx ();
4527 label2 = gen_label_rtx ();
4528 label3 = gen_label_rtx ();
4529 label4 = gen_label_rtx ();
4530 label5 = gen_label_rtx ();
f5963e61
JL
4531 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4532 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
55c2d311
TG
4533 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4534 quotient, 0, OPTAB_LIB_WIDEN);
4535 if (tem != quotient)
4536 emit_move_insn (quotient, tem);
ec4a505f 4537 emit_jump_insn (targetm.gen_jump (label5));
55c2d311
TG
4538 emit_barrier ();
4539 emit_label (label1);
44037a66 4540 expand_inc (adjusted_op0, const1_rtx);
ec4a505f 4541 emit_jump_insn (targetm.gen_jump (label4));
55c2d311
TG
4542 emit_barrier ();
4543 emit_label (label2);
f5963e61 4544 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
55c2d311
TG
4545 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4546 quotient, 0, OPTAB_LIB_WIDEN);
4547 if (tem != quotient)
4548 emit_move_insn (quotient, tem);
ec4a505f 4549 emit_jump_insn (targetm.gen_jump (label5));
55c2d311
TG
4550 emit_barrier ();
4551 emit_label (label3);
4552 expand_dec (adjusted_op0, const1_rtx);
4553 emit_label (label4);
4554 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4555 quotient, 0, OPTAB_LIB_WIDEN);
4556 if (tem != quotient)
4557 emit_move_insn (quotient, tem);
4558 expand_dec (quotient, const1_rtx);
4559 emit_label (label5);
44037a66 4560 }
55c2d311 4561 break;
44037a66 4562
55c2d311
TG
4563 case CEIL_DIV_EXPR:
4564 case CEIL_MOD_EXPR:
4565 if (unsignedp)
4566 {
9176af2f
TG
4567 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4568 {
4569 rtx t1, t2, t3;
4570 unsigned HOST_WIDE_INT d = INTVAL (op1);
4571 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
eb6c3df1 4572 floor_log2 (d), tquotient, 1);
9176af2f 4573 t2 = expand_binop (compute_mode, and_optab, op0,
2f1cd2eb 4574 gen_int_mode (d - 1, compute_mode),
9176af2f
TG
4575 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4576 t3 = gen_reg_rtx (compute_mode);
4577 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4578 compute_mode, 1, 1);
412381d9
TG
4579 if (t3 == 0)
4580 {
f3f6fb16 4581 rtx_code_label *lab;
412381d9 4582 lab = gen_label_rtx ();
f5963e61 4583 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
412381d9
TG
4584 expand_inc (t1, const1_rtx);
4585 emit_label (lab);
4586 quotient = t1;
4587 }
4588 else
38a448ca
RH
4589 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4590 t1, t3),
412381d9 4591 tquotient);
9176af2f
TG
4592 break;
4593 }
55c2d311
TG
4594
4595 /* Try using an instruction that produces both the quotient and
4596 remainder, using truncation. We can easily compensate the
4597 quotient or remainder to get ceiling rounding, once we have the
4598 remainder. Notice that we also compute the final remainder
4599 value here, and return the result right away. */
a45cf58c 4600 if (target == 0 || GET_MODE (target) != compute_mode)
55c2d311 4601 target = gen_reg_rtx (compute_mode);
668443c9 4602
55c2d311
TG
4603 if (rem_flag)
4604 {
f8cfc6aa 4605 remainder = (REG_P (target)
668443c9 4606 ? target : gen_reg_rtx (compute_mode));
55c2d311
TG
4607 quotient = gen_reg_rtx (compute_mode);
4608 }
4609 else
4610 {
f8cfc6aa 4611 quotient = (REG_P (target)
668443c9 4612 ? target : gen_reg_rtx (compute_mode));
55c2d311
TG
4613 remainder = gen_reg_rtx (compute_mode);
4614 }
4615
4616 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4617 remainder, 1))
4618 {
4619 /* This could be computed with a branch-less sequence.
4620 Save that for later. */
f3f6fb16 4621 rtx_code_label *label = gen_label_rtx ();
f5963e61
JL
4622 do_cmp_and_jump (remainder, const0_rtx, EQ,
4623 compute_mode, label);
55c2d311
TG
4624 expand_inc (quotient, const1_rtx);
4625 expand_dec (remainder, op1);
4626 emit_label (label);
c8dbc8ca 4627 return gen_lowpart (mode, rem_flag ? remainder : quotient);
55c2d311
TG
4628 }
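	      /* Illustrative note, not in the original source: the fixup
		 above turns a truncating divmod into a ceiling one: when
		 the remainder is nonzero, increment the quotient and
		 subtract op1 from the remainder.  Example: 7 / 2
		 truncates to q = 3, r = 1; the fixup gives q = 4 and
		 r = -1 = 7 - 4 * 2.  */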
4629
4630 /* No luck with division elimination or divmod. Have to do it
4631 by conditionally adjusting op0 *and* the result. */
44037a66 4632 {
f3f6fb16 4633 rtx_code_label *label1, *label2;
55c2d311
TG
4634 rtx adjusted_op0, tem;
4635
4636 quotient = gen_reg_rtx (compute_mode);
4637 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4638 label1 = gen_label_rtx ();
4639 label2 = gen_label_rtx ();
f5963e61
JL
4640 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4641 compute_mode, label1);
55c2d311 4642 emit_move_insn (quotient, const0_rtx);
ec4a505f 4643 emit_jump_insn (targetm.gen_jump (label2));
55c2d311
TG
4644 emit_barrier ();
4645 emit_label (label1);
4646 expand_dec (adjusted_op0, const1_rtx);
4647 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4648 quotient, 1, OPTAB_LIB_WIDEN);
4649 if (tem != quotient)
4650 emit_move_insn (quotient, tem);
4651 expand_inc (quotient, const1_rtx);
4652 emit_label (label2);
44037a66 4653 }
55c2d311
TG
4654 }
4655 else /* signed */
4656 {
73f27728
RK
4657 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4658 && INTVAL (op1) >= 0)
4659 {
4660 /* This is extremely similar to the code for the unsigned case
4661 above. For 2.7 we should merge these variants, but for
4662 2.6.1 I don't want to touch the code for unsigned since that
4663 gets used in C. The signed case will only be used by other
4664 languages (Ada). */
4665
4666 rtx t1, t2, t3;
4667 unsigned HOST_WIDE_INT d = INTVAL (op1);
4668 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
eb6c3df1 4669 floor_log2 (d), tquotient, 0);
73f27728 4670 t2 = expand_binop (compute_mode, and_optab, op0,
2f1cd2eb 4671 gen_int_mode (d - 1, compute_mode),
73f27728
RK
4672 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4673 t3 = gen_reg_rtx (compute_mode);
4674 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4675 compute_mode, 1, 1);
4676 if (t3 == 0)
4677 {
f3f6fb16 4678 rtx_code_label *lab;
73f27728 4679 lab = gen_label_rtx ();
f5963e61 4680 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
73f27728
RK
4681 expand_inc (t1, const1_rtx);
4682 emit_label (lab);
4683 quotient = t1;
4684 }
4685 else
38a448ca
RH
4686 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4687 t1, t3),
73f27728
RK
4688 tquotient);
4689 break;
4690 }
4691
55c2d311
TG
4692 /* Try using an instruction that produces both the quotient and
4693 remainder, using truncation. We can easily compensate the
4694 quotient or remainder to get ceiling rounding, once we have the
4695 remainder. Notice that we also compute the final remainder
4696 value here, and return the result right away. */
a45cf58c 4697 if (target == 0 || GET_MODE (target) != compute_mode)
55c2d311
TG
4698 target = gen_reg_rtx (compute_mode);
4699 if (rem_flag)
4700 {
f8cfc6aa 4701 remainder = (REG_P (target)
668443c9 4702 ? target : gen_reg_rtx (compute_mode));
55c2d311
TG
4703 quotient = gen_reg_rtx (compute_mode);
4704 }
4705 else
4706 {
f8cfc6aa 4707 quotient = (REG_P (target)
668443c9 4708 ? target : gen_reg_rtx (compute_mode));
55c2d311
TG
4709 remainder = gen_reg_rtx (compute_mode);
4710 }
4711
4712 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4713 remainder, 0))
4714 {
4715 /* This could be computed with a branch-less sequence.
4716 Save that for later. */
4717 rtx tem;
f3f6fb16 4718 rtx_code_label *label = gen_label_rtx ();
f5963e61
JL
4719 do_cmp_and_jump (remainder, const0_rtx, EQ,
4720 compute_mode, label);
55c2d311
TG
4721 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4722 NULL_RTX, 0, OPTAB_WIDEN);
f5963e61 4723 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
55c2d311
TG
4724 expand_inc (quotient, const1_rtx);
4725 expand_dec (remainder, op1);
4726 emit_label (label);
c8dbc8ca 4727 return gen_lowpart (mode, rem_flag ? remainder : quotient);
55c2d311
TG
4728 }
4729
4730 /* No luck with division elimination or divmod. Have to do it
4731 by conditionally adjusting op0 *and* the result. */
44037a66 4732 {
f3f6fb16 4733 rtx_code_label *label1, *label2, *label3, *label4, *label5;
55c2d311
TG
4734 rtx adjusted_op0;
4735 rtx tem;
4736
4737 quotient = gen_reg_rtx (compute_mode);
4738 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4739 label1 = gen_label_rtx ();
4740 label2 = gen_label_rtx ();
4741 label3 = gen_label_rtx ();
4742 label4 = gen_label_rtx ();
4743 label5 = gen_label_rtx ();
f5963e61
JL
4744 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4745 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4746 compute_mode, label1);
55c2d311
TG
4747 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4748 quotient, 0, OPTAB_LIB_WIDEN);
4749 if (tem != quotient)
4750 emit_move_insn (quotient, tem);
ec4a505f 4751 emit_jump_insn (targetm.gen_jump (label5));
55c2d311
TG
4752 emit_barrier ();
4753 emit_label (label1);
4754 expand_dec (adjusted_op0, const1_rtx);
ec4a505f 4755 emit_jump_insn (targetm.gen_jump (label4));
55c2d311
TG
4756 emit_barrier ();
4757 emit_label (label2);
f5963e61
JL
4758 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4759 compute_mode, label3);
55c2d311
TG
4760 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4761 quotient, 0, OPTAB_LIB_WIDEN);
4762 if (tem != quotient)
4763 emit_move_insn (quotient, tem);
ec4a505f 4764 emit_jump_insn (targetm.gen_jump (label5));
55c2d311
TG
4765 emit_barrier ();
4766 emit_label (label3);
4767 expand_inc (adjusted_op0, const1_rtx);
4768 emit_label (label4);
4769 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4770 quotient, 0, OPTAB_LIB_WIDEN);
4771 if (tem != quotient)
4772 emit_move_insn (quotient, tem);
4773 expand_inc (quotient, const1_rtx);
4774 emit_label (label5);
44037a66 4775 }
55c2d311
TG
4776 }
4777 break;
bc1c7e93 4778
55c2d311
TG
4779 case EXACT_DIV_EXPR:
4780 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4781 {
4782 HOST_WIDE_INT d = INTVAL (op1);
4783 unsigned HOST_WIDE_INT ml;
91ce572a 4784 int pre_shift;
55c2d311
TG
4785 rtx t1;
4786
91ce572a
CC
4787 pre_shift = floor_log2 (d & -d);
4788 ml = invert_mod2n (d >> pre_shift, size);
4789 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
eb6c3df1 4790 pre_shift, NULL_RTX, unsignedp);
69107307 4791 quotient = expand_mult (compute_mode, t1,
2496c7bd 4792 gen_int_mode (ml, compute_mode),
31ff3e0b 4793 NULL_RTX, 1);
55c2d311
TG
4794
4795 insn = get_last_insn ();
7543f918
JR
4796 set_dst_reg_note (insn, REG_EQUAL,
4797 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4798 compute_mode, op0, op1),
4799 quotient);
55c2d311
TG
4800 }
4801 break;
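      /* Illustrative note, not in the original source: EXACT_DIV_EXPR
	 promises the division has no remainder, so after shifting out
	 the power-of-2 factor of D it multiplies by the inverse of the
	 odd part modulo 2^N.  Worked example with N = 8, d = 5: the
	 inverse is 205, since 5 * 205 = 1025 = 4 * 256 + 1; then for
	 x = 35, 35 * 205 = 7175 = 28 * 256 + 7, and 7 = 35 / 5.  */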
4802
4803 case ROUND_DIV_EXPR:
4804 case ROUND_MOD_EXPR:
69f61901
RK
4805 if (unsignedp)
4806 {
4807 rtx tem;
f3f6fb16 4808 rtx_code_label *label;
69f61901
RK
4809 label = gen_label_rtx ();
4810 quotient = gen_reg_rtx (compute_mode);
4811 remainder = gen_reg_rtx (compute_mode);
4812 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4813 {
4814 rtx tem;
4815 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4816 quotient, 1, OPTAB_LIB_WIDEN);
4817 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4818 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4819 remainder, 1, OPTAB_LIB_WIDEN);
4820 }
0a81f074 4821 tem = plus_constant (compute_mode, op1, -1);
eb6c3df1 4822 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
f5963e61 4823 do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
69f61901
RK
4824 expand_inc (quotient, const1_rtx);
4825 expand_dec (remainder, op1);
4826 emit_label (label);
4827 }
4828 else
4829 {
4830 rtx abs_rem, abs_op1, tem, mask;
f3f6fb16 4831 rtx_code_label *label;
69f61901
RK
4832 label = gen_label_rtx ();
4833 quotient = gen_reg_rtx (compute_mode);
4834 remainder = gen_reg_rtx (compute_mode);
4835 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4836 {
4837 rtx tem;
4838 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4839 quotient, 0, OPTAB_LIB_WIDEN);
4840 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4841 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4842 remainder, 0, OPTAB_LIB_WIDEN);
4843 }
91ce572a
CC
4844 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4845 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
69f61901 4846 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
eb6c3df1 4847 1, NULL_RTX, 1);
f5963e61 4848 do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
69f61901
RK
4849 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4850 NULL_RTX, 0, OPTAB_WIDEN);
4851 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
eb6c3df1 4852 size - 1, NULL_RTX, 0);
69f61901
RK
4853 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4854 NULL_RTX, 0, OPTAB_WIDEN);
4855 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4856 NULL_RTX, 0, OPTAB_WIDEN);
4857 expand_inc (quotient, tem);
4858 tem = expand_binop (compute_mode, xor_optab, mask, op1,
4859 NULL_RTX, 0, OPTAB_WIDEN);
4860 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4861 NULL_RTX, 0, OPTAB_WIDEN);
4862 expand_dec (remainder, tem);
4863 emit_label (label);
4864 }
4865 return gen_lowpart (mode, rem_flag ? remainder : quotient);
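	  /* Illustrative note, not in the original source: the unsigned
	     case above bumps the truncating quotient when
	     remainder > (op1 - 1) >> 1, i.e. when the discarded fraction
	     is at least one half.  Example: 8 / 3 gives q = 2, r = 2;
	     2 > (3 - 1) >> 1 = 1, so q becomes 3 = round (2.67).  The
	     signed case does the same branch-free, comparing
	     2 * abs (remainder) against abs (op1).  */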
c410d49e 4866
e9a25f70 4867 default:
5b0264cb 4868 gcc_unreachable ();
55c2d311 4869 }
44037a66 4870
55c2d311 4871 if (quotient == 0)
44037a66 4872 {
a45cf58c
RK
4873 if (target && GET_MODE (target) != compute_mode)
4874 target = 0;
4875
55c2d311 4876 if (rem_flag)
44037a66 4877 {
32fdf36b 4878 /* Try to produce the remainder without producing the quotient.
d6a7951f 4879 If we seem to have a divmod pattern that does not require widening,
b20b352b 4880 don't try widening here. We should really have a WIDEN argument
32fdf36b
TG
4881 to expand_twoval_binop, since what we'd really like to do here is
4882 1) try a mod insn in compute_mode
4883 2) try a divmod insn in compute_mode
4884 3) try a div insn in compute_mode and multiply-subtract to get
4885 remainder
4886 4) try the same things with widening allowed. */
4887 remainder
4888 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4889 op0, op1, target,
4890 unsignedp,
947131ba 4891 ((optab_handler (optab2, compute_mode)
32fdf36b
TG
4892 != CODE_FOR_nothing)
4893 ? OPTAB_DIRECT : OPTAB_WIDEN));
55c2d311 4894 if (remainder == 0)
44037a66
TG
4895 {
4896 /* No luck there. Can we do remainder and divide at once
4897 without a library call? */
55c2d311
TG
4898 remainder = gen_reg_rtx (compute_mode);
4899 if (! expand_twoval_binop ((unsignedp
4900 ? udivmod_optab
4901 : sdivmod_optab),
4902 op0, op1,
4903 NULL_RTX, remainder, unsignedp))
4904 remainder = 0;
44037a66 4905 }
55c2d311
TG
4906
4907 if (remainder)
4908 return gen_lowpart (mode, remainder);
44037a66 4909 }
44037a66 4910
dc38b292
RK
4911 /* Produce the quotient. Try a quotient insn, but not a library call.
4912 If we have a divmod in this mode, use it in preference to widening
4913 the div (for this test we assume it will not fail). Note that optab2
4914 is set to the one of the two optabs that the call below will use. */
4915 quotient
4916 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4917 op0, op1, rem_flag ? NULL_RTX : target,
4918 unsignedp,
947131ba 4919 ((optab_handler (optab2, compute_mode)
dc38b292
RK
4920 != CODE_FOR_nothing)
4921 ? OPTAB_DIRECT : OPTAB_WIDEN));
4922
55c2d311 4923 if (quotient == 0)
44037a66
TG
4924 {
4925 /* No luck there. Try a quotient-and-remainder insn,
4926 keeping the quotient alone. */
55c2d311 4927 quotient = gen_reg_rtx (compute_mode);
44037a66 4928 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
55c2d311
TG
4929 op0, op1,
4930 quotient, NULL_RTX, unsignedp))
4931 {
4932 quotient = 0;
4933 if (! rem_flag)
4934 /* Still no luck. If we are not computing the remainder,
4935 use a library call for the quotient. */
4936 quotient = sign_expand_binop (compute_mode,
4937 udiv_optab, sdiv_optab,
4938 op0, op1, target,
4939 unsignedp, OPTAB_LIB_WIDEN);
4940 }
44037a66 4941 }
44037a66
TG
4942 }
4943
44037a66
TG
4944 if (rem_flag)
4945 {
a45cf58c
RK
4946 if (target && GET_MODE (target) != compute_mode)
4947 target = 0;
4948
55c2d311 4949 if (quotient == 0)
b3f8d95d
MM
4950 {
4951 /* No divide instruction either. Use library for remainder. */
4952 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4953 op0, op1, target,
4954 unsignedp, OPTAB_LIB_WIDEN);
4955 /* No remainder function. Try a quotient-and-remainder
4956 function, keeping the remainder. */
4957 if (!remainder)
4958 {
4959 remainder = gen_reg_rtx (compute_mode);
b8698a0f 4960 if (!expand_twoval_binop_libfunc
b3f8d95d
MM
4961 (unsignedp ? udivmod_optab : sdivmod_optab,
4962 op0, op1,
4963 NULL_RTX, remainder,
4964 unsignedp ? UMOD : MOD))
4965 remainder = NULL_RTX;
4966 }
4967 }
44037a66
TG
4968 else
4969 {
4970 /* We divided. Now finish doing X - Y * (X / Y). */
55c2d311
TG
4971 remainder = expand_mult (compute_mode, quotient, op1,
4972 NULL_RTX, unsignedp);
4973 remainder = expand_binop (compute_mode, sub_optab, op0,
4974 remainder, target, unsignedp,
4975 OPTAB_LIB_WIDEN);
44037a66
TG
4976 }
4977 }
4978
55c2d311 4979 return gen_lowpart (mode, rem_flag ? remainder : quotient);
44037a66
TG
4980}
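/* Illustrative sketch, not part of expmed.c: the flooring adjustment
   that expand_divmod emits as RTL above, written in plain C on top of
   C99's truncating '/' and '%'.  */

static int
example_floor_div (int x, int y)
{
  int q = x / y;		/* truncating quotient */
  int r = x % y;		/* truncating remainder */

  /* Round toward negative infinity when the division was inexact and
     the operands have opposite signs.  */
  if (r != 0 && (x ^ y) < 0)
    q--;
  return q;
}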
4981\f
4982/* Return a tree node with data type TYPE, describing the value of X.
4dfa0342 4983 Usually this is a VAR_DECL, if there is no obvious better choice.
44037a66 4984 X may be an expression, however we only support those expressions
6d2f8887 4985 generated by loop.c. */
44037a66
TG
4986
4987tree
502b8322 4988make_tree (tree type, rtx x)
44037a66
TG
4989{
4990 tree t;
4991
4992 switch (GET_CODE (x))
4993 {
4994 case CONST_INT:
807e902e
KZ
4995 case CONST_WIDE_INT:
4996 t = wide_int_to_tree (type, std::make_pair (x, TYPE_MODE (type)));
4997 return t;
b8698a0f 4998
44037a66 4999 case CONST_DOUBLE:
807e902e
KZ
5000 STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
5001 if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
5002 t = wide_int_to_tree (type,
5003 wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
5004 HOST_BITS_PER_WIDE_INT * 2));
44037a66
TG
5005 else
5006 {
5007 REAL_VALUE_TYPE d;
5008
5009 REAL_VALUE_FROM_CONST_DOUBLE (d, x);
5010 t = build_real (type, d);
5011 }
5012
5013 return t;
69ef87e2
AH
5014
5015 case CONST_VECTOR:
5016 {
b8b7f162
RS
5017 int units = CONST_VECTOR_NUNITS (x);
5018 tree itype = TREE_TYPE (type);
d2a12ae7 5019 tree *elts;
b8b7f162 5020 int i;
69ef87e2 5021
69ef87e2 5022 /* Build a tree with vector elements. */
d2a12ae7 5023 elts = XALLOCAVEC (tree, units);
69ef87e2
AH
5024 for (i = units - 1; i >= 0; --i)
5025 {
b8b7f162 5026 rtx elt = CONST_VECTOR_ELT (x, i);
d2a12ae7 5027 elts[i] = make_tree (itype, elt);
69ef87e2 5028 }
c410d49e 5029
d2a12ae7 5030 return build_vector (type, elts);
69ef87e2
AH
5031 }
5032
44037a66 5033 case PLUS:
4845b383
KH
5034 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5035 make_tree (type, XEXP (x, 1)));
c410d49e 5036
44037a66 5037 case MINUS:
4845b383
KH
5038 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5039 make_tree (type, XEXP (x, 1)));
c410d49e 5040
44037a66 5041 case NEG:
4845b383 5042 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
44037a66
TG
5043
5044 case MULT:
4845b383
KH
5045 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5046 make_tree (type, XEXP (x, 1)));
c410d49e 5047
44037a66 5048 case ASHIFT:
4845b383
KH
5049 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5050 make_tree (type, XEXP (x, 1)));
c410d49e 5051
44037a66 5052 case LSHIFTRT:
ca5ba2a3 5053 t = unsigned_type_for (type);
aeba6c28
JM
5054 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5055 make_tree (t, XEXP (x, 0)),
5056 make_tree (type, XEXP (x, 1))));
c410d49e 5057
44037a66 5058 case ASHIFTRT:
12753674 5059 t = signed_type_for (type);
aeba6c28
JM
5060 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5061 make_tree (t, XEXP (x, 0)),
5062 make_tree (type, XEXP (x, 1))));
c410d49e 5063
44037a66
TG
5064 case DIV:
5065 if (TREE_CODE (type) != REAL_TYPE)
12753674 5066 t = signed_type_for (type);
44037a66
TG
5067 else
5068 t = type;
5069
aeba6c28
JM
5070 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5071 make_tree (t, XEXP (x, 0)),
5072 make_tree (t, XEXP (x, 1))));
44037a66 5073 case UDIV:
ca5ba2a3 5074 t = unsigned_type_for (type);
aeba6c28
JM
5075 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5076 make_tree (t, XEXP (x, 0)),
5077 make_tree (t, XEXP (x, 1))));
5c45425b
RH
5078
5079 case SIGN_EXTEND:
5080 case ZERO_EXTEND:
ae2bcd98
RS
5081 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5082 GET_CODE (x) == ZERO_EXTEND);
aeba6c28 5083 return fold_convert (type, make_tree (t, XEXP (x, 0)));
5c45425b 5084
84816907
JM
5085 case CONST:
5086 return make_tree (type, XEXP (x, 0));
5087
5088 case SYMBOL_REF:
5089 t = SYMBOL_REF_DECL (x);
5090 if (t)
5091 return fold_convert (type, build_fold_addr_expr (t));
5092 /* else fall through. */
5093
4dfa0342 5094 default:
c2255bc4 5095 t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
d1608933 5096
d4ebfa65
BE
5097 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5098 address mode to pointer mode. */
5ae6cd0d 5099 if (POINTER_TYPE_P (type))
d4ebfa65
BE
5100 x = convert_memory_address_addr_space
5101 (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
d1608933 5102
8a0aa06e
RH
5103 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5104 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
820cc88f 5105 t->decl_with_rtl.rtl = x;
4dfa0342 5106
44037a66
TG
5107 return t;
5108 }
5109}
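/* Illustrative example, not from the sources: make_tree maps RTL back
   into trees, so (mult:SI (reg:SI 60) (const_int 4)) with TYPE 'int'
   becomes MULT_EXPR <d, 4>, where 'd' is the anonymous VAR_DECL that
   the default case above synthesizes with the register as its
   DECL_RTL.  The register number 60 is made up for the example.  */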
44037a66
TG
5110\f
5111/* Compute the logical-and of OP0 and OP1, storing it in TARGET
5112 and returning TARGET.
5113
5114 If TARGET is 0, a pseudo-register or constant is returned. */
5115
5116rtx
ef4bddc2 5117expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
44037a66 5118{
22273300 5119 rtx tem = 0;
44037a66 5120
22273300
JJ
5121 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5122 tem = simplify_binary_operation (AND, mode, op0, op1);
5123 if (tem == 0)
44037a66 5124 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
44037a66
TG
5125
5126 if (target == 0)
5127 target = tem;
5128 else if (tem != target)
5129 emit_move_insn (target, tem);
5130 return target;
5131}
495499da 5132
a41a56b6 5133/* Helper function for emit_store_flag. */
2d52a3a1 5134rtx
ef12ae45 5135emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
ef4bddc2 5136 machine_mode mode, machine_mode compare_mode,
92355a9c 5137 int unsignedp, rtx x, rtx y, int normalizep,
ef4bddc2 5138 machine_mode target_mode)
a41a56b6 5139{
a5c7d693 5140 struct expand_operand ops[4];
f3f6fb16
DM
5141 rtx op0, comparison, subtarget;
5142 rtx_insn *last;
ef4bddc2 5143 machine_mode result_mode = targetm.cstore_mode (icode);
45475a3f
PB
5144
5145 last = get_last_insn ();
5146 x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5147 y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
a5c7d693 5148 if (!x || !y)
45475a3f
PB
5149 {
5150 delete_insns_since (last);
5151 return NULL_RTX;
5152 }
5153
92355a9c
PB
5154 if (target_mode == VOIDmode)
5155 target_mode = result_mode;
5156 if (!target)
5157 target = gen_reg_rtx (target_mode);
b8698a0f 5158
a5c7d693 5159 comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
45475a3f 5160
a5c7d693
RS
5161 create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5162 create_fixed_operand (&ops[1], comparison);
5163 create_fixed_operand (&ops[2], x);
5164 create_fixed_operand (&ops[3], y);
5165 if (!maybe_expand_insn (icode, 4, ops))
5166 {
5167 delete_insns_since (last);
5168 return NULL_RTX;
5169 }
5170 subtarget = ops[0].value;
495499da 5171
a41a56b6
RE
5172 /* If we are converting to a wider mode, first convert to
5173 TARGET_MODE, then normalize. This produces better combining
5174 opportunities on machines that have a SIGN_EXTRACT when we are
5175 testing a single bit. This mostly benefits the 68k.
5176
5177 If STORE_FLAG_VALUE does not have the sign bit set when
5178 interpreted in MODE, we can do this conversion as unsigned, which
5179 is usually more efficient. */
45475a3f 5180 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
a41a56b6
RE
5181 {
5182 convert_move (target, subtarget,
2d0c270f
BS
5183 val_signbit_known_clear_p (result_mode,
5184 STORE_FLAG_VALUE));
a41a56b6 5185 op0 = target;
45475a3f 5186 result_mode = target_mode;
a41a56b6
RE
5187 }
5188 else
5189 op0 = subtarget;
5190
5191 /* If we want to keep subexpressions around, don't reuse our last
5192 target. */
5193 if (optimize)
5194 subtarget = 0;
5195
5196 /* Now normalize to the proper value in MODE. Sometimes we don't
5197 have to do anything. */
5198 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5199 ;
5200 /* STORE_FLAG_VALUE might be the most negative number, so write
5201 the comparison this way to avoid a compile-time warning. */
5202 else if (- normalizep == STORE_FLAG_VALUE)
45475a3f 5203 op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
a41a56b6
RE
5204
5205 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5206 it hard to use a value of just the sign bit due to ANSI integer
5207 constant typing rules. */
2d0c270f 5208 else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
45475a3f 5209 op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
eb6c3df1 5210 GET_MODE_BITSIZE (result_mode) - 1, subtarget,
a41a56b6
RE
5211 normalizep == 1);
5212 else
5213 {
5214 gcc_assert (STORE_FLAG_VALUE & 1);
5215
45475a3f 5216 op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
a41a56b6 5217 if (normalizep == -1)
45475a3f 5218 op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
a41a56b6
RE
5219 }
5220
5221 /* If we were converting to a smaller mode, do the conversion now. */
45475a3f 5222 if (target_mode != result_mode)
a41a56b6
RE
5223 {
5224 convert_move (target, op0, 0);
5225 return target;
5226 }
5227 else
5228 return op0;
5229}
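/* Illustrative sketch, not part of expmed.c: the normalization step of
   emit_cstore in plain C, for a hypothetical 32-bit target whose
   store-flag instruction leaves the sign bit in RAW when the condition
   holds (STORE_FLAG_VALUE == INT_MIN) and 0 otherwise.  */

static int
example_normalize (int raw, int normalizep)
{
  unsigned int u = (unsigned int) raw;

  if (normalizep == 1)
    return (int) (u >> 31);	/* logical shift: 0 or 1 */
  else				/* normalizep == -1 */
    return -(int) (u >> 31);	/* arithmetic-shift equivalent: 0 or -1 */
}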
5230
44037a66 5231
ef12ae45
PB
5232/* A subroutine of emit_store_flag only including "tricks" that do not
5233 need a recursive call. These are kept separate to avoid infinite
5234 loops. */
44037a66 5235
ef12ae45
PB
5236static rtx
5237emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
ef4bddc2
RS
5238 machine_mode mode, int unsignedp, int normalizep,
5239 machine_mode target_mode)
44037a66
TG
5240{
5241 rtx subtarget;
5242 enum insn_code icode;
ef4bddc2 5243 machine_mode compare_mode;
f90b7a5a 5244 enum mode_class mclass;
45475a3f 5245 enum rtx_code scode;
44037a66 5246
b30f05db
BS
5247 if (unsignedp)
5248 code = unsigned_condition (code);
45475a3f 5249 scode = swap_condition (code);
b30f05db 5250
c2615a67
RK
5251 /* If one operand is constant, make it the second one. Only do this
5252 if the other operand is not constant as well. */
5253
8c9864f3 5254 if (swap_commutative_operands_p (op0, op1))
c2615a67 5255 {
fab27f52 5256 std::swap (op0, op1);
c2615a67
RK
5257 code = swap_condition (code);
5258 }
5259
6405e07b
DE
5260 if (mode == VOIDmode)
5261 mode = GET_MODE (op0);
5262
c410d49e 5263 /* For some comparisons with 1 and -1, we can convert this to
44037a66 5264 comparisons with zero. This will often produce more opportunities for
0f41302f 5265 store-flag insns. */
44037a66
TG
5266
5267 switch (code)
5268 {
5269 case LT:
5270 if (op1 == const1_rtx)
5271 op1 = const0_rtx, code = LE;
5272 break;
5273 case LE:
5274 if (op1 == constm1_rtx)
5275 op1 = const0_rtx, code = LT;
5276 break;
5277 case GE:
5278 if (op1 == const1_rtx)
5279 op1 = const0_rtx, code = GT;
5280 break;
5281 case GT:
5282 if (op1 == constm1_rtx)
5283 op1 = const0_rtx, code = GE;
5284 break;
5285 case GEU:
5286 if (op1 == const1_rtx)
5287 op1 = const0_rtx, code = NE;
5288 break;
5289 case LTU:
5290 if (op1 == const1_rtx)
5291 op1 = const0_rtx, code = EQ;
5292 break;
e9a25f70
JL
5293 default:
5294 break;
44037a66
TG
5295 }
5296
884815aa
JB
5297 /* If we are comparing a double-word integer with zero or -1, we can
5298 convert the comparison into one involving a single word. */
6912b84b
RK
5299 if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5300 && GET_MODE_CLASS (mode) == MODE_INT
3c0cb5de 5301 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
6912b84b 5302 {
fab27f52 5303 rtx tem;
884815aa
JB
5304 if ((code == EQ || code == NE)
5305 && (op1 == const0_rtx || op1 == constm1_rtx))
6912b84b 5306 {
1ed20a40 5307 rtx op00, op01;
8433f113 5308
a41a56b6
RE
5309 /* Do a logical OR or AND of the two words and compare the
5310 result. */
8433f113
RH
5311 op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5312 op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
1ed20a40
PB
5313 tem = expand_binop (word_mode,
5314 op1 == const0_rtx ? ior_optab : and_optab,
5315 op00, op01, NULL_RTX, unsignedp,
5316 OPTAB_DIRECT);
884815aa 5317
1ed20a40
PB
5318 if (tem != 0)
5319 tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
92355a9c 5320 unsignedp, normalizep);
6912b84b 5321 }
884815aa 5322 else if ((code == LT || code == GE) && op1 == const0_rtx)
8433f113
RH
5323 {
5324 rtx op0h;
5325
5326 /* If testing the sign bit, can just test on high word. */
5327 op0h = simplify_gen_subreg (word_mode, op0, mode,
a41a56b6
RE
5328 subreg_highpart_offset (word_mode,
5329 mode));
1ed20a40
PB
5330 tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5331 unsignedp, normalizep);
5332 }
5333 else
5334 tem = NULL_RTX;
5335
5336 if (tem)
5337 {
92355a9c 5338 if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
1ed20a40 5339 return tem;
92355a9c
PB
5340 if (!target)
5341 target = gen_reg_rtx (target_mode);
1ed20a40
PB
5342
5343 convert_move (target, tem,
2d0c270f
BS
5344 !val_signbit_known_set_p (word_mode,
5345 (normalizep ? normalizep
5346 : STORE_FLAG_VALUE)));
1ed20a40 5347 return target;
8433f113 5348 }
6912b84b
RK
5349 }
5350
44037a66
TG
5351 /* If this is A < 0 or A >= 0, we can do this by taking the ones
5352 complement of A (for GE) and shifting the sign bit to the low bit. */
5353 if (op1 == const0_rtx && (code == LT || code == GE)
5354 && GET_MODE_CLASS (mode) == MODE_INT
5355 && (normalizep || STORE_FLAG_VALUE == 1
2d0c270f 5356 || val_signbit_p (mode, STORE_FLAG_VALUE)))
44037a66 5357 {
8deb7047 5358 subtarget = target;
44037a66 5359
495499da
PB
5360 if (!target)
5361 target_mode = mode;
5362
44037a66
TG
5363 /* If the result is to be wider than OP0, it is best to convert it
5364 first. If it is to be narrower, it is *incorrect* to convert it
5365 first. */
495499da 5366 else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
44037a66 5367 {
81722fa9 5368 op0 = convert_modes (target_mode, mode, op0, 0);
44037a66
TG
5369 mode = target_mode;
5370 }
5371
5372 if (target_mode != mode)
5373 subtarget = 0;
5374
5375 if (code == GE)
1d6eaf3d
RK
5376 op0 = expand_unop (mode, one_cmpl_optab, op0,
5377 ((STORE_FLAG_VALUE == 1 || normalizep)
5378 ? 0 : subtarget), 0);
44037a66 5379
1d6eaf3d 5380 if (STORE_FLAG_VALUE == 1 || normalizep)
44037a66
TG
5381 /* If we are supposed to produce a 0/1 value, we want to do
5382 a logical shift from the sign bit to the low-order bit; for
5383 a -1/0 value, we do an arithmetic shift. */
5384 op0 = expand_shift (RSHIFT_EXPR, mode, op0,
eb6c3df1 5385 GET_MODE_BITSIZE (mode) - 1,
44037a66
TG
5386 subtarget, normalizep != -1);
5387
5388 if (mode != target_mode)
c2ec26b8 5389 op0 = convert_modes (target_mode, mode, op0, 0);
44037a66
TG
5390
5391 return op0;
5392 }
5393
f90b7a5a
PB
5394 mclass = GET_MODE_CLASS (mode);
5395 for (compare_mode = mode; compare_mode != VOIDmode;
5396 compare_mode = GET_MODE_WIDER_MODE (compare_mode))
a41a56b6 5397 {
ef4bddc2 5398 machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
947131ba 5399 icode = optab_handler (cstore_optab, optab_mode);
f90b7a5a 5400 if (icode != CODE_FOR_nothing)
a41a56b6 5401 {
a41a56b6 5402 do_pending_stack_adjust ();
fab27f52
MM
5403 rtx tem = emit_cstore (target, icode, code, mode, compare_mode,
5404 unsignedp, op0, op1, normalizep, target_mode);
45475a3f
PB
5405 if (tem)
5406 return tem;
44037a66 5407
45475a3f 5408 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
44037a66 5409 {
ef12ae45 5410 tem = emit_cstore (target, icode, scode, mode, compare_mode,
92355a9c 5411 unsignedp, op1, op0, normalizep, target_mode);
45475a3f
PB
5412 if (tem)
5413 return tem;
44037a66 5414 }
f90b7a5a 5415 break;
44037a66
TG
5416 }
5417 }
5418
ef12ae45
PB
5419 return 0;
5420}
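/* Illustrative sketch, not part of expmed.c: the double-word tricks of
   emit_store_flag_1 above, in plain C for a 32-bit word size.  An
   x == 0 test ORs the two word halves, x == -1 would AND them, and
   x < 0 needs only the sign bit of the high word.  */

static int
example_dword_eq0 (unsigned int lo, unsigned int hi)
{
  return (lo | hi) == 0;
}

static int
example_dword_lt0 (unsigned int lo ATTRIBUTE_UNUSED, int hi)
{
  return hi < 0;
}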
5421
5422/* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5423 and storing in TARGET. Normally return TARGET.
5424 Return 0 if that cannot be done.
5425
5426 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5427 it is VOIDmode, they cannot both be CONST_INT.
5428
5429 UNSIGNEDP is for the case where we have to widen the operands
5430 to perform the operation. It says to use zero-extension.
5431
5432 NORMALIZEP is 1 if we should convert the result to be either zero
5433 or one. NORMALIZEP is -1 if we should convert the result to be
5434 either zero or -1. If NORMALIZEP is zero, the result will be left
5435 "raw" out of the scc insn. */
5436
5437rtx
5438emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
ef4bddc2 5439 machine_mode mode, int unsignedp, int normalizep)
ef12ae45 5440{
ef4bddc2 5441 machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
ef12ae45
PB
5442 enum rtx_code rcode;
5443 rtx subtarget;
f3f6fb16
DM
5444 rtx tem, trueval;
5445 rtx_insn *last;
ef12ae45 5446
b2b262e3
JR
5447 /* If we compare constants, we shouldn't use a store-flag operation,
5448 but a constant load. We can get there via the vanilla route that
5449 usually generates a compare-branch sequence, but will in this case
5450 fold the comparison to a constant, and thus elide the branch. */
5451 if (CONSTANT_P (op0) && CONSTANT_P (op1))
5452 return NULL_RTX;
5453
92355a9c
PB
5454 tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5455 target_mode);
ef12ae45
PB
5456 if (tem)
5457 return tem;
44037a66 5458
495499da
PB
5459 /* If we reached here, we can't do this with a scc insn, however there
5460 are some comparisons that can be done in other ways. Don't do any
5461 of these cases if branches are very cheap. */
5462 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5463 return 0;
5464
5465 /* See what we need to return. We can only return a 1, -1, or the
5466 sign bit. */
5467
5468 if (normalizep == 0)
5469 {
5470 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5471 normalizep = STORE_FLAG_VALUE;
5472
2d0c270f 5473 else if (val_signbit_p (mode, STORE_FLAG_VALUE))
495499da
PB
5474 ;
5475 else
5476 return 0;
5477 }
5478
ef12ae45
PB
5479 last = get_last_insn ();
5480
7c27e184
PB
5481 /* If optimizing, use different pseudo registers for each insn, instead
5482 of reusing the same pseudo. This leads to better CSE, but slows
5483 down the compiler, since there are more pseudos. */
5484 subtarget = (!optimize
91e66235 5485 && (target_mode == mode)) ? target : NULL_RTX;
495499da
PB
5486 trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5487
5488 /* For floating-point comparisons, try the reverse comparison or try
5489 changing the "orderedness" of the comparison. */
5490 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5491 {
5492 enum rtx_code first_code;
5493 bool and_them;
5494
5495 rcode = reverse_condition_maybe_unordered (code);
5496 if (can_compare_p (rcode, mode, ccp_store_flag)
5497 && (code == ORDERED || code == UNORDERED
5498 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5499 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5500 {
533d4b99
PB
5501 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5502 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5503
495499da 5504 /* For the reverse comparison, use either an addition or an XOR. */
533d4b99 5505 if (want_add
68f932c4 5506 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
533d4b99 5507 optimize_insn_for_speed_p ()) == 0)
495499da 5508 {
ef12ae45 5509 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
92355a9c 5510 STORE_FLAG_VALUE, target_mode);
495499da
PB
5511 if (tem)
5512 return expand_binop (target_mode, add_optab, tem,
2f1cd2eb 5513 gen_int_mode (normalizep, target_mode),
495499da
PB
5514 target, 0, OPTAB_WIDEN);
5515 }
533d4b99 5516 else if (!want_add
68f932c4 5517 && rtx_cost (trueval, XOR, 1,
533d4b99 5518 optimize_insn_for_speed_p ()) == 0)
495499da 5519 {
ef12ae45 5520 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
92355a9c 5521 normalizep, target_mode);
495499da
PB
5522 if (tem)
5523 return expand_binop (target_mode, xor_optab, tem, trueval,
5524 target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5525 }
5526 }
5527
5528 delete_insns_since (last);
5529
5530 /* Cannot split ORDERED and UNORDERED, only try the above trick. */
5531 if (code == ORDERED || code == UNORDERED)
5532 return 0;
b8698a0f 5533
495499da
PB
5534 and_them = split_comparison (code, mode, &first_code, &code);
5535
5536 /* If there are no NaNs, the first comparison should always fall through.
5537 Effectively change the comparison to the other one. */
5538 if (!HONOR_NANS (mode))
5539 {
5540 gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
92355a9c
PB
5541 return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5542 target_mode);
495499da
PB
5543 }
5544
692e0312
TS
5545 if (!HAVE_conditional_move)
5546 return 0;
5547
495499da
PB
5548 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5549 conditional move. */
92355a9c
PB
5550 tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5551 normalizep, target_mode);
495499da
PB
5552 if (tem == 0)
5553 return 0;
5554
5555 if (and_them)
5556 tem = emit_conditional_move (target, code, op0, op1, mode,
5557 tem, const0_rtx, GET_MODE (tem), 0);
5558 else
5559 tem = emit_conditional_move (target, code, op0, op1, mode,
5560 trueval, tem, GET_MODE (tem), 0);
5561
5562 if (tem == 0)
5563 delete_insns_since (last);
5564 return tem;
495499da 5565 }
44037a66 5566
495499da
PB
5567 /* The remaining tricks only apply to integer comparisons. */
5568
5569 if (GET_MODE_CLASS (mode) != MODE_INT)
5570 return 0;
5571
5572 /* If this is an equality comparison of integers, we can try to exclusive-or
44037a66
TG
5573 (or subtract) the two operands and use a recursive call to try the
5574 comparison with zero. Don't do any of these cases if branches are
5575 very cheap. */
5576
495499da 5577 if ((code == EQ || code == NE) && op1 != const0_rtx)
44037a66
TG
5578 {
5579 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5580 OPTAB_WIDEN);
5581
5582 if (tem == 0)
5583 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5584 OPTAB_WIDEN);
5585 if (tem != 0)
a22fb74c
AK
5586 tem = emit_store_flag (target, code, tem, const0_rtx,
5587 mode, unsignedp, normalizep);
495499da
PB
5588 if (tem != 0)
5589 return tem;
5590
5591 delete_insns_since (last);
5592 }
5593
5594 /* For integer comparisons, try the reverse comparison. However, for
5595 small X, and if we'd have to extend anyway, implementing "X != 0"
5596 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
5597 rcode = reverse_condition (code);
5598 if (can_compare_p (rcode, mode, ccp_store_flag)
947131ba 5599 && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
495499da
PB
5600 && code == NE
5601 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5602 && op1 == const0_rtx))
5603 {
533d4b99
PB
5604 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5605 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5606
495499da 5607 /* Again, for the reverse comparison, use either an addition or an XOR. */
533d4b99 5608 if (want_add
68f932c4 5609 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
533d4b99 5610 optimize_insn_for_speed_p ()) == 0)
495499da 5611 {
ef12ae45 5612 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
92355a9c 5613 STORE_FLAG_VALUE, target_mode);
495499da
PB
5614 if (tem != 0)
5615 tem = expand_binop (target_mode, add_optab, tem,
2f1cd2eb
RS
5616 gen_int_mode (normalizep, target_mode),
5617 target, 0, OPTAB_WIDEN);
495499da 5618 }
533d4b99 5619 else if (!want_add
68f932c4 5620 && rtx_cost (trueval, XOR, 1,
533d4b99 5621 optimize_insn_for_speed_p ()) == 0)
495499da 5622 {
ef12ae45 5623 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
92355a9c 5624 normalizep, target_mode);
495499da
PB
5625 if (tem != 0)
5626 tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5627 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5628 }
5629
5630 if (tem != 0)
5631 return tem;
5632 delete_insns_since (last);
44037a66
TG
5633 }
5634
c410d49e 5635 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
44037a66
TG
5636 the constant zero. Reject all other comparisons at this point. Only
5637 do LE and GT if branches are expensive since they are expensive on
5638 2-operand machines. */
5639
495499da 5640 if (op1 != const0_rtx
44037a66 5641 || (code != EQ && code != NE
3a4fd356
JH
5642 && (BRANCH_COST (optimize_insn_for_speed_p (),
5643 false) <= 1 || (code != LE && code != GT))))
44037a66
TG
5644 return 0;
5645
44037a66
TG
5646 /* Try to put the result of the comparison in the sign bit. Assume we can't
5647 do the necessary operation below. */
5648
5649 tem = 0;
5650
5651 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5652 the sign bit set. */
5653
5654 if (code == LE)
5655 {
5656 /* This is destructive, so SUBTARGET can't be OP0. */
5657 if (rtx_equal_p (subtarget, op0))
5658 subtarget = 0;
5659
5660 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5661 OPTAB_WIDEN);
5662 if (tem)
5663 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5664 OPTAB_WIDEN);
5665 }
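  /* Illustrative note, not in the original source: A | (A - 1) has the
     sign bit set exactly when A <= 0.  Examples: A = 0 gives 0 | -1
     = -1 (set); A = 3 gives 3 | 2 = 3 (clear); A = -5 gives
     -5 | -6 = -5 (set).  */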
5666
5667 /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5668 number of bits in the mode of OP0, minus one. */
5669
5670 if (code == GT)
5671 {
5672 if (rtx_equal_p (subtarget, op0))
5673 subtarget = 0;
5674
5675 tem = expand_shift (RSHIFT_EXPR, mode, op0,
eb6c3df1 5676 GET_MODE_BITSIZE (mode) - 1,
44037a66
TG
5677 subtarget, 0);
5678 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5679 OPTAB_WIDEN);
5680 }
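  /* Illustrative note, not in the original source: the sign bit of
     (A >> (N - 1)) - A is set exactly when A > 0.  Examples: A = 5
     gives 0 - 5 = -5 (set); A = 0 gives 0 (clear); A = -5 gives
     -1 - (-5) = 4 (clear).  */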
c410d49e 5681
44037a66
TG
5682 if (code == EQ || code == NE)
5683 {
5684 /* For EQ or NE, one way to do the comparison is to apply an operation
cc2902df 5685 that converts the operand into a positive number if it is nonzero
44037a66
TG
5686 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5687 for NE we negate. This puts the result in the sign bit. Then we
c410d49e 5688 normalize with a shift, if needed.
44037a66
TG
5689
5690 Two operations that can do the above actions are ABS and FFS, so try
5691 them. If that doesn't work, and MODE is smaller than a full word,
36d747f6 5692 we can use zero-extension to the wider mode (an unsigned conversion)
44037a66
TG
5693 as the operation. */
5694
c410d49e
EC
5695 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5696 that is compensated by the subsequent overflow when subtracting
30f7a378 5697 one / negating. */
91ce572a 5698
947131ba 5699 if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
44037a66 5700 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
947131ba 5701 else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
44037a66
TG
5702 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5703 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5704 {
c2ec26b8 5705 tem = convert_modes (word_mode, mode, op0, 1);
81722fa9 5706 mode = word_mode;
44037a66
TG
5707 }
5708
5709 if (tem != 0)
5710 {
5711 if (code == EQ)
5712 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5713 0, OPTAB_WIDEN);
5714 else
5715 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5716 }
5717
5718 /* If we couldn't do it that way, for NE we can "or" the two's complement
5719 of the value with itself. For EQ, we take the one's complement of
5720 that "or", which is an extra insn, so we only handle EQ if branches
5721 are expensive. */
5722
3a4fd356
JH
5723 if (tem == 0
5724 && (code == NE
5725 || BRANCH_COST (optimize_insn_for_speed_p (),
5726 false) > 1))
44037a66 5727 {
36d747f6
RS
5728 if (rtx_equal_p (subtarget, op0))
5729 subtarget = 0;
5730
44037a66
TG
5731 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5732 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5733 OPTAB_WIDEN);
5734
5735 if (tem && code == EQ)
5736 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5737 }
5738 }
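      /* Illustrative note, not in the original source: -A | A has the
	 sign bit set exactly when A != 0, since for nonzero A at least
	 one of A and -A is negative (A = INT_MIN gives
	 INT_MIN | INT_MIN).  The one's complement then turns the NE
	 form into the EQ one.  */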
5739
5740 if (tem && normalizep)
5741 tem = expand_shift (RSHIFT_EXPR, mode, tem,
eb6c3df1 5742 GET_MODE_BITSIZE (mode) - 1,
91e66235 5743 subtarget, normalizep == 1);
44037a66 5744
91e66235 5745 if (tem)
44037a66 5746 {
495499da
PB
5747 if (!target)
5748 ;
5749 else if (GET_MODE (tem) != target_mode)
91e66235
MM
5750 {
5751 convert_move (target, tem, 0);
5752 tem = target;
5753 }
5754 else if (!subtarget)
5755 {
5756 emit_move_insn (target, tem);
5757 tem = target;
5758 }
44037a66 5759 }
91e66235 5760 else
44037a66
TG
5761 delete_insns_since (last);
5762
5763 return tem;
5764}
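/* Illustrative sketch, not part of expmed.c: two of the branch-free
   store-flag forms synthesized above, in plain C for 32-bit ints.  */

static unsigned int
example_lt0 (int x)
{
  /* x < 0 normalized to 0/1: move the sign bit down logically.  */
  return (unsigned int) x >> 31;
}

static unsigned int
example_ne0 (unsigned int x)
{
  /* x | -x has the sign bit set exactly when x is nonzero.  */
  return (x | (0u - x)) >> 31;
}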
04a8ee2f
TG
5765
5766/* Like emit_store_flag, but always succeeds. */
5767
5768rtx
502b8322 5769emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
ef4bddc2 5770 machine_mode mode, int unsignedp, int normalizep)
04a8ee2f 5771{
f3f6fb16
DM
5772 rtx tem;
5773 rtx_code_label *label;
495499da 5774 rtx trueval, falseval;
04a8ee2f
TG
5775
5776 /* First see if emit_store_flag can do the job. */
5777 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5778 if (tem != 0)
5779 return tem;
5780
495499da
PB
5781 if (!target)
5782 target = gen_reg_rtx (word_mode);
04a8ee2f 5783
495499da
PB
5784 /* If this failed, we have to do this with set/compare/jump/set code.
5785 For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */
5786 trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
b8698a0f 5787 if (code == NE
495499da
PB
5788 && GET_MODE_CLASS (mode) == MODE_INT
5789 && REG_P (target)
5790 && op0 == target
5791 && op1 == const0_rtx)
5792 {
5793 label = gen_label_rtx ();
1476d1bd
MM
5794 do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, mode,
5795 NULL_RTX, NULL, label, -1);
495499da
PB
5796 emit_move_insn (target, trueval);
5797 emit_label (label);
5798 return target;
5799 }
04a8ee2f 5800
f8cfc6aa 5801 if (!REG_P (target)
04a8ee2f
TG
5802 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5803 target = gen_reg_rtx (GET_MODE (target));
5804
495499da
PB
5805 /* Jump in the right direction if the target cannot implement CODE
5806 but can jump on its reverse condition. */
5807 falseval = const0_rtx;
5808 if (! can_compare_p (code, mode, ccp_jump)
5809 && (! FLOAT_MODE_P (mode)
5810 || code == ORDERED || code == UNORDERED
5811 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5812 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5813 {
5814 enum rtx_code rcode;
5815 if (FLOAT_MODE_P (mode))
5816 rcode = reverse_condition_maybe_unordered (code);
5817 else
5818 rcode = reverse_condition (code);
5819
5820 /* Canonicalize to UNORDERED for the libcall. */
5821 if (can_compare_p (rcode, mode, ccp_jump)
5822 || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5823 {
5824 falseval = trueval;
5825 trueval = const0_rtx;
5826 code = rcode;
5827 }
5828 }
5829
5830 emit_move_insn (target, trueval);
04a8ee2f 5831 label = gen_label_rtx ();
1476d1bd
MM
5832 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, NULL,
5833 label, -1);
04a8ee2f 5834
495499da 5835 emit_move_insn (target, falseval);
44037a66
TG
5836 emit_label (label);
5837
5838 return target;
5839}
f5963e61
JL
5840\f
5841/* Perform possibly multi-word comparison and conditional jump to LABEL
feb04780
RS
5842 if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE. This is
5843 now a thin wrapper around do_compare_rtx_and_jump. */
f5963e61
JL
5844
5845static void
ef4bddc2 5846do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
f3f6fb16 5847 rtx_code_label *label)
f5963e61 5848{
feb04780 5849 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
1476d1bd
MM
5850 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode, NULL_RTX,
5851 NULL, label, -1);
f5963e61 5852}