gcc/expmed.c
1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987-2015 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "diagnostic-core.h"
27 #include "rtl.h"
28 #include "hash-set.h"
29 #include "machmode.h"
30 #include "vec.h"
31 #include "double-int.h"
32 #include "input.h"
33 #include "alias.h"
34 #include "symtab.h"
35 #include "wide-int.h"
36 #include "inchash.h"
37 #include "tree.h"
38 #include "fold-const.h"
39 #include "stor-layout.h"
40 #include "tm_p.h"
41 #include "flags.h"
42 #include "insn-config.h"
43 #include "expr.h"
44 #include "insn-codes.h"
45 #include "optabs.h"
46 #include "recog.h"
47 #include "langhooks.h"
48 #include "predict.h"
49 #include "basic-block.h"
50 #include "df.h"
51 #include "target.h"
52 #include "expmed.h"
53
54 struct target_expmed default_target_expmed;
55 #if SWITCHABLE_TARGET
56 struct target_expmed *this_target_expmed = &default_target_expmed;
57 #endif
58
59 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
60 unsigned HOST_WIDE_INT,
61 unsigned HOST_WIDE_INT,
62 unsigned HOST_WIDE_INT,
63 rtx);
64 static void store_fixed_bit_field_1 (rtx, unsigned HOST_WIDE_INT,
65 unsigned HOST_WIDE_INT,
66 rtx);
67 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
68 unsigned HOST_WIDE_INT,
69 unsigned HOST_WIDE_INT,
70 unsigned HOST_WIDE_INT,
71 rtx);
72 static rtx extract_fixed_bit_field (machine_mode, rtx,
73 unsigned HOST_WIDE_INT,
74 unsigned HOST_WIDE_INT, rtx, int);
75 static rtx extract_fixed_bit_field_1 (machine_mode, rtx,
76 unsigned HOST_WIDE_INT,
77 unsigned HOST_WIDE_INT, rtx, int);
78 static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
79 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
80 unsigned HOST_WIDE_INT, int);
81 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
82 static rtx expand_smod_pow2 (machine_mode, rtx, HOST_WIDE_INT);
83 static rtx expand_sdiv_pow2 (machine_mode, rtx, HOST_WIDE_INT);
84
85 /* Return a constant integer mask value of mode MODE with BITSIZE ones
86 followed by BITPOS zeros, or the complement of that if COMPLEMENT.
87 The mask is truncated if necessary to the width of mode MODE. The
88 mask is zero-extended if BITSIZE+BITPOS is too small for MODE. */
89
90 static inline rtx
91 mask_rtx (machine_mode mode, int bitpos, int bitsize, bool complement)
92 {
93 return immed_wide_int_const
94 (wi::shifted_mask (bitpos, bitsize, complement,
95 GET_MODE_PRECISION (mode)), mode);
96 }
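/* As an illustration (assuming a 32-bit SImode):
   mask_rtx (SImode, 8, 4, false) yields the constant 0x00000f00,
   i.e. four ones starting at bit 8, while passing true for
   COMPLEMENT yields 0xfffff0ff instead.  */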
97
98 /* Test whether a value is zero or a power of two. */
99 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
100 (((x) & ((x) - (unsigned HOST_WIDE_INT) 1)) == 0)
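/* For example, 8 & 7 == 0 (a power of two), 12 & 11 == 8 (not a
   power of two), and 0 & (0 - 1) == 0 (zero also passes the test).  */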
101
102 struct init_expmed_rtl
103 {
104 rtx reg;
105 rtx plus;
106 rtx neg;
107 rtx mult;
108 rtx sdiv;
109 rtx udiv;
110 rtx sdiv_32;
111 rtx smod_32;
112 rtx wide_mult;
113 rtx wide_lshr;
114 rtx wide_trunc;
115 rtx shift;
116 rtx shift_mult;
117 rtx shift_add;
118 rtx shift_sub0;
119 rtx shift_sub1;
120 rtx zext;
121 rtx trunc;
122
123 rtx pow2[MAX_BITS_PER_WORD];
124 rtx cint[MAX_BITS_PER_WORD];
125 };
126
127 static void
128 init_expmed_one_conv (struct init_expmed_rtl *all, machine_mode to_mode,
129 machine_mode from_mode, bool speed)
130 {
131 int to_size, from_size;
132 rtx which;
133
134 to_size = GET_MODE_PRECISION (to_mode);
135 from_size = GET_MODE_PRECISION (from_mode);
136
137 /* Most partial integers have a precision less than the "full"
138 integer it requires for storage. If one doesn't, reduce the
139 bit size by one for comparison purposes here, so that it still
140 compares as narrower than the full-width mode. */
141 if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
142 && exact_log2 (to_size) != -1)
143 to_size --;
144 if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
145 && exact_log2 (from_size) != -1)
146 from_size --;
147
148 /* Assume cost of zero-extend and sign-extend is the same. */
149 which = (to_size < from_size ? all->trunc : all->zext);
150
151 PUT_MODE (all->reg, from_mode);
152 set_convert_cost (to_mode, from_mode, speed, set_src_cost (which, speed));
153 }
154
155 static void
156 init_expmed_one_mode (struct init_expmed_rtl *all,
157 machine_mode mode, int speed)
158 {
159 int m, n, mode_bitsize;
160 machine_mode mode_from;
161
162 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
163
164 PUT_MODE (all->reg, mode);
165 PUT_MODE (all->plus, mode);
166 PUT_MODE (all->neg, mode);
167 PUT_MODE (all->mult, mode);
168 PUT_MODE (all->sdiv, mode);
169 PUT_MODE (all->udiv, mode);
170 PUT_MODE (all->sdiv_32, mode);
171 PUT_MODE (all->smod_32, mode);
172 PUT_MODE (all->wide_trunc, mode);
173 PUT_MODE (all->shift, mode);
174 PUT_MODE (all->shift_mult, mode);
175 PUT_MODE (all->shift_add, mode);
176 PUT_MODE (all->shift_sub0, mode);
177 PUT_MODE (all->shift_sub1, mode);
178 PUT_MODE (all->zext, mode);
179 PUT_MODE (all->trunc, mode);
180
181 set_add_cost (speed, mode, set_src_cost (all->plus, speed));
182 set_neg_cost (speed, mode, set_src_cost (all->neg, speed));
183 set_mul_cost (speed, mode, set_src_cost (all->mult, speed));
184 set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, speed));
185 set_udiv_cost (speed, mode, set_src_cost (all->udiv, speed));
186
187 set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, speed)
188 <= 2 * add_cost (speed, mode)));
189 set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, speed)
190 <= 4 * add_cost (speed, mode)));
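/* In other words, dividing by a power of two is considered cheap when
   its rtx cost is at most twice the cost of an addition, and taking a
   remainder modulo a power of two when it is at most four times that
   cost; e.g. with an add cost of 4 the thresholds are 8 and 16.  */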
191
192 set_shift_cost (speed, mode, 0, 0);
193 {
194 int cost = add_cost (speed, mode);
195 set_shiftadd_cost (speed, mode, 0, cost);
196 set_shiftsub0_cost (speed, mode, 0, cost);
197 set_shiftsub1_cost (speed, mode, 0, cost);
198 }
199
200 n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
201 for (m = 1; m < n; m++)
202 {
203 XEXP (all->shift, 1) = all->cint[m];
204 XEXP (all->shift_mult, 1) = all->pow2[m];
205
206 set_shift_cost (speed, mode, m, set_src_cost (all->shift, speed));
207 set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, speed));
208 set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, speed));
209 set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, speed));
210 }
211
212 if (SCALAR_INT_MODE_P (mode))
213 {
214 for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
215 mode_from = (machine_mode)(mode_from + 1))
216 init_expmed_one_conv (all, mode, mode_from, speed);
217 }
218 if (GET_MODE_CLASS (mode) == MODE_INT)
219 {
220 machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
221 if (wider_mode != VOIDmode)
222 {
223 PUT_MODE (all->zext, wider_mode);
224 PUT_MODE (all->wide_mult, wider_mode);
225 PUT_MODE (all->wide_lshr, wider_mode);
226 XEXP (all->wide_lshr, 1) = GEN_INT (mode_bitsize);
227
228 set_mul_widen_cost (speed, wider_mode,
229 set_src_cost (all->wide_mult, speed));
230 set_mul_highpart_cost (speed, mode,
231 set_src_cost (all->wide_trunc, speed));
232 }
233 }
234 }
235
236 void
237 init_expmed (void)
238 {
239 struct init_expmed_rtl all;
240 machine_mode mode = QImode;
241 int m, speed;
242
243 memset (&all, 0, sizeof all);
244 for (m = 1; m < MAX_BITS_PER_WORD; m++)
245 {
246 all.pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
247 all.cint[m] = GEN_INT (m);
248 }
249
250 /* Avoid using hard regs in ways which may be unsupported. */
251 all.reg = gen_rtx_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
252 all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
253 all.neg = gen_rtx_NEG (mode, all.reg);
254 all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
255 all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
256 all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
257 all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
258 all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
259 all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
260 all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
261 all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
262 all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
263 all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
264 all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
265 all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
266 all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
267 all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
268 all.trunc = gen_rtx_TRUNCATE (mode, all.reg);
269
270 for (speed = 0; speed < 2; speed++)
271 {
272 crtl->maybe_hot_insn_p = speed;
273 set_zero_cost (speed, set_src_cost (const0_rtx, speed));
274
275 for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
276 mode = (machine_mode)(mode + 1))
277 init_expmed_one_mode (&all, mode, speed);
278
279 if (MIN_MODE_PARTIAL_INT != VOIDmode)
280 for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
281 mode = (machine_mode)(mode + 1))
282 init_expmed_one_mode (&all, mode, speed);
283
284 if (MIN_MODE_VECTOR_INT != VOIDmode)
285 for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
286 mode = (machine_mode)(mode + 1))
287 init_expmed_one_mode (&all, mode, speed);
288 }
289
290 if (alg_hash_used_p ())
291 {
292 struct alg_hash_entry *p = alg_hash_entry_ptr (0);
293 memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
294 }
295 else
296 set_alg_hash_used_p (true);
297 default_rtl_profile ();
298
299 ggc_free (all.trunc);
300 ggc_free (all.shift_sub1);
301 ggc_free (all.shift_sub0);
302 ggc_free (all.shift_add);
303 ggc_free (all.shift_mult);
304 ggc_free (all.shift);
305 ggc_free (all.wide_trunc);
306 ggc_free (all.wide_lshr);
307 ggc_free (all.wide_mult);
308 ggc_free (all.zext);
309 ggc_free (all.smod_32);
310 ggc_free (all.sdiv_32);
311 ggc_free (all.udiv);
312 ggc_free (all.sdiv);
313 ggc_free (all.mult);
314 ggc_free (all.neg);
315 ggc_free (all.plus);
316 ggc_free (all.reg);
317 }
318
319 /* Return an rtx representing minus the value of X.
320 MODE is the intended mode of the result,
321 useful if X is a CONST_INT. */
322
323 rtx
324 negate_rtx (machine_mode mode, rtx x)
325 {
326 rtx result = simplify_unary_operation (NEG, mode, x, mode);
327
328 if (result == 0)
329 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
330
331 return result;
332 }
333
334 /* Adjust bitfield memory MEM so that it points to the first unit of mode
335 MODE that contains a bitfield of size BITSIZE at bit position BITNUM.
336 If MODE is BLKmode, return a reference to every byte in the bitfield.
337 Set *NEW_BITNUM to the bit position of the field within the new memory. */
338
339 static rtx
340 narrow_bit_field_mem (rtx mem, machine_mode mode,
341 unsigned HOST_WIDE_INT bitsize,
342 unsigned HOST_WIDE_INT bitnum,
343 unsigned HOST_WIDE_INT *new_bitnum)
344 {
345 if (mode == BLKmode)
346 {
347 *new_bitnum = bitnum % BITS_PER_UNIT;
348 HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
349 HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
350 / BITS_PER_UNIT);
351 return adjust_bitfield_address_size (mem, mode, offset, size);
352 }
353 else
354 {
355 unsigned int unit = GET_MODE_BITSIZE (mode);
356 *new_bitnum = bitnum % unit;
357 HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
358 return adjust_bitfield_address (mem, mode, offset);
359 }
360 }
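/* A worked example, assuming 8-bit units: narrowing to HImode (16 bits)
   with BITNUM 37 sets *NEW_BITNUM to 37 % 16 = 5 and offsets the
   address by (37 - 5) / 8 = 4 bytes; with MODE == BLKmode and a
   BITSIZE of 12, *NEW_BITNUM becomes 37 % 8 = 5 and the reference
   covers (5 + 12 + 7) / 8 = 3 bytes starting at byte offset 4.  */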
361
362 /* The caller wants to perform insertion or extraction PATTERN on a
363 bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
364 BITREGION_START and BITREGION_END are as for store_bit_field
365 and FIELDMODE is the natural mode of the field.
366
367 Search for a mode that is compatible with the memory access
368 restrictions and (where applicable) with a register insertion or
369 extraction. Return the new memory on success, storing the adjusted
370 bit position in *NEW_BITNUM. Return null otherwise. */
371
372 static rtx
373 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
374 rtx op0, HOST_WIDE_INT bitsize,
375 HOST_WIDE_INT bitnum,
376 unsigned HOST_WIDE_INT bitregion_start,
377 unsigned HOST_WIDE_INT bitregion_end,
378 machine_mode fieldmode,
379 unsigned HOST_WIDE_INT *new_bitnum)
380 {
381 bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
382 bitregion_end, MEM_ALIGN (op0),
383 MEM_VOLATILE_P (op0));
384 machine_mode best_mode;
385 if (iter.next_mode (&best_mode))
386 {
387 /* We can use a memory in BEST_MODE. See whether this is true for
388 any wider modes. All other things being equal, we prefer to
389 use the widest mode possible because it tends to expose more
390 CSE opportunities. */
391 if (!iter.prefer_smaller_modes ())
392 {
393 /* Limit the search to the mode required by the corresponding
394 register insertion or extraction instruction, if any. */
395 machine_mode limit_mode = word_mode;
396 extraction_insn insn;
397 if (get_best_reg_extraction_insn (&insn, pattern,
398 GET_MODE_BITSIZE (best_mode),
399 fieldmode))
400 limit_mode = insn.field_mode;
401
402 machine_mode wider_mode;
403 while (iter.next_mode (&wider_mode)
404 && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
405 best_mode = wider_mode;
406 }
407 return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
408 new_bitnum);
409 }
410 return NULL_RTX;
411 }
412
413 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
414 a structure of mode STRUCT_MODE represents a lowpart subreg. The subreg
415 offset is then BITNUM / BITS_PER_UNIT. */
416
417 static bool
418 lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
419 unsigned HOST_WIDE_INT bitsize,
420 machine_mode struct_mode)
421 {
422 if (BYTES_BIG_ENDIAN)
423 return (bitnum % BITS_PER_UNIT == 0
424 && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
425 || (bitnum + bitsize) % BITS_PER_WORD == 0));
426 else
427 return bitnum % BITS_PER_WORD == 0;
428 }
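/* For instance, on a little-endian target with 32-bit words, a 16-bit
   field at bit 32 of a DImode value is a lowpart (32 % 32 == 0) with
   subreg offset 32 / BITS_PER_UNIT = 4, whereas a field at bit 8 is
   not.  */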
429
430 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
431 containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
432 Return false if the access would touch memory outside the range
433 BITREGION_START to BITREGION_END for conformance to the C++ memory
434 model. */
435
436 static bool
437 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
438 unsigned HOST_WIDE_INT bitnum,
439 machine_mode fieldmode,
440 unsigned HOST_WIDE_INT bitregion_start,
441 unsigned HOST_WIDE_INT bitregion_end)
442 {
443 unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
444
445 /* -fstrict-volatile-bitfields must be enabled and we must have a
446 volatile MEM. */
447 if (!MEM_P (op0)
448 || !MEM_VOLATILE_P (op0)
449 || flag_strict_volatile_bitfields <= 0)
450 return false;
451
452 /* Non-integral modes likely only happen with packed structures.
453 Punt. */
454 if (!SCALAR_INT_MODE_P (fieldmode))
455 return false;
456
457 /* The bit size must not be larger than the field mode, and
458 the field mode must not be larger than a word. */
459 if (bitsize > modesize || modesize > BITS_PER_WORD)
460 return false;
461
462 /* Check for cases of unaligned fields that must be split. */
463 if (bitnum % BITS_PER_UNIT + bitsize > modesize
464 || (STRICT_ALIGNMENT
465 && bitnum % GET_MODE_ALIGNMENT (fieldmode) + bitsize > modesize))
466 return false;
467
468 /* Check for cases where the C++ memory model applies. */
469 if (bitregion_end != 0
470 && (bitnum - bitnum % modesize < bitregion_start
471 || bitnum - bitnum % modesize + modesize - 1 > bitregion_end))
472 return false;
473
474 return true;
475 }
476
477 /* Return true if OP is a memory and if a bitfield of size BITSIZE at
478 bit number BITNUM can be treated as a simple value of mode MODE. */
479
480 static bool
481 simple_mem_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
482 unsigned HOST_WIDE_INT bitnum, machine_mode mode)
483 {
484 return (MEM_P (op0)
485 && bitnum % BITS_PER_UNIT == 0
486 && bitsize == GET_MODE_BITSIZE (mode)
487 && (!SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
488 || (bitnum % GET_MODE_ALIGNMENT (mode) == 0
489 && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
490 }
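/* For example, assuming a 32-bit SImode, a 32-bit field at bit 64 of
   a MEM aligned to 32 bits satisfies this: the bit number is a
   multiple of BITS_PER_UNIT, the size matches the mode, and the
   access is naturally aligned, so it can be done as a plain SImode
   load or store at byte offset 8.  */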
491 \f
492 /* Try to use instruction INSV to store VALUE into a field of OP0.
493 BITSIZE and BITNUM are as for store_bit_field. */
494
495 static bool
496 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
497 unsigned HOST_WIDE_INT bitsize,
498 unsigned HOST_WIDE_INT bitnum,
499 rtx value)
500 {
501 struct expand_operand ops[4];
502 rtx value1;
503 rtx xop0 = op0;
504 rtx_insn *last = get_last_insn ();
505 bool copy_back = false;
506
507 machine_mode op_mode = insv->field_mode;
508 unsigned int unit = GET_MODE_BITSIZE (op_mode);
509 if (bitsize == 0 || bitsize > unit)
510 return false;
511
512 if (MEM_P (xop0))
513 /* Get a reference to the first byte of the field. */
514 xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
515 &bitnum);
516 else
517 {
518 /* Convert from counting within OP0 to counting in OP_MODE. */
519 if (BYTES_BIG_ENDIAN)
520 bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
521
522 /* If xop0 is a register, we need it in OP_MODE
523 to make it acceptable to the format of insv. */
524 if (GET_CODE (xop0) == SUBREG)
525 /* We can't just change the mode, because this might clobber op0,
526 and we will need the original value of op0 if insv fails. */
527 xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
528 if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
529 xop0 = gen_lowpart_SUBREG (op_mode, xop0);
530 }
531
532 /* If the destination is a paradoxical subreg such that we need a
533 truncate to the inner mode, perform the insertion on a temporary and
534 truncate the result to the original destination. Note that we can't
535 just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
536 X) 0)) is (reg:N X). */
537 if (GET_CODE (xop0) == SUBREG
538 && REG_P (SUBREG_REG (xop0))
539 && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
540 op_mode))
541 {
542 rtx tem = gen_reg_rtx (op_mode);
543 emit_move_insn (tem, xop0);
544 xop0 = tem;
545 copy_back = true;
546 }
547
548 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
549 "backwards" from the size of the unit we are inserting into.
550 Otherwise, we count bits from the most significant on a
551 BYTES/BITS_BIG_ENDIAN machine. */
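/* For example, with a 32-bit unit, an 8-bit field whose BITNUM is 4
   under one counting convention has BITNUM 32 - 8 - 4 = 20 under the
   opposite convention.  */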
552
553 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
554 bitnum = unit - bitsize - bitnum;
555
556 /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
557 value1 = value;
558 if (GET_MODE (value) != op_mode)
559 {
560 if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
561 {
562 /* Optimization: Don't bother really extending VALUE
563 if it has all the bits we will actually use. However,
564 if we must narrow it, be sure we do it correctly. */
565
566 if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
567 {
568 rtx tmp;
569
570 tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
571 if (! tmp)
572 tmp = simplify_gen_subreg (op_mode,
573 force_reg (GET_MODE (value),
574 value1),
575 GET_MODE (value), 0);
576 value1 = tmp;
577 }
578 else
579 value1 = gen_lowpart (op_mode, value1);
580 }
581 else if (CONST_INT_P (value))
582 value1 = gen_int_mode (INTVAL (value), op_mode);
583 else
584 /* The parse phase is supposed to make VALUE's data type
585 match that of the component reference, which is a type
586 at least as wide as the field; so VALUE should have
587 a mode that corresponds to that type. */
588 gcc_assert (CONSTANT_P (value));
589 }
590
591 create_fixed_operand (&ops[0], xop0);
592 create_integer_operand (&ops[1], bitsize);
593 create_integer_operand (&ops[2], bitnum);
594 create_input_operand (&ops[3], value1, op_mode);
595 if (maybe_expand_insn (insv->icode, 4, ops))
596 {
597 if (copy_back)
598 convert_move (op0, xop0, true);
599 return true;
600 }
601 delete_insns_since (last);
602 return false;
603 }
604
605 /* A subroutine of store_bit_field, with the same arguments. Return true
606 if the operation could be implemented.
607
608 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
609 no other way of implementing the operation. If FALLBACK_P is false,
610 return false instead. */
611
612 static bool
613 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
614 unsigned HOST_WIDE_INT bitnum,
615 unsigned HOST_WIDE_INT bitregion_start,
616 unsigned HOST_WIDE_INT bitregion_end,
617 machine_mode fieldmode,
618 rtx value, bool fallback_p)
619 {
620 rtx op0 = str_rtx;
621 rtx orig_value;
622
623 while (GET_CODE (op0) == SUBREG)
624 {
625 /* The following line once was done only if WORDS_BIG_ENDIAN,
626 but I think that is a mistake. WORDS_BIG_ENDIAN is
627 meaningful at a much higher level; when structures are copied
628 between memory and regs, the higher-numbered regs
629 always get higher addresses. */
630 int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
631 int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
632 int byte_offset = 0;
633
634 /* Paradoxical subregs need special handling on big endian machines. */
635 if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
636 {
637 int difference = inner_mode_size - outer_mode_size;
638
639 if (WORDS_BIG_ENDIAN)
640 byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
641 if (BYTES_BIG_ENDIAN)
642 byte_offset += difference % UNITS_PER_WORD;
643 }
644 else
645 byte_offset = SUBREG_BYTE (op0);
646
647 bitnum += byte_offset * BITS_PER_UNIT;
648 op0 = SUBREG_REG (op0);
649 }
650
651 /* No action is needed if the target is a register and if the field
652 lies completely outside that register. This can occur if the source
653 code contains an out-of-bounds access to a small array. */
654 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
655 return true;
656
657 /* Use vec_set patterns for inserting parts of vectors whenever
658 available. */
659 if (VECTOR_MODE_P (GET_MODE (op0))
660 && !MEM_P (op0)
661 && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
662 && fieldmode == GET_MODE_INNER (GET_MODE (op0))
663 && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
664 && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
665 {
666 struct expand_operand ops[3];
667 machine_mode outermode = GET_MODE (op0);
668 machine_mode innermode = GET_MODE_INNER (outermode);
669 enum insn_code icode = optab_handler (vec_set_optab, outermode);
670 int pos = bitnum / GET_MODE_BITSIZE (innermode);
671
672 create_fixed_operand (&ops[0], op0);
673 create_input_operand (&ops[1], value, innermode);
674 create_integer_operand (&ops[2], pos);
675 if (maybe_expand_insn (icode, 3, ops))
676 return true;
677 }
678
679 /* If the target is a register, then overwriting the entire object or
680 storing a full-word or multi-word field can be done with just a SUBREG. */
681 if (!MEM_P (op0)
682 && bitsize == GET_MODE_BITSIZE (fieldmode)
683 && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
684 || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
685 {
686 /* Use the subreg machinery either to narrow OP0 to the required
687 words or to cope with mode punning between equal-sized modes.
688 In the latter case, use subreg on the rhs side, not lhs. */
689 rtx sub;
690
691 if (bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
692 {
693 sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
694 if (sub)
695 {
696 emit_move_insn (op0, sub);
697 return true;
698 }
699 }
700 else
701 {
702 sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
703 bitnum / BITS_PER_UNIT);
704 if (sub)
705 {
706 emit_move_insn (sub, value);
707 return true;
708 }
709 }
710 }
711
712 /* If the target is memory, storing any naturally aligned field can be
713 done with a simple store. For targets that support fast unaligned
714 memory, any naturally sized, unit aligned field can be done directly. */
715 if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode))
716 {
717 op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
718 emit_move_insn (op0, value);
719 return true;
720 }
721
722 /* Make sure we are playing with integral modes. Pun with subregs
723 if we aren't. This must come after the entire register case above,
724 since that case is valid for any mode. The following cases are only
725 valid for integral modes. */
726 {
727 machine_mode imode = int_mode_for_mode (GET_MODE (op0));
728 if (imode != GET_MODE (op0))
729 {
730 if (MEM_P (op0))
731 op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
732 else
733 {
734 gcc_assert (imode != BLKmode);
735 op0 = gen_lowpart (imode, op0);
736 }
737 }
738 }
739
740 /* Storing an lsb-aligned field in a register
741 can be done with a movstrict instruction. */
742
743 if (!MEM_P (op0)
744 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
745 && bitsize == GET_MODE_BITSIZE (fieldmode)
746 && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
747 {
748 struct expand_operand ops[2];
749 enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
750 rtx arg0 = op0;
751 unsigned HOST_WIDE_INT subreg_off;
752
753 if (GET_CODE (arg0) == SUBREG)
754 {
755 /* Else we've got some float mode source being extracted into
756 a different float mode destination -- this combination of
757 subregs results in Severe Tire Damage. */
758 gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
759 || GET_MODE_CLASS (fieldmode) == MODE_INT
760 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
761 arg0 = SUBREG_REG (arg0);
762 }
763
764 subreg_off = bitnum / BITS_PER_UNIT;
765 if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
766 {
767 arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
768
769 create_fixed_operand (&ops[0], arg0);
770 /* Shrink the source operand to FIELDMODE. */
771 create_convert_operand_to (&ops[1], value, fieldmode, false);
772 if (maybe_expand_insn (icode, 2, ops))
773 return true;
774 }
775 }
776
777 /* Handle fields bigger than a word. */
778
779 if (bitsize > BITS_PER_WORD)
780 {
781 /* Here we transfer the words of the field
782 in the order least significant first.
783 This is because the most significant word is the one which may
784 be less than full.
785 However, only do that if the value is not BLKmode. */
786
787 unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
788 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
789 unsigned int i;
790 rtx_insn *last;
791
792 /* This is the mode we must force value to, so that there will be enough
793 subwords to extract. Note that fieldmode will often (always?) be
794 VOIDmode, because that is what store_field uses to indicate that this
795 is a bit field, but passing VOIDmode to operand_subword_force
796 is not allowed. */
797 fieldmode = GET_MODE (value);
798 if (fieldmode == VOIDmode)
799 fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
800
801 last = get_last_insn ();
802 for (i = 0; i < nwords; i++)
803 {
804 /* If I is 0, use the low-order word in both field and target;
805 if I is 1, use the next to lowest word; and so on. */
806 unsigned int wordnum = (backwards
807 ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
808 - i - 1
809 : i);
810 unsigned int bit_offset = (backwards
811 ? MAX ((int) bitsize - ((int) i + 1)
812 * BITS_PER_WORD,
813 0)
814 : (int) i * BITS_PER_WORD);
815 rtx value_word = operand_subword_force (value, wordnum, fieldmode);
816 unsigned HOST_WIDE_INT new_bitsize =
817 MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
818
819 /* If the remaining chunk doesn't have a full word's worth of bits,
820 we have to make sure that for big-endian machines the higher-order
821 bits are used. */
822 if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
823 value_word = simplify_expand_binop (word_mode, lshr_optab,
824 value_word,
825 GEN_INT (BITS_PER_WORD
826 - new_bitsize),
827 NULL_RTX, true,
828 OPTAB_LIB_WIDEN);
829
830 if (!store_bit_field_1 (op0, new_bitsize,
831 bitnum + bit_offset,
832 bitregion_start, bitregion_end,
833 word_mode,
834 value_word, fallback_p))
835 {
836 delete_insns_since (last);
837 return false;
838 }
839 }
840 return true;
841 }
842
843 /* If VALUE has a floating-point or complex mode, access it as an
844 integer of the corresponding size. This can occur on a machine
845 with 64 bit registers that uses SFmode for float. It can also
846 occur for unaligned float or complex fields. */
847 orig_value = value;
848 if (GET_MODE (value) != VOIDmode
849 && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
850 && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
851 {
852 value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
853 emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
854 }
855
856 /* If OP0 is a multi-word register, narrow it to the affected word.
857 If the region spans two words, defer to store_split_bit_field. */
858 if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
859 {
860 op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
861 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
862 gcc_assert (op0);
863 bitnum %= BITS_PER_WORD;
864 if (bitnum + bitsize > BITS_PER_WORD)
865 {
866 if (!fallback_p)
867 return false;
868
869 store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
870 bitregion_end, value);
871 return true;
872 }
873 }
874
875 /* From here on we can assume that the field to be stored in fits
876 within a word. If the destination is a register, it too fits
877 in a word. */
878
879 extraction_insn insv;
880 if (!MEM_P (op0)
881 && get_best_reg_extraction_insn (&insv, EP_insv,
882 GET_MODE_BITSIZE (GET_MODE (op0)),
883 fieldmode)
884 && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
885 return true;
886
887 /* If OP0 is a memory, try copying it to a register and seeing if a
888 cheap register alternative is available. */
889 if (MEM_P (op0))
890 {
891 if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
892 fieldmode)
893 && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
894 return true;
895
896 rtx_insn *last = get_last_insn ();
897
898 /* Try loading part of OP0 into a register, inserting the bitfield
899 into that, and then copying the result back to OP0. */
900 unsigned HOST_WIDE_INT bitpos;
901 rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
902 bitregion_start, bitregion_end,
903 fieldmode, &bitpos);
904 if (xop0)
905 {
906 rtx tempreg = copy_to_reg (xop0);
907 if (store_bit_field_1 (tempreg, bitsize, bitpos,
908 bitregion_start, bitregion_end,
909 fieldmode, orig_value, false))
910 {
911 emit_move_insn (xop0, tempreg);
912 return true;
913 }
914 delete_insns_since (last);
915 }
916 }
917
918 if (!fallback_p)
919 return false;
920
921 store_fixed_bit_field (op0, bitsize, bitnum, bitregion_start,
922 bitregion_end, value);
923 return true;
924 }
925
926 /* Generate code to store value from rtx VALUE
927 into a bit-field within structure STR_RTX
928 containing BITSIZE bits starting at bit BITNUM.
929
930 BITREGION_START is the bit position of the first bitfield in this region.
931 BITREGION_END is the bit position of the ending bitfield in this region.
932 These two fields are 0 if the C++ memory model does not apply,
933 or if we are not interested in keeping track of bitfield regions.
934
935 FIELDMODE is the machine-mode of the FIELD_DECL node for this field. */
936
937 void
938 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
939 unsigned HOST_WIDE_INT bitnum,
940 unsigned HOST_WIDE_INT bitregion_start,
941 unsigned HOST_WIDE_INT bitregion_end,
942 machine_mode fieldmode,
943 rtx value)
944 {
945 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
946 if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, fieldmode,
947 bitregion_start, bitregion_end))
948 {
949 /* Storing any naturally aligned field can be done with a simple
950 store. For targets that support fast unaligned memory, any
951 naturally sized, unit aligned field can be done directly. */
952 if (simple_mem_bitfield_p (str_rtx, bitsize, bitnum, fieldmode))
953 {
954 str_rtx = adjust_bitfield_address (str_rtx, fieldmode,
955 bitnum / BITS_PER_UNIT);
956 emit_move_insn (str_rtx, value);
957 }
958 else
959 {
960 str_rtx = narrow_bit_field_mem (str_rtx, fieldmode, bitsize, bitnum,
961 &bitnum);
962 /* Explicitly override the C/C++ memory model; ignore the
963 bit range so that we can do the access in the mode mandated
964 by -fstrict-volatile-bitfields instead. */
965 store_fixed_bit_field_1 (str_rtx, bitsize, bitnum, value);
966 }
967
968 return;
969 }
970
971 /* Under the C++0x memory model, we must not touch bits outside the
972 bit region. Adjust the address to start at the beginning of the
973 bit region. */
974 if (MEM_P (str_rtx) && bitregion_start > 0)
975 {
976 machine_mode bestmode;
977 HOST_WIDE_INT offset, size;
978
979 gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
980
981 offset = bitregion_start / BITS_PER_UNIT;
982 bitnum -= bitregion_start;
983 size = (bitnum + bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT;
984 bitregion_end -= bitregion_start;
985 bitregion_start = 0;
986 bestmode = get_best_mode (bitsize, bitnum,
987 bitregion_start, bitregion_end,
988 MEM_ALIGN (str_rtx), VOIDmode,
989 MEM_VOLATILE_P (str_rtx));
990 str_rtx = adjust_bitfield_address_size (str_rtx, bestmode, offset, size);
991 }
992
993 if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
994 bitregion_start, bitregion_end,
995 fieldmode, value, true))
996 gcc_unreachable ();
997 }
998 \f
999 /* Use shifts and boolean operations to store VALUE into a bit field of
1000 width BITSIZE in OP0, starting at bit BITNUM. */
1001
1002 static void
1003 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1004 unsigned HOST_WIDE_INT bitnum,
1005 unsigned HOST_WIDE_INT bitregion_start,
1006 unsigned HOST_WIDE_INT bitregion_end,
1007 rtx value)
1008 {
1009 /* There is a case not handled here:
1010 a structure with a known alignment of just a halfword
1011 and a field split across two aligned halfwords within the structure.
1012 Or likewise a structure with a known alignment of just a byte
1013 and a field split across two bytes.
1014 Such cases are not supposed to be able to occur. */
1015
1016 if (MEM_P (op0))
1017 {
1018 machine_mode mode = GET_MODE (op0);
1019 if (GET_MODE_BITSIZE (mode) == 0
1020 || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
1021 mode = word_mode;
1022 mode = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1023 MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
1024
1025 if (mode == VOIDmode)
1026 {
1027 /* The only way this should occur is if the field spans word
1028 boundaries. */
1029 store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
1030 bitregion_end, value);
1031 return;
1032 }
1033
1034 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1035 }
1036
1037 store_fixed_bit_field_1 (op0, bitsize, bitnum, value);
1038 }
1039
1040 /* Helper function for store_fixed_bit_field; it always stores
1041 the bit field using the mode of OP0. */
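/* A sketch with illustrative numbers: storing VALUE = 5 into a 3-bit
   field whose least significant bit is 4 bits above the lsb of an
   8-bit OP0 computes OP0 = (OP0 & ~0x70) | (5 << 4), i.e. the field
   is masked out first and the shifted value is then ORed in.  */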
1042
1043 static void
1044 store_fixed_bit_field_1 (rtx op0, unsigned HOST_WIDE_INT bitsize,
1045 unsigned HOST_WIDE_INT bitnum,
1046 rtx value)
1047 {
1048 machine_mode mode;
1049 rtx temp;
1050 int all_zero = 0;
1051 int all_one = 0;
1052
1053 mode = GET_MODE (op0);
1054 gcc_assert (SCALAR_INT_MODE_P (mode));
1055
1056 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1057 for invalid input, such as f5 from gcc.dg/pr48335-2.c. */
1058
1059 if (BYTES_BIG_ENDIAN)
1060 /* BITNUM is the distance between our msb
1061 and that of the containing datum.
1062 Convert it to the distance from the lsb. */
1063 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1064
1065 /* Now BITNUM is always the distance between our lsb
1066 and that of OP0. */
1067
1068 /* Shift VALUE left by BITNUM bits. If VALUE is not constant,
1069 we must first convert its mode to MODE. */
1070
1071 if (CONST_INT_P (value))
1072 {
1073 unsigned HOST_WIDE_INT v = UINTVAL (value);
1074
1075 if (bitsize < HOST_BITS_PER_WIDE_INT)
1076 v &= ((unsigned HOST_WIDE_INT) 1 << bitsize) - 1;
1077
1078 if (v == 0)
1079 all_zero = 1;
1080 else if ((bitsize < HOST_BITS_PER_WIDE_INT
1081 && v == ((unsigned HOST_WIDE_INT) 1 << bitsize) - 1)
1082 || (bitsize == HOST_BITS_PER_WIDE_INT
1083 && v == (unsigned HOST_WIDE_INT) -1))
1084 all_one = 1;
1085
1086 value = lshift_value (mode, v, bitnum);
1087 }
1088 else
1089 {
1090 int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
1091 && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1092
1093 if (GET_MODE (value) != mode)
1094 value = convert_to_mode (mode, value, 1);
1095
1096 if (must_and)
1097 value = expand_binop (mode, and_optab, value,
1098 mask_rtx (mode, 0, bitsize, 0),
1099 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1100 if (bitnum > 0)
1101 value = expand_shift (LSHIFT_EXPR, mode, value,
1102 bitnum, NULL_RTX, 1);
1103 }
1104
1105 /* Now clear the chosen bits in OP0,
1106 except that if VALUE is -1 we need not bother. */
1107 /* We keep the intermediates in registers to allow CSE to combine
1108 consecutive bitfield assignments. */
1109
1110 temp = force_reg (mode, op0);
1111
1112 if (! all_one)
1113 {
1114 temp = expand_binop (mode, and_optab, temp,
1115 mask_rtx (mode, bitnum, bitsize, 1),
1116 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1117 temp = force_reg (mode, temp);
1118 }
1119
1120 /* Now logical-or VALUE into OP0, unless it is zero. */
1121
1122 if (! all_zero)
1123 {
1124 temp = expand_binop (mode, ior_optab, temp, value,
1125 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1126 temp = force_reg (mode, temp);
1127 }
1128
1129 if (op0 != temp)
1130 {
1131 op0 = copy_rtx (op0);
1132 emit_move_insn (op0, temp);
1133 }
1134 }
1135 \f
1136 /* Store a bit field that is split across multiple accessible memory objects.
1137
1138 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1139 BITSIZE is the field width; BITPOS the position of its first bit
1140 (within the word).
1141 VALUE is the value to store.
1142
1143 This does not yet handle fields wider than BITS_PER_WORD. */
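/* A worked illustration, assuming byte-aligned memory (UNIT == 8):
   an 8-bit field at BITPOS 28 is stored in two pieces, first 4 bits
   at unit offset 3 with THISPOS 4, then the remaining 4 bits at unit
   offset 4 with THISPOS 0, so that no single access crosses a unit
   boundary.  */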
1144
1145 static void
1146 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1147 unsigned HOST_WIDE_INT bitpos,
1148 unsigned HOST_WIDE_INT bitregion_start,
1149 unsigned HOST_WIDE_INT bitregion_end,
1150 rtx value)
1151 {
1152 unsigned int unit;
1153 unsigned int bitsdone = 0;
1154
1155 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1156 much at a time. */
1157 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1158 unit = BITS_PER_WORD;
1159 else
1160 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1161
1162 /* If OP0 is a memory with a mode, then UNIT must not be larger than
1163 OP0's mode as well. Otherwise, store_fixed_bit_field will call us
1164 again, and we will mutually recurse forever. */
1165 if (MEM_P (op0) && GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1166 unit = MIN (unit, GET_MODE_BITSIZE (GET_MODE (op0)));
1167
1168 /* If VALUE is a constant other than a CONST_INT, get it into a register in
1169 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
1170 that VALUE might be a floating-point constant. */
1171 if (CONSTANT_P (value) && !CONST_INT_P (value))
1172 {
1173 rtx word = gen_lowpart_common (word_mode, value);
1174
1175 if (word && (value != word))
1176 value = word;
1177 else
1178 value = gen_lowpart_common (word_mode,
1179 force_reg (GET_MODE (value) != VOIDmode
1180 ? GET_MODE (value)
1181 : word_mode, value));
1182 }
1183
1184 while (bitsdone < bitsize)
1185 {
1186 unsigned HOST_WIDE_INT thissize;
1187 rtx part, word;
1188 unsigned HOST_WIDE_INT thispos;
1189 unsigned HOST_WIDE_INT offset;
1190
1191 offset = (bitpos + bitsdone) / unit;
1192 thispos = (bitpos + bitsdone) % unit;
1193
1194 /* When the region of bytes we can touch is restricted, decrease
1195 UNIT close to the end of the region as needed. If op0 is a REG
1196 or SUBREG of REG, don't do this, as there can't be data races
1197 on a register and we can expand shorter code in some cases. */
1198 if (bitregion_end
1199 && unit > BITS_PER_UNIT
1200 && bitpos + bitsdone - thispos + unit > bitregion_end + 1
1201 && !REG_P (op0)
1202 && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1203 {
1204 unit = unit / 2;
1205 continue;
1206 }
1207
1208 /* THISSIZE must not overrun a word boundary. Otherwise,
1209 store_fixed_bit_field will call us again, and we will mutually
1210 recurse forever. */
1211 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1212 thissize = MIN (thissize, unit - thispos);
1213
1214 if (BYTES_BIG_ENDIAN)
1215 {
1216 /* Fetch successively less significant portions. */
1217 if (CONST_INT_P (value))
1218 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1219 >> (bitsize - bitsdone - thissize))
1220 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1221 else
1222 {
1223 int total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1224 /* The args are chosen so that the last part includes the
1225 lsb. Give extract_bit_field the value it needs (with
1226 endianness compensation) to fetch the piece we want. */
1227 part = extract_fixed_bit_field (word_mode, value, thissize,
1228 total_bits - bitsize + bitsdone,
1229 NULL_RTX, 1);
1230 }
1231 }
1232 else
1233 {
1234 /* Fetch successively more significant portions. */
1235 if (CONST_INT_P (value))
1236 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1237 >> bitsdone)
1238 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1239 else
1240 part = extract_fixed_bit_field (word_mode, value, thissize,
1241 bitsdone, NULL_RTX, 1);
1242 }
1243
1244 /* If OP0 is a register, then handle OFFSET here.
1245
1246 When handling multiword bitfields, extract_bit_field may pass
1247 down a word_mode SUBREG of a larger REG for a bitfield that actually
1248 crosses a word boundary. Thus, for a SUBREG, we must find
1249 the current word starting from the base register. */
1250 if (GET_CODE (op0) == SUBREG)
1251 {
1252 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD)
1253 + (offset * unit / BITS_PER_WORD);
1254 machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
1255 if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
1256 word = word_offset ? const0_rtx : op0;
1257 else
1258 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1259 GET_MODE (SUBREG_REG (op0)));
1260 offset &= BITS_PER_WORD / unit - 1;
1261 }
1262 else if (REG_P (op0))
1263 {
1264 machine_mode op0_mode = GET_MODE (op0);
1265 if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1266 word = offset ? const0_rtx : op0;
1267 else
1268 word = operand_subword_force (op0, offset * unit / BITS_PER_WORD,
1269 GET_MODE (op0));
1270 offset &= BITS_PER_WORD / unit - 1;
1271 }
1272 else
1273 word = op0;
1274
1275 /* OFFSET is in UNITs, and UNIT is in bits. If WORD is const0_rtx,
1276 it is just an out-of-bounds access. Ignore it. */
1277 if (word != const0_rtx)
1278 store_fixed_bit_field (word, thissize, offset * unit + thispos,
1279 bitregion_start, bitregion_end, part);
1280 bitsdone += thissize;
1281 }
1282 }
1283 \f
1284 /* A subroutine of extract_bit_field_1 that converts return value X
1285 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1286 to extract_bit_field. */
1287
1288 static rtx
1289 convert_extracted_bit_field (rtx x, machine_mode mode,
1290 machine_mode tmode, bool unsignedp)
1291 {
1292 if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1293 return x;
1294
1295 /* If TMODE is not a scalar integral mode, first convert X to a
1296 scalar integer of the same size and then view it in TMODE (e.g. a
1297 floating-point mode) via a SUBREG. */
1298 if (!SCALAR_INT_MODE_P (tmode))
1299 {
1300 machine_mode smode;
1301
1302 smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1303 x = convert_to_mode (smode, x, unsignedp);
1304 x = force_reg (smode, x);
1305 return gen_lowpart (tmode, x);
1306 }
1307
1308 return convert_to_mode (tmode, x, unsignedp);
1309 }
1310
1311 /* Try to use an ext(z)v pattern to extract a field from OP0.
1312 Return the extracted value on success, otherwise return null.
1313 EXT_MODE is the mode of the extraction and the other arguments
1314 are as for extract_bit_field. */
1315
1316 static rtx
1317 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1318 unsigned HOST_WIDE_INT bitsize,
1319 unsigned HOST_WIDE_INT bitnum,
1320 int unsignedp, rtx target,
1321 machine_mode mode, machine_mode tmode)
1322 {
1323 struct expand_operand ops[4];
1324 rtx spec_target = target;
1325 rtx spec_target_subreg = 0;
1326 machine_mode ext_mode = extv->field_mode;
1327 unsigned unit = GET_MODE_BITSIZE (ext_mode);
1328
1329 if (bitsize == 0 || unit < bitsize)
1330 return NULL_RTX;
1331
1332 if (MEM_P (op0))
1333 /* Get a reference to the first byte of the field. */
1334 op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1335 &bitnum);
1336 else
1337 {
1338 /* Convert from counting within OP0 to counting in EXT_MODE. */
1339 if (BYTES_BIG_ENDIAN)
1340 bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1341
1342 /* If op0 is a register, we need it in EXT_MODE to make it
1343 acceptable to the format of ext(z)v. */
1344 if (GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1345 return NULL_RTX;
1346 if (REG_P (op0) && GET_MODE (op0) != ext_mode)
1347 op0 = gen_lowpart_SUBREG (ext_mode, op0);
1348 }
1349
1350 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1351 "backwards" from the size of the unit we are extracting from.
1352 Otherwise, we count bits from the most significant on a
1353 BYTES/BITS_BIG_ENDIAN machine. */
1354
1355 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1356 bitnum = unit - bitsize - bitnum;
1357
1358 if (target == 0)
1359 target = spec_target = gen_reg_rtx (tmode);
1360
1361 if (GET_MODE (target) != ext_mode)
1362 {
1363 /* Don't use LHS paradoxical subreg if explicit truncation is needed
1364 between the mode of the extraction (word_mode) and the target
1365 mode. Instead, create a temporary and use convert_move to set
1366 the target. */
1367 if (REG_P (target)
1368 && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1369 {
1370 target = gen_lowpart (ext_mode, target);
1371 if (GET_MODE_PRECISION (ext_mode)
1372 > GET_MODE_PRECISION (GET_MODE (spec_target)))
1373 spec_target_subreg = target;
1374 }
1375 else
1376 target = gen_reg_rtx (ext_mode);
1377 }
1378
1379 create_output_operand (&ops[0], target, ext_mode);
1380 create_fixed_operand (&ops[1], op0);
1381 create_integer_operand (&ops[2], bitsize);
1382 create_integer_operand (&ops[3], bitnum);
1383 if (maybe_expand_insn (extv->icode, 4, ops))
1384 {
1385 target = ops[0].value;
1386 if (target == spec_target)
1387 return target;
1388 if (target == spec_target_subreg)
1389 return spec_target;
1390 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1391 }
1392 return NULL_RTX;
1393 }
1394
1395 /* A subroutine of extract_bit_field, with the same arguments.
1396 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1397 if we can find no other means of implementing the operation.
1398 If FALLBACK_P is false, return NULL instead. */
1399
1400 static rtx
1401 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1402 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1403 machine_mode mode, machine_mode tmode,
1404 bool fallback_p)
1405 {
1406 rtx op0 = str_rtx;
1407 machine_mode int_mode;
1408 machine_mode mode1;
1409
1410 if (tmode == VOIDmode)
1411 tmode = mode;
1412
1413 while (GET_CODE (op0) == SUBREG)
1414 {
1415 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1416 op0 = SUBREG_REG (op0);
1417 }
1418
1419 /* If we have an out-of-bounds access to a register, just return an
1420 uninitialized register of the required mode. This can occur if the
1421 source code contains an out-of-bounds access to a small array. */
1422 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1423 return gen_reg_rtx (tmode);
1424
1425 if (REG_P (op0)
1426 && mode == GET_MODE (op0)
1427 && bitnum == 0
1428 && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1429 {
1430 /* We're trying to extract a full register from itself. */
1431 return op0;
1432 }
1433
1434 /* See if we can get a better vector mode before extracting. */
1435 if (VECTOR_MODE_P (GET_MODE (op0))
1436 && !MEM_P (op0)
1437 && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1438 {
1439 machine_mode new_mode;
1440
1441 if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1442 new_mode = MIN_MODE_VECTOR_FLOAT;
1443 else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1444 new_mode = MIN_MODE_VECTOR_FRACT;
1445 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1446 new_mode = MIN_MODE_VECTOR_UFRACT;
1447 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1448 new_mode = MIN_MODE_VECTOR_ACCUM;
1449 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1450 new_mode = MIN_MODE_VECTOR_UACCUM;
1451 else
1452 new_mode = MIN_MODE_VECTOR_INT;
1453
1454 for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1455 if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1456 && targetm.vector_mode_supported_p (new_mode))
1457 break;
1458 if (new_mode != VOIDmode)
1459 op0 = gen_lowpart (new_mode, op0);
1460 }
1461
1462 /* Use vec_extract patterns for extracting parts of vectors whenever
1463 available. */
1464 if (VECTOR_MODE_P (GET_MODE (op0))
1465 && !MEM_P (op0)
1466 && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1467 && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1468 == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1469 {
1470 struct expand_operand ops[3];
1471 machine_mode outermode = GET_MODE (op0);
1472 machine_mode innermode = GET_MODE_INNER (outermode);
1473 enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1474 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1475
1476 create_output_operand (&ops[0], target, innermode);
1477 create_input_operand (&ops[1], op0, outermode);
1478 create_integer_operand (&ops[2], pos);
1479 if (maybe_expand_insn (icode, 3, ops))
1480 {
1481 target = ops[0].value;
1482 if (GET_MODE (target) != mode)
1483 return gen_lowpart (tmode, target);
1484 return target;
1485 }
1486 }
1487
1488 /* Make sure we are playing with integral modes. Pun with subregs
1489 if we aren't. */
1490 {
1491 machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1492 if (imode != GET_MODE (op0))
1493 {
1494 if (MEM_P (op0))
1495 op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
1496 else if (imode != BLKmode)
1497 {
1498 op0 = gen_lowpart (imode, op0);
1499
1500 /* If we got a SUBREG, force it into a register since we
1501 aren't going to be able to do another SUBREG on it. */
1502 if (GET_CODE (op0) == SUBREG)
1503 op0 = force_reg (imode, op0);
1504 }
1505 else if (REG_P (op0))
1506 {
1507 rtx reg, subreg;
1508 imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
1509 MODE_INT);
1510 reg = gen_reg_rtx (imode);
1511 subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
1512 emit_move_insn (subreg, op0);
1513 op0 = reg;
1514 bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
1515 }
1516 else
1517 {
1518 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (op0));
1519 rtx mem = assign_stack_temp (GET_MODE (op0), size);
1520 emit_move_insn (mem, op0);
1521 op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1522 }
1523 }
1524 }
1525
1526 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1527 If that's wrong, the solution is to test for it and set TARGET to 0
1528 if needed. */
1529
1530 /* Get the mode of the field to use for atomic access or subreg
1531 conversion. */
1532 mode1 = mode;
1533 if (SCALAR_INT_MODE_P (tmode))
1534 {
1535 machine_mode try_mode = mode_for_size (bitsize,
1536 GET_MODE_CLASS (tmode), 0);
1537 if (try_mode != BLKmode)
1538 mode1 = try_mode;
1539 }
1540 gcc_assert (mode1 != BLKmode);
1541
1542 /* Extraction of a full MODE1 value can be done with a subreg as long
1543 as the least significant bit of the value is the least significant
1544 bit of either OP0 or a word of OP0. */
1545 if (!MEM_P (op0)
1546 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1547 && bitsize == GET_MODE_BITSIZE (mode1)
1548 && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0)))
1549 {
1550 rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1551 bitnum / BITS_PER_UNIT);
1552 if (sub)
1553 return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1554 }
1555
1556 /* Extraction of a full MODE1 value can be done with a load as long as
1557 the field is on a byte boundary and is sufficiently aligned. */
1558 if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1))
1559 {
1560 op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT);
1561 return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1562 }
1563
1564 /* Handle fields bigger than a word. */
1565
1566 if (bitsize > BITS_PER_WORD)
1567 {
1568 /* Here we transfer the words of the field
1569 in the order least significant first.
1570 This is because the most significant word is the one which may
1571 be less than full. */
1572
1573 unsigned int backwards = WORDS_BIG_ENDIAN;
1574 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1575 unsigned int i;
1576 rtx_insn *last;
1577
1578 if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1579 target = gen_reg_rtx (mode);
1580
1581 /* Indicate for flow that the entire target reg is being set. */
1582 emit_clobber (target);
1583
1584 last = get_last_insn ();
1585 for (i = 0; i < nwords; i++)
1586 {
1587 /* If I is 0, use the low-order word in both field and target;
1588 if I is 1, use the next to lowest word; and so on. */
1589 /* Word number in TARGET to use. */
1590 unsigned int wordnum
1591 = (backwards
1592 ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1593 : i);
1594 /* Offset from start of field in OP0. */
1595 unsigned int bit_offset = (backwards
1596 ? MAX ((int) bitsize - ((int) i + 1)
1597 * BITS_PER_WORD,
1598 0)
1599 : (int) i * BITS_PER_WORD);
1600 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1601 rtx result_part
1602 = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1603 bitsize - i * BITS_PER_WORD),
1604 bitnum + bit_offset, 1, target_part,
1605 mode, word_mode, fallback_p);
1606
1607 gcc_assert (target_part);
1608 if (!result_part)
1609 {
1610 delete_insns_since (last);
1611 return NULL;
1612 }
1613
1614 if (result_part != target_part)
1615 emit_move_insn (target_part, result_part);
1616 }
1617
1618 if (unsignedp)
1619 {
1620 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1621 need to be zero'd out. */
1622 if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1623 {
1624 unsigned int i, total_words;
1625
1626 total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1627 for (i = nwords; i < total_words; i++)
1628 emit_move_insn
1629 (operand_subword (target,
1630 backwards ? total_words - i - 1 : i,
1631 1, VOIDmode),
1632 const0_rtx);
1633 }
1634 return target;
1635 }
1636
1637 /* Signed bit field: sign-extend with two arithmetic shifts. */
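/* For example, with a 32-bit MODE and a 12-bit field, shifting left
   by 32 - 12 = 20 and then arithmetically right by 20 replicates
   bit 11 of the extracted value into bits 12..31, yielding the
   sign-extended result.  */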
1638 target = expand_shift (LSHIFT_EXPR, mode, target,
1639 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1640 return expand_shift (RSHIFT_EXPR, mode, target,
1641 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1642 }
1643
1644 /* If OP0 is a multi-word register, narrow it to the affected word.
1645 If the region spans two words, defer to extract_split_bit_field. */
1646 if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1647 {
1648 op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
1649 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1650 bitnum %= BITS_PER_WORD;
1651 if (bitnum + bitsize > BITS_PER_WORD)
1652 {
1653 if (!fallback_p)
1654 return NULL_RTX;
1655 target = extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1656 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1657 }
1658 }
1659
1660 /* From here on we know the desired field is smaller than a word.
1661 If OP0 is a register, it too fits within a word. */
1662 enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1663 extraction_insn extv;
1664 if (!MEM_P (op0)
1665 /* ??? We could limit the structure size to the part of OP0 that
1666 contains the field, with appropriate checks for endianness
1667 and TRULY_NOOP_TRUNCATION. */
1668 && get_best_reg_extraction_insn (&extv, pattern,
1669 GET_MODE_BITSIZE (GET_MODE (op0)),
1670 tmode))
1671 {
1672 rtx result = extract_bit_field_using_extv (&extv, op0, bitsize, bitnum,
1673 unsignedp, target, mode,
1674 tmode);
1675 if (result)
1676 return result;
1677 }
1678
1679 /* If OP0 is a memory, try copying it to a register and seeing if a
1680 cheap register alternative is available. */
1681 if (MEM_P (op0))
1682 {
1683 if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
1684 tmode))
1685 {
1686 rtx result = extract_bit_field_using_extv (&extv, op0, bitsize,
1687 bitnum, unsignedp,
1688 target, mode,
1689 tmode);
1690 if (result)
1691 return result;
1692 }
1693
1694 rtx_insn *last = get_last_insn ();
1695
1696 /* Try loading part of OP0 into a register and extracting the
1697 bitfield from that. */
1698 unsigned HOST_WIDE_INT bitpos;
1699 rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
1700 0, 0, tmode, &bitpos);
1701 if (xop0)
1702 {
1703 xop0 = copy_to_reg (xop0);
1704 rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
1705 unsignedp, target,
1706 mode, tmode, false);
1707 if (result)
1708 return result;
1709 delete_insns_since (last);
1710 }
1711 }
1712
1713 if (!fallback_p)
1714 return NULL;
1715
1716 /* Find a correspondingly-sized integer field, so we can apply
1717 shifts and masks to it. */
1718 int_mode = int_mode_for_mode (tmode);
1719 if (int_mode == BLKmode)
1720 int_mode = int_mode_for_mode (mode);
1721 /* Should probably push op0 out to memory and then do a load. */
1722 gcc_assert (int_mode != BLKmode);
1723
1724 target = extract_fixed_bit_field (int_mode, op0, bitsize, bitnum,
1725 target, unsignedp);
1726 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1727 }
1728
1729 /* Generate code to extract a bit-field from STR_RTX
1730 containing BITSIZE bits, starting at BITNUM,
1731 and put it in TARGET if possible (if TARGET is nonzero).
1732 Regardless of TARGET, we return the rtx for where the value is placed.
1733
1734 STR_RTX is the structure containing the field (a REG or MEM).
1735 UNSIGNEDP is nonzero if this is an unsigned bit field.
1736 MODE is the natural mode of the field value once extracted.
1737 TMODE is the mode the caller would like the value to have;
1738 but the value may be returned with type MODE instead.
1739
1740 If a TARGET is specified and we can store in it at no extra cost,
1741 we do so, and return TARGET.
1742 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1743 if they are equally easy. */
1744
1745 rtx
1746 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1747 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1748 machine_mode mode, machine_mode tmode)
1749 {
1750 machine_mode mode1;
1751
1752 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
1753 if (GET_MODE_BITSIZE (GET_MODE (str_rtx)) > 0)
1754 mode1 = GET_MODE (str_rtx);
1755 else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1756 mode1 = GET_MODE (target);
1757 else
1758 mode1 = tmode;
1759
1760 if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, mode1, 0, 0))
1761 {
1762 rtx result;
1763
1764 /* Extraction of a full MODE1 value can be done with a load as long as
1765 the field is on a byte boundary and is sufficiently aligned. */
1766 if (simple_mem_bitfield_p (str_rtx, bitsize, bitnum, mode1))
1767 result = adjust_bitfield_address (str_rtx, mode1,
1768 bitnum / BITS_PER_UNIT);
1769 else
1770 {
1771 str_rtx = narrow_bit_field_mem (str_rtx, mode1, bitsize, bitnum,
1772 &bitnum);
1773 result = extract_fixed_bit_field_1 (mode, str_rtx, bitsize, bitnum,
1774 target, unsignedp);
1775 }
1776
1777 return convert_extracted_bit_field (result, mode, tmode, unsignedp);
1778 }
1779
1780 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
1781 target, mode, tmode, true);
1782 }
1783 \f
1784 /* Use shifts and boolean operations to extract a field of BITSIZE bits
1785 from bit BITNUM of OP0.
1786
1787 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1788 If TARGET is nonzero, attempts to store the value there
1789 and return TARGET, but this is not guaranteed.
1790 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
1791
1792 static rtx
1793 extract_fixed_bit_field (machine_mode tmode, rtx op0,
1794 unsigned HOST_WIDE_INT bitsize,
1795 unsigned HOST_WIDE_INT bitnum, rtx target,
1796 int unsignedp)
1797 {
1798 if (MEM_P (op0))
1799 {
1800 machine_mode mode
1801 = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0), word_mode,
1802 MEM_VOLATILE_P (op0));
1803
1804 if (mode == VOIDmode)
1805 /* The only way this should occur is if the field spans word
1806 boundaries. */
1807 return extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1808
1809 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1810 }
1811
1812 return extract_fixed_bit_field_1 (tmode, op0, bitsize, bitnum,
1813 target, unsignedp);
1814 }
1815
1816 /* Helper function for extract_fixed_bit_field; extracts
1817 the bit field always using the MODE of OP0. */
1818
1819 static rtx
1820 extract_fixed_bit_field_1 (machine_mode tmode, rtx op0,
1821 unsigned HOST_WIDE_INT bitsize,
1822 unsigned HOST_WIDE_INT bitnum, rtx target,
1823 int unsignedp)
1824 {
1825 machine_mode mode = GET_MODE (op0);
1826 gcc_assert (SCALAR_INT_MODE_P (mode));
1827
1828 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1829 for invalid input, such as extracting the equivalent of f5 from
1830 gcc.dg/pr48335-2.c. */
1831
1832 if (BYTES_BIG_ENDIAN)
1833 /* BITNUM is the distance between our msb and that of OP0.
1834 Convert it to the distance from the lsb. */
1835 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1836
1837 /* Now BITNUM is always the distance between the field's lsb and that of OP0.
1838 We have reduced the big-endian case to the little-endian case. */
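/* As a rough illustration: with a 32-bit OP0, a 5-bit field whose
   big-endian BITNUM is 4 (four bits below OP0's msb) starts
   32 - 5 - 4 = 23 bits above OP0's lsb, which is the form the shifting
   code below expects. */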
1839
1840 if (unsignedp)
1841 {
1842 if (bitnum)
1843 {
1844 /* If the field does not already start at the lsb,
1845 shift it so it does. */
1846 /* Maybe propagate the target for the shift. */
1847 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1848 if (tmode != mode)
1849 subtarget = 0;
1850 op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
1851 }
1852 /* Convert the value to the desired mode. */
1853 if (mode != tmode)
1854 op0 = convert_to_mode (tmode, op0, 1);
1855
1856 /* Unless the msb of the field used to be the msb when we shifted,
1857 mask out the upper bits. */
1858
1859 if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
1860 return expand_binop (GET_MODE (op0), and_optab, op0,
1861 mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1862 target, 1, OPTAB_LIB_WIDEN);
1863 return op0;
1864 }
1865
1866 /* To extract a signed bit-field, first shift its msb to the msb of the word,
1867 then arithmetic-shift its lsb to the lsb of the word. */
1868 op0 = force_reg (mode, op0);
1869
1870 /* Find the narrowest integer mode that contains the field. */
1871
1872 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1873 mode = GET_MODE_WIDER_MODE (mode))
1874 if (GET_MODE_BITSIZE (mode) >= bitsize + bitnum)
1875 {
1876 op0 = convert_to_mode (mode, op0, 0);
1877 break;
1878 }
1879
1880 if (mode != tmode)
1881 target = 0;
1882
1883 if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
1884 {
1885 int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
1886 /* Maybe propagate the target for the shift. */
1887 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1888 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1889 }
1890
1891 return expand_shift (RSHIFT_EXPR, mode, op0,
1892 GET_MODE_BITSIZE (mode) - bitsize, target, 0);
1893 }
1894
1895 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1896 VALUE << BITPOS. */
1897
1898 static rtx
1899 lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
1900 int bitpos)
1901 {
1902 return immed_wide_int_const (wi::lshift (value, bitpos), mode);
1903 }
1904 \f
1905 /* Extract a bit field that is split across two words
1906 and return an RTX for the result.
1907
1908 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1909 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1910 UNSIGNEDP is 1 if we should zero-extend the contents; else sign-extend. */
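/* As an illustrative sketch: on a target with 32-bit words, a 20-bit
   field starting at bit 24 of its first word takes 8 bits from that word
   and 12 bits from the next; each part is fetched below as an unsigned
   field, shifted into position, and combined with IOR. */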
1911
1912 static rtx
1913 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1914 unsigned HOST_WIDE_INT bitpos, int unsignedp)
1915 {
1916 unsigned int unit;
1917 unsigned int bitsdone = 0;
1918 rtx result = NULL_RTX;
1919 int first = 1;
1920
1921 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
1922 much at a time. */
1923 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1924 unit = BITS_PER_WORD;
1925 else
1926 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1927
1928 while (bitsdone < bitsize)
1929 {
1930 unsigned HOST_WIDE_INT thissize;
1931 rtx part, word;
1932 unsigned HOST_WIDE_INT thispos;
1933 unsigned HOST_WIDE_INT offset;
1934
1935 offset = (bitpos + bitsdone) / unit;
1936 thispos = (bitpos + bitsdone) % unit;
1937
1938 /* THISSIZE must not overrun a word boundary. Otherwise,
1939 extract_fixed_bit_field will call us again, and we will mutually
1940 recurse forever. */
1941 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1942 thissize = MIN (thissize, unit - thispos);
1943
1944 /* If OP0 is a register, then handle OFFSET here.
1945
1946 When handling multiword bitfields, extract_bit_field may pass
1947 down a word_mode SUBREG of a larger REG for a bitfield that actually
1948 crosses a word boundary. Thus, for a SUBREG, we must find
1949 the current word starting from the base register. */
1950 if (GET_CODE (op0) == SUBREG)
1951 {
1952 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1953 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1954 GET_MODE (SUBREG_REG (op0)));
1955 offset = 0;
1956 }
1957 else if (REG_P (op0))
1958 {
1959 word = operand_subword_force (op0, offset, GET_MODE (op0));
1960 offset = 0;
1961 }
1962 else
1963 word = op0;
1964
1965 /* Extract the parts in bit-counting order,
1966 whose meaning is determined by BYTES_BIG_ENDIAN.
1967 OFFSET is in UNITs, and UNIT is in bits. */
1968 part = extract_fixed_bit_field (word_mode, word, thissize,
1969 offset * unit + thispos, 0, 1);
1970 bitsdone += thissize;
1971
1972 /* Shift this part into place for the result. */
1973 if (BYTES_BIG_ENDIAN)
1974 {
1975 if (bitsize != bitsdone)
1976 part = expand_shift (LSHIFT_EXPR, word_mode, part,
1977 bitsize - bitsdone, 0, 1);
1978 }
1979 else
1980 {
1981 if (bitsdone != thissize)
1982 part = expand_shift (LSHIFT_EXPR, word_mode, part,
1983 bitsdone - thissize, 0, 1);
1984 }
1985
1986 if (first)
1987 result = part;
1988 else
1989 /* Combine the parts with bitwise or. This works
1990 because we extracted each part as an unsigned bit field. */
1991 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
1992 OPTAB_LIB_WIDEN);
1993
1994 first = 0;
1995 }
1996
1997 /* Unsigned bit field: we are done. */
1998 if (unsignedp)
1999 return result;
2000 /* Signed bit field: sign-extend with two arithmetic shifts. */
2001 result = expand_shift (LSHIFT_EXPR, word_mode, result,
2002 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2003 return expand_shift (RSHIFT_EXPR, word_mode, result,
2004 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2005 }
2006 \f
2007 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2008 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
2009 MODE, fill the upper bits with zeros. Fail if the layout of either
2010 mode is unknown (as for CC modes) or if the extraction would involve
2011 unprofitable mode punning. Return the value on success, otherwise
2012 return null.
2013
2014 This is different from gen_lowpart* in these respects:
2015
2016 - the returned value must always be considered an rvalue
2017
2018 - when MODE is wider than SRC_MODE, the extraction involves
2019 a zero extension
2020
2021 - when MODE is smaller than SRC_MODE, the extraction involves
2022 a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2023
2024 In other words, this routine performs a computation, whereas the
2025 gen_lowpart* routines are conceptually lvalue or rvalue subreg
2026 operations. */
2027
2028 rtx
2029 extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
2030 {
2031 machine_mode int_mode, src_int_mode;
2032
2033 if (mode == src_mode)
2034 return src;
2035
2036 if (CONSTANT_P (src))
2037 {
2038 /* simplify_gen_subreg can't be used here, as if simplify_subreg
2039 fails, it will happily create (subreg (symbol_ref)) or similar
2040 invalid SUBREGs. */
2041 unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2042 rtx ret = simplify_subreg (mode, src, src_mode, byte);
2043 if (ret)
2044 return ret;
2045
2046 if (GET_MODE (src) == VOIDmode
2047 || !validate_subreg (mode, src_mode, src, byte))
2048 return NULL_RTX;
2049
2050 src = force_reg (GET_MODE (src), src);
2051 return gen_rtx_SUBREG (mode, src, byte);
2052 }
2053
2054 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2055 return NULL_RTX;
2056
2057 if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2058 && MODES_TIEABLE_P (mode, src_mode))
2059 {
2060 rtx x = gen_lowpart_common (mode, src);
2061 if (x)
2062 return x;
2063 }
2064
2065 src_int_mode = int_mode_for_mode (src_mode);
2066 int_mode = int_mode_for_mode (mode);
2067 if (src_int_mode == BLKmode || int_mode == BLKmode)
2068 return NULL_RTX;
2069
2070 if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2071 return NULL_RTX;
2072 if (!MODES_TIEABLE_P (int_mode, mode))
2073 return NULL_RTX;
2074
2075 src = gen_lowpart (src_int_mode, src);
2076 src = convert_modes (int_mode, src_int_mode, src, true);
2077 src = gen_lowpart (mode, src);
2078 return src;
2079 }
2080 \f
2081 /* Add INC into TARGET. */
2082
2083 void
2084 expand_inc (rtx target, rtx inc)
2085 {
2086 rtx value = expand_binop (GET_MODE (target), add_optab,
2087 target, inc,
2088 target, 0, OPTAB_LIB_WIDEN);
2089 if (value != target)
2090 emit_move_insn (target, value);
2091 }
2092
2093 /* Subtract DEC from TARGET. */
2094
2095 void
2096 expand_dec (rtx target, rtx dec)
2097 {
2098 rtx value = expand_binop (GET_MODE (target), sub_optab,
2099 target, dec,
2100 target, 0, OPTAB_LIB_WIDEN);
2101 if (value != target)
2102 emit_move_insn (target, value);
2103 }
2104 \f
2105 /* Output a shift instruction for expression code CODE,
2106 with SHIFTED being the rtx for the value to shift,
2107 and AMOUNT the rtx for the amount to shift by.
2108 Store the result in the rtx TARGET, if that is convenient.
2109 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2110 Return the rtx for where the value is. */
2111
2112 static rtx
2113 expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
2114 rtx amount, rtx target, int unsignedp)
2115 {
2116 rtx op1, temp = 0;
2117 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2118 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2119 optab lshift_optab = ashl_optab;
2120 optab rshift_arith_optab = ashr_optab;
2121 optab rshift_uns_optab = lshr_optab;
2122 optab lrotate_optab = rotl_optab;
2123 optab rrotate_optab = rotr_optab;
2124 machine_mode op1_mode;
2125 machine_mode scalar_mode = mode;
2126 int attempt;
2127 bool speed = optimize_insn_for_speed_p ();
2128
2129 if (VECTOR_MODE_P (mode))
2130 scalar_mode = GET_MODE_INNER (mode);
2131 op1 = amount;
2132 op1_mode = GET_MODE (op1);
2133
2134 /* Determine whether the shift/rotate amount is a vector or a scalar. If the
2135 shift amount is a vector, use the vector/vector shift patterns. */
2136 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2137 {
2138 lshift_optab = vashl_optab;
2139 rshift_arith_optab = vashr_optab;
2140 rshift_uns_optab = vlshr_optab;
2141 lrotate_optab = vrotl_optab;
2142 rrotate_optab = vrotr_optab;
2143 }
2144
2145 /* We used to detect shift counts computed by NEGATE_EXPR
2146 and shift in the other direction, but that does not work
2147 on all machines. */
2148
2149 if (SHIFT_COUNT_TRUNCATED)
2150 {
2151 if (CONST_INT_P (op1)
2152 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2153 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2154 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2155 % GET_MODE_BITSIZE (scalar_mode));
2156 else if (GET_CODE (op1) == SUBREG
2157 && subreg_lowpart_p (op1)
2158 && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2159 && SCALAR_INT_MODE_P (GET_MODE (op1)))
2160 op1 = SUBREG_REG (op1);
2161 }
2162
2163 /* Canonicalize rotates by constant amount. If op1 is bitsize / 2,
2164 prefer left rotation; if op1 is from bitsize / 2 + 1 to
2165 bitsize - 1, use the other direction of rotate, with an amount of
2166 1 .. bitsize / 2 - 1 instead. */
2167 if (rotate
2168 && CONST_INT_P (op1)
2169 && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
2170 GET_MODE_BITSIZE (scalar_mode) - 1))
2171 {
2172 op1 = GEN_INT (GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2173 left = !left;
2174 code = left ? LROTATE_EXPR : RROTATE_EXPR;
2175 }
2176
2177 /* Rotation of 16-bit values by 8 bits is effectively equivalent to a bswaphi.
2178 Note that this is not the case for bigger values. For instance a rotation
2179 of 0x01020304 by 16 bits gives 0x03040102 which is different from
2180 0x04030201 (bswapsi). */
2181 if (rotate
2182 && CONST_INT_P (op1)
2183 && INTVAL (op1) == BITS_PER_UNIT
2184 && GET_MODE_SIZE (scalar_mode) == 2
2185 && optab_handler (bswap_optab, HImode) != CODE_FOR_nothing)
2186 return expand_unop (HImode, bswap_optab, shifted, NULL_RTX,
2187 unsignedp);
2188
2189 if (op1 == const0_rtx)
2190 return shifted;
2191
2192 /* Check whether it's cheaper to implement a left shift by a constant
2193 bit count by a sequence of additions. */
2194 if (code == LSHIFT_EXPR
2195 && CONST_INT_P (op1)
2196 && INTVAL (op1) > 0
2197 && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2198 && INTVAL (op1) < MAX_BITS_PER_WORD
2199 && (shift_cost (speed, mode, INTVAL (op1))
2200 > INTVAL (op1) * add_cost (speed, mode))
2201 && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2202 {
2203 int i;
2204 for (i = 0; i < INTVAL (op1); i++)
2205 {
2206 temp = force_reg (mode, shifted);
2207 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2208 unsignedp, OPTAB_LIB_WIDEN);
2209 }
2210 return shifted;
2211 }
2212
2213 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2214 {
2215 enum optab_methods methods;
2216
2217 if (attempt == 0)
2218 methods = OPTAB_DIRECT;
2219 else if (attempt == 1)
2220 methods = OPTAB_WIDEN;
2221 else
2222 methods = OPTAB_LIB_WIDEN;
2223
2224 if (rotate)
2225 {
2226 /* Widening does not work for rotation. */
2227 if (methods == OPTAB_WIDEN)
2228 continue;
2229 else if (methods == OPTAB_LIB_WIDEN)
2230 {
2231 /* If we have been unable to open-code this by a rotation,
2232 do it as the IOR of two shifts. I.e., to rotate A
2233 by N bits, compute
2234 (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2235 where C is the bitsize of A.
2236
2237 It is theoretically possible that the target machine might
2238 not be able to perform either shift and hence we would
2239 be making two libcalls rather than just the one for the
2240 shift (similarly if IOR could not be done). We will allow
2241 this extremely unlikely lossage to avoid complicating the
2242 code below. */
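/* For instance, with a 32-bit mode and a constant rotate-left by 3,
   this path computes (A << 3) | ((unsigned) A >> 29), since
   (-3) & 31 == 29; for a variable amount the AND with C - 1 keeps the
   other shift count in range even when the amount is zero. */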
2243
2244 rtx subtarget = target == shifted ? 0 : target;
2245 rtx new_amount, other_amount;
2246 rtx temp1;
2247
2248 new_amount = op1;
2249 if (op1 == const0_rtx)
2250 return shifted;
2251 else if (CONST_INT_P (op1))
2252 other_amount = GEN_INT (GET_MODE_BITSIZE (scalar_mode)
2253 - INTVAL (op1));
2254 else
2255 {
2256 other_amount
2257 = simplify_gen_unary (NEG, GET_MODE (op1),
2258 op1, GET_MODE (op1));
2259 HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
2260 other_amount
2261 = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2262 gen_int_mode (mask, GET_MODE (op1)));
2263 }
2264
2265 shifted = force_reg (mode, shifted);
2266
2267 temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2268 mode, shifted, new_amount, 0, 1);
2269 temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2270 mode, shifted, other_amount,
2271 subtarget, 1);
2272 return expand_binop (mode, ior_optab, temp, temp1, target,
2273 unsignedp, methods);
2274 }
2275
2276 temp = expand_binop (mode,
2277 left ? lrotate_optab : rrotate_optab,
2278 shifted, op1, target, unsignedp, methods);
2279 }
2280 else if (unsignedp)
2281 temp = expand_binop (mode,
2282 left ? lshift_optab : rshift_uns_optab,
2283 shifted, op1, target, unsignedp, methods);
2284
2285 /* Do arithmetic shifts.
2286 Also, if we are going to widen the operand, we can just as well
2287 use an arithmetic right-shift instead of a logical one. */
2288 if (temp == 0 && ! rotate
2289 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2290 {
2291 enum optab_methods methods1 = methods;
2292
2293 /* If trying to widen a log shift to an arithmetic shift,
2294 don't accept an arithmetic shift of the same size. */
2295 if (unsignedp)
2296 methods1 = OPTAB_MUST_WIDEN;
2297
2298 /* Arithmetic shift */
2299
2300 temp = expand_binop (mode,
2301 left ? lshift_optab : rshift_arith_optab,
2302 shifted, op1, target, unsignedp, methods1);
2303 }
2304
2305 /* We used to try extzv here for logical right shifts, but that was
2306 only useful for one machine, the VAX, and caused poor code
2307 generation there for lshrdi3, so the code was deleted and a
2308 define_expand for lshrsi3 was added to vax.md. */
2309 }
2310
2311 gcc_assert (temp);
2312 return temp;
2313 }
2314
2315 /* Output a shift instruction for expression code CODE,
2316 with SHIFTED being the rtx for the value to shift,
2317 and AMOUNT the amount to shift by.
2318 Store the result in the rtx TARGET, if that is convenient.
2319 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2320 Return the rtx for where the value is. */
2321
2322 rtx
2323 expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2324 int amount, rtx target, int unsignedp)
2325 {
2326 return expand_shift_1 (code, mode,
2327 shifted, GEN_INT (amount), target, unsignedp);
2328 }
2329
2330 /* Output a shift instruction for expression code CODE,
2331 with SHIFTED being the rtx for the value to shift,
2332 and AMOUNT the tree for the amount to shift by.
2333 Store the result in the rtx TARGET, if that is convenient.
2334 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2335 Return the rtx for where the value is. */
2336
2337 rtx
2338 expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
2339 tree amount, rtx target, int unsignedp)
2340 {
2341 return expand_shift_1 (code, mode,
2342 shifted, expand_normal (amount), target, unsignedp);
2343 }
2344
2345 \f
2346 /* Indicates the type of fixup needed after a constant multiplication.
2347 BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2348 the result should be negated, and ADD_VARIANT means that the
2349 multiplicand should be added to the result. */
2350 enum mult_variant {basic_variant, negate_variant, add_variant};
2351
2352 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2353 const struct mult_cost *, machine_mode mode);
2354 static bool choose_mult_variant (machine_mode, HOST_WIDE_INT,
2355 struct algorithm *, enum mult_variant *, int);
2356 static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
2357 const struct algorithm *, enum mult_variant);
2358 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2359 static rtx extract_high_half (machine_mode, rtx);
2360 static rtx expmed_mult_highpart (machine_mode, rtx, rtx, rtx, int, int);
2361 static rtx expmed_mult_highpart_optab (machine_mode, rtx, rtx, rtx,
2362 int, int);
2363 /* Compute and return the best algorithm for multiplying by T.
2364 The algorithm must cost less than COST_LIMIT.
2365 If retval.cost >= COST_LIMIT, no algorithm was found and all
2366 other fields of the returned struct are undefined.
2367 MODE is the machine mode of the multiplication. */
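/* As a concrete sketch of the kind of result this produces: multiplying
   by 10 can be decomposed as 10 = 5 << 1 with 5 = (1 << 2) + 1, giving
   a * 10 == ((a << 2) + a) << 1, i.e. one shift-add followed by a shift,
   provided those operations are cheaper than an actual multiply. */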
2368
2369 static void
2370 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2371 const struct mult_cost *cost_limit, machine_mode mode)
2372 {
2373 int m;
2374 struct algorithm *alg_in, *best_alg;
2375 struct mult_cost best_cost;
2376 struct mult_cost new_limit;
2377 int op_cost, op_latency;
2378 unsigned HOST_WIDE_INT orig_t = t;
2379 unsigned HOST_WIDE_INT q;
2380 int maxm, hash_index;
2381 bool cache_hit = false;
2382 enum alg_code cache_alg = alg_zero;
2383 bool speed = optimize_insn_for_speed_p ();
2384 machine_mode imode;
2385 struct alg_hash_entry *entry_ptr;
2386
2387 /* Indicate that no algorithm is yet found. If no algorithm
2388 is found, this value will be returned to indicate failure. */
2389 alg_out->cost.cost = cost_limit->cost + 1;
2390 alg_out->cost.latency = cost_limit->latency + 1;
2391
2392 if (cost_limit->cost < 0
2393 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2394 return;
2395
2396 /* Be prepared for vector modes. */
2397 imode = GET_MODE_INNER (mode);
2398 if (imode == VOIDmode)
2399 imode = mode;
2400
2401 maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2402
2403 /* Restrict the bits of "t" to the multiplication's mode. */
2404 t &= GET_MODE_MASK (imode);
2405
2406 /* t == 1 can be done at zero cost. */
2407 if (t == 1)
2408 {
2409 alg_out->ops = 1;
2410 alg_out->cost.cost = 0;
2411 alg_out->cost.latency = 0;
2412 alg_out->op[0] = alg_m;
2413 return;
2414 }
2415
2416 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2417 fail now. */
2418 if (t == 0)
2419 {
2420 if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2421 return;
2422 else
2423 {
2424 alg_out->ops = 1;
2425 alg_out->cost.cost = zero_cost (speed);
2426 alg_out->cost.latency = zero_cost (speed);
2427 alg_out->op[0] = alg_zero;
2428 return;
2429 }
2430 }
2431
2432 /* We'll be needing a couple extra algorithm structures now. */
2433
2434 alg_in = XALLOCA (struct algorithm);
2435 best_alg = XALLOCA (struct algorithm);
2436 best_cost = *cost_limit;
2437
2438 /* Compute the hash index. */
2439 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2440
2441 /* See if we already know what to do for T. */
2442 entry_ptr = alg_hash_entry_ptr (hash_index);
2443 if (entry_ptr->t == t
2444 && entry_ptr->mode == mode
2446 && entry_ptr->speed == speed
2447 && entry_ptr->alg != alg_unknown)
2448 {
2449 cache_alg = entry_ptr->alg;
2450
2451 if (cache_alg == alg_impossible)
2452 {
2453 /* The cache tells us that it's impossible to synthesize
2454 multiplication by T within entry_ptr->cost. */
2455 if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2456 /* COST_LIMIT is at least as restrictive as the one
2457 recorded in the hash table, in which case we have no
2458 hope of synthesizing a multiplication. Just
2459 return. */
2460 return;
2461
2462 /* If we get here, COST_LIMIT is less restrictive than the
2463 one recorded in the hash table, so we may be able to
2464 synthesize a multiplication. Proceed as if we didn't
2465 have the cache entry. */
2466 }
2467 else
2468 {
2469 if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2470 /* The cached algorithm shows that this multiplication
2471 requires more cost than COST_LIMIT. Just return. This
2472 way, we don't clobber this cache entry with
2473 alg_impossible but retain useful information. */
2474 return;
2475
2476 cache_hit = true;
2477
2478 switch (cache_alg)
2479 {
2480 case alg_shift:
2481 goto do_alg_shift;
2482
2483 case alg_add_t_m2:
2484 case alg_sub_t_m2:
2485 goto do_alg_addsub_t_m2;
2486
2487 case alg_add_factor:
2488 case alg_sub_factor:
2489 goto do_alg_addsub_factor;
2490
2491 case alg_add_t2_m:
2492 goto do_alg_add_t2_m;
2493
2494 case alg_sub_t2_m:
2495 goto do_alg_sub_t2_m;
2496
2497 default:
2498 gcc_unreachable ();
2499 }
2500 }
2501 }
2502
2503 /* If we have a group of zero bits at the low-order part of T, try
2504 multiplying by the remaining bits and then doing a shift. */
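/* For example, t = 40 has three low zero bits (m = 3), so we recurse on
   q = 40 >> 3 = 5 and finish with a left shift by 3:
   a * 40 == (a * 5) << 3. */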
2505
2506 if ((t & 1) == 0)
2507 {
2508 do_alg_shift:
2509 m = floor_log2 (t & -t); /* m = number of low zero bits */
2510 if (m < maxm)
2511 {
2512 q = t >> m;
2513 /* The function expand_shift will choose between a shift and
2514 a sequence of additions, so the observed cost is given as
2515 MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */
2516 op_cost = m * add_cost (speed, mode);
2517 if (shift_cost (speed, mode, m) < op_cost)
2518 op_cost = shift_cost (speed, mode, m);
2519 new_limit.cost = best_cost.cost - op_cost;
2520 new_limit.latency = best_cost.latency - op_cost;
2521 synth_mult (alg_in, q, &new_limit, mode);
2522
2523 alg_in->cost.cost += op_cost;
2524 alg_in->cost.latency += op_cost;
2525 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2526 {
2527 struct algorithm *x;
2528 best_cost = alg_in->cost;
2529 x = alg_in, alg_in = best_alg, best_alg = x;
2530 best_alg->log[best_alg->ops] = m;
2531 best_alg->op[best_alg->ops] = alg_shift;
2532 }
2533
2534 /* See if treating ORIG_T as a signed number yields a better
2535 sequence. Try this sequence only for a negative ORIG_T
2536 as it would be useless for a non-negative ORIG_T. */
2537 if ((HOST_WIDE_INT) orig_t < 0)
2538 {
2539 /* Shift ORIG_T as follows because a right shift of a
2540 negative-valued signed type is implementation
2541 defined. */
2542 q = ~(~orig_t >> m);
2543 /* The function expand_shift will choose between a shift
2544 and a sequence of additions, so the observed cost is
2545 given as MIN (m * add_cost(speed, mode),
2546 shift_cost(speed, mode, m)). */
2547 op_cost = m * add_cost (speed, mode);
2548 if (shift_cost (speed, mode, m) < op_cost)
2549 op_cost = shift_cost (speed, mode, m);
2550 new_limit.cost = best_cost.cost - op_cost;
2551 new_limit.latency = best_cost.latency - op_cost;
2552 synth_mult (alg_in, q, &new_limit, mode);
2553
2554 alg_in->cost.cost += op_cost;
2555 alg_in->cost.latency += op_cost;
2556 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2557 {
2558 struct algorithm *x;
2559 best_cost = alg_in->cost;
2560 x = alg_in, alg_in = best_alg, best_alg = x;
2561 best_alg->log[best_alg->ops] = m;
2562 best_alg->op[best_alg->ops] = alg_shift;
2563 }
2564 }
2565 }
2566 if (cache_hit)
2567 goto done;
2568 }
2569
2570 /* If we have an odd number, add or subtract one. */
2571 if ((t & 1) != 0)
2572 {
2573 unsigned HOST_WIDE_INT w;
2574
2575 do_alg_addsub_t_m2:
2576 for (w = 1; (w & t) != 0; w <<= 1)
2577 ;
2578 /* If T was -1, then W will be zero after the loop. This is another
2579 case where T ends with ...111. Handling this by multiplying by
2580 (T + 1) and subtracting 1 produces slightly better code and selects
2581 an algorithm much faster than treating it like the ...0111 case
2582 below. */
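/* E.g. t = 15 ends in ...1111, so it is handled as a multiplication by
   16 followed by one subtraction: a * 15 == (a << 4) - a. */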
2583 if (w == 0
2584 || (w > 2
2585 /* Reject the case where t is 3.
2586 Thus we prefer addition in that case. */
2587 && t != 3))
2588 {
2589 /* T ends with ...111. Multiply by (T + 1) and subtract 1. */
2590
2591 op_cost = add_cost (speed, mode);
2592 new_limit.cost = best_cost.cost - op_cost;
2593 new_limit.latency = best_cost.latency - op_cost;
2594 synth_mult (alg_in, t + 1, &new_limit, mode);
2595
2596 alg_in->cost.cost += op_cost;
2597 alg_in->cost.latency += op_cost;
2598 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2599 {
2600 struct algorithm *x;
2601 best_cost = alg_in->cost;
2602 x = alg_in, alg_in = best_alg, best_alg = x;
2603 best_alg->log[best_alg->ops] = 0;
2604 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2605 }
2606 }
2607 else
2608 {
2609 /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
2610
2611 op_cost = add_cost (speed, mode);
2612 new_limit.cost = best_cost.cost - op_cost;
2613 new_limit.latency = best_cost.latency - op_cost;
2614 synth_mult (alg_in, t - 1, &new_limit, mode);
2615
2616 alg_in->cost.cost += op_cost;
2617 alg_in->cost.latency += op_cost;
2618 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2619 {
2620 struct algorithm *x;
2621 best_cost = alg_in->cost;
2622 x = alg_in, alg_in = best_alg, best_alg = x;
2623 best_alg->log[best_alg->ops] = 0;
2624 best_alg->op[best_alg->ops] = alg_add_t_m2;
2625 }
2626 }
2627
2628 /* We may be able to calculate a * -7, a * -15, a * -31, etc.
2629 quickly with a - a * n for some appropriate constant n. */
2630 m = exact_log2 (-orig_t + 1);
2631 if (m >= 0 && m < maxm)
2632 {
2633 op_cost = shiftsub1_cost (speed, mode, m);
2634 new_limit.cost = best_cost.cost - op_cost;
2635 new_limit.latency = best_cost.latency - op_cost;
2636 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2637 &new_limit, mode);
2638
2639 alg_in->cost.cost += op_cost;
2640 alg_in->cost.latency += op_cost;
2641 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2642 {
2643 struct algorithm *x;
2644 best_cost = alg_in->cost;
2645 x = alg_in, alg_in = best_alg, best_alg = x;
2646 best_alg->log[best_alg->ops] = m;
2647 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2648 }
2649 }
2650
2651 if (cache_hit)
2652 goto done;
2653 }
2654
2655 /* Look for factors of t of the form
2656 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2657 If we find such a factor, we can multiply by t using an algorithm that
2658 multiplies by q, shift the result by m and add/subtract it to itself.
2659
2660 We search for large factors first and loop down, even if large factors
2661 are less probable than small; if we find a large factor we will find a
2662 good sequence quickly, and therefore be able to prune (by decreasing
2663 COST_LIMIT) the search. */
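/* As an example of this case, t = 45 factors as 3 * (2**4 - 1), so we
   can recurse on q = 3 and finish with a single shift-and-subtract:
   a * 45 == ((a * 3) << 4) - (a * 3). */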
2664
2665 do_alg_addsub_factor:
2666 for (m = floor_log2 (t - 1); m >= 2; m--)
2667 {
2668 unsigned HOST_WIDE_INT d;
2669
2670 d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2671 if (t % d == 0 && t > d && m < maxm
2672 && (!cache_hit || cache_alg == alg_add_factor))
2673 {
2674 /* If the target has a cheap shift-and-add instruction use
2675 that in preference to a shift insn followed by an add insn.
2676 Assume that the shift-and-add is "atomic" with a latency
2677 equal to its cost, otherwise assume that on superscalar
2678 hardware the shift may be executed concurrently with the
2679 earlier steps in the algorithm. */
2680 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2681 if (shiftadd_cost (speed, mode, m) < op_cost)
2682 {
2683 op_cost = shiftadd_cost (speed, mode, m);
2684 op_latency = op_cost;
2685 }
2686 else
2687 op_latency = add_cost (speed, mode);
2688
2689 new_limit.cost = best_cost.cost - op_cost;
2690 new_limit.latency = best_cost.latency - op_latency;
2691 synth_mult (alg_in, t / d, &new_limit, mode);
2692
2693 alg_in->cost.cost += op_cost;
2694 alg_in->cost.latency += op_latency;
2695 if (alg_in->cost.latency < op_cost)
2696 alg_in->cost.latency = op_cost;
2697 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2698 {
2699 struct algorithm *x;
2700 best_cost = alg_in->cost;
2701 x = alg_in, alg_in = best_alg, best_alg = x;
2702 best_alg->log[best_alg->ops] = m;
2703 best_alg->op[best_alg->ops] = alg_add_factor;
2704 }
2705 /* Other factors will have been taken care of in the recursion. */
2706 break;
2707 }
2708
2709 d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2710 if (t % d == 0 && t > d && m < maxm
2711 && (!cache_hit || cache_alg == alg_sub_factor))
2712 {
2713 /* If the target has a cheap shift-and-subtract insn use
2714 that in preference to a shift insn followed by a sub insn.
2715 Assume that the shift-and-sub is "atomic" with a latency
2716 equal to its cost, otherwise assume that on superscalar
2717 hardware the shift may be executed concurrently with the
2718 earlier steps in the algorithm. */
2719 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2720 if (shiftsub0_cost (speed, mode, m) < op_cost)
2721 {
2722 op_cost = shiftsub0_cost (speed, mode, m);
2723 op_latency = op_cost;
2724 }
2725 else
2726 op_latency = add_cost (speed, mode);
2727
2728 new_limit.cost = best_cost.cost - op_cost;
2729 new_limit.latency = best_cost.latency - op_latency;
2730 synth_mult (alg_in, t / d, &new_limit, mode);
2731
2732 alg_in->cost.cost += op_cost;
2733 alg_in->cost.latency += op_latency;
2734 if (alg_in->cost.latency < op_cost)
2735 alg_in->cost.latency = op_cost;
2736 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2737 {
2738 struct algorithm *x;
2739 best_cost = alg_in->cost;
2740 x = alg_in, alg_in = best_alg, best_alg = x;
2741 best_alg->log[best_alg->ops] = m;
2742 best_alg->op[best_alg->ops] = alg_sub_factor;
2743 }
2744 break;
2745 }
2746 }
2747 if (cache_hit)
2748 goto done;
2749
2750 /* Try shift-and-add (load effective address) instructions,
2751 i.e. do a*3, a*5, a*9. */
2752 if ((t & 1) != 0)
2753 {
2754 do_alg_add_t2_m:
2755 q = t - 1;
2756 q = q & -q;
2757 m = exact_log2 (q);
2758 if (m >= 0 && m < maxm)
2759 {
2760 op_cost = shiftadd_cost (speed, mode, m);
2761 new_limit.cost = best_cost.cost - op_cost;
2762 new_limit.latency = best_cost.latency - op_cost;
2763 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2764
2765 alg_in->cost.cost += op_cost;
2766 alg_in->cost.latency += op_cost;
2767 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2768 {
2769 struct algorithm *x;
2770 best_cost = alg_in->cost;
2771 x = alg_in, alg_in = best_alg, best_alg = x;
2772 best_alg->log[best_alg->ops] = m;
2773 best_alg->op[best_alg->ops] = alg_add_t2_m;
2774 }
2775 }
2776 if (cache_hit)
2777 goto done;
2778
2779 do_alg_sub_t2_m:
2780 q = t + 1;
2781 q = q & -q;
2782 m = exact_log2 (q);
2783 if (m >= 0 && m < maxm)
2784 {
2785 op_cost = shiftsub0_cost (speed, mode, m);
2786 new_limit.cost = best_cost.cost - op_cost;
2787 new_limit.latency = best_cost.latency - op_cost;
2788 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2789
2790 alg_in->cost.cost += op_cost;
2791 alg_in->cost.latency += op_cost;
2792 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2793 {
2794 struct algorithm *x;
2795 best_cost = alg_in->cost;
2796 x = alg_in, alg_in = best_alg, best_alg = x;
2797 best_alg->log[best_alg->ops] = m;
2798 best_alg->op[best_alg->ops] = alg_sub_t2_m;
2799 }
2800 }
2801 if (cache_hit)
2802 goto done;
2803 }
2804
2805 done:
2806 /* If best_cost has not decreased, we have not found any algorithm. */
2807 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2808 {
2809 /* We failed to find an algorithm. Record alg_impossible for
2810 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2811 we are asked to find an algorithm for T within the same or
2812 lower COST_LIMIT, we can immediately return to the
2813 caller. */
2814 entry_ptr->t = t;
2815 entry_ptr->mode = mode;
2816 entry_ptr->speed = speed;
2817 entry_ptr->alg = alg_impossible;
2818 entry_ptr->cost = *cost_limit;
2819 return;
2820 }
2821
2822 /* Cache the result. */
2823 if (!cache_hit)
2824 {
2825 entry_ptr->t = t;
2826 entry_ptr->mode = mode;
2827 entry_ptr->speed = speed;
2828 entry_ptr->alg = best_alg->op[best_alg->ops];
2829 entry_ptr->cost.cost = best_cost.cost;
2830 entry_ptr->cost.latency = best_cost.latency;
2831 }
2832
2833 /* If we are getting too long a sequence for `struct algorithm'
2834 to record, make this search fail. */
2835 if (best_alg->ops == MAX_BITS_PER_WORD)
2836 return;
2837
2838 /* Copy the algorithm from temporary space to the space at alg_out.
2839 We avoid using structure assignment because the majority of
2840 best_alg is normally undefined, and this is a critical function. */
2841 alg_out->ops = best_alg->ops + 1;
2842 alg_out->cost = best_cost;
2843 memcpy (alg_out->op, best_alg->op,
2844 alg_out->ops * sizeof *alg_out->op);
2845 memcpy (alg_out->log, best_alg->log,
2846 alg_out->ops * sizeof *alg_out->log);
2847 }
2848 \f
2849 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2850 Try three variations:
2851
2852 - a shift/add sequence based on VAL itself
2853 - a shift/add sequence based on -VAL, followed by a negation
2854 - a shift/add sequence based on VAL - 1, followed by an addition.
2855
2856 Return true if the cheapest of these cost less than MULT_COST,
2857 describing the algorithm in *ALG and final fixup in *VARIANT. */
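/* As a rough illustration, writing x for the multiplicand: for VAL = -7
   the negate_variant path synthesizes 7 (e.g. as (x << 3) - x) and
   negates the result, while for VAL = 9 the add_variant path
   synthesizes VAL - 1 = 8 as x << 3 and then adds x back in; whichever
   complete sequence costs least is the one returned. */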
2858
2859 static bool
2860 choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
2861 struct algorithm *alg, enum mult_variant *variant,
2862 int mult_cost)
2863 {
2864 struct algorithm alg2;
2865 struct mult_cost limit;
2866 int op_cost;
2867 bool speed = optimize_insn_for_speed_p ();
2868
2869 /* Fail quickly for impossible bounds. */
2870 if (mult_cost < 0)
2871 return false;
2872
2873 /* Ensure that mult_cost provides a reasonable upper bound.
2874 Any constant multiplication can be performed with fewer
2875 than 2 * bits additions. */
2876 op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
2877 if (mult_cost > op_cost)
2878 mult_cost = op_cost;
2879
2880 *variant = basic_variant;
2881 limit.cost = mult_cost;
2882 limit.latency = mult_cost;
2883 synth_mult (alg, val, &limit, mode);
2884
2885 /* This works only if the negated value actually fits in an
2886 `unsigned int'. */
2887 if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
2888 {
2889 op_cost = neg_cost (speed, mode);
2890 if (MULT_COST_LESS (&alg->cost, mult_cost))
2891 {
2892 limit.cost = alg->cost.cost - op_cost;
2893 limit.latency = alg->cost.latency - op_cost;
2894 }
2895 else
2896 {
2897 limit.cost = mult_cost - op_cost;
2898 limit.latency = mult_cost - op_cost;
2899 }
2900
2901 synth_mult (&alg2, -val, &limit, mode);
2902 alg2.cost.cost += op_cost;
2903 alg2.cost.latency += op_cost;
2904 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2905 *alg = alg2, *variant = negate_variant;
2906 }
2907
2908 /* This proves very useful for division-by-constant. */
2909 op_cost = add_cost (speed, mode);
2910 if (MULT_COST_LESS (&alg->cost, mult_cost))
2911 {
2912 limit.cost = alg->cost.cost - op_cost;
2913 limit.latency = alg->cost.latency - op_cost;
2914 }
2915 else
2916 {
2917 limit.cost = mult_cost - op_cost;
2918 limit.latency = mult_cost - op_cost;
2919 }
2920
2921 synth_mult (&alg2, val - 1, &limit, mode);
2922 alg2.cost.cost += op_cost;
2923 alg2.cost.latency += op_cost;
2924 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2925 *alg = alg2, *variant = add_variant;
2926
2927 return MULT_COST_LESS (&alg->cost, mult_cost);
2928 }
2929
2930 /* A subroutine of expand_mult, used for constant multiplications.
2931 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2932 convenient. Use the shift/add sequence described by ALG and apply
2933 the final fixup specified by VARIANT. */
2934
2935 static rtx
2936 expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
2937 rtx target, const struct algorithm *alg,
2938 enum mult_variant variant)
2939 {
2940 HOST_WIDE_INT val_so_far;
2941 rtx_insn *insn;
2942 rtx accum, tem;
2943 int opno;
2944 machine_mode nmode;
2945
2946 /* Avoid referencing memory over and over, and avoid invalid sharing
2947 on SUBREGs. */
2948 op0 = force_reg (mode, op0);
2949
2950 /* ACCUM starts out either as OP0 or as a zero, depending on
2951 the first operation. */
2952
2953 if (alg->op[0] == alg_zero)
2954 {
2955 accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
2956 val_so_far = 0;
2957 }
2958 else if (alg->op[0] == alg_m)
2959 {
2960 accum = copy_to_mode_reg (mode, op0);
2961 val_so_far = 1;
2962 }
2963 else
2964 gcc_unreachable ();
2965
2966 for (opno = 1; opno < alg->ops; opno++)
2967 {
2968 int log = alg->log[opno];
2969 rtx shift_subtarget = optimize ? 0 : accum;
2970 rtx add_target
2971 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2972 && !optimize)
2973 ? target : 0;
2974 rtx accum_target = optimize ? 0 : accum;
2975 rtx accum_inner;
2976
2977 switch (alg->op[opno])
2978 {
2979 case alg_shift:
2980 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2981 /* REG_EQUAL note will be attached to the following insn. */
2982 emit_move_insn (accum, tem);
2983 val_so_far <<= log;
2984 break;
2985
2986 case alg_add_t_m2:
2987 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2988 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2989 add_target ? add_target : accum_target);
2990 val_so_far += (HOST_WIDE_INT) 1 << log;
2991 break;
2992
2993 case alg_sub_t_m2:
2994 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2995 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2996 add_target ? add_target : accum_target);
2997 val_so_far -= (HOST_WIDE_INT) 1 << log;
2998 break;
2999
3000 case alg_add_t2_m:
3001 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3002 log, shift_subtarget, 0);
3003 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3004 add_target ? add_target : accum_target);
3005 val_so_far = (val_so_far << log) + 1;
3006 break;
3007
3008 case alg_sub_t2_m:
3009 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3010 log, shift_subtarget, 0);
3011 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3012 add_target ? add_target : accum_target);
3013 val_so_far = (val_so_far << log) - 1;
3014 break;
3015
3016 case alg_add_factor:
3017 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3018 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3019 add_target ? add_target : accum_target);
3020 val_so_far += val_so_far << log;
3021 break;
3022
3023 case alg_sub_factor:
3024 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3025 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3026 (add_target
3027 ? add_target : (optimize ? 0 : tem)));
3028 val_so_far = (val_so_far << log) - val_so_far;
3029 break;
3030
3031 default:
3032 gcc_unreachable ();
3033 }
3034
3035 if (SCALAR_INT_MODE_P (mode))
3036 {
3037 /* Write a REG_EQUAL note on the last insn so that we can cse
3038 multiplication sequences. Note that if ACCUM is a SUBREG,
3039 we've set the inner register and must properly indicate that. */
3040 tem = op0, nmode = mode;
3041 accum_inner = accum;
3042 if (GET_CODE (accum) == SUBREG)
3043 {
3044 accum_inner = SUBREG_REG (accum);
3045 nmode = GET_MODE (accum_inner);
3046 tem = gen_lowpart (nmode, op0);
3047 }
3048
3049 insn = get_last_insn ();
3050 set_dst_reg_note (insn, REG_EQUAL,
3051 gen_rtx_MULT (nmode, tem,
3052 gen_int_mode (val_so_far, nmode)),
3053 accum_inner);
3054 }
3055 }
3056
3057 if (variant == negate_variant)
3058 {
3059 val_so_far = -val_so_far;
3060 accum = expand_unop (mode, neg_optab, accum, target, 0);
3061 }
3062 else if (variant == add_variant)
3063 {
3064 val_so_far = val_so_far + 1;
3065 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3066 }
3067
3068 /* Compare only the bits of val and val_so_far that are significant
3069 in the result mode, to avoid sign-/zero-extension confusion. */
3070 nmode = GET_MODE_INNER (mode);
3071 if (nmode == VOIDmode)
3072 nmode = mode;
3073 val &= GET_MODE_MASK (nmode);
3074 val_so_far &= GET_MODE_MASK (nmode);
3075 gcc_assert (val == val_so_far);
3076
3077 return accum;
3078 }
3079
3080 /* Perform a multiplication and return an rtx for the result.
3081 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3082 TARGET is a suggestion for where to store the result (an rtx).
3083
3084 We check specially for a constant integer as OP1.
3085 If you want this check for OP0 as well, then before calling
3086 you should swap the two operands if OP0 would be constant. */
3087
3088 rtx
3089 expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3090 int unsignedp)
3091 {
3092 enum mult_variant variant;
3093 struct algorithm algorithm;
3094 rtx scalar_op1;
3095 int max_cost;
3096 bool speed = optimize_insn_for_speed_p ();
3097 bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3098
3099 if (CONSTANT_P (op0))
3100 std::swap (op0, op1);
3101
3102 /* For vectors, there are several simplifications that can be made if
3103 all elements of the vector constant are identical. */
3104 scalar_op1 = op1;
3105 if (GET_CODE (op1) == CONST_VECTOR)
3106 {
3107 int i, n = CONST_VECTOR_NUNITS (op1);
3108 scalar_op1 = CONST_VECTOR_ELT (op1, 0);
3109 for (i = 1; i < n; ++i)
3110 if (!rtx_equal_p (scalar_op1, CONST_VECTOR_ELT (op1, i)))
3111 goto skip_scalar;
3112 }
3113
3114 if (INTEGRAL_MODE_P (mode))
3115 {
3116 rtx fake_reg;
3117 HOST_WIDE_INT coeff;
3118 bool is_neg;
3119 int mode_bitsize;
3120
3121 if (op1 == CONST0_RTX (mode))
3122 return op1;
3123 if (op1 == CONST1_RTX (mode))
3124 return op0;
3125 if (op1 == CONSTM1_RTX (mode))
3126 return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3127 op0, target, 0);
3128
3129 if (do_trapv)
3130 goto skip_synth;
3131
3132 /* If mode is integer vector mode, check if the backend supports
3133 vector lshift (by scalar or vector) at all. If not, we can't use
3134 synthesized multiply. */
3135 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3136 && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
3137 && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
3138 goto skip_synth;
3139
3140 /* These are the operations that are potentially turned into
3141 a sequence of shifts and additions. */
3142 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3143
3144 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3145 less than or equal in size to `unsigned int' this doesn't matter.
3146 If the mode is larger than `unsigned int', then synth_mult works
3147 only if the constant value exactly fits in an `unsigned int' without
3148 any truncation. This means that multiplying by negative values does
3149 not work; results are off by 2^32 on a 32-bit machine. */
3150 if (CONST_INT_P (scalar_op1))
3151 {
3152 coeff = INTVAL (scalar_op1);
3153 is_neg = coeff < 0;
3154 }
3155 #if TARGET_SUPPORTS_WIDE_INT
3156 else if (CONST_WIDE_INT_P (scalar_op1))
3157 #else
3158 else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3159 #endif
3160 {
3161 int shift = wi::exact_log2 (std::make_pair (scalar_op1, mode));
3162 /* Perfect power of 2 (other than 1, which is handled above). */
3163 if (shift > 0)
3164 return expand_shift (LSHIFT_EXPR, mode, op0,
3165 shift, target, unsignedp);
3166 else
3167 goto skip_synth;
3168 }
3169 else
3170 goto skip_synth;
3171
3172 /* We used to test optimize here, on the grounds that it's better to
3173 produce a smaller program when -O is not used. But this causes
3174 such a terrible slowdown sometimes that it seems better to always
3175 use synth_mult. */
3176
3177 /* Special case powers of two. */
3178 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3179 && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3180 return expand_shift (LSHIFT_EXPR, mode, op0,
3181 floor_log2 (coeff), target, unsignedp);
3182
3183 fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3184
3185 /* Attempt to handle multiplication of DImode values by negative
3186 coefficients, by performing the multiplication by a positive
3187 multiplier and then negating the result. */
3188 if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3189 {
3190 /* It's safe to use -coeff even for INT_MIN, as the
3191 result is interpreted as an unsigned coefficient.
3192 Exclude cost of op0 from max_cost to match the cost
3193 calculation of the synth_mult. */
3194 coeff = -(unsigned HOST_WIDE_INT) coeff;
3195 max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed)
3196 - neg_cost (speed, mode));
3197 if (max_cost <= 0)
3198 goto skip_synth;
3199
3200 /* Special case powers of two. */
3201 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3202 {
3203 rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3204 floor_log2 (coeff), target, unsignedp);
3205 return expand_unop (mode, neg_optab, temp, target, 0);
3206 }
3207
3208 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3209 max_cost))
3210 {
3211 rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3212 &algorithm, variant);
3213 return expand_unop (mode, neg_optab, temp, target, 0);
3214 }
3215 goto skip_synth;
3216 }
3217
3218 /* Exclude cost of op0 from max_cost to match the cost
3219 calculation of the synth_mult. */
3220 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed);
3221 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3222 return expand_mult_const (mode, op0, coeff, target,
3223 &algorithm, variant);
3224 }
3225 skip_synth:
3226
3227 /* Expand x*2.0 as x+x. */
3228 if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1))
3229 {
3230 REAL_VALUE_TYPE d;
3231 REAL_VALUE_FROM_CONST_DOUBLE (d, scalar_op1);
3232
3233 if (REAL_VALUES_EQUAL (d, dconst2))
3234 {
3235 op0 = force_reg (GET_MODE (op0), op0);
3236 return expand_binop (mode, add_optab, op0, op0,
3237 target, unsignedp, OPTAB_LIB_WIDEN);
3238 }
3239 }
3240 skip_scalar:
3241
3242 /* This used to use umul_optab if unsigned, but for non-widening multiply
3243 there is no difference between signed and unsigned. */
3244 op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3245 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3246 gcc_assert (op0);
3247 return op0;
3248 }
3249
3250 /* Return a cost estimate for multiplying a register by the given
3251 COEFFicient in the given MODE and SPEED. */
3252
3253 int
3254 mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
3255 {
3256 int max_cost;
3257 struct algorithm algorithm;
3258 enum mult_variant variant;
3259
3260 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3261 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg), speed);
3262 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3263 return algorithm.cost.cost;
3264 else
3265 return max_cost;
3266 }
3267
3268 /* Perform a widening multiplication and return an rtx for the result.
3269 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3270 TARGET is a suggestion for where to store the result (an rtx).
3271 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3272 or smul_widen_optab.
3273
3274 We check specially for a constant integer as OP1, comparing the
3275 cost of a widening multiply against the cost of a sequence of shifts
3276 and adds. */
3277
3278 rtx
3279 expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3280 int unsignedp, optab this_optab)
3281 {
3282 bool speed = optimize_insn_for_speed_p ();
3283 rtx cop1;
3284
3285 if (CONST_INT_P (op1)
3286 && GET_MODE (op0) != VOIDmode
3287 && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3288 this_optab == umul_widen_optab))
3289 && CONST_INT_P (cop1)
3290 && (INTVAL (cop1) >= 0
3291 || HWI_COMPUTABLE_MODE_P (mode)))
3292 {
3293 HOST_WIDE_INT coeff = INTVAL (cop1);
3294 int max_cost;
3295 enum mult_variant variant;
3296 struct algorithm algorithm;
3297
3298 if (coeff == 0)
3299 return CONST0_RTX (mode);
3300
3301 /* Special case powers of two. */
3302 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3303 {
3304 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3305 return expand_shift (LSHIFT_EXPR, mode, op0,
3306 floor_log2 (coeff), target, unsignedp);
3307 }
3308
3309 /* Exclude cost of op0 from max_cost to match the cost
3310 calculation of the synth_mult. */
3311 max_cost = mul_widen_cost (speed, mode);
3312 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3313 max_cost))
3314 {
3315 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3316 return expand_mult_const (mode, op0, coeff, target,
3317 &algorithm, variant);
3318 }
3319 }
3320 return expand_binop (mode, this_optab, op0, op1, target,
3321 unsignedp, OPTAB_LIB_WIDEN);
3322 }
3323 \f
3324 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3325 replace division by D, and put the least significant N bits of the result
3326 in *MULTIPLIER_PTR and return the most significant bit.
3327
3328 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT); the
3329 needed precision is in PRECISION (should be <= N).
3330
3331 PRECISION should be as small as possible so this function can choose
3332 the multiplier more freely.
3333
3334 The rounded-up logarithm of D is placed in *LGUP_PTR. A shift count that
3335 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3336
3337 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3338 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
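/* As a worked example, assuming 32-bit operations (N = PRECISION = 32):
   for D = 3 this yields the multiplier 0xAAAAAAAB with a post-shift of 1
   and a zero "high bit", so an unsigned x / 3 can be computed as the
   high 32 bits of x * 0xAAAAAAAB, shifted right by 1. */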
3339
3340 unsigned HOST_WIDE_INT
3341 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3342 unsigned HOST_WIDE_INT *multiplier_ptr,
3343 int *post_shift_ptr, int *lgup_ptr)
3344 {
3345 int lgup, post_shift;
3346 int pow, pow2;
3347
3348 /* lgup = ceil(log2(divisor)); */
3349 lgup = ceil_log2 (d);
3350
3351 gcc_assert (lgup <= n);
3352
3353 pow = n + lgup;
3354 pow2 = n + lgup - precision;
3355
3356 /* mlow = 2^(N + lgup)/d */
3357 wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
3358 wide_int mlow = wi::udiv_trunc (val, d);
3359
3360 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3361 val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3362 wide_int mhigh = wi::udiv_trunc (val, d);
3363
3364 /* If precision == N, then mlow, mhigh exceed 2^N
3365 (but they do not exceed 2^(N+1)). */
3366
3367 /* Reduce to lowest terms. */
3368 for (post_shift = lgup; post_shift > 0; post_shift--)
3369 {
3370 unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3371 HOST_BITS_PER_WIDE_INT);
3372 unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3373 HOST_BITS_PER_WIDE_INT);
3374 if (ml_lo >= mh_lo)
3375 break;
3376
3377 mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3378 mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
3379 }
3380
3381 *post_shift_ptr = post_shift;
3382 *lgup_ptr = lgup;
3383 if (n < HOST_BITS_PER_WIDE_INT)
3384 {
3385 unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3386 *multiplier_ptr = mhigh.to_uhwi () & mask;
3387 return mhigh.to_uhwi () >= mask;
3388 }
3389 else
3390 {
3391 *multiplier_ptr = mhigh.to_uhwi ();
3392 return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
3393 }
3394 }
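/* Hand-worked example (illustrative, not part of the original code):
   for D == 7 with N == PRECISION == 32 we get lgup == 3,
   mhigh == floor ((2^35 + 2^3) / 7) == 0x124924925, and the reduction
   loop stops immediately, so *POST_SHIFT_PTR == 3.  mhigh does not fit
   in 32 bits, hence *MULTIPLIER_PTR == 0x24924925 and the return value
   is 1; callers such as expand_divmod then use the highpart-multiply,
   subtract, shift, add, shift fix-up sequence for the extra bit.  */
#if 0
/* Throwaway sketch (hypothetical, hand-written, assuming a 64-bit
   HOST_WIDE_INT): evaluate x / 7 on the host from the values above,
   mirroring the mh != 0 path used by expand_divmod for
   TRUNC_DIV_EXPR.  */
static unsigned int
udiv7_via_multiplier (unsigned int x)
{
  unsigned int t
    = ((unsigned HOST_WIDE_INT) x * 0x24924925) >> 32;
  return (((x - t) >> 1) + t) >> 2;
}
#endif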
3395
3396 /* Compute the inverse of X mod 2**N, i.e., find Y such that X * Y is
3397 congruent to 1 (mod 2**N). */
3398
3399 static unsigned HOST_WIDE_INT
3400 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3401 {
3402 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3403
3404 /* The algorithm notes that the choice y = x satisfies
3405 x*y == 1 mod 2^3, since x is assumed odd.
3406 Each iteration doubles the number of bits of significance in y. */
3407
3408 unsigned HOST_WIDE_INT mask;
3409 unsigned HOST_WIDE_INT y = x;
3410 int nbit = 3;
3411
3412 mask = (n == HOST_BITS_PER_WIDE_INT
3413 ? ~(unsigned HOST_WIDE_INT) 0
3414 : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3415
3416 while (nbit < n)
3417 {
3418 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3419 nbit *= 2;
3420 }
3421 return y;
3422 }
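/* Hand-worked example (illustrative, not part of the original code):
   invert_mod2n (3, 8) starts with y == 3, which already satisfies
   3*3 == 9 == 1 (mod 2^3).  The first iteration gives
   y == 3*(2 - 9) & 0xff == 0xeb, correct mod 2^6, and the second gives
   y == 0xeb*(2 - 3*0xeb) & 0xff == 0xab; indeed 3*0xab == 513
   == 2*256 + 1, so 0xab is the inverse of 3 mod 2^8.  A throwaway
   sanity check of that property might look like the sketch below.  */
#if 0
static void
check_invert_mod2n_example (void)
{
  unsigned HOST_WIDE_INT y = invert_mod2n (3, 8);
  /* 3 * y must be congruent to 1 modulo 2^8.  */
  gcc_assert (((3 * y) & 0xff) == 1);
}
#endif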
3423
3424 /* Emit code to adjust ADJ_OPERAND after a multiplication of OP0 and OP1
3425 that was done with the wrong signedness flavor. ADJ_OPERAND is already
3426 the high half of the product OP0 x OP1. If UNSIGNEDP is nonzero, adjust
3427 the signed product to become unsigned; if UNSIGNEDP is zero, adjust the
3428 unsigned product to become signed.
3429
3430 The result is put in TARGET if that is convenient.
3431
3432 MODE is the mode of operation. */
3433
3434 rtx
3435 expand_mult_highpart_adjust (machine_mode mode, rtx adj_operand, rtx op0,
3436 rtx op1, rtx target, int unsignedp)
3437 {
3438 rtx tem;
3439 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3440
3441 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3442 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3443 tem = expand_and (mode, tem, op1, NULL_RTX);
3444 adj_operand
3445 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3446 adj_operand);
3447
3448 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3449 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3450 tem = expand_and (mode, tem, op0, NULL_RTX);
3451 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3452 target);
3453
3454 return target;
3455 }
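/* Sketch of the identity used above (explanatory, not from the original
   source): interpreting the operands as N-bit patterns,
     highpart_signed (OP0, OP1)
       == highpart_unsigned (OP0, OP1) - (OP0 < 0 ? OP1 : 0)
                                       - (OP1 < 0 ? OP0 : 0)
   modulo 2^N, and the reverse adjustment adds those two terms instead.
   The arithmetic right shifts by GET_MODE_BITSIZE (mode) - 1 above
   produce an all-ones mask exactly when the corresponding operand is
   negative, so each AND selects either the other operand or zero.  */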
3456
3457 /* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */
3458
3459 static rtx
3460 extract_high_half (machine_mode mode, rtx op)
3461 {
3462 machine_mode wider_mode;
3463
3464 if (mode == word_mode)
3465 return gen_highpart (mode, op);
3466
3467 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3468
3469 wider_mode = GET_MODE_WIDER_MODE (mode);
3470 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3471 GET_MODE_BITSIZE (mode), 0, 1);
3472 return convert_modes (mode, wider_mode, op, 0);
3473 }
3474
3475 /* Like expmed_mult_highpart, but only consider using a multiplication
3476 optab. OP1 is an rtx for the constant operand. */
3477
3478 static rtx
3479 expmed_mult_highpart_optab (machine_mode mode, rtx op0, rtx op1,
3480 rtx target, int unsignedp, int max_cost)
3481 {
3482 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3483 machine_mode wider_mode;
3484 optab moptab;
3485 rtx tem;
3486 int size;
3487 bool speed = optimize_insn_for_speed_p ();
3488
3489 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3490
3491 wider_mode = GET_MODE_WIDER_MODE (mode);
3492 size = GET_MODE_BITSIZE (mode);
3493
3494 /* Firstly, try using a multiplication insn that only generates the needed
3495 high part of the product, and in the sign flavor of unsignedp. */
3496 if (mul_highpart_cost (speed, mode) < max_cost)
3497 {
3498 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3499 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3500 unsignedp, OPTAB_DIRECT);
3501 if (tem)
3502 return tem;
3503 }
3504
3505 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3506 Need to adjust the result after the multiplication. */
3507 if (size - 1 < BITS_PER_WORD
3508 && (mul_highpart_cost (speed, mode)
3509 + 2 * shift_cost (speed, mode, size-1)
3510 + 4 * add_cost (speed, mode) < max_cost))
3511 {
3512 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3513 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3514 unsignedp, OPTAB_DIRECT);
3515 if (tem)
3516 /* We used the wrong signedness. Adjust the result. */
3517 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3518 tem, unsignedp);
3519 }
3520
3521 /* Try widening multiplication. */
3522 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3523 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3524 && mul_widen_cost (speed, wider_mode) < max_cost)
3525 {
3526 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3527 unsignedp, OPTAB_WIDEN);
3528 if (tem)
3529 return extract_high_half (mode, tem);
3530 }
3531
3532 /* Try widening the mode and performing a non-widening multiplication. */
3533 if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3534 && size - 1 < BITS_PER_WORD
3535 && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3536 < max_cost))
3537 {
3538 rtx_insn *insns;
3539 rtx wop0, wop1;
3540
3541 /* We need to widen the operands, for example to ensure the
3542 constant multiplier is correctly sign or zero extended.
3543 Use a sequence to clean up any instructions emitted by
3544 the conversions if things don't work out. */
3545 start_sequence ();
3546 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3547 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3548 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3549 unsignedp, OPTAB_WIDEN);
3550 insns = get_insns ();
3551 end_sequence ();
3552
3553 if (tem)
3554 {
3555 emit_insn (insns);
3556 return extract_high_half (mode, tem);
3557 }
3558 }
3559
3560 /* Try widening multiplication of opposite signedness, and adjust. */
3561 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3562 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3563 && size - 1 < BITS_PER_WORD
3564 && (mul_widen_cost (speed, wider_mode)
3565 + 2 * shift_cost (speed, mode, size-1)
3566 + 4 * add_cost (speed, mode) < max_cost))
3567 {
3568 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3569 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3570 if (tem != 0)
3571 {
3572 tem = extract_high_half (mode, tem);
3573 /* We used the wrong signedness. Adjust the result. */
3574 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3575 target, unsignedp);
3576 }
3577 }
3578
3579 return 0;
3580 }
3581
3582 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3583 putting the high half of the result in TARGET if that is convenient,
3584 and return where the result is. If the operation cannot be performed,
3585 0 is returned.
3586
3587 MODE is the mode of operation and result.
3588
3589 UNSIGNEDP nonzero means unsigned multiply.
3590
3591 MAX_COST is the total allowed cost for the expanded RTL. */
3592
3593 static rtx
3594 expmed_mult_highpart (machine_mode mode, rtx op0, rtx op1,
3595 rtx target, int unsignedp, int max_cost)
3596 {
3597 machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3598 unsigned HOST_WIDE_INT cnst1;
3599 int extra_cost;
3600 bool sign_adjust = false;
3601 enum mult_variant variant;
3602 struct algorithm alg;
3603 rtx tem;
3604 bool speed = optimize_insn_for_speed_p ();
3605
3606 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3607 /* We can't support modes wider than HOST_BITS_PER_WIDE_INT. */
3608 gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3609
3610 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3611
3612 /* We can't optimize modes wider than BITS_PER_WORD.
3613 ??? We might be able to perform double-word arithmetic if
3614 mode == word_mode; however, all the cost calculations in
3615 synth_mult etc. assume single-word operations. */
3616 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3617 return expmed_mult_highpart_optab (mode, op0, op1, target,
3618 unsignedp, max_cost);
3619
3620 extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3621
3622 /* Check whether we try to multiply by a negative constant. */
3623 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3624 {
3625 sign_adjust = true;
3626 extra_cost += add_cost (speed, mode);
3627 }
3628
3629 /* See whether shift/add multiplication is cheap enough. */
3630 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3631 max_cost - extra_cost))
3632 {
3633 /* See whether the specialized multiplication optabs are
3634 cheaper than the shift/add version. */
3635 tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3636 alg.cost.cost + extra_cost);
3637 if (tem)
3638 return tem;
3639
3640 tem = convert_to_mode (wider_mode, op0, unsignedp);
3641 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3642 tem = extract_high_half (mode, tem);
3643
3644 /* Adjust result for signedness. */
3645 if (sign_adjust)
3646 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3647
3648 return tem;
3649 }
3650 return expmed_mult_highpart_optab (mode, op0, op1, target,
3651 unsignedp, max_cost);
3652 }
3653
3654
3655 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3656
3657 static rtx
3658 expand_smod_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3659 {
3660 rtx result, temp, shift;
3661 rtx_code_label *label;
3662 int logd;
3663 int prec = GET_MODE_PRECISION (mode);
3664
3665 logd = floor_log2 (d);
3666 result = gen_reg_rtx (mode);
3667
3668 /* Avoid conditional branches when they're expensive. */
3669 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3670 && optimize_insn_for_speed_p ())
3671 {
3672 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3673 mode, 0, -1);
3674 if (signmask)
3675 {
3676 HOST_WIDE_INT masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3677 signmask = force_reg (mode, signmask);
3678 shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3679
3680 /* Use the rtx_cost of a LSHIFTRT instruction to determine
3681 which instruction sequence to use. If logical right shifts
3682 are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
3683 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
3684
3685 temp = gen_rtx_LSHIFTRT (mode, result, shift);
3686 if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3687 || (set_src_cost (temp, optimize_insn_for_speed_p ())
3688 > COSTS_N_INSNS (2)))
3689 {
3690 temp = expand_binop (mode, xor_optab, op0, signmask,
3691 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3692 temp = expand_binop (mode, sub_optab, temp, signmask,
3693 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3694 temp = expand_binop (mode, and_optab, temp,
3695 gen_int_mode (masklow, mode),
3696 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3697 temp = expand_binop (mode, xor_optab, temp, signmask,
3698 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3699 temp = expand_binop (mode, sub_optab, temp, signmask,
3700 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3701 }
3702 else
3703 {
3704 signmask = expand_binop (mode, lshr_optab, signmask, shift,
3705 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3706 signmask = force_reg (mode, signmask);
3707
3708 temp = expand_binop (mode, add_optab, op0, signmask,
3709 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3710 temp = expand_binop (mode, and_optab, temp,
3711 gen_int_mode (masklow, mode),
3712 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3713 temp = expand_binop (mode, sub_optab, temp, signmask,
3714 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3715 }
3716 return temp;
3717 }
3718 }
3719
3720 /* Mask contains the mode's signbit and the significant bits of the
3721 modulus. By including the signbit in the operation, many targets
3722 can avoid an explicit compare operation in the following comparison
3723 against zero. */
3724 wide_int mask = wi::mask (logd, false, prec);
3725 mask = wi::set_bit (mask, prec - 1);
3726
3727 temp = expand_binop (mode, and_optab, op0,
3728 immed_wide_int_const (mask, mode),
3729 result, 1, OPTAB_LIB_WIDEN);
3730 if (temp != result)
3731 emit_move_insn (result, temp);
3732
3733 label = gen_label_rtx ();
3734 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3735
3736 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3737 0, OPTAB_LIB_WIDEN);
3738
3739 mask = wi::mask (logd, true, prec);
3740 temp = expand_binop (mode, ior_optab, temp,
3741 immed_wide_int_const (mask, mode),
3742 result, 1, OPTAB_LIB_WIDEN);
3743 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3744 0, OPTAB_LIB_WIDEN);
3745 if (temp != result)
3746 emit_move_insn (result, temp);
3747 emit_label (label);
3748 return result;
3749 }
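/* Hand-worked example for the branch-free path above (illustrative,
   not part of the original code): for OP0 == -5 and D == 4 (logd == 2,
   masklow == 3) the store-flag gives signmask == -1.  On the LSHIFTRT
   variant, signmask >> (bitsize - 2) == 3, and
     ((-5 + 3) & 3) - 3 == -1;
   on the XOR/SUB variant,
     ((((-5 ^ -1) - -1) & 3) ^ -1) - -1 == -1.
   Both match the truncating remainder -5 % 4 == -1.  */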
3750
3751 /* Expand signed division of OP0 by a power of two D in mode MODE.
3752 This routine is only called for positive values of D. */
3753
3754 static rtx
3755 expand_sdiv_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3756 {
3757 rtx temp;
3758 rtx_code_label *label;
3759 int logd;
3760
3761 logd = floor_log2 (d);
3762
3763 if (d == 2
3764 && BRANCH_COST (optimize_insn_for_speed_p (),
3765 false) >= 1)
3766 {
3767 temp = gen_reg_rtx (mode);
3768 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3769 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3770 0, OPTAB_LIB_WIDEN);
3771 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3772 }
3773
3774 #ifdef HAVE_conditional_move
3775 if (BRANCH_COST (optimize_insn_for_speed_p (), false)
3776 >= 2)
3777 {
3778 rtx temp2;
3779
3780 start_sequence ();
3781 temp2 = copy_to_mode_reg (mode, op0);
3782 temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
3783 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3784 temp = force_reg (mode, temp);
3785
3786 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
3787 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3788 mode, temp, temp2, mode, 0);
3789 if (temp2)
3790 {
3791 rtx_insn *seq = get_insns ();
3792 end_sequence ();
3793 emit_insn (seq);
3794 return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3795 }
3796 end_sequence ();
3797 }
3798 #endif
3799
3800 if (BRANCH_COST (optimize_insn_for_speed_p (),
3801 false) >= 2)
3802 {
3803 int ushift = GET_MODE_BITSIZE (mode) - logd;
3804
3805 temp = gen_reg_rtx (mode);
3806 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3807 if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
3808 || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
3809 > COSTS_N_INSNS (1))
3810 temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode),
3811 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3812 else
3813 temp = expand_shift (RSHIFT_EXPR, mode, temp,
3814 ushift, NULL_RTX, 1);
3815 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3816 0, OPTAB_LIB_WIDEN);
3817 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3818 }
3819
3820 label = gen_label_rtx ();
3821 temp = copy_to_mode_reg (mode, op0);
3822 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3823 expand_inc (temp, gen_int_mode (d - 1, mode));
3824 emit_label (label);
3825 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3826 }
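/* Hand-worked example for the store-flag path above (illustrative,
   not part of the original code): for OP0 == -7 and D == 4 (logd == 2)
   emit_store_flag yields -1 for negative inputs, the logical shift by
   ushift turns that into D - 1 == 3, and
     (-7 + 3) >> 2  ==  -4 >> 2  ==  -1,
   the truncating quotient of -7 / 4.  Non-negative inputs get 0 added
   and are simply shifted arithmetically.  */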
3827 \f
3828 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3829 if that is convenient, and returning where the result is.
3830 You may request either the quotient or the remainder as the result;
3831 specify REM_FLAG nonzero to get the remainder.
3832
3833 CODE is the expression code for which kind of division this is;
3834 it controls how rounding is done. MODE is the machine mode to use.
3835 UNSIGNEDP nonzero means do unsigned division. */
3836
3837 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3838 and then correct it by or'ing in missing high bits
3839 if result of ANDI is nonzero.
3840 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3841 This could optimize to a bfexts instruction.
3842 But C doesn't use these operations, so their optimizations are
3843 left for later. */
3844 /* ??? For modulo, we don't actually need the highpart of the first product,
3845 the low part will do nicely. And for small divisors, the second multiply
3846 can also be a low-part only multiply or even be completely left out.
3847 E.g. to calculate the remainder of a division by 3 with a 32 bit
3848 multiply, multiply with 0x55555556 and extract the upper two bits;
3849 the result is exact for inputs up to 0x1fffffff.
3850 The input range can be reduced by using cross-sum rules.
3851 For odd divisors >= 3, the following table gives right shift counts
3852 so that if a number is shifted by an integer multiple of the given
3853 amount, the remainder stays the same:
3854 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3855 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3856 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3857 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3858 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3859
3860 Cross-sum rules for even numbers can be derived by leaving as many bits
3861 to the right alone as the divisor has zeros to the right.
3862 E.g. if x is an unsigned 32 bit number:
3863 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3864 */
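/* Hand-checked illustration of the remainder-by-3 trick above (not part
   of the original code): 7 * 0x55555556 == 0x25555555a, whose low 32
   bits are 0x5555555a, and the top two bits of that word are
   0b01 == 1 == 7 mod 3.  Likewise 8 * 0x55555556 has low word
   0xaaaaaab0, whose top two bits are 0b10 == 2 == 8 mod 3.  */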
3865
3866 rtx
3867 expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
3868 rtx op0, rtx op1, rtx target, int unsignedp)
3869 {
3870 machine_mode compute_mode;
3871 rtx tquotient;
3872 rtx quotient = 0, remainder = 0;
3873 rtx_insn *last;
3874 int size;
3875 rtx_insn *insn;
3876 optab optab1, optab2;
3877 int op1_is_constant, op1_is_pow2 = 0;
3878 int max_cost, extra_cost;
3879 static HOST_WIDE_INT last_div_const = 0;
3880 bool speed = optimize_insn_for_speed_p ();
3881
3882 op1_is_constant = CONST_INT_P (op1);
3883 if (op1_is_constant)
3884 {
3885 unsigned HOST_WIDE_INT ext_op1 = UINTVAL (op1);
3886 if (unsignedp)
3887 ext_op1 &= GET_MODE_MASK (mode);
3888 op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3889 || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3890 }
3891
3892 /*
3893 This is the structure of expand_divmod:
3894
3895 First comes code to fix up the operands so we can perform the operations
3896 correctly and efficiently.
3897
3898 Second comes a switch statement with code specific for each rounding mode.
3899 For some special operands this code emits all RTL for the desired
3900 operation, for other cases, it generates only a quotient and stores it in
3901 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
3902 to indicate that it has not done anything.
3903
3904 Last comes code that finishes the operation. If QUOTIENT is set and
3905 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
3906 QUOTIENT is not set, it is computed using trunc rounding.
3907
3908 We try to generate special code for division and remainder when OP1 is a
3909 constant. If |OP1| = 2**n we can use shifts and some other fast
3910 operations. For other values of OP1, we compute a carefully selected
3911 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3912 by m.
3913
3914 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3915 half of the product. Different strategies for generating the product are
3916 implemented in expmed_mult_highpart.
3917
3918 If what we actually want is the remainder, we generate that by another
3919 by-constant multiplication and a subtraction. */
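/* Illustrative trace (not from the original source): for unsigned
   TRUNC_MOD_EXPR with OP1 == 8, the second step takes the
   power-of-two path and simply returns OP0 & 7; with OP1 == 7 it
   instead builds the quotient from the highpart multiply chosen by
   choose_multiplier, and the final step forms the remainder as
   OP0 - 7 * quotient.  */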
3920
3921 /* We shouldn't be called with OP1 == const1_rtx, but some of the
3922 code below will malfunction if we are, so check here and handle
3923 the special case if so. */
3924 if (op1 == const1_rtx)
3925 return rem_flag ? const0_rtx : op0;
3926
3927 /* When dividing by -1, we could get an overflow.
3928 negv_optab can handle overflows. */
3929 if (! unsignedp && op1 == constm1_rtx)
3930 {
3931 if (rem_flag)
3932 return const0_rtx;
3933 return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
3934 ? negv_optab : neg_optab, op0, target, 0);
3935 }
3936
3937 if (target
3938 /* Don't use the function value register as a target
3939 since we have to read it as well as write it,
3940 and function-inlining gets confused by this. */
3941 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3942 /* Don't clobber an operand while doing a multi-step calculation. */
3943 || ((rem_flag || op1_is_constant)
3944 && (reg_mentioned_p (target, op0)
3945 || (MEM_P (op0) && MEM_P (target))))
3946 || reg_mentioned_p (target, op1)
3947 || (MEM_P (op1) && MEM_P (target))))
3948 target = 0;
3949
3950 /* Get the mode in which to perform this computation. Normally it will
3951 be MODE, but sometimes we can't do the desired operation in MODE.
3952 If so, pick a wider mode in which we can do the operation. Convert
3953 to that mode at the start to avoid repeated conversions.
3954
3955 First see what operations we need. These depend on the expression
3956 we are evaluating. (We assume that divxx3 insns exist under the
3957 same conditions as modxx3 insns do, and that these insns don't normally
3958 fail. If these assumptions are not correct, we may generate less
3959 efficient code in some cases.)
3960
3961 Then see if we find a mode in which we can open-code that operation
3962 (either a division, modulus, or shift). Finally, check for the smallest
3963 mode for which we can do the operation with a library call. */
3964
3965 /* We might want to refine this now that we have division-by-constant
3966 optimization. Since expmed_mult_highpart tries so many variants, it is
3967 not straightforward to generalize this. Maybe we should make an array
3968 of possible modes in init_expmed? Save this for GCC 2.7. */
3969
3970 optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3971 ? (unsignedp ? lshr_optab : ashr_optab)
3972 : (unsignedp ? udiv_optab : sdiv_optab));
3973 optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3974 ? optab1
3975 : (unsignedp ? udivmod_optab : sdivmod_optab));
3976
3977 for (compute_mode = mode; compute_mode != VOIDmode;
3978 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3979 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
3980 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
3981 break;
3982
3983 if (compute_mode == VOIDmode)
3984 for (compute_mode = mode; compute_mode != VOIDmode;
3985 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3986 if (optab_libfunc (optab1, compute_mode)
3987 || optab_libfunc (optab2, compute_mode))
3988 break;
3989
3990 /* If we still couldn't find a mode, use MODE, but expand_binop will
3991 probably die. */
3992 if (compute_mode == VOIDmode)
3993 compute_mode = mode;
3994
3995 if (target && GET_MODE (target) == compute_mode)
3996 tquotient = target;
3997 else
3998 tquotient = gen_reg_rtx (compute_mode);
3999
4000 size = GET_MODE_BITSIZE (compute_mode);
4001 #if 0
4002 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4003 (mode), and thereby get better code when OP1 is a constant. Do that
4004 later. It will require going over all usages of SIZE below. */
4005 size = GET_MODE_BITSIZE (mode);
4006 #endif
4007
4008 /* Only deduct something for a REM if the last divide done was
4009 for a different constant. Then set the constant of the last
4010 divide. */
4011 max_cost = (unsignedp
4012 ? udiv_cost (speed, compute_mode)
4013 : sdiv_cost (speed, compute_mode));
4014 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4015 && INTVAL (op1) == last_div_const))
4016 max_cost -= (mul_cost (speed, compute_mode)
4017 + add_cost (speed, compute_mode));
4018
4019 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4020
4021 /* Now convert to the best mode to use. */
4022 if (compute_mode != mode)
4023 {
4024 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4025 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4026
4027 /* convert_modes may have placed op1 into a register, so we
4028 must recompute the following. */
4029 op1_is_constant = CONST_INT_P (op1);
4030 op1_is_pow2 = (op1_is_constant
4031 && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4032 || (! unsignedp
4033 && EXACT_POWER_OF_2_OR_ZERO_P (-UINTVAL (op1))))));
4034 }
4035
4036 /* If one of the operands is a volatile MEM, copy it into a register. */
4037
4038 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4039 op0 = force_reg (compute_mode, op0);
4040 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4041 op1 = force_reg (compute_mode, op1);
4042
4043 /* If we need the remainder or if OP1 is constant, we need to
4044 put OP0 in a register in case it has any queued subexpressions. */
4045 if (rem_flag || op1_is_constant)
4046 op0 = force_reg (compute_mode, op0);
4047
4048 last = get_last_insn ();
4049
4050 /* Promote floor rounding to trunc rounding for unsigned operations. */
4051 if (unsignedp)
4052 {
4053 if (code == FLOOR_DIV_EXPR)
4054 code = TRUNC_DIV_EXPR;
4055 if (code == FLOOR_MOD_EXPR)
4056 code = TRUNC_MOD_EXPR;
4057 if (code == EXACT_DIV_EXPR && op1_is_pow2)
4058 code = TRUNC_DIV_EXPR;
4059 }
4060
4061 if (op1 != const0_rtx)
4062 switch (code)
4063 {
4064 case TRUNC_MOD_EXPR:
4065 case TRUNC_DIV_EXPR:
4066 if (op1_is_constant)
4067 {
4068 if (unsignedp)
4069 {
4070 unsigned HOST_WIDE_INT mh, ml;
4071 int pre_shift, post_shift;
4072 int dummy;
4073 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4074 & GET_MODE_MASK (compute_mode));
4075
4076 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4077 {
4078 pre_shift = floor_log2 (d);
4079 if (rem_flag)
4080 {
4081 unsigned HOST_WIDE_INT mask
4082 = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1;
4083 remainder
4084 = expand_binop (compute_mode, and_optab, op0,
4085 gen_int_mode (mask, compute_mode),
4086 remainder, 1,
4087 OPTAB_LIB_WIDEN);
4088 if (remainder)
4089 return gen_lowpart (mode, remainder);
4090 }
4091 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4092 pre_shift, tquotient, 1);
4093 }
4094 else if (size <= HOST_BITS_PER_WIDE_INT)
4095 {
4096 if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4097 {
4098 /* Most significant bit of divisor is set; emit an scc
4099 insn. */
4100 quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4101 compute_mode, 1, 1);
4102 }
4103 else
4104 {
4105 /* Find a suitable multiplier and right shift count
4106 instead of multiplying with D. */
4107
4108 mh = choose_multiplier (d, size, size,
4109 &ml, &post_shift, &dummy);
4110
4111 /* If the suggested multiplier is more than SIZE bits,
4112 we can do better for even divisors, using an
4113 initial right shift. */
4114 if (mh != 0 && (d & 1) == 0)
4115 {
4116 pre_shift = floor_log2 (d & -d);
4117 mh = choose_multiplier (d >> pre_shift, size,
4118 size - pre_shift,
4119 &ml, &post_shift, &dummy);
4120 gcc_assert (!mh);
4121 }
4122 else
4123 pre_shift = 0;
4124
4125 if (mh != 0)
4126 {
4127 rtx t1, t2, t3, t4;
4128
4129 if (post_shift - 1 >= BITS_PER_WORD)
4130 goto fail1;
4131
4132 extra_cost
4133 = (shift_cost (speed, compute_mode, post_shift - 1)
4134 + shift_cost (speed, compute_mode, 1)
4135 + 2 * add_cost (speed, compute_mode));
4136 t1 = expmed_mult_highpart
4137 (compute_mode, op0,
4138 gen_int_mode (ml, compute_mode),
4139 NULL_RTX, 1, max_cost - extra_cost);
4140 if (t1 == 0)
4141 goto fail1;
4142 t2 = force_operand (gen_rtx_MINUS (compute_mode,
4143 op0, t1),
4144 NULL_RTX);
4145 t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4146 t2, 1, NULL_RTX, 1);
4147 t4 = force_operand (gen_rtx_PLUS (compute_mode,
4148 t1, t3),
4149 NULL_RTX);
4150 quotient = expand_shift
4151 (RSHIFT_EXPR, compute_mode, t4,
4152 post_shift - 1, tquotient, 1);
4153 }
4154 else
4155 {
4156 rtx t1, t2;
4157
4158 if (pre_shift >= BITS_PER_WORD
4159 || post_shift >= BITS_PER_WORD)
4160 goto fail1;
4161
4162 t1 = expand_shift
4163 (RSHIFT_EXPR, compute_mode, op0,
4164 pre_shift, NULL_RTX, 1);
4165 extra_cost
4166 = (shift_cost (speed, compute_mode, pre_shift)
4167 + shift_cost (speed, compute_mode, post_shift));
4168 t2 = expmed_mult_highpart
4169 (compute_mode, t1,
4170 gen_int_mode (ml, compute_mode),
4171 NULL_RTX, 1, max_cost - extra_cost);
4172 if (t2 == 0)
4173 goto fail1;
4174 quotient = expand_shift
4175 (RSHIFT_EXPR, compute_mode, t2,
4176 post_shift, tquotient, 1);
4177 }
4178 }
4179 }
4180 else /* Too wide mode to use tricky code */
4181 break;
4182
4183 insn = get_last_insn ();
4184 if (insn != last)
4185 set_dst_reg_note (insn, REG_EQUAL,
4186 gen_rtx_UDIV (compute_mode, op0, op1),
4187 quotient);
4188 }
4189 else /* TRUNC_DIV, signed */
4190 {
4191 unsigned HOST_WIDE_INT ml;
4192 int lgup, post_shift;
4193 rtx mlr;
4194 HOST_WIDE_INT d = INTVAL (op1);
4195 unsigned HOST_WIDE_INT abs_d;
4196
4197 /* Since d might be INT_MIN, we have to cast to
4198 unsigned HOST_WIDE_INT before negating to avoid
4199 undefined signed overflow. */
4200 abs_d = (d >= 0
4201 ? (unsigned HOST_WIDE_INT) d
4202 : - (unsigned HOST_WIDE_INT) d);
4203
4204 /* n rem d = n rem -d */
4205 if (rem_flag && d < 0)
4206 {
4207 d = abs_d;
4208 op1 = gen_int_mode (abs_d, compute_mode);
4209 }
4210
4211 if (d == 1)
4212 quotient = op0;
4213 else if (d == -1)
4214 quotient = expand_unop (compute_mode, neg_optab, op0,
4215 tquotient, 0);
4216 else if (HOST_BITS_PER_WIDE_INT >= size
4217 && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4218 {
4219 /* This case is not handled correctly below. */
4220 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4221 compute_mode, 1, 1);
4222 if (quotient == 0)
4223 goto fail1;
4224 }
4225 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4226 && (rem_flag
4227 ? smod_pow2_cheap (speed, compute_mode)
4228 : sdiv_pow2_cheap (speed, compute_mode))
4229 /* We assume that the cheap metric is true if the
4230 optab has an expander for this mode. */
4231 && ((optab_handler ((rem_flag ? smod_optab
4232 : sdiv_optab),
4233 compute_mode)
4234 != CODE_FOR_nothing)
4235 || (optab_handler (sdivmod_optab,
4236 compute_mode)
4237 != CODE_FOR_nothing)))
4238 ;
4239 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4240 {
4241 if (rem_flag)
4242 {
4243 remainder = expand_smod_pow2 (compute_mode, op0, d);
4244 if (remainder)
4245 return gen_lowpart (mode, remainder);
4246 }
4247
4248 if (sdiv_pow2_cheap (speed, compute_mode)
4249 && ((optab_handler (sdiv_optab, compute_mode)
4250 != CODE_FOR_nothing)
4251 || (optab_handler (sdivmod_optab, compute_mode)
4252 != CODE_FOR_nothing)))
4253 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4254 compute_mode, op0,
4255 gen_int_mode (abs_d,
4256 compute_mode),
4257 NULL_RTX, 0);
4258 else
4259 quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4260
4261 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4262 negate the quotient. */
4263 if (d < 0)
4264 {
4265 insn = get_last_insn ();
4266 if (insn != last
4267 && abs_d < ((unsigned HOST_WIDE_INT) 1
4268 << (HOST_BITS_PER_WIDE_INT - 1)))
4269 set_dst_reg_note (insn, REG_EQUAL,
4270 gen_rtx_DIV (compute_mode, op0,
4271 gen_int_mode
4272 (abs_d,
4273 compute_mode)),
4274 quotient);
4275
4276 quotient = expand_unop (compute_mode, neg_optab,
4277 quotient, quotient, 0);
4278 }
4279 }
4280 else if (size <= HOST_BITS_PER_WIDE_INT)
4281 {
4282 choose_multiplier (abs_d, size, size - 1,
4283 &ml, &post_shift, &lgup);
4284 if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4285 {
4286 rtx t1, t2, t3;
4287
4288 if (post_shift >= BITS_PER_WORD
4289 || size - 1 >= BITS_PER_WORD)
4290 goto fail1;
4291
4292 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4293 + shift_cost (speed, compute_mode, size - 1)
4294 + add_cost (speed, compute_mode));
4295 t1 = expmed_mult_highpart
4296 (compute_mode, op0, gen_int_mode (ml, compute_mode),
4297 NULL_RTX, 0, max_cost - extra_cost);
4298 if (t1 == 0)
4299 goto fail1;
4300 t2 = expand_shift
4301 (RSHIFT_EXPR, compute_mode, t1,
4302 post_shift, NULL_RTX, 0);
4303 t3 = expand_shift
4304 (RSHIFT_EXPR, compute_mode, op0,
4305 size - 1, NULL_RTX, 0);
4306 if (d < 0)
4307 quotient
4308 = force_operand (gen_rtx_MINUS (compute_mode,
4309 t3, t2),
4310 tquotient);
4311 else
4312 quotient
4313 = force_operand (gen_rtx_MINUS (compute_mode,
4314 t2, t3),
4315 tquotient);
4316 }
4317 else
4318 {
4319 rtx t1, t2, t3, t4;
4320
4321 if (post_shift >= BITS_PER_WORD
4322 || size - 1 >= BITS_PER_WORD)
4323 goto fail1;
4324
4325 ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4326 mlr = gen_int_mode (ml, compute_mode);
4327 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4328 + shift_cost (speed, compute_mode, size - 1)
4329 + 2 * add_cost (speed, compute_mode));
4330 t1 = expmed_mult_highpart (compute_mode, op0, mlr,
4331 NULL_RTX, 0,
4332 max_cost - extra_cost);
4333 if (t1 == 0)
4334 goto fail1;
4335 t2 = force_operand (gen_rtx_PLUS (compute_mode,
4336 t1, op0),
4337 NULL_RTX);
4338 t3 = expand_shift
4339 (RSHIFT_EXPR, compute_mode, t2,
4340 post_shift, NULL_RTX, 0);
4341 t4 = expand_shift
4342 (RSHIFT_EXPR, compute_mode, op0,
4343 size - 1, NULL_RTX, 0);
4344 if (d < 0)
4345 quotient
4346 = force_operand (gen_rtx_MINUS (compute_mode,
4347 t4, t3),
4348 tquotient);
4349 else
4350 quotient
4351 = force_operand (gen_rtx_MINUS (compute_mode,
4352 t3, t4),
4353 tquotient);
4354 }
4355 }
4356 else /* Too wide mode to use tricky code */
4357 break;
4358
4359 insn = get_last_insn ();
4360 if (insn != last)
4361 set_dst_reg_note (insn, REG_EQUAL,
4362 gen_rtx_DIV (compute_mode, op0, op1),
4363 quotient);
4364 }
4365 break;
4366 }
4367 fail1:
4368 delete_insns_since (last);
4369 break;
4370
4371 case FLOOR_DIV_EXPR:
4372 case FLOOR_MOD_EXPR:
4373 /* We will come here only for signed operations. */
4374 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4375 {
4376 unsigned HOST_WIDE_INT mh, ml;
4377 int pre_shift, lgup, post_shift;
4378 HOST_WIDE_INT d = INTVAL (op1);
4379
4380 if (d > 0)
4381 {
4382 /* We could just as easily deal with negative constants here,
4383 but it does not seem worth the trouble for GCC 2.6. */
4384 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4385 {
4386 pre_shift = floor_log2 (d);
4387 if (rem_flag)
4388 {
4389 unsigned HOST_WIDE_INT mask
4390 = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1;
4391 remainder = expand_binop
4392 (compute_mode, and_optab, op0,
4393 gen_int_mode (mask, compute_mode),
4394 remainder, 0, OPTAB_LIB_WIDEN);
4395 if (remainder)
4396 return gen_lowpart (mode, remainder);
4397 }
4398 quotient = expand_shift
4399 (RSHIFT_EXPR, compute_mode, op0,
4400 pre_shift, tquotient, 0);
4401 }
4402 else
4403 {
4404 rtx t1, t2, t3, t4;
4405
4406 mh = choose_multiplier (d, size, size - 1,
4407 &ml, &post_shift, &lgup);
4408 gcc_assert (!mh);
4409
4410 if (post_shift < BITS_PER_WORD
4411 && size - 1 < BITS_PER_WORD)
4412 {
4413 t1 = expand_shift
4414 (RSHIFT_EXPR, compute_mode, op0,
4415 size - 1, NULL_RTX, 0);
4416 t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4417 NULL_RTX, 0, OPTAB_WIDEN);
4418 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4419 + shift_cost (speed, compute_mode, size - 1)
4420 + 2 * add_cost (speed, compute_mode));
4421 t3 = expmed_mult_highpart
4422 (compute_mode, t2, gen_int_mode (ml, compute_mode),
4423 NULL_RTX, 1, max_cost - extra_cost);
4424 if (t3 != 0)
4425 {
4426 t4 = expand_shift
4427 (RSHIFT_EXPR, compute_mode, t3,
4428 post_shift, NULL_RTX, 1);
4429 quotient = expand_binop (compute_mode, xor_optab,
4430 t4, t1, tquotient, 0,
4431 OPTAB_WIDEN);
4432 }
4433 }
4434 }
4435 }
4436 else
4437 {
4438 rtx nsign, t1, t2, t3, t4;
4439 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4440 op0, constm1_rtx), NULL_RTX);
4441 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4442 0, OPTAB_WIDEN);
4443 nsign = expand_shift
4444 (RSHIFT_EXPR, compute_mode, t2,
4445 size - 1, NULL_RTX, 0);
4446 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4447 NULL_RTX);
4448 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4449 NULL_RTX, 0);
4450 if (t4)
4451 {
4452 rtx t5;
4453 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4454 NULL_RTX, 0);
4455 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4456 t4, t5),
4457 tquotient);
4458 }
4459 }
4460 }
4461
4462 if (quotient != 0)
4463 break;
4464 delete_insns_since (last);
4465
4466 /* Try using an instruction that produces both the quotient and
4467 remainder, using truncation. We can easily compensate the quotient
4468 or remainder to get floor rounding, once we have the remainder.
4469 Notice that we also compute the final remainder value here,
4470 and return the result right away. */
4471 if (target == 0 || GET_MODE (target) != compute_mode)
4472 target = gen_reg_rtx (compute_mode);
4473
4474 if (rem_flag)
4475 {
4476 remainder
4477 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4478 quotient = gen_reg_rtx (compute_mode);
4479 }
4480 else
4481 {
4482 quotient
4483 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4484 remainder = gen_reg_rtx (compute_mode);
4485 }
4486
4487 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4488 quotient, remainder, 0))
4489 {
4490 /* This could be computed with a branch-less sequence.
4491 Save that for later. */
4492 rtx tem;
4493 rtx_code_label *label = gen_label_rtx ();
4494 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4495 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4496 NULL_RTX, 0, OPTAB_WIDEN);
4497 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4498 expand_dec (quotient, const1_rtx);
4499 expand_inc (remainder, op1);
4500 emit_label (label);
4501 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4502 }
4503
4504 /* No luck with division elimination or divmod. Have to do it
4505 by conditionally adjusting op0 *and* the result. */
4506 {
4507 rtx_code_label *label1, *label2, *label3, *label4, *label5;
4508 rtx adjusted_op0;
4509 rtx tem;
4510
4511 quotient = gen_reg_rtx (compute_mode);
4512 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4513 label1 = gen_label_rtx ();
4514 label2 = gen_label_rtx ();
4515 label3 = gen_label_rtx ();
4516 label4 = gen_label_rtx ();
4517 label5 = gen_label_rtx ();
4518 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4519 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4520 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4521 quotient, 0, OPTAB_LIB_WIDEN);
4522 if (tem != quotient)
4523 emit_move_insn (quotient, tem);
4524 emit_jump_insn (gen_jump (label5));
4525 emit_barrier ();
4526 emit_label (label1);
4527 expand_inc (adjusted_op0, const1_rtx);
4528 emit_jump_insn (gen_jump (label4));
4529 emit_barrier ();
4530 emit_label (label2);
4531 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4532 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4533 quotient, 0, OPTAB_LIB_WIDEN);
4534 if (tem != quotient)
4535 emit_move_insn (quotient, tem);
4536 emit_jump_insn (gen_jump (label5));
4537 emit_barrier ();
4538 emit_label (label3);
4539 expand_dec (adjusted_op0, const1_rtx);
4540 emit_label (label4);
4541 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4542 quotient, 0, OPTAB_LIB_WIDEN);
4543 if (tem != quotient)
4544 emit_move_insn (quotient, tem);
4545 expand_dec (quotient, const1_rtx);
4546 emit_label (label5);
4547 }
4548 break;
4549
4550 case CEIL_DIV_EXPR:
4551 case CEIL_MOD_EXPR:
4552 if (unsignedp)
4553 {
4554 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4555 {
4556 rtx t1, t2, t3;
4557 unsigned HOST_WIDE_INT d = INTVAL (op1);
4558 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4559 floor_log2 (d), tquotient, 1);
4560 t2 = expand_binop (compute_mode, and_optab, op0,
4561 gen_int_mode (d - 1, compute_mode),
4562 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4563 t3 = gen_reg_rtx (compute_mode);
4564 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4565 compute_mode, 1, 1);
4566 if (t3 == 0)
4567 {
4568 rtx_code_label *lab;
4569 lab = gen_label_rtx ();
4570 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4571 expand_inc (t1, const1_rtx);
4572 emit_label (lab);
4573 quotient = t1;
4574 }
4575 else
4576 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4577 t1, t3),
4578 tquotient);
4579 break;
4580 }
4581
4582 /* Try using an instruction that produces both the quotient and
4583 remainder, using truncation. We can easily compensate the
4584 quotient or remainder to get ceiling rounding, once we have the
4585 remainder. Notice that we also compute the final remainder
4586 value here, and return the result right away. */
4587 if (target == 0 || GET_MODE (target) != compute_mode)
4588 target = gen_reg_rtx (compute_mode);
4589
4590 if (rem_flag)
4591 {
4592 remainder = (REG_P (target)
4593 ? target : gen_reg_rtx (compute_mode));
4594 quotient = gen_reg_rtx (compute_mode);
4595 }
4596 else
4597 {
4598 quotient = (REG_P (target)
4599 ? target : gen_reg_rtx (compute_mode));
4600 remainder = gen_reg_rtx (compute_mode);
4601 }
4602
4603 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4604 remainder, 1))
4605 {
4606 /* This could be computed with a branch-less sequence.
4607 Save that for later. */
4608 rtx_code_label *label = gen_label_rtx ();
4609 do_cmp_and_jump (remainder, const0_rtx, EQ,
4610 compute_mode, label);
4611 expand_inc (quotient, const1_rtx);
4612 expand_dec (remainder, op1);
4613 emit_label (label);
4614 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4615 }
4616
4617 /* No luck with division elimination or divmod. Have to do it
4618 by conditionally adjusting op0 *and* the result. */
4619 {
4620 rtx_code_label *label1, *label2;
4621 rtx adjusted_op0, tem;
4622
4623 quotient = gen_reg_rtx (compute_mode);
4624 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4625 label1 = gen_label_rtx ();
4626 label2 = gen_label_rtx ();
4627 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4628 compute_mode, label1);
4629 emit_move_insn (quotient, const0_rtx);
4630 emit_jump_insn (gen_jump (label2));
4631 emit_barrier ();
4632 emit_label (label1);
4633 expand_dec (adjusted_op0, const1_rtx);
4634 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4635 quotient, 1, OPTAB_LIB_WIDEN);
4636 if (tem != quotient)
4637 emit_move_insn (quotient, tem);
4638 expand_inc (quotient, const1_rtx);
4639 emit_label (label2);
4640 }
4641 }
4642 else /* signed */
4643 {
4644 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4645 && INTVAL (op1) >= 0)
4646 {
4647 /* This is extremely similar to the code for the unsigned case
4648 above. For 2.7 we should merge these variants, but for
4649 2.6.1 I don't want to touch the code for unsigned since that
4650 gets used in C. The signed case will only be used by other
4651 languages (Ada). */
4652
4653 rtx t1, t2, t3;
4654 unsigned HOST_WIDE_INT d = INTVAL (op1);
4655 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4656 floor_log2 (d), tquotient, 0);
4657 t2 = expand_binop (compute_mode, and_optab, op0,
4658 gen_int_mode (d - 1, compute_mode),
4659 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4660 t3 = gen_reg_rtx (compute_mode);
4661 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4662 compute_mode, 1, 1);
4663 if (t3 == 0)
4664 {
4665 rtx_code_label *lab;
4666 lab = gen_label_rtx ();
4667 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4668 expand_inc (t1, const1_rtx);
4669 emit_label (lab);
4670 quotient = t1;
4671 }
4672 else
4673 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4674 t1, t3),
4675 tquotient);
4676 break;
4677 }
4678
4679 /* Try using an instruction that produces both the quotient and
4680 remainder, using truncation. We can easily compensate the
4681 quotient or remainder to get ceiling rounding, once we have the
4682 remainder. Notice that we also compute the final remainder
4683 value here, and return the result right away. */
4684 if (target == 0 || GET_MODE (target) != compute_mode)
4685 target = gen_reg_rtx (compute_mode);
4686 if (rem_flag)
4687 {
4688 remainder = (REG_P (target)
4689 ? target : gen_reg_rtx (compute_mode));
4690 quotient = gen_reg_rtx (compute_mode);
4691 }
4692 else
4693 {
4694 quotient = (REG_P (target)
4695 ? target : gen_reg_rtx (compute_mode));
4696 remainder = gen_reg_rtx (compute_mode);
4697 }
4698
4699 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4700 remainder, 0))
4701 {
4702 /* This could be computed with a branch-less sequence.
4703 Save that for later. */
4704 rtx tem;
4705 rtx_code_label *label = gen_label_rtx ();
4706 do_cmp_and_jump (remainder, const0_rtx, EQ,
4707 compute_mode, label);
4708 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4709 NULL_RTX, 0, OPTAB_WIDEN);
4710 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4711 expand_inc (quotient, const1_rtx);
4712 expand_dec (remainder, op1);
4713 emit_label (label);
4714 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4715 }
4716
4717 /* No luck with division elimination or divmod. Have to do it
4718 by conditionally adjusting op0 *and* the result. */
4719 {
4720 rtx_code_label *label1, *label2, *label3, *label4, *label5;
4721 rtx adjusted_op0;
4722 rtx tem;
4723
4724 quotient = gen_reg_rtx (compute_mode);
4725 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4726 label1 = gen_label_rtx ();
4727 label2 = gen_label_rtx ();
4728 label3 = gen_label_rtx ();
4729 label4 = gen_label_rtx ();
4730 label5 = gen_label_rtx ();
4731 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4732 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4733 compute_mode, label1);
4734 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4735 quotient, 0, OPTAB_LIB_WIDEN);
4736 if (tem != quotient)
4737 emit_move_insn (quotient, tem);
4738 emit_jump_insn (gen_jump (label5));
4739 emit_barrier ();
4740 emit_label (label1);
4741 expand_dec (adjusted_op0, const1_rtx);
4742 emit_jump_insn (gen_jump (label4));
4743 emit_barrier ();
4744 emit_label (label2);
4745 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4746 compute_mode, label3);
4747 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4748 quotient, 0, OPTAB_LIB_WIDEN);
4749 if (tem != quotient)
4750 emit_move_insn (quotient, tem);
4751 emit_jump_insn (gen_jump (label5));
4752 emit_barrier ();
4753 emit_label (label3);
4754 expand_inc (adjusted_op0, const1_rtx);
4755 emit_label (label4);
4756 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4757 quotient, 0, OPTAB_LIB_WIDEN);
4758 if (tem != quotient)
4759 emit_move_insn (quotient, tem);
4760 expand_inc (quotient, const1_rtx);
4761 emit_label (label5);
4762 }
4763 }
4764 break;
4765
4766 case EXACT_DIV_EXPR:
4767 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4768 {
4769 HOST_WIDE_INT d = INTVAL (op1);
4770 unsigned HOST_WIDE_INT ml;
4771 int pre_shift;
4772 rtx t1;
4773
4774 pre_shift = floor_log2 (d & -d);
4775 ml = invert_mod2n (d >> pre_shift, size);
4776 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4777 pre_shift, NULL_RTX, unsignedp);
4778 quotient = expand_mult (compute_mode, t1,
4779 gen_int_mode (ml, compute_mode),
4780 NULL_RTX, 1);
4781
4782 insn = get_last_insn ();
4783 set_dst_reg_note (insn, REG_EQUAL,
4784 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4785 compute_mode, op0, op1),
4786 quotient);
4787 }
4788 break;
4789
4790 case ROUND_DIV_EXPR:
4791 case ROUND_MOD_EXPR:
4792 if (unsignedp)
4793 {
4794 rtx tem;
4795 rtx_code_label *label;
4796 label = gen_label_rtx ();
4797 quotient = gen_reg_rtx (compute_mode);
4798 remainder = gen_reg_rtx (compute_mode);
4799 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4800 {
4801 rtx tem;
4802 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4803 quotient, 1, OPTAB_LIB_WIDEN);
4804 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4805 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4806 remainder, 1, OPTAB_LIB_WIDEN);
4807 }
4808 tem = plus_constant (compute_mode, op1, -1);
4809 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4810 do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4811 expand_inc (quotient, const1_rtx);
4812 expand_dec (remainder, op1);
4813 emit_label (label);
4814 }
4815 else
4816 {
4817 rtx abs_rem, abs_op1, tem, mask;
4818 rtx_code_label *label;
4819 label = gen_label_rtx ();
4820 quotient = gen_reg_rtx (compute_mode);
4821 remainder = gen_reg_rtx (compute_mode);
4822 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4823 {
4824 rtx tem;
4825 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4826 quotient, 0, OPTAB_LIB_WIDEN);
4827 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4828 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4829 remainder, 0, OPTAB_LIB_WIDEN);
4830 }
4831 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4832 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4833 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4834 1, NULL_RTX, 1);
4835 do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4836 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4837 NULL_RTX, 0, OPTAB_WIDEN);
4838 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4839 size - 1, NULL_RTX, 0);
4840 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4841 NULL_RTX, 0, OPTAB_WIDEN);
4842 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4843 NULL_RTX, 0, OPTAB_WIDEN);
4844 expand_inc (quotient, tem);
4845 tem = expand_binop (compute_mode, xor_optab, mask, op1,
4846 NULL_RTX, 0, OPTAB_WIDEN);
4847 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4848 NULL_RTX, 0, OPTAB_WIDEN);
4849 expand_dec (remainder, tem);
4850 emit_label (label);
4851 }
4852 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4853
4854 default:
4855 gcc_unreachable ();
4856 }
4857
4858 if (quotient == 0)
4859 {
4860 if (target && GET_MODE (target) != compute_mode)
4861 target = 0;
4862
4863 if (rem_flag)
4864 {
4865 /* Try to produce the remainder without producing the quotient.
4866 If we seem to have a divmod pattern that does not require widening,
4867 don't try widening here. We should really have a WIDEN argument
4868 to expand_twoval_binop, since what we'd really like to do here is
4869 1) try a mod insn in compute_mode
4870 2) try a divmod insn in compute_mode
4871 3) try a div insn in compute_mode and multiply-subtract to get
4872 remainder
4873 4) try the same things with widening allowed. */
4874 remainder
4875 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4876 op0, op1, target,
4877 unsignedp,
4878 ((optab_handler (optab2, compute_mode)
4879 != CODE_FOR_nothing)
4880 ? OPTAB_DIRECT : OPTAB_WIDEN));
4881 if (remainder == 0)
4882 {
4883 /* No luck there. Can we do remainder and divide at once
4884 without a library call? */
4885 remainder = gen_reg_rtx (compute_mode);
4886 if (! expand_twoval_binop ((unsignedp
4887 ? udivmod_optab
4888 : sdivmod_optab),
4889 op0, op1,
4890 NULL_RTX, remainder, unsignedp))
4891 remainder = 0;
4892 }
4893
4894 if (remainder)
4895 return gen_lowpart (mode, remainder);
4896 }
4897
4898 /* Produce the quotient. Try a quotient insn, but not a library call.
4899 If we have a divmod in this mode, use it in preference to widening
4900 the div (for this test we assume it will not fail). Note that optab2
4901 is set to the one of the two optabs that the call below will use. */
4902 quotient
4903 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4904 op0, op1, rem_flag ? NULL_RTX : target,
4905 unsignedp,
4906 ((optab_handler (optab2, compute_mode)
4907 != CODE_FOR_nothing)
4908 ? OPTAB_DIRECT : OPTAB_WIDEN));
4909
4910 if (quotient == 0)
4911 {
4912 /* No luck there. Try a quotient-and-remainder insn,
4913 keeping the quotient alone. */
4914 quotient = gen_reg_rtx (compute_mode);
4915 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4916 op0, op1,
4917 quotient, NULL_RTX, unsignedp))
4918 {
4919 quotient = 0;
4920 if (! rem_flag)
4921 /* Still no luck. If we are not computing the remainder,
4922 use a library call for the quotient. */
4923 quotient = sign_expand_binop (compute_mode,
4924 udiv_optab, sdiv_optab,
4925 op0, op1, target,
4926 unsignedp, OPTAB_LIB_WIDEN);
4927 }
4928 }
4929 }
4930
4931 if (rem_flag)
4932 {
4933 if (target && GET_MODE (target) != compute_mode)
4934 target = 0;
4935
4936 if (quotient == 0)
4937 {
4938 /* No divide instruction either. Use library for remainder. */
4939 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4940 op0, op1, target,
4941 unsignedp, OPTAB_LIB_WIDEN);
4942 /* No remainder function. Try a quotient-and-remainder
4943 function, keeping the remainder. */
4944 if (!remainder)
4945 {
4946 remainder = gen_reg_rtx (compute_mode);
4947 if (!expand_twoval_binop_libfunc
4948 (unsignedp ? udivmod_optab : sdivmod_optab,
4949 op0, op1,
4950 NULL_RTX, remainder,
4951 unsignedp ? UMOD : MOD))
4952 remainder = NULL_RTX;
4953 }
4954 }
4955 else
4956 {
4957 /* We divided. Now finish doing X - Y * (X / Y). */
4958 remainder = expand_mult (compute_mode, quotient, op1,
4959 NULL_RTX, unsignedp);
4960 remainder = expand_binop (compute_mode, sub_optab, op0,
4961 remainder, target, unsignedp,
4962 OPTAB_LIB_WIDEN);
4963 }
4964 }
4965
4966 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4967 }
4968 \f
4969 /* Return a tree node with data type TYPE, describing the value of X.
4970 Usually this is a VAR_DECL, if there is no obvious better choice.
4971 X may be an expression; however, we only support those expressions
4972 generated by loop.c. */
4973
4974 tree
4975 make_tree (tree type, rtx x)
4976 {
4977 tree t;
4978
4979 switch (GET_CODE (x))
4980 {
4981 case CONST_INT:
4982 case CONST_WIDE_INT:
4983 t = wide_int_to_tree (type, std::make_pair (x, TYPE_MODE (type)));
4984 return t;
4985
4986 case CONST_DOUBLE:
4987 STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
4988 if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
4989 t = wide_int_to_tree (type,
4990 wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
4991 HOST_BITS_PER_WIDE_INT * 2));
4992 else
4993 {
4994 REAL_VALUE_TYPE d;
4995
4996 REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4997 t = build_real (type, d);
4998 }
4999
5000 return t;
5001
5002 case CONST_VECTOR:
5003 {
5004 int units = CONST_VECTOR_NUNITS (x);
5005 tree itype = TREE_TYPE (type);
5006 tree *elts;
5007 int i;
5008
5009 /* Build a tree with vector elements. */
5010 elts = XALLOCAVEC (tree, units);
5011 for (i = units - 1; i >= 0; --i)
5012 {
5013 rtx elt = CONST_VECTOR_ELT (x, i);
5014 elts[i] = make_tree (itype, elt);
5015 }
5016
5017 return build_vector (type, elts);
5018 }
5019
5020 case PLUS:
5021 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5022 make_tree (type, XEXP (x, 1)));
5023
5024 case MINUS:
5025 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5026 make_tree (type, XEXP (x, 1)));
5027
5028 case NEG:
5029 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5030
5031 case MULT:
5032 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5033 make_tree (type, XEXP (x, 1)));
5034
5035 case ASHIFT:
5036 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5037 make_tree (type, XEXP (x, 1)));
5038
5039 case LSHIFTRT:
5040 t = unsigned_type_for (type);
5041 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5042 make_tree (t, XEXP (x, 0)),
5043 make_tree (type, XEXP (x, 1))));
5044
5045 case ASHIFTRT:
5046 t = signed_type_for (type);
5047 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5048 make_tree (t, XEXP (x, 0)),
5049 make_tree (type, XEXP (x, 1))));
5050
5051 case DIV:
5052 if (TREE_CODE (type) != REAL_TYPE)
5053 t = signed_type_for (type);
5054 else
5055 t = type;
5056
5057 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5058 make_tree (t, XEXP (x, 0)),
5059 make_tree (t, XEXP (x, 1))));
5060 case UDIV:
5061 t = unsigned_type_for (type);
5062 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5063 make_tree (t, XEXP (x, 0)),
5064 make_tree (t, XEXP (x, 1))));
5065
5066 case SIGN_EXTEND:
5067 case ZERO_EXTEND:
5068 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5069 GET_CODE (x) == ZERO_EXTEND);
5070 return fold_convert (type, make_tree (t, XEXP (x, 0)));
5071
5072 case CONST:
5073 return make_tree (type, XEXP (x, 0));
5074
5075 case SYMBOL_REF:
5076 t = SYMBOL_REF_DECL (x);
5077 if (t)
5078 return fold_convert (type, build_fold_addr_expr (t));
5079 /* else fall through. */
5080
5081 default:
5082 t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5083
5084 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5085 address mode to pointer mode. */
5086 if (POINTER_TYPE_P (type))
5087 x = convert_memory_address_addr_space
5088 (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5089
5090 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5091 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5092 t->decl_with_rtl.rtl = x;
5093
5094 return t;
5095 }
5096 }
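
/* A minimal usage sketch, added here only as documentation and not called
   anywhere: it shows make_tree rebuilding a tree from the rtx
   (plus:SI (reg:SI N) (const_int 7)).  The helper name is hypothetical; the
   result is a PLUS_EXPR whose first operand is an artificial VAR_DECL with
   the pseudo register as its DECL_RTL.  */

static tree ATTRIBUTE_UNUSED
make_tree_usage_sketch (void)
{
  rtx reg = gen_reg_rtx (SImode);
  rtx sum = gen_rtx_PLUS (SImode, reg, GEN_INT (7));

  /* Roughly the tree "tmp + 7" with type integer_type_node.  */
  return make_tree (integer_type_node, sum);
}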
5097 \f
5098 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5099 and returning TARGET.
5100
5101 If TARGET is 0, a pseudo-register or constant is returned. */
5102
5103 rtx
5104 expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
5105 {
5106 rtx tem = 0;
5107
5108 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5109 tem = simplify_binary_operation (AND, mode, op0, op1);
5110 if (tem == 0)
5111 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5112
5113 if (target == 0)
5114 target = tem;
5115 else if (tem != target)
5116 emit_move_insn (target, tem);
5117 return target;
5118 }
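
/* A usage sketch (hypothetical helper, not referenced elsewhere): mask REG
   down to its low eight bits, i.e. expand "reg & 0xff" in SImode.  With a
   null TARGET, expand_and returns either a new pseudo or a constant.  */

static rtx ATTRIBUTE_UNUSED
expand_and_usage_sketch (rtx reg)
{
  return expand_and (SImode, reg, GEN_INT (0xff), NULL_RTX);
}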
5119
5120 /* Helper function for emit_store_flag. */
5121 rtx
5122 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5123 machine_mode mode, machine_mode compare_mode,
5124 int unsignedp, rtx x, rtx y, int normalizep,
5125 machine_mode target_mode)
5126 {
5127 struct expand_operand ops[4];
5128 rtx op0, comparison, subtarget;
5129 rtx_insn *last;
5130 machine_mode result_mode = targetm.cstore_mode (icode);
5131
5132 last = get_last_insn ();
5133 x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5134 y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5135 if (!x || !y)
5136 {
5137 delete_insns_since (last);
5138 return NULL_RTX;
5139 }
5140
5141 if (target_mode == VOIDmode)
5142 target_mode = result_mode;
5143 if (!target)
5144 target = gen_reg_rtx (target_mode);
5145
5146 comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5147
5148 create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5149 create_fixed_operand (&ops[1], comparison);
5150 create_fixed_operand (&ops[2], x);
5151 create_fixed_operand (&ops[3], y);
5152 if (!maybe_expand_insn (icode, 4, ops))
5153 {
5154 delete_insns_since (last);
5155 return NULL_RTX;
5156 }
5157 subtarget = ops[0].value;
5158
5159 /* If we are converting to a wider mode, first convert to
5160 TARGET_MODE, then normalize. This produces better combining
5161 opportunities on machines that have a SIGN_EXTRACT when we are
5162 testing a single bit. This mostly benefits the 68k.
5163
5164 If STORE_FLAG_VALUE does not have the sign bit set when
5165 interpreted in MODE, we can do this conversion as unsigned, which
5166 is usually more efficient. */
5167 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5168 {
5169 convert_move (target, subtarget,
5170 val_signbit_known_clear_p (result_mode,
5171 STORE_FLAG_VALUE));
5172 op0 = target;
5173 result_mode = target_mode;
5174 }
5175 else
5176 op0 = subtarget;
5177
5178 /* If we want to keep subexpressions around, don't reuse our last
5179 target. */
5180 if (optimize)
5181 subtarget = 0;
5182
5183 /* Now normalize to the proper value in MODE. Sometimes we don't
5184 have to do anything. */
5185 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5186 ;
5187 /* STORE_FLAG_VALUE might be the most negative number, so write
5188 the comparison this way to avoid a compile-time warning. */
5189 else if (- normalizep == STORE_FLAG_VALUE)
5190 op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5191
5192 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5193 it hard to use a value of just the sign bit due to ANSI integer
5194 constant typing rules. */
5195 else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5196 op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5197 GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5198 normalizep == 1);
5199 else
5200 {
5201 gcc_assert (STORE_FLAG_VALUE & 1);
5202
5203 op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5204 if (normalizep == -1)
5205 op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5206 }
5207
5208 /* If we were converting to a smaller mode, do the conversion now. */
5209 if (target_mode != result_mode)
5210 {
5211 convert_move (target, op0, 0);
5212 return target;
5213 }
5214 else
5215 return op0;
5216 }
5217
5218
5219 /* A subroutine of emit_store_flag that includes only the "tricks" that do
5220 not need a recursive call. These are kept separate to avoid infinite
5221 loops. */
5222
5223 static rtx
5224 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5225 machine_mode mode, int unsignedp, int normalizep,
5226 machine_mode target_mode)
5227 {
5228 rtx subtarget;
5229 enum insn_code icode;
5230 machine_mode compare_mode;
5231 enum mode_class mclass;
5232 enum rtx_code scode;
5233 rtx tem;
5234
5235 if (unsignedp)
5236 code = unsigned_condition (code);
5237 scode = swap_condition (code);
5238
5239 /* If one operand is constant, make it the second one. Only do this
5240 if the other operand is not constant as well. */
5241
5242 if (swap_commutative_operands_p (op0, op1))
5243 {
5244 tem = op0;
5245 op0 = op1;
5246 op1 = tem;
5247 code = swap_condition (code);
5248 }
5249
5250 if (mode == VOIDmode)
5251 mode = GET_MODE (op0);
5252
5253 /* For some comparisons with 1 and -1, we can convert this to
5254 comparisons with zero. This will often produce more opportunities for
5255 store-flag insns. */
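  /* For example, "x < 1" becomes "x <= 0", and an unsigned "x >= 1"
     becomes "x != 0".  */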
5256
5257 switch (code)
5258 {
5259 case LT:
5260 if (op1 == const1_rtx)
5261 op1 = const0_rtx, code = LE;
5262 break;
5263 case LE:
5264 if (op1 == constm1_rtx)
5265 op1 = const0_rtx, code = LT;
5266 break;
5267 case GE:
5268 if (op1 == const1_rtx)
5269 op1 = const0_rtx, code = GT;
5270 break;
5271 case GT:
5272 if (op1 == constm1_rtx)
5273 op1 = const0_rtx, code = GE;
5274 break;
5275 case GEU:
5276 if (op1 == const1_rtx)
5277 op1 = const0_rtx, code = NE;
5278 break;
5279 case LTU:
5280 if (op1 == const1_rtx)
5281 op1 = const0_rtx, code = EQ;
5282 break;
5283 default:
5284 break;
5285 }
5286
5287 /* If we are comparing a double-word integer with zero or -1, we can
5288 convert the comparison into one involving a single word. */
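  /* E.g. on a 32-bit target, a DImode "x == 0" becomes an SImode test of
     "(low word | high word) == 0", and a DImode "x < 0" becomes a sign-bit
     test of the high word alone.  */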
5289 if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5290 && GET_MODE_CLASS (mode) == MODE_INT
5291 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5292 {
5293 if ((code == EQ || code == NE)
5294 && (op1 == const0_rtx || op1 == constm1_rtx))
5295 {
5296 rtx op00, op01;
5297
5298 /* Do a logical OR or AND of the two words and compare the
5299 result. */
5300 op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5301 op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5302 tem = expand_binop (word_mode,
5303 op1 == const0_rtx ? ior_optab : and_optab,
5304 op00, op01, NULL_RTX, unsignedp,
5305 OPTAB_DIRECT);
5306
5307 if (tem != 0)
5308 tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5309 unsignedp, normalizep);
5310 }
5311 else if ((code == LT || code == GE) && op1 == const0_rtx)
5312 {
5313 rtx op0h;
5314
5315 /* If testing the sign bit, we can just test the high word. */
5316 op0h = simplify_gen_subreg (word_mode, op0, mode,
5317 subreg_highpart_offset (word_mode,
5318 mode));
5319 tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5320 unsignedp, normalizep);
5321 }
5322 else
5323 tem = NULL_RTX;
5324
5325 if (tem)
5326 {
5327 if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5328 return tem;
5329 if (!target)
5330 target = gen_reg_rtx (target_mode);
5331
5332 convert_move (target, tem,
5333 !val_signbit_known_set_p (word_mode,
5334 (normalizep ? normalizep
5335 : STORE_FLAG_VALUE)));
5336 return target;
5337 }
5338 }
5339
5340 /* If this is A < 0 or A >= 0, we can do this by taking the ones
5341 complement of A (for GE) and shifting the sign bit to the low bit. */
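  /* E.g. for a 0/1 result, "A < 0" becomes the logical shift
     "(unsigned) A >> (BITS - 1)", and "A >= 0" becomes
     "(unsigned) ~A >> (BITS - 1)".  */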
5342 if (op1 == const0_rtx && (code == LT || code == GE)
5343 && GET_MODE_CLASS (mode) == MODE_INT
5344 && (normalizep || STORE_FLAG_VALUE == 1
5345 || val_signbit_p (mode, STORE_FLAG_VALUE)))
5346 {
5347 subtarget = target;
5348
5349 if (!target)
5350 target_mode = mode;
5351
5352 /* If the result is to be wider than OP0, it is best to convert it
5353 first. If it is to be narrower, it is *incorrect* to convert it
5354 first. */
5355 else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5356 {
5357 op0 = convert_modes (target_mode, mode, op0, 0);
5358 mode = target_mode;
5359 }
5360
5361 if (target_mode != mode)
5362 subtarget = 0;
5363
5364 if (code == GE)
5365 op0 = expand_unop (mode, one_cmpl_optab, op0,
5366 ((STORE_FLAG_VALUE == 1 || normalizep)
5367 ? 0 : subtarget), 0);
5368
5369 if (STORE_FLAG_VALUE == 1 || normalizep)
5370 /* If we are supposed to produce a 0/1 value, we want to do
5371 a logical shift from the sign bit to the low-order bit; for
5372 a -1/0 value, we do an arithmetic shift. */
5373 op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5374 GET_MODE_BITSIZE (mode) - 1,
5375 subtarget, normalizep != -1);
5376
5377 if (mode != target_mode)
5378 op0 = convert_modes (target_mode, mode, op0, 0);
5379
5380 return op0;
5381 }
5382
5383 mclass = GET_MODE_CLASS (mode);
5384 for (compare_mode = mode; compare_mode != VOIDmode;
5385 compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5386 {
5387 machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5388 icode = optab_handler (cstore_optab, optab_mode);
5389 if (icode != CODE_FOR_nothing)
5390 {
5391 do_pending_stack_adjust ();
5392 tem = emit_cstore (target, icode, code, mode, compare_mode,
5393 unsignedp, op0, op1, normalizep, target_mode);
5394 if (tem)
5395 return tem;
5396
5397 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5398 {
5399 tem = emit_cstore (target, icode, scode, mode, compare_mode,
5400 unsignedp, op1, op0, normalizep, target_mode);
5401 if (tem)
5402 return tem;
5403 }
5404 break;
5405 }
5406 }
5407
5408 return 0;
5409 }
5410
5411 /* Emit a store-flag instruction for comparison CODE on OP0 and OP1
5412 and store the result in TARGET. Normally return TARGET.
5413 Return 0 if that cannot be done.
5414
5415 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5416 it is VOIDmode, they cannot both be CONST_INT.
5417
5418 UNSIGNEDP is for the case where we have to widen the operands
5419 to perform the operation. It says to use zero-extension.
5420
5421 NORMALIZEP is 1 if we should convert the result to be either zero
5422 or one. NORMALIZEP is -1 if we should convert the result to be
5423 either zero or -1. If NORMALIZEP is zero, the result will be left
5424 "raw" out of the scc insn. */
5425
5426 rtx
5427 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5428 machine_mode mode, int unsignedp, int normalizep)
5429 {
5430 machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5431 enum rtx_code rcode;
5432 rtx subtarget;
5433 rtx tem, trueval;
5434 rtx_insn *last;
5435
5436 /* If we compare constants, we shouldn't use a store-flag operation,
5437 but a constant load. We can get there via the vanilla route that
5438 usually generates a compare-branch sequence, but will in this case
5439 fold the comparison to a constant, and thus elide the branch. */
5440 if (CONSTANT_P (op0) && CONSTANT_P (op1))
5441 return NULL_RTX;
5442
5443 tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5444 target_mode);
5445 if (tem)
5446 return tem;
5447
5448 /* If we reached here, we can't do this with a scc insn; however, there
5449 are some comparisons that can be done in other ways. Don't do any
5450 of these cases if branches are very cheap. */
5451 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5452 return 0;
5453
5454 /* See what we need to return. We can only return a 1, -1, or the
5455 sign bit. */
5456
5457 if (normalizep == 0)
5458 {
5459 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5460 normalizep = STORE_FLAG_VALUE;
5461
5462 else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5463 ;
5464 else
5465 return 0;
5466 }
5467
5468 last = get_last_insn ();
5469
5470 /* If optimizing, use different pseudo registers for each insn, instead
5471 of reusing the same pseudo. This leads to better CSE, but slows
5472 down the compiler, since there are more pseudos. */
5473 subtarget = (!optimize
5474 && (target_mode == mode)) ? target : NULL_RTX;
5475 trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5476
5477 /* For floating-point comparisons, try the reverse comparison or try
5478 changing the "orderedness" of the comparison. */
5479 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5480 {
5481 enum rtx_code first_code;
5482 bool and_them;
5483
5484 rcode = reverse_condition_maybe_unordered (code);
5485 if (can_compare_p (rcode, mode, ccp_store_flag)
5486 && (code == ORDERED || code == UNORDERED
5487 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5488 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5489 {
5490 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5491 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5492
5493 /* For the reverse comparison, use either an addition or an XOR. */
5494 if (want_add
5495 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5496 optimize_insn_for_speed_p ()) == 0)
5497 {
5498 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5499 STORE_FLAG_VALUE, target_mode);
5500 if (tem)
5501 return expand_binop (target_mode, add_optab, tem,
5502 gen_int_mode (normalizep, target_mode),
5503 target, 0, OPTAB_WIDEN);
5504 }
5505 else if (!want_add
5506 && rtx_cost (trueval, XOR, 1,
5507 optimize_insn_for_speed_p ()) == 0)
5508 {
5509 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5510 normalizep, target_mode);
5511 if (tem)
5512 return expand_binop (target_mode, xor_optab, tem, trueval,
5513 target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5514 }
5515 }
5516
5517 delete_insns_since (last);
5518
5519 /* Cannot split ORDERED and UNORDERED, only try the above trick. */
5520 if (code == ORDERED || code == UNORDERED)
5521 return 0;
5522
5523 and_them = split_comparison (code, mode, &first_code, &code);
5524
5525 /* If there are no NaNs, the first comparison should always fall through.
5526 Effectively change the comparison to the other one. */
5527 if (!HONOR_NANS (mode))
5528 {
5529 gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5530 return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5531 target_mode);
5532 }
5533
5534 #ifdef HAVE_conditional_move
5535 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5536 conditional move. */
5537 tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5538 normalizep, target_mode);
5539 if (tem == 0)
5540 return 0;
5541
5542 if (and_them)
5543 tem = emit_conditional_move (target, code, op0, op1, mode,
5544 tem, const0_rtx, GET_MODE (tem), 0);
5545 else
5546 tem = emit_conditional_move (target, code, op0, op1, mode,
5547 trueval, tem, GET_MODE (tem), 0);
5548
5549 if (tem == 0)
5550 delete_insns_since (last);
5551 return tem;
5552 #else
5553 return 0;
5554 #endif
5555 }
5556
5557 /* The remaining tricks only apply to integer comparisons. */
5558
5559 if (GET_MODE_CLASS (mode) != MODE_INT)
5560 return 0;
5561
5562 /* If this is an equality comparison of integers, we can try to exclusive-or
5563 (or subtract) the two operands and use a recursive call to try the
5564 comparison with zero. Don't do any of these cases if branches are
5565 very cheap. */
5566
5567 if ((code == EQ || code == NE) && op1 != const0_rtx)
5568 {
5569 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5570 OPTAB_WIDEN);
5571
5572 if (tem == 0)
5573 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5574 OPTAB_WIDEN);
5575 if (tem != 0)
5576 tem = emit_store_flag (target, code, tem, const0_rtx,
5577 mode, unsignedp, normalizep);
5578 if (tem != 0)
5579 return tem;
5580
5581 delete_insns_since (last);
5582 }
5583
5584 /* For integer comparisons, try the reverse comparison. However, for
5585 small X, if we'd have to extend it anyway, implementing "X != 0"
5586 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
5587 rcode = reverse_condition (code);
5588 if (can_compare_p (rcode, mode, ccp_store_flag)
5589 && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5590 && code == NE
5591 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5592 && op1 == const0_rtx))
5593 {
5594 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5595 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5596
5597 /* Again, for the reverse comparison, use either an addition or an XOR. */
5598 if (want_add
5599 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5600 optimize_insn_for_speed_p ()) == 0)
5601 {
5602 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5603 STORE_FLAG_VALUE, target_mode);
5604 if (tem != 0)
5605 tem = expand_binop (target_mode, add_optab, tem,
5606 gen_int_mode (normalizep, target_mode),
5607 target, 0, OPTAB_WIDEN);
5608 }
5609 else if (!want_add
5610 && rtx_cost (trueval, XOR, 1,
5611 optimize_insn_for_speed_p ()) == 0)
5612 {
5613 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5614 normalizep, target_mode);
5615 if (tem != 0)
5616 tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5617 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5618 }
5619
5620 if (tem != 0)
5621 return tem;
5622 delete_insns_since (last);
5623 }
5624
5625 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5626 the constant zero. Reject all other comparisons at this point. Only
5627 do LE and GT if branches are expensive since they are expensive on
5628 2-operand machines. */
5629
5630 if (op1 != const0_rtx
5631 || (code != EQ && code != NE
5632 && (BRANCH_COST (optimize_insn_for_speed_p (),
5633 false) <= 1 || (code != LE && code != GT))))
5634 return 0;
5635
5636 /* Try to put the result of the comparison in the sign bit. Start by
5637 assuming we can't (TEM == 0); the cases below set TEM on success. */
5638
5639 tem = 0;
5640
5641 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5642 the sign bit set. */
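  /* For example, A = 3 gives 3 | 2 = 3 (sign bit clear), A = 0 gives
     0 | -1 = -1 (sign bit set), and A = -5 gives -5 | -6 = -5 (sign bit
     set).  */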
5643
5644 if (code == LE)
5645 {
5646 /* This is destructive, so SUBTARGET can't be OP0. */
5647 if (rtx_equal_p (subtarget, op0))
5648 subtarget = 0;
5649
5650 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5651 OPTAB_WIDEN);
5652 if (tem)
5653 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5654 OPTAB_WIDEN);
5655 }
5656
5657 /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5658 number of bits in the mode of OP0, minus one. */
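  /* E.g. in a 32-bit mode, A = 7 gives (7 >> 31) - 7 = -7 (sign bit set),
     A = 0 gives 0 (clear), and A = -7 gives -1 - (-7) = 6 (clear).  */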
5659
5660 if (code == GT)
5661 {
5662 if (rtx_equal_p (subtarget, op0))
5663 subtarget = 0;
5664
5665 tem = expand_shift (RSHIFT_EXPR, mode, op0,
5666 GET_MODE_BITSIZE (mode) - 1,
5667 subtarget, 0);
5668 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5669 OPTAB_WIDEN);
5670 }
5671
5672 if (code == EQ || code == NE)
5673 {
5674 /* For EQ or NE, one way to do the comparison is to apply an operation
5675 that converts the operand into a positive number if it is nonzero
5676 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5677 for NE we negate. This puts the result in the sign bit. Then we
5678 normalize with a shift, if needed.
5679
5680 Two operations that can do the above actions are ABS and FFS, so try
5681 them. If that doesn't work, and MODE is smaller than a full word,
5682 we can use zero-extension to the wider mode (an unsigned conversion)
5683 as the operation. */
5684
5685 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5686 that is compensated by the subsequent overflow when subtracting
5687 one / negating. */
5688
5689 if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5690 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5691 else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5692 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5693 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5694 {
5695 tem = convert_modes (word_mode, mode, op0, 1);
5696 mode = word_mode;
5697 }
5698
5699 if (tem != 0)
5700 {
5701 if (code == EQ)
5702 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5703 0, OPTAB_WIDEN);
5704 else
5705 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5706 }
5707
5708 /* If we couldn't do it that way, for NE we can "or" the two's complement
5709 of the value with itself. For EQ, we take the one's complement of
5710 that "or", which is an extra insn, so we only handle EQ if branches
5711 are expensive. */
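  /* (-X) | X has its sign bit set exactly when X is nonzero; the
     normalizing shift further below then turns it into the requested
     0/1 or 0/-1 value.  */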
5712
5713 if (tem == 0
5714 && (code == NE
5715 || BRANCH_COST (optimize_insn_for_speed_p (),
5716 false) > 1))
5717 {
5718 if (rtx_equal_p (subtarget, op0))
5719 subtarget = 0;
5720
5721 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5722 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5723 OPTAB_WIDEN);
5724
5725 if (tem && code == EQ)
5726 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5727 }
5728 }
5729
5730 if (tem && normalizep)
5731 tem = expand_shift (RSHIFT_EXPR, mode, tem,
5732 GET_MODE_BITSIZE (mode) - 1,
5733 subtarget, normalizep == 1);
5734
5735 if (tem)
5736 {
5737 if (!target)
5738 ;
5739 else if (GET_MODE (tem) != target_mode)
5740 {
5741 convert_move (target, tem, 0);
5742 tem = target;
5743 }
5744 else if (!subtarget)
5745 {
5746 emit_move_insn (target, tem);
5747 tem = target;
5748 }
5749 }
5750 else
5751 delete_insns_since (last);
5752
5753 return tem;
5754 }
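
/* A usage sketch (hypothetical helper, not called anywhere): expand
   "r = (a != b)" as a 0/1 SImode value.  A null return means no store-flag
   sequence could be produced and the caller must fall back to a
   compare-and-branch sequence (see emit_store_flag_force below).  */

static rtx ATTRIBUTE_UNUSED
emit_store_flag_usage_sketch (rtx a, rtx b)
{
  rtx target = gen_reg_rtx (SImode);
  return emit_store_flag (target, NE, a, b, SImode, 1, 1);
}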
5755
5756 /* Like emit_store_flag, but always succeeds. */
5757
5758 rtx
5759 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5760 machine_mode mode, int unsignedp, int normalizep)
5761 {
5762 rtx tem;
5763 rtx_code_label *label;
5764 rtx trueval, falseval;
5765
5766 /* First see if emit_store_flag can do the job. */
5767 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5768 if (tem != 0)
5769 return tem;
5770
5771 if (!target)
5772 target = gen_reg_rtx (word_mode);
5773
5774 /* If this failed, we have to do this with set/compare/jump/set code.
5775 For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */
5776 trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5777 if (code == NE
5778 && GET_MODE_CLASS (mode) == MODE_INT
5779 && REG_P (target)
5780 && op0 == target
5781 && op1 == const0_rtx)
5782 {
5783 label = gen_label_rtx ();
5784 do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp,
5785 mode, NULL_RTX, NULL_RTX, label, -1);
5786 emit_move_insn (target, trueval);
5787 emit_label (label);
5788 return target;
5789 }
5790
5791 if (!REG_P (target)
5792 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5793 target = gen_reg_rtx (GET_MODE (target));
5794
5795 /* Jump in the right direction if the target cannot implement CODE
5796 but can jump on its reverse condition. */
5797 falseval = const0_rtx;
5798 if (! can_compare_p (code, mode, ccp_jump)
5799 && (! FLOAT_MODE_P (mode)
5800 || code == ORDERED || code == UNORDERED
5801 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5802 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5803 {
5804 enum rtx_code rcode;
5805 if (FLOAT_MODE_P (mode))
5806 rcode = reverse_condition_maybe_unordered (code);
5807 else
5808 rcode = reverse_condition (code);
5809
5810 /* Canonicalize to UNORDERED for the libcall. */
5811 if (can_compare_p (rcode, mode, ccp_jump)
5812 || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5813 {
5814 falseval = trueval;
5815 trueval = const0_rtx;
5816 code = rcode;
5817 }
5818 }
5819
5820 emit_move_insn (target, trueval);
5821 label = gen_label_rtx ();
5822 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5823 NULL_RTX, label, -1);
5824
5825 emit_move_insn (target, falseval);
5826 emit_label (label);
5827
5828 return target;
5829 }
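
/* A usage sketch (hypothetical helper, not called anywhere): unlike
   emit_store_flag, emit_store_flag_force never fails, so "r = (a < 0)" can
   be expanded unconditionally even when the target lacks scc patterns.  */

static rtx ATTRIBUTE_UNUSED
emit_store_flag_force_usage_sketch (rtx a)
{
  rtx target = gen_reg_rtx (SImode);
  return emit_store_flag_force (target, LT, a, const0_rtx, SImode, 0, 1);
}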
5830 \f
5831 /* Perform possibly multi-word comparison and conditional jump to LABEL
5832 if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE. This is
5833 now a thin wrapper around do_compare_rtx_and_jump. */
5834
5835 static void
5836 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
5837 rtx_code_label *label)
5838 {
5839 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5840 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
5841 NULL_RTX, NULL_RTX, label, -1);
5842 }