/* Medium-level subroutines: convert bit-field store and extract
   and shifts, multiplies and divides to rtl instructions.
   Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
   1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
   2011, 2012
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "diagnostic-core.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "flags.h"
#include "insn-config.h"
#include "expr.h"
#include "optabs.h"
#include "recog.h"
#include "langhooks.h"
#include "df.h"
#include "target.h"
#include "expmed.h"

struct target_expmed default_target_expmed;
#if SWITCHABLE_TARGET
struct target_expmed *this_target_expmed = &default_target_expmed;
#endif

static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   rtx);
static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   rtx);
static rtx extract_fixed_bit_field (enum machine_mode, rtx,
                                    unsigned HOST_WIDE_INT,
                                    unsigned HOST_WIDE_INT,
                                    unsigned HOST_WIDE_INT, rtx, int, bool);
static rtx mask_rtx (enum machine_mode, int, int, int);
static rtx lshift_value (enum machine_mode, rtx, int, int);
static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
                                    unsigned HOST_WIDE_INT, int);
static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);

/* Test whether a value is zero or a power of two.  */
#define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
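/* x & (x - 1) clears the lowest set bit of x, so the test holds
   exactly when x has at most one bit set: e.g. 8 & 7 == 0 (power of
   two) and 0 & -1 == 0 (zero), but 6 & 5 == 4, so 6 fails.  */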

#ifndef SLOW_UNALIGNED_ACCESS
#define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
#endif


/* Reduce conditional compilation elsewhere.  */
#ifndef HAVE_insv
#define HAVE_insv 0
#define CODE_FOR_insv CODE_FOR_nothing
#define gen_insv(a,b,c,d) NULL_RTX
#endif
#ifndef HAVE_extv
#define HAVE_extv 0
#define CODE_FOR_extv CODE_FOR_nothing
#define gen_extv(a,b,c,d) NULL_RTX
#endif
#ifndef HAVE_extzv
#define HAVE_extzv 0
#define CODE_FOR_extzv CODE_FOR_nothing
#define gen_extzv(a,b,c,d) NULL_RTX
#endif

struct init_expmed_rtl
{
  struct rtx_def reg;		rtunion reg_fld[2];
  struct rtx_def plus;		rtunion plus_fld1;
  struct rtx_def neg;
  struct rtx_def mult;		rtunion mult_fld1;
  struct rtx_def sdiv;		rtunion sdiv_fld1;
  struct rtx_def udiv;		rtunion udiv_fld1;
  struct rtx_def sdiv_32;	rtunion sdiv_32_fld1;
  struct rtx_def smod_32;	rtunion smod_32_fld1;
  struct rtx_def wide_mult;	rtunion wide_mult_fld1;
  struct rtx_def wide_lshr;	rtunion wide_lshr_fld1;
  struct rtx_def wide_trunc;
  struct rtx_def shift;		rtunion shift_fld1;
  struct rtx_def shift_mult;	rtunion shift_mult_fld1;
  struct rtx_def shift_add;	rtunion shift_add_fld1;
  struct rtx_def shift_sub0;	rtunion shift_sub0_fld1;
  struct rtx_def shift_sub1;	rtunion shift_sub1_fld1;
  struct rtx_def zext;
  struct rtx_def trunc;

  rtx pow2[MAX_BITS_PER_WORD];
  rtx cint[MAX_BITS_PER_WORD];
};

static void
init_expmed_one_conv (struct init_expmed_rtl *all, enum machine_mode to_mode,
                      enum machine_mode from_mode, bool speed)
{
  int to_size, from_size;
  rtx which;

  /* We're given no information about the true size of a partial integer,
     only the size of the "full" integer it requires for storage.  For
     comparison purposes here, reduce the bit size by one in that case.  */
  to_size = (GET_MODE_BITSIZE (to_mode)
             - (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT));
  from_size = (GET_MODE_BITSIZE (from_mode)
               - (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT));

  /* Assume cost of zero-extend and sign-extend is the same.  */
  which = (to_size < from_size ? &all->trunc : &all->zext);

  PUT_MODE (&all->reg, from_mode);
  set_convert_cost (to_mode, from_mode, speed, set_src_cost (which, speed));
}
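/* Example: for a partial-integer mode such as PSImode stored in 32 bits,
   GET_MODE_BITSIZE reports the 32-bit storage size.  The adjustment
   above turns that into 31, so a conversion from PSImode to a full
   32-bit SImode is costed as the zero/sign extension, and SImode to
   PSImode as the truncation, even though both modes report the same
   storage bitsize.  */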

static void
init_expmed_one_mode (struct init_expmed_rtl *all,
                      enum machine_mode mode, int speed)
{
  int m, n, mode_bitsize;
  enum machine_mode mode_from;

  mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);

  PUT_MODE (&all->reg, mode);
  PUT_MODE (&all->plus, mode);
  PUT_MODE (&all->neg, mode);
  PUT_MODE (&all->mult, mode);
  PUT_MODE (&all->sdiv, mode);
  PUT_MODE (&all->udiv, mode);
  PUT_MODE (&all->sdiv_32, mode);
  PUT_MODE (&all->smod_32, mode);
  PUT_MODE (&all->wide_trunc, mode);
  PUT_MODE (&all->shift, mode);
  PUT_MODE (&all->shift_mult, mode);
  PUT_MODE (&all->shift_add, mode);
  PUT_MODE (&all->shift_sub0, mode);
  PUT_MODE (&all->shift_sub1, mode);
  PUT_MODE (&all->zext, mode);
  PUT_MODE (&all->trunc, mode);

  set_add_cost (speed, mode, set_src_cost (&all->plus, speed));
  set_neg_cost (speed, mode, set_src_cost (&all->neg, speed));
  set_mul_cost (speed, mode, set_src_cost (&all->mult, speed));
  set_sdiv_cost (speed, mode, set_src_cost (&all->sdiv, speed));
  set_udiv_cost (speed, mode, set_src_cost (&all->udiv, speed));

  set_sdiv_pow2_cheap (speed, mode, (set_src_cost (&all->sdiv_32, speed)
                                     <= 2 * add_cost (speed, mode)));
  set_smod_pow2_cheap (speed, mode, (set_src_cost (&all->smod_32, speed)
                                     <= 4 * add_cost (speed, mode)));

  set_shift_cost (speed, mode, 0, 0);
  {
    int cost = add_cost (speed, mode);
    set_shiftadd_cost (speed, mode, 0, cost);
    set_shiftsub0_cost (speed, mode, 0, cost);
    set_shiftsub1_cost (speed, mode, 0, cost);
  }

  n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
  for (m = 1; m < n; m++)
    {
      XEXP (&all->shift, 1) = all->cint[m];
      XEXP (&all->shift_mult, 1) = all->pow2[m];

      set_shift_cost (speed, mode, m, set_src_cost (&all->shift, speed));
      set_shiftadd_cost (speed, mode, m, set_src_cost (&all->shift_add, speed));
      set_shiftsub0_cost (speed, mode, m, set_src_cost (&all->shift_sub0, speed));
      set_shiftsub1_cost (speed, mode, m, set_src_cost (&all->shift_sub1, speed));
    }

  if (SCALAR_INT_MODE_P (mode))
    {
      for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
           mode_from = (enum machine_mode)(mode_from + 1))
        init_expmed_one_conv (all, mode, mode_from, speed);
    }
  if (GET_MODE_CLASS (mode) == MODE_INT)
    {
      enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
      if (wider_mode != VOIDmode)
        {
          PUT_MODE (&all->zext, wider_mode);
          PUT_MODE (&all->wide_mult, wider_mode);
          PUT_MODE (&all->wide_lshr, wider_mode);
          XEXP (&all->wide_lshr, 1) = GEN_INT (mode_bitsize);

          set_mul_widen_cost (speed, wider_mode,
                              set_src_cost (&all->wide_mult, speed));
          set_mul_highpart_cost (speed, mode,
                                 set_src_cost (&all->wide_trunc, speed));
        }
    }
}
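/* The *_pow2_cheap flags set above record a simple heuristic: if a
   general "reg / 32" costs no more than two additions in this mode
   (four for "reg % 32"), the division-expansion code will prefer the
   division instruction itself over the multi-instruction shift/add
   sequences for division and remainder by a power of 2.  */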

void
init_expmed (void)
{
  struct init_expmed_rtl all;
  enum machine_mode mode;
  int m, speed;

  memset (&all, 0, sizeof all);
  for (m = 1; m < MAX_BITS_PER_WORD; m++)
    {
      all.pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
      all.cint[m] = GEN_INT (m);
    }

  PUT_CODE (&all.reg, REG);
  /* Avoid using hard regs in ways which may be unsupported.  */
  SET_REGNO (&all.reg, LAST_VIRTUAL_REGISTER + 1);

  PUT_CODE (&all.plus, PLUS);
  XEXP (&all.plus, 0) = &all.reg;
  XEXP (&all.plus, 1) = &all.reg;

  PUT_CODE (&all.neg, NEG);
  XEXP (&all.neg, 0) = &all.reg;

  PUT_CODE (&all.mult, MULT);
  XEXP (&all.mult, 0) = &all.reg;
  XEXP (&all.mult, 1) = &all.reg;

  PUT_CODE (&all.sdiv, DIV);
  XEXP (&all.sdiv, 0) = &all.reg;
  XEXP (&all.sdiv, 1) = &all.reg;

  PUT_CODE (&all.udiv, UDIV);
  XEXP (&all.udiv, 0) = &all.reg;
  XEXP (&all.udiv, 1) = &all.reg;

  PUT_CODE (&all.sdiv_32, DIV);
  XEXP (&all.sdiv_32, 0) = &all.reg;
  XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? all.cint[32] : GEN_INT (32);

  PUT_CODE (&all.smod_32, MOD);
  XEXP (&all.smod_32, 0) = &all.reg;
  XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);

  PUT_CODE (&all.zext, ZERO_EXTEND);
  XEXP (&all.zext, 0) = &all.reg;

  PUT_CODE (&all.wide_mult, MULT);
  XEXP (&all.wide_mult, 0) = &all.zext;
  XEXP (&all.wide_mult, 1) = &all.zext;

  PUT_CODE (&all.wide_lshr, LSHIFTRT);
  XEXP (&all.wide_lshr, 0) = &all.wide_mult;

  PUT_CODE (&all.wide_trunc, TRUNCATE);
  XEXP (&all.wide_trunc, 0) = &all.wide_lshr;

  PUT_CODE (&all.shift, ASHIFT);
  XEXP (&all.shift, 0) = &all.reg;

  PUT_CODE (&all.shift_mult, MULT);
  XEXP (&all.shift_mult, 0) = &all.reg;

  PUT_CODE (&all.shift_add, PLUS);
  XEXP (&all.shift_add, 0) = &all.shift_mult;
  XEXP (&all.shift_add, 1) = &all.reg;

  PUT_CODE (&all.shift_sub0, MINUS);
  XEXP (&all.shift_sub0, 0) = &all.shift_mult;
  XEXP (&all.shift_sub0, 1) = &all.reg;

  PUT_CODE (&all.shift_sub1, MINUS);
  XEXP (&all.shift_sub1, 0) = &all.reg;
  XEXP (&all.shift_sub1, 1) = &all.shift_mult;

  PUT_CODE (&all.trunc, TRUNCATE);
  XEXP (&all.trunc, 0) = &all.reg;

  for (speed = 0; speed < 2; speed++)
    {
      crtl->maybe_hot_insn_p = speed;
      set_zero_cost (speed, set_src_cost (const0_rtx, speed));

      for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
           mode = (enum machine_mode)(mode + 1))
        init_expmed_one_mode (&all, mode, speed);

      if (MIN_MODE_PARTIAL_INT != VOIDmode)
        for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
             mode = (enum machine_mode)(mode + 1))
          init_expmed_one_mode (&all, mode, speed);

      if (MIN_MODE_VECTOR_INT != VOIDmode)
        for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
             mode = (enum machine_mode)(mode + 1))
          init_expmed_one_mode (&all, mode, speed);
    }

  if (alg_hash_used_p ())
    {
      struct alg_hash_entry *p = alg_hash_entry_ptr (0);
      memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
    }
  else
    set_alg_hash_used_p (true);
  default_rtl_profile ();
}
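/* Every cost is recorded twice: once with SPEED == 0 (optimizing for
   size) and once with SPEED == 1 (optimizing for speed).  Setting
   crtl->maybe_hot_insn_p around the set_src_cost queries keeps any
   target cost hooks that consult the current RTL profile consistent
   with the SPEED flag being passed down.  */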

/* Return an rtx representing minus the value of X.
   MODE is the intended mode of the result,
   useful if X is a CONST_INT.  */

rtx
negate_rtx (enum machine_mode mode, rtx x)
{
  rtx result = simplify_unary_operation (NEG, mode, x, mode);

  if (result == 0)
    result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);

  return result;
}
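/* When X is constant, simplify_unary_operation folds the negation
   immediately, so e.g. negate_rtx (SImode, GEN_INT (5)) returns
   (const_int -5) without emitting any instructions; only non-constant
   operands reach expand_unop.  */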

/* Report on the availability of insv/extv/extzv and the desired mode
   of each of their operands.  Returns MAX_MACHINE_MODE if HAVE_foo
   is false; else the mode of the specified operand.  If OPNO is -1,
   all the caller cares about is whether the insn is available.  */

enum machine_mode
mode_for_extraction (enum extraction_pattern pattern, int opno)
{
  const struct insn_data_d *data;

  switch (pattern)
    {
    case EP_insv:
      if (HAVE_insv)
        {
          data = &insn_data[CODE_FOR_insv];
          break;
        }
      return MAX_MACHINE_MODE;

    case EP_extv:
      if (HAVE_extv)
        {
          data = &insn_data[CODE_FOR_extv];
          break;
        }
      return MAX_MACHINE_MODE;

    case EP_extzv:
      if (HAVE_extzv)
        {
          data = &insn_data[CODE_FOR_extzv];
          break;
        }
      return MAX_MACHINE_MODE;

    default:
      gcc_unreachable ();
    }

  if (opno == -1)
    return VOIDmode;

  /* Everyone who uses this function used to follow it with
     if (result == VOIDmode) result = word_mode;  */
  if (data->operand[opno].mode == VOIDmode)
    return word_mode;
  return data->operand[opno].mode;
}
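/* Typical use, as in store_bit_field_1 below:
     enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);
   asks for the mode the insv pattern wants for its value operand
   (operand 3), falling back to word_mode when the pattern leaves
   that operand's mode unspecified.  */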

/* A subroutine of store_bit_field, with the same arguments.  Return true
   if the operation could be implemented.

   If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
   no other way of implementing the operation.  If FALLBACK_P is false,
   return false instead.  */

static bool
store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
                   unsigned HOST_WIDE_INT bitnum,
                   unsigned HOST_WIDE_INT bitregion_start,
                   unsigned HOST_WIDE_INT bitregion_end,
                   enum machine_mode fieldmode,
                   rtx value, bool fallback_p)
{
  unsigned int unit
    = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
  unsigned HOST_WIDE_INT offset, bitpos;
  rtx op0 = str_rtx;
  int byte_offset;
  rtx orig_value;

  enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);

  while (GET_CODE (op0) == SUBREG)
    {
      /* The following line once was done only if WORDS_BIG_ENDIAN,
         but I think that is a mistake.  WORDS_BIG_ENDIAN is
         meaningful at a much higher level; when structures are copied
         between memory and regs, the higher-numbered regs
         always get higher addresses.  */
      int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
      int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));

      byte_offset = 0;

      /* Paradoxical subregs need special handling on big endian machines.  */
      if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
        {
          int difference = inner_mode_size - outer_mode_size;

          if (WORDS_BIG_ENDIAN)
            byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
          if (BYTES_BIG_ENDIAN)
            byte_offset += difference % UNITS_PER_WORD;
        }
      else
        byte_offset = SUBREG_BYTE (op0);

      bitnum += byte_offset * BITS_PER_UNIT;
      op0 = SUBREG_REG (op0);
    }

  /* No action is needed if the target is a register and if the field
     lies completely outside that register.  This can occur if the source
     code contains an out-of-bounds access to a small array.  */
  if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
    return true;

  /* Use vec_set patterns for inserting parts of vectors whenever
     available.  */
  if (VECTOR_MODE_P (GET_MODE (op0))
      && !MEM_P (op0)
      && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
      && fieldmode == GET_MODE_INNER (GET_MODE (op0))
      && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
      && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
    {
      struct expand_operand ops[3];
      enum machine_mode outermode = GET_MODE (op0);
      enum machine_mode innermode = GET_MODE_INNER (outermode);
      enum insn_code icode = optab_handler (vec_set_optab, outermode);
      int pos = bitnum / GET_MODE_BITSIZE (innermode);

      create_fixed_operand (&ops[0], op0);
      create_input_operand (&ops[1], value, innermode);
      create_integer_operand (&ops[2], pos);
      if (maybe_expand_insn (icode, 3, ops))
        return true;
    }

  /* If the target is a register, overwriting the entire object, or storing
     a full-word or multi-word field can be done with just a SUBREG.

     If the target is memory, storing any naturally aligned field can be
     done with a simple store.  For targets that support fast unaligned
     memory, any naturally sized, unit aligned field can be done directly.  */

  offset = bitnum / unit;
  bitpos = bitnum % unit;
  byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
                + (offset * UNITS_PER_WORD);

  if (bitpos == 0
      && bitsize == GET_MODE_BITSIZE (fieldmode)
      && (!MEM_P (op0)
          ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
              || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
             && ((GET_MODE (op0) == fieldmode && byte_offset == 0)
                 || validate_subreg (fieldmode, GET_MODE (op0), op0,
                                     byte_offset)))
          : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
             || (offset * BITS_PER_UNIT % bitsize == 0
                 && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
    {
      if (MEM_P (op0))
        op0 = adjust_address (op0, fieldmode, offset);
      else if (GET_MODE (op0) != fieldmode)
        op0 = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
                                   byte_offset);
      emit_move_insn (op0, value);
      return true;
    }

  /* Make sure we are playing with integral modes.  Pun with subregs
     if we aren't.  This must come after the entire register case above,
     since that case is valid for any mode.  The following cases are only
     valid for integral modes.  */
  {
    enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
    if (imode != GET_MODE (op0))
      {
        if (MEM_P (op0))
          op0 = adjust_address (op0, imode, 0);
        else
          {
            gcc_assert (imode != BLKmode);
            op0 = gen_lowpart (imode, op0);
          }
      }
  }

  /* We may be accessing data outside the field, which means
     we can alias adjacent data.  */
  /* ??? Not always for the C++0x memory model.  */
  if (MEM_P (op0))
    {
      op0 = shallow_copy_rtx (op0);
      set_mem_alias_set (op0, 0);
      set_mem_expr (op0, 0);
    }

  /* If OP0 is a register, BITPOS must count within a word.
     But as we have it, it counts within whatever size OP0 now has.
     On a bigendian machine, these are not the same, so convert.  */
  if (BYTES_BIG_ENDIAN
      && !MEM_P (op0)
      && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
    bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));

  /* Storing an lsb-aligned field in a register
     can be done with a movestrict instruction.  */

  if (!MEM_P (op0)
      && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
      && bitsize == GET_MODE_BITSIZE (fieldmode)
      && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
    {
      struct expand_operand ops[2];
      enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
      rtx arg0 = op0;
      unsigned HOST_WIDE_INT subreg_off;

      if (GET_CODE (arg0) == SUBREG)
        {
          /* Else we've got some float mode source being extracted into
             a different float mode destination -- this combination of
             subregs results in Severe Tire Damage.  */
          gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
                      || GET_MODE_CLASS (fieldmode) == MODE_INT
                      || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
          arg0 = SUBREG_REG (arg0);
        }

      subreg_off = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
                   + (offset * UNITS_PER_WORD);
      if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
        {
          arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);

          create_fixed_operand (&ops[0], arg0);
          /* Shrink the source operand to FIELDMODE.  */
          create_convert_operand_to (&ops[1], value, fieldmode, false);
          if (maybe_expand_insn (icode, 2, ops))
            return true;
        }
    }

  /* Handle fields bigger than a word.  */

  if (bitsize > BITS_PER_WORD)
    {
      /* Here we transfer the words of the field
         in the order least significant first.
         This is because the most significant word is the one which may
         be less than full.
         However, only do that if the value is not BLKmode.  */

      unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
      unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
      unsigned int i;
      rtx last;

      /* This is the mode we must force value to, so that there will be enough
         subwords to extract.  Note that fieldmode will often (always?) be
         VOIDmode, because that is what store_field uses to indicate that this
         is a bit field, but passing VOIDmode to operand_subword_force
         is not allowed.  */
      fieldmode = GET_MODE (value);
      if (fieldmode == VOIDmode)
        fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);

      last = get_last_insn ();
      for (i = 0; i < nwords; i++)
        {
          /* If I is 0, use the low-order word in both field and target;
             if I is 1, use the next to lowest word; and so on.  */
          unsigned int wordnum = (backwards
                                  ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
                                    - i - 1
                                  : i);
          unsigned int bit_offset = (backwards
                                     ? MAX ((int) bitsize - ((int) i + 1)
                                            * BITS_PER_WORD,
                                            0)
                                     : (int) i * BITS_PER_WORD);
          rtx value_word = operand_subword_force (value, wordnum, fieldmode);
          unsigned HOST_WIDE_INT new_bitsize =
            MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);

          /* If the remaining chunk doesn't have full wordsize we have
             to make sure that for big endian machines the higher order
             bits are used.  */
          if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
            value_word = simplify_expand_binop (word_mode, lshr_optab,
                                                value_word,
                                                GEN_INT (BITS_PER_WORD
                                                         - new_bitsize),
                                                NULL_RTX, true,
                                                OPTAB_LIB_WIDEN);

          if (!store_bit_field_1 (op0, new_bitsize,
                                  bitnum + bit_offset,
                                  bitregion_start, bitregion_end,
                                  word_mode,
                                  value_word, fallback_p))
            {
              delete_insns_since (last);
              return false;
            }
        }
      return true;
    }
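  /* For example, storing a 64-bit field on a 32-bit little-endian
     target takes two iterations of the loop above: word 0 of VALUE
     goes to bits [bitnum, bitnum+32), word 1 to bits [bitnum+32,
     bitnum+64), each via a recursive word_mode store.  */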

  /* From here on we can assume that the field to be stored in is
     a full-word (whatever type that is), since it is shorter than a word.  */

  /* OFFSET is the number of words or bytes (UNIT says which)
     from STR_RTX to the first word or byte containing part of the field.  */

  if (!MEM_P (op0))
    {
      if (offset != 0
          || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
        {
          if (!REG_P (op0))
            {
              /* Since this is a destination (lvalue), we can't copy
                 it to a pseudo.  We can remove a SUBREG that does not
                 change the size of the operand.  Such a SUBREG may
                 have been added above.  */
              gcc_assert (GET_CODE (op0) == SUBREG
                          && (GET_MODE_SIZE (GET_MODE (op0))
                              == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))));
              op0 = SUBREG_REG (op0);
            }
          op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
                                op0, (offset * UNITS_PER_WORD));
        }
      offset = 0;
    }

  /* If VALUE has a floating-point or complex mode, access it as an
     integer of the corresponding size.  This can occur on a machine
     with 64 bit registers that uses SFmode for float.  It can also
     occur for unaligned float or complex fields.  */
  orig_value = value;
  if (GET_MODE (value) != VOIDmode
      && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
      && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
    {
      value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
      emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
    }

  /* Now OFFSET is nonzero only if OP0 is memory
     and is therefore always measured in bytes.  */

  if (HAVE_insv
      && GET_MODE (value) != BLKmode
      && bitsize > 0
      && GET_MODE_BITSIZE (op_mode) >= bitsize
      /* Do not use insv for volatile bitfields when
         -fstrict-volatile-bitfields is in effect.  */
      && !(MEM_P (op0) && MEM_VOLATILE_P (op0)
           && flag_strict_volatile_bitfields > 0)
      && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
            && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode)))
      /* Do not use insv if the bit region is restricted and
         an op_mode integer at the given offset doesn't fit into the
         restricted region.  */
      && !(MEM_P (op0) && bitregion_end
           && bitnum - bitpos + GET_MODE_BITSIZE (op_mode)
              > bitregion_end + 1))
    {
      struct expand_operand ops[4];
      int xbitpos = bitpos;
      rtx value1;
      rtx xop0 = op0;
      rtx last = get_last_insn ();
      bool copy_back = false;

      /* Add OFFSET into OP0's address.  */
      if (MEM_P (xop0))
        xop0 = adjust_address (xop0, byte_mode, offset);

      /* If xop0 is a register, we need it in OP_MODE
         to make it acceptable to the format of insv.  */
      if (GET_CODE (xop0) == SUBREG)
        /* We can't just change the mode, because this might clobber op0,
           and we will need the original value of op0 if insv fails.  */
        xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
      if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
        xop0 = gen_lowpart_SUBREG (op_mode, xop0);

      /* If the destination is a paradoxical subreg such that we need a
         truncate to the inner mode, perform the insertion on a temporary and
         truncate the result to the original destination.  Note that we can't
         just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
         X) 0)) is (reg:N X).  */
      if (GET_CODE (xop0) == SUBREG
          && REG_P (SUBREG_REG (xop0))
          && (!TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
                                              op_mode)))
        {
          rtx tem = gen_reg_rtx (op_mode);
          emit_move_insn (tem, xop0);
          xop0 = tem;
          copy_back = true;
        }

      /* We have been counting XBITPOS within UNIT.
         Count instead within the size of the register.  */
      if (BYTES_BIG_ENDIAN && !MEM_P (xop0))
        xbitpos += GET_MODE_BITSIZE (op_mode) - unit;

      unit = GET_MODE_BITSIZE (op_mode);

      /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
         "backwards" from the size of the unit we are inserting into.
         Otherwise, we count bits from the most significant on a
         BYTES/BITS_BIG_ENDIAN machine.  */

      if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
        xbitpos = unit - bitsize - xbitpos;

      /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
      value1 = value;
      if (GET_MODE (value) != op_mode)
        {
          if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
            {
              /* Optimization: Don't bother really extending VALUE
                 if it has all the bits we will actually use.  However,
                 if we must narrow it, be sure we do it correctly.  */

              if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
                {
                  rtx tmp;

                  tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
                  if (! tmp)
                    tmp = simplify_gen_subreg (op_mode,
                                               force_reg (GET_MODE (value),
                                                          value1),
                                               GET_MODE (value), 0);
                  value1 = tmp;
                }
              else
                value1 = gen_lowpart (op_mode, value1);
            }
          else if (CONST_INT_P (value))
            value1 = gen_int_mode (INTVAL (value), op_mode);
          else
            /* Parse phase is supposed to make VALUE's data type
               match that of the component reference, which is a type
               at least as wide as the field; so VALUE should have
               a mode that corresponds to that type.  */
            gcc_assert (CONSTANT_P (value));
        }

      create_fixed_operand (&ops[0], xop0);
      create_integer_operand (&ops[1], bitsize);
      create_integer_operand (&ops[2], xbitpos);
      create_input_operand (&ops[3], value1, op_mode);
      if (maybe_expand_insn (CODE_FOR_insv, 4, ops))
        {
          if (copy_back)
            convert_move (op0, xop0, true);
          return true;
        }
      delete_insns_since (last);
    }

  /* If OP0 is a memory, try copying it to a register and seeing if a
     cheap register alternative is available.  */
  if (HAVE_insv && MEM_P (op0))
    {
      enum machine_mode bestmode;
      unsigned HOST_WIDE_INT maxbits = MAX_FIXED_MODE_SIZE;

      if (bitregion_end)
        maxbits = bitregion_end - bitregion_start + 1;

      /* Get the mode to use for inserting into this field.  If OP0 is
         BLKmode, get the smallest mode consistent with the alignment.  If
         OP0 is a non-BLKmode object that is no wider than OP_MODE, use its
         mode.  Otherwise, use the smallest mode containing the field.  */

      if (GET_MODE (op0) == BLKmode
          || GET_MODE_BITSIZE (GET_MODE (op0)) > maxbits
          || (op_mode != MAX_MACHINE_MODE
              && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (op_mode)))
        bestmode = get_best_mode (bitsize, bitnum,
                                  bitregion_start, bitregion_end,
                                  MEM_ALIGN (op0),
                                  (op_mode == MAX_MACHINE_MODE
                                   ? VOIDmode : op_mode),
                                  MEM_VOLATILE_P (op0));
      else
        bestmode = GET_MODE (op0);

      if (bestmode != VOIDmode
          && GET_MODE_SIZE (bestmode) >= GET_MODE_SIZE (fieldmode)
          && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
               && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
        {
          rtx last, tempreg, xop0;
          unsigned HOST_WIDE_INT xoffset, xbitpos;

          last = get_last_insn ();

          /* Adjust address to point to the containing unit of
             that mode.  Compute the offset as a multiple of this unit,
             counting in bytes.  */
          unit = GET_MODE_BITSIZE (bestmode);
          xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
          xbitpos = bitnum % unit;
          xop0 = adjust_address (op0, bestmode, xoffset);

          /* Fetch that unit, store the bitfield in it, then store
             the unit.  */
          tempreg = copy_to_reg (xop0);
          if (store_bit_field_1 (tempreg, bitsize, xbitpos,
                                 bitregion_start, bitregion_end,
                                 fieldmode, orig_value, false))
            {
              emit_move_insn (xop0, tempreg);
              return true;
            }
          delete_insns_since (last);
        }
    }

  if (!fallback_p)
    return false;

  store_fixed_bit_field (op0, offset, bitsize, bitpos,
                         bitregion_start, bitregion_end, value);
  return true;
}

/* Generate code to store value from rtx VALUE
   into a bit-field within structure STR_RTX
   containing BITSIZE bits starting at bit BITNUM.

   BITREGION_START is the bitpos of the first bitfield in this region.
   BITREGION_END is the bitpos of the ending bitfield in this region.
   These two fields are 0 if the C++ memory model does not apply,
   or if we are not interested in keeping track of bitfield regions.

   FIELDMODE is the machine-mode of the FIELD_DECL node for this field.  */

void
store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
                 unsigned HOST_WIDE_INT bitnum,
                 unsigned HOST_WIDE_INT bitregion_start,
                 unsigned HOST_WIDE_INT bitregion_end,
                 enum machine_mode fieldmode,
                 rtx value)
{
  /* Under the C++0x memory model, we must not touch bits outside the
     bit region.  Adjust the address to start at the beginning of the
     bit region.  */
  if (MEM_P (str_rtx) && bitregion_start > 0)
    {
      enum machine_mode bestmode;
      enum machine_mode op_mode;
      unsigned HOST_WIDE_INT offset;

      op_mode = mode_for_extraction (EP_insv, 3);
      if (op_mode == MAX_MACHINE_MODE)
        op_mode = VOIDmode;

      gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);

      offset = bitregion_start / BITS_PER_UNIT;
      bitnum -= bitregion_start;
      bitregion_end -= bitregion_start;
      bitregion_start = 0;
      bestmode = get_best_mode (bitsize, bitnum,
                                bitregion_start, bitregion_end,
                                MEM_ALIGN (str_rtx),
                                op_mode,
                                MEM_VOLATILE_P (str_rtx));
      str_rtx = adjust_address (str_rtx, bestmode, offset);
    }

  if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
                          bitregion_start, bitregion_end,
                          fieldmode, value, true))
    gcc_unreachable ();
}
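/* For example, under the C++0x memory model a store into a bitfield
   whose bit region starts at byte 4 of STR_RTX is first re-based at
   that byte: BITNUM and BITREGION_END are reduced by BITREGION_START,
   so the read-modify-write sequences generated for the store can
   never address storage before the region.  */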

/* Use shifts and boolean operations to store VALUE
   into a bit field of width BITSIZE
   in a memory location specified by OP0 except offset by OFFSET bytes.
   (OFFSET must be 0 if OP0 is a register.)
   The field starts at position BITPOS within the byte.
   (If OP0 is a register, it may be a full word or a narrower mode,
   but BITPOS still counts within a full word,
   which is significant on bigendian machines.)  */

static void
store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
                       unsigned HOST_WIDE_INT bitsize,
                       unsigned HOST_WIDE_INT bitpos,
                       unsigned HOST_WIDE_INT bitregion_start,
                       unsigned HOST_WIDE_INT bitregion_end,
                       rtx value)
{
  enum machine_mode mode;
  unsigned int total_bits = BITS_PER_WORD;
  rtx temp;
  int all_zero = 0;
  int all_one = 0;

  /* There is a case not handled here:
     a structure with a known alignment of just a halfword
     and a field split across two aligned halfwords within the structure.
     Or likewise a structure with a known alignment of just a byte
     and a field split across two bytes.
     Such cases are not supposed to be able to occur.  */

  if (REG_P (op0) || GET_CODE (op0) == SUBREG)
    {
      gcc_assert (!offset);
      /* Special treatment for a bit field split across two registers.  */
      if (bitsize + bitpos > BITS_PER_WORD)
        {
          store_split_bit_field (op0, bitsize, bitpos,
                                 bitregion_start, bitregion_end,
                                 value);
          return;
        }
    }
  else
    {
      unsigned HOST_WIDE_INT maxbits = MAX_FIXED_MODE_SIZE;

      if (bitregion_end)
        maxbits = bitregion_end - bitregion_start + 1;

      /* Get the proper mode to use for this field.  We want a mode that
         includes the entire field.  If such a mode would be larger than
         a word, we won't be doing the extraction the normal way.
         We don't want a mode bigger than the destination.  */

      mode = GET_MODE (op0);
      if (GET_MODE_BITSIZE (mode) == 0
          || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
        mode = word_mode;

      if (MEM_VOLATILE_P (op0)
          && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
          && GET_MODE_BITSIZE (GET_MODE (op0)) <= maxbits
          && flag_strict_volatile_bitfields > 0)
        mode = GET_MODE (op0);
      else
        mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
                              bitregion_start, bitregion_end,
                              MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));

      if (mode == VOIDmode)
        {
          /* The only way this should occur is if the field spans word
             boundaries.  */
          store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
                                 bitregion_start, bitregion_end, value);
          return;
        }

      total_bits = GET_MODE_BITSIZE (mode);

      /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
         be in the range 0 to total_bits-1, and put any excess bytes in
         OFFSET.  */
      if (bitpos >= total_bits)
        {
          offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
          bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
                     * BITS_PER_UNIT);
        }

      /* Get ref to an aligned byte, halfword, or word containing the field.
         Adjust BITPOS to be position within a word,
         and OFFSET to be the offset of that word.
         Then alter OP0 to refer to that word.  */
      bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
      offset -= (offset % (total_bits / BITS_PER_UNIT));
      op0 = adjust_address (op0, mode, offset);
    }

  mode = GET_MODE (op0);

  /* Now MODE is either some integral mode for a MEM as OP0,
     or is a full-word for a REG as OP0.  TOTAL_BITS corresponds.
     The bit field is contained entirely within OP0.
     BITPOS is the starting bit number within OP0.
     (OP0's mode may actually be narrower than MODE.)  */

  if (BYTES_BIG_ENDIAN)
    /* BITPOS is the distance between our msb
       and that of the containing datum.
       Convert it to the distance from the lsb.  */
    bitpos = total_bits - bitsize - bitpos;

  /* Now BITPOS is always the distance between our lsb
     and that of OP0.  */

  /* Shift VALUE left by BITPOS bits.  If VALUE is not constant,
     we must first convert its mode to MODE.  */

  if (CONST_INT_P (value))
    {
      HOST_WIDE_INT v = INTVAL (value);

      if (bitsize < HOST_BITS_PER_WIDE_INT)
        v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;

      if (v == 0)
        all_zero = 1;
      else if ((bitsize < HOST_BITS_PER_WIDE_INT
                && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
               || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
        all_one = 1;

      value = lshift_value (mode, value, bitpos, bitsize);
    }
  else
    {
      int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
                      && bitpos + bitsize != GET_MODE_BITSIZE (mode));

      if (GET_MODE (value) != mode)
        value = convert_to_mode (mode, value, 1);

      if (must_and)
        value = expand_binop (mode, and_optab, value,
                              mask_rtx (mode, 0, bitsize, 0),
                              NULL_RTX, 1, OPTAB_LIB_WIDEN);
      if (bitpos > 0)
        value = expand_shift (LSHIFT_EXPR, mode, value,
                              bitpos, NULL_RTX, 1);
    }

  /* Now clear the chosen bits in OP0,
     except that if VALUE is -1 we need not bother.  */
  /* We keep the intermediates in registers to allow CSE to combine
     consecutive bitfield assignments.  */

  temp = force_reg (mode, op0);

  if (! all_one)
    {
      temp = expand_binop (mode, and_optab, temp,
                           mask_rtx (mode, bitpos, bitsize, 1),
                           NULL_RTX, 1, OPTAB_LIB_WIDEN);
      temp = force_reg (mode, temp);
    }

  /* Now logical-or VALUE into OP0, unless it is zero.  */

  if (! all_zero)
    {
      temp = expand_binop (mode, ior_optab, temp, value,
                           NULL_RTX, 1, OPTAB_LIB_WIDEN);
      temp = force_reg (mode, temp);
    }

  if (op0 != temp)
    {
      op0 = copy_rtx (op0);
      emit_move_insn (op0, temp);
    }
}
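/* The net effect of the function above is the classic read-modify-write
   sequence.  With MASK being ((1 << BITSIZE) - 1) << BITPOS after the
   endian adjustment, it computes roughly:

     temp = op0 & ~MASK;
     temp |= (value & ((1 << BITSIZE) - 1)) << BITPOS;
     op0 = temp;

   where the AND is skipped when VALUE is known to be all ones and the
   IOR is skipped when VALUE is known to be zero.  */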

/* Store a bit field that is split across multiple accessible memory objects.

   OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
   BITSIZE is the field width; BITPOS the position of its first bit
   (within the word).
   VALUE is the value to store.

   This does not yet handle fields wider than BITS_PER_WORD.  */

static void
store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
                       unsigned HOST_WIDE_INT bitpos,
                       unsigned HOST_WIDE_INT bitregion_start,
                       unsigned HOST_WIDE_INT bitregion_end,
                       rtx value)
{
  unsigned int unit;
  unsigned int bitsdone = 0;

  /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
     much at a time.  */
  if (REG_P (op0) || GET_CODE (op0) == SUBREG)
    unit = BITS_PER_WORD;
  else
    unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);

  /* If VALUE is a constant other than a CONST_INT, get it into a register in
     WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
     that VALUE might be a floating-point constant.  */
  if (CONSTANT_P (value) && !CONST_INT_P (value))
    {
      rtx word = gen_lowpart_common (word_mode, value);

      if (word && (value != word))
        value = word;
      else
        value = gen_lowpart_common (word_mode,
                                    force_reg (GET_MODE (value) != VOIDmode
                                               ? GET_MODE (value)
                                               : word_mode, value));
    }

  while (bitsdone < bitsize)
    {
      unsigned HOST_WIDE_INT thissize;
      rtx part, word;
      unsigned HOST_WIDE_INT thispos;
      unsigned HOST_WIDE_INT offset;

      offset = (bitpos + bitsdone) / unit;
      thispos = (bitpos + bitsdone) % unit;

      /* When the region of bytes we can touch is restricted, decrease
         UNIT close to the end of the region as needed.  */
      if (bitregion_end
          && unit > BITS_PER_UNIT
          && bitpos + bitsdone - thispos + unit > bitregion_end + 1)
        {
          unit = unit / 2;
          continue;
        }

      /* THISSIZE must not overrun a word boundary.  Otherwise,
         store_fixed_bit_field will call us again, and we will mutually
         recurse forever.  */
      thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
      thissize = MIN (thissize, unit - thispos);

      if (BYTES_BIG_ENDIAN)
        {
          int total_bits;

          /* We must do an endian conversion exactly the same way as it is
             done in extract_bit_field, so that the two calls to
             extract_fixed_bit_field will have comparable arguments.  */
          if (!MEM_P (value) || GET_MODE (value) == BLKmode)
            total_bits = BITS_PER_WORD;
          else
            total_bits = GET_MODE_BITSIZE (GET_MODE (value));

          /* Fetch successively less significant portions.  */
          if (CONST_INT_P (value))
            part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
                             >> (bitsize - bitsdone - thissize))
                            & (((HOST_WIDE_INT) 1 << thissize) - 1));
          else
            /* The args are chosen so that the last part includes the
               lsb.  Give extract_bit_field the value it needs (with
               endianness compensation) to fetch the piece we want.  */
            part = extract_fixed_bit_field (word_mode, value, 0, thissize,
                                            total_bits - bitsize + bitsdone,
                                            NULL_RTX, 1, false);
        }
      else
        {
          /* Fetch successively more significant portions.  */
          if (CONST_INT_P (value))
            part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
                             >> bitsdone)
                            & (((HOST_WIDE_INT) 1 << thissize) - 1));
          else
            part = extract_fixed_bit_field (word_mode, value, 0, thissize,
                                            bitsdone, NULL_RTX, 1, false);
        }

      /* If OP0 is a register, then handle OFFSET here.

         When handling multiword bitfields, extract_bit_field may pass
         down a word_mode SUBREG of a larger REG for a bitfield that actually
         crosses a word boundary.  Thus, for a SUBREG, we must find
         the current word starting from the base register.  */
      if (GET_CODE (op0) == SUBREG)
        {
          int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
          enum machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
          if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
            word = word_offset ? const0_rtx : op0;
          else
            word = operand_subword_force (SUBREG_REG (op0), word_offset,
                                          GET_MODE (SUBREG_REG (op0)));
          offset = 0;
        }
      else if (REG_P (op0))
        {
          enum machine_mode op0_mode = GET_MODE (op0);
          if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
            word = offset ? const0_rtx : op0;
          else
            word = operand_subword_force (op0, offset, GET_MODE (op0));
          offset = 0;
        }
      else
        word = op0;

      /* OFFSET is in UNITs, and UNIT is in bits.
         store_fixed_bit_field wants offset in bytes.  If WORD is const0_rtx,
         it is just an out-of-bounds access.  Ignore it.  */
      if (word != const0_rtx)
        store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
                               thispos, bitregion_start, bitregion_end, part);
      bitsdone += thissize;
    }
}
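/* Example: storing a 10-bit field that starts at bit 28 of a pair of
   32-bit words (UNIT == 32).  On a little-endian target the loop above
   makes two store_fixed_bit_field calls: bits 28..31 of word 0 get the
   low 4 bits of VALUE, then bits 0..5 of word 1 get the remaining 6
   bits.  */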

/* A subroutine of extract_bit_field_1 that converts return value X
   to either MODE or TMODE.  MODE, TMODE and UNSIGNEDP are arguments
   to extract_bit_field.  */

static rtx
convert_extracted_bit_field (rtx x, enum machine_mode mode,
                             enum machine_mode tmode, bool unsignedp)
{
  if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
    return x;

  /* If the x mode is not a scalar integral, first convert to the
     integer mode of that size and then access it as a floating-point
     value via a SUBREG.  */
  if (!SCALAR_INT_MODE_P (tmode))
    {
      enum machine_mode smode;

      smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
      x = convert_to_mode (smode, x, unsignedp);
      x = force_reg (smode, x);
      return gen_lowpart (tmode, x);
    }

  return convert_to_mode (tmode, x, unsignedp);
}

/* A subroutine of extract_bit_field, with the same arguments.
   If FALLBACK_P is true, fall back to extract_fixed_bit_field
   if we can find no other means of implementing the operation.
   If FALLBACK_P is false, return NULL instead.  */
1289
1290static rtx
1291extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
8eef3a45 1292 unsigned HOST_WIDE_INT bitnum,
1293 int unsignedp, bool packedp, rtx target,
36122326 1294 enum machine_mode mode, enum machine_mode tmode,
1295 bool fallback_p)
db96f378 1296{
02e7a332 1297 unsigned int unit
e16ceb8e 1298 = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
bc5449fc 1299 unsigned HOST_WIDE_INT offset, bitpos;
19cb6b50 1300 rtx op0 = str_rtx;
49776ff6 1301 enum machine_mode int_mode;
36122326 1302 enum machine_mode ext_mode;
ce7c9eb1 1303 enum machine_mode mode1;
1304 int byte_offset;
db96f378 1305
db96f378 1306 if (tmode == VOIDmode)
1307 tmode = mode;
804e9c91 1308
db96f378 1309 while (GET_CODE (op0) == SUBREG)
1310 {
bc5449fc 1311 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
db96f378 1312 op0 = SUBREG_REG (op0);
1313 }
c88df841 1314
bc5449fc 1315 /* If we have an out-of-bounds access to a register, just return an
2fb89879 1316 uninitialized register of the required mode. This can occur if the
bc5449fc 1317 source code contains an out-of-bounds access to a small array. */
1318 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1319 return gen_reg_rtx (tmode);
1320
8ad4c111 1321 if (REG_P (op0)
3664abd2 1322 && mode == GET_MODE (op0)
1323 && bitnum == 0
bc9d42da 1324 && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
3664abd2 1325 {
bc9d42da 1326 /* We're trying to extract a full register from itself. */
3664abd2 1327 return op0;
1328 }
1329
8ea8de24 1330 /* See if we can get a better vector mode before extracting. */
1331 if (VECTOR_MODE_P (GET_MODE (op0))
1332 && !MEM_P (op0)
1333 && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1334 {
1335 enum machine_mode new_mode;
8ea8de24 1336
1337 if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1338 new_mode = MIN_MODE_VECTOR_FLOAT;
06f0b99c 1339 else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1340 new_mode = MIN_MODE_VECTOR_FRACT;
1341 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1342 new_mode = MIN_MODE_VECTOR_UFRACT;
1343 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1344 new_mode = MIN_MODE_VECTOR_ACCUM;
1345 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1346 new_mode = MIN_MODE_VECTOR_UACCUM;
8ea8de24 1347 else
1348 new_mode = MIN_MODE_VECTOR_INT;
1349
1350 for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
d5f2f2c4 1351 if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
8ea8de24 1352 && targetm.vector_mode_supported_p (new_mode))
1353 break;
1354 if (new_mode != VOIDmode)
1355 op0 = gen_lowpart (new_mode, op0);
1356 }
1357
b8d2bcdd 1358 /* Use vec_extract patterns for extracting parts of vectors whenever
1359 available. */
1360 if (VECTOR_MODE_P (GET_MODE (op0))
e16ceb8e 1361 && !MEM_P (op0)
d6bf3b14 1362 && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
cb746719 1363 && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1364 == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
b8d2bcdd 1365 {
8786db1e 1366 struct expand_operand ops[3];
b8d2bcdd 1367 enum machine_mode outermode = GET_MODE (op0);
1368 enum machine_mode innermode = GET_MODE_INNER (outermode);
8786db1e 1369 enum insn_code icode = optab_handler (vec_extract_optab, outermode);
cb746719 1370 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
b8d2bcdd 1371
8786db1e 1372 create_output_operand (&ops[0], target, innermode);
1373 create_input_operand (&ops[1], op0, outermode);
1374 create_integer_operand (&ops[2], pos);
1375 if (maybe_expand_insn (icode, 3, ops))
b8d2bcdd 1376 {
8786db1e 1377 target = ops[0].value;
1378 if (GET_MODE (target) != mode)
1379 return gen_lowpart (tmode, target);
1380 return target;
b8d2bcdd 1381 }
1382 }
1383
86cde393 1384 /* Make sure we are playing with integral modes. Pun with subregs
1385 if we aren't. */
1386 {
1387 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1388 if (imode != GET_MODE (op0))
1389 {
8061fd40 1390 if (MEM_P (op0))
1391 op0 = adjust_address (op0, imode, 0);
4675df53 1392 else if (imode != BLKmode)
8061fd40 1393 {
8061fd40 1394 op0 = gen_lowpart (imode, op0);
f6d8f384 1395
8061fd40 1396 /* If we got a SUBREG, force it into a register since we
1397 aren't going to be able to do another SUBREG on it. */
1398 if (GET_CODE (op0) == SUBREG)
1399 op0 = force_reg (imode, op0);
1400 }
4675df53 1401 else if (REG_P (op0))
1402 {
1403 rtx reg, subreg;
1404 imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
1405 MODE_INT);
1406 reg = gen_reg_rtx (imode);
1407 subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
1408 emit_move_insn (subreg, op0);
1409 op0 = reg;
1410 bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
1411 }
1412 else
1413 {
1414 rtx mem = assign_stack_temp (GET_MODE (op0),
0ab48139 1415 GET_MODE_SIZE (GET_MODE (op0)));
4675df53 1416 emit_move_insn (mem, op0);
1417 op0 = adjust_address (mem, BLKmode, 0);
1418 }
86cde393 1419 }
1420 }
1421
edc4f1e8 1422 /* We may be accessing data outside the field, which means
1423 we can alias adjacent data. */
e16ceb8e 1424 if (MEM_P (op0))
edc4f1e8 1425 {
1426 op0 = shallow_copy_rtx (op0);
1427 set_mem_alias_set (op0, 0);
1428 set_mem_expr (op0, 0);
1429 }
1430
804e9c91 1431 /* Extraction of a full-word or multi-word value from a structure
1432 in a register or aligned memory can be done with just a SUBREG.
1433 A subword value in the least significant part of a register
1434 can also be extracted with a SUBREG. For this, we need the
1435 byte offset of the value in op0. */
1436
bc5449fc 1437 bitpos = bitnum % unit;
1438 offset = bitnum / unit;
804e9c91 1439 byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;
a4194ff7 1440
db96f378 1441 /* If OP0 is a register, BITPOS must count within a word.
1442 But as we have it, it counts within whatever size OP0 now has.
1443 On a bigendian machine, these are not the same, so convert. */
0bc644e0 1444 if (BYTES_BIG_ENDIAN
e16ceb8e 1445 && !MEM_P (op0)
51356f86 1446 && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
db96f378 1447 bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
db96f378 1448
804e9c91 1449 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1450 If that's wrong, the solution is to test for it and set TARGET to 0
1451 if needed. */
ce7c9eb1 1452
b97d1165 1453 /* Only scalar integer modes can be converted via subregs. There is an
1454 additional problem for FP modes here in that they can have a precision
1455 which is different from the size. mode_for_size uses precision, but
1456 we want a mode based on the size, so we must avoid calling it for FP
1457 modes. */
1458 mode1 = (SCALAR_INT_MODE_P (tmode)
1459 ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0)
1460 : mode);
ce7c9eb1 1461
a420d927 1462 /* If the bitfield is volatile, we need to make sure the access
1463 remains on a type-aligned boundary. */
1464 if (GET_CODE (op0) == MEM
1465 && MEM_VOLATILE_P (op0)
1466 && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
1467 && flag_strict_volatile_bitfields > 0)
1468 goto no_subreg_mode_swap;
1469
4007ab72 1470 if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
1471 && bitpos % BITS_PER_WORD == 0)
b97d1165 1472 || (mode1 != BLKmode
4007ab72 1473 /* ??? The big endian test here is wrong. This is correct
1474 if the value is in a register, and if mode_for_size is not
1475 the same mode as op0. This causes us to get unnecessarily
1476 inefficient code from the Thumb port when -mbig-endian. */
1477 && (BYTES_BIG_ENDIAN
1478 ? bitpos + bitsize == BITS_PER_WORD
1479 : bitpos == 0)))
e16ceb8e 1480 && ((!MEM_P (op0)
396f2130 1481 && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0))
4007ab72 1482 && GET_MODE_SIZE (mode1) != 0
1483 && byte_offset % GET_MODE_SIZE (mode1) == 0)
e16ceb8e 1484 || (MEM_P (op0)
4007ab72 1485 && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
1486 || (offset * BITS_PER_UNIT % bitsize == 0
1487 && MEM_ALIGN (op0) % bitsize == 0)))))
db96f378 1488 {
05b481df 1489 if (MEM_P (op0))
1490 op0 = adjust_address (op0, mode1, offset);
1491 else if (mode1 != GET_MODE (op0))
ed28fcb6 1492 {
05b481df 1493 rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1494 byte_offset);
1495 if (sub == NULL)
1496 goto no_subreg_mode_swap;
1497 op0 = sub;
ed28fcb6 1498 }
db96f378 1499 if (mode1 != mode)
1500 return convert_to_mode (tmode, op0, unsignedp);
1501 return op0;
1502 }
c601dd7d 1503 no_subreg_mode_swap:
db96f378 1504
1505 /* Handle fields bigger than a word. */
a4194ff7 1506
db96f378 1507 if (bitsize > BITS_PER_WORD)
1508 {
1509 /* Here we transfer the words of the field
1510 in the order least significant first.
1511 This is because the most significant word is the one which may
1512 be less than full. */
1513
02e7a332 1514 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1515 unsigned int i;
db96f378 1516
e09c2930 1517 if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
db96f378 1518 target = gen_reg_rtx (mode);
1519
625d6efb 1520 /* Indicate for flow that the entire target reg is being set. */
18b42941 1521 emit_clobber (target);
625d6efb 1522
db96f378 1523 for (i = 0; i < nwords; i++)
1524 {
1525 /* If I is 0, use the low-order word in both field and target;
1526 if I is 1, use the next to lowest word; and so on. */
c88df841 1527 /* Word number in TARGET to use. */
02e7a332 1528 unsigned int wordnum
1529 = (WORDS_BIG_ENDIAN
1530 ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1531 : i);
c88df841 1532 /* Offset from start of field in OP0. */
02e7a332 1533 unsigned int bit_offset = (WORDS_BIG_ENDIAN
1534 ? MAX (0, ((int) bitsize - ((int) i + 1)
0eacd412 1535 * (int) BITS_PER_WORD))
02e7a332 1536 : (int) i * BITS_PER_WORD);
db96f378 1537 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1538 rtx result_part
1539 = extract_bit_field (op0, MIN (BITS_PER_WORD,
1540 bitsize - i * BITS_PER_WORD),
8eef3a45 1541 bitnum + bit_offset, 1, false, target_part, mode,
1445ea5b 1542 word_mode);
db96f378 1543
611234b4 1544 gcc_assert (target_part);
db96f378 1545
1546 if (result_part != target_part)
1547 emit_move_insn (target_part, result_part);
1548 }
1549
6d292981 1550 if (unsignedp)
c88df841 1551 {
1552 /* Unless we've filled TARGET, the upper regs in a multi-reg value
 1553 need to be zeroed out. */
1554 if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1555 {
02e7a332 1556 unsigned int i, total_words;
c88df841 1557
1558 total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1559 for (i = nwords; i < total_words; i++)
2c269e73 1560 emit_move_insn
1561 (operand_subword (target,
1562 WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1563 1, VOIDmode),
1564 const0_rtx);
c88df841 1565 }
1566 return target;
1567 }
1568
6d292981 1569 /* Signed bit field: sign-extend with two arithmetic shifts. */
1570 target = expand_shift (LSHIFT_EXPR, mode, target,
f5ff0b21 1571 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
6d292981 1572 return expand_shift (RSHIFT_EXPR, mode, target,
f5ff0b21 1573 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
db96f378 1574 }
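/* Editor's illustration (not from the source): for a signed 40-bit field
   extracted into a 64-bit mode, the shift pair above computes
   (target << 24) >> 24 with an arithmetic right shift, replicating
   bit 39 into the upper 24 bits.  */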
a4194ff7 1575
49776ff6 1576 /* From here on we know the desired field is smaller than a word. */
1577
1578 /* Check if there is a correspondingly-sized integer field, so we can
1579 safely extract it as one size of integer, if necessary; then
1580 truncate or extend to the size that is wanted; then use SUBREGs or
1581 convert_to_mode to get one of the modes we really wanted. */
a4194ff7 1582
49776ff6 1583 int_mode = int_mode_for_mode (tmode);
1584 if (int_mode == BLKmode)
1585 int_mode = int_mode_for_mode (mode);
611234b4 1586 /* Should probably push op0 out to memory and then do a load. */
1587 gcc_assert (int_mode != BLKmode);
db96f378 1588
1589 /* OFFSET is the number of words or bytes (UNIT says which)
1590 from STR_RTX to the first word or byte containing part of the field. */
e16ceb8e 1591 if (!MEM_P (op0))
db96f378 1592 {
1593 if (offset != 0
1594 || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
650df5df 1595 {
8ad4c111 1596 if (!REG_P (op0))
650df5df 1597 op0 = copy_to_reg (op0);
1598 op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
701e46d0 1599 op0, (offset * UNITS_PER_WORD));
650df5df 1600 }
db96f378 1601 offset = 0;
1602 }
db96f378 1603
1604 /* Now OFFSET is nonzero only for memory operands. */
36122326 1605 ext_mode = mode_for_extraction (unsignedp ? EP_extzv : EP_extv, 0);
36122326 1606 if (ext_mode != MAX_MACHINE_MODE
1607 && bitsize > 0
1608 && GET_MODE_BITSIZE (ext_mode) >= bitsize
eb04cafb 1609 /* Do not use extv/extzv for volatile bitfields when
1610 -fstrict-volatile-bitfields is in effect. */
1611 && !(MEM_P (op0) && MEM_VOLATILE_P (op0)
1612 && flag_strict_volatile_bitfields > 0)
36122326 1613 /* If op0 is a register, we need it in EXT_MODE to make it
1614 acceptable to the format of ext(z)v. */
1615 && !(GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1616 && !((REG_P (op0) || GET_CODE (op0) == SUBREG)
8786db1e 1617 && (bitsize + bitpos > GET_MODE_BITSIZE (ext_mode))))
db96f378 1618 {
8786db1e 1619 struct expand_operand ops[4];
36122326 1620 unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
36122326 1621 rtx xop0 = op0;
1622 rtx xtarget = target;
1623 rtx xspec_target = target;
1624 rtx xspec_target_subreg = 0;
db96f378 1625
36122326 1626 /* If op0 is a register, we need it in EXT_MODE to make it
1627 acceptable to the format of ext(z)v. */
1628 if (REG_P (xop0) && GET_MODE (xop0) != ext_mode)
42b8780a 1629 xop0 = gen_lowpart_SUBREG (ext_mode, xop0);
36122326 1630 if (MEM_P (xop0))
1631 /* Get ref to first byte containing part of the field. */
1632 xop0 = adjust_address (xop0, byte_mode, xoffset);
db96f378 1633
36122326 1634 /* Now convert from counting within UNIT to counting in EXT_MODE. */
eb04cafb 1635 if (BYTES_BIG_ENDIAN && !MEM_P (xop0))
36122326 1636 xbitpos += GET_MODE_BITSIZE (ext_mode) - unit;
51356f86 1637
36122326 1638 unit = GET_MODE_BITSIZE (ext_mode);
db96f378 1639
eb04cafb 1640 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1641 "backwards" from the size of the unit we are extracting from.
1642 Otherwise, we count bits from the most significant on a
1643 BYTES/BITS_BIG_ENDIAN machine. */
1644
1645 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1646 xbitpos = unit - bitsize - xbitpos;
1647
36122326 1648 if (xtarget == 0)
1649 xtarget = xspec_target = gen_reg_rtx (tmode);
db96f378 1650
36122326 1651 if (GET_MODE (xtarget) != ext_mode)
1652 {
fbf78e3d 1653 /* Don't use LHS paradoxical subreg if explicit truncation is needed
 1654 between the mode of the extraction (EXT_MODE) and the target
1655 mode. Instead, create a temporary and use convert_move to set
1656 the target. */
1657 if (REG_P (xtarget)
396f2130 1658 && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (xtarget), ext_mode))
db96f378 1659 {
36122326 1660 xtarget = gen_lowpart (ext_mode, xtarget);
b537bfdb 1661 if (GET_MODE_PRECISION (ext_mode)
1662 > GET_MODE_PRECISION (GET_MODE (xspec_target)))
36122326 1663 xspec_target_subreg = xtarget;
db96f378 1664 }
36122326 1665 else
1666 xtarget = gen_reg_rtx (ext_mode);
1667 }
db96f378 1668
8786db1e 1669 create_output_operand (&ops[0], xtarget, ext_mode);
1670 create_fixed_operand (&ops[1], xop0);
1671 create_integer_operand (&ops[2], bitsize);
1672 create_integer_operand (&ops[3], xbitpos);
1673 if (maybe_expand_insn (unsignedp ? CODE_FOR_extzv : CODE_FOR_extv,
1674 4, ops))
36122326 1675 {
8786db1e 1676 xtarget = ops[0].value;
36122326 1677 if (xtarget == xspec_target)
1678 return xtarget;
1679 if (xtarget == xspec_target_subreg)
1680 return xspec_target;
1681 return convert_extracted_bit_field (xtarget, mode, tmode, unsignedp);
db96f378 1682 }
db96f378 1683 }
51356f86 1684
36122326 1685 /* If OP0 is a memory, try copying it to a register and seeing if a
1686 cheap register alternative is available. */
1687 if (ext_mode != MAX_MACHINE_MODE && MEM_P (op0))
1688 {
1689 enum machine_mode bestmode;
1690
1691 /* Get the mode to use for inserting into this field. If
1692 OP0 is BLKmode, get the smallest mode consistent with the
1693 alignment. If OP0 is a non-BLKmode object that is no
1694 wider than EXT_MODE, use its mode. Otherwise, use the
1695 smallest mode containing the field. */
1696
1697 if (GET_MODE (op0) == BLKmode
1698 || (ext_mode != MAX_MACHINE_MODE
1699 && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (ext_mode)))
4bb60ec7 1700 bestmode = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0),
36122326 1701 (ext_mode == MAX_MACHINE_MODE
1702 ? VOIDmode : ext_mode),
1703 MEM_VOLATILE_P (op0));
1704 else
1705 bestmode = GET_MODE (op0);
51356f86 1706
36122326 1707 if (bestmode != VOIDmode
1708 && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
1709 && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
1710 {
1711 unsigned HOST_WIDE_INT xoffset, xbitpos;
db96f378 1712
36122326 1713 /* Compute the offset as a multiple of this unit,
1714 counting in bytes. */
1715 unit = GET_MODE_BITSIZE (bestmode);
1716 xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1717 xbitpos = bitnum % unit;
db96f378 1718
36122326 1719 /* Make sure the register is big enough for the whole field. */
1720 if (xoffset * BITS_PER_UNIT + unit
1721 >= offset * BITS_PER_UNIT + bitsize)
db96f378 1722 {
36122326 1723 rtx last, result, xop0;
db96f378 1724
36122326 1725 last = get_last_insn ();
db96f378 1726
36122326 1727 /* Fetch it to a register in that size. */
1728 xop0 = adjust_address (op0, bestmode, xoffset);
1729 xop0 = force_reg (bestmode, xop0);
1730 result = extract_bit_field_1 (xop0, bitsize, xbitpos,
8eef3a45 1731 unsignedp, packedp, target,
36122326 1732 mode, tmode, false);
1733 if (result)
1734 return result;
db96f378 1735
db96f378 1736 delete_insns_since (last);
db96f378 1737 }
a4194ff7 1738 }
db96f378 1739 }
4b6262b1 1740
36122326 1741 if (!fallback_p)
1742 return NULL;
1743
1744 target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
8eef3a45 1745 bitpos, target, unsignedp, packedp);
36122326 1746 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1747}
1748
 1749/* Generate code to extract a bit-field from STR_RTX
1750 containing BITSIZE bits, starting at BITNUM,
1751 and put it in TARGET if possible (if TARGET is nonzero).
1752 Regardless of TARGET, we return the rtx for where the value is placed.
1753
 1754 STR_RTX is the structure containing the field (a REG or MEM).
1755 UNSIGNEDP is nonzero if this is an unsigned bit field.
8eef3a45 1756 PACKEDP is nonzero if the field has the packed attribute.
36122326 1757 MODE is the natural mode of the field value once extracted.
1758 TMODE is the mode the caller would like the value to have;
1759 but the value may be returned with type MODE instead.
1760
1761 If a TARGET is specified and we can store in it at no extra cost,
1762 we do so, and return TARGET.
1763 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1764 if they are equally easy. */
1765
1766rtx
1767extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
8eef3a45 1768 unsigned HOST_WIDE_INT bitnum, int unsignedp, bool packedp,
1769 rtx target, enum machine_mode mode, enum machine_mode tmode)
36122326 1770{
8eef3a45 1771 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp, packedp,
36122326 1772 target, mode, tmode, true);
db96f378 1773}
1774\f
 1775/* Extract a bit field using shifts and boolean operations.
1776 Returns an rtx to represent the value.
1777 OP0 addresses a register (word) or memory (byte).
1778 BITPOS says which bit within the word or byte the bit field starts in.
1779 OFFSET says how many bytes farther the bit field starts;
1780 it is 0 if OP0 is a register.
1781 BITSIZE says how many bits long the bit field is.
1782 (If OP0 is a register, it may be narrower than a full word,
1783 but BITPOS still counts within a full word,
1784 which is significant on bigendian machines.)
1785
1786 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
8eef3a45 1787 PACKEDP is true if the field has the packed attribute.
1788
db96f378 1789 If TARGET is nonzero, attempts to store the value there
1790 and return TARGET, but this is not guaranteed.
2c269e73 1791 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
db96f378 1792
1793static rtx
35cb5232 1794extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1795 unsigned HOST_WIDE_INT offset,
1796 unsigned HOST_WIDE_INT bitsize,
1797 unsigned HOST_WIDE_INT bitpos, rtx target,
8eef3a45 1798 int unsignedp, bool packedp)
db96f378 1799{
02e7a332 1800 unsigned int total_bits = BITS_PER_WORD;
db96f378 1801 enum machine_mode mode;
1802
8ad4c111 1803 if (GET_CODE (op0) == SUBREG || REG_P (op0))
db96f378 1804 {
1805 /* Special treatment for a bit field split across two registers. */
1806 if (bitsize + bitpos > BITS_PER_WORD)
2c269e73 1807 return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
db96f378 1808 }
1809 else
1810 {
1811 /* Get the proper mode to use for this field. We want a mode that
1812 includes the entire field. If such a mode would be larger than
1813 a word, we won't be doing the extraction the normal way. */
1814
a420d927 1815 if (MEM_VOLATILE_P (op0)
1816 && flag_strict_volatile_bitfields > 0)
1817 {
1818 if (GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1819 mode = GET_MODE (op0);
1820 else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1821 mode = GET_MODE (target);
1822 else
1823 mode = tmode;
1824 }
1825 else
4bb60ec7 1826 mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT, 0, 0,
a420d927 1827 MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
db96f378 1828
1829 if (mode == VOIDmode)
1830 /* The only way this should occur is if the field spans word
1831 boundaries. */
1832 return extract_split_bit_field (op0, bitsize,
1833 bitpos + offset * BITS_PER_UNIT,
2c269e73 1834 unsignedp);
db96f378 1835
1836 total_bits = GET_MODE_BITSIZE (mode);
1837
35140538 1838 /* Make sure bitpos is valid for the chosen mode. Adjust BITPOS to
3398e91d 1839 be in the range 0 to total_bits-1, and put any excess bytes in
35140538 1840 OFFSET. */
1841 if (bitpos >= total_bits)
1842 {
1843 offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1844 bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1845 * BITS_PER_UNIT);
1846 }
1847
a420d927 1848 /* If we're accessing a volatile MEM, we can't do the next
1849 alignment step if it results in a multi-word access where we
1850 otherwise wouldn't have one. So, check for that case
1851 here. */
1852 if (MEM_P (op0)
1853 && MEM_VOLATILE_P (op0)
1854 && flag_strict_volatile_bitfields > 0
1855 && bitpos + bitsize <= total_bits
1856 && bitpos + bitsize + (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT > total_bits)
1857 {
1858 if (STRICT_ALIGNMENT)
1859 {
1860 static bool informed_about_misalignment = false;
1861 bool warned;
1862
8eef3a45 1863 if (packedp)
1864 {
1865 if (bitsize == total_bits)
1866 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1867 "multiple accesses to volatile structure member"
1868 " because of packed attribute");
1869 else
1870 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1871 "multiple accesses to volatile structure bitfield"
1872 " because of packed attribute");
1873
1874 return extract_split_bit_field (op0, bitsize,
1875 bitpos + offset * BITS_PER_UNIT,
1876 unsignedp);
1877 }
1878
a420d927 1879 if (bitsize == total_bits)
1880 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1881 "mis-aligned access used for structure member");
1882 else
1883 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1884 "mis-aligned access used for structure bitfield");
1885
1886 if (! informed_about_misalignment && warned)
1887 {
1888 informed_about_misalignment = true;
1889 inform (input_location,
bf776685 1890 "when a volatile object spans multiple type-sized locations,"
a420d927 1891 " the compiler must choose between using a single mis-aligned access to"
1892 " preserve the volatility, or using multiple aligned accesses to avoid"
bf776685 1893 " runtime faults; this code may fail at runtime if the hardware does"
1894 " not allow this access");
a420d927 1895 }
1896 }
1897 }
1898 else
1899 {
1900
1901 /* Get ref to an aligned byte, halfword, or word containing the field.
 1902 Adjust BITPOS to be the position within a word,
1903 and OFFSET to be the offset of that word.
1904 Then alter OP0 to refer to that word. */
1905 bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1906 offset -= (offset % (total_bits / BITS_PER_UNIT));
1907 }
1908
e513d163 1909 op0 = adjust_address (op0, mode, offset);
db96f378 1910 }
1911
052251d0 1912 mode = GET_MODE (op0);
1913
51356f86 1914 if (BYTES_BIG_ENDIAN)
2c269e73 1915 /* BITPOS is the distance between our msb and that of OP0.
1916 Convert it to the distance from the lsb. */
1917 bitpos = total_bits - bitsize - bitpos;
db96f378 1918
db96f378 1919 /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1920 We have reduced the big-endian case to the little-endian case. */
1921
1922 if (unsignedp)
1923 {
1924 if (bitpos)
1925 {
1926 /* If the field does not already start at the lsb,
1927 shift it so it does. */
db96f378 1928 /* Maybe propagate the target for the shift. */
8ad4c111 1929 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
ea1760a3 1930 if (tmode != mode)
1931 subtarget = 0;
f5ff0b21 1932 op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitpos, subtarget, 1);
db96f378 1933 }
1934 /* Convert the value to the desired mode. */
1935 if (mode != tmode)
1936 op0 = convert_to_mode (tmode, op0, 1);
1937
1938 /* Unless the msb of the field used to be the msb when we shifted,
1939 mask out the upper bits. */
1940
ab89ec1b 1941 if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
db96f378 1942 return expand_binop (GET_MODE (op0), and_optab, op0,
1943 mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1944 target, 1, OPTAB_LIB_WIDEN);
1945 return op0;
1946 }
1947
1948 /* To extract a signed bit-field, first shift its msb to the msb of the word,
1949 then arithmetic-shift its lsb to the lsb of the word. */
1950 op0 = force_reg (mode, op0);
db96f378 1951
1952 /* Find the narrowest integer mode that contains the field. */
1953
1954 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1955 mode = GET_MODE_WIDER_MODE (mode))
1956 if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1957 {
1958 op0 = convert_to_mode (mode, op0, 0);
1959 break;
1960 }
1961
fcae9057 1962 if (mode != tmode)
1963 target = 0;
1964
db96f378 1965 if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1966 {
f5ff0b21 1967 int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitpos);
db96f378 1968 /* Maybe propagate the target for the shift. */
8ad4c111 1969 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
db96f378 1970 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1971 }
1972
1973 return expand_shift (RSHIFT_EXPR, mode, op0,
f5ff0b21 1974 GET_MODE_BITSIZE (mode) - bitsize, target, 0);
db96f378 1975}
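/* A minimal plain-C sketch (editor's illustration, not part of expmed.c)
   of the unsigned shift-and-mask sequence emitted above, assuming the
   field fits in a single 32-bit word with little-endian bit numbering:  */

static unsigned int
extract_unsigned_field_sketch (unsigned int word, int bitpos, int bitsize)
{
  unsigned int mask = bitsize == 32 ? ~0U : (1U << bitsize) - 1;
  return (word >> bitpos) & mask;  /* shift field's lsb to bit 0, then mask */
}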
1976\f
1977/* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1978 of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1979 complement of that if COMPLEMENT. The mask is truncated if
c88df841 1980 necessary to the width of mode MODE. The mask is zero-extended if
1981 BITSIZE+BITPOS is too small for MODE. */
db96f378 1982
1983static rtx
35cb5232 1984mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
db96f378 1985{
41283922 1986 double_int mask;
db96f378 1987
41283922 1988 mask = double_int_mask (bitsize);
1989 mask = double_int_lshift (mask, bitpos, HOST_BITS_PER_DOUBLE_INT, false);
db96f378 1990
1991 if (complement)
41283922 1992 mask = double_int_not (mask);
db96f378 1993
3e052aec 1994 return immed_double_int_const (mask, mode);
db96f378 1995}
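/* Worked example (editor's note): for a 32-bit mode,
   mask_rtx (mode, 4, 8, 0) is the constant 0x00000ff0 -- eight ones
   shifted left by four -- and mask_rtx (mode, 4, 8, 1) its complement
   0xfffff00f.  A plain-C sketch of the same computation:  */

static unsigned int
mask_value_sketch (int bitpos, int bitsize, int complement)
{
  unsigned int mask = (bitsize == 32 ? ~0U : (1U << bitsize) - 1) << bitpos;
  return complement ? ~mask : mask;
}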
1996
1997/* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1998 VALUE truncated to BITSIZE bits and then shifted left BITPOS bits. */
1999
2000static rtx
35cb5232 2001lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
db96f378 2002{
41283922 2003 double_int val;
2004
2005 val = double_int_zext (uhwi_to_double_int (INTVAL (value)), bitsize);
2006 val = double_int_lshift (val, bitpos, HOST_BITS_PER_DOUBLE_INT, false);
db96f378 2007
3e052aec 2008 return immed_double_int_const (val, mode);
db96f378 2009}
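/* Worked example (editor's note): lshift_value (SImode, GEN_INT (0x2f), 8, 4)
   truncates 0x2f to its low BITSIZE = 4 bits, giving 0xf, then shifts left
   by BITPOS = 8, producing the constant 0xf00.  */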
2010\f
2011/* Extract a bit field that is split across two words
2012 and return an RTX for the result.
2013
2014 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
 2015 BITSIZE is the field width; BITPOS is the position of its first bit within the word.
2c269e73 2016 UNSIGNEDP is 1 if we should zero-extend the contents; else sign-extend. */
db96f378 2017
2018static rtx
35cb5232 2019extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
2020 unsigned HOST_WIDE_INT bitpos, int unsignedp)
db96f378 2021{
02e7a332 2022 unsigned int unit;
2023 unsigned int bitsdone = 0;
b1924c4b 2024 rtx result = NULL_RTX;
e9782169 2025 int first = 1;
db96f378 2026
e81f2e56 2027 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
2028 much at a time. */
8ad4c111 2029 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
e81f2e56 2030 unit = BITS_PER_WORD;
2031 else
6b2813fb 2032 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
e81f2e56 2033
e9782169 2034 while (bitsdone < bitsize)
2035 {
02e7a332 2036 unsigned HOST_WIDE_INT thissize;
e9782169 2037 rtx part, word;
02e7a332 2038 unsigned HOST_WIDE_INT thispos;
2039 unsigned HOST_WIDE_INT offset;
e9782169 2040
2041 offset = (bitpos + bitsdone) / unit;
2042 thispos = (bitpos + bitsdone) % unit;
2043
ba860eb2 2044 /* THISSIZE must not overrun a word boundary. Otherwise,
2045 extract_fixed_bit_field will call us again, and we will mutually
2046 recurse forever. */
2047 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2048 thissize = MIN (thissize, unit - thispos);
e9782169 2049
2050 /* If OP0 is a register, then handle OFFSET here.
6d292981 2051
2052 When handling multiword bitfields, extract_bit_field may pass
2053 down a word_mode SUBREG of a larger REG for a bitfield that actually
2054 crosses a word boundary. Thus, for a SUBREG, we must find
2055 the current word starting from the base register. */
2056 if (GET_CODE (op0) == SUBREG)
2057 {
701e46d0 2058 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
2059 word = operand_subword_force (SUBREG_REG (op0), word_offset,
6d292981 2060 GET_MODE (SUBREG_REG (op0)));
2061 offset = 0;
2062 }
8ad4c111 2063 else if (REG_P (op0))
e9782169 2064 {
2065 word = operand_subword_force (op0, offset, GET_MODE (op0));
2066 offset = 0;
2067 }
2068 else
2069 word = op0;
2070
e9782169 2071 /* Extract the parts in bit-counting order,
ba860eb2 2072 whose meaning is determined by BYTES_BIG_ENDIAN.
2073 OFFSET is in UNITs, and UNIT is in bits.
2074 extract_fixed_bit_field wants offset in bytes. */
2075 part = extract_fixed_bit_field (word_mode, word,
2076 offset * unit / BITS_PER_UNIT,
8eef3a45 2077 thissize, thispos, 0, 1, false);
e9782169 2078 bitsdone += thissize;
db96f378 2079
e9782169 2080 /* Shift this part into place for the result. */
51356f86 2081 if (BYTES_BIG_ENDIAN)
2082 {
2083 if (bitsize != bitsdone)
2084 part = expand_shift (LSHIFT_EXPR, word_mode, part,
f5ff0b21 2085 bitsize - bitsdone, 0, 1);
51356f86 2086 }
2087 else
2088 {
2089 if (bitsdone != thissize)
2090 part = expand_shift (LSHIFT_EXPR, word_mode, part,
f5ff0b21 2091 bitsdone - thissize, 0, 1);
51356f86 2092 }
db96f378 2093
e9782169 2094 if (first)
2095 result = part;
2096 else
2097 /* Combine the parts with bitwise or. This works
2098 because we extracted each part as an unsigned bit field. */
2099 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2100 OPTAB_LIB_WIDEN);
2101
2102 first = 0;
2103 }
db96f378 2104
2105 /* Unsigned bit field: we are done. */
2106 if (unsignedp)
2107 return result;
2108 /* Signed bit field: sign-extend with two arithmetic shifts. */
2109 result = expand_shift (LSHIFT_EXPR, word_mode, result,
f5ff0b21 2110 BITS_PER_WORD - bitsize, NULL_RTX, 0);
db96f378 2111 return expand_shift (RSHIFT_EXPR, word_mode, result,
f5ff0b21 2112 BITS_PER_WORD - bitsize, NULL_RTX, 0);
db96f378 2113}
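/* A minimal sketch (editor's illustration, assuming 32-bit words,
   little-endian bit numbering, and a field that genuinely straddles the
   word boundary) of the extract-the-parts-and-IOR scheme above:  */

static unsigned int
extract_split_sketch (const unsigned int *words, unsigned int bitpos,
		      unsigned int bitsize)
{
  unsigned int lo_bits = 32 - bitpos % 32;	/* bits taken from word 0 */
  unsigned int lo = words[bitpos / 32] >> (bitpos % 32);
  unsigned int hi = words[bitpos / 32 + 1] & ((1U << (bitsize - lo_bits)) - 1);
  return lo | (hi << lo_bits);	/* combine the unsigned parts with IOR */
}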
2114\f
10d4de0e 2115/* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2116 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
2117 MODE, fill the upper bits with zeros. Fail if the layout of either
2118 mode is unknown (as for CC modes) or if the extraction would involve
2119 unprofitable mode punning. Return the value on success, otherwise
2120 return null.
2121
2122 This is different from gen_lowpart* in these respects:
2123
2124 - the returned value must always be considered an rvalue
2125
2126 - when MODE is wider than SRC_MODE, the extraction involves
2127 a zero extension
2128
2129 - when MODE is smaller than SRC_MODE, the extraction involves
2130 a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2131
2132 In other words, this routine performs a computation, whereas the
2133 gen_lowpart* routines are conceptually lvalue or rvalue subreg
2134 operations. */
2135
2136rtx
2137extract_low_bits (enum machine_mode mode, enum machine_mode src_mode, rtx src)
2138{
2139 enum machine_mode int_mode, src_int_mode;
2140
2141 if (mode == src_mode)
2142 return src;
2143
2144 if (CONSTANT_P (src))
171557e8 2145 {
2146 /* simplify_gen_subreg can't be used here, as if simplify_subreg
2147 fails, it will happily create (subreg (symbol_ref)) or similar
2148 invalid SUBREGs. */
2149 unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2150 rtx ret = simplify_subreg (mode, src, src_mode, byte);
2151 if (ret)
2152 return ret;
2153
2154 if (GET_MODE (src) == VOIDmode
2155 || !validate_subreg (mode, src_mode, src, byte))
2156 return NULL_RTX;
2157
2158 src = force_reg (GET_MODE (src), src);
2159 return gen_rtx_SUBREG (mode, src, byte);
2160 }
10d4de0e 2161
2162 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2163 return NULL_RTX;
2164
2165 if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2166 && MODES_TIEABLE_P (mode, src_mode))
2167 {
2168 rtx x = gen_lowpart_common (mode, src);
2169 if (x)
2170 return x;
2171 }
2172
2173 src_int_mode = int_mode_for_mode (src_mode);
2174 int_mode = int_mode_for_mode (mode);
2175 if (src_int_mode == BLKmode || int_mode == BLKmode)
2176 return NULL_RTX;
2177
2178 if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2179 return NULL_RTX;
2180 if (!MODES_TIEABLE_P (int_mode, mode))
2181 return NULL_RTX;
2182
2183 src = gen_lowpart (src_int_mode, src);
2184 src = convert_modes (int_mode, src_int_mode, src, true);
2185 src = gen_lowpart (mode, src);
2186 return src;
2187}
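/* Editor's note (illustration): for scalar integer modes the effect is a
   bit-pattern cast.  extract_low_bits (HImode, SImode, x) acts like
   (uint16_t) x (a truncation), while extract_low_bits (SImode, HImode, y)
   acts like (uint32_t) (uint16_t) y (a zero extension).  */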
2188\f
db96f378 2189/* Add INC into TARGET. */
2190
2191void
35cb5232 2192expand_inc (rtx target, rtx inc)
db96f378 2193{
2194 rtx value = expand_binop (GET_MODE (target), add_optab,
2195 target, inc,
2196 target, 0, OPTAB_LIB_WIDEN);
2197 if (value != target)
2198 emit_move_insn (target, value);
2199}
2200
2201/* Subtract DEC from TARGET. */
2202
2203void
35cb5232 2204expand_dec (rtx target, rtx dec)
db96f378 2205{
2206 rtx value = expand_binop (GET_MODE (target), sub_optab,
2207 target, dec,
2208 target, 0, OPTAB_LIB_WIDEN);
2209 if (value != target)
2210 emit_move_insn (target, value);
2211}
2212\f
2213/* Output a shift instruction for expression code CODE,
2214 with SHIFTED being the rtx for the value to shift,
32d37219 2215 and AMOUNT the rtx for the amount to shift by.
db96f378 2216 Store the result in the rtx TARGET, if that is convenient.
2217 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2218 Return the rtx for where the value is. */
2219
32d37219 2220static rtx
2221expand_shift_1 (enum tree_code code, enum machine_mode mode, rtx shifted,
2222 rtx amount, rtx target, int unsignedp)
db96f378 2223{
19cb6b50 2224 rtx op1, temp = 0;
2225 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2226 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
4d54df85 2227 optab lshift_optab = ashl_optab;
2228 optab rshift_arith_optab = ashr_optab;
2229 optab rshift_uns_optab = lshr_optab;
2230 optab lrotate_optab = rotl_optab;
2231 optab rrotate_optab = rotr_optab;
2232 enum machine_mode op1_mode;
6659485c 2233 int attempt;
f529eb25 2234 bool speed = optimize_insn_for_speed_p ();
db96f378 2235
32d37219 2236 op1 = amount;
4d54df85 2237 op1_mode = GET_MODE (op1);
2238
2239 /* Determine whether the shift/rotate amount is a vector, or scalar. If the
2240 shift amount is a vector, use the vector/vector shift patterns. */
2241 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2242 {
2243 lshift_optab = vashl_optab;
2244 rshift_arith_optab = vashr_optab;
2245 rshift_uns_optab = vlshr_optab;
2246 lrotate_optab = vrotl_optab;
2247 rrotate_optab = vrotr_optab;
2248 }
2249
db96f378 2250 /* We used to detect shift counts computed by NEGATE_EXPR
 2251 and shift in the other direction, but that does not work
 2252 on all machines. */
2253
d58b6b22 2254 if (SHIFT_COUNT_TRUNCATED)
2255 {
971ba038 2256 if (CONST_INT_P (op1)
ff385626 2257 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
57380eb2 2258 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
ff385626 2259 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
d58b6b22 2260 % GET_MODE_BITSIZE (mode));
2261 else if (GET_CODE (op1) == SUBREG
298bbd8d 2262 && subreg_lowpart_p (op1)
2263 && INTEGRAL_MODE_P (GET_MODE (SUBREG_REG (op1))))
d58b6b22 2264 op1 = SUBREG_REG (op1);
2265 }
73432b7f 2266
db96f378 2267 if (op1 == const0_rtx)
2268 return shifted;
2269
617cc55d 2270 /* Check whether it's cheaper to implement a left shift by a constant
2271 bit count by a sequence of additions. */
2272 if (code == LSHIFT_EXPR
971ba038 2273 && CONST_INT_P (op1)
617cc55d 2274 && INTVAL (op1) > 0
995b44f5 2275 && INTVAL (op1) < GET_MODE_PRECISION (mode)
99d2e279 2276 && INTVAL (op1) < MAX_BITS_PER_WORD
49a71e58 2277 && (shift_cost (speed, mode, INTVAL (op1))
2278 > INTVAL (op1) * add_cost (speed, mode))
2279 && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
617cc55d 2280 {
2281 int i;
2282 for (i = 0; i < INTVAL (op1); i++)
2283 {
2284 temp = force_reg (mode, shifted);
2285 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2286 unsignedp, OPTAB_LIB_WIDEN);
2287 }
2288 return shifted;
2289 }
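      /* Editor's illustration: for op1 == 3 the loop above computes x << 3
	 as three doublings --
	   t1 = x + x;  t2 = t1 + t1;  t3 = t2 + t2;
	 -- which wins when shift_cost exceeds 3 * add_cost.  */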
2290
6659485c 2291 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
db96f378 2292 {
2293 enum optab_methods methods;
2294
6659485c 2295 if (attempt == 0)
db96f378 2296 methods = OPTAB_DIRECT;
6659485c 2297 else if (attempt == 1)
db96f378 2298 methods = OPTAB_WIDEN;
2299 else
2300 methods = OPTAB_LIB_WIDEN;
2301
2302 if (rotate)
2303 {
2304 /* Widening does not work for rotation. */
2305 if (methods == OPTAB_WIDEN)
2306 continue;
2307 else if (methods == OPTAB_LIB_WIDEN)
1290205f 2308 {
c2c10df6 2309 /* If we have been unable to open-code this by a rotation,
1290205f 2310 do it as the IOR of two shifts. I.e., to rotate A
2311 by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2312 where C is the bitsize of A.
2313
2314 It is theoretically possible that the target machine might
2315 not be able to perform either shift and hence we would
2316 be making two libcalls rather than just the one for the
2317 shift (similarly if IOR could not be done). We will allow
2318 this extremely unlikely lossage to avoid complicating the
2319 code below. */
2320
c2c10df6 2321 rtx subtarget = target == shifted ? 0 : target;
32d37219 2322 rtx new_amount, other_amount;
c2c10df6 2323 rtx temp1;
32d37219 2324
2325 new_amount = op1;
714e9434 2326 if (CONST_INT_P (op1))
2327 other_amount = GEN_INT (GET_MODE_BITSIZE (mode)
2328 - INTVAL (op1));
2329 else
2330 other_amount
2331 = simplify_gen_binary (MINUS, GET_MODE (op1),
995b44f5 2332 GEN_INT (GET_MODE_PRECISION (mode)),
714e9434 2333 op1);
c2c10df6 2334
2335 shifted = force_reg (mode, shifted);
2336
32d37219 2337 temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2338 mode, shifted, new_amount, 0, 1);
2339 temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2340 mode, shifted, other_amount,
2341 subtarget, 1);
c2c10df6 2342 return expand_binop (mode, ior_optab, temp, temp1, target,
2343 unsignedp, methods);
1290205f 2344 }
db96f378 2345
2346 temp = expand_binop (mode,
4d54df85 2347 left ? lrotate_optab : rrotate_optab,
db96f378 2348 shifted, op1, target, unsignedp, methods);
2349 }
2350 else if (unsignedp)
e1abcbdd 2351 temp = expand_binop (mode,
4d54df85 2352 left ? lshift_optab : rshift_uns_optab,
e1abcbdd 2353 shifted, op1, target, unsignedp, methods);
db96f378 2354
2355 /* Do arithmetic shifts.
2356 Also, if we are going to widen the operand, we can just as well
2357 use an arithmetic right-shift instead of a logical one. */
2358 if (temp == 0 && ! rotate
2359 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2360 {
2361 enum optab_methods methods1 = methods;
2362
2363 /* If trying to widen a log shift to an arithmetic shift,
2364 don't accept an arithmetic shift of the same size. */
2365 if (unsignedp)
2366 methods1 = OPTAB_MUST_WIDEN;
2367
2368 /* Arithmetic shift */
2369
2370 temp = expand_binop (mode,
4d54df85 2371 left ? lshift_optab : rshift_arith_optab,
db96f378 2372 shifted, op1, target, unsignedp, methods1);
2373 }
2374
8164ec17 2375 /* We used to try extzv here for logical right shifts, but that was
a4194ff7 2376 only useful for one machine, the VAX, and caused poor code
8164ec17 2377 generation there for lshrdi3, so the code was deleted and a
2378 define_expand for lshrsi3 was added to vax.md. */
db96f378 2379 }
2380
611234b4 2381 gcc_assert (temp);
db96f378 2382 return temp;
2383}
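/* A plain-C sketch (editor's illustration) of the IOR-of-two-shifts
   rotation fallback described in the comment above, for a 32-bit value.
   The C version reduces the counts mod 32 itself to avoid an undefined
   shift by 32 when n == 0; the RTL expansion instead works in terms of
   the mode's bit size.  */

static unsigned int
rotl32_sketch (unsigned int a, unsigned int n)
{
  n &= 31;
  return (a << n) | (a >> ((32 - n) & 31));  /* (A << N) | (A >> (C - N)) */
}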
f5ff0b21 2384
2385/* Output a shift instruction for expression code CODE,
2386 with SHIFTED being the rtx for the value to shift,
2387 and AMOUNT the amount to shift by.
2388 Store the result in the rtx TARGET, if that is convenient.
2389 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2390 Return the rtx for where the value is. */
2391
2392rtx
2393expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2394 int amount, rtx target, int unsignedp)
2395{
32d37219 2396 return expand_shift_1 (code, mode,
2397 shifted, GEN_INT (amount), target, unsignedp);
2398}
2399
2400/* Output a shift instruction for expression code CODE,
2401 with SHIFTED being the rtx for the value to shift,
2402 and AMOUNT the tree for the amount to shift by.
2403 Store the result in the rtx TARGET, if that is convenient.
2404 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2405 Return the rtx for where the value is. */
2406
2407rtx
2408expand_variable_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2409 tree amount, rtx target, int unsignedp)
2410{
2411 return expand_shift_1 (code, mode,
2412 shifted, expand_normal (amount), target, unsignedp);
f5ff0b21 2413}
32d37219 2414
db96f378 2415\f
27588b0f 2416/* Indicates the type of fixup needed after a constant multiplication.
2417 BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2418 the result should be negated, and ADD_VARIANT means that the
2419 multiplicand should be added to the result. */
2420enum mult_variant {basic_variant, negate_variant, add_variant};
2421
49931967 2422static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
d9154849 2423 const struct mult_cost *, enum machine_mode mode);
27588b0f 2424static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
e4fedb10 2425 struct algorithm *, enum mult_variant *, int);
27588b0f 2426static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2427 const struct algorithm *, enum mult_variant);
35cb5232 2428static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
27588b0f 2429static rtx extract_high_half (enum machine_mode, rtx);
ebf4f764 2430static rtx expmed_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2431static rtx expmed_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
27588b0f 2432 int, int);
db96f378 2433/* Compute and return the best algorithm for multiplying by T.
4b780351 2434 The algorithm must cost less than COST_LIMIT.
 2435 If retval.cost >= COST_LIMIT, no algorithm was found and all
49931967 2436 other fields of the returned struct are undefined.
2437 MODE is the machine mode of the multiplication. */
db96f378 2438
1e401f10 2439static void
35cb5232 2440synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
d9154849 2441 const struct mult_cost *cost_limit, enum machine_mode mode)
db96f378 2442{
183a33d2 2443 int m;
709f9009 2444 struct algorithm *alg_in, *best_alg;
d9154849 2445 struct mult_cost best_cost;
2446 struct mult_cost new_limit;
2447 int op_cost, op_latency;
b592bb50 2448 unsigned HOST_WIDE_INT orig_t = t;
183a33d2 2449 unsigned HOST_WIDE_INT q;
49db198b 2450 int maxm, hash_index;
7fe4cfe2 2451 bool cache_hit = false;
2452 enum alg_code cache_alg = alg_zero;
f529eb25 2453 bool speed = optimize_insn_for_speed_p ();
49db198b 2454 enum machine_mode imode;
49a71e58 2455 struct alg_hash_entry *entry_ptr;
db96f378 2456
4b780351 2457 /* Indicate that no algorithm is yet found. If no algorithm
2458 is found, this value will be returned and indicate failure. */
d9154849 2459 alg_out->cost.cost = cost_limit->cost + 1;
eddf2705 2460 alg_out->cost.latency = cost_limit->latency + 1;
db96f378 2461
d9154849 2462 if (cost_limit->cost < 0
2463 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
1e401f10 2464 return;
db96f378 2465
49db198b 2466 /* Be prepared for vector modes. */
2467 imode = GET_MODE_INNER (mode);
2468 if (imode == VOIDmode)
2469 imode = mode;
2470
2471 maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2472
83df06d0 2473 /* Restrict the bits of "t" to the multiplication's mode. */
49db198b 2474 t &= GET_MODE_MASK (imode);
83df06d0 2475
798c4e27 2476 /* t == 1 can be done at zero cost. */
2477 if (t == 1)
183a33d2 2478 {
1e401f10 2479 alg_out->ops = 1;
d9154849 2480 alg_out->cost.cost = 0;
2481 alg_out->cost.latency = 0;
1e401f10 2482 alg_out->op[0] = alg_m;
2483 return;
183a33d2 2484 }
2485
798c4e27 2486 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2487 fail now. */
1e401f10 2488 if (t == 0)
798c4e27 2489 {
49a71e58 2490 if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
1e401f10 2491 return;
798c4e27 2492 else
2493 {
1e401f10 2494 alg_out->ops = 1;
49a71e58 2495 alg_out->cost.cost = zero_cost (speed);
2496 alg_out->cost.latency = zero_cost (speed);
1e401f10 2497 alg_out->op[0] = alg_zero;
2498 return;
798c4e27 2499 }
2500 }
2501
709f9009 2502 /* We'll be needing a couple extra algorithm structures now. */
2503
2457c754 2504 alg_in = XALLOCA (struct algorithm);
2505 best_alg = XALLOCA (struct algorithm);
d9154849 2506 best_cost = *cost_limit;
709f9009 2507
7fe4cfe2 2508 /* Compute the hash index. */
f529eb25 2509 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
7fe4cfe2 2510
2511 /* See if we already know what to do for T. */
49a71e58 2512 entry_ptr = alg_hash_entry_ptr (hash_index);
2513 if (entry_ptr->t == t
 2514 && entry_ptr->mode == mode
2516 && entry_ptr->speed == speed
2517 && entry_ptr->alg != alg_unknown)
7fe4cfe2 2518 {
49a71e58 2519 cache_alg = entry_ptr->alg;
44a03d75 2520
2521 if (cache_alg == alg_impossible)
7fe4cfe2 2522 {
44a03d75 2523 /* The cache tells us that it's impossible to synthesize
49a71e58 2524 multiplication by T within entry_ptr->cost. */
2525 if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
44a03d75 2526 /* COST_LIMIT is at least as restrictive as the one
2527 recorded in the hash table, in which case we have no
2528 hope of synthesizing a multiplication. Just
2529 return. */
2530 return;
2531
2532 /* If we get here, COST_LIMIT is less restrictive than the
2533 one recorded in the hash table, so we may be able to
2534 synthesize a multiplication. Proceed as if we didn't
2535 have the cache entry. */
2536 }
2537 else
2538 {
49a71e58 2539 if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
44a03d75 2540 /* The cached algorithm shows that this multiplication
2541 requires more cost than COST_LIMIT. Just return. This
2542 way, we don't clobber this cache entry with
2543 alg_impossible but retain useful information. */
2544 return;
7fe4cfe2 2545
44a03d75 2546 cache_hit = true;
2547
2548 switch (cache_alg)
2549 {
2550 case alg_shift:
2551 goto do_alg_shift;
7fe4cfe2 2552
44a03d75 2553 case alg_add_t_m2:
2554 case alg_sub_t_m2:
2555 goto do_alg_addsub_t_m2;
7fe4cfe2 2556
44a03d75 2557 case alg_add_factor:
2558 case alg_sub_factor:
2559 goto do_alg_addsub_factor;
7fe4cfe2 2560
44a03d75 2561 case alg_add_t2_m:
2562 goto do_alg_add_t2_m;
7fe4cfe2 2563
44a03d75 2564 case alg_sub_t2_m:
2565 goto do_alg_sub_t2_m;
2566
2567 default:
2568 gcc_unreachable ();
2569 }
7fe4cfe2 2570 }
2571 }
2572
798c4e27 2573 /* If we have a group of zero bits at the low-order part of T, try
2574 multiplying by the remaining bits and then doing a shift. */
2575
183a33d2 2576 if ((t & 1) == 0)
db96f378 2577 {
7fe4cfe2 2578 do_alg_shift:
183a33d2 2579 m = floor_log2 (t & -t); /* m = number of low zero bits */
83df06d0 2580 if (m < maxm)
db96f378 2581 {
84ab528e 2582 q = t >> m;
617cc55d 2583 /* The function expand_shift will choose between a shift and
2584 a sequence of additions, so the observed cost is given as
49a71e58 2585 MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */
2586 op_cost = m * add_cost (speed, mode);
2587 if (shift_cost (speed, mode, m) < op_cost)
2588 op_cost = shift_cost (speed, mode, m);
d9154849 2589 new_limit.cost = best_cost.cost - op_cost;
2590 new_limit.latency = best_cost.latency - op_cost;
2591 synth_mult (alg_in, q, &new_limit, mode);
2592
2593 alg_in->cost.cost += op_cost;
2594 alg_in->cost.latency += op_cost;
2595 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
84ab528e 2596 {
2597 struct algorithm *x;
d9154849 2598 best_cost = alg_in->cost;
84ab528e 2599 x = alg_in, alg_in = best_alg, best_alg = x;
2600 best_alg->log[best_alg->ops] = m;
2601 best_alg->op[best_alg->ops] = alg_shift;
84ab528e 2602 }
5521b4c8 2603
2604 /* See if treating ORIG_T as a signed number yields a better
2605 sequence. Try this sequence only for a negative ORIG_T
2606 as it would be useless for a non-negative ORIG_T. */
2607 if ((HOST_WIDE_INT) orig_t < 0)
2608 {
2609 /* Shift ORIG_T as follows because a right shift of a
 2610 negative-valued signed type is implementation-defined. */
2612 q = ~(~orig_t >> m);
2613 /* The function expand_shift will choose between a shift
2614 and a sequence of additions, so the observed cost is
49a71e58 2615 given as MIN (m * add_cost(speed, mode),
2616 shift_cost(speed, mode, m)). */
2617 op_cost = m * add_cost (speed, mode);
2618 if (shift_cost (speed, mode, m) < op_cost)
2619 op_cost = shift_cost (speed, mode, m);
5521b4c8 2620 new_limit.cost = best_cost.cost - op_cost;
2621 new_limit.latency = best_cost.latency - op_cost;
2622 synth_mult (alg_in, q, &new_limit, mode);
2623
2624 alg_in->cost.cost += op_cost;
2625 alg_in->cost.latency += op_cost;
2626 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2627 {
2628 struct algorithm *x;
2629 best_cost = alg_in->cost;
2630 x = alg_in, alg_in = best_alg, best_alg = x;
2631 best_alg->log[best_alg->ops] = m;
2632 best_alg->op[best_alg->ops] = alg_shift;
2633 }
2634 }
1e401f10 2635 }
7fe4cfe2 2636 if (cache_hit)
2637 goto done;
1e401f10 2638 }
2639
2640 /* If we have an odd number, add or subtract one. */
2641 if ((t & 1) != 0)
2642 {
2643 unsigned HOST_WIDE_INT w;
2644
7fe4cfe2 2645 do_alg_addsub_t_m2:
1e401f10 2646 for (w = 1; (w & t) != 0; w <<= 1)
2647 ;
68215e49 2648 /* If T was -1, then W will be zero after the loop. This is another
a4194ff7 2649 case where T ends with ...111. Handling this by multiplying by
68215e49 2650 (T + 1) and subtracting 1 produces slightly better code and results in algorithm
2651 selection much faster than treating it like the ...0111 case
2652 below. */
2653 if (w == 0
2654 || (w > 2
2655 /* Reject the case where t is 3.
2656 Thus we prefer addition in that case. */
2657 && t != 3))
1e401f10 2658 {
2659 /* T ends with ...111. Multiply by (T + 1) and subtract 1. */
2660
49a71e58 2661 op_cost = add_cost (speed, mode);
d9154849 2662 new_limit.cost = best_cost.cost - op_cost;
2663 new_limit.latency = best_cost.latency - op_cost;
2664 synth_mult (alg_in, t + 1, &new_limit, mode);
183a33d2 2665
d9154849 2666 alg_in->cost.cost += op_cost;
2667 alg_in->cost.latency += op_cost;
2668 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
db96f378 2669 {
183a33d2 2670 struct algorithm *x;
d9154849 2671 best_cost = alg_in->cost;
183a33d2 2672 x = alg_in, alg_in = best_alg, best_alg = x;
1e401f10 2673 best_alg->log[best_alg->ops] = 0;
2674 best_alg->op[best_alg->ops] = alg_sub_t_m2;
db96f378 2675 }
db96f378 2676 }
1e401f10 2677 else
2678 {
2679 /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
db96f378 2680
49a71e58 2681 op_cost = add_cost (speed, mode);
d9154849 2682 new_limit.cost = best_cost.cost - op_cost;
2683 new_limit.latency = best_cost.latency - op_cost;
2684 synth_mult (alg_in, t - 1, &new_limit, mode);
1e401f10 2685
d9154849 2686 alg_in->cost.cost += op_cost;
2687 alg_in->cost.latency += op_cost;
2688 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
1e401f10 2689 {
2690 struct algorithm *x;
d9154849 2691 best_cost = alg_in->cost;
1e401f10 2692 x = alg_in, alg_in = best_alg, best_alg = x;
2693 best_alg->log[best_alg->ops] = 0;
2694 best_alg->op[best_alg->ops] = alg_add_t_m2;
1e401f10 2695 }
2696 }
b592bb50 2697
 2698 /* We may be able to calculate a * -7, a * -15, a * -31, etc.
 2699 quickly with a - a * n for some appropriate constant n. */
2700 m = exact_log2 (-orig_t + 1);
2701 if (m >= 0 && m < maxm)
2702 {
49a71e58 2703 op_cost = shiftsub1_cost (speed, mode, m);
b592bb50 2704 new_limit.cost = best_cost.cost - op_cost;
2705 new_limit.latency = best_cost.latency - op_cost;
49db198b 2706 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2707 &new_limit, mode);
b592bb50 2708
2709 alg_in->cost.cost += op_cost;
2710 alg_in->cost.latency += op_cost;
2711 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2712 {
2713 struct algorithm *x;
2714 best_cost = alg_in->cost;
2715 x = alg_in, alg_in = best_alg, best_alg = x;
2716 best_alg->log[best_alg->ops] = m;
2717 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2718 }
2719 }
2720
7fe4cfe2 2721 if (cache_hit)
2722 goto done;
1e401f10 2723 }
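      /* Editor's illustration: for t == 7 (binary ...111) the branch above
	 recurses on t + 1 == 8 (a single shift) and then subtracts the
	 operand once: x * 7 = (x << 3) - x.  */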
4e6e0ee8 2724
db96f378 2725 /* Look for factors of t of the form
4b780351 2726 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
db96f378 2727 If we find such a factor, we can multiply by t using an algorithm that
4b780351 2728 multiplies by q, shift the result by m and add/subtract it to itself.
db96f378 2729
4b780351 2730 We search for large factors first and loop down, even if large factors
2731 are less probable than small; if we find a large factor we will find a
2732 good sequence quickly, and therefore be able to prune (by decreasing
2733 COST_LIMIT) the search. */
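 /* Editor's illustration: t = 45 factors as 5 * 9 = (2**2 + 1)(2**3 + 1),
    so x * 45 = (y << 3) + y where y = (x << 2) + x -- two shift-and-add
    steps.  */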
2734
7fe4cfe2 2735 do_alg_addsub_factor:
4b780351 2736 for (m = floor_log2 (t - 1); m >= 2; m--)
db96f378 2737 {
4b780351 2738 unsigned HOST_WIDE_INT d;
db96f378 2739
4b780351 2740 d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
7fe4cfe2 2741 if (t % d == 0 && t > d && m < maxm
2742 && (!cache_hit || cache_alg == alg_add_factor))
db96f378 2743 {
d9154849 2744 /* If the target has a cheap shift-and-add instruction, use
2745 that in preference to a shift insn followed by an add insn.
2746 Assume that the shift-and-add is "atomic" with a latency
1861ff83 2747 equal to its cost, otherwise assume that on superscalar
d9154849 2748 hardware the shift may be executed concurrently with the
2749 earlier steps in the algorithm. */
49a71e58 2750 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2751 if (shiftadd_cost (speed, mode, m) < op_cost)
d9154849 2752 {
49a71e58 2753 op_cost = shiftadd_cost (speed, mode, m);
d9154849 2754 op_latency = op_cost;
2755 }
2756 else
49a71e58 2757 op_latency = add_cost (speed, mode);
d9154849 2758
2759 new_limit.cost = best_cost.cost - op_cost;
2760 new_limit.latency = best_cost.latency - op_latency;
2761 synth_mult (alg_in, t / d, &new_limit, mode);
db96f378 2762
d9154849 2763 alg_in->cost.cost += op_cost;
2764 alg_in->cost.latency += op_latency;
2765 if (alg_in->cost.latency < op_cost)
2766 alg_in->cost.latency = op_cost;
2767 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
db96f378 2768 {
4b780351 2769 struct algorithm *x;
d9154849 2770 best_cost = alg_in->cost;
4b780351 2771 x = alg_in, alg_in = best_alg, best_alg = x;
798c4e27 2772 best_alg->log[best_alg->ops] = m;
1e401f10 2773 best_alg->op[best_alg->ops] = alg_add_factor;
db96f378 2774 }
04ba236f 2775 /* Other factors will have been taken care of in the recursion. */
2776 break;
db96f378 2777 }
2778
4b780351 2779 d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
7fe4cfe2 2780 if (t % d == 0 && t > d && m < maxm
2781 && (!cache_hit || cache_alg == alg_sub_factor))
db96f378 2782 {
d9154849 2783 /* If the target has a cheap shift-and-subtract insn, use
2784 that in preference to a shift insn followed by a sub insn.
2785 Assume that the shift-and-sub is "atomic" with a latency
 2786 equal to its cost, otherwise assume that on superscalar
2787 hardware the shift may be executed concurrently with the
2788 earlier steps in the algorithm. */
49a71e58 2789 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2790 if (shiftsub0_cost (speed, mode, m) < op_cost)
d9154849 2791 {
49a71e58 2792 op_cost = shiftsub0_cost (speed, mode, m);
d9154849 2793 op_latency = op_cost;
2794 }
2795 else
49a71e58 2796 op_latency = add_cost (speed, mode);
d9154849 2797
2798 new_limit.cost = best_cost.cost - op_cost;
379eaa7a 2799 new_limit.latency = best_cost.latency - op_latency;
d9154849 2800 synth_mult (alg_in, t / d, &new_limit, mode);
db96f378 2801
d9154849 2802 alg_in->cost.cost += op_cost;
2803 alg_in->cost.latency += op_latency;
2804 if (alg_in->cost.latency < op_cost)
2805 alg_in->cost.latency = op_cost;
2806 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
db96f378 2807 {
4b780351 2808 struct algorithm *x;
d9154849 2809 best_cost = alg_in->cost;
4b780351 2810 x = alg_in, alg_in = best_alg, best_alg = x;
798c4e27 2811 best_alg->log[best_alg->ops] = m;
1e401f10 2812 best_alg->op[best_alg->ops] = alg_sub_factor;
db96f378 2813 }
04ba236f 2814 break;
db96f378 2815 }
2816 }
7fe4cfe2 2817 if (cache_hit)
2818 goto done;
db96f378 2819
4b780351 2820 /* Try shift-and-add (load effective address) instructions,
2821 i.e. do a*3, a*5, a*9. */
2822 if ((t & 1) != 0)
2823 {
7fe4cfe2 2824 do_alg_add_t2_m:
4b780351 2825 q = t - 1;
2826 q = q & -q;
2827 m = exact_log2 (q);
83df06d0 2828 if (m >= 0 && m < maxm)
798c4e27 2829 {
49a71e58 2830 op_cost = shiftadd_cost (speed, mode, m);
d9154849 2831 new_limit.cost = best_cost.cost - op_cost;
2832 new_limit.latency = best_cost.latency - op_cost;
2833 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2834
2835 alg_in->cost.cost += op_cost;
2836 alg_in->cost.latency += op_cost;
2837 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
010b6a23 2838 {
2839 struct algorithm *x;
d9154849 2840 best_cost = alg_in->cost;
010b6a23 2841 x = alg_in, alg_in = best_alg, best_alg = x;
2842 best_alg->log[best_alg->ops] = m;
1e401f10 2843 best_alg->op[best_alg->ops] = alg_add_t2_m;
010b6a23 2844 }
4b780351 2845 }
7fe4cfe2 2846 if (cache_hit)
2847 goto done;
db96f378 2848
7fe4cfe2 2849 do_alg_sub_t2_m:
4b780351 2850 q = t + 1;
2851 q = q & -q;
2852 m = exact_log2 (q);
83df06d0 2853 if (m >= 0 && m < maxm)
798c4e27 2854 {
49a71e58 2855 op_cost = shiftsub0_cost (speed, mode, m);
d9154849 2856 new_limit.cost = best_cost.cost - op_cost;
2857 new_limit.latency = best_cost.latency - op_cost;
2858 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2859
2860 alg_in->cost.cost += op_cost;
2861 alg_in->cost.latency += op_cost;
2862 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
010b6a23 2863 {
2864 struct algorithm *x;
d9154849 2865 best_cost = alg_in->cost;
010b6a23 2866 x = alg_in, alg_in = best_alg, best_alg = x;
2867 best_alg->log[best_alg->ops] = m;
1e401f10 2868 best_alg->op[best_alg->ops] = alg_sub_t2_m;
010b6a23 2869 }
4b780351 2870 }
7fe4cfe2 2871 if (cache_hit)
2872 goto done;
4b780351 2873 }
db96f378 2874
7fe4cfe2 2875 done:
eddf2705 2876 /* If best_cost has not decreased, we have not found any algorithm. */
2877 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
44a03d75 2878 {
2879 /* We failed to find an algorithm. Record alg_impossible for
2880 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2881 we are asked to find an algorithm for T within the same or
2882 lower COST_LIMIT, we can immediately return to the
2883 caller. */
49a71e58 2884 entry_ptr->t = t;
2885 entry_ptr->mode = mode;
2886 entry_ptr->speed = speed;
2887 entry_ptr->alg = alg_impossible;
2888 entry_ptr->cost = *cost_limit;
44a03d75 2889 return;
2890 }
eddf2705 2891
7fe4cfe2 2892 /* Cache the result. */
2893 if (!cache_hit)
2894 {
49a71e58 2895 entry_ptr->t = t;
2896 entry_ptr->mode = mode;
2897 entry_ptr->speed = speed;
2898 entry_ptr->alg = best_alg->op[best_alg->ops];
2899 entry_ptr->cost.cost = best_cost.cost;
2900 entry_ptr->cost.latency = best_cost.latency;
7fe4cfe2 2901 }
2902
709f9009 2903 /* If we are getting a sequence too long for `struct algorithm'
2904 to record, make this search fail. */
2905 if (best_alg->ops == MAX_BITS_PER_WORD)
2906 return;
2907
1e401f10 2908 /* Copy the algorithm from temporary space to the space at alg_out.
2909 We avoid using structure assignment because the majority of
2910 best_alg is normally undefined, and this is a critical function. */
2911 alg_out->ops = best_alg->ops + 1;
d9154849 2912 alg_out->cost = best_cost;
b1b63592 2913 memcpy (alg_out->op, best_alg->op,
2914 alg_out->ops * sizeof *alg_out->op);
2915 memcpy (alg_out->log, best_alg->log,
2916 alg_out->ops * sizeof *alg_out->log);
db96f378 2917}
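/* Editor's illustration (not part of the source): a typical sequence
   synth_mult discovers.  For t = 10 it strips the low zero bit with
   alg_shift and recurses on the odd part 5:  */

static int
mult_by_10_sketch (int x)
{
  int t5 = (x << 2) + x;	/* x * 5: shift-and-add (alg_add_t2_m) */
  return t5 << 1;		/* x * 10: final shift (alg_shift, log 1) */
}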
2918\f
3927afe0 2919/* Find the cheapest way of multiplying a value of mode MODE by VAL.
27588b0f 2920 Try three variations:
2921
2922 - a shift/add sequence based on VAL itself
2923 - a shift/add sequence based on -VAL, followed by a negation
2924 - a shift/add sequence based on VAL - 1, followed by an addition.
2925
e4fedb10 2926 Return true if the cheapest of these costs less than MULT_COST,
2927 describing the algorithm in *ALG and final fixup in *VARIANT. */
27588b0f 2928
2929static bool
2930choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
e4fedb10 2931 struct algorithm *alg, enum mult_variant *variant,
2932 int mult_cost)
27588b0f 2933{
27588b0f 2934 struct algorithm alg2;
d9154849 2935 struct mult_cost limit;
2936 int op_cost;
f529eb25 2937 bool speed = optimize_insn_for_speed_p ();
27588b0f 2938
4be3f855 2939 /* Fail quickly for impossible bounds. */
2940 if (mult_cost < 0)
2941 return false;
2942
2943 /* Ensure that mult_cost provides a reasonable upper bound.
 2944 Any constant multiplication can be performed with fewer
2945 than 2 * bits additions. */
49a71e58 2946 op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
4be3f855 2947 if (mult_cost > op_cost)
2948 mult_cost = op_cost;
2949
27588b0f 2950 *variant = basic_variant;
d9154849 2951 limit.cost = mult_cost;
2952 limit.latency = mult_cost;
2953 synth_mult (alg, val, &limit, mode);
27588b0f 2954
2955 /* This works only if the inverted value actually fits in an
 2956 `unsigned int'. */
49db198b 2957 if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
27588b0f 2958 {
49a71e58 2959 op_cost = neg_cost(speed, mode);
d9154849 2960 if (MULT_COST_LESS (&alg->cost, mult_cost))
2961 {
2962 limit.cost = alg->cost.cost - op_cost;
2963 limit.latency = alg->cost.latency - op_cost;
2964 }
2965 else
2966 {
2967 limit.cost = mult_cost - op_cost;
2968 limit.latency = mult_cost - op_cost;
2969 }
2970
2971 synth_mult (&alg2, -val, &limit, mode);
2972 alg2.cost.cost += op_cost;
2973 alg2.cost.latency += op_cost;
2974 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
27588b0f 2975 *alg = alg2, *variant = negate_variant;
2976 }
2977
2978 /* This proves very useful for division-by-constant. */
49a71e58 2979 op_cost = add_cost (speed, mode);
d9154849 2980 if (MULT_COST_LESS (&alg->cost, mult_cost))
2981 {
2982 limit.cost = alg->cost.cost - op_cost;
2983 limit.latency = alg->cost.latency - op_cost;
2984 }
2985 else
2986 {
2987 limit.cost = mult_cost - op_cost;
2988 limit.latency = mult_cost - op_cost;
2989 }
2990
2991 synth_mult (&alg2, val - 1, &limit, mode);
2992 alg2.cost.cost += op_cost;
2993 alg2.cost.latency += op_cost;
2994 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
27588b0f 2995 *alg = alg2, *variant = add_variant;
2996
d9154849 2997 return MULT_COST_LESS (&alg->cost, mult_cost);
27588b0f 2998}
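/* Editor's illustration of the three variants tried above, on 32-bit
   ints (function names hypothetical, not part of the source):  */

static int mul6_basic_sketch (int x)	/* VAL itself: 6 = 3 * 2 */
{ return ((x << 1) + x) << 1; }
static int mul6_add_sketch (int x)	/* VAL - 1 = 5, then one add */
{ return ((x << 2) + x) + x; }
static int mul_m6_sketch (int x)	/* -VAL synthesized, then negated */
{ return -(((x << 1) + x) << 1); }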
2999
3000/* A subroutine of expand_mult, used for constant multiplications.
3001 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
3002 convenient. Use the shift/add sequence described by ALG and apply
3003 the final fixup specified by VARIANT. */
3004
3005static rtx
3006expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
3007 rtx target, const struct algorithm *alg,
3008 enum mult_variant variant)
3009{
3010 HOST_WIDE_INT val_so_far;
3011 rtx insn, accum, tem;
3012 int opno;
3013 enum machine_mode nmode;
3014
f45da063 3015 /* Avoid referencing memory over and over and invalid sharing
3016 on SUBREGs. */
3017 op0 = force_reg (mode, op0);
27588b0f 3018
3019 /* ACCUM starts out either as OP0 or as a zero, depending on
3020 the first operation. */
3021
3022 if (alg->op[0] == alg_zero)
3023 {
49db198b 3024 accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
27588b0f 3025 val_so_far = 0;
3026 }
3027 else if (alg->op[0] == alg_m)
3028 {
3029 accum = copy_to_mode_reg (mode, op0);
3030 val_so_far = 1;
3031 }
3032 else
611234b4 3033 gcc_unreachable ();
27588b0f 3034
3035 for (opno = 1; opno < alg->ops; opno++)
3036 {
3037 int log = alg->log[opno];
a1ad7483 3038 rtx shift_subtarget = optimize ? 0 : accum;
27588b0f 3039 rtx add_target
3040 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
a1ad7483 3041 && !optimize)
27588b0f 3042 ? target : 0;
a1ad7483 3043 rtx accum_target = optimize ? 0 : accum;
41cf444a 3044 rtx accum_inner;
27588b0f 3045
3046 switch (alg->op[opno])
3047 {
3048 case alg_shift:
f5ff0b21 3049 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
8813f8fe 3050 /* REG_EQUAL note will be attached to the following insn. */
3051 emit_move_insn (accum, tem);
27588b0f 3052 val_so_far <<= log;
3053 break;
3054
3055 case alg_add_t_m2:
f5ff0b21 3056 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
27588b0f 3057 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3058 add_target ? add_target : accum_target);
3059 val_so_far += (HOST_WIDE_INT) 1 << log;
3060 break;
3061
3062 case alg_sub_t_m2:
f5ff0b21 3063 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
27588b0f 3064 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3065 add_target ? add_target : accum_target);
3066 val_so_far -= (HOST_WIDE_INT) 1 << log;
3067 break;
3068
3069 case alg_add_t2_m:
3070 accum = expand_shift (LSHIFT_EXPR, mode, accum,
f5ff0b21 3071 log, shift_subtarget, 0);
27588b0f 3072 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3073 add_target ? add_target : accum_target);
3074 val_so_far = (val_so_far << log) + 1;
3075 break;
3076
3077 case alg_sub_t2_m:
3078 accum = expand_shift (LSHIFT_EXPR, mode, accum,
f5ff0b21 3079 log, shift_subtarget, 0);
27588b0f 3080 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3081 add_target ? add_target : accum_target);
3082 val_so_far = (val_so_far << log) - 1;
3083 break;
3084
3085 case alg_add_factor:
f5ff0b21 3086 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
27588b0f 3087 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3088 add_target ? add_target : accum_target);
3089 val_so_far += val_so_far << log;
3090 break;
3091
3092 case alg_sub_factor:
f5ff0b21 3093 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
27588b0f 3094 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
a1ad7483 3095 (add_target
3096 ? add_target : (optimize ? 0 : tem)));
27588b0f 3097 val_so_far = (val_so_far << log) - val_so_far;
3098 break;
3099
3100 default:
611234b4 3101 gcc_unreachable ();
27588b0f 3102 }
3103
49db198b 3104 if (SCALAR_INT_MODE_P (mode))
27588b0f 3105 {
49db198b 3106 /* Write a REG_EQUAL note on the last insn so that we can cse
3107 multiplication sequences. Note that if ACCUM is a SUBREG,
3108 we've set the inner register and must properly indicate that. */
3109 tem = op0, nmode = mode;
3110 accum_inner = accum;
3111 if (GET_CODE (accum) == SUBREG)
3112 {
3113 accum_inner = SUBREG_REG (accum);
3114 nmode = GET_MODE (accum_inner);
3115 tem = gen_lowpart (nmode, op0);
3116 }
27588b0f 3117
49db198b 3118 insn = get_last_insn ();
3119 set_dst_reg_note (insn, REG_EQUAL,
3120 gen_rtx_MULT (nmode, tem, GEN_INT (val_so_far)),
3121 accum_inner);
3122 }
27588b0f 3123 }
3124
3125 if (variant == negate_variant)
3126 {
3127 val_so_far = -val_so_far;
3128 accum = expand_unop (mode, neg_optab, accum, target, 0);
3129 }
3130 else if (variant == add_variant)
3131 {
3132 val_so_far = val_so_far + 1;
3133 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3134 }
3135
a79b863a 3136 /* Compare only the bits of val and val_so_far that are significant
3137 in the result mode, to avoid sign-/zero-extension confusion. */
49db198b 3138 nmode = GET_MODE_INNER (mode);
3139 if (nmode == VOIDmode)
3140 nmode = mode;
3141 val &= GET_MODE_MASK (nmode);
3142 val_so_far &= GET_MODE_MASK (nmode);
611234b4 3143 gcc_assert (val == val_so_far);
27588b0f 3144
3145 return accum;
3146}
3147
db96f378 3148/* Perform a multiplication and return an rtx for the result.
3149 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3150 TARGET is a suggestion for where to store the result (an rtx).
3151
3152 We check specially for a constant integer as OP1.
3153 If you want this check for OP0 as well, then before calling
3154 you should swap the two operands if OP0 would be constant. */
3155
3156rtx
cf58ef1d 3157expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3158 int unsignedp)
db96f378 3159{
27588b0f 3160 enum mult_variant variant;
3161 struct algorithm algorithm;
49db198b 3162 rtx scalar_op1;
cfd6d985 3163 int max_cost;
f529eb25 3164 bool speed = optimize_insn_for_speed_p ();
49db198b 3165 bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
db96f378 3166
49db198b 3167 if (CONSTANT_P (op0))
3168 {
3169 rtx temp = op0;
3170 op0 = op1;
3171 op1 = temp;
3172 }
3173
3174 /* For vectors, there are several simplifications that can be made if
3175 all elements of the vector constant are identical. */
3176 scalar_op1 = op1;
3177 if (GET_CODE (op1) == CONST_VECTOR)
3178 {
3179 int i, n = CONST_VECTOR_NUNITS (op1);
3180 scalar_op1 = CONST_VECTOR_ELT (op1, 0);
3181 for (i = 1; i < n; ++i)
3182 if (!rtx_equal_p (scalar_op1, CONST_VECTOR_ELT (op1, i)))
3183 goto skip_scalar;
3184 }
3185
3186 if (INTEGRAL_MODE_P (mode))
3187 {
3188 rtx fake_reg;
d9dadd67 3189 HOST_WIDE_INT coeff;
3190 bool is_neg;
49db198b 3191 int mode_bitsize;
3192
3193 if (op1 == CONST0_RTX (mode))
3194 return op1;
3195 if (op1 == CONST1_RTX (mode))
3196 return op0;
3197 if (op1 == CONSTM1_RTX (mode))
3198 return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3199 op0, target, 0);
3200
3201 if (do_trapv)
3202 goto skip_synth;
3203
3204 /* These are the operations that are potentially turned into
3205 a sequence of shifts and additions. */
3206 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
cfd6d985 3207
3208 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3209 less than or equal in size to `unsigned int' this doesn't matter.
3210 If the mode is larger than `unsigned int', then synth_mult works
3211 only if the constant value exactly fits in an `unsigned int' without
3212 any truncation. This means that multiplying by negative values does
3213     not work; results are off by 2^32 on a 32-bit machine.  */
e4fedb10 3214
49db198b 3215 if (CONST_INT_P (scalar_op1))
7a9e3364 3216 {
49db198b 3217 coeff = INTVAL (scalar_op1);
3218 is_neg = coeff < 0;
cfd6d985 3219 }
49db198b 3220 else if (CONST_DOUBLE_P (scalar_op1))
cfd6d985 3221 {
3222 /* If we are multiplying in DImode, it may still be a win
3223 to try to work with shifts and adds. */
49db198b 3224 if (CONST_DOUBLE_HIGH (scalar_op1) == 0
3225 && CONST_DOUBLE_LOW (scalar_op1) > 0)
3226 {
3227 coeff = CONST_DOUBLE_LOW (scalar_op1);
3228 is_neg = false;
3229 }
3230 else if (CONST_DOUBLE_LOW (scalar_op1) == 0)
cfd6d985 3231 {
49db198b 3232 coeff = CONST_DOUBLE_HIGH (scalar_op1);
3233 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3234 {
3235 int shift = floor_log2 (coeff) + HOST_BITS_PER_WIDE_INT;
3236 if (shift < HOST_BITS_PER_DOUBLE_INT - 1
3237 || mode_bitsize <= HOST_BITS_PER_DOUBLE_INT)
3238 return expand_shift (LSHIFT_EXPR, mode, op0,
3239 shift, target, unsignedp);
3240 }
3241 goto skip_synth;
cfd6d985 3242 }
d9dadd67 3243 else
3244 goto skip_synth;
cfd6d985 3245 }
49db198b 3246 else
3247 goto skip_synth;
48e1416a 3248
cfd6d985 3249 /* We used to test optimize here, on the grounds that it's better to
3250 produce a smaller program when -O is not used. But this causes
3251 such a terrible slowdown sometimes that it seems better to always
3252 use synth_mult. */
cfd6d985 3253
49db198b 3254 /* Special case powers of two. */
3255 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3256 return expand_shift (LSHIFT_EXPR, mode, op0,
3257 floor_log2 (coeff), target, unsignedp);
3258
3259 fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3260
3261 /* Attempt to handle multiplication of DImode values by negative
3262 coefficients, by performing the multiplication by a positive
3263	 multiplier and then negating the result.
3264 /* ??? How is this not slightly redundant with the neg variant? */
3265 if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3266 {
3267	 /* It's safe to use -coeff even for INT_MIN, as the
3268 result is interpreted as an unsigned coefficient.
3269 Exclude cost of op0 from max_cost to match the cost
ba83197c 3270 calculation of the synth_mult. */
49db198b 3271 max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed)
49a71e58 3272		      - neg_cost (speed, mode));
49db198b 3273 if (max_cost > 0
3274 && choose_mult_variant (mode, -coeff, &algorithm,
3275 &variant, max_cost))
3276 {
3277 rtx temp = expand_mult_const (mode, op0, -coeff, NULL_RTX,
3278 &algorithm, variant);
3279 return expand_unop (mode, neg_optab, temp, target, 0);
3280 }
7a9e3364 3281 }
db96f378 3282
49db198b 3283 /* Exclude cost of op0 from max_cost to match the cost
3284 calculation of the synth_mult. */
3285 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed);
3286 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3287 return expand_mult_const (mode, op0, coeff, target,
3288 &algorithm, variant);
cf58ef1d 3289 }
49db198b 3290 skip_synth:
cf58ef1d 3291
3292 /* Expand x*2.0 as x+x. */
49db198b 3293 if (GET_CODE (scalar_op1) == CONST_DOUBLE && FLOAT_MODE_P (mode))
cf58ef1d 3294 {
3295 REAL_VALUE_TYPE d;
49db198b 3296 REAL_VALUE_FROM_CONST_DOUBLE (d, scalar_op1);
cf58ef1d 3297
3298 if (REAL_VALUES_EQUAL (d, dconst2))
3299 {
3300 op0 = force_reg (GET_MODE (op0), op0);
3301 return expand_binop (mode, add_optab, op0, op0,
3302 target, unsignedp, OPTAB_LIB_WIDEN);
3303 }
3304 }
49db198b 3305 skip_scalar:
cf58ef1d 3306
1e401f10 3307 /* This used to use umul_optab if unsigned, but for non-widening multiply
3308 there is no difference between signed and unsigned. */
49db198b 3309 op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
db96f378 3310 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
611234b4 3311 gcc_assert (op0);
db96f378 3312 return op0;
3313}
62be004c 3314
72655676 3315/* Return a cost estimate for multiplying a register by the given
3316 COEFFicient in the given MODE and SPEED. */
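/* E.g. mult_by_coeff_cost (9, SImode, true) would return the cost of
   (x << 3) + x on a target where that synthesis beats a multiply, and
   the cost of the multiply itself otherwise; the actual numbers are
   entirely target-dependent.  */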
3317
3318int
3319mult_by_coeff_cost (HOST_WIDE_INT coeff, enum machine_mode mode, bool speed)
3320{
3321 int max_cost;
3322 struct algorithm algorithm;
3323 enum mult_variant variant;
3324
3325 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3326 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg), speed);
3327 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3328 return algorithm.cost.cost;
3329 else
3330 return max_cost;
3331}
3332
62be004c 3333/* Perform a widening multiplication and return an rtx for the result.
3334 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3335 TARGET is a suggestion for where to store the result (an rtx).
3336   THIS_OPTAB is the optab we should use; it must be either umul_widen_optab
3337 or smul_widen_optab.
3338
3339 We check specially for a constant integer as OP1, comparing the
3340 cost of a widening multiply against the cost of a sequence of shifts
3341 and adds. */
3342
3343rtx
3344expand_widening_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3345 int unsignedp, optab this_optab)
3346{
3347 bool speed = optimize_insn_for_speed_p ();
38ba30bf 3348 rtx cop1;
62be004c 3349
3350 if (CONST_INT_P (op1)
38ba30bf 3351 && GET_MODE (op0) != VOIDmode
3352 && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3353 this_optab == umul_widen_optab))
3354 && CONST_INT_P (cop1)
3355 && (INTVAL (cop1) >= 0
f179ee60 3356 || HWI_COMPUTABLE_MODE_P (mode)))
62be004c 3357 {
38ba30bf 3358 HOST_WIDE_INT coeff = INTVAL (cop1);
62be004c 3359 int max_cost;
3360 enum mult_variant variant;
3361 struct algorithm algorithm;
3362
3363 /* Special case powers of two. */
3364 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3365 {
3366 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3367 return expand_shift (LSHIFT_EXPR, mode, op0,
f5ff0b21 3368 floor_log2 (coeff), target, unsignedp);
62be004c 3369 }
3370
3371 /* Exclude cost of op0 from max_cost to match the cost
3372 calculation of the synth_mult. */
49a71e58 3373 max_cost = mul_widen_cost (speed, mode);
62be004c 3374 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3375 max_cost))
3376 {
3377 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3378 return expand_mult_const (mode, op0, coeff, target,
3379 &algorithm, variant);
3380 }
3381 }
3382 return expand_binop (mode, this_optab, op0, op1, target,
3383 unsignedp, OPTAB_LIB_WIDEN);
3384}
db96f378 3385\f
d2fa4ea5 3386/* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3387 replace division by D, and put the least significant N bits of the result
3388 in *MULTIPLIER_PTR and return the most significant bit.
3389
3390   The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT); the
3391 needed precision is in PRECISION (should be <= N).
3392
3393 PRECISION should be as small as possible so this function can choose
3394   the multiplier more freely.
3395
3396   The rounded-up logarithm of D is placed in *LGUP_PTR.  A shift count that
3397 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3398
3399   Using this function, x/D will be equal to ((x * m) >> N) >> *POST_SHIFT_PTR,
3400   where m is the full N + 1 bit multiplier. */
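/* A worked example, assuming N = PRECISION = 32 and D = 7: lgup = 3,
   and the chosen 33-bit multiplier is m = 2^32 + 0x24924925, so this
   function stores 0x24924925 in *MULTIPLIER_PTR, sets *POST_SHIFT_PTR
   to 3 and returns 1 (the bit above the low 32).  The caller then
   computes x/7 as (x * m) >> (32 + 3), i.e. a high-part multiply by
   0x24924925 followed by the add-and-shift fixup for the extra bit.  */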
3401
d2fa4ea5 3402unsigned HOST_WIDE_INT
35cb5232 3403choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
127cb1cd 3404 unsigned HOST_WIDE_INT *multiplier_ptr,
3405 int *post_shift_ptr, int *lgup_ptr)
d2fa4ea5 3406{
4491f79f 3407 HOST_WIDE_INT mhigh_hi, mlow_hi;
3408 unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
d2fa4ea5 3409 int lgup, post_shift;
3410 int pow, pow2;
4491f79f 3411 unsigned HOST_WIDE_INT nl, dummy1;
3412 HOST_WIDE_INT nh, dummy2;
d2fa4ea5 3413
3414 /* lgup = ceil(log2(divisor)); */
3415 lgup = ceil_log2 (d);
3416
611234b4 3417 gcc_assert (lgup <= n);
d2fa4ea5 3418
3419 pow = n + lgup;
3420 pow2 = n + lgup - precision;
3421
611234b4 3422 /* We could handle this with some effort, but this case is much
3423     better handled directly with a scc insn, so rely on the caller using
3424 that. */
24cd46a7 3425 gcc_assert (pow != HOST_BITS_PER_DOUBLE_INT);
d2fa4ea5 3426
3427 /* mlow = 2^(N + lgup)/d */
3428 if (pow >= HOST_BITS_PER_WIDE_INT)
3429 {
4491f79f 3430 nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
d2fa4ea5 3431 nl = 0;
3432 }
3433 else
3434 {
3435 nh = 0;
3436 nl = (unsigned HOST_WIDE_INT) 1 << pow;
3437 }
3438 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3439 &mlow_lo, &mlow_hi, &dummy1, &dummy2);
3440
3441   /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3442 if (pow2 >= HOST_BITS_PER_WIDE_INT)
4491f79f 3443 nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
d2fa4ea5 3444 else
3445 nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
3446 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3447 &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
3448
611234b4 3449 gcc_assert (!mhigh_hi || nh - d < d);
3450 gcc_assert (mhigh_hi <= 1 && mlow_hi <= 1);
139c3f48 3451 /* Assert that mlow < mhigh. */
611234b4 3452 gcc_assert (mlow_hi < mhigh_hi
3453 || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo));
d2fa4ea5 3454
3455 /* If precision == N, then mlow, mhigh exceed 2^N
3456 (but they do not exceed 2^(N+1)). */
3457
2358393e 3458 /* Reduce to lowest terms. */
d2fa4ea5 3459 for (post_shift = lgup; post_shift > 0; post_shift--)
3460 {
3461 unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
3462 unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
3463 if (ml_lo >= mh_lo)
3464 break;
3465
3466 mlow_hi = 0;
3467 mlow_lo = ml_lo;
3468 mhigh_hi = 0;
3469 mhigh_lo = mh_lo;
3470 }
3471
3472 *post_shift_ptr = post_shift;
3473 *lgup_ptr = lgup;
3474 if (n < HOST_BITS_PER_WIDE_INT)
3475 {
3476 unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
127cb1cd 3477 *multiplier_ptr = mhigh_lo & mask;
d2fa4ea5 3478 return mhigh_lo >= mask;
3479 }
3480 else
3481 {
127cb1cd 3482 *multiplier_ptr = mhigh_lo;
d2fa4ea5 3483 return mhigh_hi;
3484 }
3485}
3486
3487/* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3488 congruent to 1 (mod 2**N). */
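/* For example, invert_mod2n (3, 8) yields 171 (0xab):
   3 * 171 == 513 == 2 * 256 + 1, so 3 * 171 == 1 (mod 2**8).  */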
3489
3490static unsigned HOST_WIDE_INT
35cb5232 3491invert_mod2n (unsigned HOST_WIDE_INT x, int n)
d2fa4ea5 3492{
a92771b8 3493 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
d2fa4ea5 3494
3495 /* The algorithm notes that the choice y = x satisfies
3496 x*y == 1 mod 2^3, since x is assumed odd.
3497 Each iteration doubles the number of bits of significance in y. */
3498
3499 unsigned HOST_WIDE_INT mask;
3500 unsigned HOST_WIDE_INT y = x;
3501 int nbit = 3;
3502
3503 mask = (n == HOST_BITS_PER_WIDE_INT
3504 ? ~(unsigned HOST_WIDE_INT) 0
3505 : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3506
3507 while (nbit < n)
3508 {
3509 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3510 nbit *= 2;
3511 }
3512 return y;
3513}
3514
3515/* Emit code to adjust ADJ_OPERAND after a multiplication of the wrong
3516   signedness flavor of OP0 and OP1.  ADJ_OPERAND is already the high
3517   half of the product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the
3518   signed product to become unsigned; if UNSIGNEDP is zero, adjust the
3519   unsigned product to become signed.
3520
3521 The result is put in TARGET if that is convenient.
3522
3523 MODE is the mode of operation. */
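/* The adjustment implements, in N-bit arithmetic, the identity

     high_u (x, y) = high_s (x, y) + (x < 0 ? y : 0) + (y < 0 ? x : 0)

   where high_u and high_s are the upper N bits of the unsigned and
   signed NxN->2N products; when UNSIGNEDP is zero the two correction
   terms are subtracted instead of added.  */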
3524
3525rtx
35cb5232 3526expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3527 rtx op1, rtx target, int unsignedp)
d2fa4ea5 3528{
3529 rtx tem;
3530 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3531
3532 tem = expand_shift (RSHIFT_EXPR, mode, op0,
f5ff0b21 3533 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
6de9716c 3534 tem = expand_and (mode, tem, op1, NULL_RTX);
941522d6 3535 adj_operand
3536 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3537 adj_operand);
d2fa4ea5 3538
3539 tem = expand_shift (RSHIFT_EXPR, mode, op1,
f5ff0b21 3540 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
6de9716c 3541 tem = expand_and (mode, tem, op0, NULL_RTX);
941522d6 3542 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3543 target);
d2fa4ea5 3544
3545 return target;
3546}
3547
ebf4f764 3548/* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */
d2fa4ea5 3549
27588b0f 3550static rtx
3551extract_high_half (enum machine_mode mode, rtx op)
3552{
3553 enum machine_mode wider_mode;
d2fa4ea5 3554
27588b0f 3555 if (mode == word_mode)
3556 return gen_highpart (mode, op);
33183a3c 3557
069b07bf 3558 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3559
27588b0f 3560 wider_mode = GET_MODE_WIDER_MODE (mode);
3561 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
f5ff0b21 3562 GET_MODE_BITSIZE (mode), 0, 1);
27588b0f 3563 return convert_modes (mode, wider_mode, op, 0);
3564}
d2fa4ea5 3565
ebf4f764 3566/* Like expmed_mult_highpart, but only consider using a multiplication
27588b0f 3567 optab. OP1 is an rtx for the constant operand. */
3568
3569static rtx
ebf4f764 3570expmed_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
27588b0f 3571 rtx target, int unsignedp, int max_cost)
d2fa4ea5 3572{
b4dcfd48 3573 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
27588b0f 3574 enum machine_mode wider_mode;
d2fa4ea5 3575 optab moptab;
3576 rtx tem;
27588b0f 3577 int size;
f529eb25 3578 bool speed = optimize_insn_for_speed_p ();
d2fa4ea5 3579
069b07bf 3580 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3581
27588b0f 3582 wider_mode = GET_MODE_WIDER_MODE (mode);
3583 size = GET_MODE_BITSIZE (mode);
d2fa4ea5 3584
3585 /* Firstly, try using a multiplication insn that only generates the needed
3586     high part of the product, in the sign flavor of unsignedp.  */
49a71e58 3587 if (mul_highpart_cost (speed, mode) < max_cost)
33183a3c 3588 {
27588b0f 3589 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
b4dcfd48 3590 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
27588b0f 3591 unsignedp, OPTAB_DIRECT);
3592 if (tem)
3593 return tem;
33183a3c 3594 }
d2fa4ea5 3595
3596  /* Secondly, same as above, but use the sign flavor opposite of unsignedp.
3597 Need to adjust the result after the multiplication. */
84ab528e 3598 if (size - 1 < BITS_PER_WORD
49a71e58 3599 && (mul_highpart_cost (speed, mode)
3600 + 2 * shift_cost (speed, mode, size-1)
3601 + 4 * add_cost (speed, mode) < max_cost))
33183a3c 3602 {
27588b0f 3603 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
b4dcfd48 3604 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
27588b0f 3605 unsignedp, OPTAB_DIRECT);
3606 if (tem)
33183a3c 3607 /* We used the wrong signedness. Adjust the result. */
cb2511ae 3608 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
27588b0f 3609 tem, unsignedp);
33183a3c 3610 }
d2fa4ea5 3611
33183a3c 3612 /* Try widening multiplication. */
d2fa4ea5 3613 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3a9ccbe4 3614 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
49a71e58 3615 && mul_widen_cost (speed, wider_mode) < max_cost)
0e1adf44 3616 {
b4dcfd48 3617 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
27588b0f 3618 unsignedp, OPTAB_WIDEN);
3619 if (tem)
3620 return extract_high_half (mode, tem);
a4194ff7 3621 }
33183a3c 3622
3623  /* Try widening the mode and performing a non-widening multiplication.  */
d6bf3b14 3624 if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
84ab528e 3625 && size - 1 < BITS_PER_WORD
49a71e58 3626 && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3627 < max_cost))
0e1adf44 3628 {
857a1176 3629 rtx insns, wop0, wop1;
3630
3631 /* We need to widen the operands, for example to ensure the
3632 constant multiplier is correctly sign or zero extended.
3633	 Use a sequence to clean up any instructions emitted by
3634 the conversions if things don't work out. */
3635 start_sequence ();
3636 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3637 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3638 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
27588b0f 3639 unsignedp, OPTAB_WIDEN);
857a1176 3640 insns = get_insns ();
3641 end_sequence ();
3642
27588b0f 3643 if (tem)
857a1176 3644 {
3645 emit_insn (insns);
3646 return extract_high_half (mode, tem);
3647 }
0e1adf44 3648 }
33183a3c 3649
3650 /* Try widening multiplication of opposite signedness, and adjust. */
3651 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3a9ccbe4 3652 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
84ab528e 3653 && size - 1 < BITS_PER_WORD
49a71e58 3654 && (mul_widen_cost (speed, wider_mode)
3655 + 2 * shift_cost (speed, mode, size-1)
3656 + 4 * add_cost (speed, mode) < max_cost))
d2fa4ea5 3657 {
b4dcfd48 3658 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
33183a3c 3659 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3660 if (tem != 0)
d2fa4ea5 3661 {
27588b0f 3662 tem = extract_high_half (mode, tem);
33183a3c 3663 /* We used the wrong signedness. Adjust the result. */
cb2511ae 3664 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
33183a3c 3665 target, unsignedp);
d2fa4ea5 3666 }
d2fa4ea5 3667 }
3668
33183a3c 3669 return 0;
27588b0f 3670}
33183a3c 3671
0dc8d714 3672/* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3673 putting the high half of the result in TARGET if that is convenient,
3674   and returning where the result is.  If the operation cannot be performed,
3675 0 is returned.
d2fa4ea5 3676
27588b0f 3677 MODE is the mode of operation and result.
3678
3679 UNSIGNEDP nonzero means unsigned multiply.
3680
3681 MAX_COST is the total allowed cost for the expanded RTL. */
3682
0dc8d714 3683static rtx
ebf4f764 3684expmed_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
0dc8d714 3685 rtx target, int unsignedp, int max_cost)
27588b0f 3686{
e4fedb10 3687 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
0dc8d714 3688 unsigned HOST_WIDE_INT cnst1;
e4fedb10 3689 int extra_cost;
3690 bool sign_adjust = false;
27588b0f 3691 enum mult_variant variant;
3692 struct algorithm alg;
0dc8d714 3693 rtx tem;
f529eb25 3694 bool speed = optimize_insn_for_speed_p ();
27588b0f 3695
069b07bf 3696 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
27588b0f 3697  /* We can't support modes wider than HOST_BITS_PER_WIDE_INT.  */
f179ee60 3698 gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
27588b0f 3699
0dc8d714 3700 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
e4fedb10 3701
48e1416a 3702 /* We can't optimize modes wider than BITS_PER_WORD.
3703 ??? We might be able to perform double-word arithmetic if
e4fedb10 3704     mode == word_mode; however, all the cost calculations in
3705 synth_mult etc. assume single-word operations. */
3706 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
ebf4f764 3707 return expmed_mult_highpart_optab (mode, op0, op1, target,
e4fedb10 3708 unsignedp, max_cost);
3709
49a71e58 3710 extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
e4fedb10 3711
3712  /* Check whether we are trying to multiply by a negative constant.  */
3713 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3714 {
3715 sign_adjust = true;
49a71e58 3716 extra_cost += add_cost (speed, mode);
e4fedb10 3717 }
27588b0f 3718
3719 /* See whether shift/add multiplication is cheap enough. */
e4fedb10 3720 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3721 max_cost - extra_cost))
0e1adf44 3722 {
27588b0f 3723 /* See whether the specialized multiplication optabs are
3724 cheaper than the shift/add version. */
ebf4f764 3725 tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
d9154849 3726 alg.cost.cost + extra_cost);
27588b0f 3727 if (tem)
3728 return tem;
3729
e4fedb10 3730 tem = convert_to_mode (wider_mode, op0, unsignedp);
3731 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3732 tem = extract_high_half (mode, tem);
3733
0bed3869 3734 /* Adjust result for signedness. */
e4fedb10 3735 if (sign_adjust)
3736 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3737
3738 return tem;
0e1adf44 3739 }
ebf4f764 3740 return expmed_mult_highpart_optab (mode, op0, op1, target,
27588b0f 3741 unsignedp, max_cost);
d2fa4ea5 3742}
41323e11 3743
3744
3745/* Expand signed modulus of OP0 by a power of two D in mode MODE. */
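/* E.g. for D = 16 in a 32-bit mode, the branchless form built below
   is, as a C sketch (this is the 2-XOR/2-SUB/AND variant; the other
   variant trades the XORs for a logical right shift when that is
   cheaper):

     sign = x < 0 ? -1 : 0;
     r = ((x ^ sign) - sign) & 15;   now r is abs (x) & 15
     r = (r ^ sign) - sign;          negate again if x was negative

   which matches C's truncated remainder x % 16.  */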
3746
3747static rtx
3748expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3749{
4b05206e 3750 unsigned HOST_WIDE_INT masklow, maskhigh;
8b908ec4 3751 rtx result, temp, shift, label;
41323e11 3752 int logd;
3753
3754 logd = floor_log2 (d);
3755 result = gen_reg_rtx (mode);
3756
3757 /* Avoid conditional branches when they're expensive. */
4a9d7ef7 3758 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
a0d18cec 3759 && optimize_insn_for_speed_p ())
41323e11 3760 {
3761 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3762 mode, 0, -1);
3763 if (signmask)
3764 {
3765 signmask = force_reg (mode, signmask);
4b05206e 3766 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
8b908ec4 3767 shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3768
3769 /* Use the rtx_cost of a LSHIFTRT instruction to determine
3770 which instruction sequence to use. If logical right shifts
3771 are expensive the use 2 XORs, 2 SUBs and an AND, otherwise
3772	     are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
4b05206e 3773
8b908ec4 3774 temp = gen_rtx_LSHIFTRT (mode, result, shift);
d6bf3b14 3775 if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
7013e87c 3776 || (set_src_cost (temp, optimize_insn_for_speed_p ())
3777 > COSTS_N_INSNS (2)))
8b908ec4 3778 {
3779 temp = expand_binop (mode, xor_optab, op0, signmask,
3780 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3781 temp = expand_binop (mode, sub_optab, temp, signmask,
3782 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4b05206e 3783 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
8b908ec4 3784 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3785 temp = expand_binop (mode, xor_optab, temp, signmask,
3786 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3787 temp = expand_binop (mode, sub_optab, temp, signmask,
3788 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3789 }
3790 else
3791 {
3792 signmask = expand_binop (mode, lshr_optab, signmask, shift,
3793 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3794 signmask = force_reg (mode, signmask);
3795
3796 temp = expand_binop (mode, add_optab, op0, signmask,
3797 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4b05206e 3798 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
8b908ec4 3799 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3800 temp = expand_binop (mode, sub_optab, temp, signmask,
3801 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3802 }
41323e11 3803 return temp;
3804 }
3805 }
3806
3807 /* Mask contains the mode's signbit and the significant bits of the
3808 modulus. By including the signbit in the operation, many targets
3809 can avoid an explicit compare operation in the following comparison
3810 against zero. */
3811
4b05206e 3812 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3813 if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3814 {
3815 masklow |= (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1);
3816 maskhigh = -1;
3817 }
3818 else
3819 maskhigh = (HOST_WIDE_INT) -1
3820 << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
41323e11 3821
4b05206e 3822 temp = expand_binop (mode, and_optab, op0,
3823 immed_double_const (masklow, maskhigh, mode),
3824 result, 1, OPTAB_LIB_WIDEN);
41323e11 3825 if (temp != result)
3826 emit_move_insn (result, temp);
3827
3828 label = gen_label_rtx ();
3829 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3830
3831 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3832 0, OPTAB_LIB_WIDEN);
4b05206e 3833 masklow = (HOST_WIDE_INT) -1 << logd;
3834 maskhigh = -1;
3835 temp = expand_binop (mode, ior_optab, temp,
3836 immed_double_const (masklow, maskhigh, mode),
3837 result, 1, OPTAB_LIB_WIDEN);
41323e11 3838 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3839 0, OPTAB_LIB_WIDEN);
3840 if (temp != result)
3841 emit_move_insn (result, temp);
3842 emit_label (label);
3843 return result;
3844}
9c423367 3845
3846/* Expand signed division of OP0 by a power of two D in mode MODE.
3847 This routine is only called for positive values of D. */
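/* E.g. for D = 8 the round-towards-zero quotient is
   (x + (x < 0 ? 7 : 0)) >> 3; the branchless variant below obtains
   the bias from the sign mask, as (x >> 31) shifted logically right
   by 32 - 3 in a 32-bit mode (a sketch assuming cheap shifts).  */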
3848
3849static rtx
3850expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3851{
3852 rtx temp, label;
9c423367 3853 int logd;
3854
3855 logd = floor_log2 (d);
9c423367 3856
4a9d7ef7 3857 if (d == 2
3858 && BRANCH_COST (optimize_insn_for_speed_p (),
3859 false) >= 1)
9c423367 3860 {
3861 temp = gen_reg_rtx (mode);
3862 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3863 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3864 0, OPTAB_LIB_WIDEN);
f5ff0b21 3865 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
9c423367 3866 }
3867
7f4f7064 3868#ifdef HAVE_conditional_move
4a9d7ef7 3869 if (BRANCH_COST (optimize_insn_for_speed_p (), false)
3870 >= 2)
7f4f7064 3871 {
3872 rtx temp2;
3873
239d5663 3874 /* ??? emit_conditional_move forces a stack adjustment via
3875	 compare_from_rtx, so if the sequence is discarded, it will
3876 be lost. Do it now instead. */
3877 do_pending_stack_adjust ();
3878
7f4f7064 3879 start_sequence ();
3880 temp2 = copy_to_mode_reg (mode, op0);
3881 temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
3882 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3883 temp = force_reg (mode, temp);
3884
3885 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
3886 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3887 mode, temp, temp2, mode, 0);
3888 if (temp2)
3889 {
3890 rtx seq = get_insns ();
3891 end_sequence ();
3892 emit_insn (seq);
f5ff0b21 3893 return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
7f4f7064 3894 }
3895 end_sequence ();
3896 }
3897#endif
3898
4a9d7ef7 3899 if (BRANCH_COST (optimize_insn_for_speed_p (),
3900 false) >= 2)
9c423367 3901 {
3902 int ushift = GET_MODE_BITSIZE (mode) - logd;
3903
3904 temp = gen_reg_rtx (mode);
3905 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
49a71e58 3906 if (shift_cost (optimize_insn_for_speed_p (), mode, ushift)
3907 > COSTS_N_INSNS (1))
9c423367 3908 temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3909 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3910 else
3911 temp = expand_shift (RSHIFT_EXPR, mode, temp,
f5ff0b21 3912 ushift, NULL_RTX, 1);
9c423367 3913 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3914 0, OPTAB_LIB_WIDEN);
f5ff0b21 3915 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
9c423367 3916 }
3917
3918 label = gen_label_rtx ();
3919 temp = copy_to_mode_reg (mode, op0);
3920 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3921 expand_inc (temp, GEN_INT (d - 1));
3922 emit_label (label);
f5ff0b21 3923 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
9c423367 3924}
d2fa4ea5 3925\f
db96f378 3926/* Emit the code to divide OP0 by OP1, putting the result in TARGET
3927 if that is convenient, and returning where the result is.
3928 You may request either the quotient or the remainder as the result;
3929 specify REM_FLAG nonzero to get the remainder.
3930
3931 CODE is the expression code for which kind of division this is;
3932 it controls how rounding is done. MODE is the machine mode to use.
3933 UNSIGNEDP nonzero means do unsigned division. */
3934
3935/* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3936 and then correct it by or'ing in missing high bits
3937 if result of ANDI is nonzero.
3938 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3939 This could optimize to a bfexts instruction.
3940 But C doesn't use these operations, so their optimizations are
3941 left for later. */
a490489b 3942/* ??? For modulo, we don't actually need the highpart of the first product;
3943 the low part will do nicely. And for small divisors, the second multiply
3944 can also be a low-part only multiply or even be completely left out.
3945 E.g. to calculate the remainder of a division by 3 with a 32 bit
3946 multiply, multiply with 0x55555556 and extract the upper two bits;
3947 the result is exact for inputs up to 0x1fffffff.
3948 The input range can be reduced by using cross-sum rules.
3949 For odd divisors >= 3, the following table gives right shift counts
edc2a478 3950 so that if a number is shifted by an integer multiple of the given
a490489b 3951 amount, the remainder stays the same:
3952 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3953 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3954 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3955 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3956 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3957
3958 Cross-sum rules for even numbers can be derived by leaving as many bits
3959 to the right alone as the divisor has zeros to the right.
3960   E.g. if x is an unsigned 32-bit number:
3961 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3962 */
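/* Checking the division-by-3 example above: for x = 11 the low 32
   bits of 11 * 0x55555556 are 0xaaaaaab2, whose upper two bits are 2,
   and indeed 11 rem 3 == 2.  */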
db96f378 3963
3964rtx
35cb5232 3965expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3966 rtx op0, rtx op1, rtx target, int unsignedp)
db96f378 3967{
db96f378 3968 enum machine_mode compute_mode;
19cb6b50 3969 rtx tquotient;
d2fa4ea5 3970 rtx quotient = 0, remainder = 0;
3971 rtx last;
ccc4d85f 3972 int size;
41cf444a 3973 rtx insn;
db96f378 3974 optab optab1, optab2;
ae01b312 3975 int op1_is_constant, op1_is_pow2 = 0;
33183a3c 3976 int max_cost, extra_cost;
9e042f31 3977 static HOST_WIDE_INT last_div_const = 0;
ae01b312 3978 static HOST_WIDE_INT ext_op1;
f529eb25 3979 bool speed = optimize_insn_for_speed_p ();
d2fa4ea5 3980
971ba038 3981 op1_is_constant = CONST_INT_P (op1);
ae01b312 3982 if (op1_is_constant)
3983 {
3984 ext_op1 = INTVAL (op1);
3985 if (unsignedp)
3986 ext_op1 &= GET_MODE_MASK (mode);
3987 op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3988 || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3989 }
d2fa4ea5 3990
3991 /*
3992 This is the structure of expand_divmod:
3993
3994 First comes code to fix up the operands so we can perform the operations
3995 correctly and efficiently.
3996
3997 Second comes a switch statement with code specific for each rounding mode.
3998 For some special operands this code emits all RTL for the desired
c3118728 3999   operation; in other cases, it generates only a quotient and stores it in
d2fa4ea5 4000 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
4001 to indicate that it has not done anything.
4002
c3118728 4003 Last comes code that finishes the operation. If QUOTIENT is set and
4004 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
4005 QUOTIENT is not set, it is computed using trunc rounding.
db96f378 4006
d2fa4ea5 4007 We try to generate special code for division and remainder when OP1 is a
4008 constant. If |OP1| = 2**n we can use shifts and some other fast
4009 operations. For other values of OP1, we compute a carefully selected
4010 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
4011 by m.
4012
4013 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
4014 half of the product. Different strategies for generating the product are
ebf4f764 4015 implemented in expmed_mult_highpart.
d2fa4ea5 4016
4017 If what we actually want is the remainder, we generate that by another
4018 by-constant multiplication and a subtraction. */
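/* E.g. an unsigned 32-bit x % 7 is typically emitted as the magic
   high-part multiply chosen by choose_multiplier to form the quotient
   q, followed by r = x - q * 7; this is a sketch of the usual outcome,
   and the exact sequence depends on target costs.  */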
4019
4020 /* We shouldn't be called with OP1 == const1_rtx, but some of the
1b05ead8 4021 code below will malfunction if we are, so check here and handle
4022 the special case if so. */
4023 if (op1 == const1_rtx)
4024 return rem_flag ? const0_rtx : op0;
4025
bec2d490 4026 /* When dividing by -1, we could get an overflow.
4027 negv_optab can handle overflows. */
4028 if (! unsignedp && op1 == constm1_rtx)
4029 {
4030 if (rem_flag)
ff385626 4031 return const0_rtx;
bec2d490 4032 return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
ff385626 4033 ? negv_optab : neg_optab, op0, target, 0);
bec2d490 4034 }
4035
64e50eaa 4036 if (target
4037 /* Don't use the function value register as a target
4038 since we have to read it as well as write it,
4039 and function-inlining gets confused by this. */
4040 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
4041 /* Don't clobber an operand while doing a multi-step calculation. */
eb55662f 4042 || ((rem_flag || op1_is_constant)
64e50eaa 4043 && (reg_mentioned_p (target, op0)
e16ceb8e 4044 || (MEM_P (op0) && MEM_P (target))))
64e50eaa 4045 || reg_mentioned_p (target, op1)
e16ceb8e 4046 || (MEM_P (op1) && MEM_P (target))))
db96f378 4047 target = 0;
4048
db96f378 4049 /* Get the mode in which to perform this computation. Normally it will
4050 be MODE, but sometimes we can't do the desired operation in MODE.
4051 If so, pick a wider mode in which we can do the operation. Convert
4052 to that mode at the start to avoid repeated conversions.
4053
4054 First see what operations we need. These depend on the expression
4055 we are evaluating. (We assume that divxx3 insns exist under the
4056 same conditions that modxx3 insns and that these insns don't normally
4057 fail. If these assumptions are not correct, we may generate less
4058 efficient code in some cases.)
4059
4060 Then see if we find a mode in which we can open-code that operation
4061 (either a division, modulus, or shift). Finally, check for the smallest
4062 mode for which we can do the operation with a library call. */
4063
d2fa4ea5 4064 /* We might want to refine this now that we have division-by-constant
ebf4f764 4065 optimization. Since expmed_mult_highpart tries so many variants, it is
d2fa4ea5 4066 not straightforward to generalize this. Maybe we should make an array
4067 of possible modes in init_expmed? Save this for GCC 2.7. */
4068
0ceee13a 4069 optab1 = ((op1_is_pow2 && op1 != const0_rtx)
4070 ? (unsignedp ? lshr_optab : ashr_optab)
db96f378 4071 : (unsignedp ? udiv_optab : sdiv_optab));
0ceee13a 4072 optab2 = ((op1_is_pow2 && op1 != const0_rtx)
4073 ? optab1
4074 : (unsignedp ? udivmod_optab : sdivmod_optab));
db96f378 4075
4076 for (compute_mode = mode; compute_mode != VOIDmode;
4077 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
d6bf3b14 4078 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4079 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
db96f378 4080 break;
4081
4082 if (compute_mode == VOIDmode)
4083 for (compute_mode = mode; compute_mode != VOIDmode;
4084 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
f36b9f69 4085 if (optab_libfunc (optab1, compute_mode)
4086 || optab_libfunc (optab2, compute_mode))
db96f378 4087 break;
4088
89f18f73 4089 /* If we still couldn't find a mode, use MODE, but expand_binop will
4090 probably die. */
db96f378 4091 if (compute_mode == VOIDmode)
4092 compute_mode = mode;
4093
d2fa4ea5 4094 if (target && GET_MODE (target) == compute_mode)
4095 tquotient = target;
4096 else
4097 tquotient = gen_reg_rtx (compute_mode);
ccc4d85f 4098
d2fa4ea5 4099 size = GET_MODE_BITSIZE (compute_mode);
4100#if 0
4101 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
33183a3c 4102 (mode), and thereby get better code when OP1 is a constant. Do that
4103 later. It will require going over all usages of SIZE below. */
d2fa4ea5 4104 size = GET_MODE_BITSIZE (mode);
4105#endif
64e50eaa 4106
9e042f31 4107 /* Only deduct something for a REM if the last divide done was
4108 for a different constant. Then set the constant of the last
4109 divide. */
49a71e58 4110 max_cost = (unsignedp
4111 ? udiv_cost (speed, compute_mode)
4112 : sdiv_cost (speed, compute_mode));
1facc8d7 4113 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4114 && INTVAL (op1) == last_div_const))
49a71e58 4115 max_cost -= (mul_cost (speed, compute_mode)
4116 + add_cost (speed, compute_mode));
9e042f31 4117
4118 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
33183a3c 4119
d2fa4ea5 4120 /* Now convert to the best mode to use. */
db96f378 4121 if (compute_mode != mode)
4122 {
d2fa4ea5 4123 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
72467481 4124 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
6d9d382f 4125
0dbd1c74 4126 /* convert_modes may have placed op1 into a register, so we
4127 must recompute the following. */
971ba038 4128 op1_is_constant = CONST_INT_P (op1);
6d9d382f 4129 op1_is_pow2 = (op1_is_constant
4130 && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4131 || (! unsignedp
0dbd1c74 4132 && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
db96f378 4133 }
4134
d2fa4ea5 4135 /* If one of the operands is a volatile MEM, copy it into a register. */
8cdd0f84 4136
e16ceb8e 4137 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
d2fa4ea5 4138 op0 = force_reg (compute_mode, op0);
e16ceb8e 4139 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
8cdd0f84 4140 op1 = force_reg (compute_mode, op1);
4141
008862a8 4142 /* If we need the remainder or if OP1 is constant, we need to
4143 put OP0 in a register in case it has any queued subexpressions. */
4144 if (rem_flag || op1_is_constant)
4145 op0 = force_reg (compute_mode, op0);
64e50eaa 4146
d2fa4ea5 4147 last = get_last_insn ();
db96f378 4148
c3418f42 4149 /* Promote floor rounding to trunc rounding for unsigned operations. */
d2fa4ea5 4150 if (unsignedp)
db96f378 4151 {
d2fa4ea5 4152 if (code == FLOOR_DIV_EXPR)
4153 code = TRUNC_DIV_EXPR;
4154 if (code == FLOOR_MOD_EXPR)
4155 code = TRUNC_MOD_EXPR;
03b70ee3 4156 if (code == EXACT_DIV_EXPR && op1_is_pow2)
4157 code = TRUNC_DIV_EXPR;
d2fa4ea5 4158 }
64e50eaa 4159
d2fa4ea5 4160 if (op1 != const0_rtx)
4161 switch (code)
4162 {
4163 case TRUNC_MOD_EXPR:
4164 case TRUNC_DIV_EXPR:
61e477c7 4165 if (op1_is_constant)
d2fa4ea5 4166 {
210ba7c8 4167 if (unsignedp)
d2fa4ea5 4168 {
127cb1cd 4169 unsigned HOST_WIDE_INT mh, ml;
d2fa4ea5 4170 int pre_shift, post_shift;
4171 int dummy;
ae01b312 4172 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4173 & GET_MODE_MASK (compute_mode));
d2fa4ea5 4174
4175 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4176 {
4177 pre_shift = floor_log2 (d);
4178 if (rem_flag)
4179 {
0bc644e0 4180 remainder
4181 = expand_binop (compute_mode, and_optab, op0,
4182 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4183 remainder, 1,
4184 OPTAB_LIB_WIDEN);
d2fa4ea5 4185 if (remainder)
436b0397 4186 return gen_lowpart (mode, remainder);
d2fa4ea5 4187 }
4188 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
f5ff0b21 4189 pre_shift, tquotient, 1);
d2fa4ea5 4190 }
61e477c7 4191 else if (size <= HOST_BITS_PER_WIDE_INT)
d2fa4ea5 4192 {
75ff336e 4193 if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
d2fa4ea5 4194 {
75ff336e 4195 /* Most significant bit of divisor is set; emit an scc
4196 insn. */
dab963fb 4197 quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4198 compute_mode, 1, 1);
d2fa4ea5 4199 }
4200 else
4201 {
75ff336e 4202 /* Find a suitable multiplier and right shift count
4203 instead of multiplying with D. */
4204
4205 mh = choose_multiplier (d, size, size,
4206 &ml, &post_shift, &dummy);
4207
4208 /* If the suggested multiplier is more than SIZE bits,
4209 we can do better for even divisors, using an
4210 initial right shift. */
4211 if (mh != 0 && (d & 1) == 0)
4212 {
4213 pre_shift = floor_log2 (d & -d);
4214 mh = choose_multiplier (d >> pre_shift, size,
4215 size - pre_shift,
4216 &ml, &post_shift, &dummy);
611234b4 4217 gcc_assert (!mh);
75ff336e 4218 }
4219 else
4220 pre_shift = 0;
4221
4222 if (mh != 0)
4223 {
4224 rtx t1, t2, t3, t4;
4225
84ab528e 4226 if (post_shift - 1 >= BITS_PER_WORD)
4227 goto fail1;
4228
e56afeb2 4229 extra_cost
49a71e58 4230 = (shift_cost (speed, compute_mode, post_shift - 1)
4231 + shift_cost (speed, compute_mode, 1)
4232 + 2 * add_cost (speed, compute_mode));
ebf4f764 4233 t1 = expmed_mult_highpart (compute_mode, op0,
127cb1cd 4234 GEN_INT (ml),
75ff336e 4235 NULL_RTX, 1,
4236 max_cost - extra_cost);
4237 if (t1 == 0)
4238 goto fail1;
941522d6 4239 t2 = force_operand (gen_rtx_MINUS (compute_mode,
4240 op0, t1),
75ff336e 4241 NULL_RTX);
f5ff0b21 4242 t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4243 t2, 1, NULL_RTX, 1);
941522d6 4244 t4 = force_operand (gen_rtx_PLUS (compute_mode,
4245 t1, t3),
75ff336e 4246 NULL_RTX);
7c446c95 4247 quotient = expand_shift
4248 (RSHIFT_EXPR, compute_mode, t4,
f5ff0b21 4249 post_shift - 1, tquotient, 1);
75ff336e 4250 }
4251 else
4252 {
4253 rtx t1, t2;
4254
84ab528e 4255 if (pre_shift >= BITS_PER_WORD
4256 || post_shift >= BITS_PER_WORD)
4257 goto fail1;
4258
7c446c95 4259 t1 = expand_shift
4260 (RSHIFT_EXPR, compute_mode, op0,
f5ff0b21 4261 pre_shift, NULL_RTX, 1);
e56afeb2 4262 extra_cost
49a71e58 4263 = (shift_cost (speed, compute_mode, pre_shift)
4264 + shift_cost (speed, compute_mode, post_shift));
ebf4f764 4265 t2 = expmed_mult_highpart (compute_mode, t1,
127cb1cd 4266 GEN_INT (ml),
75ff336e 4267 NULL_RTX, 1,
4268 max_cost - extra_cost);
4269 if (t2 == 0)
4270 goto fail1;
7c446c95 4271 quotient = expand_shift
4272 (RSHIFT_EXPR, compute_mode, t2,
f5ff0b21 4273 post_shift, tquotient, 1);
75ff336e 4274 }
d2fa4ea5 4275 }
4276 }
61e477c7 4277	    else		/* Mode too wide to use the tricky code.  */
4278 break;
d2fa4ea5 4279
4280 insn = get_last_insn ();
41cf444a 4281 if (insn != last)
4282 set_dst_reg_note (insn, REG_EQUAL,
4283 gen_rtx_UDIV (compute_mode, op0, op1),
4284 quotient);
d2fa4ea5 4285 }
4286 else /* TRUNC_DIV, signed */
4287 {
4288 unsigned HOST_WIDE_INT ml;
4289 int lgup, post_shift;
4de52edf 4290 rtx mlr;
d2fa4ea5 4291 HOST_WIDE_INT d = INTVAL (op1);
f74f4e04 4292 unsigned HOST_WIDE_INT abs_d;
4293
3d77819c 4294 /* Since d might be INT_MIN, we have to cast to
4295 unsigned HOST_WIDE_INT before negating to avoid
4296 undefined signed overflow. */
34f60736 4297 abs_d = (d >= 0
4298 ? (unsigned HOST_WIDE_INT) d
4299 : - (unsigned HOST_WIDE_INT) d);
d2fa4ea5 4300
4301 /* n rem d = n rem -d */
4302 if (rem_flag && d < 0)
4303 {
4304 d = abs_d;
2d232d05 4305 op1 = gen_int_mode (abs_d, compute_mode);
d2fa4ea5 4306 }
4307
4308 if (d == 1)
4309 quotient = op0;
4310 else if (d == -1)
4311 quotient = expand_unop (compute_mode, neg_optab, op0,
4312 tquotient, 0);
6115f243 4313 else if (HOST_BITS_PER_WIDE_INT >= size
4314 && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
7676164c 4315 {
4316 /* This case is not handled correctly below. */
4317 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4318 compute_mode, 1, 1);
4319 if (quotient == 0)
4320 goto fail1;
4321 }
d2fa4ea5 4322 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
49a71e58 4323 && (rem_flag
4324 ? smod_pow2_cheap (speed, compute_mode)
4325 : sdiv_pow2_cheap (speed, compute_mode))
41323e11 4326		   /* We assume that the cheap metric is true if the
4327 optab has an expander for this mode. */
99bdde56 4328 && ((optab_handler ((rem_flag ? smod_optab
4329 : sdiv_optab),
d6bf3b14 4330 compute_mode)
9884e77f 4331 != CODE_FOR_nothing)
d6bf3b14 4332 || (optab_handler (sdivmod_optab,
4333 compute_mode)
4334 != CODE_FOR_nothing)))
d2fa4ea5 4335 ;
4336 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4337 {
41323e11 4338 if (rem_flag)
4339 {
4340 remainder = expand_smod_pow2 (compute_mode, op0, d);
4341 if (remainder)
4342 return gen_lowpart (mode, remainder);
4343 }
cb2e141e 4344
49a71e58 4345 if (sdiv_pow2_cheap (speed, compute_mode)
d6bf3b14 4346 && ((optab_handler (sdiv_optab, compute_mode)
cb2e141e 4347 != CODE_FOR_nothing)
d6bf3b14 4348 || (optab_handler (sdivmod_optab, compute_mode)
cb2e141e 4349 != CODE_FOR_nothing)))
4350 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4351 compute_mode, op0,
4352 gen_int_mode (abs_d,
4353 compute_mode),
4354 NULL_RTX, 0);
4355 else
4356 quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
d2fa4ea5 4357
41323e11 4358 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4359 negate the quotient. */
d2fa4ea5 4360 if (d < 0)
4361 {
4362 insn = get_last_insn ();
38457527 4363 if (insn != last
dd192c2d 4364 && abs_d < ((unsigned HOST_WIDE_INT) 1
4365 << (HOST_BITS_PER_WIDE_INT - 1)))
41cf444a 4366 set_dst_reg_note (insn, REG_EQUAL,
4367 gen_rtx_DIV (compute_mode, op0,
f62058c3 4368 gen_int_mode
4369 (abs_d,
4370 compute_mode)),
41cf444a 4371 quotient);
d2fa4ea5 4372
4373 quotient = expand_unop (compute_mode, neg_optab,
4374 quotient, quotient, 0);
4375 }
4376 }
61e477c7 4377 else if (size <= HOST_BITS_PER_WIDE_INT)
d2fa4ea5 4378 {
4379 choose_multiplier (abs_d, size, size - 1,
127cb1cd 4380 &ml, &post_shift, &lgup);
d2fa4ea5 4381 if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4382 {
4383 rtx t1, t2, t3;
4384
84ab528e 4385 if (post_shift >= BITS_PER_WORD
4386 || size - 1 >= BITS_PER_WORD)
4387 goto fail1;
4388
49a71e58 4389 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4390 + shift_cost (speed, compute_mode, size - 1)
4391 + add_cost (speed, compute_mode));
ebf4f764 4392 t1 = expmed_mult_highpart (compute_mode, op0,
127cb1cd 4393 GEN_INT (ml), NULL_RTX, 0,
33183a3c 4394 max_cost - extra_cost);
d2fa4ea5 4395 if (t1 == 0)
4396 goto fail1;
7c446c95 4397 t2 = expand_shift
4398 (RSHIFT_EXPR, compute_mode, t1,
f5ff0b21 4399 post_shift, NULL_RTX, 0);
7c446c95 4400 t3 = expand_shift
4401 (RSHIFT_EXPR, compute_mode, op0,
f5ff0b21 4402 size - 1, NULL_RTX, 0);
d2fa4ea5 4403 if (d < 0)
7014838c 4404 quotient
4405 = force_operand (gen_rtx_MINUS (compute_mode,
4406 t3, t2),
4407 tquotient);
d2fa4ea5 4408 else
7014838c 4409 quotient
4410 = force_operand (gen_rtx_MINUS (compute_mode,
4411 t2, t3),
4412 tquotient);
d2fa4ea5 4413 }
4414 else
4415 {
4416 rtx t1, t2, t3, t4;
4417
84ab528e 4418 if (post_shift >= BITS_PER_WORD
4419 || size - 1 >= BITS_PER_WORD)
4420 goto fail1;
4421
d2fa4ea5 4422 ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4de52edf 4423 mlr = gen_int_mode (ml, compute_mode);
49a71e58 4424 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4425 + shift_cost (speed, compute_mode, size - 1)
4426 + 2 * add_cost (speed, compute_mode));
ebf4f764 4427 t1 = expmed_mult_highpart (compute_mode, op0, mlr,
33183a3c 4428 NULL_RTX, 0,
4429 max_cost - extra_cost);
d2fa4ea5 4430 if (t1 == 0)
4431 goto fail1;
7014838c 4432 t2 = force_operand (gen_rtx_PLUS (compute_mode,
4433 t1, op0),
d2fa4ea5 4434 NULL_RTX);
7c446c95 4435 t3 = expand_shift
4436 (RSHIFT_EXPR, compute_mode, t2,
f5ff0b21 4437 post_shift, NULL_RTX, 0);
7c446c95 4438 t4 = expand_shift
4439 (RSHIFT_EXPR, compute_mode, op0,
f5ff0b21 4440 size - 1, NULL_RTX, 0);
d2fa4ea5 4441 if (d < 0)
7014838c 4442 quotient
4443 = force_operand (gen_rtx_MINUS (compute_mode,
4444 t4, t3),
4445 tquotient);
d2fa4ea5 4446 else
7014838c 4447 quotient
4448 = force_operand (gen_rtx_MINUS (compute_mode,
4449 t3, t4),
4450 tquotient);
d2fa4ea5 4451 }
4452 }
61e477c7 4453	    else		/* Mode too wide to use the tricky code.  */
4454 break;
d2fa4ea5 4455
38457527 4456 insn = get_last_insn ();
41cf444a 4457 if (insn != last)
4458 set_dst_reg_note (insn, REG_EQUAL,
4459 gen_rtx_DIV (compute_mode, op0, op1),
4460 quotient);
d2fa4ea5 4461 }
4462 break;
4463 }
4464 fail1:
4465 delete_insns_since (last);
4466 break;
db96f378 4467
d2fa4ea5 4468 case FLOOR_DIV_EXPR:
4469 case FLOOR_MOD_EXPR:
4470 /* We will come here only for signed operations. */
4471 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4472 {
127cb1cd 4473 unsigned HOST_WIDE_INT mh, ml;
d2fa4ea5 4474 int pre_shift, lgup, post_shift;
4475 HOST_WIDE_INT d = INTVAL (op1);
4476
4477 if (d > 0)
4478 {
4479 /* We could just as easily deal with negative constants here,
4480 but it does not seem worth the trouble for GCC 2.6. */
4481 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4482 {
4483 pre_shift = floor_log2 (d);
4484 if (rem_flag)
4485 {
4486 remainder = expand_binop (compute_mode, and_optab, op0,
4487 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4488 remainder, 0, OPTAB_LIB_WIDEN);
4489 if (remainder)
436b0397 4490 return gen_lowpart (mode, remainder);
d2fa4ea5 4491 }
7c446c95 4492 quotient = expand_shift
4493 (RSHIFT_EXPR, compute_mode, op0,
f5ff0b21 4494 pre_shift, tquotient, 0);
d2fa4ea5 4495 }
4496 else
4497 {
4498 rtx t1, t2, t3, t4;
4499
4500 mh = choose_multiplier (d, size, size - 1,
4501 &ml, &post_shift, &lgup);
611234b4 4502 gcc_assert (!mh);
d2fa4ea5 4503
84ab528e 4504 if (post_shift < BITS_PER_WORD
4505 && size - 1 < BITS_PER_WORD)
d2fa4ea5 4506 {
7c446c95 4507 t1 = expand_shift
4508 (RSHIFT_EXPR, compute_mode, op0,
f5ff0b21 4509 size - 1, NULL_RTX, 0);
84ab528e 4510 t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4511 NULL_RTX, 0, OPTAB_WIDEN);
49a71e58 4512 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4513 + shift_cost (speed, compute_mode, size - 1)
4514 + 2 * add_cost (speed, compute_mode));
ebf4f764 4515 t3 = expmed_mult_highpart (compute_mode, t2,
127cb1cd 4516 GEN_INT (ml), NULL_RTX, 1,
84ab528e 4517 max_cost - extra_cost);
4518 if (t3 != 0)
4519 {
7c446c95 4520 t4 = expand_shift
4521 (RSHIFT_EXPR, compute_mode, t3,
f5ff0b21 4522 post_shift, NULL_RTX, 1);
84ab528e 4523 quotient = expand_binop (compute_mode, xor_optab,
4524 t4, t1, tquotient, 0,
4525 OPTAB_WIDEN);
4526 }
d2fa4ea5 4527 }
4528 }
4529 }
4530 else
4531 {
4532 rtx nsign, t1, t2, t3, t4;
941522d6 4533 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4534 op0, constm1_rtx), NULL_RTX);
d2fa4ea5 4535 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4536 0, OPTAB_WIDEN);
7c446c95 4537 nsign = expand_shift
4538 (RSHIFT_EXPR, compute_mode, t2,
f5ff0b21 4539 size - 1, NULL_RTX, 0);
941522d6 4540 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
d2fa4ea5 4541 NULL_RTX);
4542 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4543 NULL_RTX, 0);
4544 if (t4)
4545 {
4546 rtx t5;
4547 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4548 NULL_RTX, 0);
941522d6 4549 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4550 t4, t5),
d2fa4ea5 4551 tquotient);
4552 }
4553 }
4554 }
4555
4556 if (quotient != 0)
4557 break;
4558 delete_insns_since (last);
4559
4560 /* Try using an instruction that produces both the quotient and
4561 remainder, using truncation. We can easily compensate the quotient
4562 or remainder to get floor rounding, once we have the remainder.
4563	 Notice that we also compute the final remainder value here,
4564 and return the result right away. */
36db22a0 4565 if (target == 0 || GET_MODE (target) != compute_mode)
d2fa4ea5 4566 target = gen_reg_rtx (compute_mode);
e324608c 4567
d2fa4ea5 4568 if (rem_flag)
4569 {
e324608c 4570 remainder
8ad4c111 4571 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
d2fa4ea5 4572 quotient = gen_reg_rtx (compute_mode);
4573 }
4574 else
4575 {
e324608c 4576 quotient
8ad4c111 4577 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
d2fa4ea5 4578 remainder = gen_reg_rtx (compute_mode);
4579 }
4580
4581 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4582 quotient, remainder, 0))
4583 {
4584 /* This could be computed with a branch-less sequence.
4585 Save that for later. */
4586 rtx tem;
4587 rtx label = gen_label_rtx ();
c5aa1e92 4588 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
d2fa4ea5 4589 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4590 NULL_RTX, 0, OPTAB_WIDEN);
c5aa1e92 4591 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
d2fa4ea5 4592 expand_dec (quotient, const1_rtx);
4593 expand_inc (remainder, op1);
4594 emit_label (label);
436b0397 4595 return gen_lowpart (mode, rem_flag ? remainder : quotient);
d2fa4ea5 4596 }
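	  /* Illustrative note, not part of the original file: the
	     compensation above relies on C division truncating toward
	     zero, so the floor result differs by one exactly when the
	     remainder is nonzero and the operand signs differ:

		 q = a / b;  r = a % b;
		 if (r != 0 && (a ^ b) < 0)
		   q -= 1, r += b;

	     which is just what the two compare-and-jumps guard.  */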
4597
4598 /* No luck with division elimination or divmod. Have to do it
4599 by conditionally adjusting op0 *and* the result. */
db96f378 4600 {
d2fa4ea5 4601 rtx label1, label2, label3, label4, label5;
4602 rtx adjusted_op0;
4603 rtx tem;
4604
4605 quotient = gen_reg_rtx (compute_mode);
4606 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4607 label1 = gen_label_rtx ();
4608 label2 = gen_label_rtx ();
4609 label3 = gen_label_rtx ();
4610 label4 = gen_label_rtx ();
4611 label5 = gen_label_rtx ();
c5aa1e92 4612 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4613 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
d2fa4ea5 4614 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4615 quotient, 0, OPTAB_LIB_WIDEN);
4616 if (tem != quotient)
4617 emit_move_insn (quotient, tem);
4618 emit_jump_insn (gen_jump (label5));
4619 emit_barrier ();
4620 emit_label (label1);
db96f378 4621 expand_inc (adjusted_op0, const1_rtx);
d2fa4ea5 4622 emit_jump_insn (gen_jump (label4));
4623 emit_barrier ();
4624 emit_label (label2);
c5aa1e92 4625 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
d2fa4ea5 4626 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4627 quotient, 0, OPTAB_LIB_WIDEN);
4628 if (tem != quotient)
4629 emit_move_insn (quotient, tem);
4630 emit_jump_insn (gen_jump (label5));
4631 emit_barrier ();
4632 emit_label (label3);
4633 expand_dec (adjusted_op0, const1_rtx);
4634 emit_label (label4);
4635 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4636 quotient, 0, OPTAB_LIB_WIDEN);
4637 if (tem != quotient)
4638 emit_move_insn (quotient, tem);
4639 expand_dec (quotient, const1_rtx);
4640 emit_label (label5);
db96f378 4641 }
d2fa4ea5 4642 break;
db96f378 4643
d2fa4ea5 4644 case CEIL_DIV_EXPR:
4645 case CEIL_MOD_EXPR:
4646 if (unsignedp)
4647 {
3f4d178c 4648 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4649 {
4650 rtx t1, t2, t3;
4651 unsigned HOST_WIDE_INT d = INTVAL (op1);
4652 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
f5ff0b21 4653 floor_log2 (d), tquotient, 1);
3f4d178c 4654 t2 = expand_binop (compute_mode, and_optab, op0,
4655 GEN_INT (d - 1),
4656 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4657 t3 = gen_reg_rtx (compute_mode);
4658 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4659 compute_mode, 1, 1);
9d7a4e0b 4660 if (t3 == 0)
4661 {
4662 rtx lab;
4663 lab = gen_label_rtx ();
c5aa1e92 4664 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
9d7a4e0b 4665 expand_inc (t1, const1_rtx);
4666 emit_label (lab);
4667 quotient = t1;
4668 }
4669 else
941522d6 4670 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4671 t1, t3),
9d7a4e0b 4672 tquotient);
3f4d178c 4673 break;
4674 }
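	      /* Illustrative note, not part of the original file: for a
		 power of two d = 2^k, the unsigned ceiling quotient is

		     q = (x >> k) + ((x & (d - 1)) != 0);

		 i.e. round up exactly when any low bit is set, which is
		 what the shift / mask / store-flag sequence computes.  */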
d2fa4ea5 4675
4676 /* Try using an instruction that produces both the quotient and
4677 remainder, using truncation. We can easily compensate the
4678 quotient or remainder to get ceiling rounding, once we have the
 4679	 remainder. Notice that we also compute the final remainder
 4680	 value here and return the result right away. */
36db22a0 4681 if (target == 0 || GET_MODE (target) != compute_mode)
d2fa4ea5 4682 target = gen_reg_rtx (compute_mode);
e324608c 4683
d2fa4ea5 4684 if (rem_flag)
4685 {
8ad4c111 4686 remainder = (REG_P (target)
e324608c 4687 ? target : gen_reg_rtx (compute_mode));
d2fa4ea5 4688 quotient = gen_reg_rtx (compute_mode);
4689 }
4690 else
4691 {
8ad4c111 4692 quotient = (REG_P (target)
e324608c 4693 ? target : gen_reg_rtx (compute_mode));
d2fa4ea5 4694 remainder = gen_reg_rtx (compute_mode);
4695 }
4696
4697 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4698 remainder, 1))
4699 {
4700 /* This could be computed with a branch-less sequence.
4701 Save that for later. */
4702 rtx label = gen_label_rtx ();
c5aa1e92 4703 do_cmp_and_jump (remainder, const0_rtx, EQ,
4704 compute_mode, label);
d2fa4ea5 4705 expand_inc (quotient, const1_rtx);
4706 expand_dec (remainder, op1);
4707 emit_label (label);
436b0397 4708 return gen_lowpart (mode, rem_flag ? remainder : quotient);
d2fa4ea5 4709 }
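	  /* Illustrative note, not part of the original file: given a
	     truncating divmod, unsigned ceiling rounding is simply

		 q = a / b;  r = a % b;
		 if (r != 0)
		   q += 1, r -= b;

	     matching the increment/decrement pair emitted above.  */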
4710
4711 /* No luck with division elimination or divmod. Have to do it
4712 by conditionally adjusting op0 *and* the result. */
db96f378 4713 {
d2fa4ea5 4714 rtx label1, label2;
4715 rtx adjusted_op0, tem;
4716
4717 quotient = gen_reg_rtx (compute_mode);
4718 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4719 label1 = gen_label_rtx ();
4720 label2 = gen_label_rtx ();
c5aa1e92 4721 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4722 compute_mode, label1);
d2fa4ea5 4723 emit_move_insn (quotient, const0_rtx);
4724 emit_jump_insn (gen_jump (label2));
4725 emit_barrier ();
4726 emit_label (label1);
4727 expand_dec (adjusted_op0, const1_rtx);
4728 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4729 quotient, 1, OPTAB_LIB_WIDEN);
4730 if (tem != quotient)
4731 emit_move_insn (quotient, tem);
4732 expand_inc (quotient, const1_rtx);
4733 emit_label (label2);
db96f378 4734 }
d2fa4ea5 4735 }
4736 else /* signed */
4737 {
2b10064a 4738 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4739 && INTVAL (op1) >= 0)
4740 {
4741 /* This is extremely similar to the code for the unsigned case
4742 above. For 2.7 we should merge these variants, but for
4743 2.6.1 I don't want to touch the code for unsigned since that
 4744	 gets used in C. The signed case will only be used by other
4745 languages (Ada). */
4746
4747 rtx t1, t2, t3;
4748 unsigned HOST_WIDE_INT d = INTVAL (op1);
4749 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
f5ff0b21 4750 floor_log2 (d), tquotient, 0);
2b10064a 4751 t2 = expand_binop (compute_mode, and_optab, op0,
4752 GEN_INT (d - 1),
4753 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4754 t3 = gen_reg_rtx (compute_mode);
4755 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4756 compute_mode, 1, 1);
4757 if (t3 == 0)
4758 {
4759 rtx lab;
4760 lab = gen_label_rtx ();
c5aa1e92 4761 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
2b10064a 4762 expand_inc (t1, const1_rtx);
4763 emit_label (lab);
4764 quotient = t1;
4765 }
4766 else
941522d6 4767 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4768 t1, t3),
2b10064a 4769 tquotient);
4770 break;
4771 }
4772
d2fa4ea5 4773 /* Try using an instruction that produces both the quotient and
4774 remainder, using truncation. We can easily compensate the
4775 quotient or remainder to get ceiling rounding, once we have the
 4776	 remainder. Notice that we also compute the final remainder
 4777	 value here and return the result right away. */
36db22a0 4778 if (target == 0 || GET_MODE (target) != compute_mode)
d2fa4ea5 4779 target = gen_reg_rtx (compute_mode);
4780 if (rem_flag)
4781 {
8ad4c111 4782	      remainder = (REG_P (target)
e324608c 4783 ? target : gen_reg_rtx (compute_mode));
d2fa4ea5 4784 quotient = gen_reg_rtx (compute_mode);
4785 }
4786 else
4787 {
8ad4c111 4788 quotient = (REG_P (target)
e324608c 4789 ? target : gen_reg_rtx (compute_mode));
d2fa4ea5 4790 remainder = gen_reg_rtx (compute_mode);
4791 }
4792
4793 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4794 remainder, 0))
4795 {
4796 /* This could be computed with a branch-less sequence.
4797 Save that for later. */
4798 rtx tem;
4799 rtx label = gen_label_rtx ();
c5aa1e92 4800 do_cmp_and_jump (remainder, const0_rtx, EQ,
4801 compute_mode, label);
d2fa4ea5 4802 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4803 NULL_RTX, 0, OPTAB_WIDEN);
c5aa1e92 4804 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
d2fa4ea5 4805 expand_inc (quotient, const1_rtx);
4806 expand_dec (remainder, op1);
4807 emit_label (label);
436b0397 4808 return gen_lowpart (mode, rem_flag ? remainder : quotient);
d2fa4ea5 4809 }
4810
4811 /* No luck with division elimination or divmod. Have to do it
4812 by conditionally adjusting op0 *and* the result. */
db96f378 4813 {
d2fa4ea5 4814 rtx label1, label2, label3, label4, label5;
4815 rtx adjusted_op0;
4816 rtx tem;
4817
4818 quotient = gen_reg_rtx (compute_mode);
4819 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4820 label1 = gen_label_rtx ();
4821 label2 = gen_label_rtx ();
4822 label3 = gen_label_rtx ();
4823 label4 = gen_label_rtx ();
4824 label5 = gen_label_rtx ();
c5aa1e92 4825 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4826 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4827 compute_mode, label1);
d2fa4ea5 4828 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4829 quotient, 0, OPTAB_LIB_WIDEN);
4830 if (tem != quotient)
4831 emit_move_insn (quotient, tem);
4832 emit_jump_insn (gen_jump (label5));
4833 emit_barrier ();
4834 emit_label (label1);
4835 expand_dec (adjusted_op0, const1_rtx);
4836 emit_jump_insn (gen_jump (label4));
4837 emit_barrier ();
4838 emit_label (label2);
c5aa1e92 4839 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4840 compute_mode, label3);
d2fa4ea5 4841 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4842 quotient, 0, OPTAB_LIB_WIDEN);
4843 if (tem != quotient)
4844 emit_move_insn (quotient, tem);
4845 emit_jump_insn (gen_jump (label5));
4846 emit_barrier ();
4847 emit_label (label3);
4848 expand_inc (adjusted_op0, const1_rtx);
4849 emit_label (label4);
4850 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4851 quotient, 0, OPTAB_LIB_WIDEN);
4852 if (tem != quotient)
4853 emit_move_insn (quotient, tem);
4854 expand_inc (quotient, const1_rtx);
4855 emit_label (label5);
db96f378 4856 }
d2fa4ea5 4857 }
4858 break;
64e50eaa 4859
d2fa4ea5 4860 case EXACT_DIV_EXPR:
4861 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4862 {
4863 HOST_WIDE_INT d = INTVAL (op1);
4864 unsigned HOST_WIDE_INT ml;
bec2d490 4865 int pre_shift;
d2fa4ea5 4866 rtx t1;
4867
bec2d490 4868 pre_shift = floor_log2 (d & -d);
4869 ml = invert_mod2n (d >> pre_shift, size);
4870 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
f5ff0b21 4871 pre_shift, NULL_RTX, unsignedp);
213b27c9 4872 quotient = expand_mult (compute_mode, t1,
2d232d05 4873 gen_int_mode (ml, compute_mode),
273014f4 4874 NULL_RTX, 1);
d2fa4ea5 4875
4876 insn = get_last_insn ();
41cf444a 4877 set_dst_reg_note (insn, REG_EQUAL,
4878 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4879 compute_mode, op0, op1),
4880 quotient);
d2fa4ea5 4881 }
4882 break;
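	  /* Illustrative note, not part of the original file: an exact
	     division by an odd d is a multiply by d's inverse modulo 2^N,
	     which invert_mod2n computes.  E.g. in a 32-bit mode the
	     inverse of 5 is 0xCCCCCCCD, since 5 * 0xCCCCCCCD == 1
	     (mod 2^32), so 35 * 0xCCCCCCCD == 7 (mod 2^32) recovers the
	     quotient with no divide.  An even d first has its trailing
	     zero bits shifted out, as the pre_shift above does.  */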
4883
4884 case ROUND_DIV_EXPR:
4885 case ROUND_MOD_EXPR:
c3118728 4886 if (unsignedp)
4887 {
4888 rtx tem;
4889 rtx label;
4890 label = gen_label_rtx ();
4891 quotient = gen_reg_rtx (compute_mode);
4892 remainder = gen_reg_rtx (compute_mode);
4893 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4894 {
4895 rtx tem;
4896 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4897 quotient, 1, OPTAB_LIB_WIDEN);
4898 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4899 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4900 remainder, 1, OPTAB_LIB_WIDEN);
4901 }
29c05e22 4902 tem = plus_constant (compute_mode, op1, -1);
f5ff0b21 4903 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
c5aa1e92 4904 do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
c3118728 4905 expand_inc (quotient, const1_rtx);
4906 expand_dec (remainder, op1);
4907 emit_label (label);
4908 }
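	  /* Illustrative note, not part of the original file: unsigned
	     round-to-nearest via a truncating divmod, ties rounding up:

		 q = a / b;  r = a % b;
		 if (r > (b - 1) / 2)
		   q += 1, r -= b;

	     the (b - 1) >> 1 threshold computed above is this test.  */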
4909 else
4910 {
4911 rtx abs_rem, abs_op1, tem, mask;
4912 rtx label;
4913 label = gen_label_rtx ();
4914 quotient = gen_reg_rtx (compute_mode);
4915 remainder = gen_reg_rtx (compute_mode);
4916 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4917 {
4918 rtx tem;
4919 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4920 quotient, 0, OPTAB_LIB_WIDEN);
4921 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4922 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4923 remainder, 0, OPTAB_LIB_WIDEN);
4924 }
bec2d490 4925 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4926 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
c3118728 4927 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
f5ff0b21 4928 1, NULL_RTX, 1);
c5aa1e92 4929 do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
c3118728 4930 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4931 NULL_RTX, 0, OPTAB_WIDEN);
4932 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
f5ff0b21 4933 size - 1, NULL_RTX, 0);
c3118728 4934 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4935 NULL_RTX, 0, OPTAB_WIDEN);
4936 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4937 NULL_RTX, 0, OPTAB_WIDEN);
4938 expand_inc (quotient, tem);
4939 tem = expand_binop (compute_mode, xor_optab, mask, op1,
4940 NULL_RTX, 0, OPTAB_WIDEN);
4941 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4942 NULL_RTX, 0, OPTAB_WIDEN);
4943 expand_dec (remainder, tem);
4944 emit_label (label);
4945 }
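	  /* Illustrative note, not part of the original file: the signed
	     variant adjusts when 2 * abs(r) >= abs(b), in the direction
	     of the quotient's sign:

		 if (2 * abs (r) >= abs (b))
		   q += sign, r -= sign * b;

	     where sign is -1 if a and b differ in sign, else 1; the mask
	     derived from (a ^ b) >> (size - 1) supplies it branchlessly
	     via (mask ^ 1) - mask and (mask ^ b) - mask.  */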
4946 return gen_lowpart (mode, rem_flag ? remainder : quotient);
a4194ff7 4947
0dbd1c74 4948 default:
611234b4 4949 gcc_unreachable ();
d2fa4ea5 4950 }
db96f378 4951
d2fa4ea5 4952 if (quotient == 0)
db96f378 4953 {
36db22a0 4954 if (target && GET_MODE (target) != compute_mode)
4955 target = 0;
4956
d2fa4ea5 4957 if (rem_flag)
db96f378 4958 {
d6567151 4959 /* Try to produce the remainder without producing the quotient.
cb0ccc1e 4960 If we seem to have a divmod pattern that does not require widening,
df07c3ae 4961 don't try widening here. We should really have a WIDEN argument
d6567151 4962 to expand_twoval_binop, since what we'd really like to do here is
4963 1) try a mod insn in compute_mode
4964 2) try a divmod insn in compute_mode
4965 3) try a div insn in compute_mode and multiply-subtract to get
4966 remainder
4967 4) try the same things with widening allowed. */
4968 remainder
4969 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4970 op0, op1, target,
4971 unsignedp,
d6bf3b14 4972 ((optab_handler (optab2, compute_mode)
d6567151 4973 != CODE_FOR_nothing)
4974 ? OPTAB_DIRECT : OPTAB_WIDEN));
d2fa4ea5 4975 if (remainder == 0)
db96f378 4976 {
4977 /* No luck there. Can we do remainder and divide at once
4978 without a library call? */
d2fa4ea5 4979 remainder = gen_reg_rtx (compute_mode);
4980 if (! expand_twoval_binop ((unsignedp
4981 ? udivmod_optab
4982 : sdivmod_optab),
4983 op0, op1,
4984 NULL_RTX, remainder, unsignedp))
4985 remainder = 0;
db96f378 4986 }
d2fa4ea5 4987
4988 if (remainder)
4989 return gen_lowpart (mode, remainder);
db96f378 4990 }
db96f378 4991
22971e4a 4992 /* Produce the quotient. Try a quotient insn, but not a library call.
4993 If we have a divmod in this mode, use it in preference to widening
4994 the div (for this test we assume it will not fail). Note that optab2
4995 is set to the one of the two optabs that the call below will use. */
4996 quotient
4997 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4998 op0, op1, rem_flag ? NULL_RTX : target,
4999 unsignedp,
d6bf3b14 5000 ((optab_handler (optab2, compute_mode)
22971e4a 5001 != CODE_FOR_nothing)
5002 ? OPTAB_DIRECT : OPTAB_WIDEN));
5003
d2fa4ea5 5004 if (quotient == 0)
db96f378 5005 {
5006 /* No luck there. Try a quotient-and-remainder insn,
5007 keeping the quotient alone. */
d2fa4ea5 5008 quotient = gen_reg_rtx (compute_mode);
db96f378 5009 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
d2fa4ea5 5010 op0, op1,
5011 quotient, NULL_RTX, unsignedp))
5012 {
5013 quotient = 0;
5014 if (! rem_flag)
5015 /* Still no luck. If we are not computing the remainder,
5016 use a library call for the quotient. */
5017 quotient = sign_expand_binop (compute_mode,
5018 udiv_optab, sdiv_optab,
5019 op0, op1, target,
5020 unsignedp, OPTAB_LIB_WIDEN);
5021 }
db96f378 5022 }
db96f378 5023 }
5024
db96f378 5025 if (rem_flag)
5026 {
36db22a0 5027 if (target && GET_MODE (target) != compute_mode)
5028 target = 0;
5029
d2fa4ea5 5030 if (quotient == 0)
30e9913f 5031 {
5032 /* No divide instruction either. Use library for remainder. */
5033 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5034 op0, op1, target,
5035 unsignedp, OPTAB_LIB_WIDEN);
5036 /* No remainder function. Try a quotient-and-remainder
5037 function, keeping the remainder. */
5038 if (!remainder)
5039 {
5040 remainder = gen_reg_rtx (compute_mode);
48e1416a 5041 if (!expand_twoval_binop_libfunc
30e9913f 5042 (unsignedp ? udivmod_optab : sdivmod_optab,
5043 op0, op1,
5044 NULL_RTX, remainder,
5045 unsignedp ? UMOD : MOD))
5046 remainder = NULL_RTX;
5047 }
5048 }
db96f378 5049 else
5050 {
5051 /* We divided. Now finish doing X - Y * (X / Y). */
d2fa4ea5 5052 remainder = expand_mult (compute_mode, quotient, op1,
5053 NULL_RTX, unsignedp);
5054 remainder = expand_binop (compute_mode, sub_optab, op0,
5055 remainder, target, unsignedp,
5056 OPTAB_LIB_WIDEN);
db96f378 5057 }
5058 }
5059
d2fa4ea5 5060 return gen_lowpart (mode, rem_flag ? remainder : quotient);
db96f378 5061}
5062\f
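/* Illustrative sketch, not part of the original expmed.c: the rounding
   fix-ups expand_divmod emits all reduce to adjusting a truncating
   quotient/remainder pair.  The helper below shows the three signed
   rounding modes side by side in plain C; all names are hypothetical
   and it assumes b != 0 and no overflow.  */

static inline long
divmod_rounded_example (long a, long b, int rounding)
{
  long q = a / b, r = a % b;	/* C division truncates toward zero.  */
  if (rounding == 0 && r != 0 && (a ^ b) < 0)
    q -= 1;			/* floor: down when signs differ.  */
  else if (rounding == 1 && r != 0 && (a ^ b) >= 0)
    q += 1;			/* ceil: up when signs agree.  */
  else if (rounding == 2 && 2 * (r < 0 ? -r : r) >= (b < 0 ? -b : b))
    q += (a ^ b) < 0 ? -1 : 1;	/* round: to nearest, ties away from 0.  */
  return q;
}
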
5063/* Return a tree node with data type TYPE, describing the value of X.
735f4358 5064	 Usually this is a VAR_DECL, if there is no obvious better choice.
db96f378 5065	 X may be an expression; however, we only support those expressions
1e625a2e 5066 generated by loop.c. */
db96f378 5067
5068tree
35cb5232 5069make_tree (tree type, rtx x)
db96f378 5070{
5071 tree t;
5072
5073 switch (GET_CODE (x))
5074 {
5075 case CONST_INT:
7c446c95 5076 {
5077 HOST_WIDE_INT hi = 0;
5078
5079 if (INTVAL (x) < 0
5080 && !(TYPE_UNSIGNED (type)
5081 && (GET_MODE_BITSIZE (TYPE_MODE (type))
5082 < HOST_BITS_PER_WIDE_INT)))
5083 hi = -1;
48e1416a 5084
7016c612 5085 t = build_int_cst_wide (type, INTVAL (x), hi);
48e1416a 5086
7c446c95 5087 return t;
5088 }
48e1416a 5089
db96f378 5090 case CONST_DOUBLE:
5091 if (GET_MODE (x) == VOIDmode)
7016c612 5092 t = build_int_cst_wide (type,
5093 CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
db96f378 5094 else
5095 {
5096 REAL_VALUE_TYPE d;
5097
5098 REAL_VALUE_FROM_CONST_DOUBLE (d, x);
5099 t = build_real (type, d);
5100 }
5101
5102 return t;
886cfd4f 5103
5104 case CONST_VECTOR:
5105 {
aae57ecf 5106 int units = CONST_VECTOR_NUNITS (x);
5107 tree itype = TREE_TYPE (type);
fadf62f4 5108 tree *elts;
aae57ecf 5109 int i;
886cfd4f 5110
886cfd4f 5111 /* Build a tree with vector elements. */
fadf62f4 5112 elts = XALLOCAVEC (tree, units);
886cfd4f 5113 for (i = units - 1; i >= 0; --i)
5114 {
aae57ecf 5115 rtx elt = CONST_VECTOR_ELT (x, i);
fadf62f4 5116 elts[i] = make_tree (itype, elt);
886cfd4f 5117 }
a4194ff7 5118
fadf62f4 5119 return build_vector (type, elts);
886cfd4f 5120 }
5121
db96f378 5122 case PLUS:
faa43f85 5123 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5124 make_tree (type, XEXP (x, 1)));
a4194ff7 5125
db96f378 5126 case MINUS:
faa43f85 5127 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5128 make_tree (type, XEXP (x, 1)));
a4194ff7 5129
db96f378 5130 case NEG:
faa43f85 5131 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
db96f378 5132
5133 case MULT:
faa43f85 5134 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5135 make_tree (type, XEXP (x, 1)));
a4194ff7 5136
db96f378 5137 case ASHIFT:
faa43f85 5138 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5139 make_tree (type, XEXP (x, 1)));
a4194ff7 5140
db96f378 5141 case LSHIFTRT:
71eea85c 5142 t = unsigned_type_for (type);
37e8021c 5143 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5144 make_tree (t, XEXP (x, 0)),
5145 make_tree (type, XEXP (x, 1))));
a4194ff7 5146
db96f378 5147 case ASHIFTRT:
11773141 5148 t = signed_type_for (type);
37e8021c 5149 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5150 make_tree (t, XEXP (x, 0)),
5151 make_tree (type, XEXP (x, 1))));
a4194ff7 5152
db96f378 5153 case DIV:
5154 if (TREE_CODE (type) != REAL_TYPE)
11773141 5155 t = signed_type_for (type);
db96f378 5156 else
5157 t = type;
5158
37e8021c 5159 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5160 make_tree (t, XEXP (x, 0)),
5161 make_tree (t, XEXP (x, 1))));
db96f378 5162 case UDIV:
71eea85c 5163 t = unsigned_type_for (type);
37e8021c 5164 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5165 make_tree (t, XEXP (x, 0)),
5166 make_tree (t, XEXP (x, 1))));
513fac1b 5167
5168 case SIGN_EXTEND:
5169 case ZERO_EXTEND:
dc24ddbd 5170 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5171 GET_CODE (x) == ZERO_EXTEND);
37e8021c 5172 return fold_convert (type, make_tree (t, XEXP (x, 0)));
513fac1b 5173
96d5c2e2 5174 case CONST:
5175 return make_tree (type, XEXP (x, 0));
5176
5177 case SYMBOL_REF:
5178 t = SYMBOL_REF_DECL (x);
5179 if (t)
5180 return fold_convert (type, build_fold_addr_expr (t));
5181 /* else fall through. */
5182
735f4358 5183 default:
e60a6f7b 5184 t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
c54c9422 5185
98155838 5186 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5187 address mode to pointer mode. */
85d654dd 5188 if (POINTER_TYPE_P (type))
98155838 5189 x = convert_memory_address_addr_space
5190 (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
c54c9422 5191
9fabac44 5192 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5193 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5ded8c6f 5194 t->decl_with_rtl.rtl = x;
735f4358 5195
db96f378 5196 return t;
5197 }
5198}
db96f378 5199\f
5200/* Compute the logical-and of OP0 and OP1, storing it in TARGET
5201 and returning TARGET.
5202
5203 If TARGET is 0, a pseudo-register or constant is returned. */
5204
5205rtx
35cb5232 5206expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
db96f378 5207{
6de9716c 5208 rtx tem = 0;
db96f378 5209
6de9716c 5210 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5211 tem = simplify_binary_operation (AND, mode, op0, op1);
5212 if (tem == 0)
db96f378 5213 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
db96f378 5214
5215 if (target == 0)
5216 target = tem;
5217 else if (tem != target)
5218 emit_move_insn (target, tem);
5219 return target;
5220}
80e1bfa1 5221
595d88b5 5222/* Helper function for emit_store_flag. */
5223static rtx
cf564daf 5224emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5225 enum machine_mode mode, enum machine_mode compare_mode,
d68bc06a 5226 int unsignedp, rtx x, rtx y, int normalizep,
5227 enum machine_mode target_mode)
595d88b5 5228{
8786db1e 5229 struct expand_operand ops[4];
5230 rtx op0, last, comparison, subtarget;
fb425e71 5231 enum machine_mode result_mode = insn_data[(int) icode].operand[0].mode;
5232
5233 last = get_last_insn ();
5234 x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5235 y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
8786db1e 5236 if (!x || !y)
fb425e71 5237 {
5238 delete_insns_since (last);
5239 return NULL_RTX;
5240 }
5241
d68bc06a 5242 if (target_mode == VOIDmode)
5243 target_mode = result_mode;
5244 if (!target)
5245 target = gen_reg_rtx (target_mode);
48e1416a 5246
8786db1e 5247 comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
fb425e71 5248
8786db1e 5249 create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5250 create_fixed_operand (&ops[1], comparison);
5251 create_fixed_operand (&ops[2], x);
5252 create_fixed_operand (&ops[3], y);
5253 if (!maybe_expand_insn (icode, 4, ops))
5254 {
5255 delete_insns_since (last);
5256 return NULL_RTX;
5257 }
5258 subtarget = ops[0].value;
80e1bfa1 5259
595d88b5 5260 /* If we are converting to a wider mode, first convert to
5261 TARGET_MODE, then normalize. This produces better combining
5262 opportunities on machines that have a SIGN_EXTRACT when we are
5263 testing a single bit. This mostly benefits the 68k.
5264
5265 If STORE_FLAG_VALUE does not have the sign bit set when
5266 interpreted in MODE, we can do this conversion as unsigned, which
5267 is usually more efficient. */
fb425e71 5268 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
595d88b5 5269 {
5270 convert_move (target, subtarget,
f92430e0 5271 val_signbit_known_clear_p (result_mode,
5272 STORE_FLAG_VALUE));
595d88b5 5273 op0 = target;
fb425e71 5274 result_mode = target_mode;
595d88b5 5275 }
5276 else
5277 op0 = subtarget;
5278
5279 /* If we want to keep subexpressions around, don't reuse our last
5280 target. */
5281 if (optimize)
5282 subtarget = 0;
5283
5284 /* Now normalize to the proper value in MODE. Sometimes we don't
5285 have to do anything. */
5286 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5287 ;
5288 /* STORE_FLAG_VALUE might be the most negative number, so write
 5289	     the comparison this way to avoid a compile-time warning. */
5290 else if (- normalizep == STORE_FLAG_VALUE)
fb425e71 5291 op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
595d88b5 5292
5293 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5294 it hard to use a value of just the sign bit due to ANSI integer
5295 constant typing rules. */
f92430e0 5296 else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
fb425e71 5297 op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
f5ff0b21 5298 GET_MODE_BITSIZE (result_mode) - 1, subtarget,
595d88b5 5299 normalizep == 1);
5300 else
5301 {
5302 gcc_assert (STORE_FLAG_VALUE & 1);
5303
fb425e71 5304 op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
595d88b5 5305 if (normalizep == -1)
fb425e71 5306 op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
595d88b5 5307 }
5308
5309 /* If we were converting to a smaller mode, do the conversion now. */
fb425e71 5310 if (target_mode != result_mode)
595d88b5 5311 {
5312 convert_move (target, op0, 0);
5313 return target;
5314 }
5315 else
5316 return op0;
5317}
5318
db96f378 5319
cf564daf 5320/* A subroutine of emit_store_flag only including "tricks" that do not
5321 need a recursive call. These are kept separate to avoid infinite
5322 loops. */
db96f378 5323
cf564daf 5324static rtx
5325emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
d68bc06a 5326 enum machine_mode mode, int unsignedp, int normalizep,
5327 enum machine_mode target_mode)
db96f378 5328{
5329 rtx subtarget;
5330 enum insn_code icode;
5331 enum machine_mode compare_mode;
74f4459c 5332 enum mode_class mclass;
fb425e71 5333 enum rtx_code scode;
cf564daf 5334 rtx tem;
db96f378 5335
1a29b174 5336 if (unsignedp)
5337 code = unsigned_condition (code);
fb425e71 5338 scode = swap_condition (code);
1a29b174 5339
9e2944e9 5340 /* If one operand is constant, make it the second one. Only do this
5341 if the other operand is not constant as well. */
5342
f5ef1390 5343 if (swap_commutative_operands_p (op0, op1))
9e2944e9 5344 {
5345 tem = op0;
5346 op0 = op1;
5347 op1 = tem;
5348 code = swap_condition (code);
5349 }
5350
b65270aa 5351 if (mode == VOIDmode)
5352 mode = GET_MODE (op0);
5353
a4194ff7 5354 /* For some comparisons with 1 and -1, we can convert this to
db96f378 5355 comparisons with zero. This will often produce more opportunities for
a92771b8 5356 store-flag insns. */
db96f378 5357
5358 switch (code)
5359 {
5360 case LT:
5361 if (op1 == const1_rtx)
5362 op1 = const0_rtx, code = LE;
5363 break;
5364 case LE:
5365 if (op1 == constm1_rtx)
5366 op1 = const0_rtx, code = LT;
5367 break;
5368 case GE:
5369 if (op1 == const1_rtx)
5370 op1 = const0_rtx, code = GT;
5371 break;
5372 case GT:
5373 if (op1 == constm1_rtx)
5374 op1 = const0_rtx, code = GE;
5375 break;
5376 case GEU:
5377 if (op1 == const1_rtx)
5378 op1 = const0_rtx, code = NE;
5379 break;
5380 case LTU:
5381 if (op1 == const1_rtx)
5382 op1 = const0_rtx, code = EQ;
5383 break;
0dbd1c74 5384 default:
5385 break;
db96f378 5386 }
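  /* Illustrative note, not part of the original file: these rewrites use
     integer adjacency, e.g. x < 1 is the same test as x <= 0 and
     x > -1 the same as x >= 0; unsigned x >= 1 (GEU) degenerates to
     x != 0 and unsigned x < 1 (LTU) to x == 0.  */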
5387
2986c324 5388 /* If we are comparing a double-word integer with zero or -1, we can
5389 convert the comparison into one involving a single word. */
34a84294 5390 if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5391 && GET_MODE_CLASS (mode) == MODE_INT
e16ceb8e 5392 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
34a84294 5393 {
2986c324 5394 if ((code == EQ || code == NE)
5395 && (op1 == const0_rtx || op1 == constm1_rtx))
34a84294 5396 {
7d4098a0 5397 rtx op00, op01;
18df88fa 5398
595d88b5 5399 /* Do a logical OR or AND of the two words and compare the
5400 result. */
18df88fa 5401 op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5402 op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
7d4098a0 5403 tem = expand_binop (word_mode,
5404 op1 == const0_rtx ? ior_optab : and_optab,
5405 op00, op01, NULL_RTX, unsignedp,
5406 OPTAB_DIRECT);
2986c324 5407
7d4098a0 5408 if (tem != 0)
5409 tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
d68bc06a 5410 unsignedp, normalizep);
34a84294 5411 }
2986c324 5412 else if ((code == LT || code == GE) && op1 == const0_rtx)
18df88fa 5413 {
5414 rtx op0h;
5415
5416 /* If testing the sign bit, can just test on high word. */
5417 op0h = simplify_gen_subreg (word_mode, op0, mode,
595d88b5 5418 subreg_highpart_offset (word_mode,
5419 mode));
7d4098a0 5420 tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5421 unsignedp, normalizep);
5422 }
5423 else
5424 tem = NULL_RTX;
5425
5426 if (tem)
5427 {
d68bc06a 5428 if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
7d4098a0 5429 return tem;
d68bc06a 5430 if (!target)
5431 target = gen_reg_rtx (target_mode);
7d4098a0 5432
5433 convert_move (target, tem,
f92430e0 5434 !val_signbit_known_set_p (word_mode,
5435 (normalizep ? normalizep
5436 : STORE_FLAG_VALUE)));
7d4098a0 5437 return target;
18df88fa 5438 }
34a84294 5439 }
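  /* Illustrative note, not part of the original file: for a double-word
     x split into words hi:lo,

	 x == 0    <->  (lo | hi) == 0
	 x == -1   <->  (lo & hi) == -1
	 x <  0    <->  hi < 0

     which is why only EQ/NE against 0 or -1 and LT/GE against 0 can be
     narrowed to single-word compares here.  */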
5440
db96f378 5441 /* If this is A < 0 or A >= 0, we can do this by taking the ones
5442 complement of A (for GE) and shifting the sign bit to the low bit. */
5443 if (op1 == const0_rtx && (code == LT || code == GE)
5444 && GET_MODE_CLASS (mode) == MODE_INT
5445 && (normalizep || STORE_FLAG_VALUE == 1
f92430e0 5446 || val_signbit_p (mode, STORE_FLAG_VALUE)))
db96f378 5447 {
02bd6c04 5448 subtarget = target;
db96f378 5449
80e1bfa1 5450 if (!target)
5451 target_mode = mode;
5452
db96f378 5453 /* If the result is to be wider than OP0, it is best to convert it
5454 first. If it is to be narrower, it is *incorrect* to convert it
5455 first. */
80e1bfa1 5456 else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
db96f378 5457 {
72467481 5458 op0 = convert_modes (target_mode, mode, op0, 0);
db96f378 5459 mode = target_mode;
5460 }
5461
5462 if (target_mode != mode)
5463 subtarget = 0;
5464
5465 if (code == GE)
fbb71644 5466 op0 = expand_unop (mode, one_cmpl_optab, op0,
5467 ((STORE_FLAG_VALUE == 1 || normalizep)
5468 ? 0 : subtarget), 0);
db96f378 5469
fbb71644 5470 if (STORE_FLAG_VALUE == 1 || normalizep)
db96f378 5471 /* If we are supposed to produce a 0/1 value, we want to do
5472 a logical shift from the sign bit to the low-order bit; for
5473 a -1/0 value, we do an arithmetic shift. */
5474 op0 = expand_shift (RSHIFT_EXPR, mode, op0,
f5ff0b21 5475 GET_MODE_BITSIZE (mode) - 1,
db96f378 5476 subtarget, normalizep != -1);
5477
5478 if (mode != target_mode)
1d8c65d8 5479 op0 = convert_modes (target_mode, mode, op0, 0);
db96f378 5480
5481 return op0;
5482 }
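  /* Illustrative note, not part of the original file: for N-bit x, the
     logical shift (unsigned) x >> (N-1) is 1 iff x < 0, and
     (unsigned) ~x >> (N-1) is 1 iff x >= 0; the arithmetic shift forms
     give -1/0 instead, which is why normalizep picks the shift kind.  */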
5483
74f4459c 5484 mclass = GET_MODE_CLASS (mode);
5485 for (compare_mode = mode; compare_mode != VOIDmode;
5486 compare_mode = GET_MODE_WIDER_MODE (compare_mode))
595d88b5 5487 {
74f4459c 5488 enum machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
d6bf3b14 5489 icode = optab_handler (cstore_optab, optab_mode);
74f4459c 5490 if (icode != CODE_FOR_nothing)
595d88b5 5491 {
595d88b5 5492 do_pending_stack_adjust ();
cf564daf 5493 tem = emit_cstore (target, icode, code, mode, compare_mode,
d68bc06a 5494 unsignedp, op0, op1, normalizep, target_mode);
fb425e71 5495 if (tem)
5496 return tem;
db96f378 5497
fb425e71 5498 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
db96f378 5499 {
cf564daf 5500 tem = emit_cstore (target, icode, scode, mode, compare_mode,
d68bc06a 5501 unsignedp, op1, op0, normalizep, target_mode);
fb425e71 5502 if (tem)
5503 return tem;
db96f378 5504 }
74f4459c 5505 break;
db96f378 5506 }
5507 }
5508
cf564daf 5509 return 0;
5510}
5511
5512/* Emit a store-flags instruction for comparison CODE on OP0 and OP1
 5513	   and store the result in TARGET. Normally return TARGET.
5514 Return 0 if that cannot be done.
5515
5516 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5517 it is VOIDmode, they cannot both be CONST_INT.
5518
5519 UNSIGNEDP is for the case where we have to widen the operands
5520 to perform the operation. It says to use zero-extension.
5521
5522 NORMALIZEP is 1 if we should convert the result to be either zero
 5523	   or one. NORMALIZEP is -1 if we should convert the result to be
5524 either zero or -1. If NORMALIZEP is zero, the result will be left
5525 "raw" out of the scc insn. */
5526
5527rtx
5528emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5529 enum machine_mode mode, int unsignedp, int normalizep)
5530{
5531 enum machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5532 enum rtx_code rcode;
5533 rtx subtarget;
5534 rtx tem, last, trueval;
5535
d68bc06a 5536 tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5537 target_mode);
cf564daf 5538 if (tem)
5539 return tem;
db96f378 5540
80e1bfa1 5541 /* If we reached here, we can't do this with a scc insn, however there
5542 are some comparisons that can be done in other ways. Don't do any
5543 of these cases if branches are very cheap. */
5544 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5545 return 0;
5546
5547 /* See what we need to return. We can only return a 1, -1, or the
5548 sign bit. */
5549
5550 if (normalizep == 0)
5551 {
5552 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5553 normalizep = STORE_FLAG_VALUE;
5554
f92430e0 5555 else if (val_signbit_p (mode, STORE_FLAG_VALUE))
80e1bfa1 5556 ;
5557 else
5558 return 0;
5559 }
5560
cf564daf 5561 last = get_last_insn ();
5562
a1ad7483 5563 /* If optimizing, use different pseudo registers for each insn, instead
5564 of reusing the same pseudo. This leads to better CSE, but slows
 5565	     down the compiler, since there are more pseudos. */
5566 subtarget = (!optimize
98f85d3a 5567 && (target_mode == mode)) ? target : NULL_RTX;
80e1bfa1 5568 trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5569
5570 /* For floating-point comparisons, try the reverse comparison or try
5571 changing the "orderedness" of the comparison. */
5572 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5573 {
5574 enum rtx_code first_code;
5575 bool and_them;
5576
5577 rcode = reverse_condition_maybe_unordered (code);
5578 if (can_compare_p (rcode, mode, ccp_store_flag)
5579 && (code == ORDERED || code == UNORDERED
5580 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5581 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5582 {
12df6aa8 5583 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5584 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5585
80e1bfa1 5586 /* For the reverse comparison, use either an addition or a XOR. */
12df6aa8 5587 if (want_add
20d892d1 5588 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
12df6aa8 5589 optimize_insn_for_speed_p ()) == 0)
80e1bfa1 5590 {
cf564daf 5591 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
d68bc06a 5592 STORE_FLAG_VALUE, target_mode);
80e1bfa1 5593 if (tem)
5594 return expand_binop (target_mode, add_optab, tem,
5595 GEN_INT (normalizep),
5596 target, 0, OPTAB_WIDEN);
5597 }
12df6aa8 5598 else if (!want_add
20d892d1 5599 && rtx_cost (trueval, XOR, 1,
12df6aa8 5600 optimize_insn_for_speed_p ()) == 0)
80e1bfa1 5601 {
cf564daf 5602 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
d68bc06a 5603 normalizep, target_mode);
80e1bfa1 5604 if (tem)
5605 return expand_binop (target_mode, xor_optab, tem, trueval,
5606 target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5607 }
5608 }
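      /* Illustrative note, not part of the original file: when only the
	 reversed condition is available, the desired flag is recovered
	 arithmetically from the reversed result rev, e.g. with 0/1
	 values rev + (-1) yields the 0/-1 form of the original
	 condition, and rev ^ trueval flips a 0/1 or 0/-1 flag in place;
	 a free add or xor thus replaces an explicit inversion.  */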
5609
5610 delete_insns_since (last);
5611
5612 /* Cannot split ORDERED and UNORDERED, only try the above trick. */
5613 if (code == ORDERED || code == UNORDERED)
5614 return 0;
48e1416a 5615
80e1bfa1 5616 and_them = split_comparison (code, mode, &first_code, &code);
5617
5618 /* If there are no NaNs, the first comparison should always fall through.
5619 Effectively change the comparison to the other one. */
5620 if (!HONOR_NANS (mode))
5621 {
5622 gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
d68bc06a 5623 return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5624 target_mode);
80e1bfa1 5625 }
5626
5627#ifdef HAVE_conditional_move
5628 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5629 conditional move. */
d68bc06a 5630 tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5631 normalizep, target_mode);
80e1bfa1 5632 if (tem == 0)
5633 return 0;
5634
5635 if (and_them)
5636 tem = emit_conditional_move (target, code, op0, op1, mode,
5637 tem, const0_rtx, GET_MODE (tem), 0);
5638 else
5639 tem = emit_conditional_move (target, code, op0, op1, mode,
5640 trueval, tem, GET_MODE (tem), 0);
5641
5642 if (tem == 0)
5643 delete_insns_since (last);
5644 return tem;
5645#else
5646 return 0;
5647#endif
5648 }
db96f378 5649
80e1bfa1 5650 /* The remaining tricks only apply to integer comparisons. */
5651
5652 if (GET_MODE_CLASS (mode) != MODE_INT)
5653 return 0;
5654
5655 /* If this is an equality comparison of integers, we can try to exclusive-or
db96f378 5656 (or subtract) the two operands and use a recursive call to try the
5657 comparison with zero. Don't do any of these cases if branches are
5658 very cheap. */
5659
80e1bfa1 5660 if ((code == EQ || code == NE) && op1 != const0_rtx)
db96f378 5661 {
5662 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5663 OPTAB_WIDEN);
5664
5665 if (tem == 0)
5666 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5667 OPTAB_WIDEN);
5668 if (tem != 0)
39a4aea8 5669 tem = emit_store_flag (target, code, tem, const0_rtx,
5670 mode, unsignedp, normalizep);
80e1bfa1 5671 if (tem != 0)
5672 return tem;
5673
5674 delete_insns_since (last);
5675 }
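  /* Illustrative note, not part of the original file: a == b exactly
     when (a ^ b) == 0, and likewise (a - b) == 0, so equality against an
     arbitrary operand reduces to the compare-with-zero cases handled by
     the recursive call above.  */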
5676
5677 /* For integer comparisons, try the reverse comparison. However, for
 5678	   small X, and if we'd have to extend anyway, implementing "X != 0"
5679 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
5680 rcode = reverse_condition (code);
5681 if (can_compare_p (rcode, mode, ccp_store_flag)
d6bf3b14 5682 && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
80e1bfa1 5683 && code == NE
5684 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5685 && op1 == const0_rtx))
5686 {
12df6aa8 5687 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5688 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5689
80e1bfa1 5690 /* Again, for the reverse comparison, use either an addition or a XOR. */
12df6aa8 5691 if (want_add
20d892d1 5692 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
12df6aa8 5693 optimize_insn_for_speed_p ()) == 0)
80e1bfa1 5694 {
cf564daf 5695 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
d68bc06a 5696 STORE_FLAG_VALUE, target_mode);
80e1bfa1 5697 if (tem != 0)
5698 tem = expand_binop (target_mode, add_optab, tem,
5699 GEN_INT (normalizep), target, 0, OPTAB_WIDEN);
5700 }
12df6aa8 5701 else if (!want_add
20d892d1 5702 && rtx_cost (trueval, XOR, 1,
12df6aa8 5703 optimize_insn_for_speed_p ()) == 0)
80e1bfa1 5704 {
cf564daf 5705 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
d68bc06a 5706 normalizep, target_mode);
80e1bfa1 5707 if (tem != 0)
5708 tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5709 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5710 }
5711
5712 if (tem != 0)
5713 return tem;
5714 delete_insns_since (last);
db96f378 5715 }
5716
a4194ff7 5717 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
db96f378 5718 the constant zero. Reject all other comparisons at this point. Only
5719 do LE and GT if branches are expensive since they are expensive on
5720 2-operand machines. */
5721
80e1bfa1 5722 if (op1 != const0_rtx
db96f378 5723 || (code != EQ && code != NE
4a9d7ef7 5724 && (BRANCH_COST (optimize_insn_for_speed_p (),
5725 false) <= 1 || (code != LE && code != GT))))
db96f378 5726 return 0;
5727
db96f378 5728 /* Try to put the result of the comparison in the sign bit. Assume we can't
5729 do the necessary operation below. */
5730
5731 tem = 0;
5732
5733 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5734 the sign bit set. */
5735
5736 if (code == LE)
5737 {
5738 /* This is destructive, so SUBTARGET can't be OP0. */
5739 if (rtx_equal_p (subtarget, op0))
5740 subtarget = 0;
5741
5742 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5743 OPTAB_WIDEN);
5744 if (tem)
5745 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5746 OPTAB_WIDEN);
5747 }
5748
 5749	  /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5750 number of bits in the mode of OP0, minus one. */
5751
5752 if (code == GT)
5753 {
5754 if (rtx_equal_p (subtarget, op0))
5755 subtarget = 0;
5756
5757 tem = expand_shift (RSHIFT_EXPR, mode, op0,
f5ff0b21 5758 GET_MODE_BITSIZE (mode) - 1,
db96f378 5759 subtarget, 0);
5760 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5761 OPTAB_WIDEN);
5762 }
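  /* Illustrative note, not part of the original file: both tricks leave
     the answer in the sign bit:

	 A <= 0  <->  (A | (A - 1)) < 0
	 A >  0  <->  ((A >> (N-1)) - A) < 0

     for A == 0 the first is 0 | -1; for A > 0 the second is 0 - A < 0,
     while for A <= 0 it is -1 - A = ~A >= 0.  */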
a4194ff7 5763
db96f378 5764 if (code == EQ || code == NE)
5765 {
5766 /* For EQ or NE, one way to do the comparison is to apply an operation
6ef828f9 5767 that converts the operand into a positive number if it is nonzero
db96f378 5768 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5769 for NE we negate. This puts the result in the sign bit. Then we
a4194ff7 5770 normalize with a shift, if needed.
db96f378 5771
5772 Two operations that can do the above actions are ABS and FFS, so try
5773 them. If that doesn't work, and MODE is smaller than a full word,
2f4e12a2 5774 we can use zero-extension to the wider mode (an unsigned conversion)
db96f378 5775 as the operation. */
5776
a4194ff7 5777 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5778 that is compensated by the subsequent overflow when subtracting
6312a35e 5779 one / negating. */
bec2d490 5780
d6bf3b14 5781 if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
db96f378 5782 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
d6bf3b14 5783 else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
db96f378 5784 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5785 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5786 {
1d8c65d8 5787 tem = convert_modes (word_mode, mode, op0, 1);
72467481 5788 mode = word_mode;
db96f378 5789 }
5790
5791 if (tem != 0)
5792 {
5793 if (code == EQ)
5794 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5795 0, OPTAB_WIDEN);
5796 else
5797 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5798 }
5799
5800 /* If we couldn't do it that way, for NE we can "or" the two's complement
5801 of the value with itself. For EQ, we take the one's complement of
5802 that "or", which is an extra insn, so we only handle EQ if branches
5803 are expensive. */
5804
4a9d7ef7 5805 if (tem == 0
5806 && (code == NE
5807 || BRANCH_COST (optimize_insn_for_speed_p (),
5808 false) > 1))
db96f378 5809 {
2f4e12a2 5810 if (rtx_equal_p (subtarget, op0))
5811 subtarget = 0;
5812
db96f378 5813 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5814 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5815 OPTAB_WIDEN);
5816
5817 if (tem && code == EQ)
5818 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5819 }
5820 }
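      /* Illustrative note, not part of the original file: abs and ffs
	 both map x to a value that is zero iff x was zero and positive
	 otherwise, so subtracting 1 (EQ) or negating (NE) parks the
	 answer in the sign bit.  The fallback (-x | x) has its sign bit
	 set iff x != 0, and a one's complement then covers EQ; abs of
	 the minimum integer wraps to itself, which the later subtract or
	 negate absorbs, as the comment above notes.  */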
5821
5822 if (tem && normalizep)
5823 tem = expand_shift (RSHIFT_EXPR, mode, tem,
f5ff0b21 5824 GET_MODE_BITSIZE (mode) - 1,
98f85d3a 5825 subtarget, normalizep == 1);
db96f378 5826
98f85d3a 5827 if (tem)
db96f378 5828 {
80e1bfa1 5829 if (!target)
5830 ;
5831 else if (GET_MODE (tem) != target_mode)
98f85d3a 5832 {
5833 convert_move (target, tem, 0);
5834 tem = target;
5835 }
5836 else if (!subtarget)
5837 {
5838 emit_move_insn (target, tem);
5839 tem = target;
5840 }
db96f378 5841 }
98f85d3a 5842 else
db96f378 5843 delete_insns_since (last);
5844
5845 return tem;
5846}
469b49fd 5847
5848/* Like emit_store_flag, but always succeeds. */
5849
5850rtx
35cb5232 5851emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5852 enum machine_mode mode, int unsignedp, int normalizep)
469b49fd 5853{
5854 rtx tem, label;
80e1bfa1 5855 rtx trueval, falseval;
469b49fd 5856
5857 /* First see if emit_store_flag can do the job. */
5858 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5859 if (tem != 0)
5860 return tem;
5861
80e1bfa1 5862 if (!target)
5863 target = gen_reg_rtx (word_mode);
469b49fd 5864
80e1bfa1 5865 /* If this failed, we have to do this with set/compare/jump/set code.
5866 For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */
5867 trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
48e1416a 5868 if (code == NE
80e1bfa1 5869 && GET_MODE_CLASS (mode) == MODE_INT
5870 && REG_P (target)
5871 && op0 == target
5872 && op1 == const0_rtx)
5873 {
5874 label = gen_label_rtx ();
5875 do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp,
79ab74cc 5876 mode, NULL_RTX, NULL_RTX, label, -1);
80e1bfa1 5877 emit_move_insn (target, trueval);
5878 emit_label (label);
5879 return target;
5880 }
469b49fd 5881
8ad4c111 5882 if (!REG_P (target)
469b49fd 5883 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5884 target = gen_reg_rtx (GET_MODE (target));
5885
80e1bfa1 5886 /* Jump in the right direction if the target cannot implement CODE
5887 but can jump on its reverse condition. */
5888 falseval = const0_rtx;
5889 if (! can_compare_p (code, mode, ccp_jump)
5890 && (! FLOAT_MODE_P (mode)
5891 || code == ORDERED || code == UNORDERED
5892 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5893 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5894 {
5895 enum rtx_code rcode;
5896 if (FLOAT_MODE_P (mode))
5897 rcode = reverse_condition_maybe_unordered (code);
5898 else
5899 rcode = reverse_condition (code);
5900
5901 /* Canonicalize to UNORDERED for the libcall. */
5902 if (can_compare_p (rcode, mode, ccp_jump)
5903 || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5904 {
5905 falseval = trueval;
5906 trueval = const0_rtx;
5907 code = rcode;
5908 }
5909 }
5910
5911 emit_move_insn (target, trueval);
469b49fd 5912 label = gen_label_rtx ();
7e69f45b 5913 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
79ab74cc 5914 NULL_RTX, label, -1);
469b49fd 5915
80e1bfa1 5916 emit_move_insn (target, falseval);
db96f378 5917 emit_label (label);
5918
5919 return target;
5920}
c5aa1e92 5921\f
5922/* Perform possibly multi-word comparison and conditional jump to LABEL
85afca2d 5923	   if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE. This is
5924 now a thin wrapper around do_compare_rtx_and_jump. */
c5aa1e92 5925
5926static void
35cb5232 5927do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5928 rtx label)
c5aa1e92 5929{
85afca2d 5930 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5931 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
79ab74cc 5932 NULL_RTX, NULL_RTX, label, -1);
c5aa1e92 5933}