/* Medium-level subroutines: convert bit-field store and extract
   and shifts, multiplies and divides to rtl instructions.
   Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
   1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
23 | ||
24 | #include "config.h" | |
670ee920 | 25 | #include "system.h" |
4977bab6 ZW |
26 | #include "coretypes.h" |
27 | #include "tm.h" | |
01198c2f | 28 | #include "toplev.h" |
44037a66 TG |
29 | #include "rtl.h" |
30 | #include "tree.h" | |
6baf1cc8 | 31 | #include "tm_p.h" |
44037a66 | 32 | #include "flags.h" |
44037a66 TG |
33 | #include "insn-config.h" |
34 | #include "expr.h" | |
e78d8e51 | 35 | #include "optabs.h" |
44037a66 TG |
36 | #include "real.h" |
37 | #include "recog.h" | |
b0c48229 | 38 | #include "langhooks.h" |
6fb5fa3c | 39 | #include "df.h" |
0890b981 | 40 | #include "target.h" |
44037a66 | 41 | |
502b8322 AJ |
42 | static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT, |
43 | unsigned HOST_WIDE_INT, | |
44 | unsigned HOST_WIDE_INT, rtx); | |
45 | static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT, | |
46 | unsigned HOST_WIDE_INT, rtx); | |
47 | static rtx extract_fixed_bit_field (enum machine_mode, rtx, | |
48 | unsigned HOST_WIDE_INT, | |
49 | unsigned HOST_WIDE_INT, | |
50 | unsigned HOST_WIDE_INT, rtx, int); | |
51 | static rtx mask_rtx (enum machine_mode, int, int, int); | |
52 | static rtx lshift_value (enum machine_mode, rtx, int, int); | |
53 | static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT, | |
54 | unsigned HOST_WIDE_INT, int); | |
55 | static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx); | |
0b55e932 | 56 | static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT); |
39cab019 | 57 | static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT); |
44037a66 | 58 | |
58b42e19 RS |
59 | /* Test whether a value is zero of a power of two. */ |
60 | #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0) | |
61 | ||
cc2902df | 62 | /* Nonzero means divides or modulus operations are relatively cheap for |
c410d49e | 63 | powers of two, so don't use branches; emit the operation instead. |
44037a66 TG |
64 | Usually, this will mean that the MD file will emit non-branch |
65 | sequences. */ | |
66 | ||
fdded401 RS |
67 | static bool sdiv_pow2_cheap[NUM_MACHINE_MODES]; |
68 | static bool smod_pow2_cheap[NUM_MACHINE_MODES]; | |
44037a66 | 69 | |
c7e33f89 | 70 | #ifndef SLOW_UNALIGNED_ACCESS |
e1565e65 | 71 | #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT |
c7e33f89 RS |
72 | #endif |
73 | ||
e49a094d RS |
74 | /* For compilers that support multiple targets with different word sizes, |
75 | MAX_BITS_PER_WORD contains the biggest value of BITS_PER_WORD. An example | |
76 | is the H8/300(H) compiler. */ | |
77 | ||
78 | #ifndef MAX_BITS_PER_WORD | |
79 | #define MAX_BITS_PER_WORD BITS_PER_WORD | |
80 | #endif | |
81 | ||
d523b40e RH |
82 | /* Reduce conditional compilation elsewhere. */ |
83 | #ifndef HAVE_insv | |
a242b083 | 84 | #define HAVE_insv 0 |
d523b40e RH |
85 | #define CODE_FOR_insv CODE_FOR_nothing |
86 | #define gen_insv(a,b,c,d) NULL_RTX | |
87 | #endif | |
88 | #ifndef HAVE_extv | |
a242b083 | 89 | #define HAVE_extv 0 |
d523b40e RH |
90 | #define CODE_FOR_extv CODE_FOR_nothing |
91 | #define gen_extv(a,b,c,d) NULL_RTX | |
92 | #endif | |
93 | #ifndef HAVE_extzv | |
a242b083 | 94 | #define HAVE_extzv 0 |
d523b40e RH |
95 | #define CODE_FOR_extzv CODE_FOR_nothing |
96 | #define gen_extzv(a,b,c,d) NULL_RTX | |
97 | #endif | |
98 | ||
c5c76735 JL |
99 | /* Cost of various pieces of RTL. Note that some of these are indexed by |
100 | shift count and some by mode. */ | |
58777718 RS |
101 | static int zero_cost; |
102 | static int add_cost[NUM_MACHINE_MODES]; | |
103 | static int neg_cost[NUM_MACHINE_MODES]; | |
965703ed RS |
104 | static int shift_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD]; |
105 | static int shiftadd_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD]; | |
106 | static int shiftsub_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD]; | |
71af73bb | 107 | static int mul_cost[NUM_MACHINE_MODES]; |
a28b2ac6 RS |
108 | static int sdiv_cost[NUM_MACHINE_MODES]; |
109 | static int udiv_cost[NUM_MACHINE_MODES]; | |
71af73bb TG |
110 | static int mul_widen_cost[NUM_MACHINE_MODES]; |
111 | static int mul_highpart_cost[NUM_MACHINE_MODES]; | |
44037a66 | 112 | |
44037a66 | 113 | void |
502b8322 | 114 | init_expmed (void) |
44037a66 | 115 | { |
79b4a8dc RH |
116 | struct |
117 | { | |
fdded401 | 118 | struct rtx_def reg; rtunion reg_fld[2]; |
79b4a8dc RH |
119 | struct rtx_def plus; rtunion plus_fld1; |
120 | struct rtx_def neg; | |
79b4a8dc | 121 | struct rtx_def mult; rtunion mult_fld1; |
a28b2ac6 RS |
122 | struct rtx_def sdiv; rtunion sdiv_fld1; |
123 | struct rtx_def udiv; rtunion udiv_fld1; | |
79b4a8dc | 124 | struct rtx_def zext; |
a28b2ac6 RS |
125 | struct rtx_def sdiv_32; rtunion sdiv_32_fld1; |
126 | struct rtx_def smod_32; rtunion smod_32_fld1; | |
79b4a8dc RH |
127 | struct rtx_def wide_mult; rtunion wide_mult_fld1; |
128 | struct rtx_def wide_lshr; rtunion wide_lshr_fld1; | |
129 | struct rtx_def wide_trunc; | |
130 | struct rtx_def shift; rtunion shift_fld1; | |
131 | struct rtx_def shift_mult; rtunion shift_mult_fld1; | |
132 | struct rtx_def shift_add; rtunion shift_add_fld1; | |
133 | struct rtx_def shift_sub; rtunion shift_sub_fld1; | |
134 | } all; | |
135 | ||
965703ed RS |
136 | rtx pow2[MAX_BITS_PER_WORD]; |
137 | rtx cint[MAX_BITS_PER_WORD]; | |
965703ed | 138 | int m, n; |
71af73bb | 139 | enum machine_mode mode, wider_mode; |
44037a66 | 140 | |
172a1cb0 | 141 | zero_cost = rtx_cost (const0_rtx, 0); |
38a448ca | 142 | |
965703ed RS |
143 | for (m = 1; m < MAX_BITS_PER_WORD; m++) |
144 | { | |
145 | pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m); | |
146 | cint[m] = GEN_INT (m); | |
147 | } | |
44037a66 | 148 | |
79b4a8dc RH |
149 | memset (&all, 0, sizeof all); |
150 | ||
151 | PUT_CODE (&all.reg, REG); | |
1d27fed4 | 152 | /* Avoid using hard regs in ways which may be unsupported. */ |
6fb5fa3c | 153 | SET_REGNO (&all.reg, LAST_VIRTUAL_REGISTER + 1); |
79b4a8dc RH |
154 | |
155 | PUT_CODE (&all.plus, PLUS); | |
156 | XEXP (&all.plus, 0) = &all.reg; | |
157 | XEXP (&all.plus, 1) = &all.reg; | |
158 | ||
159 | PUT_CODE (&all.neg, NEG); | |
160 | XEXP (&all.neg, 0) = &all.reg; | |
161 | ||
79b4a8dc RH |
162 | PUT_CODE (&all.mult, MULT); |
163 | XEXP (&all.mult, 0) = &all.reg; | |
164 | XEXP (&all.mult, 1) = &all.reg; | |
165 | ||
a28b2ac6 RS |
166 | PUT_CODE (&all.sdiv, DIV); |
167 | XEXP (&all.sdiv, 0) = &all.reg; | |
168 | XEXP (&all.sdiv, 1) = &all.reg; | |
79b4a8dc | 169 | |
a28b2ac6 RS |
170 | PUT_CODE (&all.udiv, UDIV); |
171 | XEXP (&all.udiv, 0) = &all.reg; | |
172 | XEXP (&all.udiv, 1) = &all.reg; | |
173 | ||
174 | PUT_CODE (&all.sdiv_32, DIV); | |
175 | XEXP (&all.sdiv_32, 0) = &all.reg; | |
176 | XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? cint[32] : GEN_INT (32); | |
177 | ||
178 | PUT_CODE (&all.smod_32, MOD); | |
179 | XEXP (&all.smod_32, 0) = &all.reg; | |
180 | XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1); | |
79b4a8dc RH |
181 | |
182 | PUT_CODE (&all.zext, ZERO_EXTEND); | |
183 | XEXP (&all.zext, 0) = &all.reg; | |
184 | ||
185 | PUT_CODE (&all.wide_mult, MULT); | |
186 | XEXP (&all.wide_mult, 0) = &all.zext; | |
187 | XEXP (&all.wide_mult, 1) = &all.zext; | |
188 | ||
189 | PUT_CODE (&all.wide_lshr, LSHIFTRT); | |
190 | XEXP (&all.wide_lshr, 0) = &all.wide_mult; | |
191 | ||
192 | PUT_CODE (&all.wide_trunc, TRUNCATE); | |
193 | XEXP (&all.wide_trunc, 0) = &all.wide_lshr; | |
194 | ||
195 | PUT_CODE (&all.shift, ASHIFT); | |
196 | XEXP (&all.shift, 0) = &all.reg; | |
197 | ||
198 | PUT_CODE (&all.shift_mult, MULT); | |
199 | XEXP (&all.shift_mult, 0) = &all.reg; | |
200 | ||
201 | PUT_CODE (&all.shift_add, PLUS); | |
202 | XEXP (&all.shift_add, 0) = &all.shift_mult; | |
203 | XEXP (&all.shift_add, 1) = &all.reg; | |
204 | ||
205 | PUT_CODE (&all.shift_sub, MINUS); | |
206 | XEXP (&all.shift_sub, 0) = &all.shift_mult; | |
207 | XEXP (&all.shift_sub, 1) = &all.reg; | |
208 | ||
71af73bb TG |
209 | for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); |
210 | mode != VOIDmode; | |
211 | mode = GET_MODE_WIDER_MODE (mode)) | |
212 | { | |
79b4a8dc RH |
213 | PUT_MODE (&all.reg, mode); |
214 | PUT_MODE (&all.plus, mode); | |
215 | PUT_MODE (&all.neg, mode); | |
79b4a8dc | 216 | PUT_MODE (&all.mult, mode); |
a28b2ac6 RS |
217 | PUT_MODE (&all.sdiv, mode); |
218 | PUT_MODE (&all.udiv, mode); | |
219 | PUT_MODE (&all.sdiv_32, mode); | |
220 | PUT_MODE (&all.smod_32, mode); | |
79b4a8dc RH |
221 | PUT_MODE (&all.wide_trunc, mode); |
222 | PUT_MODE (&all.shift, mode); | |
223 | PUT_MODE (&all.shift_mult, mode); | |
224 | PUT_MODE (&all.shift_add, mode); | |
225 | PUT_MODE (&all.shift_sub, mode); | |
226 | ||
227 | add_cost[mode] = rtx_cost (&all.plus, SET); | |
228 | neg_cost[mode] = rtx_cost (&all.neg, SET); | |
79b4a8dc | 229 | mul_cost[mode] = rtx_cost (&all.mult, SET); |
a28b2ac6 RS |
230 | sdiv_cost[mode] = rtx_cost (&all.sdiv, SET); |
231 | udiv_cost[mode] = rtx_cost (&all.udiv, SET); | |
79b4a8dc | 232 | |
a28b2ac6 RS |
233 | sdiv_pow2_cheap[mode] = (rtx_cost (&all.sdiv_32, SET) |
234 | <= 2 * add_cost[mode]); | |
235 | smod_pow2_cheap[mode] = (rtx_cost (&all.smod_32, SET) | |
236 | <= 4 * add_cost[mode]); | |
58777718 | 237 | |
71af73bb TG |
238 | wider_mode = GET_MODE_WIDER_MODE (mode); |
239 | if (wider_mode != VOIDmode) | |
240 | { | |
79b4a8dc RH |
241 | PUT_MODE (&all.zext, wider_mode); |
242 | PUT_MODE (&all.wide_mult, wider_mode); | |
243 | PUT_MODE (&all.wide_lshr, wider_mode); | |
244 | XEXP (&all.wide_lshr, 1) = GEN_INT (GET_MODE_BITSIZE (mode)); | |
245 | ||
246 | mul_widen_cost[wider_mode] = rtx_cost (&all.wide_mult, SET); | |
247 | mul_highpart_cost[mode] = rtx_cost (&all.wide_trunc, SET); | |
71af73bb | 248 | } |
71af73bb | 249 | |
79b4a8dc RH |
250 | shift_cost[mode][0] = 0; |
251 | shiftadd_cost[mode][0] = shiftsub_cost[mode][0] = add_cost[mode]; | |
252 | ||
253 | n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode)); | |
254 | for (m = 1; m < n; m++) | |
255 | { | |
256 | XEXP (&all.shift, 1) = cint[m]; | |
257 | XEXP (&all.shift_mult, 1) = pow2[m]; | |
58777718 | 258 | |
79b4a8dc RH |
259 | shift_cost[mode][m] = rtx_cost (&all.shift, SET); |
260 | shiftadd_cost[mode][m] = rtx_cost (&all.shift_add, SET); | |
261 | shiftsub_cost[mode][m] = rtx_cost (&all.shift_sub, SET); | |
262 | } | |
263 | } | |
44037a66 TG |
264 | } |
265 | ||
266 | /* Return an rtx representing minus the value of X. | |
267 | MODE is the intended mode of the result, | |
268 | useful if X is a CONST_INT. */ | |
269 | ||
270 | rtx | |
502b8322 | 271 | negate_rtx (enum machine_mode mode, rtx x) |
44037a66 | 272 | { |
a39a7484 RK |
273 | rtx result = simplify_unary_operation (NEG, mode, x, mode); |
274 | ||
fdb5537f | 275 | if (result == 0) |
a39a7484 RK |
276 | result = expand_unop (mode, neg_optab, x, NULL_RTX, 0); |
277 | ||
278 | return result; | |
44037a66 | 279 | } |
da920570 ZW |
280 | |
281 | /* Report on the availability of insv/extv/extzv and the desired mode | |
282 | of each of their operands. Returns MAX_MACHINE_MODE if HAVE_foo | |
283 | is false; else the mode of the specified operand. If OPNO is -1, | |
284 | all the caller cares about is whether the insn is available. */ | |
285 | enum machine_mode | |
502b8322 | 286 | mode_for_extraction (enum extraction_pattern pattern, int opno) |
da920570 ZW |
287 | { |
288 | const struct insn_data *data; | |
289 | ||
290 | switch (pattern) | |
291 | { | |
292 | case EP_insv: | |
da920570 ZW |
293 | if (HAVE_insv) |
294 | { | |
295 | data = &insn_data[CODE_FOR_insv]; | |
296 | break; | |
297 | } | |
da920570 ZW |
298 | return MAX_MACHINE_MODE; |
299 | ||
300 | case EP_extv: | |
da920570 ZW |
301 | if (HAVE_extv) |
302 | { | |
303 | data = &insn_data[CODE_FOR_extv]; | |
304 | break; | |
305 | } | |
da920570 ZW |
306 | return MAX_MACHINE_MODE; |
307 | ||
308 | case EP_extzv: | |
da920570 ZW |
309 | if (HAVE_extzv) |
310 | { | |
311 | data = &insn_data[CODE_FOR_extzv]; | |
312 | break; | |
313 | } | |
da920570 | 314 | return MAX_MACHINE_MODE; |
a242b083 ZW |
315 | |
316 | default: | |
5b0264cb | 317 | gcc_unreachable (); |
da920570 ZW |
318 | } |
319 | ||
320 | if (opno == -1) | |
321 | return VOIDmode; | |
322 | ||
323 | /* Everyone who uses this function used to follow it with | |
324 | if (result == VOIDmode) result = word_mode; */ | |
325 | if (data->operand[opno].mode == VOIDmode) | |
326 | return word_mode; | |
327 | return data->operand[opno].mode; | |
328 | } | |
329 | ||
6d7db3c5 RS |
330 | /* Return true if X, of mode MODE, matches the predicate for operand |
331 | OPNO of instruction ICODE. Allow volatile memories, regardless of | |
332 | the ambient volatile_ok setting. */ | |
44037a66 | 333 | |
6d7db3c5 RS |
334 | static bool |
335 | check_predicate_volatile_ok (enum insn_code icode, int opno, | |
336 | rtx x, enum machine_mode mode) | |
337 | { | |
338 | bool save_volatile_ok, result; | |
0d8e55d8 | 339 | |
6d7db3c5 RS |
340 | save_volatile_ok = volatile_ok; |
341 | result = insn_data[(int) icode].operand[opno].predicate (x, mode); | |
342 | volatile_ok = save_volatile_ok; | |
343 | return result; | |
344 | } | |
345 | \f | |
346 | /* A subroutine of store_bit_field, with the same arguments. Return true | |
347 | if the operation could be implemented. | |
44037a66 | 348 | |
6d7db3c5 RS |
349 | If FALLBACK_P is true, fall back to store_fixed_bit_field if we have |
350 | no other way of implementing the operation. If FALLBACK_P is false, | |
351 | return false instead. */ | |
352 | ||
353 | static bool | |
354 | store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize, | |
355 | unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode, | |
356 | rtx value, bool fallback_p) | |
44037a66 | 357 | { |
770ae6cc | 358 | unsigned int unit |
3c0cb5de | 359 | = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD; |
2c58f7dd | 360 | unsigned HOST_WIDE_INT offset, bitpos; |
b3694847 | 361 | rtx op0 = str_rtx; |
420e7dfa | 362 | int byte_offset; |
28526e20 | 363 | rtx orig_value; |
da920570 | 364 | |
a242b083 | 365 | enum machine_mode op_mode = mode_for_extraction (EP_insv, 3); |
44037a66 | 366 | |
44037a66 TG |
367 | while (GET_CODE (op0) == SUBREG) |
368 | { | |
369 | /* The following line once was done only if WORDS_BIG_ENDIAN, | |
370 | but I think that is a mistake. WORDS_BIG_ENDIAN is | |
371 | meaningful at a much higher level; when structures are copied | |
372 | between memory and regs, the higher-numbered regs | |
373 | always get higher addresses. */ | |
495db1a1 AK |
374 | int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0))); |
375 | int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0)); | |
376 | ||
377 | byte_offset = 0; | |
378 | ||
379 | /* Paradoxical subregs need special handling on big endian machines. */ | |
380 | if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size) | |
381 | { | |
382 | int difference = inner_mode_size - outer_mode_size; | |
383 | ||
384 | if (WORDS_BIG_ENDIAN) | |
385 | byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD; | |
386 | if (BYTES_BIG_ENDIAN) | |
387 | byte_offset += difference % UNITS_PER_WORD; | |
388 | } | |
389 | else | |
390 | byte_offset = SUBREG_BYTE (op0); | |
391 | ||
392 | bitnum += byte_offset * BITS_PER_UNIT; | |
44037a66 TG |
393 | op0 = SUBREG_REG (op0); |
394 | } | |
395 | ||
2c58f7dd RS |
396 | /* No action is needed if the target is a register and if the field |
397 | lies completely outside that register. This can occur if the source | |
398 | code contains an out-of-bounds access to a small array. */ | |
399 | if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0))) | |
6d7db3c5 | 400 | return true; |
2c58f7dd | 401 | |
b42271d6 | 402 | /* Use vec_set patterns for inserting parts of vectors whenever |
997404de JH |
403 | available. */ |
404 | if (VECTOR_MODE_P (GET_MODE (op0)) | |
3c0cb5de | 405 | && !MEM_P (op0) |
166cdb08 | 406 | && (optab_handler (vec_set_optab, GET_MODE (op0))->insn_code |
997404de JH |
407 | != CODE_FOR_nothing) |
408 | && fieldmode == GET_MODE_INNER (GET_MODE (op0)) | |
409 | && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0))) | |
410 | && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0))))) | |
411 | { | |
412 | enum machine_mode outermode = GET_MODE (op0); | |
413 | enum machine_mode innermode = GET_MODE_INNER (outermode); | |
166cdb08 | 414 | int icode = (int) optab_handler (vec_set_optab, outermode)->insn_code; |
997404de JH |
415 | int pos = bitnum / GET_MODE_BITSIZE (innermode); |
416 | rtx rtxpos = GEN_INT (pos); | |
417 | rtx src = value; | |
418 | rtx dest = op0; | |
419 | rtx pat, seq; | |
420 | enum machine_mode mode0 = insn_data[icode].operand[0].mode; | |
421 | enum machine_mode mode1 = insn_data[icode].operand[1].mode; | |
422 | enum machine_mode mode2 = insn_data[icode].operand[2].mode; | |
423 | ||
424 | start_sequence (); | |
425 | ||
426 | if (! (*insn_data[icode].operand[1].predicate) (src, mode1)) | |
427 | src = copy_to_mode_reg (mode1, src); | |
428 | ||
429 | if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2)) | |
430 | rtxpos = copy_to_mode_reg (mode1, rtxpos); | |
431 | ||
432 | /* We could handle this, but we should always be called with a pseudo | |
433 | for our targets and all insns should take them as outputs. */ | |
5b0264cb NS |
434 | gcc_assert ((*insn_data[icode].operand[0].predicate) (dest, mode0) |
435 | && (*insn_data[icode].operand[1].predicate) (src, mode1) | |
436 | && (*insn_data[icode].operand[2].predicate) (rtxpos, mode2)); | |
997404de JH |
437 | pat = GEN_FCN (icode) (dest, src, rtxpos); |
438 | seq = get_insns (); | |
439 | end_sequence (); | |
440 | if (pat) | |
441 | { | |
442 | emit_insn (seq); | |
443 | emit_insn (pat); | |
6d7db3c5 | 444 | return true; |
997404de JH |
445 | } |
446 | } | |
447 | ||
308ecea0 RH |
448 | /* If the target is a register, overwriting the entire object, or storing |
449 | a full-word or multi-word field can be done with just a SUBREG. | |
450 | ||
451 | If the target is memory, storing any naturally aligned field can be | |
452 | done with a simple store. For targets that support fast unaligned | |
0b69c29f | 453 | memory, any naturally sized, unit aligned field can be done directly. */ |
c410d49e | 454 | |
2c58f7dd RS |
455 | offset = bitnum / unit; |
456 | bitpos = bitnum % unit; | |
420e7dfa DN |
457 | byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT |
458 | + (offset * UNITS_PER_WORD); | |
459 | ||
57bfa49a | 460 | if (bitpos == 0 |
0b69c29f | 461 | && bitsize == GET_MODE_BITSIZE (fieldmode) |
3c0cb5de | 462 | && (!MEM_P (op0) |
420e7dfa | 463 | ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD |
ea9ea008 | 464 | || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode)) |
0fb7aeda | 465 | && byte_offset % GET_MODE_SIZE (fieldmode) == 0) |
04050c69 | 466 | : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0)) |
ea9ea008 | 467 | || (offset * BITS_PER_UNIT % bitsize == 0 |
04050c69 | 468 | && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0)))) |
44037a66 | 469 | { |
69498c64 DJ |
470 | if (MEM_P (op0)) |
471 | op0 = adjust_address (op0, fieldmode, offset); | |
472 | else if (GET_MODE (op0) != fieldmode) | |
473 | op0 = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0), | |
474 | byte_offset); | |
44037a66 | 475 | emit_move_insn (op0, value); |
6d7db3c5 | 476 | return true; |
44037a66 TG |
477 | } |
478 | ||
a8ca7756 JW |
479 | /* Make sure we are playing with integral modes. Pun with subregs |
480 | if we aren't. This must come after the entire register case above, | |
481 | since that case is valid for any mode. The following cases are only | |
482 | valid for integral modes. */ | |
483 | { | |
484 | enum machine_mode imode = int_mode_for_mode (GET_MODE (op0)); | |
485 | if (imode != GET_MODE (op0)) | |
486 | { | |
3c0cb5de | 487 | if (MEM_P (op0)) |
f4ef873c | 488 | op0 = adjust_address (op0, imode, 0); |
a8ca7756 | 489 | else |
5b0264cb NS |
490 | { |
491 | gcc_assert (imode != BLKmode); | |
492 | op0 = gen_lowpart (imode, op0); | |
493 | } | |
a8ca7756 JW |
494 | } |
495 | } | |
496 | ||
4e9bb42b AH |
497 | /* We may be accessing data outside the field, which means |
498 | we can alias adjacent data. */ | |
3c0cb5de | 499 | if (MEM_P (op0)) |
4e9bb42b AH |
500 | { |
501 | op0 = shallow_copy_rtx (op0); | |
502 | set_mem_alias_set (op0, 0); | |
503 | set_mem_expr (op0, 0); | |
504 | } | |
505 | ||
57bfa49a RZ |
506 | /* If OP0 is a register, BITPOS must count within a word. |
507 | But as we have it, it counts within whatever size OP0 now has. | |
508 | On a bigendian machine, these are not the same, so convert. */ | |
509 | if (BYTES_BIG_ENDIAN | |
3c0cb5de | 510 | && !MEM_P (op0) |
57bfa49a RZ |
511 | && unit > GET_MODE_BITSIZE (GET_MODE (op0))) |
512 | bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0)); | |
513 | ||
44037a66 TG |
514 | /* Storing an lsb-aligned field in a register |
515 | can be done with a movestrict instruction. */ | |
516 | ||
3c0cb5de | 517 | if (!MEM_P (op0) |
f76b9db2 | 518 | && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0) |
44037a66 | 519 | && bitsize == GET_MODE_BITSIZE (fieldmode) |
166cdb08 | 520 | && (optab_handler (movstrict_optab, fieldmode)->insn_code |
5e4900c7 | 521 | != CODE_FOR_nothing)) |
44037a66 | 522 | { |
166cdb08 | 523 | int icode = optab_handler (movstrict_optab, fieldmode)->insn_code; |
5e4900c7 | 524 | |
44037a66 | 525 | /* Get appropriate low part of the value being stored. */ |
f8cfc6aa | 526 | if (GET_CODE (value) == CONST_INT || REG_P (value)) |
44037a66 TG |
527 | value = gen_lowpart (fieldmode, value); |
528 | else if (!(GET_CODE (value) == SYMBOL_REF | |
529 | || GET_CODE (value) == LABEL_REF | |
530 | || GET_CODE (value) == CONST)) | |
531 | value = convert_to_mode (fieldmode, value, 0); | |
532 | ||
5e4900c7 JW |
533 | if (! (*insn_data[icode].operand[1].predicate) (value, fieldmode)) |
534 | value = copy_to_mode_reg (fieldmode, value); | |
535 | ||
536 | if (GET_CODE (op0) == SUBREG) | |
44037a66 | 537 | { |
5b0264cb NS |
538 | /* Else we've got some float mode source being extracted into |
539 | a different float mode destination -- this combination of | |
540 | subregs results in Severe Tire Damage. */ | |
541 | gcc_assert (GET_MODE (SUBREG_REG (op0)) == fieldmode | |
542 | || GET_MODE_CLASS (fieldmode) == MODE_INT | |
543 | || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT); | |
544 | op0 = SUBREG_REG (op0); | |
5e4900c7 | 545 | } |
470032d7 | 546 | |
5e4900c7 | 547 | emit_insn (GEN_FCN (icode) |
ddef6bc7 JJ |
548 | (gen_rtx_SUBREG (fieldmode, op0, |
549 | (bitnum % BITS_PER_WORD) / BITS_PER_UNIT | |
550 | + (offset * UNITS_PER_WORD)), | |
551 | value)); | |
470032d7 | 552 | |
6d7db3c5 | 553 | return true; |
44037a66 TG |
554 | } |
555 | ||
556 | /* Handle fields bigger than a word. */ | |
557 | ||
558 | if (bitsize > BITS_PER_WORD) | |
559 | { | |
560 | /* Here we transfer the words of the field | |
561 | in the order least significant first. | |
562 | This is because the most significant word is the one which may | |
ad83e87b PB |
563 | be less than full. |
564 | However, only do that if the value is not BLKmode. */ | |
565 | ||
770ae6cc RK |
566 | unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode; |
567 | unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD; | |
568 | unsigned int i; | |
6d7db3c5 | 569 | rtx last; |
44037a66 TG |
570 | |
571 | /* This is the mode we must force value to, so that there will be enough | |
572 | subwords to extract. Note that fieldmode will often (always?) be | |
573 | VOIDmode, because that is what store_field uses to indicate that this | |
535a42b1 NS |
574 | is a bit field, but passing VOIDmode to operand_subword_force |
575 | is not allowed. */ | |
9f5e2e11 RS |
576 | fieldmode = GET_MODE (value); |
577 | if (fieldmode == VOIDmode) | |
578 | fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT); | |
44037a66 | 579 | |
6d7db3c5 | 580 | last = get_last_insn (); |
44037a66 TG |
581 | for (i = 0; i < nwords; i++) |
582 | { | |
ad83e87b PB |
583 | /* If I is 0, use the low-order word in both field and target; |
584 | if I is 1, use the next to lowest word; and so on. */ | |
770ae6cc RK |
585 | unsigned int wordnum = (backwards ? nwords - i - 1 : i); |
586 | unsigned int bit_offset = (backwards | |
04050c69 RK |
587 | ? MAX ((int) bitsize - ((int) i + 1) |
588 | * BITS_PER_WORD, | |
589 | 0) | |
590 | : (int) i * BITS_PER_WORD); | |
6d7db3c5 | 591 | rtx value_word = operand_subword_force (value, wordnum, fieldmode); |
770ae6cc | 592 | |
6d7db3c5 RS |
593 | if (!store_bit_field_1 (op0, MIN (BITS_PER_WORD, |
594 | bitsize - i * BITS_PER_WORD), | |
595 | bitnum + bit_offset, word_mode, | |
596 | value_word, fallback_p)) | |
597 | { | |
598 | delete_insns_since (last); | |
599 | return false; | |
600 | } | |
44037a66 | 601 | } |
6d7db3c5 | 602 | return true; |
44037a66 TG |
603 | } |
604 | ||
605 | /* From here on we can assume that the field to be stored in is | |
606 | a full-word (whatever type that is), since it is shorter than a word. */ | |
607 | ||
608 | /* OFFSET is the number of words or bytes (UNIT says which) | |
609 | from STR_RTX to the first word or byte containing part of the field. */ | |
610 | ||
3c0cb5de | 611 | if (!MEM_P (op0)) |
44037a66 TG |
612 | { |
613 | if (offset != 0 | |
614 | || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD) | |
470032d7 | 615 | { |
f8cfc6aa | 616 | if (!REG_P (op0)) |
7be7a07d | 617 | { |
535a42b1 NS |
618 | /* Since this is a destination (lvalue), we can't copy |
619 | it to a pseudo. We can remove a SUBREG that does not | |
620 | change the size of the operand. Such a SUBREG may | |
621 | have been added above. */ | |
5b0264cb NS |
622 | gcc_assert (GET_CODE (op0) == SUBREG |
623 | && (GET_MODE_SIZE (GET_MODE (op0)) | |
624 | == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0))))); | |
625 | op0 = SUBREG_REG (op0); | |
7be7a07d | 626 | } |
470032d7 | 627 | op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0), |
ddef6bc7 | 628 | op0, (offset * UNITS_PER_WORD)); |
470032d7 | 629 | } |
44037a66 TG |
630 | offset = 0; |
631 | } | |
44037a66 | 632 | |
4f1da2e9 RS |
633 | /* If VALUE has a floating-point or complex mode, access it as an |
634 | integer of the corresponding size. This can occur on a machine | |
635 | with 64 bit registers that uses SFmode for float. It can also | |
636 | occur for unaligned float or complex fields. */ | |
28526e20 | 637 | orig_value = value; |
4f1da2e9 RS |
638 | if (GET_MODE (value) != VOIDmode |
639 | && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT | |
32b069d3 | 640 | && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT) |
4f1da2e9 RS |
641 | { |
642 | value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value))); | |
643 | emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value); | |
644 | } | |
2305bcad | 645 | |
44037a66 TG |
646 | /* Now OFFSET is nonzero only if OP0 is memory |
647 | and is therefore always measured in bytes. */ | |
648 | ||
a242b083 | 649 | if (HAVE_insv |
1d269b0c | 650 | && GET_MODE (value) != BLKmode |
3ab997e8 EB |
651 | && bitsize > 0 |
652 | && GET_MODE_BITSIZE (op_mode) >= bitsize | |
f8cfc6aa | 653 | && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG) |
f7acbf4c RS |
654 | && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode))) |
655 | && insn_data[CODE_FOR_insv].operand[1].predicate (GEN_INT (bitsize), | |
6d7db3c5 RS |
656 | VOIDmode) |
657 | && check_predicate_volatile_ok (CODE_FOR_insv, 0, op0, VOIDmode)) | |
44037a66 TG |
658 | { |
659 | int xbitpos = bitpos; | |
660 | rtx value1; | |
661 | rtx xop0 = op0; | |
662 | rtx last = get_last_insn (); | |
663 | rtx pat; | |
44037a66 TG |
664 | |
665 | /* Add OFFSET into OP0's address. */ | |
3c0cb5de | 666 | if (MEM_P (xop0)) |
f4ef873c | 667 | xop0 = adjust_address (xop0, byte_mode, offset); |
44037a66 | 668 | |
6d7db3c5 | 669 | /* If xop0 is a register, we need it in OP_MODE |
44037a66 TG |
670 | to make it acceptable to the format of insv. */ |
671 | if (GET_CODE (xop0) == SUBREG) | |
bac7cdfd DE |
672 | /* We can't just change the mode, because this might clobber op0, |
673 | and we will need the original value of op0 if insv fails. */ | |
6d7db3c5 RS |
674 | xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0)); |
675 | if (REG_P (xop0) && GET_MODE (xop0) != op_mode) | |
676 | xop0 = gen_rtx_SUBREG (op_mode, xop0, 0); | |
44037a66 TG |
677 | |
678 | /* On big-endian machines, we count bits from the most significant. | |
679 | If the bit field insn does not, we must invert. */ | |
680 | ||
f76b9db2 ILT |
681 | if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN) |
682 | xbitpos = unit - bitsize - xbitpos; | |
683 | ||
44037a66 TG |
684 | /* We have been counting XBITPOS within UNIT. |
685 | Count instead within the size of the register. */ | |
3c0cb5de | 686 | if (BITS_BIG_ENDIAN && !MEM_P (xop0)) |
6d7db3c5 | 687 | xbitpos += GET_MODE_BITSIZE (op_mode) - unit; |
f76b9db2 | 688 | |
6d7db3c5 | 689 | unit = GET_MODE_BITSIZE (op_mode); |
44037a66 | 690 | |
6d7db3c5 | 691 | /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */ |
44037a66 | 692 | value1 = value; |
6d7db3c5 | 693 | if (GET_MODE (value) != op_mode) |
44037a66 TG |
694 | { |
695 | if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize) | |
696 | { | |
697 | /* Optimization: Don't bother really extending VALUE | |
f5df292e RS |
698 | if it has all the bits we will actually use. However, |
699 | if we must narrow it, be sure we do it correctly. */ | |
44037a66 | 700 | |
6d7db3c5 | 701 | if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode)) |
c410d49e EC |
702 | { |
703 | rtx tmp; | |
704 | ||
6d7db3c5 | 705 | tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0); |
c410d49e | 706 | if (! tmp) |
6d7db3c5 | 707 | tmp = simplify_gen_subreg (op_mode, |
c410d49e EC |
708 | force_reg (GET_MODE (value), |
709 | value1), | |
710 | GET_MODE (value), 0); | |
711 | value1 = tmp; | |
712 | } | |
f5df292e | 713 | else |
6d7db3c5 | 714 | value1 = gen_lowpart (op_mode, value1); |
44037a66 | 715 | } |
69107307 | 716 | else if (GET_CODE (value) == CONST_INT) |
6d7db3c5 | 717 | value1 = gen_int_mode (INTVAL (value), op_mode); |
5b0264cb | 718 | else |
44037a66 TG |
719 | /* Parse phase is supposed to make VALUE's data type |
720 | match that of the component reference, which is a type | |
721 | at least as wide as the field; so VALUE should have | |
722 | a mode that corresponds to that type. */ | |
5b0264cb | 723 | gcc_assert (CONSTANT_P (value)); |
44037a66 TG |
724 | } |
725 | ||
726 | /* If this machine's insv insists on a register, | |
727 | get VALUE1 into a register. */ | |
a995e389 | 728 | if (! ((*insn_data[(int) CODE_FOR_insv].operand[3].predicate) |
6d7db3c5 RS |
729 | (value1, op_mode))) |
730 | value1 = force_reg (op_mode, value1); | |
44037a66 | 731 | |
b1ec3c92 | 732 | pat = gen_insv (xop0, GEN_INT (bitsize), GEN_INT (xbitpos), value1); |
44037a66 | 733 | if (pat) |
6d7db3c5 RS |
734 | { |
735 | emit_insn (pat); | |
736 | return true; | |
737 | } | |
738 | delete_insns_since (last); | |
739 | } | |
740 | ||
741 | /* If OP0 is a memory, try copying it to a register and seeing if a | |
742 | cheap register alternative is available. */ | |
743 | if (HAVE_insv && MEM_P (op0)) | |
744 | { | |
745 | enum machine_mode bestmode; | |
746 | ||
747 | /* Get the mode to use for inserting into this field. If OP0 is | |
748 | BLKmode, get the smallest mode consistent with the alignment. If | |
749 | OP0 is a non-BLKmode object that is no wider than OP_MODE, use its | |
750 | mode. Otherwise, use the smallest mode containing the field. */ | |
751 | ||
752 | if (GET_MODE (op0) == BLKmode | |
753 | || (op_mode != MAX_MACHINE_MODE | |
754 | && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (op_mode))) | |
755 | bestmode = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0), | |
756 | (op_mode == MAX_MACHINE_MODE | |
757 | ? VOIDmode : op_mode), | |
758 | MEM_VOLATILE_P (op0)); | |
44037a66 | 759 | else |
6d7db3c5 RS |
760 | bestmode = GET_MODE (op0); |
761 | ||
762 | if (bestmode != VOIDmode | |
763 | && GET_MODE_SIZE (bestmode) >= GET_MODE_SIZE (fieldmode) | |
764 | && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0)) | |
765 | && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0))) | |
0fb7aeda | 766 | { |
6d7db3c5 RS |
767 | rtx last, tempreg, xop0; |
768 | unsigned HOST_WIDE_INT xoffset, xbitpos; | |
769 | ||
770 | last = get_last_insn (); | |
771 | ||
772 | /* Adjust address to point to the containing unit of | |
773 | that mode. Compute the offset as a multiple of this unit, | |
774 | counting in bytes. */ | |
775 | unit = GET_MODE_BITSIZE (bestmode); | |
776 | xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode); | |
777 | xbitpos = bitnum % unit; | |
778 | xop0 = adjust_address (op0, bestmode, xoffset); | |
779 | ||
780 | /* Fetch that unit, store the bitfield in it, then store | |
781 | the unit. */ | |
782 | tempreg = copy_to_reg (xop0); | |
783 | if (store_bit_field_1 (tempreg, bitsize, xbitpos, | |
784 | fieldmode, orig_value, false)) | |
785 | { | |
786 | emit_move_insn (xop0, tempreg); | |
787 | return true; | |
788 | } | |
44037a66 | 789 | delete_insns_since (last); |
44037a66 TG |
790 | } |
791 | } | |
6d7db3c5 RS |
792 | |
793 | if (!fallback_p) | |
794 | return false; | |
795 | ||
796 | store_fixed_bit_field (op0, offset, bitsize, bitpos, value); | |
797 | return true; | |
798 | } | |
799 | ||
800 | /* Generate code to store value from rtx VALUE | |
801 | into a bit-field within structure STR_RTX | |
802 | containing BITSIZE bits starting at bit BITNUM. | |
803 | FIELDMODE is the machine-mode of the FIELD_DECL node for this field. */ | |
804 | ||
805 | void | |
806 | store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize, | |
807 | unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode, | |
808 | rtx value) | |
809 | { | |
810 | if (!store_bit_field_1 (str_rtx, bitsize, bitnum, fieldmode, value, true)) | |
811 | gcc_unreachable (); | |
44037a66 TG |
812 | } |
813 | \f | |
814 | /* Use shifts and boolean operations to store VALUE | |
815 | into a bit field of width BITSIZE | |
816 | in a memory location specified by OP0 except offset by OFFSET bytes. | |
817 | (OFFSET must be 0 if OP0 is a register.) | |
818 | The field starts at position BITPOS within the byte. | |
819 | (If OP0 is a register, it may be a full word or a narrower mode, | |
820 | but BITPOS still counts within a full word, | |
ad76cef8 | 821 | which is significant on bigendian machines.) */ |
44037a66 TG |
822 | |
823 | static void | |
502b8322 AJ |
824 | store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset, |
825 | unsigned HOST_WIDE_INT bitsize, | |
826 | unsigned HOST_WIDE_INT bitpos, rtx value) | |
44037a66 | 827 | { |
b3694847 | 828 | enum machine_mode mode; |
770ae6cc | 829 | unsigned int total_bits = BITS_PER_WORD; |
c505fc06 | 830 | rtx temp; |
44037a66 TG |
831 | int all_zero = 0; |
832 | int all_one = 0; | |
833 | ||
44037a66 TG |
834 | /* There is a case not handled here: |
835 | a structure with a known alignment of just a halfword | |
836 | and a field split across two aligned halfwords within the structure. | |
837 | Or likewise a structure with a known alignment of just a byte | |
838 | and a field split across two bytes. | |
839 | Such cases are not supposed to be able to occur. */ | |
840 | ||
f8cfc6aa | 841 | if (REG_P (op0) || GET_CODE (op0) == SUBREG) |
44037a66 | 842 | { |
5b0264cb | 843 | gcc_assert (!offset); |
44037a66 TG |
844 | /* Special treatment for a bit field split across two registers. */ |
845 | if (bitsize + bitpos > BITS_PER_WORD) | |
846 | { | |
04050c69 | 847 | store_split_bit_field (op0, bitsize, bitpos, value); |
44037a66 TG |
848 | return; |
849 | } | |
850 | } | |
851 | else | |
852 | { | |
853 | /* Get the proper mode to use for this field. We want a mode that | |
854 | includes the entire field. If such a mode would be larger than | |
c410d49e | 855 | a word, we won't be doing the extraction the normal way. |
053a35af | 856 | We don't want a mode bigger than the destination. */ |
44037a66 | 857 | |
053a35af AH |
858 | mode = GET_MODE (op0); |
859 | if (GET_MODE_BITSIZE (mode) == 0 | |
0fb7aeda KH |
860 | || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode)) |
861 | mode = word_mode; | |
44037a66 | 862 | mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT, |
04050c69 | 863 | MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0)); |
44037a66 TG |
864 | |
865 | if (mode == VOIDmode) | |
866 | { | |
867 | /* The only way this should occur is if the field spans word | |
868 | boundaries. */ | |
04050c69 RK |
869 | store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT, |
870 | value); | |
44037a66 TG |
871 | return; |
872 | } | |
873 | ||
874 | total_bits = GET_MODE_BITSIZE (mode); | |
875 | ||
3bd98790 | 876 | /* Make sure bitpos is valid for the chosen mode. Adjust BITPOS to |
38e01259 | 877 | be in the range 0 to total_bits-1, and put any excess bytes in |
3bd98790 JW |
878 | OFFSET. */ |
879 | if (bitpos >= total_bits) | |
880 | { | |
881 | offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT); | |
882 | bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT) | |
883 | * BITS_PER_UNIT); | |
884 | } | |
885 | ||
44037a66 TG |
886 | /* Get ref to an aligned byte, halfword, or word containing the field. |
887 | Adjust BITPOS to be position within a word, | |
888 | and OFFSET to be the offset of that word. | |
889 | Then alter OP0 to refer to that word. */ | |
890 | bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT; | |
891 | offset -= (offset % (total_bits / BITS_PER_UNIT)); | |
f4ef873c | 892 | op0 = adjust_address (op0, mode, offset); |
44037a66 TG |
893 | } |
894 | ||
895 | mode = GET_MODE (op0); | |
896 | ||
897 | /* Now MODE is either some integral mode for a MEM as OP0, | |
898 | or is a full-word for a REG as OP0. TOTAL_BITS corresponds. | |
899 | The bit field is contained entirely within OP0. | |
900 | BITPOS is the starting bit number within OP0. | |
901 | (OP0's mode may actually be narrower than MODE.) */ | |
902 | ||
f76b9db2 ILT |
903 | if (BYTES_BIG_ENDIAN) |
904 | /* BITPOS is the distance between our msb | |
905 | and that of the containing datum. | |
906 | Convert it to the distance from the lsb. */ | |
907 | bitpos = total_bits - bitsize - bitpos; | |
44037a66 | 908 | |
44037a66 TG |
909 | /* Now BITPOS is always the distance between our lsb |
910 | and that of OP0. */ | |
911 | ||
912 | /* Shift VALUE left by BITPOS bits. If VALUE is not constant, | |
913 | we must first convert its mode to MODE. */ | |
914 | ||
915 | if (GET_CODE (value) == CONST_INT) | |
916 | { | |
b3694847 | 917 | HOST_WIDE_INT v = INTVAL (value); |
44037a66 | 918 | |
b1ec3c92 CH |
919 | if (bitsize < HOST_BITS_PER_WIDE_INT) |
920 | v &= ((HOST_WIDE_INT) 1 << bitsize) - 1; | |
44037a66 TG |
921 | |
922 | if (v == 0) | |
923 | all_zero = 1; | |
b1ec3c92 CH |
924 | else if ((bitsize < HOST_BITS_PER_WIDE_INT |
925 | && v == ((HOST_WIDE_INT) 1 << bitsize) - 1) | |
926 | || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1)) | |
44037a66 TG |
927 | all_one = 1; |
928 | ||
929 | value = lshift_value (mode, value, bitpos, bitsize); | |
930 | } | |
931 | else | |
932 | { | |
933 | int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize | |
934 | && bitpos + bitsize != GET_MODE_BITSIZE (mode)); | |
935 | ||
936 | if (GET_MODE (value) != mode) | |
937 | { | |
f8cfc6aa | 938 | if ((REG_P (value) || GET_CODE (value) == SUBREG) |
eec6bb06 | 939 | && GET_MODE_SIZE (mode) < GET_MODE_SIZE (GET_MODE (value))) |
44037a66 TG |
940 | value = gen_lowpart (mode, value); |
941 | else | |
942 | value = convert_to_mode (mode, value, 1); | |
943 | } | |
944 | ||
945 | if (must_and) | |
946 | value = expand_binop (mode, and_optab, value, | |
947 | mask_rtx (mode, 0, bitsize, 0), | |
b1ec3c92 | 948 | NULL_RTX, 1, OPTAB_LIB_WIDEN); |
44037a66 TG |
949 | if (bitpos > 0) |
950 | value = expand_shift (LSHIFT_EXPR, mode, value, | |
7d60be94 | 951 | build_int_cst (NULL_TREE, bitpos), NULL_RTX, 1); |
44037a66 TG |
952 | } |
953 | ||
954 | /* Now clear the chosen bits in OP0, | |
955 | except that if VALUE is -1 we need not bother. */ | |
c505fc06 RS |
956 | /* We keep the intermediates in registers to allow CSE to combine |
957 | consecutive bitfield assignments. */ | |
44037a66 | 958 | |
c505fc06 | 959 | temp = force_reg (mode, op0); |
44037a66 TG |
960 | |
961 | if (! all_one) | |
962 | { | |
c505fc06 | 963 | temp = expand_binop (mode, and_optab, temp, |
44037a66 | 964 | mask_rtx (mode, bitpos, bitsize, 1), |
c505fc06 RS |
965 | NULL_RTX, 1, OPTAB_LIB_WIDEN); |
966 | temp = force_reg (mode, temp); | |
44037a66 | 967 | } |
44037a66 TG |
968 | |
969 | /* Now logical-or VALUE into OP0, unless it is zero. */ | |
970 | ||
971 | if (! all_zero) | |
c505fc06 RS |
972 | { |
973 | temp = expand_binop (mode, ior_optab, temp, value, | |
974 | NULL_RTX, 1, OPTAB_LIB_WIDEN); | |
975 | temp = force_reg (mode, temp); | |
976 | } | |
977 | ||
44037a66 | 978 | if (op0 != temp) |
4679504c UB |
979 | { |
980 | op0 = copy_rtx (op0); | |
981 | emit_move_insn (op0, temp); | |
982 | } | |
44037a66 TG |
983 | } |
984 | \f | |
06c94bce | 985 | /* Store a bit field that is split across multiple accessible memory objects. |
44037a66 | 986 | |
06c94bce | 987 | OP0 is the REG, SUBREG or MEM rtx for the first of the objects. |
44037a66 TG |
988 | BITSIZE is the field width; BITPOS the position of its first bit |
989 | (within the word). | |
06c94bce | 990 | VALUE is the value to store. |
06c94bce RS |
991 | |
992 | This does not yet handle fields wider than BITS_PER_WORD. */ | |
44037a66 TG |
993 | |
994 | static void | |
502b8322 AJ |
995 | store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize, |
996 | unsigned HOST_WIDE_INT bitpos, rtx value) | |
44037a66 | 997 | { |
770ae6cc RK |
998 | unsigned int unit; |
999 | unsigned int bitsdone = 0; | |
4ee16841 | 1000 | |
0eb61c19 DE |
1001 | /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that |
1002 | much at a time. */ | |
f8cfc6aa | 1003 | if (REG_P (op0) || GET_CODE (op0) == SUBREG) |
4ee16841 DE |
1004 | unit = BITS_PER_WORD; |
1005 | else | |
04050c69 | 1006 | unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD); |
e54d80d0 | 1007 | |
3d709ff0 RS |
1008 | /* If VALUE is a constant other than a CONST_INT, get it into a register in |
1009 | WORD_MODE. If we can do this using gen_lowpart_common, do so. Note | |
1010 | that VALUE might be a floating-point constant. */ | |
44037a66 | 1011 | if (CONSTANT_P (value) && GET_CODE (value) != CONST_INT) |
3d709ff0 RS |
1012 | { |
1013 | rtx word = gen_lowpart_common (word_mode, value); | |
1014 | ||
bc8a0e39 | 1015 | if (word && (value != word)) |
3d709ff0 RS |
1016 | value = word; |
1017 | else | |
1018 | value = gen_lowpart_common (word_mode, | |
d01bc862 DE |
1019 | force_reg (GET_MODE (value) != VOIDmode |
1020 | ? GET_MODE (value) | |
1021 | : word_mode, value)); | |
3d709ff0 | 1022 | } |
44037a66 | 1023 | |
06c94bce | 1024 | while (bitsdone < bitsize) |
44037a66 | 1025 | { |
770ae6cc | 1026 | unsigned HOST_WIDE_INT thissize; |
06c94bce | 1027 | rtx part, word; |
770ae6cc RK |
1028 | unsigned HOST_WIDE_INT thispos; |
1029 | unsigned HOST_WIDE_INT offset; | |
44037a66 | 1030 | |
06c94bce RS |
1031 | offset = (bitpos + bitsdone) / unit; |
1032 | thispos = (bitpos + bitsdone) % unit; | |
44037a66 | 1033 | |
0eb61c19 DE |
1034 | /* THISSIZE must not overrun a word boundary. Otherwise, |
1035 | store_fixed_bit_field will call us again, and we will mutually | |
1036 | recurse forever. */ | |
1037 | thissize = MIN (bitsize - bitsdone, BITS_PER_WORD); | |
1038 | thissize = MIN (thissize, unit - thispos); | |
44037a66 | 1039 | |
f76b9db2 ILT |
1040 | if (BYTES_BIG_ENDIAN) |
1041 | { | |
37811a73 RK |
1042 | int total_bits; |
1043 | ||
1044 | /* We must do an endian conversion exactly the same way as it is | |
1045 | done in extract_bit_field, so that the two calls to | |
1046 | extract_fixed_bit_field will have comparable arguments. */ | |
3c0cb5de | 1047 | if (!MEM_P (value) || GET_MODE (value) == BLKmode) |
37811a73 RK |
1048 | total_bits = BITS_PER_WORD; |
1049 | else | |
1050 | total_bits = GET_MODE_BITSIZE (GET_MODE (value)); | |
1051 | ||
f76b9db2 ILT |
1052 | /* Fetch successively less significant portions. */ |
1053 | if (GET_CODE (value) == CONST_INT) | |
1054 | part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value)) | |
1055 | >> (bitsize - bitsdone - thissize)) | |
1056 | & (((HOST_WIDE_INT) 1 << thissize) - 1)); | |
1057 | else | |
1058 | /* The args are chosen so that the last part includes the | |
1059 | lsb. Give extract_bit_field the value it needs (with | |
04050c69 RK |
1060 | endianness compensation) to fetch the piece we want. */ |
1061 | part = extract_fixed_bit_field (word_mode, value, 0, thissize, | |
1062 | total_bits - bitsize + bitsdone, | |
1063 | NULL_RTX, 1); | |
f76b9db2 | 1064 | } |
06c94bce | 1065 | else |
f76b9db2 ILT |
1066 | { |
1067 | /* Fetch successively more significant portions. */ | |
1068 | if (GET_CODE (value) == CONST_INT) | |
1069 | part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value)) | |
1070 | >> bitsdone) | |
1071 | & (((HOST_WIDE_INT) 1 << thissize) - 1)); | |
1072 | else | |
04050c69 RK |
1073 | part = extract_fixed_bit_field (word_mode, value, 0, thissize, |
1074 | bitsdone, NULL_RTX, 1); | |
f76b9db2 | 1075 | } |
44037a66 | 1076 | |
06c94bce | 1077 | /* If OP0 is a register, then handle OFFSET here. |
5f57dff0 JW |
1078 | |
1079 | When handling multiword bitfields, extract_bit_field may pass | |
1080 | down a word_mode SUBREG of a larger REG for a bitfield that actually | |
1081 | crosses a word boundary. Thus, for a SUBREG, we must find | |
1082 | the current word starting from the base register. */ | |
1083 | if (GET_CODE (op0) == SUBREG) | |
1084 | { | |
ddef6bc7 JJ |
1085 | int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset; |
1086 | word = operand_subword_force (SUBREG_REG (op0), word_offset, | |
4ee16841 | 1087 | GET_MODE (SUBREG_REG (op0))); |
5f57dff0 JW |
1088 | offset = 0; |
1089 | } | |
f8cfc6aa | 1090 | else if (REG_P (op0)) |
06c94bce | 1091 | { |
4ee16841 | 1092 | word = operand_subword_force (op0, offset, GET_MODE (op0)); |
06c94bce RS |
1093 | offset = 0; |
1094 | } | |
1095 | else | |
1096 | word = op0; | |
44037a66 | 1097 | |
0eb61c19 DE |
1098 | /* OFFSET is in UNITs, and UNIT is in bits. |
1099 | store_fixed_bit_field wants offset in bytes. */ | |
04050c69 RK |
1100 | store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize, |
1101 | thispos, part); | |
06c94bce RS |
1102 | bitsdone += thissize; |
1103 | } | |
44037a66 TG |
1104 | } |
1105 | \f | |
6d7db3c5 RS |
1106 | /* A subroutine of extract_bit_field_1 that converts return value X |
1107 | to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments | |
1108 | to extract_bit_field. */ | |
44037a66 | 1109 | |
6d7db3c5 RS |
1110 | static rtx |
1111 | convert_extracted_bit_field (rtx x, enum machine_mode mode, | |
1112 | enum machine_mode tmode, bool unsignedp) | |
1113 | { | |
1114 | if (GET_MODE (x) == tmode || GET_MODE (x) == mode) | |
1115 | return x; | |
44037a66 | 1116 | |
6d7db3c5 RS |
1117 | /* If the x mode is not a scalar integral, first convert to the |
1118 | integer mode of that size and then access it as a floating-point | |
1119 | value via a SUBREG. */ | |
1120 | if (!SCALAR_INT_MODE_P (tmode)) | |
1121 | { | |
1122 | enum machine_mode smode; | |
44037a66 | 1123 | |
6d7db3c5 RS |
1124 | smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0); |
1125 | x = convert_to_mode (smode, x, unsignedp); | |
1126 | x = force_reg (smode, x); | |
1127 | return gen_lowpart (tmode, x); | |
1128 | } | |
44037a66 | 1129 | |
6d7db3c5 RS |
1130 | return convert_to_mode (tmode, x, unsignedp); |
1131 | } | |
1132 | ||
1133 | /* A subroutine of extract_bit_field, with the same arguments. | |
1134 | If FALLBACK_P is true, fall back to extract_fixed_bit_field | |
1135 | if we can find no other means of implementing the operation. | |
1136 | if FALLBACK_P is false, return NULL instead. */ | |
1137 | ||
1138 | static rtx | |
1139 | extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize, | |
1140 | unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target, | |
1141 | enum machine_mode mode, enum machine_mode tmode, | |
1142 | bool fallback_p) | |
44037a66 | 1143 | { |
770ae6cc | 1144 | unsigned int unit |
3c0cb5de | 1145 | = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD; |
2c58f7dd | 1146 | unsigned HOST_WIDE_INT offset, bitpos; |
b3694847 | 1147 | rtx op0 = str_rtx; |
3306eb80 | 1148 | enum machine_mode int_mode; |
6d7db3c5 | 1149 | enum machine_mode ext_mode; |
e98f90d3 | 1150 | enum machine_mode mode1; |
6d7db3c5 | 1151 | enum insn_code icode; |
e98f90d3 | 1152 | int byte_offset; |
44037a66 | 1153 | |
44037a66 TG |
1154 | if (tmode == VOIDmode) |
1155 | tmode = mode; | |
6ca6193b | 1156 | |
44037a66 TG |
1157 | while (GET_CODE (op0) == SUBREG) |
1158 | { | |
2c58f7dd | 1159 | bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT; |
44037a66 TG |
1160 | op0 = SUBREG_REG (op0); |
1161 | } | |
77295dec | 1162 | |
2c58f7dd | 1163 | /* If we have an out-of-bounds access to a register, just return an |
647eea9d | 1164 | uninitialized register of the required mode. This can occur if the |
2c58f7dd RS |
1165 | source code contains an out-of-bounds access to a small array. */ |
1166 | if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0))) | |
1167 | return gen_reg_rtx (tmode); | |
1168 | ||
f8cfc6aa | 1169 | if (REG_P (op0) |
aac280fb DD |
1170 | && mode == GET_MODE (op0) |
1171 | && bitnum == 0 | |
0b69c29f | 1172 | && bitsize == GET_MODE_BITSIZE (GET_MODE (op0))) |
aac280fb | 1173 | { |
0b69c29f | 1174 | /* We're trying to extract a full register from itself. */ |
aac280fb DD |
1175 | return op0; |
1176 | } | |
1177 | ||
0890b981 AP |
1178 | /* See if we can get a better vector mode before extracting. */ |
1179 | if (VECTOR_MODE_P (GET_MODE (op0)) | |
1180 | && !MEM_P (op0) | |
1181 | && GET_MODE_INNER (GET_MODE (op0)) != tmode) | |
1182 | { | |
1183 | enum machine_mode new_mode; | |
1184 | int nunits = GET_MODE_NUNITS (GET_MODE (op0)); | |
1185 | ||
1186 | if (GET_MODE_CLASS (tmode) == MODE_FLOAT) | |
1187 | new_mode = MIN_MODE_VECTOR_FLOAT; | |
325217ed CF |
1188 | else if (GET_MODE_CLASS (tmode) == MODE_FRACT) |
1189 | new_mode = MIN_MODE_VECTOR_FRACT; | |
1190 | else if (GET_MODE_CLASS (tmode) == MODE_UFRACT) | |
1191 | new_mode = MIN_MODE_VECTOR_UFRACT; | |
1192 | else if (GET_MODE_CLASS (tmode) == MODE_ACCUM) | |
1193 | new_mode = MIN_MODE_VECTOR_ACCUM; | |
1194 | else if (GET_MODE_CLASS (tmode) == MODE_UACCUM) | |
1195 | new_mode = MIN_MODE_VECTOR_UACCUM; | |
0890b981 AP |
1196 | else |
1197 | new_mode = MIN_MODE_VECTOR_INT; | |
1198 | ||
1199 | for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode)) | |
1200 | if (GET_MODE_NUNITS (new_mode) == nunits | |
1201 | && GET_MODE_INNER (new_mode) == tmode | |
1202 | && targetm.vector_mode_supported_p (new_mode)) | |
1203 | break; | |
1204 | if (new_mode != VOIDmode) | |
1205 | op0 = gen_lowpart (new_mode, op0); | |
1206 | } | |
1207 | ||
997404de JH |
1208 | /* Use vec_extract patterns for extracting parts of vectors whenever |
1209 | available. */ | |
1210 | if (VECTOR_MODE_P (GET_MODE (op0)) | |
3c0cb5de | 1211 | && !MEM_P (op0) |
166cdb08 | 1212 | && (optab_handler (vec_extract_optab, GET_MODE (op0))->insn_code |
997404de | 1213 | != CODE_FOR_nothing) |
b42271d6 JB |
1214 | && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0))) |
1215 | == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0))))) | |
997404de JH |
1216 | { |
1217 | enum machine_mode outermode = GET_MODE (op0); | |
1218 | enum machine_mode innermode = GET_MODE_INNER (outermode); | |
166cdb08 | 1219 | int icode = (int) optab_handler (vec_extract_optab, outermode)->insn_code; |
b42271d6 | 1220 | unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode); |
997404de JH |
1221 | rtx rtxpos = GEN_INT (pos); |
1222 | rtx src = op0; | |
1223 | rtx dest = NULL, pat, seq; | |
1224 | enum machine_mode mode0 = insn_data[icode].operand[0].mode; | |
1225 | enum machine_mode mode1 = insn_data[icode].operand[1].mode; | |
1226 | enum machine_mode mode2 = insn_data[icode].operand[2].mode; | |
1227 | ||
1228 | if (innermode == tmode || innermode == mode) | |
1229 | dest = target; | |
1230 | ||
1231 | if (!dest) | |
1232 | dest = gen_reg_rtx (innermode); | |
1233 | ||
1234 | start_sequence (); | |
1235 | ||
1236 | if (! (*insn_data[icode].operand[0].predicate) (dest, mode0)) | |
1237 | dest = copy_to_mode_reg (mode0, dest); | |
1238 | ||
1239 | if (! (*insn_data[icode].operand[1].predicate) (src, mode1)) | |
1240 | src = copy_to_mode_reg (mode1, src); | |
1241 | ||
1242 | if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2)) | |
1243 | rtxpos = copy_to_mode_reg (mode1, rtxpos); | |
1244 | ||
1245 | /* We could handle this, but we should always be called with a pseudo | |
1246 | for our targets and all insns should take them as outputs. */ | |
5b0264cb NS |
1247 | gcc_assert ((*insn_data[icode].operand[0].predicate) (dest, mode0) |
1248 | && (*insn_data[icode].operand[1].predicate) (src, mode1) | |
1249 | && (*insn_data[icode].operand[2].predicate) (rtxpos, mode2)); | |
5c64c900 | 1250 | |
997404de JH |
1251 | pat = GEN_FCN (icode) (dest, src, rtxpos); |
1252 | seq = get_insns (); | |
1253 | end_sequence (); | |
1254 | if (pat) | |
1255 | { | |
1256 | emit_insn (seq); | |
1257 | emit_insn (pat); | |
0890b981 AP |
1258 | if (mode0 != mode) |
1259 | return gen_lowpart (tmode, dest); | |
5c64c900 | 1260 | return dest; |
997404de JH |
1261 | } |
1262 | } | |
1263 | ||
d006aa54 RH |
1264 | /* Make sure we are playing with integral modes. Pun with subregs |
1265 | if we aren't. */ | |
1266 | { | |
1267 | enum machine_mode imode = int_mode_for_mode (GET_MODE (op0)); | |
1268 | if (imode != GET_MODE (op0)) | |
1269 | { | |
a6d2976a JDA |
1270 | if (MEM_P (op0)) |
1271 | op0 = adjust_address (op0, imode, 0); | |
1272 | else | |
1273 | { | |
1274 | gcc_assert (imode != BLKmode); | |
1275 | op0 = gen_lowpart (imode, op0); | |
360e3535 | 1276 | |
a6d2976a JDA |
1277 | /* If we got a SUBREG, force it into a register since we |
1278 | aren't going to be able to do another SUBREG on it. */ | |
1279 | if (GET_CODE (op0) == SUBREG) | |
1280 | op0 = force_reg (imode, op0); | |
1281 | } | |
d006aa54 RH |
1282 | } |
1283 | } | |
1284 | ||
4e9bb42b AH |
1285 | /* We may be accessing data outside the field, which means |
1286 | we can alias adjacent data. */ | |
3c0cb5de | 1287 | if (MEM_P (op0)) |
4e9bb42b AH |
1288 | { |
1289 | op0 = shallow_copy_rtx (op0); | |
1290 | set_mem_alias_set (op0, 0); | |
1291 | set_mem_expr (op0, 0); | |
1292 | } | |
1293 | ||
6ca6193b JDA |
1294 | /* Extraction of a full-word or multi-word value from a structure |
1295 | in a register or aligned memory can be done with just a SUBREG. | |
1296 | A subword value in the least significant part of a register | |
1297 | can also be extracted with a SUBREG. For this, we need the | |
1298 | byte offset of the value in op0. */ | |
1299 | ||
2c58f7dd RS |
1300 | bitpos = bitnum % unit; |
1301 | offset = bitnum / unit; | |
6ca6193b | 1302 | byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD; |
c410d49e | 1303 | |
44037a66 TG |
1304 | /* If OP0 is a register, BITPOS must count within a word. |
1305 | But as we have it, it counts within whatever size OP0 now has. | |
1306 | On a bigendian machine, these are not the same, so convert. */ | |
db3cf6fb | 1307 | if (BYTES_BIG_ENDIAN |
3c0cb5de | 1308 | && !MEM_P (op0) |
f76b9db2 | 1309 | && unit > GET_MODE_BITSIZE (GET_MODE (op0))) |
44037a66 | 1310 | bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0)); |
44037a66 | 1311 | |
6ca6193b JDA |
1312 | /* ??? We currently assume TARGET is at least as big as BITSIZE. |
1313 | If that's wrong, the solution is to test for it and set TARGET to 0 | |
1314 | if needed. */ | |
e98f90d3 | 1315 | |
367d6d0b JW |
1316 | /* Only scalar integer modes can be converted via subregs. There is an |
1317 | additional problem for FP modes here in that they can have a precision | |
1318 | which is different from the size. mode_for_size uses precision, but | |
1319 | we want a mode based on the size, so we must avoid calling it for FP | |
1320 | modes. */ | |
1321 | mode1 = (SCALAR_INT_MODE_P (tmode) | |
1322 | ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0) | |
1323 | : mode); | |
e98f90d3 | 1324 | |
0d2f38ee OH |
1325 | if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode) |
1326 | && bitpos % BITS_PER_WORD == 0) | |
367d6d0b | 1327 | || (mode1 != BLKmode |
0d2f38ee OH |
1328 | /* ??? The big endian test here is wrong. This is correct |
1329 | if the value is in a register, and if mode_for_size is not | |
1330 | the same mode as op0. This causes us to get unnecessarily | |
1331 | inefficient code from the Thumb port when -mbig-endian. */ | |
1332 | && (BYTES_BIG_ENDIAN | |
1333 | ? bitpos + bitsize == BITS_PER_WORD | |
1334 | : bitpos == 0))) | |
3c0cb5de | 1335 | && ((!MEM_P (op0) |
0d2f38ee OH |
1336 | && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode), |
1337 | GET_MODE_BITSIZE (GET_MODE (op0))) | |
1338 | && GET_MODE_SIZE (mode1) != 0 | |
1339 | && byte_offset % GET_MODE_SIZE (mode1) == 0) | |
3c0cb5de | 1340 | || (MEM_P (op0) |
0d2f38ee OH |
1341 | && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0)) |
1342 | || (offset * BITS_PER_UNIT % bitsize == 0 | |
1343 | && MEM_ALIGN (op0) % bitsize == 0))))) | |
44037a66 | 1344 | { |
8ddcfde1 DJ |
1345 | if (MEM_P (op0)) |
1346 | op0 = adjust_address (op0, mode1, offset); | |
1347 | else if (mode1 != GET_MODE (op0)) | |
c7e33f89 | 1348 | { |
8ddcfde1 DJ |
1349 | rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0), |
1350 | byte_offset); | |
1351 | if (sub == NULL) | |
1352 | goto no_subreg_mode_swap; | |
1353 | op0 = sub; | |
c7e33f89 | 1354 | } |
44037a66 TG |
1355 | if (mode1 != mode) |
1356 | return convert_to_mode (tmode, op0, unsignedp); | |
1357 | return op0; | |
1358 | } | |
28ce94d4 | 1359 | no_subreg_mode_swap: |
44037a66 TG |
1360 | |
1361 | /* Handle fields bigger than a word. */ | |
c410d49e | 1362 | |
44037a66 TG |
1363 | if (bitsize > BITS_PER_WORD) |
1364 | { | |
1365 | /* Here we transfer the words of the field | |
1366 | in the order least significant first. | |
1367 | This is because the most significant word is the one which may | |
1368 | be less than full. */ | |
1369 | ||
770ae6cc RK |
1370 | unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD; |
1371 | unsigned int i; | |
44037a66 | 1372 | |
f8cfc6aa | 1373 | if (target == 0 || !REG_P (target)) |
44037a66 TG |
1374 | target = gen_reg_rtx (mode); |
1375 | ||
34ea783b | 1376 | /* Indicate for flow that the entire target reg is being set. */ |
c41c1387 | 1377 | emit_clobber (target); |
34ea783b | 1378 | |
44037a66 TG |
1379 | for (i = 0; i < nwords; i++) |
1380 | { | |
1381 | /* If I is 0, use the low-order word in both field and target; | |
1382 | if I is 1, use the next to lowest word; and so on. */ | |
77295dec | 1383 | /* Word number in TARGET to use. */ |
770ae6cc RK |
1384 | unsigned int wordnum |
1385 | = (WORDS_BIG_ENDIAN | |
1386 | ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1 | |
1387 | : i); | |
77295dec | 1388 | /* Offset from start of field in OP0. */ |
770ae6cc RK |
1389 | unsigned int bit_offset = (WORDS_BIG_ENDIAN |
1390 | ? MAX (0, ((int) bitsize - ((int) i + 1) | |
75131237 | 1391 | * (int) BITS_PER_WORD)) |
770ae6cc | 1392 | : (int) i * BITS_PER_WORD); |
44037a66 TG |
1393 | rtx target_part = operand_subword (target, wordnum, 1, VOIDmode); |
1394 | rtx result_part | |
1395 | = extract_bit_field (op0, MIN (BITS_PER_WORD, | |
1396 | bitsize - i * BITS_PER_WORD), | |
19caa751 | 1397 | bitnum + bit_offset, 1, target_part, mode, |
b3520980 | 1398 | word_mode); |
44037a66 | 1399 | |
5b0264cb | 1400 | gcc_assert (target_part); |
44037a66 TG |
1401 | |
1402 | if (result_part != target_part) | |
1403 | emit_move_insn (target_part, result_part); | |
1404 | } | |
1405 | ||
5f57dff0 | 1406 | if (unsignedp) |
77295dec DE |
1407 | { |
1408 | /* Unless we've filled TARGET, the upper regs in a multi-reg value | |
1409 | need to be zero'd out. */ | |
1410 | if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD) | |
1411 | { | |
770ae6cc | 1412 | unsigned int i, total_words; |
77295dec DE |
1413 | |
1414 | total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD; | |
1415 | for (i = nwords; i < total_words; i++) | |
04050c69 RK |
1416 | emit_move_insn |
1417 | (operand_subword (target, | |
1418 | WORDS_BIG_ENDIAN ? total_words - i - 1 : i, | |
1419 | 1, VOIDmode), | |
1420 | const0_rtx); | |
77295dec DE |
1421 | } |
1422 | return target; | |
1423 | } | |
1424 | ||
5f57dff0 JW |
1425 | /* Signed bit field: sign-extend with two arithmetic shifts. */ |
1426 | target = expand_shift (LSHIFT_EXPR, mode, target, | |
4a90aeeb | 1427 | build_int_cst (NULL_TREE, |
7d60be94 | 1428 | GET_MODE_BITSIZE (mode) - bitsize), |
5f57dff0 JW |
1429 | NULL_RTX, 0); |
1430 | return expand_shift (RSHIFT_EXPR, mode, target, | |
4a90aeeb | 1431 | build_int_cst (NULL_TREE, |
7d60be94 | 1432 | GET_MODE_BITSIZE (mode) - bitsize), |
5f57dff0 | 1433 | NULL_RTX, 0); |
44037a66 | 1434 | } |
c410d49e | 1435 | |
3306eb80 GK |
1436 | /* From here on we know the desired field is smaller than a word. */ |
1437 | ||
1438 | /* Check if there is a correspondingly-sized integer field, so we can | |
1439 | safely extract it as one size of integer, if necessary; then | |
1440 | truncate or extend to the size that is wanted; then use SUBREGs or | |
1441 | convert_to_mode to get one of the modes we really wanted. */ | |
c410d49e | 1442 | |
3306eb80 GK |
1443 | int_mode = int_mode_for_mode (tmode); |
1444 | if (int_mode == BLKmode) | |
1445 | int_mode = int_mode_for_mode (mode); | |
5b0264cb NS |
1446 | /* Should probably push op0 out to memory and then do a load. */ |
1447 | gcc_assert (int_mode != BLKmode); | |
44037a66 TG |
1448 | |
1449 | /* OFFSET is the number of words or bytes (UNIT says which) | |
1450 | from STR_RTX to the first word or byte containing part of the field. */ | |
3c0cb5de | 1451 | if (!MEM_P (op0)) |
44037a66 TG |
1452 | { |
1453 | if (offset != 0 | |
1454 | || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD) | |
470032d7 | 1455 | { |
f8cfc6aa | 1456 | if (!REG_P (op0)) |
470032d7 RH |
1457 | op0 = copy_to_reg (op0); |
1458 | op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0), | |
ddef6bc7 | 1459 | op0, (offset * UNITS_PER_WORD)); |
470032d7 | 1460 | } |
44037a66 TG |
1461 | offset = 0; |
1462 | } | |
44037a66 TG |
1463 | |
1464 | /* Now OFFSET is nonzero only for memory operands. */ | |
6d7db3c5 RS |
1465 | ext_mode = mode_for_extraction (unsignedp ? EP_extzv : EP_extv, 0); |
1466 | icode = unsignedp ? CODE_FOR_extzv : CODE_FOR_extv; | |
1467 | if (ext_mode != MAX_MACHINE_MODE | |
1468 | && bitsize > 0 | |
1469 | && GET_MODE_BITSIZE (ext_mode) >= bitsize | |
1470 | /* If op0 is a register, we need it in EXT_MODE to make it | |
1471 | acceptable to the format of ext(z)v. */ | |
1472 | && !(GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode) | |
1473 | && !((REG_P (op0) || GET_CODE (op0) == SUBREG) | |
1474 | && (bitsize + bitpos > GET_MODE_BITSIZE (ext_mode))) | |
1475 | && check_predicate_volatile_ok (icode, 1, op0, GET_MODE (op0))) | |
44037a66 | 1476 | { |
6d7db3c5 RS |
1477 | unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset; |
1478 | rtx bitsize_rtx, bitpos_rtx; | |
1479 | rtx last = get_last_insn (); | |
1480 | rtx xop0 = op0; | |
1481 | rtx xtarget = target; | |
1482 | rtx xspec_target = target; | |
1483 | rtx xspec_target_subreg = 0; | |
1484 | rtx pat; | |
44037a66 | 1485 | |
6d7db3c5 RS |
1486 | /* If op0 is a register, we need it in EXT_MODE to make it |
1487 | acceptable to the format of ext(z)v. */ | |
1488 | if (REG_P (xop0) && GET_MODE (xop0) != ext_mode) | |
1489 | xop0 = gen_rtx_SUBREG (ext_mode, xop0, 0); | |
1490 | if (MEM_P (xop0)) | |
1491 | /* Get ref to first byte containing part of the field. */ | |
1492 | xop0 = adjust_address (xop0, byte_mode, xoffset); | |
44037a66 | 1493 | |
6d7db3c5 RS |
1494 | /* On big-endian machines, we count bits from the most significant. |
1495 | If the bit field insn does not, we must invert. */ | |
1496 | if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN) | |
1497 | xbitpos = unit - bitsize - xbitpos; | |
f76b9db2 | 1498 | |
6d7db3c5 RS |
1499 | /* Now convert from counting within UNIT to counting in EXT_MODE. */ |
1500 | if (BITS_BIG_ENDIAN && !MEM_P (xop0)) | |
1501 | xbitpos += GET_MODE_BITSIZE (ext_mode) - unit; | |
f76b9db2 | 1502 | |
6d7db3c5 | 1503 | unit = GET_MODE_BITSIZE (ext_mode); |
44037a66 | 1504 | |
6d7db3c5 RS |
1505 | if (xtarget == 0) |
1506 | xtarget = xspec_target = gen_reg_rtx (tmode); | |
44037a66 | 1507 | |
6d7db3c5 RS |
1508 | if (GET_MODE (xtarget) != ext_mode) |
1509 | { | |
1510 | if (REG_P (xtarget)) | |
44037a66 | 1511 | { |
6d7db3c5 RS |
1512 | xtarget = gen_lowpart (ext_mode, xtarget); |
1513 | if (GET_MODE_SIZE (ext_mode) | |
1514 | > GET_MODE_SIZE (GET_MODE (xspec_target))) | |
1515 | xspec_target_subreg = xtarget; | |
44037a66 | 1516 | } |
6d7db3c5 RS |
1517 | else |
1518 | xtarget = gen_reg_rtx (ext_mode); | |
1519 | } | |
44037a66 | 1520 | |
6d7db3c5 RS |
1521 | /* If this machine's ext(z)v insists on a register target, |
1522 | make sure we have one. */ | |
1523 | if (!insn_data[(int) icode].operand[0].predicate (xtarget, ext_mode)) | |
1524 | xtarget = gen_reg_rtx (ext_mode); | |
44037a66 | 1525 | |
6d7db3c5 RS |
1526 | bitsize_rtx = GEN_INT (bitsize); |
1527 | bitpos_rtx = GEN_INT (xbitpos); | |
44037a66 | 1528 | |
6d7db3c5 RS |
1529 | pat = (unsignedp |
1530 | ? gen_extzv (xtarget, xop0, bitsize_rtx, bitpos_rtx) | |
1531 | : gen_extv (xtarget, xop0, bitsize_rtx, bitpos_rtx)); | |
1532 | if (pat) | |
1533 | { | |
1534 | emit_insn (pat); | |
1535 | if (xtarget == xspec_target) | |
1536 | return xtarget; | |
1537 | if (xtarget == xspec_target_subreg) | |
1538 | return xspec_target; | |
1539 | return convert_extracted_bit_field (xtarget, mode, tmode, unsignedp); | |
44037a66 | 1540 | } |
6d7db3c5 | 1541 | delete_insns_since (last); |
44037a66 | 1542 | } |
f76b9db2 | 1543 | |
6d7db3c5 RS |
1544 | /* If OP0 is a memory, try copying it to a register and seeing if a |
1545 | cheap register alternative is available. */ | |
1546 | if (ext_mode != MAX_MACHINE_MODE && MEM_P (op0)) | |
1547 | { | |
1548 | enum machine_mode bestmode; | |
1549 | ||
1550 | /* Get the mode to use for inserting into this field. If | |
1551 | OP0 is BLKmode, get the smallest mode consistent with the | |
1552 | alignment. If OP0 is a non-BLKmode object that is no | |
1553 | wider than EXT_MODE, use its mode. Otherwise, use the | |
1554 | smallest mode containing the field. */ | |
1555 | ||
1556 | if (GET_MODE (op0) == BLKmode | |
1557 | || (ext_mode != MAX_MACHINE_MODE | |
1558 | && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (ext_mode))) | |
1559 | bestmode = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0), | |
1560 | (ext_mode == MAX_MACHINE_MODE | |
1561 | ? VOIDmode : ext_mode), | |
1562 | MEM_VOLATILE_P (op0)); | |
1563 | else | |
1564 | bestmode = GET_MODE (op0); | |
f76b9db2 | 1565 | |
6d7db3c5 RS |
1566 | if (bestmode != VOIDmode |
1567 | && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0)) | |
1568 | && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0))) | |
1569 | { | |
1570 | unsigned HOST_WIDE_INT xoffset, xbitpos; | |
44037a66 | 1571 | |
6d7db3c5 RS |
1572 | /* Compute the offset as a multiple of this unit, |
1573 | counting in bytes. */ | |
1574 | unit = GET_MODE_BITSIZE (bestmode); | |
1575 | xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode); | |
1576 | xbitpos = bitnum % unit; | |
44037a66 | 1577 | |
6d7db3c5 RS |
1578 | /* Make sure the register is big enough for the whole field. */ |
1579 | if (xoffset * BITS_PER_UNIT + unit | |
1580 | >= offset * BITS_PER_UNIT + bitsize) | |
44037a66 | 1581 | { |
6d7db3c5 | 1582 | rtx last, result, xop0; |
44037a66 | 1583 | |
6d7db3c5 | 1584 | last = get_last_insn (); |
44037a66 | 1585 | |
6d7db3c5 RS |
1586 | /* Fetch it to a register in that size. */ |
1587 | xop0 = adjust_address (op0, bestmode, xoffset); | |
1588 | xop0 = force_reg (bestmode, xop0); | |
1589 | result = extract_bit_field_1 (xop0, bitsize, xbitpos, | |
1590 | unsignedp, target, | |
1591 | mode, tmode, false); | |
1592 | if (result) | |
1593 | return result; | |
44037a66 | 1594 | |
44037a66 | 1595 | delete_insns_since (last); |
44037a66 | 1596 | } |
c410d49e | 1597 | } |
44037a66 | 1598 | } |
562fc702 | 1599 | |
6d7db3c5 RS |
1600 | if (!fallback_p) |
1601 | return NULL; | |
1602 | ||
1603 | target = extract_fixed_bit_field (int_mode, op0, offset, bitsize, | |
1604 | bitpos, target, unsignedp); | |
1605 | return convert_extracted_bit_field (target, mode, tmode, unsignedp); | |
1606 | } | |
1607 | ||
1608 | /* Generate code to extract a byte-field from STR_RTX | |
1609 | containing BITSIZE bits, starting at BITNUM, | |
1610 | and put it in TARGET if possible (if TARGET is nonzero). | |
1611 | Regardless of TARGET, we return the rtx for where the value is placed. | |
1612 | ||
1613 | STR_RTX is the structure containing the byte (a REG or MEM). | |
1614 | UNSIGNEDP is nonzero if this is an unsigned bit field. | |
1615 | MODE is the natural mode of the field value once extracted. | |
1616 | TMODE is the mode the caller would like the value to have; | |
1617 | but the value may be returned with type MODE instead. | |
1618 | ||
1619 | If a TARGET is specified and we can store in it at no extra cost, | |
1620 | we do so, and return TARGET. | |
1621 | Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred | |
1622 | if they are equally easy. */ | |
1623 | ||
1624 | rtx | |
1625 | extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize, | |
1626 | unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target, | |
1627 | enum machine_mode mode, enum machine_mode tmode) | |
1628 | { | |
1629 | return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp, | |
1630 | target, mode, tmode, true); | |
44037a66 TG |
1631 | } |
1632 | \f | |
1633 | /* Extract a bit field using shifts and boolean operations | |
1634 | Returns an rtx to represent the value. | |
1635 | OP0 addresses a register (word) or memory (byte). | |
1636 | BITPOS says which bit within the word or byte the bit field starts in. | |
1637 | OFFSET says how many bytes farther the bit field starts; | |
1638 | it is 0 if OP0 is a register. | |
1639 | BITSIZE says how many bits long the bit field is. | |
1640 | (If OP0 is a register, it may be narrower than a full word, | |
1641 | but BITPOS still counts within a full word, | |
1642 | which is significant on bigendian machines.) | |
1643 | ||
1644 | UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value). | |
1645 | If TARGET is nonzero, attempts to store the value there | |
1646 | and return TARGET, but this is not guaranteed. | |
04050c69 | 1647 | If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */ |
44037a66 TG |
1648 | |
1649 | static rtx | |
502b8322 AJ |
1650 | extract_fixed_bit_field (enum machine_mode tmode, rtx op0, |
1651 | unsigned HOST_WIDE_INT offset, | |
1652 | unsigned HOST_WIDE_INT bitsize, | |
1653 | unsigned HOST_WIDE_INT bitpos, rtx target, | |
1654 | int unsignedp) | |
44037a66 | 1655 | { |
770ae6cc | 1656 | unsigned int total_bits = BITS_PER_WORD; |
44037a66 TG |
1657 | enum machine_mode mode; |
1658 | ||
f8cfc6aa | 1659 | if (GET_CODE (op0) == SUBREG || REG_P (op0)) |
44037a66 TG |
1660 | { |
1661 | /* Special treatment for a bit field split across two registers. */ | |
1662 | if (bitsize + bitpos > BITS_PER_WORD) | |
04050c69 | 1663 | return extract_split_bit_field (op0, bitsize, bitpos, unsignedp); |
44037a66 TG |
1664 | } |
1665 | else | |
1666 | { | |
1667 | /* Get the proper mode to use for this field. We want a mode that | |
1668 | includes the entire field. If such a mode would be larger than | |
1669 | a word, we won't be doing the extraction the normal way. */ | |
1670 | ||
04050c69 RK |
1671 | mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT, |
1672 | MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0)); | |
44037a66 TG |
1673 | |
1674 | if (mode == VOIDmode) | |
1675 | /* The only way this should occur is if the field spans word | |
1676 | boundaries. */ | |
1677 | return extract_split_bit_field (op0, bitsize, | |
1678 | bitpos + offset * BITS_PER_UNIT, | |
04050c69 | 1679 | unsignedp); |
44037a66 TG |
1680 | |
1681 | total_bits = GET_MODE_BITSIZE (mode); | |
1682 | ||
401db791 | 1683 | /* Make sure bitpos is valid for the chosen mode. Adjust BITPOS to |
38e01259 | 1684 | be in the range 0 to total_bits-1, and put any excess bytes in |
401db791 JW |
1685 | OFFSET. */ |
1686 | if (bitpos >= total_bits) | |
1687 | { | |
1688 | offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT); | |
1689 | bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT) | |
1690 | * BITS_PER_UNIT); | |
1691 | } | |
1692 | ||
44037a66 TG |
1693 | /* Get ref to an aligned byte, halfword, or word containing the field. |
1694 | Adjust BITPOS to be position within a word, | |
1695 | and OFFSET to be the offset of that word. | |
1696 | Then alter OP0 to refer to that word. */ | |
1697 | bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT; | |
1698 | offset -= (offset % (total_bits / BITS_PER_UNIT)); | |
f4ef873c | 1699 | op0 = adjust_address (op0, mode, offset); |
44037a66 TG |
1700 | } |
1701 | ||
37811a73 RK |
1702 | mode = GET_MODE (op0); |
1703 | ||
f76b9db2 | 1704 | if (BYTES_BIG_ENDIAN) |
04050c69 RK |
1705 | /* BITPOS is the distance between our msb and that of OP0. |
1706 | Convert it to the distance from the lsb. */ | |
1707 | bitpos = total_bits - bitsize - bitpos; | |
44037a66 | 1708 | |
44037a66 TG |
1709 | /* Now BITPOS is always the distance between the field's lsb and that of OP0. |
1710 | We have reduced the big-endian case to the little-endian case. */ | |
1711 | ||
1712 | if (unsignedp) | |
1713 | { | |
1714 | if (bitpos) | |
1715 | { | |
1716 | /* If the field does not already start at the lsb, | |
1717 | shift it so it does. */ | |
7d60be94 | 1718 | tree amount = build_int_cst (NULL_TREE, bitpos); |
44037a66 TG |
1719 | /* Maybe propagate the target for the shift. */ |
1720 | /* But not if we will return it--could confuse integrate.c. */ | |
f8cfc6aa | 1721 | rtx subtarget = (target != 0 && REG_P (target) ? target : 0); |
44037a66 TG |
1722 | if (tmode != mode) subtarget = 0; |
1723 | op0 = expand_shift (RSHIFT_EXPR, mode, op0, amount, subtarget, 1); | |
1724 | } | |
1725 | /* Convert the value to the desired mode. */ | |
1726 | if (mode != tmode) | |
1727 | op0 = convert_to_mode (tmode, op0, 1); | |
1728 | ||
1729 | /* Unless the msb of the field used to be the msb when we shifted, | |
1730 | mask out the upper bits. */ | |
1731 | ||
c99d986a | 1732 | if (GET_MODE_BITSIZE (mode) != bitpos + bitsize) |
44037a66 TG |
1733 | return expand_binop (GET_MODE (op0), and_optab, op0, |
1734 | mask_rtx (GET_MODE (op0), 0, bitsize, 0), | |
1735 | target, 1, OPTAB_LIB_WIDEN); | |
1736 | return op0; | |
1737 | } | |
1738 | ||
1739 | /* To extract a signed bit-field, first shift its msb to the msb of the word, | |
1740 | then arithmetic-shift its lsb to the lsb of the word. */ | |
1741 | op0 = force_reg (mode, op0); | |
1742 | if (mode != tmode) | |
1743 | target = 0; | |
1744 | ||
1745 | /* Find the narrowest integer mode that contains the field. */ | |
1746 | ||
1747 | for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode; | |
1748 | mode = GET_MODE_WIDER_MODE (mode)) | |
1749 | if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos) | |
1750 | { | |
1751 | op0 = convert_to_mode (mode, op0, 0); | |
1752 | break; | |
1753 | } | |
1754 | ||
1755 | if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos)) | |
1756 | { | |
04050c69 | 1757 | tree amount |
4a90aeeb | 1758 | = build_int_cst (NULL_TREE, |
7d60be94 | 1759 | GET_MODE_BITSIZE (mode) - (bitsize + bitpos)); |
44037a66 | 1760 | /* Maybe propagate the target for the shift. */ |
f8cfc6aa | 1761 | rtx subtarget = (target != 0 && REG_P (target) ? target : 0); |
44037a66 TG |
1762 | op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1); |
1763 | } | |
1764 | ||
1765 | return expand_shift (RSHIFT_EXPR, mode, op0, | |
4a90aeeb | 1766 | build_int_cst (NULL_TREE, |
7d60be94 | 1767 | GET_MODE_BITSIZE (mode) - bitsize), |
44037a66 TG |
1768 | target, 0); |
1769 | } | |
1770 | \f | |
1771 | /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value | |
1772 | of mode MODE with BITSIZE ones followed by BITPOS zeros, or the | |
1773 | complement of that if COMPLEMENT. The mask is truncated if | |
77295dec DE |
1774 | necessary to the width of mode MODE. The mask is zero-extended if |
1775 | BITSIZE+BITPOS is too small for MODE. */ | |
44037a66 TG |
1776 | |
1777 | static rtx | |
502b8322 | 1778 | mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement) |
44037a66 | 1779 | { |
b1ec3c92 | 1780 | HOST_WIDE_INT masklow, maskhigh; |
44037a66 | 1781 | |
21102f25 RK |
1782 | if (bitsize == 0) |
1783 | masklow = 0; | |
1784 | else if (bitpos < HOST_BITS_PER_WIDE_INT) | |
b1ec3c92 | 1785 | masklow = (HOST_WIDE_INT) -1 << bitpos; |
44037a66 TG |
1786 | else |
1787 | masklow = 0; | |
1788 | ||
b1ec3c92 CH |
1789 | if (bitpos + bitsize < HOST_BITS_PER_WIDE_INT) |
1790 | masklow &= ((unsigned HOST_WIDE_INT) -1 | |
1791 | >> (HOST_BITS_PER_WIDE_INT - bitpos - bitsize)); | |
c410d49e | 1792 | |
b1ec3c92 | 1793 | if (bitpos <= HOST_BITS_PER_WIDE_INT) |
44037a66 TG |
1794 | maskhigh = -1; |
1795 | else | |
b1ec3c92 | 1796 | maskhigh = (HOST_WIDE_INT) -1 << (bitpos - HOST_BITS_PER_WIDE_INT); |
44037a66 | 1797 | |
21102f25 RK |
1798 | if (bitsize == 0) |
1799 | maskhigh = 0; | |
1800 | else if (bitpos + bitsize > HOST_BITS_PER_WIDE_INT) | |
b1ec3c92 CH |
1801 | maskhigh &= ((unsigned HOST_WIDE_INT) -1 |
1802 | >> (2 * HOST_BITS_PER_WIDE_INT - bitpos - bitsize)); | |
44037a66 TG |
1803 | else |
1804 | maskhigh = 0; | |
1805 | ||
1806 | if (complement) | |
1807 | { | |
1808 | maskhigh = ~maskhigh; | |
1809 | masklow = ~masklow; | |
1810 | } | |
1811 | ||
1812 | return immed_double_const (masklow, maskhigh, mode); | |
1813 | } | |
1814 | ||
1815 | /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value | |
1816 | VALUE truncated to BITSIZE bits and then shifted left BITPOS bits. */ | |
1817 | ||
1818 | static rtx | |
502b8322 | 1819 | lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize) |
44037a66 | 1820 | { |
b1ec3c92 CH |
1821 | unsigned HOST_WIDE_INT v = INTVAL (value); |
1822 | HOST_WIDE_INT low, high; | |
44037a66 | 1823 | |
b1ec3c92 CH |
1824 | if (bitsize < HOST_BITS_PER_WIDE_INT) |
1825 | v &= ~((HOST_WIDE_INT) -1 << bitsize); | |
44037a66 | 1826 | |
b1ec3c92 | 1827 | if (bitpos < HOST_BITS_PER_WIDE_INT) |
44037a66 TG |
1828 | { |
1829 | low = v << bitpos; | |
b1ec3c92 | 1830 | high = (bitpos > 0 ? (v >> (HOST_BITS_PER_WIDE_INT - bitpos)) : 0); |
44037a66 TG |
1831 | } |
1832 | else | |
1833 | { | |
1834 | low = 0; | |
b1ec3c92 | 1835 | high = v << (bitpos - HOST_BITS_PER_WIDE_INT); |
44037a66 TG |
1836 | } |
1837 | ||
1838 | return immed_double_const (low, high, mode); | |
1839 | } | |
1840 | \f | |
1841 | /* Extract a bit field that is split across two words | |
1842 | and return an RTX for the result. | |
1843 | ||
1844 | OP0 is the REG, SUBREG or MEM rtx for the first of the two words. | |
1845 | BITSIZE is the field width; BITPOS, position of its first bit, in the word. | |
04050c69 | 1846 | UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend. */ |
44037a66 TG |
1847 | |
1848 | static rtx | |
502b8322 AJ |
1849 | extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize, |
1850 | unsigned HOST_WIDE_INT bitpos, int unsignedp) | |
44037a66 | 1851 | { |
770ae6cc RK |
1852 | unsigned int unit; |
1853 | unsigned int bitsdone = 0; | |
c16ddde3 | 1854 | rtx result = NULL_RTX; |
06c94bce | 1855 | int first = 1; |
44037a66 | 1856 | |
4ee16841 DE |
1857 | /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that |
1858 | much at a time. */ | |
f8cfc6aa | 1859 | if (REG_P (op0) || GET_CODE (op0) == SUBREG) |
4ee16841 DE |
1860 | unit = BITS_PER_WORD; |
1861 | else | |
609023ff | 1862 | unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD); |
4ee16841 | 1863 | |
06c94bce RS |
1864 | while (bitsdone < bitsize) |
1865 | { | |
770ae6cc | 1866 | unsigned HOST_WIDE_INT thissize; |
06c94bce | 1867 | rtx part, word; |
770ae6cc RK |
1868 | unsigned HOST_WIDE_INT thispos; |
1869 | unsigned HOST_WIDE_INT offset; | |
06c94bce RS |
1870 | |
1871 | offset = (bitpos + bitsdone) / unit; | |
1872 | thispos = (bitpos + bitsdone) % unit; | |
1873 | ||
0eb61c19 DE |
1874 | /* THISSIZE must not overrun a word boundary. Otherwise, |
1875 | extract_fixed_bit_field will call us again, and we will mutually | |
1876 | recurse forever. */ | |
1877 | thissize = MIN (bitsize - bitsdone, BITS_PER_WORD); | |
1878 | thissize = MIN (thissize, unit - thispos); | |
06c94bce RS |
1879 | |
1880 | /* If OP0 is a register, then handle OFFSET here. | |
5f57dff0 JW |
1881 | |
1882 | When handling multiword bitfields, extract_bit_field may pass | |
1883 | down a word_mode SUBREG of a larger REG for a bitfield that actually | |
1884 | crosses a word boundary. Thus, for a SUBREG, we must find | |
1885 | the current word starting from the base register. */ | |
1886 | if (GET_CODE (op0) == SUBREG) | |
1887 | { | |
ddef6bc7 JJ |
1888 | int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset; |
1889 | word = operand_subword_force (SUBREG_REG (op0), word_offset, | |
5f57dff0 JW |
1890 | GET_MODE (SUBREG_REG (op0))); |
1891 | offset = 0; | |
1892 | } | |
f8cfc6aa | 1893 | else if (REG_P (op0)) |
06c94bce RS |
1894 | { |
1895 | word = operand_subword_force (op0, offset, GET_MODE (op0)); | |
1896 | offset = 0; | |
1897 | } | |
1898 | else | |
1899 | word = op0; | |
1900 | ||
06c94bce | 1901 | /* Extract the parts in bit-counting order, |
0eb61c19 DE |
1902 | whose meaning is determined by BYTES_PER_UNIT. |
1903 | OFFSET is in UNITs, and UNIT is in bits. | |
1904 | extract_fixed_bit_field wants offset in bytes. */ | |
1905 | part = extract_fixed_bit_field (word_mode, word, | |
1906 | offset * unit / BITS_PER_UNIT, | |
04050c69 | 1907 | thissize, thispos, 0, 1); |
06c94bce | 1908 | bitsdone += thissize; |
44037a66 | 1909 | |
06c94bce | 1910 | /* Shift this part into place for the result. */ |
f76b9db2 ILT |
1911 | if (BYTES_BIG_ENDIAN) |
1912 | { | |
1913 | if (bitsize != bitsdone) | |
1914 | part = expand_shift (LSHIFT_EXPR, word_mode, part, | |
7d60be94 NS |
1915 | build_int_cst (NULL_TREE, bitsize - bitsdone), |
1916 | 0, 1); | |
f76b9db2 ILT |
1917 | } |
1918 | else | |
1919 | { | |
1920 | if (bitsdone != thissize) | |
1921 | part = expand_shift (LSHIFT_EXPR, word_mode, part, | |
4a90aeeb | 1922 | build_int_cst (NULL_TREE, |
7d60be94 | 1923 | bitsdone - thissize), 0, 1); |
f76b9db2 | 1924 | } |
44037a66 | 1925 | |
06c94bce RS |
1926 | if (first) |
1927 | result = part; | |
1928 | else | |
1929 | /* Combine the parts with bitwise or. This works | |
1930 | because we extracted each part as an unsigned bit field. */ | |
1931 | result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1, | |
1932 | OPTAB_LIB_WIDEN); | |
1933 | ||
1934 | first = 0; | |
1935 | } | |
44037a66 TG |
1936 | |
1937 | /* Unsigned bit field: we are done. */ | |
1938 | if (unsignedp) | |
1939 | return result; | |
1940 | /* Signed bit field: sign-extend with two arithmetic shifts. */ | |
1941 | result = expand_shift (LSHIFT_EXPR, word_mode, result, | |
7d60be94 | 1942 | build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize), |
b1ec3c92 | 1943 | NULL_RTX, 0); |
44037a66 | 1944 | return expand_shift (RSHIFT_EXPR, word_mode, result, |
7d60be94 NS |
1945 | build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize), |
1946 | NULL_RTX, 0); | |
44037a66 TG |
1947 | } |
1948 | \f | |
18b526e8 RS |
1949 | /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving |
1950 | the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than | |
1951 | MODE, fill the upper bits with zeros. Fail if the layout of either | |
1952 | mode is unknown (as for CC modes) or if the extraction would involve | |
1953 | unprofitable mode punning. Return the value on success, otherwise | |
1954 | return null. | |
1955 | ||
1956 | This is different from gen_lowpart* in these respects: | |
1957 | ||
1958 | - the returned value must always be considered an rvalue | |
1959 | ||
1960 | - when MODE is wider than SRC_MODE, the extraction involves | |
1961 | a zero extension | |
1962 | ||
1963 | - when MODE is smaller than SRC_MODE, the extraction involves | |
1964 | a truncation (and is thus subject to TRULY_NOOP_TRUNCATION). | |
1965 | ||
1966 | In other words, this routine performs a computation, whereas the | |
1967 | gen_lowpart* routines are conceptually lvalue or rvalue subreg | |
1968 | operations. */ | |
1969 | ||
1970 | rtx | |
1971 | extract_low_bits (enum machine_mode mode, enum machine_mode src_mode, rtx src) | |
1972 | { | |
1973 | enum machine_mode int_mode, src_int_mode; | |
1974 | ||
1975 | if (mode == src_mode) | |
1976 | return src; | |
1977 | ||
1978 | if (CONSTANT_P (src)) | |
1979 | return simplify_gen_subreg (mode, src, src_mode, | |
1980 | subreg_lowpart_offset (mode, src_mode)); | |
1981 | ||
1982 | if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC) | |
1983 | return NULL_RTX; | |
1984 | ||
1985 | if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode) | |
1986 | && MODES_TIEABLE_P (mode, src_mode)) | |
1987 | { | |
1988 | rtx x = gen_lowpart_common (mode, src); | |
1989 | if (x) | |
1990 | return x; | |
1991 | } | |
1992 | ||
1993 | src_int_mode = int_mode_for_mode (src_mode); | |
1994 | int_mode = int_mode_for_mode (mode); | |
1995 | if (src_int_mode == BLKmode || int_mode == BLKmode) | |
1996 | return NULL_RTX; | |
1997 | ||
1998 | if (!MODES_TIEABLE_P (src_int_mode, src_mode)) | |
1999 | return NULL_RTX; | |
2000 | if (!MODES_TIEABLE_P (int_mode, mode)) | |
2001 | return NULL_RTX; | |
2002 | ||
2003 | src = gen_lowpart (src_int_mode, src); | |
2004 | src = convert_modes (int_mode, src_int_mode, src, true); | |
2005 | src = gen_lowpart (mode, src); | |
2006 | return src; | |
2007 | } | |
2008 | \f | |
44037a66 TG |
2009 | /* Add INC into TARGET. */ |
2010 | ||
2011 | void | |
502b8322 | 2012 | expand_inc (rtx target, rtx inc) |
44037a66 TG |
2013 | { |
2014 | rtx value = expand_binop (GET_MODE (target), add_optab, | |
2015 | target, inc, | |
2016 | target, 0, OPTAB_LIB_WIDEN); | |
2017 | if (value != target) | |
2018 | emit_move_insn (target, value); | |
2019 | } | |
2020 | ||
2021 | /* Subtract DEC from TARGET. */ | |
2022 | ||
2023 | void | |
502b8322 | 2024 | expand_dec (rtx target, rtx dec) |
44037a66 TG |
2025 | { |
2026 | rtx value = expand_binop (GET_MODE (target), sub_optab, | |
2027 | target, dec, | |
2028 | target, 0, OPTAB_LIB_WIDEN); | |
2029 | if (value != target) | |
2030 | emit_move_insn (target, value); | |
2031 | } | |
2032 | \f | |
2033 | /* Output a shift instruction for expression code CODE, | |
2034 | with SHIFTED being the rtx for the value to shift, | |
2035 | and AMOUNT the tree for the amount to shift by. | |
2036 | Store the result in the rtx TARGET, if that is convenient. | |
2037 | If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic. | |
2038 | Return the rtx for where the value is. */ | |
2039 | ||
2040 | rtx | |
502b8322 AJ |
2041 | expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted, |
2042 | tree amount, rtx target, int unsignedp) | |
44037a66 | 2043 | { |
b3694847 SS |
2044 | rtx op1, temp = 0; |
2045 | int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR); | |
2046 | int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR); | |
71d46ca5 MM |
2047 | optab lshift_optab = ashl_optab; |
2048 | optab rshift_arith_optab = ashr_optab; | |
2049 | optab rshift_uns_optab = lshr_optab; | |
2050 | optab lrotate_optab = rotl_optab; | |
2051 | optab rrotate_optab = rotr_optab; | |
2052 | enum machine_mode op1_mode; | |
44037a66 TG |
2053 | int try; |
2054 | ||
71d46ca5 MM |
2055 | op1 = expand_normal (amount); |
2056 | op1_mode = GET_MODE (op1); | |
2057 | ||
2058 | /* Determine whether the shift/rotate amount is a vector, or scalar. If the | |
2059 | shift amount is a vector, use the vector/vector shift patterns. */ | |
2060 | if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode)) | |
2061 | { | |
2062 | lshift_optab = vashl_optab; | |
2063 | rshift_arith_optab = vashr_optab; | |
2064 | rshift_uns_optab = vlshr_optab; | |
2065 | lrotate_optab = vrotl_optab; | |
2066 | rrotate_optab = vrotr_optab; | |
2067 | } | |
2068 | ||
44037a66 TG |
2069 | /* Previously detected shift-counts computed by NEGATE_EXPR |
2070 | and shifted in the other direction; but that does not work | |
2071 | on all machines. */ | |
2072 | ||
166cdf4a RH |
2073 | if (SHIFT_COUNT_TRUNCATED) |
2074 | { | |
2075 | if (GET_CODE (op1) == CONST_INT | |
0fb7aeda | 2076 | && ((unsigned HOST_WIDE_INT) INTVAL (op1) >= |
c84e2712 | 2077 | (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode))) |
0fb7aeda | 2078 | op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1) |
166cdf4a RH |
2079 | % GET_MODE_BITSIZE (mode)); |
2080 | else if (GET_CODE (op1) == SUBREG | |
4e07d762 | 2081 | && subreg_lowpart_p (op1)) |
166cdf4a RH |
2082 | op1 = SUBREG_REG (op1); |
2083 | } | |
2ab0a5c4 | 2084 | |
44037a66 TG |
2085 | if (op1 == const0_rtx) |
2086 | return shifted; | |
2087 | ||
15bad393 RS |
2088 | /* Check whether its cheaper to implement a left shift by a constant |
2089 | bit count by a sequence of additions. */ | |
2090 | if (code == LSHIFT_EXPR | |
2091 | && GET_CODE (op1) == CONST_INT | |
2092 | && INTVAL (op1) > 0 | |
2093 | && INTVAL (op1) < GET_MODE_BITSIZE (mode) | |
cb2eb96f RS |
2094 | && INTVAL (op1) < MAX_BITS_PER_WORD |
2095 | && shift_cost[mode][INTVAL (op1)] > INTVAL (op1) * add_cost[mode] | |
2096 | && shift_cost[mode][INTVAL (op1)] != MAX_COST) | |
15bad393 RS |
2097 | { |
2098 | int i; | |
2099 | for (i = 0; i < INTVAL (op1); i++) | |
2100 | { | |
2101 | temp = force_reg (mode, shifted); | |
2102 | shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX, | |
2103 | unsignedp, OPTAB_LIB_WIDEN); | |
2104 | } | |
2105 | return shifted; | |
2106 | } | |
2107 | ||
44037a66 TG |
2108 | for (try = 0; temp == 0 && try < 3; try++) |
2109 | { | |
2110 | enum optab_methods methods; | |
2111 | ||
2112 | if (try == 0) | |
2113 | methods = OPTAB_DIRECT; | |
2114 | else if (try == 1) | |
2115 | methods = OPTAB_WIDEN; | |
2116 | else | |
2117 | methods = OPTAB_LIB_WIDEN; | |
2118 | ||
2119 | if (rotate) | |
2120 | { | |
2121 | /* Widening does not work for rotation. */ | |
2122 | if (methods == OPTAB_WIDEN) | |
2123 | continue; | |
2124 | else if (methods == OPTAB_LIB_WIDEN) | |
cbec710e | 2125 | { |
39e71615 | 2126 | /* If we have been unable to open-code this by a rotation, |
cbec710e RK |
2127 | do it as the IOR of two shifts. I.e., to rotate A |
2128 | by N bits, compute (A << N) | ((unsigned) A >> (C - N)) | |
2129 | where C is the bitsize of A. | |
2130 | ||
2131 | It is theoretically possible that the target machine might | |
2132 | not be able to perform either shift and hence we would | |
2133 | be making two libcalls rather than just the one for the | |
2134 | shift (similarly if IOR could not be done). We will allow | |
2135 | this extremely unlikely lossage to avoid complicating the | |
2136 | code below. */ | |
2137 | ||
39e71615 | 2138 | rtx subtarget = target == shifted ? 0 : target; |
fa00f91b | 2139 | tree new_amount, other_amount; |
39e71615 RK |
2140 | rtx temp1; |
2141 | tree type = TREE_TYPE (amount); | |
fa00f91b RS |
2142 | if (GET_MODE (op1) != TYPE_MODE (type) |
2143 | && GET_MODE (op1) != VOIDmode) | |
2144 | op1 = convert_to_mode (TYPE_MODE (type), op1, 1); | |
2145 | new_amount = make_tree (type, op1); | |
2146 | other_amount | |
4845b383 KH |
2147 | = fold_build2 (MINUS_EXPR, type, |
2148 | build_int_cst (type, GET_MODE_BITSIZE (mode)), | |
fa00f91b | 2149 | new_amount); |
39e71615 RK |
2150 | |
2151 | shifted = force_reg (mode, shifted); | |
2152 | ||
2153 | temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR, | |
6231646a | 2154 | mode, shifted, new_amount, 0, 1); |
39e71615 | 2155 | temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR, |
6231646a | 2156 | mode, shifted, other_amount, subtarget, 1); |
39e71615 RK |
2157 | return expand_binop (mode, ior_optab, temp, temp1, target, |
2158 | unsignedp, methods); | |
cbec710e | 2159 | } |
44037a66 TG |
2160 | |
2161 | temp = expand_binop (mode, | |
71d46ca5 | 2162 | left ? lrotate_optab : rrotate_optab, |
44037a66 TG |
2163 | shifted, op1, target, unsignedp, methods); |
2164 | } | |
2165 | else if (unsignedp) | |
a34958c9 | 2166 | temp = expand_binop (mode, |
71d46ca5 | 2167 | left ? lshift_optab : rshift_uns_optab, |
a34958c9 | 2168 | shifted, op1, target, unsignedp, methods); |
44037a66 TG |
2169 | |
2170 | /* Do arithmetic shifts. | |
2171 | Also, if we are going to widen the operand, we can just as well | |
2172 | use an arithmetic right-shift instead of a logical one. */ | |
2173 | if (temp == 0 && ! rotate | |
2174 | && (! unsignedp || (! left && methods == OPTAB_WIDEN))) | |
2175 | { | |
2176 | enum optab_methods methods1 = methods; | |
2177 | ||
2178 | /* If trying to widen a log shift to an arithmetic shift, | |
2179 | don't accept an arithmetic shift of the same size. */ | |
2180 | if (unsignedp) | |
2181 | methods1 = OPTAB_MUST_WIDEN; | |
2182 | ||
2183 | /* Arithmetic shift */ | |
2184 | ||
2185 | temp = expand_binop (mode, | |
71d46ca5 | 2186 | left ? lshift_optab : rshift_arith_optab, |
44037a66 TG |
2187 | shifted, op1, target, unsignedp, methods1); |
2188 | } | |
2189 | ||
711a5e64 | 2190 | /* We used to try extzv here for logical right shifts, but that was |
c410d49e | 2191 | only useful for one machine, the VAX, and caused poor code |
711a5e64 RK |
2192 | generation there for lshrdi3, so the code was deleted and a |
2193 | define_expand for lshrsi3 was added to vax.md. */ | |
44037a66 TG |
2194 | } |
2195 | ||
5b0264cb | 2196 | gcc_assert (temp); |
44037a66 TG |
2197 | return temp; |
2198 | } | |
2199 | \f | |
0178027c KH |
2200 | enum alg_code { |
2201 | alg_unknown, | |
2202 | alg_zero, | |
2203 | alg_m, alg_shift, | |
2204 | alg_add_t_m2, | |
2205 | alg_sub_t_m2, | |
2206 | alg_add_factor, | |
2207 | alg_sub_factor, | |
2208 | alg_add_t2_m, | |
2209 | alg_sub_t2_m, | |
2210 | alg_impossible | |
2211 | }; | |
26276705 RS |
2212 | |
2213 | /* This structure holds the "cost" of a multiply sequence. The | |
2214 | "cost" field holds the total rtx_cost of every operator in the | |
2215 | synthetic multiplication sequence, hence cost(a op b) is defined | |
2216 | as rtx_cost(op) + cost(a) + cost(b), where cost(leaf) is zero. | |
2217 | The "latency" field holds the minimum possible latency of the | |
2218 | synthetic multiply, on a hypothetical infinitely parallel CPU. | |
2219 | This is the critical path, or the maximum height, of the expression | |
2220 | tree which is the sum of rtx_costs on the most expensive path from | |
2221 | any leaf to the root. Hence latency(a op b) is defined as zero for | |
2222 | leaves and rtx_cost(op) + max(latency(a), latency(b)) otherwise. */ | |
2223 | ||
2224 | struct mult_cost { | |
2225 | short cost; /* Total rtx_cost of the multiplication sequence. */ | |
2226 | short latency; /* The latency of the multiplication sequence. */ | |
2227 | }; | |
2228 | ||
2229 | /* This macro is used to compare a pointer to a mult_cost against an | |
2230 | single integer "rtx_cost" value. This is equivalent to the macro | |
2231 | CHEAPER_MULT_COST(X,Z) where Z = {Y,Y}. */ | |
2232 | #define MULT_COST_LESS(X,Y) ((X)->cost < (Y) \ | |
2233 | || ((X)->cost == (Y) && (X)->latency < (Y))) | |
2234 | ||
2235 | /* This macro is used to compare two pointers to mult_costs against | |
2236 | each other. The macro returns true if X is cheaper than Y. | |
2237 | Currently, the cheaper of two mult_costs is the one with the | |
2238 | lower "cost". If "cost"s are tied, the lower latency is cheaper. */ | |
2239 | #define CHEAPER_MULT_COST(X,Y) ((X)->cost < (Y)->cost \ | |
2240 | || ((X)->cost == (Y)->cost \ | |
2241 | && (X)->latency < (Y)->latency)) | |
44037a66 TG |
2242 | |
2243 | /* This structure records a sequence of operations. | |
2244 | `ops' is the number of operations recorded. | |
2245 | `cost' is their total cost. | |
2246 | The operations are stored in `op' and the corresponding | |
b385aeda RK |
2247 | logarithms of the integer coefficients in `log'. |
2248 | ||
44037a66 | 2249 | These are the operations: |
b385aeda RK |
2250 | alg_zero total := 0; |
2251 | alg_m total := multiplicand; | |
b2fb324c | 2252 | alg_shift total := total * coeff |
7963ac37 RK |
2253 | alg_add_t_m2 total := total + multiplicand * coeff; |
2254 | alg_sub_t_m2 total := total - multiplicand * coeff; | |
2255 | alg_add_factor total := total * coeff + total; | |
2256 | alg_sub_factor total := total * coeff - total; | |
2257 | alg_add_t2_m total := total * coeff + multiplicand; | |
2258 | alg_sub_t2_m total := total * coeff - multiplicand; | |
b385aeda RK |
2259 | |
2260 | The first operand must be either alg_zero or alg_m. */ | |
44037a66 | 2261 | |
44037a66 TG |
2262 | struct algorithm |
2263 | { | |
26276705 | 2264 | struct mult_cost cost; |
7963ac37 | 2265 | short ops; |
b385aeda RK |
2266 | /* The size of the OP and LOG fields are not directly related to the |
2267 | word size, but the worst-case algorithms will be if we have few | |
2268 | consecutive ones or zeros, i.e., a multiplicand like 10101010101... | |
2269 | In that case we will generate shift-by-2, add, shift-by-2, add,..., | |
2270 | in total wordsize operations. */ | |
44037a66 | 2271 | enum alg_code op[MAX_BITS_PER_WORD]; |
b385aeda | 2272 | char log[MAX_BITS_PER_WORD]; |
44037a66 TG |
2273 | }; |
2274 | ||
7b13ee6b KH |
2275 | /* The entry for our multiplication cache/hash table. */ |
2276 | struct alg_hash_entry { | |
2277 | /* The number we are multiplying by. */ | |
33b881ca | 2278 | unsigned HOST_WIDE_INT t; |
7b13ee6b KH |
2279 | |
2280 | /* The mode in which we are multiplying something by T. */ | |
2281 | enum machine_mode mode; | |
2282 | ||
2283 | /* The best multiplication algorithm for t. */ | |
2284 | enum alg_code alg; | |
0178027c KH |
2285 | |
2286 | /* The cost of multiplication if ALG_CODE is not alg_impossible. | |
2287 | Otherwise, the cost within which multiplication by T is | |
2288 | impossible. */ | |
2289 | struct mult_cost cost; | |
7b13ee6b KH |
2290 | }; |
2291 | ||
2292 | /* The number of cache/hash entries. */ | |
33b881ca PB |
2293 | #if HOST_BITS_PER_WIDE_INT == 64 |
2294 | #define NUM_ALG_HASH_ENTRIES 1031 | |
2295 | #else | |
7b13ee6b | 2296 | #define NUM_ALG_HASH_ENTRIES 307 |
33b881ca | 2297 | #endif |
7b13ee6b KH |
2298 | |
2299 | /* Each entry of ALG_HASH caches alg_code for some integer. This is | |
2300 | actually a hash table. If we have a collision, that the older | |
2301 | entry is kicked out. */ | |
2302 | static struct alg_hash_entry alg_hash[NUM_ALG_HASH_ENTRIES]; | |
2303 | ||
8efc8980 RS |
2304 | /* Indicates the type of fixup needed after a constant multiplication. |
2305 | BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that | |
2306 | the result should be negated, and ADD_VARIANT means that the | |
2307 | multiplicand should be added to the result. */ | |
2308 | enum mult_variant {basic_variant, negate_variant, add_variant}; | |
2309 | ||
41c64ac0 | 2310 | static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT, |
26276705 | 2311 | const struct mult_cost *, enum machine_mode mode); |
8efc8980 | 2312 | static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT, |
f258e38b | 2313 | struct algorithm *, enum mult_variant *, int); |
8efc8980 RS |
2314 | static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx, |
2315 | const struct algorithm *, enum mult_variant); | |
502b8322 | 2316 | static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int, |
e71c0aa7 | 2317 | int, rtx *, int *, int *); |
502b8322 | 2318 | static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int); |
8efc8980 | 2319 | static rtx extract_high_half (enum machine_mode, rtx); |
0d282692 | 2320 | static rtx expand_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int); |
8efc8980 RS |
2321 | static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx, |
2322 | int, int); | |
44037a66 | 2323 | /* Compute and return the best algorithm for multiplying by T. |
7963ac37 RK |
2324 | The algorithm must cost less than cost_limit |
2325 | If retval.cost >= COST_LIMIT, no algorithm was found and all | |
41c64ac0 RS |
2326 | other field of the returned struct are undefined. |
2327 | MODE is the machine mode of the multiplication. */ | |
44037a66 | 2328 | |
819126a6 | 2329 | static void |
502b8322 | 2330 | synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t, |
26276705 | 2331 | const struct mult_cost *cost_limit, enum machine_mode mode) |
44037a66 | 2332 | { |
b2fb324c | 2333 | int m; |
52786026 | 2334 | struct algorithm *alg_in, *best_alg; |
26276705 RS |
2335 | struct mult_cost best_cost; |
2336 | struct mult_cost new_limit; | |
2337 | int op_cost, op_latency; | |
b2fb324c | 2338 | unsigned HOST_WIDE_INT q; |
0792ab19 | 2339 | int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode)); |
7b13ee6b KH |
2340 | int hash_index; |
2341 | bool cache_hit = false; | |
2342 | enum alg_code cache_alg = alg_zero; | |
44037a66 | 2343 | |
7963ac37 RK |
2344 | /* Indicate that no algorithm is yet found. If no algorithm |
2345 | is found, this value will be returned and indicate failure. */ | |
26276705 | 2346 | alg_out->cost.cost = cost_limit->cost + 1; |
3ab0f290 | 2347 | alg_out->cost.latency = cost_limit->latency + 1; |
44037a66 | 2348 | |
26276705 RS |
2349 | if (cost_limit->cost < 0 |
2350 | || (cost_limit->cost == 0 && cost_limit->latency <= 0)) | |
819126a6 | 2351 | return; |
44037a66 | 2352 | |
0792ab19 RS |
2353 | /* Restrict the bits of "t" to the multiplication's mode. */ |
2354 | t &= GET_MODE_MASK (mode); | |
2355 | ||
b385aeda RK |
2356 | /* t == 1 can be done in zero cost. */ |
2357 | if (t == 1) | |
b2fb324c | 2358 | { |
819126a6 | 2359 | alg_out->ops = 1; |
26276705 RS |
2360 | alg_out->cost.cost = 0; |
2361 | alg_out->cost.latency = 0; | |
819126a6 RK |
2362 | alg_out->op[0] = alg_m; |
2363 | return; | |
b2fb324c RK |
2364 | } |
2365 | ||
b385aeda RK |
2366 | /* t == 0 sometimes has a cost. If it does and it exceeds our limit, |
2367 | fail now. */ | |
819126a6 | 2368 | if (t == 0) |
b385aeda | 2369 | { |
26276705 | 2370 | if (MULT_COST_LESS (cost_limit, zero_cost)) |
819126a6 | 2371 | return; |
b385aeda RK |
2372 | else |
2373 | { | |
819126a6 | 2374 | alg_out->ops = 1; |
26276705 RS |
2375 | alg_out->cost.cost = zero_cost; |
2376 | alg_out->cost.latency = zero_cost; | |
819126a6 RK |
2377 | alg_out->op[0] = alg_zero; |
2378 | return; | |
b385aeda RK |
2379 | } |
2380 | } | |
2381 | ||
52786026 RK |
2382 | /* We'll be needing a couple extra algorithm structures now. */ |
2383 | ||
703ad42b KG |
2384 | alg_in = alloca (sizeof (struct algorithm)); |
2385 | best_alg = alloca (sizeof (struct algorithm)); | |
26276705 | 2386 | best_cost = *cost_limit; |
52786026 | 2387 | |
7b13ee6b KH |
2388 | /* Compute the hash index. */ |
2389 | hash_index = (t ^ (unsigned int) mode) % NUM_ALG_HASH_ENTRIES; | |
2390 | ||
2391 | /* See if we already know what to do for T. */ | |
2392 | if (alg_hash[hash_index].t == t | |
2393 | && alg_hash[hash_index].mode == mode | |
2394 | && alg_hash[hash_index].alg != alg_unknown) | |
2395 | { | |
7b13ee6b | 2396 | cache_alg = alg_hash[hash_index].alg; |
0178027c KH |
2397 | |
2398 | if (cache_alg == alg_impossible) | |
7b13ee6b | 2399 | { |
0178027c KH |
2400 | /* The cache tells us that it's impossible to synthesize |
2401 | multiplication by T within alg_hash[hash_index].cost. */ | |
2402 | if (!CHEAPER_MULT_COST (&alg_hash[hash_index].cost, cost_limit)) | |
2403 | /* COST_LIMIT is at least as restrictive as the one | |
2404 | recorded in the hash table, in which case we have no | |
2405 | hope of synthesizing a multiplication. Just | |
2406 | return. */ | |
2407 | return; | |
2408 | ||
2409 | /* If we get here, COST_LIMIT is less restrictive than the | |
2410 | one recorded in the hash table, so we may be able to | |
2411 | synthesize a multiplication. Proceed as if we didn't | |
2412 | have the cache entry. */ | |
2413 | } | |
2414 | else | |
2415 | { | |
2416 | if (CHEAPER_MULT_COST (cost_limit, &alg_hash[hash_index].cost)) | |
2417 | /* The cached algorithm shows that this multiplication | |
2418 | requires more cost than COST_LIMIT. Just return. This | |
2419 | way, we don't clobber this cache entry with | |
2420 | alg_impossible but retain useful information. */ | |
2421 | return; | |
7b13ee6b | 2422 | |
0178027c KH |
2423 | cache_hit = true; |
2424 | ||
2425 | switch (cache_alg) | |
2426 | { | |
2427 | case alg_shift: | |
2428 | goto do_alg_shift; | |
7b13ee6b | 2429 | |
0178027c KH |
2430 | case alg_add_t_m2: |
2431 | case alg_sub_t_m2: | |
2432 | goto do_alg_addsub_t_m2; | |
7b13ee6b | 2433 | |
0178027c KH |
2434 | case alg_add_factor: |
2435 | case alg_sub_factor: | |
2436 | goto do_alg_addsub_factor; | |
7b13ee6b | 2437 | |
0178027c KH |
2438 | case alg_add_t2_m: |
2439 | goto do_alg_add_t2_m; | |
7b13ee6b | 2440 | |
0178027c KH |
2441 | case alg_sub_t2_m: |
2442 | goto do_alg_sub_t2_m; | |
2443 | ||
2444 | default: | |
2445 | gcc_unreachable (); | |
2446 | } | |
7b13ee6b KH |
2447 | } |
2448 | } | |
2449 | ||
b385aeda RK |
2450 | /* If we have a group of zero bits at the low-order part of T, try |
2451 | multiplying by the remaining bits and then doing a shift. */ | |
2452 | ||
b2fb324c | 2453 | if ((t & 1) == 0) |
44037a66 | 2454 | { |
7b13ee6b | 2455 | do_alg_shift: |
b2fb324c | 2456 | m = floor_log2 (t & -t); /* m = number of low zero bits */ |
0792ab19 | 2457 | if (m < maxm) |
44037a66 | 2458 | { |
02a65aef | 2459 | q = t >> m; |
15bad393 RS |
2460 | /* The function expand_shift will choose between a shift and |
2461 | a sequence of additions, so the observed cost is given as | |
2462 | MIN (m * add_cost[mode], shift_cost[mode][m]). */ | |
26276705 RS |
2463 | op_cost = m * add_cost[mode]; |
2464 | if (shift_cost[mode][m] < op_cost) | |
2465 | op_cost = shift_cost[mode][m]; | |
2466 | new_limit.cost = best_cost.cost - op_cost; | |
2467 | new_limit.latency = best_cost.latency - op_cost; | |
2468 | synth_mult (alg_in, q, &new_limit, mode); | |
2469 | ||
2470 | alg_in->cost.cost += op_cost; | |
2471 | alg_in->cost.latency += op_cost; | |
2472 | if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) | |
02a65aef R |
2473 | { |
2474 | struct algorithm *x; | |
26276705 | 2475 | best_cost = alg_in->cost; |
02a65aef R |
2476 | x = alg_in, alg_in = best_alg, best_alg = x; |
2477 | best_alg->log[best_alg->ops] = m; | |
2478 | best_alg->op[best_alg->ops] = alg_shift; | |
02a65aef | 2479 | } |
819126a6 | 2480 | } |
7b13ee6b KH |
2481 | if (cache_hit) |
2482 | goto done; | |
819126a6 RK |
2483 | } |
2484 | ||
2485 | /* If we have an odd number, add or subtract one. */ | |
2486 | if ((t & 1) != 0) | |
2487 | { | |
2488 | unsigned HOST_WIDE_INT w; | |
2489 | ||
7b13ee6b | 2490 | do_alg_addsub_t_m2: |
819126a6 RK |
2491 | for (w = 1; (w & t) != 0; w <<= 1) |
2492 | ; | |
31031edd | 2493 | /* If T was -1, then W will be zero after the loop. This is another |
c410d49e | 2494 | case where T ends with ...111. Handling this with (T + 1) and |
31031edd JL |
2495 | subtract 1 produces slightly better code and results in algorithm |
2496 | selection much faster than treating it like the ...0111 case | |
2497 | below. */ | |
2498 | if (w == 0 | |
2499 | || (w > 2 | |
2500 | /* Reject the case where t is 3. | |
2501 | Thus we prefer addition in that case. */ | |
2502 | && t != 3)) | |
819126a6 RK |
2503 | { |
2504 | /* T ends with ...111. Multiply by (T + 1) and subtract 1. */ | |
2505 | ||
26276705 RS |
2506 | op_cost = add_cost[mode]; |
2507 | new_limit.cost = best_cost.cost - op_cost; | |
2508 | new_limit.latency = best_cost.latency - op_cost; | |
2509 | synth_mult (alg_in, t + 1, &new_limit, mode); | |
b2fb324c | 2510 | |
26276705 RS |
2511 | alg_in->cost.cost += op_cost; |
2512 | alg_in->cost.latency += op_cost; | |
2513 | if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) | |
44037a66 | 2514 | { |
b2fb324c | 2515 | struct algorithm *x; |
26276705 | 2516 | best_cost = alg_in->cost; |
b2fb324c | 2517 | x = alg_in, alg_in = best_alg, best_alg = x; |
819126a6 RK |
2518 | best_alg->log[best_alg->ops] = 0; |
2519 | best_alg->op[best_alg->ops] = alg_sub_t_m2; | |
44037a66 | 2520 | } |
44037a66 | 2521 | } |
819126a6 RK |
2522 | else |
2523 | { | |
2524 | /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */ | |
44037a66 | 2525 | |
26276705 RS |
2526 | op_cost = add_cost[mode]; |
2527 | new_limit.cost = best_cost.cost - op_cost; | |
2528 | new_limit.latency = best_cost.latency - op_cost; | |
2529 | synth_mult (alg_in, t - 1, &new_limit, mode); | |
819126a6 | 2530 | |
26276705 RS |
2531 | alg_in->cost.cost += op_cost; |
2532 | alg_in->cost.latency += op_cost; | |
2533 | if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) | |
819126a6 RK |
2534 | { |
2535 | struct algorithm *x; | |
26276705 | 2536 | best_cost = alg_in->cost; |
819126a6 RK |
2537 | x = alg_in, alg_in = best_alg, best_alg = x; |
2538 | best_alg->log[best_alg->ops] = 0; | |
2539 | best_alg->op[best_alg->ops] = alg_add_t_m2; | |
819126a6 RK |
2540 | } |
2541 | } | |
7b13ee6b KH |
2542 | if (cache_hit) |
2543 | goto done; | |
819126a6 | 2544 | } |
63610db9 | 2545 | |
44037a66 | 2546 | /* Look for factors of t of the form |
7963ac37 | 2547 | t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)). |
44037a66 | 2548 | If we find such a factor, we can multiply by t using an algorithm that |
7963ac37 | 2549 | multiplies by q, shift the result by m and add/subtract it to itself. |
44037a66 | 2550 | |
7963ac37 RK |
2551 | We search for large factors first and loop down, even if large factors |
2552 | are less probable than small; if we find a large factor we will find a | |
2553 | good sequence quickly, and therefore be able to prune (by decreasing | |
2554 | COST_LIMIT) the search. */ | |
2555 | ||
7b13ee6b | 2556 | do_alg_addsub_factor: |
7963ac37 | 2557 | for (m = floor_log2 (t - 1); m >= 2; m--) |
44037a66 | 2558 | { |
7963ac37 | 2559 | unsigned HOST_WIDE_INT d; |
44037a66 | 2560 | |
7963ac37 | 2561 | d = ((unsigned HOST_WIDE_INT) 1 << m) + 1; |
7b13ee6b KH |
2562 | if (t % d == 0 && t > d && m < maxm |
2563 | && (!cache_hit || cache_alg == alg_add_factor)) | |
44037a66 | 2564 | { |
26276705 RS |
2565 | /* If the target has a cheap shift-and-add instruction use |
2566 | that in preference to a shift insn followed by an add insn. | |
2567 | Assume that the shift-and-add is "atomic" with a latency | |
a37739c1 | 2568 | equal to its cost, otherwise assume that on superscalar |
26276705 RS |
2569 | hardware the shift may be executed concurrently with the |
2570 | earlier steps in the algorithm. */ | |
2571 | op_cost = add_cost[mode] + shift_cost[mode][m]; | |
2572 | if (shiftadd_cost[mode][m] < op_cost) | |
2573 | { | |
2574 | op_cost = shiftadd_cost[mode][m]; | |
2575 | op_latency = op_cost; | |
2576 | } | |
2577 | else | |
2578 | op_latency = add_cost[mode]; | |
2579 | ||
2580 | new_limit.cost = best_cost.cost - op_cost; | |
2581 | new_limit.latency = best_cost.latency - op_latency; | |
2582 | synth_mult (alg_in, t / d, &new_limit, mode); | |
44037a66 | 2583 | |
26276705 RS |
2584 | alg_in->cost.cost += op_cost; |
2585 | alg_in->cost.latency += op_latency; | |
2586 | if (alg_in->cost.latency < op_cost) | |
2587 | alg_in->cost.latency = op_cost; | |
2588 | if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) | |
44037a66 | 2589 | { |
7963ac37 | 2590 | struct algorithm *x; |
26276705 | 2591 | best_cost = alg_in->cost; |
7963ac37 | 2592 | x = alg_in, alg_in = best_alg, best_alg = x; |
b385aeda | 2593 | best_alg->log[best_alg->ops] = m; |
819126a6 | 2594 | best_alg->op[best_alg->ops] = alg_add_factor; |
44037a66 | 2595 | } |
c0b262c1 TG |
2596 | /* Other factors will have been taken care of in the recursion. */ |
2597 | break; | |
44037a66 TG |
2598 | } |
2599 | ||
7963ac37 | 2600 | d = ((unsigned HOST_WIDE_INT) 1 << m) - 1; |
7b13ee6b KH |
2601 | if (t % d == 0 && t > d && m < maxm |
2602 | && (!cache_hit || cache_alg == alg_sub_factor)) | |
44037a66 | 2603 | { |
26276705 RS |
2604 | /* If the target has a cheap shift-and-subtract insn use |
2605 | that in preference to a shift insn followed by a sub insn. | |
2606 | Assume that the shift-and-sub is "atomic" with a latency | |
2607 | equal to it's cost, otherwise assume that on superscalar | |
2608 | hardware the shift may be executed concurrently with the | |
2609 | earlier steps in the algorithm. */ | |
2610 | op_cost = add_cost[mode] + shift_cost[mode][m]; | |
2611 | if (shiftsub_cost[mode][m] < op_cost) | |
2612 | { | |
2613 | op_cost = shiftsub_cost[mode][m]; | |
2614 | op_latency = op_cost; | |
2615 | } | |
2616 | else | |
2617 | op_latency = add_cost[mode]; | |
2618 | ||
2619 | new_limit.cost = best_cost.cost - op_cost; | |
417c735c | 2620 | new_limit.latency = best_cost.latency - op_latency; |
26276705 | 2621 | synth_mult (alg_in, t / d, &new_limit, mode); |
44037a66 | 2622 | |
26276705 RS |
2623 | alg_in->cost.cost += op_cost; |
2624 | alg_in->cost.latency += op_latency; | |
2625 | if (alg_in->cost.latency < op_cost) | |
2626 | alg_in->cost.latency = op_cost; | |
2627 | if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) | |
44037a66 | 2628 | { |
7963ac37 | 2629 | struct algorithm *x; |
26276705 | 2630 | best_cost = alg_in->cost; |
7963ac37 | 2631 | x = alg_in, alg_in = best_alg, best_alg = x; |
b385aeda | 2632 | best_alg->log[best_alg->ops] = m; |
819126a6 | 2633 | best_alg->op[best_alg->ops] = alg_sub_factor; |
44037a66 | 2634 | } |
c0b262c1 | 2635 | break; |
44037a66 TG |
2636 | } |
2637 | } | |
7b13ee6b KH |
2638 | if (cache_hit) |
2639 | goto done; | |
44037a66 | 2640 | |
7963ac37 RK |
2641 | /* Try shift-and-add (load effective address) instructions, |
2642 | i.e. do a*3, a*5, a*9. */ | |
2643 | if ((t & 1) != 0) | |
2644 | { | |
7b13ee6b | 2645 | do_alg_add_t2_m: |
7963ac37 RK |
2646 | q = t - 1; |
2647 | q = q & -q; | |
2648 | m = exact_log2 (q); | |
0792ab19 | 2649 | if (m >= 0 && m < maxm) |
b385aeda | 2650 | { |
26276705 RS |
2651 | op_cost = shiftadd_cost[mode][m]; |
2652 | new_limit.cost = best_cost.cost - op_cost; | |
2653 | new_limit.latency = best_cost.latency - op_cost; | |
2654 | synth_mult (alg_in, (t - 1) >> m, &new_limit, mode); | |
2655 | ||
2656 | alg_in->cost.cost += op_cost; | |
2657 | alg_in->cost.latency += op_cost; | |
2658 | if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) | |
5eebe2eb RK |
2659 | { |
2660 | struct algorithm *x; | |
26276705 | 2661 | best_cost = alg_in->cost; |
5eebe2eb RK |
2662 | x = alg_in, alg_in = best_alg, best_alg = x; |
2663 | best_alg->log[best_alg->ops] = m; | |
819126a6 | 2664 | best_alg->op[best_alg->ops] = alg_add_t2_m; |
5eebe2eb | 2665 | } |
7963ac37 | 2666 | } |
7b13ee6b KH |
2667 | if (cache_hit) |
2668 | goto done; | |
44037a66 | 2669 | |
7b13ee6b | 2670 | do_alg_sub_t2_m: |
7963ac37 RK |
2671 | q = t + 1; |
2672 | q = q & -q; | |
2673 | m = exact_log2 (q); | |
0792ab19 | 2674 | if (m >= 0 && m < maxm) |
b385aeda | 2675 | { |
26276705 RS |
2676 | op_cost = shiftsub_cost[mode][m]; |
2677 | new_limit.cost = best_cost.cost - op_cost; | |
2678 | new_limit.latency = best_cost.latency - op_cost; | |
2679 | synth_mult (alg_in, (t + 1) >> m, &new_limit, mode); | |
2680 | ||
2681 | alg_in->cost.cost += op_cost; | |
2682 | alg_in->cost.latency += op_cost; | |
2683 | if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) | |
5eebe2eb RK |
2684 | { |
2685 | struct algorithm *x; | |
26276705 | 2686 | best_cost = alg_in->cost; |
5eebe2eb RK |
2687 | x = alg_in, alg_in = best_alg, best_alg = x; |
2688 | best_alg->log[best_alg->ops] = m; | |
819126a6 | 2689 | best_alg->op[best_alg->ops] = alg_sub_t2_m; |
5eebe2eb | 2690 | } |
7963ac37 | 2691 | } |
7b13ee6b KH |
2692 | if (cache_hit) |
2693 | goto done; | |
7963ac37 | 2694 | } |
44037a66 | 2695 | |
7b13ee6b | 2696 | done: |
3ab0f290 DJ |
2697 | /* If best_cost has not decreased, we have not found any algorithm. */ |
2698 | if (!CHEAPER_MULT_COST (&best_cost, cost_limit)) | |
0178027c KH |
2699 | { |
2700 | /* We failed to find an algorithm. Record alg_impossible for | |
2701 | this case (that is, <T, MODE, COST_LIMIT>) so that next time | |
2702 | we are asked to find an algorithm for T within the same or | |
2703 | lower COST_LIMIT, we can immediately return to the | |
2704 | caller. */ | |
2705 | alg_hash[hash_index].t = t; | |
2706 | alg_hash[hash_index].mode = mode; | |
2707 | alg_hash[hash_index].alg = alg_impossible; | |
2708 | alg_hash[hash_index].cost = *cost_limit; | |
2709 | return; | |
2710 | } | |
3ab0f290 | 2711 | |
7b13ee6b KH |
2712 | /* Cache the result. */ |
2713 | if (!cache_hit) | |
2714 | { | |
2715 | alg_hash[hash_index].t = t; | |
2716 | alg_hash[hash_index].mode = mode; | |
2717 | alg_hash[hash_index].alg = best_alg->op[best_alg->ops]; | |
0178027c KH |
2718 | alg_hash[hash_index].cost.cost = best_cost.cost; |
2719 | alg_hash[hash_index].cost.latency = best_cost.latency; | |
7b13ee6b KH |
2720 | } |
2721 | ||
52786026 RK |
2722 | /* If we are getting a too long sequence for `struct algorithm' |
2723 | to record, make this search fail. */ | |
2724 | if (best_alg->ops == MAX_BITS_PER_WORD) | |
2725 | return; | |
2726 | ||
819126a6 RK |
2727 | /* Copy the algorithm from temporary space to the space at alg_out. |
2728 | We avoid using structure assignment because the majority of | |
2729 | best_alg is normally undefined, and this is a critical function. */ | |
2730 | alg_out->ops = best_alg->ops + 1; | |
26276705 | 2731 | alg_out->cost = best_cost; |
4e135bdd KG |
2732 | memcpy (alg_out->op, best_alg->op, |
2733 | alg_out->ops * sizeof *alg_out->op); | |
2734 | memcpy (alg_out->log, best_alg->log, | |
2735 | alg_out->ops * sizeof *alg_out->log); | |
44037a66 TG |
2736 | } |
2737 | \f | |
d1a6adeb | 2738 | /* Find the cheapest way of multiplying a value of mode MODE by VAL. |
8efc8980 RS |
2739 | Try three variations: |
2740 | ||
2741 | - a shift/add sequence based on VAL itself | |
2742 | - a shift/add sequence based on -VAL, followed by a negation | |
2743 | - a shift/add sequence based on VAL - 1, followed by an addition. | |
2744 | ||
f258e38b UW |
2745 | Return true if the cheapest of these cost less than MULT_COST, |
2746 | describing the algorithm in *ALG and final fixup in *VARIANT. */ | |
8efc8980 RS |
2747 | |
2748 | static bool | |
2749 | choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val, | |
f258e38b UW |
2750 | struct algorithm *alg, enum mult_variant *variant, |
2751 | int mult_cost) | |
8efc8980 | 2752 | { |
8efc8980 | 2753 | struct algorithm alg2; |
26276705 RS |
2754 | struct mult_cost limit; |
2755 | int op_cost; | |
8efc8980 | 2756 | |
18eaea7f RS |
2757 | /* Fail quickly for impossible bounds. */ |
2758 | if (mult_cost < 0) | |
2759 | return false; | |
2760 | ||
2761 | /* Ensure that mult_cost provides a reasonable upper bound. | |
2762 | Any constant multiplication can be performed with less | |
2763 | than 2 * bits additions. */ | |
2764 | op_cost = 2 * GET_MODE_BITSIZE (mode) * add_cost[mode]; | |
2765 | if (mult_cost > op_cost) | |
2766 | mult_cost = op_cost; | |
2767 | ||
8efc8980 | 2768 | *variant = basic_variant; |
26276705 RS |
2769 | limit.cost = mult_cost; |
2770 | limit.latency = mult_cost; | |
2771 | synth_mult (alg, val, &limit, mode); | |
8efc8980 RS |
2772 | |
2773 | /* This works only if the inverted value actually fits in an | |
2774 | `unsigned int' */ | |
2775 | if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode)) | |
2776 | { | |
26276705 RS |
2777 | op_cost = neg_cost[mode]; |
2778 | if (MULT_COST_LESS (&alg->cost, mult_cost)) | |
2779 | { | |
2780 | limit.cost = alg->cost.cost - op_cost; | |
2781 | limit.latency = alg->cost.latency - op_cost; | |
2782 | } | |
2783 | else | |
2784 | { | |
2785 | limit.cost = mult_cost - op_cost; | |
2786 | limit.latency = mult_cost - op_cost; | |
2787 | } | |
2788 | ||
2789 | synth_mult (&alg2, -val, &limit, mode); | |
2790 | alg2.cost.cost += op_cost; | |
2791 | alg2.cost.latency += op_cost; | |
2792 | if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost)) | |
8efc8980 RS |
2793 | *alg = alg2, *variant = negate_variant; |
2794 | } | |
2795 | ||
2796 | /* This proves very useful for division-by-constant. */ | |
26276705 RS |
2797 | op_cost = add_cost[mode]; |
2798 | if (MULT_COST_LESS (&alg->cost, mult_cost)) | |
2799 | { | |
2800 | limit.cost = alg->cost.cost - op_cost; | |
2801 | limit.latency = alg->cost.latency - op_cost; | |
2802 | } | |
2803 | else | |
2804 | { | |
2805 | limit.cost = mult_cost - op_cost; | |
2806 | limit.latency = mult_cost - op_cost; | |
2807 | } | |
2808 | ||
2809 | synth_mult (&alg2, val - 1, &limit, mode); | |
2810 | alg2.cost.cost += op_cost; | |
2811 | alg2.cost.latency += op_cost; | |
2812 | if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost)) | |
8efc8980 RS |
2813 | *alg = alg2, *variant = add_variant; |
2814 | ||
26276705 | 2815 | return MULT_COST_LESS (&alg->cost, mult_cost); |
8efc8980 RS |
2816 | } |
2817 | ||
2818 | /* A subroutine of expand_mult, used for constant multiplications. | |
2819 | Multiply OP0 by VAL in mode MODE, storing the result in TARGET if | |
2820 | convenient. Use the shift/add sequence described by ALG and apply | |
2821 | the final fixup specified by VARIANT. */ | |
2822 | ||
2823 | static rtx | |
2824 | expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val, | |
2825 | rtx target, const struct algorithm *alg, | |
2826 | enum mult_variant variant) | |
2827 | { | |
2828 | HOST_WIDE_INT val_so_far; | |
2829 | rtx insn, accum, tem; | |
2830 | int opno; | |
2831 | enum machine_mode nmode; | |
2832 | ||
d448860e JH |
2833 | /* Avoid referencing memory over and over and invalid sharing |
2834 | on SUBREGs. */ | |
2835 | op0 = force_reg (mode, op0); | |
8efc8980 RS |
2836 | |
2837 | /* ACCUM starts out either as OP0 or as a zero, depending on | |
2838 | the first operation. */ | |
2839 | ||
2840 | if (alg->op[0] == alg_zero) | |
2841 | { | |
2842 | accum = copy_to_mode_reg (mode, const0_rtx); | |
2843 | val_so_far = 0; | |
2844 | } | |
2845 | else if (alg->op[0] == alg_m) | |
2846 | { | |
2847 | accum = copy_to_mode_reg (mode, op0); | |
2848 | val_so_far = 1; | |
2849 | } | |
2850 | else | |
5b0264cb | 2851 | gcc_unreachable (); |
8efc8980 RS |
2852 | |
2853 | for (opno = 1; opno < alg->ops; opno++) | |
2854 | { | |
2855 | int log = alg->log[opno]; | |
7c27e184 | 2856 | rtx shift_subtarget = optimize ? 0 : accum; |
8efc8980 RS |
2857 | rtx add_target |
2858 | = (opno == alg->ops - 1 && target != 0 && variant != add_variant | |
7c27e184 | 2859 | && !optimize) |
8efc8980 | 2860 | ? target : 0; |
7c27e184 | 2861 | rtx accum_target = optimize ? 0 : accum; |
8efc8980 RS |
2862 | |
2863 | switch (alg->op[opno]) | |
2864 | { | |
2865 | case alg_shift: | |
2866 | accum = expand_shift (LSHIFT_EXPR, mode, accum, | |
7d60be94 | 2867 | build_int_cst (NULL_TREE, log), |
4a90aeeb | 2868 | NULL_RTX, 0); |
8efc8980 RS |
2869 | val_so_far <<= log; |
2870 | break; | |
2871 | ||
2872 | case alg_add_t_m2: | |
2873 | tem = expand_shift (LSHIFT_EXPR, mode, op0, | |
7d60be94 | 2874 | build_int_cst (NULL_TREE, log), |
4a90aeeb | 2875 | NULL_RTX, 0); |
8efc8980 RS |
2876 | accum = force_operand (gen_rtx_PLUS (mode, accum, tem), |
2877 | add_target ? add_target : accum_target); | |
2878 | val_so_far += (HOST_WIDE_INT) 1 << log; | |
2879 | break; | |
2880 | ||
2881 | case alg_sub_t_m2: | |
2882 | tem = expand_shift (LSHIFT_EXPR, mode, op0, | |
7d60be94 | 2883 | build_int_cst (NULL_TREE, log), |
4a90aeeb | 2884 | NULL_RTX, 0); |
8efc8980 RS |
2885 | accum = force_operand (gen_rtx_MINUS (mode, accum, tem), |
2886 | add_target ? add_target : accum_target); | |
2887 | val_so_far -= (HOST_WIDE_INT) 1 << log; | |
2888 | break; | |
2889 | ||
2890 | case alg_add_t2_m: | |
2891 | accum = expand_shift (LSHIFT_EXPR, mode, accum, | |
7d60be94 | 2892 | build_int_cst (NULL_TREE, log), |
4a90aeeb | 2893 | shift_subtarget, |
8efc8980 RS |
2894 | 0); |
2895 | accum = force_operand (gen_rtx_PLUS (mode, accum, op0), | |
2896 | add_target ? add_target : accum_target); | |
2897 | val_so_far = (val_so_far << log) + 1; | |
2898 | break; | |
2899 | ||
2900 | case alg_sub_t2_m: | |
2901 | accum = expand_shift (LSHIFT_EXPR, mode, accum, | |
7d60be94 | 2902 | build_int_cst (NULL_TREE, log), |
4a90aeeb | 2903 | shift_subtarget, 0); |
8efc8980 RS |
2904 | accum = force_operand (gen_rtx_MINUS (mode, accum, op0), |
2905 | add_target ? add_target : accum_target); | |
2906 | val_so_far = (val_so_far << log) - 1; | |
2907 | break; | |
2908 | ||
2909 | case alg_add_factor: | |
2910 | tem = expand_shift (LSHIFT_EXPR, mode, accum, | |
7d60be94 | 2911 | build_int_cst (NULL_TREE, log), |
4a90aeeb | 2912 | NULL_RTX, 0); |
8efc8980 RS |
2913 | accum = force_operand (gen_rtx_PLUS (mode, accum, tem), |
2914 | add_target ? add_target : accum_target); | |
2915 | val_so_far += val_so_far << log; | |
2916 | break; | |
2917 | ||
2918 | case alg_sub_factor: | |
2919 | tem = expand_shift (LSHIFT_EXPR, mode, accum, | |
7d60be94 | 2920 | build_int_cst (NULL_TREE, log), |
4a90aeeb | 2921 | NULL_RTX, 0); |
8efc8980 | 2922 | accum = force_operand (gen_rtx_MINUS (mode, tem, accum), |
7c27e184 PB |
2923 | (add_target |
2924 | ? add_target : (optimize ? 0 : tem))); | |
8efc8980 RS |
2925 | val_so_far = (val_so_far << log) - val_so_far; |
2926 | break; | |
2927 | ||
2928 | default: | |
5b0264cb | 2929 | gcc_unreachable (); |
8efc8980 RS |
2930 | } |
2931 | ||
2932 | /* Write a REG_EQUAL note on the last insn so that we can cse | |
2933 | multiplication sequences. Note that if ACCUM is a SUBREG, | |
2934 | we've set the inner register and must properly indicate | |
2935 | that. */ | |
2936 | ||
2937 | tem = op0, nmode = mode; | |
2938 | if (GET_CODE (accum) == SUBREG) | |
2939 | { | |
2940 | nmode = GET_MODE (SUBREG_REG (accum)); | |
2941 | tem = gen_lowpart (nmode, op0); | |
2942 | } | |
2943 | ||
2944 | insn = get_last_insn (); | |
2945 | set_unique_reg_note (insn, REG_EQUAL, | |
d448860e JH |
2946 | gen_rtx_MULT (nmode, tem, |
2947 | GEN_INT (val_so_far))); | |
8efc8980 RS |
2948 | } |
2949 | ||
2950 | if (variant == negate_variant) | |
2951 | { | |
2952 | val_so_far = -val_so_far; | |
2953 | accum = expand_unop (mode, neg_optab, accum, target, 0); | |
2954 | } | |
2955 | else if (variant == add_variant) | |
2956 | { | |
2957 | val_so_far = val_so_far + 1; | |
2958 | accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target); | |
2959 | } | |
2960 | ||
42eb30b5 ZW |
2961 | /* Compare only the bits of val and val_so_far that are significant |
2962 | in the result mode, to avoid sign-/zero-extension confusion. */ | |
2963 | val &= GET_MODE_MASK (mode); | |
2964 | val_so_far &= GET_MODE_MASK (mode); | |
5b0264cb | 2965 | gcc_assert (val == val_so_far); |
8efc8980 RS |
2966 | |
2967 | return accum; | |
2968 | } | |
2969 | ||
44037a66 TG |
/* Perform a multiplication and return an rtx for the result.
   MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
   TARGET is a suggestion for where to store the result (an rtx).
   UNSIGNEDP nonzero means the operands are treated as unsigned.

   We check specially for a constant integer as OP1.
   If you want this check for OP0 as well, then before calling
   you should swap the two operands if OP0 would be constant.  */

rtx
expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
	     int unsignedp)
{
  enum mult_variant variant;
  struct algorithm algorithm;
  int max_cost;

  /* Trivial constant multipliers first.  Handling const0_rtx here
     allows us to use zero as a rogue value for coeff below.  */
  if (op1 == const0_rtx)
    return const0_rtx;
  if (op1 == const1_rtx)
    return op0;
  if (op1 == constm1_rtx)
    /* x * -1 is a negation; use the trapping variant for signed
       integers when -ftrapv is in effect.  */
    return expand_unop (mode,
			GET_MODE_CLASS (mode) == MODE_INT
			&& !unsignedp && flag_trapv
			? negv_optab : neg_optab,
			op0, target, 0);

  /* These are the operations that are potentially turned into a sequence
     of shifts and additions.  Skipped for signed -ftrapv, where the
     synthesized sequence would not trap on overflow.  */
  if (SCALAR_INT_MODE_P (mode)
      && (unsignedp || !flag_trapv))
    {
      HOST_WIDE_INT coeff = 0;
      /* A throwaway register used only for rtx_cost queries below.  */
      rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);

      /* synth_mult does an `unsigned int' multiply.  As long as the mode is
	 less than or equal in size to `unsigned int' this doesn't matter.
	 If the mode is larger than `unsigned int', then synth_mult works
	 only if the constant value exactly fits in an `unsigned int' without
	 any truncation.  This means that multiplying by negative values does
	 not work; results are off by 2^32 on a 32 bit machine.  */

      if (GET_CODE (op1) == CONST_INT)
	{
	  /* Attempt to handle multiplication of DImode values by negative
	     coefficients, by performing the multiplication by a positive
	     multiplier and then inverting the result.  */
	  if (INTVAL (op1) < 0
	      && GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT)
	    {
	      /* Its safe to use -INTVAL (op1) even for INT_MIN, as the
		 result is interpreted as an unsigned coefficient.
		 Exclude cost of op0 from max_cost to match the cost
		 calculation of the synth_mult.  */
	      max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET)
			 - neg_cost[mode];
	      if (max_cost > 0
		  && choose_mult_variant (mode, -INTVAL (op1), &algorithm,
					  &variant, max_cost))
		{
		  rtx temp = expand_mult_const (mode, op0, -INTVAL (op1),
						NULL_RTX, &algorithm,
						variant);
		  return expand_unop (mode, neg_optab, temp, target, 0);
		}
	    }
	  else coeff = INTVAL (op1);
	}
      else if (GET_CODE (op1) == CONST_DOUBLE)
	{
	  /* If we are multiplying in DImode, it may still be a win
	     to try to work with shifts and adds.  */
	  if (CONST_DOUBLE_HIGH (op1) == 0)
	    coeff = CONST_DOUBLE_LOW (op1);
	  else if (CONST_DOUBLE_LOW (op1) == 0
		   && EXACT_POWER_OF_2_OR_ZERO_P (CONST_DOUBLE_HIGH (op1)))
	    {
	      /* A power of two whose single set bit is in the high word:
		 the multiplication is a wide left shift.  */
	      int shift = floor_log2 (CONST_DOUBLE_HIGH (op1))
			  + HOST_BITS_PER_WIDE_INT;
	      return expand_shift (LSHIFT_EXPR, mode, op0,
				   build_int_cst (NULL_TREE, shift),
				   target, unsignedp);
	    }
	}

      /* We used to test optimize here, on the grounds that it's better to
	 produce a smaller program when -O is not used.  But this causes
	 such a terrible slowdown sometimes that it seems better to always
	 use synth_mult.  */
      if (coeff != 0)
	{
	  /* Special case powers of two.  */
	  if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
	    return expand_shift (LSHIFT_EXPR, mode, op0,
				 build_int_cst (NULL_TREE, floor_log2 (coeff)),
				 target, unsignedp);

	  /* Exclude cost of op0 from max_cost to match the cost
	     calculation of the synth_mult.  */
	  max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET);
	  if (choose_mult_variant (mode, coeff, &algorithm, &variant,
				   max_cost))
	    return expand_mult_const (mode, op0, coeff, target,
				      &algorithm, variant);
	}
    }

  /* Keep any constant in OP1, so the special cases below see it there.  */
  if (GET_CODE (op0) == CONST_DOUBLE)
    {
      rtx temp = op0;
      op0 = op1;
      op1 = temp;
    }

  /* Expand x*2.0 as x+x.  */
  if (GET_CODE (op1) == CONST_DOUBLE
      && SCALAR_FLOAT_MODE_P (mode))
    {
      REAL_VALUE_TYPE d;
      REAL_VALUE_FROM_CONST_DOUBLE (d, op1);

      if (REAL_VALUES_EQUAL (d, dconst2))
	{
	  op0 = force_reg (GET_MODE (op0), op0);
	  return expand_binop (mode, add_optab, op0, op0,
			       target, unsignedp, OPTAB_LIB_WIDEN);
	}
    }

  /* Fall back to a general multiply insn or library call.
     This used to use umul_optab if unsigned, but for non-widening multiply
     there is no difference between signed and unsigned.  */
  op0 = expand_binop (mode,
		      ! unsignedp
		      && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT)
		      ? smulv_optab : smul_optab,
		      op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
  gcc_assert (op0);
  return op0;
}
3111 | \f | |
55c2d311 TG |
/* Return the smallest n such that 2**n >= X.  */

int
ceil_log2 (unsigned HOST_WIDE_INT x)
{
  /* floor_log2 of X-1, plus one, rounds the base-2 logarithm up.
     NOTE(review): for X == 0 the argument wraps to all-ones — callers
     are presumed to pass X >= 1; verify at call sites.  */
  return floor_log2 (x - 1) + 1;
}
3119 | ||
/* Choose a minimal N + 1 bit approximation to 1/D that can be used to
   replace division by D, and put the least significant N bits of the result
   in *MULTIPLIER_PTR and return the most significant bit.

   The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
   needed precision is in PRECISION (should be <= N).

   PRECISION should be as small as possible so this function can choose
   multiplier more freely.

   The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
   is to be used for a final right shift is placed in *POST_SHIFT_PTR.

   Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
   where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */

static
unsigned HOST_WIDE_INT
choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
		   rtx *multiplier_ptr, int *post_shift_ptr, int *lgup_ptr)
{
  /* The candidate multipliers mlow and mhigh are kept as double-word
     values in (hi, lo) pairs, since they can be up to N+1 bits wide.  */
  HOST_WIDE_INT mhigh_hi, mlow_hi;
  unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
  int lgup, post_shift;
  int pow, pow2;
  unsigned HOST_WIDE_INT nl, dummy1;
  HOST_WIDE_INT nh, dummy2;

  /* lgup = ceil(log2(divisor)); */
  lgup = ceil_log2 (d);

  gcc_assert (lgup <= n);

  pow = n + lgup;
  pow2 = n + lgup - precision;

  /* We could handle this with some effort, but this case is much
     better handled directly with a scc insn, so rely on caller using
     that.  */
  gcc_assert (pow != 2 * HOST_BITS_PER_WIDE_INT);

  /* mlow = 2^(N + lgup)/d */
  if (pow >= HOST_BITS_PER_WIDE_INT)
    {
      nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
      nl = 0;
    }
  else
    {
      nh = 0;
      nl = (unsigned HOST_WIDE_INT) 1 << pow;
    }
  div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
			&mlow_lo, &mlow_hi, &dummy1, &dummy2);

  /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
  if (pow2 >= HOST_BITS_PER_WIDE_INT)
    nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
  else
    nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
  div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
			&mhigh_lo, &mhigh_hi, &dummy1, &dummy2);

  gcc_assert (!mhigh_hi || nh - d < d);
  gcc_assert (mhigh_hi <= 1 && mlow_hi <= 1);
  /* Assert that mlow < mhigh.  */
  gcc_assert (mlow_hi < mhigh_hi
	      || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo));

  /* If precision == N, then mlow, mhigh exceed 2^N
     (but they do not exceed 2^(N+1)).  */

  /* Reduce to lowest terms: halve both candidates (reducing post_shift
     accordingly) for as long as they remain distinct, keeping the
     multiplier and final shift as small as possible.  */
  for (post_shift = lgup; post_shift > 0; post_shift--)
    {
      unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
      unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
      if (ml_lo >= mh_lo)
	break;

      mlow_hi = 0;
      mlow_lo = ml_lo;
      mhigh_hi = 0;
      mhigh_lo = mh_lo;
    }

  *post_shift_ptr = post_shift;
  *lgup_ptr = lgup;
  if (n < HOST_BITS_PER_WIDE_INT)
    {
      unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
      *multiplier_ptr = GEN_INT (mhigh_lo & mask);
      /* Return value is the (N+1)st bit of the chosen multiplier.  */
      return mhigh_lo >= mask;
    }
  else
    {
      *multiplier_ptr = GEN_INT (mhigh_lo);
      return mhigh_hi;
    }
}
3220 | ||
3221 | /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is | |
3222 | congruent to 1 (mod 2**N). */ | |
3223 | ||
3224 | static unsigned HOST_WIDE_INT | |
502b8322 | 3225 | invert_mod2n (unsigned HOST_WIDE_INT x, int n) |
55c2d311 | 3226 | { |
0f41302f | 3227 | /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */ |
55c2d311 TG |
3228 | |
3229 | /* The algorithm notes that the choice y = x satisfies | |
3230 | x*y == 1 mod 2^3, since x is assumed odd. | |
3231 | Each iteration doubles the number of bits of significance in y. */ | |
3232 | ||
3233 | unsigned HOST_WIDE_INT mask; | |
3234 | unsigned HOST_WIDE_INT y = x; | |
3235 | int nbit = 3; | |
3236 | ||
3237 | mask = (n == HOST_BITS_PER_WIDE_INT | |
3238 | ? ~(unsigned HOST_WIDE_INT) 0 | |
3239 | : ((unsigned HOST_WIDE_INT) 1 << n) - 1); | |
3240 | ||
3241 | while (nbit < n) | |
3242 | { | |
3243 | y = y * (2 - x*y) & mask; /* Modulo 2^N */ | |
3244 | nbit *= 2; | |
3245 | } | |
3246 | return y; | |
3247 | } | |
3248 | ||
/* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
   flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
   product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
   to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
   become signed.

   The result is put in TARGET if that is convenient.

   MODE is the mode of operation.  */

rtx
expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
			     rtx op1, rtx target, int unsignedp)
{
  rtx tem;
  /* Going signed -> unsigned the correction is added; unsigned -> signed
     it is subtracted.  */
  enum rtx_code adj_code = unsignedp ? PLUS : MINUS;

  /* tem = op1 masked by the sign bit of op0, i.e. op1 where op0 is
     negative and 0 elsewhere (arithmetic shift replicates the sign).  */
  tem = expand_shift (RSHIFT_EXPR, mode, op0,
		      build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
		      NULL_RTX, 0);
  tem = expand_and (mode, tem, op1, NULL_RTX);
  adj_operand
    = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
		     adj_operand);

  /* Symmetrically, correct for the sign of op1 using op0.  */
  tem = expand_shift (RSHIFT_EXPR, mode, op1,
		      build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
		      NULL_RTX, 0);
  tem = expand_and (mode, tem, op0, NULL_RTX);
  target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
			  target);

  return target;
}
3283 | ||
8efc8980 | 3284 | /* Subroutine of expand_mult_highpart. Return the MODE high part of OP. */ |
55c2d311 | 3285 | |
8efc8980 RS |
3286 | static rtx |
3287 | extract_high_half (enum machine_mode mode, rtx op) | |
3288 | { | |
3289 | enum machine_mode wider_mode; | |
55c2d311 | 3290 | |
8efc8980 RS |
3291 | if (mode == word_mode) |
3292 | return gen_highpart (mode, op); | |
71af73bb | 3293 | |
15ed7b52 JG |
3294 | gcc_assert (!SCALAR_FLOAT_MODE_P (mode)); |
3295 | ||
8efc8980 RS |
3296 | wider_mode = GET_MODE_WIDER_MODE (mode); |
3297 | op = expand_shift (RSHIFT_EXPR, wider_mode, op, | |
7d60be94 | 3298 | build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode)), 0, 1); |
8efc8980 RS |
3299 | return convert_modes (mode, wider_mode, op, 0); |
3300 | } | |
55c2d311 | 3301 | |
8efc8980 RS |
/* Like expand_mult_highpart, but only consider using a multiplication
   optab.  OP1 is an rtx for the constant operand.  Each strategy below
   is attempted only if its estimated cost is below MAX_COST; returns 0
   if none applies.  */

static rtx
expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
			    rtx target, int unsignedp, int max_cost)
{
  rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
  enum machine_mode wider_mode;
  optab moptab;
  rtx tem;
  int size;

  gcc_assert (!SCALAR_FLOAT_MODE_P (mode));

  wider_mode = GET_MODE_WIDER_MODE (mode);
  size = GET_MODE_BITSIZE (mode);

  /* Firstly, try using a multiplication insn that only generates the needed
     high part of the product, and in the sign flavor of unsignedp.  */
  if (mul_highpart_cost[mode] < max_cost)
    {
      moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
      tem = expand_binop (mode, moptab, op0, narrow_op1, target,
			  unsignedp, OPTAB_DIRECT);
      if (tem)
	return tem;
    }

  /* Secondly, same as above, but use sign flavor opposite of unsignedp.
     Need to adjust the result after the multiplication.  */
  if (size - 1 < BITS_PER_WORD
      && (mul_highpart_cost[mode] + 2 * shift_cost[mode][size-1]
	  + 4 * add_cost[mode] < max_cost))
    {
      moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
      tem = expand_binop (mode, moptab, op0, narrow_op1, target,
			  unsignedp, OPTAB_DIRECT);
      if (tem)
	/* We used the wrong signedness.  Adjust the result.  */
	return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
					    tem, unsignedp);
    }

  /* Try widening multiplication.  */
  moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
  if (optab_handler (moptab, wider_mode)->insn_code != CODE_FOR_nothing
      && mul_widen_cost[wider_mode] < max_cost)
    {
      tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
			  unsignedp, OPTAB_WIDEN);
      if (tem)
	return extract_high_half (mode, tem);
    }

  /* Try widening the mode and perform a non-widening multiplication.  */
  if (optab_handler (smul_optab, wider_mode)->insn_code != CODE_FOR_nothing
      && size - 1 < BITS_PER_WORD
      && mul_cost[wider_mode] + shift_cost[mode][size-1] < max_cost)
    {
      rtx insns, wop0, wop1;

      /* We need to widen the operands, for example to ensure the
	 constant multiplier is correctly sign or zero extended.
	 Use a sequence to clean-up any instructions emitted by
	 the conversions if things don't work out.  */
      start_sequence ();
      wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
      wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
      tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
			  unsignedp, OPTAB_WIDEN);
      insns = get_insns ();
      end_sequence ();

      if (tem)
	{
	  emit_insn (insns);
	  return extract_high_half (mode, tem);
	}
    }

  /* Try widening multiplication of opposite signedness, and adjust.  */
  moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
  if (optab_handler (moptab, wider_mode)->insn_code != CODE_FOR_nothing
      && size - 1 < BITS_PER_WORD
      && (mul_widen_cost[wider_mode] + 2 * shift_cost[mode][size-1]
	  + 4 * add_cost[mode] < max_cost))
    {
      tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
			  NULL_RTX, ! unsignedp, OPTAB_WIDEN);
      if (tem != 0)
	{
	  tem = extract_high_half (mode, tem);
	  /* We used the wrong signedness.  Adjust the result.  */
	  return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
					      target, unsignedp);
	}
    }

  /* No strategy was both applicable and cheap enough.  */
  return 0;
}
71af73bb | 3403 | |
0d282692 RS |
/* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
   putting the high half of the result in TARGET if that is convenient,
   and return where the result is.  If the operation can not be performed,
   0 is returned.

   MODE is the mode of operation and result.

   UNSIGNEDP nonzero means unsigned multiply.

   MAX_COST is the total allowed cost for the expanded RTL.  */

static rtx
expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
		      rtx target, int unsignedp, int max_cost)
{
  enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
  unsigned HOST_WIDE_INT cnst1;
  int extra_cost;
  bool sign_adjust = false;
  enum mult_variant variant;
  struct algorithm alg;
  rtx tem;

  gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
  /* We can't support modes wider than HOST_BITS_PER_WIDE_INT.  */
  gcc_assert (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT);

  cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);

  /* We can't optimize modes wider than BITS_PER_WORD.
     ??? We might be able to perform double-word arithmetic if
     mode == word_mode, however all the cost calculations in
     synth_mult etc. assume single-word operations.  */
  if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
    return expand_mult_highpart_optab (mode, op0, op1, target,
				       unsignedp, max_cost);

  /* Cost of the final shift that extracts the high half.  */
  extra_cost = shift_cost[mode][GET_MODE_BITSIZE (mode) - 1];

  /* Check whether we try to multiply by a negative constant.  */
  if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
    {
      sign_adjust = true;
      extra_cost += add_cost[mode];
    }

  /* See whether shift/add multiplication is cheap enough.  */
  if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
			   max_cost - extra_cost))
    {
      /* See whether the specialized multiplication optabs are
	 cheaper than the shift/add version.  */
      tem = expand_mult_highpart_optab (mode, op0, op1, target, unsignedp,
					alg.cost.cost + extra_cost);
      if (tem)
	return tem;

      /* Multiply in the wider mode, then take the high half.  */
      tem = convert_to_mode (wider_mode, op0, unsignedp);
      tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
      tem = extract_high_half (mode, tem);

      /* Adjust result for signedness.  */
      if (sign_adjust)
	tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);

      return tem;
    }
  return expand_mult_highpart_optab (mode, op0, op1, target,
				     unsignedp, max_cost);
}
0b55e932 RS |
3474 | |
3475 | ||
/* Expand signed modulus of OP0 by a power of two D in mode MODE.
   Returns an rtx holding the remainder, which has the sign of OP0.  */

static rtx
expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
{
  unsigned HOST_WIDE_INT masklow, maskhigh;
  rtx result, temp, shift, label;
  int logd;

  logd = floor_log2 (d);
  result = gen_reg_rtx (mode);

  /* Avoid conditional branches when they're expensive.  */
  if (BRANCH_COST >= 2
      && !optimize_size)
    {
      /* signmask = -1 if op0 < 0, else 0.  */
      rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
				      mode, 0, -1);
      if (signmask)
	{
	  signmask = force_reg (mode, signmask);
	  masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
	  shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);

	  /* Use the rtx_cost of a LSHIFTRT instruction to determine
	     which instruction sequence to use.  If logical right shifts
	     are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
	     use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */

	  temp = gen_rtx_LSHIFTRT (mode, result, shift);
	  if (optab_handler (lshr_optab, mode)->insn_code == CODE_FOR_nothing
	      || rtx_cost (temp, SET) > COSTS_N_INSNS (2))
	    {
	      /* Branch-free: conditionally negate, mask, then negate back.  */
	      temp = expand_binop (mode, xor_optab, op0, signmask,
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      temp = expand_binop (mode, sub_optab, temp, signmask,
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      temp = expand_binop (mode, xor_optab, temp, signmask,
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      temp = expand_binop (mode, sub_optab, temp, signmask,
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	    }
	  else
	    {
	      /* Branch-free: bias by the low bits of the sign mask,
		 mask, then remove the bias again.  */
	      signmask = expand_binop (mode, lshr_optab, signmask, shift,
				       NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      signmask = force_reg (mode, signmask);

	      temp = expand_binop (mode, add_optab, op0, signmask,
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      temp = expand_binop (mode, sub_optab, temp, signmask,
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	    }
	  return temp;
	}
    }

  /* Mask contains the mode's signbit and the significant bits of the
     modulus.  By including the signbit in the operation, many targets
     can avoid an explicit compare operation in the following comparison
     against zero.  */

  masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
  if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
    {
      masklow |= (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1);
      maskhigh = -1;
    }
  else
    maskhigh = (HOST_WIDE_INT) -1
		 << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);

  temp = expand_binop (mode, and_optab, op0,
		       immed_double_const (masklow, maskhigh, mode),
		       result, 1, OPTAB_LIB_WIDEN);
  if (temp != result)
    emit_move_insn (result, temp);

  /* Nonnegative result needs no correction; jump past the fixup.  */
  label = gen_label_rtx ();
  do_cmp_and_jump (result, const0_rtx, GE, mode, label);

  /* Negative case: sign-extend the masked remainder by or-ing in the
     high bits ((result - 1) | -2^logd) + 1.  */
  temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
		       0, OPTAB_LIB_WIDEN);
  masklow = (HOST_WIDE_INT) -1 << logd;
  maskhigh = -1;
  temp = expand_binop (mode, ior_optab, temp,
		       immed_double_const (masklow, maskhigh, mode),
		       result, 1, OPTAB_LIB_WIDEN);
  temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
		       0, OPTAB_LIB_WIDEN);
  if (temp != result)
    emit_move_insn (result, temp);
  emit_label (label);
  return result;
}
39cab019 RS |
3575 | |
/* Expand signed division of OP0 by a power of two D in mode MODE.
   This routine is only called for positive values of D.  */

static rtx
expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
{
  rtx temp, label;
  tree shift;
  int logd;

  logd = floor_log2 (d);
  shift = build_int_cst (NULL_TREE, logd);

  /* Division by 2: add the sign bit to op0, then arithmetic shift.  */
  if (d == 2 && BRANCH_COST >= 1)
    {
      temp = gen_reg_rtx (mode);
      temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
      temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
			   0, OPTAB_LIB_WIDEN);
      return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
    }

#ifdef HAVE_conditional_move
  if (BRANCH_COST >= 2)
    {
      rtx temp2;

      /* ??? emit_conditional_move forces a stack adjustment via
	 compare_from_rtx, so, if the sequence is discarded, it will
	 be lost.  Do it now instead.  */
      do_pending_stack_adjust ();

      /* Conditionally select (op0 + d-1) when op0 is negative, then
	 arithmetic-shift right by logd.  */
      start_sequence ();
      temp2 = copy_to_mode_reg (mode, op0);
      temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
			   NULL_RTX, 0, OPTAB_LIB_WIDEN);
      temp = force_reg (mode, temp);

      /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
      temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
				     mode, temp, temp2, mode, 0);
      if (temp2)
	{
	  rtx seq = get_insns ();
	  end_sequence ();
	  emit_insn (seq);
	  return expand_shift (RSHIFT_EXPR, mode, temp2, shift, NULL_RTX, 0);
	}
      /* Conditional move not available; discard the sequence.  */
      end_sequence ();
    }
#endif

  /* Branch-free fixup: compute (op0 < 0 ? d - 1 : 0), add it to op0,
     then arithmetic-shift right.  */
  if (BRANCH_COST >= 2)
    {
      int ushift = GET_MODE_BITSIZE (mode) - logd;

      temp = gen_reg_rtx (mode);
      temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
      if (shift_cost[mode][ushift] > COSTS_N_INSNS (1))
	temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
			     NULL_RTX, 0, OPTAB_LIB_WIDEN);
      else
	temp = expand_shift (RSHIFT_EXPR, mode, temp,
			     build_int_cst (NULL_TREE, ushift),
			     NULL_RTX, 1);
      temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
			   0, OPTAB_LIB_WIDEN);
      return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
    }

  /* Fallback with an explicit branch: add d-1 only when op0 < 0.  */
  label = gen_label_rtx ();
  temp = copy_to_mode_reg (mode, op0);
  do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
  expand_inc (temp, GEN_INT (d - 1));
  emit_label (label);
  return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
}
55c2d311 | 3653 | \f |
44037a66 TG |
3654 | /* Emit the code to divide OP0 by OP1, putting the result in TARGET |
3655 | if that is convenient, and returning where the result is. | |
3656 | You may request either the quotient or the remainder as the result; | |
3657 | specify REM_FLAG nonzero to get the remainder. | |
3658 | ||
3659 | CODE is the expression code for which kind of division this is; | |
3660 | it controls how rounding is done. MODE is the machine mode to use. | |
3661 | UNSIGNEDP nonzero means do unsigned division. */ | |
3662 | ||
3663 | /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI | |
3664 | and then correct it by or'ing in missing high bits | |
3665 | if result of ANDI is nonzero. | |
3666 | For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result. | |
3667 | This could optimize to a bfexts instruction. | |
3668 | But C doesn't use these operations, so their optimizations are | |
3669 | left for later. */ | |
5353610b R |
3670 | /* ??? For modulo, we don't actually need the highpart of the first product, |
3671 | the low part will do nicely. And for small divisors, the second multiply | |
3672 | can also be a low-part only multiply or even be completely left out. | |
3673 | E.g. to calculate the remainder of a division by 3 with a 32 bit | |
3674 | multiply, multiply with 0x55555556 and extract the upper two bits; | |
3675 | the result is exact for inputs up to 0x1fffffff. | |
3676 | The input range can be reduced by using cross-sum rules. | |
3677 | For odd divisors >= 3, the following table gives right shift counts | |
09da1532 | 3678 | so that if a number is shifted by an integer multiple of the given |
5353610b R |
3679 | amount, the remainder stays the same: |
3680 | 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20, | |
3681 | 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0, | |
3682 | 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0, | |
3683 | 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33, | |
3684 | 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12 | |
3685 | ||
3686 | Cross-sum rules for even numbers can be derived by leaving as many bits | |
3687 | to the right alone as the divisor has zeros to the right. | |
3688 | E.g. if x is an unsigned 32 bit number: | |
3689 | (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28 | |
3690 | */ | |
44037a66 TG |
3691 | |
3692 | rtx | |
502b8322 AJ |
3693 | expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, |
3694 | rtx op0, rtx op1, rtx target, int unsignedp) | |
44037a66 | 3695 | { |
44037a66 | 3696 | enum machine_mode compute_mode; |
b3694847 | 3697 | rtx tquotient; |
55c2d311 TG |
3698 | rtx quotient = 0, remainder = 0; |
3699 | rtx last; | |
2c414fba | 3700 | int size; |
4e430df8 | 3701 | rtx insn, set; |
44037a66 | 3702 | optab optab1, optab2; |
1c4a429a | 3703 | int op1_is_constant, op1_is_pow2 = 0; |
71af73bb | 3704 | int max_cost, extra_cost; |
9ec36da5 | 3705 | static HOST_WIDE_INT last_div_const = 0; |
1c4a429a | 3706 | static HOST_WIDE_INT ext_op1; |
55c2d311 TG |
3707 | |
3708 | op1_is_constant = GET_CODE (op1) == CONST_INT; | |
1c4a429a JH |
3709 | if (op1_is_constant) |
3710 | { | |
3711 | ext_op1 = INTVAL (op1); | |
3712 | if (unsignedp) | |
3713 | ext_op1 &= GET_MODE_MASK (mode); | |
3714 | op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1) | |
3715 | || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1)))); | |
3716 | } | |
55c2d311 TG |
3717 | |
3718 | /* | |
3719 | This is the structure of expand_divmod: | |
3720 | ||
3721 | First comes code to fix up the operands so we can perform the operations | |
3722 | correctly and efficiently. | |
3723 | ||
3724 | Second comes a switch statement with code specific for each rounding mode. | |
3725 | For some special operands this code emits all RTL for the desired | |
69f61901 | 3726 | operation, for other cases, it generates only a quotient and stores it in |
55c2d311 TG |
3727 | QUOTIENT. The case for trunc division/remainder might leave quotient = 0, |
3728 | to indicate that it has not done anything. | |
3729 | ||
69f61901 RK |
3730 | Last comes code that finishes the operation. If QUOTIENT is set and |
3731 | REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If | |
3732 | QUOTIENT is not set, it is computed using trunc rounding. | |
44037a66 | 3733 | |
55c2d311 TG |
3734 | We try to generate special code for division and remainder when OP1 is a |
3735 | constant. If |OP1| = 2**n we can use shifts and some other fast | |
3736 | operations. For other values of OP1, we compute a carefully selected | |
3737 | fixed-point approximation m = 1/OP1, and generate code that multiplies OP0 | |
3738 | by m. | |
3739 | ||
3740 | In all cases but EXACT_DIV_EXPR, this multiplication requires the upper | |
3741 | half of the product. Different strategies for generating the product are | |
3742 | implemented in expand_mult_highpart. | |
3743 | ||
3744 | If what we actually want is the remainder, we generate that by another | |
3745 | by-constant multiplication and a subtraction. */ | |
3746 | ||
3747 | /* We shouldn't be called with OP1 == const1_rtx, but some of the | |
3d32ffd1 TW |
3748 | code below will malfunction if we are, so check here and handle |
3749 | the special case if so. */ | |
3750 | if (op1 == const1_rtx) | |
3751 | return rem_flag ? const0_rtx : op0; | |
3752 | ||
91ce572a CC |
3753 | /* When dividing by -1, we could get an overflow. |
3754 | negv_optab can handle overflows. */ | |
3755 | if (! unsignedp && op1 == constm1_rtx) | |
3756 | { | |
3757 | if (rem_flag) | |
0fb7aeda | 3758 | return const0_rtx; |
91ce572a | 3759 | return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT |
0fb7aeda | 3760 | ? negv_optab : neg_optab, op0, target, 0); |
91ce572a CC |
3761 | } |
3762 | ||
bc1c7e93 RK |
3763 | if (target |
3764 | /* Don't use the function value register as a target | |
3765 | since we have to read it as well as write it, | |
3766 | and function-inlining gets confused by this. */ | |
3767 | && ((REG_P (target) && REG_FUNCTION_VALUE_P (target)) | |
3768 | /* Don't clobber an operand while doing a multi-step calculation. */ | |
515dfc7a | 3769 | || ((rem_flag || op1_is_constant) |
bc1c7e93 | 3770 | && (reg_mentioned_p (target, op0) |
3c0cb5de | 3771 | || (MEM_P (op0) && MEM_P (target)))) |
bc1c7e93 | 3772 | || reg_mentioned_p (target, op1) |
3c0cb5de | 3773 | || (MEM_P (op1) && MEM_P (target)))) |
44037a66 TG |
3774 | target = 0; |
3775 | ||
44037a66 TG |
3776 | /* Get the mode in which to perform this computation. Normally it will |
3777 | be MODE, but sometimes we can't do the desired operation in MODE. | |
3778 | If so, pick a wider mode in which we can do the operation. Convert | |
3779 | to that mode at the start to avoid repeated conversions. | |
3780 | ||
3781 | First see what operations we need. These depend on the expression | |
3782 | we are evaluating. (We assume that divxx3 insns exist under the | |
3783 | same conditions that modxx3 insns and that these insns don't normally | |
3784 | fail. If these assumptions are not correct, we may generate less | |
3785 | efficient code in some cases.) | |
3786 | ||
3787 | Then see if we find a mode in which we can open-code that operation | |
3788 | (either a division, modulus, or shift). Finally, check for the smallest | |
3789 | mode for which we can do the operation with a library call. */ | |
3790 | ||
55c2d311 TG |
3791 | /* We might want to refine this now that we have division-by-constant |
3792 | optimization. Since expand_mult_highpart tries so many variants, it is | |
3793 | not straightforward to generalize this. Maybe we should make an array | |
3794 | of possible modes in init_expmed? Save this for GCC 2.7. */ | |
3795 | ||
556a56ac DM |
3796 | optab1 = ((op1_is_pow2 && op1 != const0_rtx) |
3797 | ? (unsignedp ? lshr_optab : ashr_optab) | |
44037a66 | 3798 | : (unsignedp ? udiv_optab : sdiv_optab)); |
556a56ac DM |
3799 | optab2 = ((op1_is_pow2 && op1 != const0_rtx) |
3800 | ? optab1 | |
3801 | : (unsignedp ? udivmod_optab : sdivmod_optab)); | |
44037a66 TG |
3802 | |
3803 | for (compute_mode = mode; compute_mode != VOIDmode; | |
3804 | compute_mode = GET_MODE_WIDER_MODE (compute_mode)) | |
166cdb08 JH |
3805 | if (optab_handler (optab1, compute_mode)->insn_code != CODE_FOR_nothing |
3806 | || optab_handler (optab2, compute_mode)->insn_code != CODE_FOR_nothing) | |
44037a66 TG |
3807 | break; |
3808 | ||
3809 | if (compute_mode == VOIDmode) | |
3810 | for (compute_mode = mode; compute_mode != VOIDmode; | |
3811 | compute_mode = GET_MODE_WIDER_MODE (compute_mode)) | |
8a33f100 JH |
3812 | if (optab_libfunc (optab1, compute_mode) |
3813 | || optab_libfunc (optab2, compute_mode)) | |
44037a66 TG |
3814 | break; |
3815 | ||
535a42b1 NS |
3816 | /* If we still couldn't find a mode, use MODE, but expand_binop will |
3817 | probably die. */ | |
44037a66 TG |
3818 | if (compute_mode == VOIDmode) |
3819 | compute_mode = mode; | |
3820 | ||
55c2d311 TG |
3821 | if (target && GET_MODE (target) == compute_mode) |
3822 | tquotient = target; | |
3823 | else | |
3824 | tquotient = gen_reg_rtx (compute_mode); | |
2c414fba | 3825 | |
55c2d311 TG |
3826 | size = GET_MODE_BITSIZE (compute_mode); |
3827 | #if 0 | |
3828 | /* It should be possible to restrict the precision to GET_MODE_BITSIZE | |
71af73bb TG |
3829 | (mode), and thereby get better code when OP1 is a constant. Do that |
3830 | later. It will require going over all usages of SIZE below. */ | |
55c2d311 TG |
3831 | size = GET_MODE_BITSIZE (mode); |
3832 | #endif | |
bc1c7e93 | 3833 | |
9ec36da5 JL |
3834 | /* Only deduct something for a REM if the last divide done was |
3835 | for a different constant. Then set the constant of the last | |
3836 | divide. */ | |
a28b2ac6 RS |
3837 | max_cost = unsignedp ? udiv_cost[compute_mode] : sdiv_cost[compute_mode]; |
3838 | if (rem_flag && ! (last_div_const != 0 && op1_is_constant | |
3839 | && INTVAL (op1) == last_div_const)) | |
3840 | max_cost -= mul_cost[compute_mode] + add_cost[compute_mode]; | |
9ec36da5 JL |
3841 | |
3842 | last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0; | |
71af73bb | 3843 | |
55c2d311 | 3844 | /* Now convert to the best mode to use. */ |
44037a66 TG |
3845 | if (compute_mode != mode) |
3846 | { | |
55c2d311 | 3847 | op0 = convert_modes (compute_mode, mode, op0, unsignedp); |
81722fa9 | 3848 | op1 = convert_modes (compute_mode, mode, op1, unsignedp); |
e13a25d5 | 3849 | |
e9a25f70 JL |
3850 | /* convert_modes may have placed op1 into a register, so we |
3851 | must recompute the following. */ | |
e13a25d5 DM |
3852 | op1_is_constant = GET_CODE (op1) == CONST_INT; |
3853 | op1_is_pow2 = (op1_is_constant | |
3854 | && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)) | |
3855 | || (! unsignedp | |
e9a25f70 | 3856 | && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ; |
44037a66 TG |
3857 | } |
3858 | ||
55c2d311 | 3859 | /* If one of the operands is a volatile MEM, copy it into a register. */ |
c2a47e48 | 3860 | |
3c0cb5de | 3861 | if (MEM_P (op0) && MEM_VOLATILE_P (op0)) |
55c2d311 | 3862 | op0 = force_reg (compute_mode, op0); |
3c0cb5de | 3863 | if (MEM_P (op1) && MEM_VOLATILE_P (op1)) |
c2a47e48 RK |
3864 | op1 = force_reg (compute_mode, op1); |
3865 | ||
ab0b6581 TG |
3866 | /* If we need the remainder or if OP1 is constant, we need to |
3867 | put OP0 in a register in case it has any queued subexpressions. */ | |
3868 | if (rem_flag || op1_is_constant) | |
3869 | op0 = force_reg (compute_mode, op0); | |
bc1c7e93 | 3870 | |
55c2d311 | 3871 | last = get_last_insn (); |
44037a66 | 3872 | |
9faa82d8 | 3873 | /* Promote floor rounding to trunc rounding for unsigned operations. */ |
55c2d311 | 3874 | if (unsignedp) |
44037a66 | 3875 | { |
55c2d311 TG |
3876 | if (code == FLOOR_DIV_EXPR) |
3877 | code = TRUNC_DIV_EXPR; | |
3878 | if (code == FLOOR_MOD_EXPR) | |
3879 | code = TRUNC_MOD_EXPR; | |
db7cafb0 JL |
3880 | if (code == EXACT_DIV_EXPR && op1_is_pow2) |
3881 | code = TRUNC_DIV_EXPR; | |
55c2d311 | 3882 | } |
bc1c7e93 | 3883 | |
55c2d311 TG |
3884 | if (op1 != const0_rtx) |
3885 | switch (code) | |
3886 | { | |
3887 | case TRUNC_MOD_EXPR: | |
3888 | case TRUNC_DIV_EXPR: | |
34f016ed | 3889 | if (op1_is_constant) |
55c2d311 | 3890 | { |
d8f1376c | 3891 | if (unsignedp) |
55c2d311 | 3892 | { |
e71c0aa7 | 3893 | unsigned HOST_WIDE_INT mh; |
55c2d311 TG |
3894 | int pre_shift, post_shift; |
3895 | int dummy; | |
e71c0aa7 | 3896 | rtx ml; |
1c4a429a JH |
3897 | unsigned HOST_WIDE_INT d = (INTVAL (op1) |
3898 | & GET_MODE_MASK (compute_mode)); | |
55c2d311 TG |
3899 | |
3900 | if (EXACT_POWER_OF_2_OR_ZERO_P (d)) | |
3901 | { | |
3902 | pre_shift = floor_log2 (d); | |
3903 | if (rem_flag) | |
3904 | { | |
db3cf6fb MS |
3905 | remainder |
3906 | = expand_binop (compute_mode, and_optab, op0, | |
3907 | GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1), | |
3908 | remainder, 1, | |
3909 | OPTAB_LIB_WIDEN); | |
55c2d311 | 3910 | if (remainder) |
c8dbc8ca | 3911 | return gen_lowpart (mode, remainder); |
55c2d311 TG |
3912 | } |
3913 | quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0, | |
4a90aeeb | 3914 | build_int_cst (NULL_TREE, |
7d60be94 | 3915 | pre_shift), |
55c2d311 TG |
3916 | tquotient, 1); |
3917 | } | |
34f016ed | 3918 | else if (size <= HOST_BITS_PER_WIDE_INT) |
55c2d311 | 3919 | { |
dc1d6150 | 3920 | if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1))) |
55c2d311 | 3921 | { |
dc1d6150 TG |
3922 | /* Most significant bit of divisor is set; emit an scc |
3923 | insn. */ | |
3924 | quotient = emit_store_flag (tquotient, GEU, op0, op1, | |
3925 | compute_mode, 1, 1); | |
3926 | if (quotient == 0) | |
55c2d311 | 3927 | goto fail1; |
55c2d311 TG |
3928 | } |
3929 | else | |
3930 | { | |
dc1d6150 TG |
3931 | /* Find a suitable multiplier and right shift count |
3932 | instead of multiplying with D. */ | |
3933 | ||
3934 | mh = choose_multiplier (d, size, size, | |
3935 | &ml, &post_shift, &dummy); | |
3936 | ||
3937 | /* If the suggested multiplier is more than SIZE bits, | |
3938 | we can do better for even divisors, using an | |
3939 | initial right shift. */ | |
3940 | if (mh != 0 && (d & 1) == 0) | |
3941 | { | |
3942 | pre_shift = floor_log2 (d & -d); | |
3943 | mh = choose_multiplier (d >> pre_shift, size, | |
3944 | size - pre_shift, | |
3945 | &ml, &post_shift, &dummy); | |
5b0264cb | 3946 | gcc_assert (!mh); |
dc1d6150 TG |
3947 | } |
3948 | else | |
3949 | pre_shift = 0; | |
3950 | ||
3951 | if (mh != 0) | |
3952 | { | |
3953 | rtx t1, t2, t3, t4; | |
3954 | ||
02a65aef R |
3955 | if (post_shift - 1 >= BITS_PER_WORD) |
3956 | goto fail1; | |
3957 | ||
965703ed RS |
3958 | extra_cost |
3959 | = (shift_cost[compute_mode][post_shift - 1] | |
3960 | + shift_cost[compute_mode][1] | |
3961 | + 2 * add_cost[compute_mode]); | |
e71c0aa7 | 3962 | t1 = expand_mult_highpart (compute_mode, op0, ml, |
dc1d6150 TG |
3963 | NULL_RTX, 1, |
3964 | max_cost - extra_cost); | |
3965 | if (t1 == 0) | |
3966 | goto fail1; | |
38a448ca RH |
3967 | t2 = force_operand (gen_rtx_MINUS (compute_mode, |
3968 | op0, t1), | |
dc1d6150 | 3969 | NULL_RTX); |
4a90aeeb NS |
3970 | t3 = expand_shift |
3971 | (RSHIFT_EXPR, compute_mode, t2, | |
7d60be94 | 3972 | build_int_cst (NULL_TREE, 1), |
4a90aeeb | 3973 | NULL_RTX,1); |
38a448ca RH |
3974 | t4 = force_operand (gen_rtx_PLUS (compute_mode, |
3975 | t1, t3), | |
dc1d6150 | 3976 | NULL_RTX); |
4a90aeeb NS |
3977 | quotient = expand_shift |
3978 | (RSHIFT_EXPR, compute_mode, t4, | |
7d60be94 | 3979 | build_int_cst (NULL_TREE, post_shift - 1), |
4a90aeeb | 3980 | tquotient, 1); |
dc1d6150 TG |
3981 | } |
3982 | else | |
3983 | { | |
3984 | rtx t1, t2; | |
3985 | ||
02a65aef R |
3986 | if (pre_shift >= BITS_PER_WORD |
3987 | || post_shift >= BITS_PER_WORD) | |
3988 | goto fail1; | |
3989 | ||
4a90aeeb NS |
3990 | t1 = expand_shift |
3991 | (RSHIFT_EXPR, compute_mode, op0, | |
7d60be94 | 3992 | build_int_cst (NULL_TREE, pre_shift), |
4a90aeeb | 3993 | NULL_RTX, 1); |
965703ed RS |
3994 | extra_cost |
3995 | = (shift_cost[compute_mode][pre_shift] | |
3996 | + shift_cost[compute_mode][post_shift]); | |
e71c0aa7 | 3997 | t2 = expand_mult_highpart (compute_mode, t1, ml, |
dc1d6150 TG |
3998 | NULL_RTX, 1, |
3999 | max_cost - extra_cost); | |
4000 | if (t2 == 0) | |
4001 | goto fail1; | |
4a90aeeb NS |
4002 | quotient = expand_shift |
4003 | (RSHIFT_EXPR, compute_mode, t2, | |
7d60be94 | 4004 | build_int_cst (NULL_TREE, post_shift), |
4a90aeeb | 4005 | tquotient, 1); |
dc1d6150 | 4006 | } |
55c2d311 TG |
4007 | } |
4008 | } | |
34f016ed TG |
4009 | else /* Too wide mode to use tricky code */ |
4010 | break; | |
55c2d311 TG |
4011 | |
4012 | insn = get_last_insn (); | |
4e430df8 RK |
4013 | if (insn != last |
4014 | && (set = single_set (insn)) != 0 | |
4015 | && SET_DEST (set) == quotient) | |
c410d49e | 4016 | set_unique_reg_note (insn, |
502b8322 | 4017 | REG_EQUAL, |
7e5bda2c | 4018 | gen_rtx_UDIV (compute_mode, op0, op1)); |
55c2d311 TG |
4019 | } |
4020 | else /* TRUNC_DIV, signed */ | |
4021 | { | |
4022 | unsigned HOST_WIDE_INT ml; | |
4023 | int lgup, post_shift; | |
e71c0aa7 | 4024 | rtx mlr; |
55c2d311 | 4025 | HOST_WIDE_INT d = INTVAL (op1); |
e4c9f3c2 ILT |
4026 | unsigned HOST_WIDE_INT abs_d; |
4027 | ||
093253be ILT |
4028 | /* Since d might be INT_MIN, we have to cast to |
4029 | unsigned HOST_WIDE_INT before negating to avoid | |
4030 | undefined signed overflow. */ | |
6d9c91e9 ILT |
4031 | abs_d = (d >= 0 |
4032 | ? (unsigned HOST_WIDE_INT) d | |
4033 | : - (unsigned HOST_WIDE_INT) d); | |
55c2d311 TG |
4034 | |
4035 | /* n rem d = n rem -d */ | |
4036 | if (rem_flag && d < 0) | |
4037 | { | |
4038 | d = abs_d; | |
2496c7bd | 4039 | op1 = gen_int_mode (abs_d, compute_mode); |
55c2d311 TG |
4040 | } |
4041 | ||
4042 | if (d == 1) | |
4043 | quotient = op0; | |
4044 | else if (d == -1) | |
4045 | quotient = expand_unop (compute_mode, neg_optab, op0, | |
4046 | tquotient, 0); | |
f737b132 RK |
4047 | else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1)) |
4048 | { | |
4049 | /* This case is not handled correctly below. */ | |
4050 | quotient = emit_store_flag (tquotient, EQ, op0, op1, | |
4051 | compute_mode, 1, 1); | |
4052 | if (quotient == 0) | |
4053 | goto fail1; | |
4054 | } | |
55c2d311 | 4055 | else if (EXACT_POWER_OF_2_OR_ZERO_P (d) |
58777718 RS |
4056 | && (rem_flag ? smod_pow2_cheap[compute_mode] |
4057 | : sdiv_pow2_cheap[compute_mode]) | |
0b55e932 RS |
4058 | /* We assume that cheap metric is true if the |
4059 | optab has an expander for this mode. */ | |
166cdb08 JH |
4060 | && ((optab_handler ((rem_flag ? smod_optab |
4061 | : sdiv_optab), | |
4062 | compute_mode)->insn_code | |
a8c7e72d | 4063 | != CODE_FOR_nothing) |
166cdb08 JH |
4064 | || (optab_handler(sdivmod_optab, |
4065 | compute_mode) | |
4066 | ->insn_code != CODE_FOR_nothing))) | |
55c2d311 TG |
4067 | ; |
4068 | else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d)) | |
4069 | { | |
0b55e932 RS |
4070 | if (rem_flag) |
4071 | { | |
4072 | remainder = expand_smod_pow2 (compute_mode, op0, d); | |
4073 | if (remainder) | |
4074 | return gen_lowpart (mode, remainder); | |
4075 | } | |
3d520aaf DE |
4076 | |
4077 | if (sdiv_pow2_cheap[compute_mode] | |
166cdb08 | 4078 | && ((optab_handler (sdiv_optab, compute_mode)->insn_code |
3d520aaf | 4079 | != CODE_FOR_nothing) |
166cdb08 | 4080 | || (optab_handler (sdivmod_optab, compute_mode)->insn_code |
3d520aaf DE |
4081 | != CODE_FOR_nothing))) |
4082 | quotient = expand_divmod (0, TRUNC_DIV_EXPR, | |
4083 | compute_mode, op0, | |
4084 | gen_int_mode (abs_d, | |
4085 | compute_mode), | |
4086 | NULL_RTX, 0); | |
4087 | else | |
4088 | quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d); | |
55c2d311 | 4089 | |
0b55e932 RS |
4090 | /* We have computed OP0 / abs(OP1). If OP1 is negative, |
4091 | negate the quotient. */ | |
55c2d311 TG |
4092 | if (d < 0) |
4093 | { | |
4094 | insn = get_last_insn (); | |
4e430df8 RK |
4095 | if (insn != last |
4096 | && (set = single_set (insn)) != 0 | |
c8e7fe58 DE |
4097 | && SET_DEST (set) == quotient |
4098 | && abs_d < ((unsigned HOST_WIDE_INT) 1 | |
4099 | << (HOST_BITS_PER_WIDE_INT - 1))) | |
c410d49e | 4100 | set_unique_reg_note (insn, |
502b8322 | 4101 | REG_EQUAL, |
7e5bda2c AM |
4102 | gen_rtx_DIV (compute_mode, |
4103 | op0, | |
69107307 AO |
4104 | GEN_INT |
4105 | (trunc_int_for_mode | |
4106 | (abs_d, | |
4107 | compute_mode)))); | |
55c2d311 TG |
4108 | |
4109 | quotient = expand_unop (compute_mode, neg_optab, | |
4110 | quotient, quotient, 0); | |
4111 | } | |
4112 | } | |
34f016ed | 4113 | else if (size <= HOST_BITS_PER_WIDE_INT) |
55c2d311 TG |
4114 | { |
4115 | choose_multiplier (abs_d, size, size - 1, | |
e71c0aa7 RS |
4116 | &mlr, &post_shift, &lgup); |
4117 | ml = (unsigned HOST_WIDE_INT) INTVAL (mlr); | |
55c2d311 TG |
4118 | if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1)) |
4119 | { | |
4120 | rtx t1, t2, t3; | |
4121 | ||
02a65aef R |
4122 | if (post_shift >= BITS_PER_WORD |
4123 | || size - 1 >= BITS_PER_WORD) | |
4124 | goto fail1; | |
4125 | ||
965703ed RS |
4126 | extra_cost = (shift_cost[compute_mode][post_shift] |
4127 | + shift_cost[compute_mode][size - 1] | |
58777718 | 4128 | + add_cost[compute_mode]); |
e71c0aa7 | 4129 | t1 = expand_mult_highpart (compute_mode, op0, mlr, |
71af73bb TG |
4130 | NULL_RTX, 0, |
4131 | max_cost - extra_cost); | |
55c2d311 TG |
4132 | if (t1 == 0) |
4133 | goto fail1; | |
4a90aeeb NS |
4134 | t2 = expand_shift |
4135 | (RSHIFT_EXPR, compute_mode, t1, | |
7d60be94 | 4136 | build_int_cst (NULL_TREE, post_shift), |
4a90aeeb NS |
4137 | NULL_RTX, 0); |
4138 | t3 = expand_shift | |
4139 | (RSHIFT_EXPR, compute_mode, op0, | |
7d60be94 | 4140 | build_int_cst (NULL_TREE, size - 1), |
4a90aeeb | 4141 | NULL_RTX, 0); |
55c2d311 | 4142 | if (d < 0) |
c5c76735 JL |
4143 | quotient |
4144 | = force_operand (gen_rtx_MINUS (compute_mode, | |
4145 | t3, t2), | |
4146 | tquotient); | |
55c2d311 | 4147 | else |
c5c76735 JL |
4148 | quotient |
4149 | = force_operand (gen_rtx_MINUS (compute_mode, | |
4150 | t2, t3), | |
4151 | tquotient); | |
55c2d311 TG |
4152 | } |
4153 | else | |
4154 | { | |
4155 | rtx t1, t2, t3, t4; | |
4156 | ||
02a65aef R |
4157 | if (post_shift >= BITS_PER_WORD |
4158 | || size - 1 >= BITS_PER_WORD) | |
4159 | goto fail1; | |
4160 | ||
55c2d311 | 4161 | ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1); |
e71c0aa7 | 4162 | mlr = gen_int_mode (ml, compute_mode); |
965703ed RS |
4163 | extra_cost = (shift_cost[compute_mode][post_shift] |
4164 | + shift_cost[compute_mode][size - 1] | |
58777718 | 4165 | + 2 * add_cost[compute_mode]); |
e71c0aa7 | 4166 | t1 = expand_mult_highpart (compute_mode, op0, mlr, |
71af73bb TG |
4167 | NULL_RTX, 0, |
4168 | max_cost - extra_cost); | |
55c2d311 TG |
4169 | if (t1 == 0) |
4170 | goto fail1; | |
c5c76735 JL |
4171 | t2 = force_operand (gen_rtx_PLUS (compute_mode, |
4172 | t1, op0), | |
55c2d311 | 4173 | NULL_RTX); |
4a90aeeb NS |
4174 | t3 = expand_shift |
4175 | (RSHIFT_EXPR, compute_mode, t2, | |
7d60be94 | 4176 | build_int_cst (NULL_TREE, post_shift), |
4a90aeeb NS |
4177 | NULL_RTX, 0); |
4178 | t4 = expand_shift | |
4179 | (RSHIFT_EXPR, compute_mode, op0, | |
7d60be94 | 4180 | build_int_cst (NULL_TREE, size - 1), |
4a90aeeb | 4181 | NULL_RTX, 0); |
55c2d311 | 4182 | if (d < 0) |
c5c76735 JL |
4183 | quotient |
4184 | = force_operand (gen_rtx_MINUS (compute_mode, | |
4185 | t4, t3), | |
4186 | tquotient); | |
55c2d311 | 4187 | else |
c5c76735 JL |
4188 | quotient |
4189 | = force_operand (gen_rtx_MINUS (compute_mode, | |
4190 | t3, t4), | |
4191 | tquotient); | |
55c2d311 TG |
4192 | } |
4193 | } | |
34f016ed TG |
4194 | else /* Too wide mode to use tricky code */ |
4195 | break; | |
55c2d311 | 4196 | |
4e430df8 RK |
4197 | insn = get_last_insn (); |
4198 | if (insn != last | |
4199 | && (set = single_set (insn)) != 0 | |
4200 | && SET_DEST (set) == quotient) | |
c410d49e | 4201 | set_unique_reg_note (insn, |
502b8322 | 4202 | REG_EQUAL, |
7e5bda2c | 4203 | gen_rtx_DIV (compute_mode, op0, op1)); |
55c2d311 TG |
4204 | } |
4205 | break; | |
4206 | } | |
4207 | fail1: | |
4208 | delete_insns_since (last); | |
4209 | break; | |
44037a66 | 4210 | |
55c2d311 TG |
4211 | case FLOOR_DIV_EXPR: |
4212 | case FLOOR_MOD_EXPR: | |
4213 | /* We will come here only for signed operations. */ | |
4214 | if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size) | |
4215 | { | |
e71c0aa7 | 4216 | unsigned HOST_WIDE_INT mh; |
55c2d311 TG |
4217 | int pre_shift, lgup, post_shift; |
4218 | HOST_WIDE_INT d = INTVAL (op1); | |
e71c0aa7 | 4219 | rtx ml; |
55c2d311 TG |
4220 | |
4221 | if (d > 0) | |
4222 | { | |
4223 | /* We could just as easily deal with negative constants here, | |
4224 | but it does not seem worth the trouble for GCC 2.6. */ | |
4225 | if (EXACT_POWER_OF_2_OR_ZERO_P (d)) | |
4226 | { | |
4227 | pre_shift = floor_log2 (d); | |
4228 | if (rem_flag) | |
4229 | { | |
4230 | remainder = expand_binop (compute_mode, and_optab, op0, | |
4231 | GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1), | |
4232 | remainder, 0, OPTAB_LIB_WIDEN); | |
4233 | if (remainder) | |
c8dbc8ca | 4234 | return gen_lowpart (mode, remainder); |
55c2d311 | 4235 | } |
4a90aeeb NS |
4236 | quotient = expand_shift |
4237 | (RSHIFT_EXPR, compute_mode, op0, | |
7d60be94 | 4238 | build_int_cst (NULL_TREE, pre_shift), |
4a90aeeb | 4239 | tquotient, 0); |
55c2d311 TG |
4240 | } |
4241 | else | |
4242 | { | |
4243 | rtx t1, t2, t3, t4; | |
4244 | ||
4245 | mh = choose_multiplier (d, size, size - 1, | |
4246 | &ml, &post_shift, &lgup); | |
5b0264cb | 4247 | gcc_assert (!mh); |
55c2d311 | 4248 | |
02a65aef R |
4249 | if (post_shift < BITS_PER_WORD |
4250 | && size - 1 < BITS_PER_WORD) | |
55c2d311 | 4251 | { |
4a90aeeb NS |
4252 | t1 = expand_shift |
4253 | (RSHIFT_EXPR, compute_mode, op0, | |
7d60be94 | 4254 | build_int_cst (NULL_TREE, size - 1), |
4a90aeeb | 4255 | NULL_RTX, 0); |
02a65aef R |
4256 | t2 = expand_binop (compute_mode, xor_optab, op0, t1, |
4257 | NULL_RTX, 0, OPTAB_WIDEN); | |
965703ed RS |
4258 | extra_cost = (shift_cost[compute_mode][post_shift] |
4259 | + shift_cost[compute_mode][size - 1] | |
58777718 | 4260 | + 2 * add_cost[compute_mode]); |
e71c0aa7 | 4261 | t3 = expand_mult_highpart (compute_mode, t2, ml, |
02a65aef R |
4262 | NULL_RTX, 1, |
4263 | max_cost - extra_cost); | |
4264 | if (t3 != 0) | |
4265 | { | |
4a90aeeb NS |
4266 | t4 = expand_shift |
4267 | (RSHIFT_EXPR, compute_mode, t3, | |
7d60be94 | 4268 | build_int_cst (NULL_TREE, post_shift), |
4a90aeeb | 4269 | NULL_RTX, 1); |
02a65aef R |
4270 | quotient = expand_binop (compute_mode, xor_optab, |
4271 | t4, t1, tquotient, 0, | |
4272 | OPTAB_WIDEN); | |
4273 | } | |
55c2d311 TG |
4274 | } |
4275 | } | |
4276 | } | |
4277 | else | |
4278 | { | |
4279 | rtx nsign, t1, t2, t3, t4; | |
38a448ca RH |
4280 | t1 = force_operand (gen_rtx_PLUS (compute_mode, |
4281 | op0, constm1_rtx), NULL_RTX); | |
55c2d311 TG |
4282 | t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX, |
4283 | 0, OPTAB_WIDEN); | |
4a90aeeb NS |
4284 | nsign = expand_shift |
4285 | (RSHIFT_EXPR, compute_mode, t2, | |
7d60be94 | 4286 | build_int_cst (NULL_TREE, size - 1), |
4a90aeeb | 4287 | NULL_RTX, 0); |
38a448ca | 4288 | t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign), |
55c2d311 TG |
4289 | NULL_RTX); |
4290 | t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1, | |
4291 | NULL_RTX, 0); | |
4292 | if (t4) | |
4293 | { | |
4294 | rtx t5; | |
4295 | t5 = expand_unop (compute_mode, one_cmpl_optab, nsign, | |
4296 | NULL_RTX, 0); | |
38a448ca RH |
4297 | quotient = force_operand (gen_rtx_PLUS (compute_mode, |
4298 | t4, t5), | |
55c2d311 TG |
4299 | tquotient); |
4300 | } | |
4301 | } | |
4302 | } | |
4303 | ||
4304 | if (quotient != 0) | |
4305 | break; | |
4306 | delete_insns_since (last); | |
4307 | ||
4308 | /* Try using an instruction that produces both the quotient and | |
4309 | remainder, using truncation. We can easily compensate the quotient | |
4310 | or remainder to get floor rounding, once we have the remainder. | |
4311 | Notice that we compute also the final remainder value here, | |
4312 | and return the result right away. */ | |
a45cf58c | 4313 | if (target == 0 || GET_MODE (target) != compute_mode) |
55c2d311 | 4314 | target = gen_reg_rtx (compute_mode); |
668443c9 | 4315 | |
55c2d311 TG |
4316 | if (rem_flag) |
4317 | { | |
668443c9 | 4318 | remainder |
f8cfc6aa | 4319 | = REG_P (target) ? target : gen_reg_rtx (compute_mode); |
55c2d311 TG |
4320 | quotient = gen_reg_rtx (compute_mode); |
4321 | } | |
4322 | else | |
4323 | { | |
668443c9 | 4324 | quotient |
f8cfc6aa | 4325 | = REG_P (target) ? target : gen_reg_rtx (compute_mode); |
55c2d311 TG |
4326 | remainder = gen_reg_rtx (compute_mode); |
4327 | } | |
4328 | ||
4329 | if (expand_twoval_binop (sdivmod_optab, op0, op1, | |
4330 | quotient, remainder, 0)) | |
4331 | { | |
4332 | /* This could be computed with a branch-less sequence. | |
4333 | Save that for later. */ | |
4334 | rtx tem; | |
4335 | rtx label = gen_label_rtx (); | |
f5963e61 | 4336 | do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label); |
55c2d311 TG |
4337 | tem = expand_binop (compute_mode, xor_optab, op0, op1, |
4338 | NULL_RTX, 0, OPTAB_WIDEN); | |
f5963e61 | 4339 | do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label); |
55c2d311 TG |
4340 | expand_dec (quotient, const1_rtx); |
4341 | expand_inc (remainder, op1); | |
4342 | emit_label (label); | |
c8dbc8ca | 4343 | return gen_lowpart (mode, rem_flag ? remainder : quotient); |
55c2d311 TG |
4344 | } |
4345 | ||
4346 | /* No luck with division elimination or divmod. Have to do it | |
4347 | by conditionally adjusting op0 *and* the result. */ | |
44037a66 | 4348 | { |
55c2d311 TG |
4349 | rtx label1, label2, label3, label4, label5; |
4350 | rtx adjusted_op0; | |
4351 | rtx tem; | |
4352 | ||
4353 | quotient = gen_reg_rtx (compute_mode); | |
4354 | adjusted_op0 = copy_to_mode_reg (compute_mode, op0); | |
4355 | label1 = gen_label_rtx (); | |
4356 | label2 = gen_label_rtx (); | |
4357 | label3 = gen_label_rtx (); | |
4358 | label4 = gen_label_rtx (); | |
4359 | label5 = gen_label_rtx (); | |
f5963e61 JL |
4360 | do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2); |
4361 | do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1); | |
55c2d311 TG |
4362 | tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1, |
4363 | quotient, 0, OPTAB_LIB_WIDEN); | |
4364 | if (tem != quotient) | |
4365 | emit_move_insn (quotient, tem); | |
4366 | emit_jump_insn (gen_jump (label5)); | |
4367 | emit_barrier (); | |
4368 | emit_label (label1); | |
44037a66 | 4369 | expand_inc (adjusted_op0, const1_rtx); |
55c2d311 TG |
4370 | emit_jump_insn (gen_jump (label4)); |
4371 | emit_barrier (); | |
4372 | emit_label (label2); | |
f5963e61 | 4373 | do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3); |
55c2d311 TG |
4374 | tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1, |
4375 | quotient, 0, OPTAB_LIB_WIDEN); | |
4376 | if (tem != quotient) | |
4377 | emit_move_insn (quotient, tem); | |
4378 | emit_jump_insn (gen_jump (label5)); | |
4379 | emit_barrier (); | |
4380 | emit_label (label3); | |
4381 | expand_dec (adjusted_op0, const1_rtx); | |
4382 | emit_label (label4); | |
4383 | tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1, | |
4384 | quotient, 0, OPTAB_LIB_WIDEN); | |
4385 | if (tem != quotient) | |
4386 | emit_move_insn (quotient, tem); | |
4387 | expand_dec (quotient, const1_rtx); | |
4388 | emit_label (label5); | |
44037a66 | 4389 | } |
55c2d311 | 4390 | break; |
44037a66 | 4391 | |
55c2d311 TG |
4392 | case CEIL_DIV_EXPR: |
4393 | case CEIL_MOD_EXPR: | |
4394 | if (unsignedp) | |
4395 | { | |
9176af2f TG |
4396 | if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))) |
4397 | { | |
4398 | rtx t1, t2, t3; | |
4399 | unsigned HOST_WIDE_INT d = INTVAL (op1); | |
4400 | t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0, | |
7d60be94 | 4401 | build_int_cst (NULL_TREE, floor_log2 (d)), |
412381d9 | 4402 | tquotient, 1); |
9176af2f TG |
4403 | t2 = expand_binop (compute_mode, and_optab, op0, |
4404 | GEN_INT (d - 1), | |
4405 | NULL_RTX, 1, OPTAB_LIB_WIDEN); | |
4406 | t3 = gen_reg_rtx (compute_mode); | |
4407 | t3 = emit_store_flag (t3, NE, t2, const0_rtx, | |
4408 | compute_mode, 1, 1); | |
412381d9 TG |
4409 | if (t3 == 0) |
4410 | { | |
4411 | rtx lab; | |
4412 | lab = gen_label_rtx (); | |
f5963e61 | 4413 | do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab); |
412381d9 TG |
4414 | expand_inc (t1, const1_rtx); |
4415 | emit_label (lab); | |
4416 | quotient = t1; | |
4417 | } | |
4418 | else | |
38a448ca RH |
4419 | quotient = force_operand (gen_rtx_PLUS (compute_mode, |
4420 | t1, t3), | |
412381d9 | 4421 | tquotient); |
9176af2f TG |
4422 | break; |
4423 | } | |
55c2d311 TG |
4424 | |
4425 | /* Try using an instruction that produces both the quotient and | |
4426 | remainder, using truncation. We can easily compensate the | |
4427 | quotient or remainder to get ceiling rounding, once we have the | |
4428 | remainder. Notice that we compute also the final remainder | |
4429 | value here, and return the result right away. */ | |
a45cf58c | 4430 | if (target == 0 || GET_MODE (target) != compute_mode) |
55c2d311 | 4431 | target = gen_reg_rtx (compute_mode); |
668443c9 | 4432 | |
55c2d311 TG |
4433 | if (rem_flag) |
4434 | { | |
f8cfc6aa | 4435 | remainder = (REG_P (target) |
668443c9 | 4436 | ? target : gen_reg_rtx (compute_mode)); |
55c2d311 TG |
4437 | quotient = gen_reg_rtx (compute_mode); |
4438 | } | |
4439 | else | |
4440 | { | |
f8cfc6aa | 4441 | quotient = (REG_P (target) |
668443c9 | 4442 | ? target : gen_reg_rtx (compute_mode)); |
55c2d311 TG |
4443 | remainder = gen_reg_rtx (compute_mode); |
4444 | } | |
4445 | ||
4446 | if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, | |
4447 | remainder, 1)) | |
4448 | { | |
4449 | /* This could be computed with a branch-less sequence. | |
4450 | Save that for later. */ | |
4451 | rtx label = gen_label_rtx (); | |
f5963e61 JL |
4452 | do_cmp_and_jump (remainder, const0_rtx, EQ, |
4453 | compute_mode, label); | |
55c2d311 TG |
4454 | expand_inc (quotient, const1_rtx); |
4455 | expand_dec (remainder, op1); | |
4456 | emit_label (label); | |
c8dbc8ca | 4457 | return gen_lowpart (mode, rem_flag ? remainder : quotient); |
55c2d311 TG |
4458 | } |
4459 | ||
4460 | /* No luck with division elimination or divmod. Have to do it | |
4461 | by conditionally adjusting op0 *and* the result. */ | |
44037a66 | 4462 | { |
55c2d311 TG |
4463 | rtx label1, label2; |
4464 | rtx adjusted_op0, tem; | |
4465 | ||
4466 | quotient = gen_reg_rtx (compute_mode); | |
4467 | adjusted_op0 = copy_to_mode_reg (compute_mode, op0); | |
4468 | label1 = gen_label_rtx (); | |
4469 | label2 = gen_label_rtx (); | |
f5963e61 JL |
4470 | do_cmp_and_jump (adjusted_op0, const0_rtx, NE, |
4471 | compute_mode, label1); | |
55c2d311 TG |
4472 | emit_move_insn (quotient, const0_rtx); |
4473 | emit_jump_insn (gen_jump (label2)); | |
4474 | emit_barrier (); | |
4475 | emit_label (label1); | |
4476 | expand_dec (adjusted_op0, const1_rtx); | |
4477 | tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1, | |
4478 | quotient, 1, OPTAB_LIB_WIDEN); | |
4479 | if (tem != quotient) | |
4480 | emit_move_insn (quotient, tem); | |
4481 | expand_inc (quotient, const1_rtx); | |
4482 | emit_label (label2); | |
44037a66 | 4483 | } |
55c2d311 TG |
4484 | } |
4485 | else /* signed */ | |
4486 | { | |
73f27728 RK |
4487 | if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)) |
4488 | && INTVAL (op1) >= 0) | |
4489 | { | |
4490 | /* This is extremely similar to the code for the unsigned case | |
4491 | above. For 2.7 we should merge these variants, but for | |
4492 | 2.6.1 I don't want to touch the code for unsigned since that | |
4493 | get used in C. The signed case will only be used by other | |
4494 | languages (Ada). */ | |
4495 | ||
4496 | rtx t1, t2, t3; | |
4497 | unsigned HOST_WIDE_INT d = INTVAL (op1); | |
4498 | t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0, | |
7d60be94 | 4499 | build_int_cst (NULL_TREE, floor_log2 (d)), |
73f27728 RK |
4500 | tquotient, 0); |
4501 | t2 = expand_binop (compute_mode, and_optab, op0, | |
4502 | GEN_INT (d - 1), | |
4503 | NULL_RTX, 1, OPTAB_LIB_WIDEN); | |
4504 | t3 = gen_reg_rtx (compute_mode); | |
4505 | t3 = emit_store_flag (t3, NE, t2, const0_rtx, | |
4506 | compute_mode, 1, 1); | |
4507 | if (t3 == 0) | |
4508 | { | |
4509 | rtx lab; | |
4510 | lab = gen_label_rtx (); | |
f5963e61 | 4511 | do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab); |
73f27728 RK |
4512 | expand_inc (t1, const1_rtx); |
4513 | emit_label (lab); | |
4514 | quotient = t1; | |
4515 | } | |
4516 | else | |
38a448ca RH |
4517 | quotient = force_operand (gen_rtx_PLUS (compute_mode, |
4518 | t1, t3), | |
73f27728 RK |
4519 | tquotient); |
4520 | break; | |
4521 | } | |
4522 | ||
55c2d311 TG |
4523 | /* Try using an instruction that produces both the quotient and |
4524 | remainder, using truncation. We can easily compensate the | |
4525 | quotient or remainder to get ceiling rounding, once we have the | |
4526 | remainder. Notice that we compute also the final remainder | |
4527 | value here, and return the result right away. */ | |
a45cf58c | 4528 | if (target == 0 || GET_MODE (target) != compute_mode) |
55c2d311 TG |
4529 | target = gen_reg_rtx (compute_mode); |
4530 | if (rem_flag) | |
4531 | { | |
f8cfc6aa | 4532 | remainder= (REG_P (target) |
668443c9 | 4533 | ? target : gen_reg_rtx (compute_mode)); |
55c2d311 TG |
4534 | quotient = gen_reg_rtx (compute_mode); |
4535 | } | |
4536 | else | |
4537 | { | |
f8cfc6aa | 4538 | quotient = (REG_P (target) |
668443c9 | 4539 | ? target : gen_reg_rtx (compute_mode)); |
55c2d311 TG |
4540 | remainder = gen_reg_rtx (compute_mode); |
4541 | } | |
4542 | ||
4543 | if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, | |
4544 | remainder, 0)) | |
4545 | { | |
4546 | /* This could be computed with a branch-less sequence. | |
4547 | Save that for later. */ | |
4548 | rtx tem; | |
4549 | rtx label = gen_label_rtx (); | |
f5963e61 JL |
4550 | do_cmp_and_jump (remainder, const0_rtx, EQ, |
4551 | compute_mode, label); | |
55c2d311 TG |
4552 | tem = expand_binop (compute_mode, xor_optab, op0, op1, |
4553 | NULL_RTX, 0, OPTAB_WIDEN); | |
f5963e61 | 4554 | do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label); |
55c2d311 TG |
4555 | expand_inc (quotient, const1_rtx); |
4556 | expand_dec (remainder, op1); | |
4557 | emit_label (label); | |
c8dbc8ca | 4558 | return gen_lowpart (mode, rem_flag ? remainder : quotient); |
55c2d311 TG |
4559 | } |
4560 | ||
4561 | /* No luck with division elimination or divmod. Have to do it | |
4562 | by conditionally adjusting op0 *and* the result. */ | |
44037a66 | 4563 | { |
55c2d311 TG |
4564 | rtx label1, label2, label3, label4, label5; |
4565 | rtx adjusted_op0; | |
4566 | rtx tem; | |
4567 | ||
4568 | quotient = gen_reg_rtx (compute_mode); | |
4569 | adjusted_op0 = copy_to_mode_reg (compute_mode, op0); | |
4570 | label1 = gen_label_rtx (); | |
4571 | label2 = gen_label_rtx (); | |
4572 | label3 = gen_label_rtx (); | |
4573 | label4 = gen_label_rtx (); | |
4574 | label5 = gen_label_rtx (); | |
f5963e61 JL |
4575 | do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2); |
4576 | do_cmp_and_jump (adjusted_op0, const0_rtx, GT, | |
4577 | compute_mode, label1); | |
55c2d311 TG |
4578 | tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1, |
4579 | quotient, 0, OPTAB_LIB_WIDEN); | |
4580 | if (tem != quotient) | |
4581 | emit_move_insn (quotient, tem); | |
4582 | emit_jump_insn (gen_jump (label5)); | |
4583 | emit_barrier (); | |
4584 | emit_label (label1); | |
4585 | expand_dec (adjusted_op0, const1_rtx); | |
4586 | emit_jump_insn (gen_jump (label4)); | |
4587 | emit_barrier (); | |
4588 | emit_label (label2); | |
f5963e61 JL |
4589 | do_cmp_and_jump (adjusted_op0, const0_rtx, LT, |
4590 | compute_mode, label3); | |
55c2d311 TG |
4591 | tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1, |
4592 | quotient, 0, OPTAB_LIB_WIDEN); | |
4593 | if (tem != quotient) | |
4594 | emit_move_insn (quotient, tem); | |
4595 | emit_jump_insn (gen_jump (label5)); | |
4596 | emit_barrier (); | |
4597 | emit_label (label3); | |
4598 | expand_inc (adjusted_op0, const1_rtx); | |
4599 | emit_label (label4); | |
4600 | tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1, | |
4601 | quotient, 0, OPTAB_LIB_WIDEN); | |
4602 | if (tem != quotient) | |
4603 | emit_move_insn (quotient, tem); | |
4604 | expand_inc (quotient, const1_rtx); | |
4605 | emit_label (label5); | |
44037a66 | 4606 | } |
55c2d311 TG |
4607 | } |
4608 | break; | |
bc1c7e93 | 4609 | |
55c2d311 TG |
4610 | case EXACT_DIV_EXPR: |
4611 | if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size) | |
4612 | { | |
4613 | HOST_WIDE_INT d = INTVAL (op1); | |
4614 | unsigned HOST_WIDE_INT ml; | |
91ce572a | 4615 | int pre_shift; |
55c2d311 TG |
4616 | rtx t1; |
4617 | ||
91ce572a CC |
4618 | pre_shift = floor_log2 (d & -d); |
4619 | ml = invert_mod2n (d >> pre_shift, size); | |
4620 | t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0, | |
7d60be94 | 4621 | build_int_cst (NULL_TREE, pre_shift), |
4a90aeeb | 4622 | NULL_RTX, unsignedp); |
69107307 | 4623 | quotient = expand_mult (compute_mode, t1, |
2496c7bd | 4624 | gen_int_mode (ml, compute_mode), |
31ff3e0b | 4625 | NULL_RTX, 1); |
55c2d311 TG |
4626 | |
4627 | insn = get_last_insn (); | |
7e5bda2c | 4628 | set_unique_reg_note (insn, |
502b8322 | 4629 | REG_EQUAL, |
7e5bda2c AM |
4630 | gen_rtx_fmt_ee (unsignedp ? UDIV : DIV, |
4631 | compute_mode, | |
4632 | op0, op1)); | |
55c2d311 TG |
4633 | } |
4634 | break; | |
4635 | ||
4636 | case ROUND_DIV_EXPR: | |
4637 | case ROUND_MOD_EXPR: | |
69f61901 RK |
4638 | if (unsignedp) |
4639 | { | |
4640 | rtx tem; | |
4641 | rtx label; | |
4642 | label = gen_label_rtx (); | |
4643 | quotient = gen_reg_rtx (compute_mode); | |
4644 | remainder = gen_reg_rtx (compute_mode); | |
4645 | if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0) | |
4646 | { | |
4647 | rtx tem; | |
4648 | quotient = expand_binop (compute_mode, udiv_optab, op0, op1, | |
4649 | quotient, 1, OPTAB_LIB_WIDEN); | |
4650 | tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1); | |
4651 | remainder = expand_binop (compute_mode, sub_optab, op0, tem, | |
4652 | remainder, 1, OPTAB_LIB_WIDEN); | |
4653 | } | |
4654 | tem = plus_constant (op1, -1); | |
4655 | tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, | |
7d60be94 | 4656 | build_int_cst (NULL_TREE, 1), |
4a90aeeb | 4657 | NULL_RTX, 1); |
f5963e61 | 4658 | do_cmp_and_jump (remainder, tem, LEU, compute_mode, label); |
69f61901 RK |
4659 | expand_inc (quotient, const1_rtx); |
4660 | expand_dec (remainder, op1); | |
4661 | emit_label (label); | |
4662 | } | |
4663 | else | |
4664 | { | |
4665 | rtx abs_rem, abs_op1, tem, mask; | |
4666 | rtx label; | |
4667 | label = gen_label_rtx (); | |
4668 | quotient = gen_reg_rtx (compute_mode); | |
4669 | remainder = gen_reg_rtx (compute_mode); | |
4670 | if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0) | |
4671 | { | |
4672 | rtx tem; | |
4673 | quotient = expand_binop (compute_mode, sdiv_optab, op0, op1, | |
4674 | quotient, 0, OPTAB_LIB_WIDEN); | |
4675 | tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0); | |
4676 | remainder = expand_binop (compute_mode, sub_optab, op0, tem, | |
4677 | remainder, 0, OPTAB_LIB_WIDEN); | |
4678 | } | |
91ce572a CC |
4679 | abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0); |
4680 | abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0); | |
69f61901 | 4681 | tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem, |
7d60be94 | 4682 | build_int_cst (NULL_TREE, 1), |
4a90aeeb | 4683 | NULL_RTX, 1); |
f5963e61 | 4684 | do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label); |
69f61901 RK |
4685 | tem = expand_binop (compute_mode, xor_optab, op0, op1, |
4686 | NULL_RTX, 0, OPTAB_WIDEN); | |
4687 | mask = expand_shift (RSHIFT_EXPR, compute_mode, tem, | |
7d60be94 | 4688 | build_int_cst (NULL_TREE, size - 1), |
4a90aeeb | 4689 | NULL_RTX, 0); |
69f61901 RK |
4690 | tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx, |
4691 | NULL_RTX, 0, OPTAB_WIDEN); | |
4692 | tem = expand_binop (compute_mode, sub_optab, tem, mask, | |
4693 | NULL_RTX, 0, OPTAB_WIDEN); | |
4694 | expand_inc (quotient, tem); | |
4695 | tem = expand_binop (compute_mode, xor_optab, mask, op1, | |
4696 | NULL_RTX, 0, OPTAB_WIDEN); | |
4697 | tem = expand_binop (compute_mode, sub_optab, tem, mask, | |
4698 | NULL_RTX, 0, OPTAB_WIDEN); | |
4699 | expand_dec (remainder, tem); | |
4700 | emit_label (label); | |
4701 | } | |
4702 | return gen_lowpart (mode, rem_flag ? remainder : quotient); | |
c410d49e | 4703 | |
e9a25f70 | 4704 | default: |
5b0264cb | 4705 | gcc_unreachable (); |
55c2d311 | 4706 | } |
44037a66 | 4707 | |
55c2d311 | 4708 | if (quotient == 0) |
44037a66 | 4709 | { |
a45cf58c RK |
4710 | if (target && GET_MODE (target) != compute_mode) |
4711 | target = 0; | |
4712 | ||
55c2d311 | 4713 | if (rem_flag) |
44037a66 | 4714 | { |
32fdf36b | 4715 | /* Try to produce the remainder without producing the quotient. |
d6a7951f | 4716 | If we seem to have a divmod pattern that does not require widening, |
b20b352b | 4717 | don't try widening here. We should really have a WIDEN argument |
32fdf36b TG |
4718 | to expand_twoval_binop, since what we'd really like to do here is |
4719 | 1) try a mod insn in compute_mode | |
4720 | 2) try a divmod insn in compute_mode | |
4721 | 3) try a div insn in compute_mode and multiply-subtract to get | |
4722 | remainder | |
4723 | 4) try the same things with widening allowed. */ | |
4724 | remainder | |
4725 | = sign_expand_binop (compute_mode, umod_optab, smod_optab, | |
4726 | op0, op1, target, | |
4727 | unsignedp, | |
166cdb08 | 4728 | ((optab_handler (optab2, compute_mode)->insn_code |
32fdf36b TG |
4729 | != CODE_FOR_nothing) |
4730 | ? OPTAB_DIRECT : OPTAB_WIDEN)); | |
55c2d311 | 4731 | if (remainder == 0) |
44037a66 TG |
4732 | { |
4733 | /* No luck there. Can we do remainder and divide at once | |
4734 | without a library call? */ | |
55c2d311 TG |
4735 | remainder = gen_reg_rtx (compute_mode); |
4736 | if (! expand_twoval_binop ((unsignedp | |
4737 | ? udivmod_optab | |
4738 | : sdivmod_optab), | |
4739 | op0, op1, | |
4740 | NULL_RTX, remainder, unsignedp)) | |
4741 | remainder = 0; | |
44037a66 | 4742 | } |
55c2d311 TG |
4743 | |
4744 | if (remainder) | |
4745 | return gen_lowpart (mode, remainder); | |
44037a66 | 4746 | } |
44037a66 | 4747 | |
dc38b292 RK |
4748 | /* Produce the quotient. Try a quotient insn, but not a library call. |
4749 | If we have a divmod in this mode, use it in preference to widening | |
4750 | the div (for this test we assume it will not fail). Note that optab2 | |
4751 | is set to the one of the two optabs that the call below will use. */ | |
4752 | quotient | |
4753 | = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab, | |
4754 | op0, op1, rem_flag ? NULL_RTX : target, | |
4755 | unsignedp, | |
166cdb08 | 4756 | ((optab_handler (optab2, compute_mode)->insn_code |
dc38b292 RK |
4757 | != CODE_FOR_nothing) |
4758 | ? OPTAB_DIRECT : OPTAB_WIDEN)); | |
4759 | ||
55c2d311 | 4760 | if (quotient == 0) |
44037a66 TG |
4761 | { |
4762 | /* No luck there. Try a quotient-and-remainder insn, | |
4763 | keeping the quotient alone. */ | |
55c2d311 | 4764 | quotient = gen_reg_rtx (compute_mode); |
44037a66 | 4765 | if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab, |
55c2d311 TG |
4766 | op0, op1, |
4767 | quotient, NULL_RTX, unsignedp)) | |
4768 | { | |
4769 | quotient = 0; | |
4770 | if (! rem_flag) | |
4771 | /* Still no luck. If we are not computing the remainder, | |
4772 | use a library call for the quotient. */ | |
4773 | quotient = sign_expand_binop (compute_mode, | |
4774 | udiv_optab, sdiv_optab, | |
4775 | op0, op1, target, | |
4776 | unsignedp, OPTAB_LIB_WIDEN); | |
4777 | } | |
44037a66 | 4778 | } |
44037a66 TG |
4779 | } |
4780 | ||
44037a66 TG |
4781 | if (rem_flag) |
4782 | { | |
a45cf58c RK |
4783 | if (target && GET_MODE (target) != compute_mode) |
4784 | target = 0; | |
4785 | ||
55c2d311 | 4786 | if (quotient == 0) |
b3f8d95d MM |
4787 | { |
4788 | /* No divide instruction either. Use library for remainder. */ | |
4789 | remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab, | |
4790 | op0, op1, target, | |
4791 | unsignedp, OPTAB_LIB_WIDEN); | |
4792 | /* No remainder function. Try a quotient-and-remainder | |
4793 | function, keeping the remainder. */ | |
4794 | if (!remainder) | |
4795 | { | |
4796 | remainder = gen_reg_rtx (compute_mode); | |
4797 | if (!expand_twoval_binop_libfunc | |
4798 | (unsignedp ? udivmod_optab : sdivmod_optab, | |
4799 | op0, op1, | |
4800 | NULL_RTX, remainder, | |
4801 | unsignedp ? UMOD : MOD)) | |
4802 | remainder = NULL_RTX; | |
4803 | } | |
4804 | } | |
44037a66 TG |
4805 | else |
4806 | { | |
4807 | /* We divided. Now finish doing X - Y * (X / Y). */ | |
55c2d311 TG |
4808 | remainder = expand_mult (compute_mode, quotient, op1, |
4809 | NULL_RTX, unsignedp); | |
4810 | remainder = expand_binop (compute_mode, sub_optab, op0, | |
4811 | remainder, target, unsignedp, | |
4812 | OPTAB_LIB_WIDEN); | |
44037a66 TG |
4813 | } |
4814 | } | |
4815 | ||
55c2d311 | 4816 | return gen_lowpart (mode, rem_flag ? remainder : quotient); |
44037a66 TG |
4817 | } |
4818 | \f | |
4819 | /* Return a tree node with data type TYPE, describing the value of X. | |
4dfa0342 | 4820 | Usually this is an VAR_DECL, if there is no obvious better choice. |
44037a66 | 4821 | X may be an expression, however we only support those expressions |
6d2f8887 | 4822 | generated by loop.c. */ |
44037a66 TG |
4823 | |
4824 | tree | |
502b8322 | 4825 | make_tree (tree type, rtx x) |
44037a66 TG |
4826 | { |
4827 | tree t; | |
4828 | ||
4829 | switch (GET_CODE (x)) | |
4830 | { | |
4831 | case CONST_INT: | |
4a90aeeb NS |
4832 | { |
4833 | HOST_WIDE_INT hi = 0; | |
4834 | ||
4835 | if (INTVAL (x) < 0 | |
4836 | && !(TYPE_UNSIGNED (type) | |
4837 | && (GET_MODE_BITSIZE (TYPE_MODE (type)) | |
4838 | < HOST_BITS_PER_WIDE_INT))) | |
4839 | hi = -1; | |
4840 | ||
7d60be94 | 4841 | t = build_int_cst_wide (type, INTVAL (x), hi); |
4a90aeeb NS |
4842 | |
4843 | return t; | |
4844 | } | |
4845 | ||
44037a66 TG |
4846 | case CONST_DOUBLE: |
4847 | if (GET_MODE (x) == VOIDmode) | |
7d60be94 NS |
4848 | t = build_int_cst_wide (type, |
4849 | CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x)); | |
44037a66 TG |
4850 | else |
4851 | { | |
4852 | REAL_VALUE_TYPE d; | |
4853 | ||
4854 | REAL_VALUE_FROM_CONST_DOUBLE (d, x); | |
4855 | t = build_real (type, d); | |
4856 | } | |
4857 | ||
4858 | return t; | |
69ef87e2 AH |
4859 | |
4860 | case CONST_VECTOR: | |
4861 | { | |
b8b7f162 RS |
4862 | int units = CONST_VECTOR_NUNITS (x); |
4863 | tree itype = TREE_TYPE (type); | |
69ef87e2 | 4864 | tree t = NULL_TREE; |
b8b7f162 | 4865 | int i; |
69ef87e2 | 4866 | |
69ef87e2 AH |
4867 | |
4868 | /* Build a tree with vector elements. */ | |
4869 | for (i = units - 1; i >= 0; --i) | |
4870 | { | |
b8b7f162 RS |
4871 | rtx elt = CONST_VECTOR_ELT (x, i); |
4872 | t = tree_cons (NULL_TREE, make_tree (itype, elt), t); | |
69ef87e2 | 4873 | } |
c410d49e | 4874 | |
69ef87e2 AH |
4875 | return build_vector (type, t); |
4876 | } | |
4877 | ||
44037a66 | 4878 | case PLUS: |
4845b383 KH |
4879 | return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)), |
4880 | make_tree (type, XEXP (x, 1))); | |
c410d49e | 4881 | |
44037a66 | 4882 | case MINUS: |
4845b383 KH |
4883 | return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)), |
4884 | make_tree (type, XEXP (x, 1))); | |
c410d49e | 4885 | |
44037a66 | 4886 | case NEG: |
4845b383 | 4887 | return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0))); |
44037a66 TG |
4888 | |
4889 | case MULT: | |
4845b383 KH |
4890 | return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)), |
4891 | make_tree (type, XEXP (x, 1))); | |
c410d49e | 4892 | |
44037a66 | 4893 | case ASHIFT: |
4845b383 KH |
4894 | return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)), |
4895 | make_tree (type, XEXP (x, 1))); | |
c410d49e | 4896 | |
44037a66 | 4897 | case LSHIFTRT: |
ca5ba2a3 | 4898 | t = unsigned_type_for (type); |
aeba6c28 JM |
4899 | return fold_convert (type, build2 (RSHIFT_EXPR, t, |
4900 | make_tree (t, XEXP (x, 0)), | |
4901 | make_tree (type, XEXP (x, 1)))); | |
c410d49e | 4902 | |
44037a66 | 4903 | case ASHIFTRT: |
12753674 | 4904 | t = signed_type_for (type); |
aeba6c28 JM |
4905 | return fold_convert (type, build2 (RSHIFT_EXPR, t, |
4906 | make_tree (t, XEXP (x, 0)), | |
4907 | make_tree (type, XEXP (x, 1)))); | |
c410d49e | 4908 | |
44037a66 TG |
4909 | case DIV: |
4910 | if (TREE_CODE (type) != REAL_TYPE) | |
12753674 | 4911 | t = signed_type_for (type); |
44037a66 TG |
4912 | else |
4913 | t = type; | |
4914 | ||
aeba6c28 JM |
4915 | return fold_convert (type, build2 (TRUNC_DIV_EXPR, t, |
4916 | make_tree (t, XEXP (x, 0)), | |
4917 | make_tree (t, XEXP (x, 1)))); | |
44037a66 | 4918 | case UDIV: |
ca5ba2a3 | 4919 | t = unsigned_type_for (type); |
aeba6c28 JM |
4920 | return fold_convert (type, build2 (TRUNC_DIV_EXPR, t, |
4921 | make_tree (t, XEXP (x, 0)), | |
4922 | make_tree (t, XEXP (x, 1)))); | |
5c45425b RH |
4923 | |
4924 | case SIGN_EXTEND: | |
4925 | case ZERO_EXTEND: | |
ae2bcd98 RS |
4926 | t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)), |
4927 | GET_CODE (x) == ZERO_EXTEND); | |
aeba6c28 | 4928 | return fold_convert (type, make_tree (t, XEXP (x, 0))); |
5c45425b | 4929 | |
84816907 JM |
4930 | case CONST: |
4931 | return make_tree (type, XEXP (x, 0)); | |
4932 | ||
4933 | case SYMBOL_REF: | |
4934 | t = SYMBOL_REF_DECL (x); | |
4935 | if (t) | |
4936 | return fold_convert (type, build_fold_addr_expr (t)); | |
4937 | /* else fall through. */ | |
4938 | ||
4dfa0342 RH |
4939 | default: |
4940 | t = build_decl (VAR_DECL, NULL_TREE, type); | |
d1608933 | 4941 | |
d1608933 RK |
4942 | /* If TYPE is a POINTER_TYPE, X might be Pmode with TYPE_MODE being |
4943 | ptr_mode. So convert. */ | |
5ae6cd0d | 4944 | if (POINTER_TYPE_P (type)) |
d1608933 | 4945 | x = convert_memory_address (TYPE_MODE (type), x); |
d1608933 | 4946 | |
8a0aa06e RH |
4947 | /* Note that we do *not* use SET_DECL_RTL here, because we do not |
4948 | want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */ | |
820cc88f | 4949 | t->decl_with_rtl.rtl = x; |
4dfa0342 | 4950 | |
44037a66 TG |
4951 | return t; |
4952 | } | |
4953 | } | |
44037a66 TG |
4954 | \f |
4955 | /* Compute the logical-and of OP0 and OP1, storing it in TARGET | |
4956 | and returning TARGET. | |
4957 | ||
4958 | If TARGET is 0, a pseudo-register or constant is returned. */ | |
4959 | ||
4960 | rtx | |
502b8322 | 4961 | expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target) |
44037a66 | 4962 | { |
22273300 | 4963 | rtx tem = 0; |
44037a66 | 4964 | |
22273300 JJ |
4965 | if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode) |
4966 | tem = simplify_binary_operation (AND, mode, op0, op1); | |
4967 | if (tem == 0) | |
44037a66 | 4968 | tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN); |
44037a66 TG |
4969 | |
4970 | if (target == 0) | |
4971 | target = tem; | |
4972 | else if (tem != target) | |
4973 | emit_move_insn (target, tem); | |
4974 | return target; | |
4975 | } | |
4976 | \f | |
a41a56b6 RE |
4977 | /* Helper function for emit_store_flag. */ |
4978 | static rtx | |
4979 | emit_store_flag_1 (rtx target, rtx subtarget, enum machine_mode mode, | |
4980 | int normalizep) | |
4981 | { | |
4982 | rtx op0; | |
4983 | enum machine_mode target_mode = GET_MODE (target); | |
4984 | ||
4985 | /* If we are converting to a wider mode, first convert to | |
4986 | TARGET_MODE, then normalize. This produces better combining | |
4987 | opportunities on machines that have a SIGN_EXTRACT when we are | |
4988 | testing a single bit. This mostly benefits the 68k. | |
4989 | ||
4990 | If STORE_FLAG_VALUE does not have the sign bit set when | |
4991 | interpreted in MODE, we can do this conversion as unsigned, which | |
4992 | is usually more efficient. */ | |
4993 | if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode)) | |
4994 | { | |
4995 | convert_move (target, subtarget, | |
4996 | (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT) | |
4997 | && 0 == (STORE_FLAG_VALUE | |
4998 | & ((HOST_WIDE_INT) 1 | |
4999 | << (GET_MODE_BITSIZE (mode) -1)))); | |
5000 | op0 = target; | |
5001 | mode = target_mode; | |
5002 | } | |
5003 | else | |
5004 | op0 = subtarget; | |
5005 | ||
5006 | /* If we want to keep subexpressions around, don't reuse our last | |
5007 | target. */ | |
5008 | if (optimize) | |
5009 | subtarget = 0; | |
5010 | ||
5011 | /* Now normalize to the proper value in MODE. Sometimes we don't | |
5012 | have to do anything. */ | |
5013 | if (normalizep == 0 || normalizep == STORE_FLAG_VALUE) | |
5014 | ; | |
5015 | /* STORE_FLAG_VALUE might be the most negative number, so write | |
5016 | the comparison this way to avoid a compiler-time warning. */ | |
5017 | else if (- normalizep == STORE_FLAG_VALUE) | |
5018 | op0 = expand_unop (mode, neg_optab, op0, subtarget, 0); | |
5019 | ||
5020 | /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes | |
5021 | it hard to use a value of just the sign bit due to ANSI integer | |
5022 | constant typing rules. */ | |
5023 | else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT | |
5024 | && (STORE_FLAG_VALUE | |
5025 | & ((HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))) | |
5026 | op0 = expand_shift (RSHIFT_EXPR, mode, op0, | |
5027 | size_int (GET_MODE_BITSIZE (mode) - 1), subtarget, | |
5028 | normalizep == 1); | |
5029 | else | |
5030 | { | |
5031 | gcc_assert (STORE_FLAG_VALUE & 1); | |
5032 | ||
5033 | op0 = expand_and (mode, op0, const1_rtx, subtarget); | |
5034 | if (normalizep == -1) | |
5035 | op0 = expand_unop (mode, neg_optab, op0, op0, 0); | |
5036 | } | |
5037 | ||
5038 | /* If we were converting to a smaller mode, do the conversion now. */ | |
5039 | if (target_mode != mode) | |
5040 | { | |
5041 | convert_move (target, op0, 0); | |
5042 | return target; | |
5043 | } | |
5044 | else | |
5045 | return op0; | |
5046 | } | |
5047 | ||
44037a66 TG |
5048 | /* Emit a store-flags instruction for comparison CODE on OP0 and OP1 |
5049 | and storing in TARGET. Normally return TARGET. | |
5050 | Return 0 if that cannot be done. | |
5051 | ||
5052 | MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If | |
c410d49e | 5053 | it is VOIDmode, they cannot both be CONST_INT. |
44037a66 TG |
5054 | |
5055 | UNSIGNEDP is for the case where we have to widen the operands | |
5056 | to perform the operation. It says to use zero-extension. | |
5057 | ||
5058 | NORMALIZEP is 1 if we should convert the result to be either zero | |
373e7d69 | 5059 | or one. Normalize is -1 if we should convert the result to be |
44037a66 TG |
5060 | either zero or -1. If NORMALIZEP is zero, the result will be left |
5061 | "raw" out of the scc insn. */ | |
5062 | ||
5063 | rtx | |
502b8322 AJ |
5064 | emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1, |
5065 | enum machine_mode mode, int unsignedp, int normalizep) | |
44037a66 TG |
5066 | { |
5067 | rtx subtarget; | |
5068 | enum insn_code icode; | |
5069 | enum machine_mode compare_mode; | |
5070 | enum machine_mode target_mode = GET_MODE (target); | |
5071 | rtx tem; | |
db2f8a07 | 5072 | rtx last = get_last_insn (); |
44037a66 TG |
5073 | rtx pattern, comparison; |
5074 | ||
b30f05db BS |
5075 | if (unsignedp) |
5076 | code = unsigned_condition (code); | |
5077 | ||
c2615a67 RK |
5078 | /* If one operand is constant, make it the second one. Only do this |
5079 | if the other operand is not constant as well. */ | |
5080 | ||
8c9864f3 | 5081 | if (swap_commutative_operands_p (op0, op1)) |
c2615a67 RK |
5082 | { |
5083 | tem = op0; | |
5084 | op0 = op1; | |
5085 | op1 = tem; | |
5086 | code = swap_condition (code); | |
5087 | } | |
5088 | ||
6405e07b DE |
5089 | if (mode == VOIDmode) |
5090 | mode = GET_MODE (op0); | |
5091 | ||
c410d49e | 5092 | /* For some comparisons with 1 and -1, we can convert this to |
44037a66 | 5093 | comparisons with zero. This will often produce more opportunities for |
0f41302f | 5094 | store-flag insns. */ |
44037a66 TG |
5095 | |
5096 | switch (code) | |
5097 | { | |
5098 | case LT: | |
5099 | if (op1 == const1_rtx) | |
5100 | op1 = const0_rtx, code = LE; | |
5101 | break; | |
5102 | case LE: | |
5103 | if (op1 == constm1_rtx) | |
5104 | op1 = const0_rtx, code = LT; | |
5105 | break; | |
5106 | case GE: | |
5107 | if (op1 == const1_rtx) | |
5108 | op1 = const0_rtx, code = GT; | |
5109 | break; | |
5110 | case GT: | |
5111 | if (op1 == constm1_rtx) | |
5112 | op1 = const0_rtx, code = GE; | |
5113 | break; | |
5114 | case GEU: | |
5115 | if (op1 == const1_rtx) | |
5116 | op1 = const0_rtx, code = NE; | |
5117 | break; | |
5118 | case LTU: | |
5119 | if (op1 == const1_rtx) | |
5120 | op1 = const0_rtx, code = EQ; | |
5121 | break; | |
e9a25f70 JL |
5122 | default: |
5123 | break; | |
44037a66 TG |
5124 | } |
5125 | ||
884815aa JB |
5126 | /* If we are comparing a double-word integer with zero or -1, we can |
5127 | convert the comparison into one involving a single word. */ | |
6912b84b RK |
5128 | if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2 |
5129 | && GET_MODE_CLASS (mode) == MODE_INT | |
3c0cb5de | 5130 | && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0))) |
6912b84b | 5131 | { |
884815aa JB |
5132 | if ((code == EQ || code == NE) |
5133 | && (op1 == const0_rtx || op1 == constm1_rtx)) | |
6912b84b | 5134 | { |
8433f113 RH |
5135 | rtx op00, op01, op0both; |
5136 | ||
a41a56b6 RE |
5137 | /* Do a logical OR or AND of the two words and compare the |
5138 | result. */ | |
8433f113 RH |
5139 | op00 = simplify_gen_subreg (word_mode, op0, mode, 0); |
5140 | op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD); | |
884815aa JB |
5141 | op0both = expand_binop (word_mode, |
5142 | op1 == const0_rtx ? ior_optab : and_optab, | |
a41a56b6 RE |
5143 | op00, op01, NULL_RTX, unsignedp, |
5144 | OPTAB_DIRECT); | |
884815aa | 5145 | |
6912b84b RK |
5146 | if (op0both != 0) |
5147 | return emit_store_flag (target, code, op0both, op1, word_mode, | |
5148 | unsignedp, normalizep); | |
5149 | } | |
884815aa | 5150 | else if ((code == LT || code == GE) && op1 == const0_rtx) |
8433f113 RH |
5151 | { |
5152 | rtx op0h; | |
5153 | ||
5154 | /* If testing the sign bit, can just test on high word. */ | |
5155 | op0h = simplify_gen_subreg (word_mode, op0, mode, | |
a41a56b6 RE |
5156 | subreg_highpart_offset (word_mode, |
5157 | mode)); | |
8433f113 RH |
5158 | return emit_store_flag (target, code, op0h, op1, word_mode, |
5159 | unsignedp, normalizep); | |
5160 | } | |
6912b84b RK |
5161 | } |
5162 | ||
44037a66 TG |
5163 | /* If this is A < 0 or A >= 0, we can do this by taking the ones |
5164 | complement of A (for GE) and shifting the sign bit to the low bit. */ | |
5165 | if (op1 == const0_rtx && (code == LT || code == GE) | |
5166 | && GET_MODE_CLASS (mode) == MODE_INT | |
5167 | && (normalizep || STORE_FLAG_VALUE == 1 | |
b1ec3c92 | 5168 | || (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT |
12dd565f | 5169 | && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode)) |
a41a56b6 RE |
5170 | == ((unsigned HOST_WIDE_INT) 1 |
5171 | << (GET_MODE_BITSIZE (mode) - 1)))))) | |
44037a66 | 5172 | { |
8deb7047 | 5173 | subtarget = target; |
44037a66 TG |
5174 | |
5175 | /* If the result is to be wider than OP0, it is best to convert it | |
5176 | first. If it is to be narrower, it is *incorrect* to convert it | |
5177 | first. */ | |
5178 | if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode)) | |
5179 | { | |
81722fa9 | 5180 | op0 = convert_modes (target_mode, mode, op0, 0); |
44037a66 TG |
5181 | mode = target_mode; |
5182 | } | |
5183 | ||
5184 | if (target_mode != mode) | |
5185 | subtarget = 0; | |
5186 | ||
5187 | if (code == GE) | |
1d6eaf3d RK |
5188 | op0 = expand_unop (mode, one_cmpl_optab, op0, |
5189 | ((STORE_FLAG_VALUE == 1 || normalizep) | |
5190 | ? 0 : subtarget), 0); | |
44037a66 | 5191 | |
1d6eaf3d | 5192 | if (STORE_FLAG_VALUE == 1 || normalizep) |
44037a66 TG |
5193 | /* If we are supposed to produce a 0/1 value, we want to do |
5194 | a logical shift from the sign bit to the low-order bit; for | |
5195 | a -1/0 value, we do an arithmetic shift. */ | |
5196 | op0 = expand_shift (RSHIFT_EXPR, mode, op0, | |
5197 | size_int (GET_MODE_BITSIZE (mode) - 1), | |
5198 | subtarget, normalizep != -1); | |
5199 | ||
5200 | if (mode != target_mode) | |
c2ec26b8 | 5201 | op0 = convert_modes (target_mode, mode, op0, 0); |
44037a66 TG |
5202 | |
5203 | return op0; | |
5204 | } | |
5205 | ||
a41a56b6 RE |
5206 | icode = setcc_gen_code[(int) code]; |
5207 | ||
44037a66 TG |
5208 | if (icode != CODE_FOR_nothing) |
5209 | { | |
a995e389 RH |
5210 | insn_operand_predicate_fn pred; |
5211 | ||
44037a66 | 5212 | /* We think we may be able to do this with a scc insn. Emit the |
ad76cef8 | 5213 | comparison and then the scc insn. */ |
44037a66 | 5214 | |
3bdf5848 | 5215 | do_pending_stack_adjust (); |
44037a66 TG |
5216 | last = get_last_insn (); |
5217 | ||
b1ec3c92 | 5218 | comparison |
d43e0b7d | 5219 | = compare_from_rtx (op0, op1, code, unsignedp, mode, NULL_RTX); |
bb394606 RH |
5220 | if (CONSTANT_P (comparison)) |
5221 | { | |
5b0264cb | 5222 | switch (GET_CODE (comparison)) |
bb394606 | 5223 | { |
5b0264cb | 5224 | case CONST_INT: |
bb394606 RH |
5225 | if (comparison == const0_rtx) |
5226 | return const0_rtx; | |
5b0264cb NS |
5227 | break; |
5228 | ||
bb394606 | 5229 | #ifdef FLOAT_STORE_FLAG_VALUE |
5b0264cb | 5230 | case CONST_DOUBLE: |
bb394606 RH |
5231 | if (comparison == CONST0_RTX (GET_MODE (comparison))) |
5232 | return const0_rtx; | |
5b0264cb | 5233 | break; |
bb394606 | 5234 | #endif |
5b0264cb NS |
5235 | default: |
5236 | gcc_unreachable (); | |
5237 | } | |
5238 | ||
bb394606 RH |
5239 | if (normalizep == 1) |
5240 | return const1_rtx; | |
5241 | if (normalizep == -1) | |
5242 | return constm1_rtx; | |
5243 | return const_true_rtx; | |
5244 | } | |
44037a66 | 5245 | |
8f08e8c0 JL |
5246 | /* The code of COMPARISON may not match CODE if compare_from_rtx |
5247 | decided to swap its operands and reverse the original code. | |
c2615a67 | 5248 | |
8f08e8c0 JL |
5249 | We know that compare_from_rtx returns either a CONST_INT or |
5250 | a new comparison code, so it is safe to just extract the | |
5251 | code from COMPARISON. */ | |
5252 | code = GET_CODE (comparison); | |
8deb7047 | 5253 | |
44037a66 | 5254 | /* Get a reference to the target in the proper mode for this insn. */ |
a995e389 | 5255 | compare_mode = insn_data[(int) icode].operand[0].mode; |
44037a66 | 5256 | subtarget = target; |
a995e389 | 5257 | pred = insn_data[(int) icode].operand[0].predicate; |
7c27e184 | 5258 | if (optimize || ! (*pred) (subtarget, compare_mode)) |
44037a66 TG |
5259 | subtarget = gen_reg_rtx (compare_mode); |
5260 | ||
5261 | pattern = GEN_FCN (icode) (subtarget); | |
5262 | if (pattern) | |
5263 | { | |
5264 | emit_insn (pattern); | |
a41a56b6 RE |
5265 | return emit_store_flag_1 (target, subtarget, compare_mode, |
5266 | normalizep); | |
5267 | } | |
5268 | } | |
5269 | else | |
5270 | { | |
5271 | /* We don't have an scc insn, so try a cstore insn. */ | |
44037a66 | 5272 | |
a41a56b6 RE |
5273 | for (compare_mode = mode; compare_mode != VOIDmode; |
5274 | compare_mode = GET_MODE_WIDER_MODE (compare_mode)) | |
5275 | { | |
166cdb08 | 5276 | icode = optab_handler (cstore_optab, compare_mode)->insn_code; |
a41a56b6 RE |
5277 | if (icode != CODE_FOR_nothing) |
5278 | break; | |
5279 | } | |
44037a66 | 5280 | |
a41a56b6 RE |
5281 | if (icode != CODE_FOR_nothing) |
5282 | { | |
5283 | enum machine_mode result_mode | |
5284 | = insn_data[(int) icode].operand[0].mode; | |
5285 | rtx cstore_op0 = op0; | |
5286 | rtx cstore_op1 = op1; | |
5287 | ||
5288 | do_pending_stack_adjust (); | |
5289 | last = get_last_insn (); | |
5290 | ||
5291 | if (compare_mode != mode) | |
44037a66 | 5292 | { |
a41a56b6 RE |
5293 | cstore_op0 = convert_modes (compare_mode, mode, cstore_op0, |
5294 | unsignedp); | |
5295 | cstore_op1 = convert_modes (compare_mode, mode, cstore_op1, | |
5296 | unsignedp); | |
44037a66 | 5297 | } |
a41a56b6 RE |
5298 | |
5299 | if (!insn_data[(int) icode].operand[2].predicate (cstore_op0, | |
5300 | compare_mode)) | |
5301 | cstore_op0 = copy_to_mode_reg (compare_mode, cstore_op0); | |
44037a66 | 5302 | |
a41a56b6 RE |
5303 | if (!insn_data[(int) icode].operand[3].predicate (cstore_op1, |
5304 | compare_mode)) | |
5305 | cstore_op1 = copy_to_mode_reg (compare_mode, cstore_op1); | |
5306 | ||
5307 | comparison = gen_rtx_fmt_ee (code, result_mode, cstore_op0, | |
5308 | cstore_op1); | |
5309 | subtarget = target; | |
5310 | ||
5311 | if (optimize || !(insn_data[(int) icode].operand[0].predicate | |
5312 | (subtarget, result_mode))) | |
5313 | subtarget = gen_reg_rtx (result_mode); | |
5314 | ||
5315 | pattern = GEN_FCN (icode) (subtarget, comparison, cstore_op0, | |
5316 | cstore_op1); | |
5317 | ||
5318 | if (pattern) | |
44037a66 | 5319 | { |
a41a56b6 RE |
5320 | emit_insn (pattern); |
5321 | return emit_store_flag_1 (target, subtarget, result_mode, | |
5322 | normalizep); | |
44037a66 | 5323 | } |
44037a66 TG |
5324 | } |
5325 | } | |
5326 | ||
db2f8a07 | 5327 | delete_insns_since (last); |
44037a66 | 5328 | |
7c27e184 PB |
5329 | /* If optimizing, use different pseudo registers for each insn, instead |
5330 | of reusing the same pseudo. This leads to better CSE, but slows | |
5331 | down the compiler, since there are more pseudos */ | |
5332 | subtarget = (!optimize | |
91e66235 | 5333 | && (target_mode == mode)) ? target : NULL_RTX; |
44037a66 TG |
5334 | |
5335 | /* If we reached here, we can't do this with a scc insn. However, there | |
5336 | are some comparisons that can be done directly. For example, if | |
5337 | this is an equality comparison of integers, we can try to exclusive-or | |
5338 | (or subtract) the two operands and use a recursive call to try the | |
5339 | comparison with zero. Don't do any of these cases if branches are | |
5340 | very cheap. */ | |
5341 | ||
c8c1bde3 | 5342 | if (BRANCH_COST > 0 |
44037a66 TG |
5343 | && GET_MODE_CLASS (mode) == MODE_INT && (code == EQ || code == NE) |
5344 | && op1 != const0_rtx) | |
5345 | { | |
5346 | tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1, | |
5347 | OPTAB_WIDEN); | |
5348 | ||
5349 | if (tem == 0) | |
5350 | tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1, | |
5351 | OPTAB_WIDEN); | |
5352 | if (tem != 0) | |
5353 | tem = emit_store_flag (target, code, tem, const0_rtx, | |
5354 | mode, unsignedp, normalizep); | |
5355 | if (tem == 0) | |
5356 | delete_insns_since (last); | |
5357 | return tem; | |
5358 | } | |
5359 | ||
c410d49e | 5360 | /* Some other cases we can do are EQ, NE, LE, and GT comparisons with |
44037a66 TG |
5361 | the constant zero. Reject all other comparisons at this point. Only |
5362 | do LE and GT if branches are expensive since they are expensive on | |
5363 | 2-operand machines. */ | |
5364 | ||
5365 | if (BRANCH_COST == 0 | |
5366 | || GET_MODE_CLASS (mode) != MODE_INT || op1 != const0_rtx | |
5367 | || (code != EQ && code != NE | |
5368 | && (BRANCH_COST <= 1 || (code != LE && code != GT)))) | |
5369 | return 0; | |
5370 | ||
5371 | /* See what we need to return. We can only return a 1, -1, or the | |
5372 | sign bit. */ | |
5373 | ||
5374 | if (normalizep == 0) | |
5375 | { | |
5376 | if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1) | |
5377 | normalizep = STORE_FLAG_VALUE; | |
5378 | ||
b1ec3c92 | 5379 | else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT |
12dd565f | 5380 | && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode)) |
c84e2712 | 5381 | == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1))) |
44037a66 TG |
5382 | ; |
5383 | else | |
5384 | return 0; | |
5385 | } | |
5386 | ||
5387 | /* Try to put the result of the comparison in the sign bit. Assume we can't | |
5388 | do the necessary operation below. */ | |
5389 | ||
5390 | tem = 0; | |
5391 | ||
5392 | /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has | |
5393 | the sign bit set. */ | |
5394 | ||
5395 | if (code == LE) | |
5396 | { | |
5397 | /* This is destructive, so SUBTARGET can't be OP0. */ | |
5398 | if (rtx_equal_p (subtarget, op0)) | |
5399 | subtarget = 0; | |
5400 | ||
5401 | tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0, | |
5402 | OPTAB_WIDEN); | |
5403 | if (tem) | |
5404 | tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0, | |
5405 | OPTAB_WIDEN); | |
5406 | } | |
5407 | ||
5408 | /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the | |
5409 | number of bits in the mode of OP0, minus one. */ | |
5410 | ||
5411 | if (code == GT) | |
5412 | { | |
5413 | if (rtx_equal_p (subtarget, op0)) | |
5414 | subtarget = 0; | |
5415 | ||
5416 | tem = expand_shift (RSHIFT_EXPR, mode, op0, | |
5417 | size_int (GET_MODE_BITSIZE (mode) - 1), | |
5418 | subtarget, 0); | |
5419 | tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0, | |
5420 | OPTAB_WIDEN); | |
5421 | } | |
c410d49e | 5422 | |
44037a66 TG |
5423 | if (code == EQ || code == NE) |
5424 | { | |
5425 | /* For EQ or NE, one way to do the comparison is to apply an operation | |
cc2902df | 5426 | that converts the operand into a positive number if it is nonzero |
44037a66 TG |
5427 | or zero if it was originally zero. Then, for EQ, we subtract 1 and |
5428 | for NE we negate. This puts the result in the sign bit. Then we | |
c410d49e | 5429 | normalize with a shift, if needed. |
44037a66 TG |
5430 | |
5431 | Two operations that can do the above actions are ABS and FFS, so try | |
5432 | them. If that doesn't work, and MODE is smaller than a full word, | |
36d747f6 | 5433 | we can use zero-extension to the wider mode (an unsigned conversion) |
44037a66 TG |
5434 | as the operation. */ |
5435 | ||
c410d49e EC |
5436 | /* Note that ABS doesn't yield a positive number for INT_MIN, but |
5437 | that is compensated by the subsequent overflow when subtracting | |
30f7a378 | 5438 | one / negating. */ |
91ce572a | 5439 | |
166cdb08 | 5440 | if (optab_handler (abs_optab, mode)->insn_code != CODE_FOR_nothing) |
44037a66 | 5441 | tem = expand_unop (mode, abs_optab, op0, subtarget, 1); |
166cdb08 | 5442 | else if (optab_handler (ffs_optab, mode)->insn_code != CODE_FOR_nothing) |
44037a66 TG |
5443 | tem = expand_unop (mode, ffs_optab, op0, subtarget, 1); |
5444 | else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD) | |
5445 | { | |
c2ec26b8 | 5446 | tem = convert_modes (word_mode, mode, op0, 1); |
81722fa9 | 5447 | mode = word_mode; |
44037a66 TG |
5448 | } |
5449 | ||
5450 | if (tem != 0) | |
5451 | { | |
5452 | if (code == EQ) | |
5453 | tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget, | |
5454 | 0, OPTAB_WIDEN); | |
5455 | else | |
5456 | tem = expand_unop (mode, neg_optab, tem, subtarget, 0); | |
5457 | } | |
5458 | ||
5459 | /* If we couldn't do it that way, for NE we can "or" the two's complement | |
5460 | of the value with itself. For EQ, we take the one's complement of | |
5461 | that "or", which is an extra insn, so we only handle EQ if branches | |
5462 | are expensive. */ | |
5463 | ||
5464 | if (tem == 0 && (code == NE || BRANCH_COST > 1)) | |
5465 | { | |
36d747f6 RS |
5466 | if (rtx_equal_p (subtarget, op0)) |
5467 | subtarget = 0; | |
5468 | ||
44037a66 TG |
5469 | tem = expand_unop (mode, neg_optab, op0, subtarget, 0); |
5470 | tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0, | |
5471 | OPTAB_WIDEN); | |
5472 | ||
5473 | if (tem && code == EQ) | |
5474 | tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0); | |
5475 | } | |
5476 | } | |
5477 | ||
5478 | if (tem && normalizep) | |
5479 | tem = expand_shift (RSHIFT_EXPR, mode, tem, | |
5480 | size_int (GET_MODE_BITSIZE (mode) - 1), | |
91e66235 | 5481 | subtarget, normalizep == 1); |
44037a66 | 5482 | |
91e66235 | 5483 | if (tem) |
44037a66 | 5484 | { |
91e66235 MM |
5485 | if (GET_MODE (tem) != target_mode) |
5486 | { | |
5487 | convert_move (target, tem, 0); | |
5488 | tem = target; | |
5489 | } | |
5490 | else if (!subtarget) | |
5491 | { | |
5492 | emit_move_insn (target, tem); | |
5493 | tem = target; | |
5494 | } | |
44037a66 | 5495 | } |
91e66235 | 5496 | else |
44037a66 TG |
5497 | delete_insns_since (last); |
5498 | ||
5499 | return tem; | |
5500 | } | |
04a8ee2f TG |
5501 | |
5502 | /* Like emit_store_flag, but always succeeds. */ | |
5503 | ||
5504 | rtx | |
502b8322 AJ |
5505 | emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1, |
5506 | enum machine_mode mode, int unsignedp, int normalizep) | |
04a8ee2f TG |
5507 | { |
5508 | rtx tem, label; | |
5509 | ||
5510 | /* First see if emit_store_flag can do the job. */ | |
5511 | tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep); | |
5512 | if (tem != 0) | |
5513 | return tem; | |
5514 | ||
5515 | if (normalizep == 0) | |
5516 | normalizep = 1; | |
5517 | ||
5518 | /* If this failed, we have to do this with set/compare/jump/set code. */ | |
5519 | ||
f8cfc6aa | 5520 | if (!REG_P (target) |
04a8ee2f TG |
5521 | || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1)) |
5522 | target = gen_reg_rtx (GET_MODE (target)); | |
5523 | ||
e4565aff | 5524 | emit_move_insn (target, const1_rtx); |
04a8ee2f | 5525 | label = gen_label_rtx (); |
d43e0b7d | 5526 | do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, |
b30f05db | 5527 | NULL_RTX, label); |
04a8ee2f | 5528 | |
e4565aff | 5529 | emit_move_insn (target, const0_rtx); |
44037a66 TG |
5530 | emit_label (label); |
5531 | ||
5532 | return target; | |
5533 | } | |
f5963e61 JL |
5534 | \f |
5535 | /* Perform possibly multi-word comparison and conditional jump to LABEL | |
feb04780 RS |
5536 | if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE. This is |
5537 | now a thin wrapper around do_compare_rtx_and_jump. */ | |
f5963e61 JL |
5538 | |
5539 | static void | |
502b8322 AJ |
5540 | do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode, |
5541 | rtx label) | |
f5963e61 | 5542 | { |
feb04780 RS |
5543 | int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU); |
5544 | do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode, | |
5545 | NULL_RTX, NULL_RTX, label); | |
f5963e61 | 5546 | } |