]>
Commit | Line | Data |
---|---|---|
44037a66 TG |
1 | /* Medium-level subroutines: convert bit-field store and extract |
2 | and shifts, multiplies and divides to rtl instructions. | |
ef58a523 | 3 | Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998, |
40e90eac | 4 | 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 |
feb04780 | 5 | Free Software Foundation, Inc. |
44037a66 | 6 | |
1322177d | 7 | This file is part of GCC. |
44037a66 | 8 | |
1322177d LB |
9 | GCC is free software; you can redistribute it and/or modify it under |
10 | the terms of the GNU General Public License as published by the Free | |
9dcd6f09 | 11 | Software Foundation; either version 3, or (at your option) any later |
1322177d | 12 | version. |
44037a66 | 13 | |
1322177d LB |
14 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
15 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
16 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
17 | for more details. | |
44037a66 TG |
18 | |
19 | You should have received a copy of the GNU General Public License | |
9dcd6f09 NC |
20 | along with GCC; see the file COPYING3. If not see |
21 | <http://www.gnu.org/licenses/>. */ | |
44037a66 TG |
22 | |
23 | ||
24 | #include "config.h" | |
670ee920 | 25 | #include "system.h" |
4977bab6 ZW |
26 | #include "coretypes.h" |
27 | #include "tm.h" | |
718f9c0f | 28 | #include "diagnostic-core.h" |
01198c2f | 29 | #include "toplev.h" |
44037a66 TG |
30 | #include "rtl.h" |
31 | #include "tree.h" | |
6baf1cc8 | 32 | #include "tm_p.h" |
44037a66 | 33 | #include "flags.h" |
44037a66 TG |
34 | #include "insn-config.h" |
35 | #include "expr.h" | |
e78d8e51 | 36 | #include "optabs.h" |
44037a66 | 37 | #include "recog.h" |
b0c48229 | 38 | #include "langhooks.h" |
6fb5fa3c | 39 | #include "df.h" |
0890b981 | 40 | #include "target.h" |
462f85ce RS |
41 | #include "expmed.h" |
42 | ||
43 | struct target_expmed default_target_expmed; | |
44 | #if SWITCHABLE_TARGET | |
45 | struct target_expmed *this_target_expmed = &default_target_expmed; | |
46 | #endif | |
44037a66 | 47 | |
502b8322 AJ |
48 | static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT, |
49 | unsigned HOST_WIDE_INT, | |
50 | unsigned HOST_WIDE_INT, rtx); | |
51 | static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT, | |
52 | unsigned HOST_WIDE_INT, rtx); | |
53 | static rtx extract_fixed_bit_field (enum machine_mode, rtx, | |
54 | unsigned HOST_WIDE_INT, | |
55 | unsigned HOST_WIDE_INT, | |
62519f7f | 56 | unsigned HOST_WIDE_INT, rtx, int, bool); |
502b8322 AJ |
57 | static rtx mask_rtx (enum machine_mode, int, int, int); |
58 | static rtx lshift_value (enum machine_mode, rtx, int, int); | |
59 | static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT, | |
60 | unsigned HOST_WIDE_INT, int); | |
61 | static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx); | |
0b55e932 | 62 | static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT); |
39cab019 | 63 | static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT); |
44037a66 | 64 | |
58b42e19 RS |
65 | /* Test whether a value is zero or a power of two. */ |
66 | #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0) | |
67 | ||
c7e33f89 | 68 | #ifndef SLOW_UNALIGNED_ACCESS |
e1565e65 | 69 | #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT |
c7e33f89 RS |
70 | #endif |
71 | ||
e49a094d | 72 | |
d523b40e RH |
73 | /* Reduce conditional compilation elsewhere. */ |
74 | #ifndef HAVE_insv | |
a242b083 | 75 | #define HAVE_insv 0 |
d523b40e RH |
76 | #define CODE_FOR_insv CODE_FOR_nothing |
77 | #define gen_insv(a,b,c,d) NULL_RTX | |
78 | #endif | |
79 | #ifndef HAVE_extv | |
a242b083 | 80 | #define HAVE_extv 0 |
d523b40e RH |
81 | #define CODE_FOR_extv CODE_FOR_nothing |
82 | #define gen_extv(a,b,c,d) NULL_RTX | |
83 | #endif | |
84 | #ifndef HAVE_extzv | |
a242b083 | 85 | #define HAVE_extzv 0 |
d523b40e RH |
86 | #define CODE_FOR_extzv CODE_FOR_nothing |
87 | #define gen_extzv(a,b,c,d) NULL_RTX | |
88 | #endif | |
89 | ||
44037a66 | 90 | void |
502b8322 | 91 | init_expmed (void) |
44037a66 | 92 | { |
79b4a8dc RH |
93 | struct |
94 | { | |
fdded401 | 95 | struct rtx_def reg; rtunion reg_fld[2]; |
79b4a8dc RH |
96 | struct rtx_def plus; rtunion plus_fld1; |
97 | struct rtx_def neg; | |
79b4a8dc | 98 | struct rtx_def mult; rtunion mult_fld1; |
a28b2ac6 RS |
99 | struct rtx_def sdiv; rtunion sdiv_fld1; |
100 | struct rtx_def udiv; rtunion udiv_fld1; | |
79b4a8dc | 101 | struct rtx_def zext; |
a28b2ac6 RS |
102 | struct rtx_def sdiv_32; rtunion sdiv_32_fld1; |
103 | struct rtx_def smod_32; rtunion smod_32_fld1; | |
79b4a8dc RH |
104 | struct rtx_def wide_mult; rtunion wide_mult_fld1; |
105 | struct rtx_def wide_lshr; rtunion wide_lshr_fld1; | |
106 | struct rtx_def wide_trunc; | |
107 | struct rtx_def shift; rtunion shift_fld1; | |
108 | struct rtx_def shift_mult; rtunion shift_mult_fld1; | |
109 | struct rtx_def shift_add; rtunion shift_add_fld1; | |
ef268d34 KH |
110 | struct rtx_def shift_sub0; rtunion shift_sub0_fld1; |
111 | struct rtx_def shift_sub1; rtunion shift_sub1_fld1; | |
79b4a8dc RH |
112 | } all; |
113 | ||
965703ed RS |
114 | rtx pow2[MAX_BITS_PER_WORD]; |
115 | rtx cint[MAX_BITS_PER_WORD]; | |
965703ed | 116 | int m, n; |
71af73bb | 117 | enum machine_mode mode, wider_mode; |
f40751dd | 118 | int speed; |
44037a66 | 119 | |
38a448ca | 120 | |
965703ed RS |
121 | for (m = 1; m < MAX_BITS_PER_WORD; m++) |
122 | { | |
123 | pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m); | |
124 | cint[m] = GEN_INT (m); | |
125 | } | |
79b4a8dc RH |
126 | memset (&all, 0, sizeof all); |
127 | ||
128 | PUT_CODE (&all.reg, REG); | |
1d27fed4 | 129 | /* Avoid using hard regs in ways which may be unsupported. */ |
6fb5fa3c | 130 | SET_REGNO (&all.reg, LAST_VIRTUAL_REGISTER + 1); |
79b4a8dc RH |
131 | |
132 | PUT_CODE (&all.plus, PLUS); | |
133 | XEXP (&all.plus, 0) = &all.reg; | |
134 | XEXP (&all.plus, 1) = &all.reg; | |
135 | ||
136 | PUT_CODE (&all.neg, NEG); | |
137 | XEXP (&all.neg, 0) = &all.reg; | |
138 | ||
79b4a8dc RH |
139 | PUT_CODE (&all.mult, MULT); |
140 | XEXP (&all.mult, 0) = &all.reg; | |
141 | XEXP (&all.mult, 1) = &all.reg; | |
142 | ||
a28b2ac6 RS |
143 | PUT_CODE (&all.sdiv, DIV); |
144 | XEXP (&all.sdiv, 0) = &all.reg; | |
145 | XEXP (&all.sdiv, 1) = &all.reg; | |
79b4a8dc | 146 | |
a28b2ac6 RS |
147 | PUT_CODE (&all.udiv, UDIV); |
148 | XEXP (&all.udiv, 0) = &all.reg; | |
149 | XEXP (&all.udiv, 1) = &all.reg; | |
150 | ||
151 | PUT_CODE (&all.sdiv_32, DIV); | |
152 | XEXP (&all.sdiv_32, 0) = &all.reg; | |
153 | XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? cint[32] : GEN_INT (32); | |
154 | ||
155 | PUT_CODE (&all.smod_32, MOD); | |
156 | XEXP (&all.smod_32, 0) = &all.reg; | |
157 | XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1); | |
79b4a8dc RH |
158 | |
159 | PUT_CODE (&all.zext, ZERO_EXTEND); | |
160 | XEXP (&all.zext, 0) = &all.reg; | |
161 | ||
162 | PUT_CODE (&all.wide_mult, MULT); | |
163 | XEXP (&all.wide_mult, 0) = &all.zext; | |
164 | XEXP (&all.wide_mult, 1) = &all.zext; | |
165 | ||
166 | PUT_CODE (&all.wide_lshr, LSHIFTRT); | |
167 | XEXP (&all.wide_lshr, 0) = &all.wide_mult; | |
168 | ||
169 | PUT_CODE (&all.wide_trunc, TRUNCATE); | |
170 | XEXP (&all.wide_trunc, 0) = &all.wide_lshr; | |
171 | ||
172 | PUT_CODE (&all.shift, ASHIFT); | |
173 | XEXP (&all.shift, 0) = &all.reg; | |
174 | ||
175 | PUT_CODE (&all.shift_mult, MULT); | |
176 | XEXP (&all.shift_mult, 0) = &all.reg; | |
177 | ||
178 | PUT_CODE (&all.shift_add, PLUS); | |
179 | XEXP (&all.shift_add, 0) = &all.shift_mult; | |
180 | XEXP (&all.shift_add, 1) = &all.reg; | |
181 | ||
ef268d34 KH |
182 | PUT_CODE (&all.shift_sub0, MINUS); |
183 | XEXP (&all.shift_sub0, 0) = &all.shift_mult; | |
184 | XEXP (&all.shift_sub0, 1) = &all.reg; | |
185 | ||
186 | PUT_CODE (&all.shift_sub1, MINUS); | |
187 | XEXP (&all.shift_sub1, 0) = &all.reg; | |
188 | XEXP (&all.shift_sub1, 1) = &all.shift_mult; | |
79b4a8dc | 189 | |
f40751dd | 190 | for (speed = 0; speed < 2; speed++) |
71af73bb | 191 | { |
f40751dd | 192 | crtl->maybe_hot_insn_p = speed; |
bbbbb16a | 193 | zero_cost[speed] = rtx_cost (const0_rtx, SET, speed); |
79b4a8dc | 194 | |
f40751dd JH |
195 | for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); |
196 | mode != VOIDmode; | |
197 | mode = GET_MODE_WIDER_MODE (mode)) | |
198 | { | |
199 | PUT_MODE (&all.reg, mode); | |
200 | PUT_MODE (&all.plus, mode); | |
201 | PUT_MODE (&all.neg, mode); | |
202 | PUT_MODE (&all.mult, mode); | |
203 | PUT_MODE (&all.sdiv, mode); | |
204 | PUT_MODE (&all.udiv, mode); | |
205 | PUT_MODE (&all.sdiv_32, mode); | |
206 | PUT_MODE (&all.smod_32, mode); | |
207 | PUT_MODE (&all.wide_trunc, mode); | |
208 | PUT_MODE (&all.shift, mode); | |
209 | PUT_MODE (&all.shift_mult, mode); | |
210 | PUT_MODE (&all.shift_add, mode); | |
ef268d34 KH |
211 | PUT_MODE (&all.shift_sub0, mode); |
212 | PUT_MODE (&all.shift_sub1, mode); | |
f40751dd JH |
213 | |
214 | add_cost[speed][mode] = rtx_cost (&all.plus, SET, speed); | |
215 | neg_cost[speed][mode] = rtx_cost (&all.neg, SET, speed); | |
216 | mul_cost[speed][mode] = rtx_cost (&all.mult, SET, speed); | |
217 | sdiv_cost[speed][mode] = rtx_cost (&all.sdiv, SET, speed); | |
218 | udiv_cost[speed][mode] = rtx_cost (&all.udiv, SET, speed); | |
219 | ||
220 | sdiv_pow2_cheap[speed][mode] = (rtx_cost (&all.sdiv_32, SET, speed) | |
221 | <= 2 * add_cost[speed][mode]); | |
222 | smod_pow2_cheap[speed][mode] = (rtx_cost (&all.smod_32, SET, speed) | |
223 | <= 4 * add_cost[speed][mode]); | |
224 | ||
225 | wider_mode = GET_MODE_WIDER_MODE (mode); | |
226 | if (wider_mode != VOIDmode) | |
227 | { | |
228 | PUT_MODE (&all.zext, wider_mode); | |
229 | PUT_MODE (&all.wide_mult, wider_mode); | |
230 | PUT_MODE (&all.wide_lshr, wider_mode); | |
231 | XEXP (&all.wide_lshr, 1) = GEN_INT (GET_MODE_BITSIZE (mode)); | |
232 | ||
233 | mul_widen_cost[speed][wider_mode] | |
234 | = rtx_cost (&all.wide_mult, SET, speed); | |
235 | mul_highpart_cost[speed][mode] | |
236 | = rtx_cost (&all.wide_trunc, SET, speed); | |
237 | } | |
71af73bb | 238 | |
f40751dd | 239 | shift_cost[speed][mode][0] = 0; |
ef268d34 KH |
240 | shiftadd_cost[speed][mode][0] = shiftsub0_cost[speed][mode][0] |
241 | = shiftsub1_cost[speed][mode][0] = add_cost[speed][mode]; | |
79b4a8dc | 242 | |
f40751dd JH |
243 | n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode)); |
244 | for (m = 1; m < n; m++) | |
245 | { | |
246 | XEXP (&all.shift, 1) = cint[m]; | |
247 | XEXP (&all.shift_mult, 1) = pow2[m]; | |
58777718 | 248 | |
f40751dd JH |
249 | shift_cost[speed][mode][m] = rtx_cost (&all.shift, SET, speed); |
250 | shiftadd_cost[speed][mode][m] = rtx_cost (&all.shift_add, SET, speed); | |
ef268d34 KH |
251 | shiftsub0_cost[speed][mode][m] = rtx_cost (&all.shift_sub0, SET, speed); |
252 | shiftsub1_cost[speed][mode][m] = rtx_cost (&all.shift_sub1, SET, speed); | |
f40751dd | 253 | } |
79b4a8dc RH |
254 | } |
255 | } | |
c371bb73 RS |
256 | if (alg_hash_used_p) |
257 | memset (alg_hash, 0, sizeof (alg_hash)); | |
258 | else | |
259 | alg_hash_used_p = true; | |
f40751dd | 260 | default_rtl_profile (); |
44037a66 TG |
261 | } |
262 | ||
263 | /* Return an rtx representing minus the value of X. | |
264 | MODE is the intended mode of the result, | |
265 | useful if X is a CONST_INT. */ | |
266 | ||
267 | rtx | |
502b8322 | 268 | negate_rtx (enum machine_mode mode, rtx x) |
44037a66 | 269 | { |
a39a7484 RK |
270 | rtx result = simplify_unary_operation (NEG, mode, x, mode); |
271 | ||
fdb5537f | 272 | if (result == 0) |
a39a7484 RK |
273 | result = expand_unop (mode, neg_optab, x, NULL_RTX, 0); |
274 | ||
275 | return result; | |
44037a66 | 276 | } |
da920570 ZW |
277 | |
278 | /* Report on the availability of insv/extv/extzv and the desired mode | |
279 | of each of their operands. Returns MAX_MACHINE_MODE if HAVE_foo | |
280 | is false; else the mode of the specified operand. If OPNO is -1, | |
281 | all the caller cares about is whether the insn is available. */ | |
282 | enum machine_mode | |
502b8322 | 283 | mode_for_extraction (enum extraction_pattern pattern, int opno) |
da920570 | 284 | { |
f12c802a | 285 | const struct insn_data_d *data; |
da920570 ZW |
286 | |
287 | switch (pattern) | |
288 | { | |
289 | case EP_insv: | |
da920570 ZW |
290 | if (HAVE_insv) |
291 | { | |
292 | data = &insn_data[CODE_FOR_insv]; | |
293 | break; | |
294 | } | |
da920570 ZW |
295 | return MAX_MACHINE_MODE; |
296 | ||
297 | case EP_extv: | |
da920570 ZW |
298 | if (HAVE_extv) |
299 | { | |
300 | data = &insn_data[CODE_FOR_extv]; | |
301 | break; | |
302 | } | |
da920570 ZW |
303 | return MAX_MACHINE_MODE; |
304 | ||
305 | case EP_extzv: | |
da920570 ZW |
306 | if (HAVE_extzv) |
307 | { | |
308 | data = &insn_data[CODE_FOR_extzv]; | |
309 | break; | |
310 | } | |
da920570 | 311 | return MAX_MACHINE_MODE; |
a242b083 ZW |
312 | |
313 | default: | |
5b0264cb | 314 | gcc_unreachable (); |
da920570 ZW |
315 | } |
316 | ||
317 | if (opno == -1) | |
318 | return VOIDmode; | |
319 | ||
320 | /* Everyone who uses this function used to follow it with | |
321 | if (result == VOIDmode) result = word_mode; */ | |
322 | if (data->operand[opno].mode == VOIDmode) | |
323 | return word_mode; | |
324 | return data->operand[opno].mode; | |
325 | } | |
326 | ||
6d7db3c5 RS |
327 | /* Return true if X, of mode MODE, matches the predicate for operand |
328 | OPNO of instruction ICODE. Allow volatile memories, regardless of | |
329 | the ambient volatile_ok setting. */ | |
44037a66 | 330 | |
6d7db3c5 RS |
331 | static bool |
332 | check_predicate_volatile_ok (enum insn_code icode, int opno, | |
333 | rtx x, enum machine_mode mode) | |
334 | { | |
335 | bool save_volatile_ok, result; | |
0d8e55d8 | 336 | |
6d7db3c5 RS |
337 | save_volatile_ok = volatile_ok; |
338 | result = insn_data[(int) icode].operand[opno].predicate (x, mode); | |
339 | volatile_ok = save_volatile_ok; | |
340 | return result; | |
341 | } | |
342 | \f | |
343 | /* A subroutine of store_bit_field, with the same arguments. Return true | |
344 | if the operation could be implemented. | |
44037a66 | 345 | |
6d7db3c5 RS |
346 | If FALLBACK_P is true, fall back to store_fixed_bit_field if we have |
347 | no other way of implementing the operation. If FALLBACK_P is false, | |
348 | return false instead. */ | |
349 | ||
350 | static bool | |
351 | store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize, | |
352 | unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode, | |
353 | rtx value, bool fallback_p) | |
44037a66 | 354 | { |
770ae6cc | 355 | unsigned int unit |
3c0cb5de | 356 | = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD; |
2c58f7dd | 357 | unsigned HOST_WIDE_INT offset, bitpos; |
b3694847 | 358 | rtx op0 = str_rtx; |
420e7dfa | 359 | int byte_offset; |
28526e20 | 360 | rtx orig_value; |
da920570 | 361 | |
a242b083 | 362 | enum machine_mode op_mode = mode_for_extraction (EP_insv, 3); |
44037a66 | 363 | |
44037a66 TG |
364 | while (GET_CODE (op0) == SUBREG) |
365 | { | |
366 | /* The following line once was done only if WORDS_BIG_ENDIAN, | |
367 | but I think that is a mistake. WORDS_BIG_ENDIAN is | |
368 | meaningful at a much higher level; when structures are copied | |
369 | between memory and regs, the higher-numbered regs | |
370 | always get higher addresses. */ | |
495db1a1 AK |
371 | int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0))); |
372 | int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0)); | |
b8698a0f | 373 | |
495db1a1 AK |
374 | byte_offset = 0; |
375 | ||
376 | /* Paradoxical subregs need special handling on big endian machines. */ | |
377 | if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size) | |
378 | { | |
379 | int difference = inner_mode_size - outer_mode_size; | |
380 | ||
381 | if (WORDS_BIG_ENDIAN) | |
382 | byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD; | |
383 | if (BYTES_BIG_ENDIAN) | |
384 | byte_offset += difference % UNITS_PER_WORD; | |
385 | } | |
386 | else | |
387 | byte_offset = SUBREG_BYTE (op0); | |
388 | ||
389 | bitnum += byte_offset * BITS_PER_UNIT; | |
44037a66 TG |
390 | op0 = SUBREG_REG (op0); |
391 | } | |
392 | ||
2c58f7dd RS |
393 | /* No action is needed if the target is a register and if the field |
394 | lies completely outside that register. This can occur if the source | |
395 | code contains an out-of-bounds access to a small array. */ | |
396 | if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0))) | |
6d7db3c5 | 397 | return true; |
2c58f7dd | 398 | |
b42271d6 | 399 | /* Use vec_set patterns for inserting parts of vectors whenever |
997404de JH |
400 | available. */ |
401 | if (VECTOR_MODE_P (GET_MODE (op0)) | |
3c0cb5de | 402 | && !MEM_P (op0) |
947131ba | 403 | && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing |
997404de JH |
404 | && fieldmode == GET_MODE_INNER (GET_MODE (op0)) |
405 | && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0))) | |
406 | && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0))))) | |
407 | { | |
408 | enum machine_mode outermode = GET_MODE (op0); | |
409 | enum machine_mode innermode = GET_MODE_INNER (outermode); | |
947131ba | 410 | int icode = (int) optab_handler (vec_set_optab, outermode); |
997404de JH |
411 | int pos = bitnum / GET_MODE_BITSIZE (innermode); |
412 | rtx rtxpos = GEN_INT (pos); | |
413 | rtx src = value; | |
414 | rtx dest = op0; | |
415 | rtx pat, seq; | |
416 | enum machine_mode mode0 = insn_data[icode].operand[0].mode; | |
417 | enum machine_mode mode1 = insn_data[icode].operand[1].mode; | |
418 | enum machine_mode mode2 = insn_data[icode].operand[2].mode; | |
419 | ||
420 | start_sequence (); | |
421 | ||
422 | if (! (*insn_data[icode].operand[1].predicate) (src, mode1)) | |
423 | src = copy_to_mode_reg (mode1, src); | |
424 | ||
425 | if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2)) | |
426 | rtxpos = copy_to_mode_reg (mode1, rtxpos); | |
427 | ||
428 | /* We could handle this, but we should always be called with a pseudo | |
429 | for our targets and all insns should take them as outputs. */ | |
5b0264cb NS |
430 | gcc_assert ((*insn_data[icode].operand[0].predicate) (dest, mode0) |
431 | && (*insn_data[icode].operand[1].predicate) (src, mode1) | |
432 | && (*insn_data[icode].operand[2].predicate) (rtxpos, mode2)); | |
997404de JH |
433 | pat = GEN_FCN (icode) (dest, src, rtxpos); |
434 | seq = get_insns (); | |
435 | end_sequence (); | |
436 | if (pat) | |
437 | { | |
438 | emit_insn (seq); | |
439 | emit_insn (pat); | |
6d7db3c5 | 440 | return true; |
997404de JH |
441 | } |
442 | } | |
443 | ||
308ecea0 RH |
444 | /* If the target is a register, overwriting the entire object, or storing |
445 | a full-word or multi-word field can be done with just a SUBREG. | |
446 | ||
447 | If the target is memory, storing any naturally aligned field can be | |
448 | done with a simple store. For targets that support fast unaligned | |
0b69c29f | 449 | memory, any naturally sized, unit aligned field can be done directly. */ |
c410d49e | 450 | |
2c58f7dd RS |
451 | offset = bitnum / unit; |
452 | bitpos = bitnum % unit; | |
420e7dfa DN |
453 | byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT |
454 | + (offset * UNITS_PER_WORD); | |
455 | ||
57bfa49a | 456 | if (bitpos == 0 |
0b69c29f | 457 | && bitsize == GET_MODE_BITSIZE (fieldmode) |
3c0cb5de | 458 | && (!MEM_P (op0) |
420e7dfa | 459 | ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD |
ea9ea008 | 460 | || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode)) |
0fb7aeda | 461 | && byte_offset % GET_MODE_SIZE (fieldmode) == 0) |
04050c69 | 462 | : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0)) |
ea9ea008 | 463 | || (offset * BITS_PER_UNIT % bitsize == 0 |
04050c69 | 464 | && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0)))) |
44037a66 | 465 | { |
69498c64 DJ |
466 | if (MEM_P (op0)) |
467 | op0 = adjust_address (op0, fieldmode, offset); | |
468 | else if (GET_MODE (op0) != fieldmode) | |
469 | op0 = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0), | |
470 | byte_offset); | |
44037a66 | 471 | emit_move_insn (op0, value); |
6d7db3c5 | 472 | return true; |
44037a66 TG |
473 | } |
474 | ||
a8ca7756 JW |
475 | /* Make sure we are playing with integral modes. Pun with subregs |
476 | if we aren't. This must come after the entire register case above, | |
477 | since that case is valid for any mode. The following cases are only | |
478 | valid for integral modes. */ | |
479 | { | |
480 | enum machine_mode imode = int_mode_for_mode (GET_MODE (op0)); | |
481 | if (imode != GET_MODE (op0)) | |
482 | { | |
3c0cb5de | 483 | if (MEM_P (op0)) |
f4ef873c | 484 | op0 = adjust_address (op0, imode, 0); |
a8ca7756 | 485 | else |
5b0264cb NS |
486 | { |
487 | gcc_assert (imode != BLKmode); | |
488 | op0 = gen_lowpart (imode, op0); | |
489 | } | |
a8ca7756 JW |
490 | } |
491 | } | |
492 | ||
4e9bb42b AH |
493 | /* We may be accessing data outside the field, which means |
494 | we can alias adjacent data. */ | |
3c0cb5de | 495 | if (MEM_P (op0)) |
4e9bb42b AH |
496 | { |
497 | op0 = shallow_copy_rtx (op0); | |
498 | set_mem_alias_set (op0, 0); | |
499 | set_mem_expr (op0, 0); | |
500 | } | |
501 | ||
57bfa49a RZ |
502 | /* If OP0 is a register, BITPOS must count within a word. |
503 | But as we have it, it counts within whatever size OP0 now has. | |
504 | On a bigendian machine, these are not the same, so convert. */ | |
505 | if (BYTES_BIG_ENDIAN | |
3c0cb5de | 506 | && !MEM_P (op0) |
57bfa49a RZ |
507 | && unit > GET_MODE_BITSIZE (GET_MODE (op0))) |
508 | bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0)); | |
509 | ||
44037a66 TG |
510 | /* Storing an lsb-aligned field in a register |
511 | can be done with a movestrict instruction. */ | |
512 | ||
3c0cb5de | 513 | if (!MEM_P (op0) |
f76b9db2 | 514 | && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0) |
44037a66 | 515 | && bitsize == GET_MODE_BITSIZE (fieldmode) |
947131ba | 516 | && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing) |
44037a66 | 517 | { |
947131ba | 518 | int icode = optab_handler (movstrict_optab, fieldmode); |
3debdc1e JH |
519 | rtx insn; |
520 | rtx start = get_last_insn (); | |
5d560619 | 521 | rtx arg0 = op0; |
5e4900c7 | 522 | |
44037a66 | 523 | /* Get appropriate low part of the value being stored. */ |
481683e1 | 524 | if (CONST_INT_P (value) || REG_P (value)) |
44037a66 TG |
525 | value = gen_lowpart (fieldmode, value); |
526 | else if (!(GET_CODE (value) == SYMBOL_REF | |
527 | || GET_CODE (value) == LABEL_REF | |
528 | || GET_CODE (value) == CONST)) | |
529 | value = convert_to_mode (fieldmode, value, 0); | |
530 | ||
5e4900c7 JW |
531 | if (! (*insn_data[icode].operand[1].predicate) (value, fieldmode)) |
532 | value = copy_to_mode_reg (fieldmode, value); | |
533 | ||
534 | if (GET_CODE (op0) == SUBREG) | |
44037a66 | 535 | { |
5b0264cb NS |
536 | /* Else we've got some float mode source being extracted into |
537 | a different float mode destination -- this combination of | |
538 | subregs results in Severe Tire Damage. */ | |
539 | gcc_assert (GET_MODE (SUBREG_REG (op0)) == fieldmode | |
540 | || GET_MODE_CLASS (fieldmode) == MODE_INT | |
541 | || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT); | |
5d560619 | 542 | arg0 = SUBREG_REG (op0); |
5e4900c7 | 543 | } |
470032d7 | 544 | |
3debdc1e | 545 | insn = (GEN_FCN (icode) |
5d560619 | 546 | (gen_rtx_SUBREG (fieldmode, arg0, |
ddef6bc7 JJ |
547 | (bitnum % BITS_PER_WORD) / BITS_PER_UNIT |
548 | + (offset * UNITS_PER_WORD)), | |
549 | value)); | |
3debdc1e JH |
550 | if (insn) |
551 | { | |
552 | emit_insn (insn); | |
553 | return true; | |
554 | } | |
555 | delete_insns_since (start); | |
44037a66 TG |
556 | } |
557 | ||
558 | /* Handle fields bigger than a word. */ | |
559 | ||
560 | if (bitsize > BITS_PER_WORD) | |
561 | { | |
562 | /* Here we transfer the words of the field | |
563 | in the order least significant first. | |
564 | This is because the most significant word is the one which may | |
ad83e87b PB |
565 | be less than full. |
566 | However, only do that if the value is not BLKmode. */ | |
567 | ||
770ae6cc RK |
568 | unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode; |
569 | unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD; | |
570 | unsigned int i; | |
6d7db3c5 | 571 | rtx last; |
44037a66 TG |
572 | |
573 | /* This is the mode we must force value to, so that there will be enough | |
574 | subwords to extract. Note that fieldmode will often (always?) be | |
575 | VOIDmode, because that is what store_field uses to indicate that this | |
535a42b1 NS |
576 | is a bit field, but passing VOIDmode to operand_subword_force |
577 | is not allowed. */ | |
9f5e2e11 RS |
578 | fieldmode = GET_MODE (value); |
579 | if (fieldmode == VOIDmode) | |
580 | fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT); | |
44037a66 | 581 | |
6d7db3c5 | 582 | last = get_last_insn (); |
44037a66 TG |
583 | for (i = 0; i < nwords; i++) |
584 | { | |
ad83e87b PB |
585 | /* If I is 0, use the low-order word in both field and target; |
586 | if I is 1, use the next to lowest word; and so on. */ | |
770ae6cc RK |
587 | unsigned int wordnum = (backwards ? nwords - i - 1 : i); |
588 | unsigned int bit_offset = (backwards | |
04050c69 RK |
589 | ? MAX ((int) bitsize - ((int) i + 1) |
590 | * BITS_PER_WORD, | |
591 | 0) | |
592 | : (int) i * BITS_PER_WORD); | |
6d7db3c5 | 593 | rtx value_word = operand_subword_force (value, wordnum, fieldmode); |
770ae6cc | 594 | |
6d7db3c5 RS |
595 | if (!store_bit_field_1 (op0, MIN (BITS_PER_WORD, |
596 | bitsize - i * BITS_PER_WORD), | |
597 | bitnum + bit_offset, word_mode, | |
598 | value_word, fallback_p)) | |
599 | { | |
600 | delete_insns_since (last); | |
601 | return false; | |
602 | } | |
44037a66 | 603 | } |
6d7db3c5 | 604 | return true; |
44037a66 TG |
605 | } |
606 | ||
607 | /* From here on we can assume that the field to be stored in is | |
608 | a full-word (whatever type that is), since it is shorter than a word. */ | |
609 | ||
610 | /* OFFSET is the number of words or bytes (UNIT says which) | |
611 | from STR_RTX to the first word or byte containing part of the field. */ | |
612 | ||
3c0cb5de | 613 | if (!MEM_P (op0)) |
44037a66 TG |
614 | { |
615 | if (offset != 0 | |
616 | || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD) | |
470032d7 | 617 | { |
f8cfc6aa | 618 | if (!REG_P (op0)) |
7be7a07d | 619 | { |
535a42b1 NS |
620 | /* Since this is a destination (lvalue), we can't copy |
621 | it to a pseudo. We can remove a SUBREG that does not | |
622 | change the size of the operand. Such a SUBREG may | |
623 | have been added above. */ | |
5b0264cb NS |
624 | gcc_assert (GET_CODE (op0) == SUBREG |
625 | && (GET_MODE_SIZE (GET_MODE (op0)) | |
626 | == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0))))); | |
627 | op0 = SUBREG_REG (op0); | |
7be7a07d | 628 | } |
470032d7 | 629 | op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0), |
ddef6bc7 | 630 | op0, (offset * UNITS_PER_WORD)); |
470032d7 | 631 | } |
44037a66 TG |
632 | offset = 0; |
633 | } | |
44037a66 | 634 | |
4f1da2e9 RS |
635 | /* If VALUE has a floating-point or complex mode, access it as an |
636 | integer of the corresponding size. This can occur on a machine | |
637 | with 64 bit registers that uses SFmode for float. It can also | |
638 | occur for unaligned float or complex fields. */ | |
28526e20 | 639 | orig_value = value; |
4f1da2e9 RS |
640 | if (GET_MODE (value) != VOIDmode |
641 | && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT | |
32b069d3 | 642 | && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT) |
4f1da2e9 RS |
643 | { |
644 | value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value))); | |
645 | emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value); | |
646 | } | |
2305bcad | 647 | |
44037a66 TG |
648 | /* Now OFFSET is nonzero only if OP0 is memory |
649 | and is therefore always measured in bytes. */ | |
650 | ||
a242b083 | 651 | if (HAVE_insv |
1d269b0c | 652 | && GET_MODE (value) != BLKmode |
3ab997e8 EB |
653 | && bitsize > 0 |
654 | && GET_MODE_BITSIZE (op_mode) >= bitsize | |
f8cfc6aa | 655 | && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG) |
f7acbf4c RS |
656 | && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode))) |
657 | && insn_data[CODE_FOR_insv].operand[1].predicate (GEN_INT (bitsize), | |
6d7db3c5 RS |
658 | VOIDmode) |
659 | && check_predicate_volatile_ok (CODE_FOR_insv, 0, op0, VOIDmode)) | |
44037a66 TG |
660 | { |
661 | int xbitpos = bitpos; | |
662 | rtx value1; | |
663 | rtx xop0 = op0; | |
664 | rtx last = get_last_insn (); | |
665 | rtx pat; | |
c600a155 | 666 | bool copy_back = false; |
44037a66 TG |
667 | |
668 | /* Add OFFSET into OP0's address. */ | |
3c0cb5de | 669 | if (MEM_P (xop0)) |
f4ef873c | 670 | xop0 = adjust_address (xop0, byte_mode, offset); |
44037a66 | 671 | |
6d7db3c5 | 672 | /* If xop0 is a register, we need it in OP_MODE |
44037a66 TG |
673 | to make it acceptable to the format of insv. */ |
674 | if (GET_CODE (xop0) == SUBREG) | |
bac7cdfd DE |
675 | /* We can't just change the mode, because this might clobber op0, |
676 | and we will need the original value of op0 if insv fails. */ | |
6d7db3c5 RS |
677 | xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0)); |
678 | if (REG_P (xop0) && GET_MODE (xop0) != op_mode) | |
d8a60d24 | 679 | xop0 = gen_lowpart_SUBREG (op_mode, xop0); |
44037a66 | 680 | |
c600a155 AN |
681 | /* If the destination is a paradoxical subreg such that we need a |
682 | truncate to the inner mode, perform the insertion on a temporary and | |
683 | truncate the result to the original destination. Note that we can't | |
684 | just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N | |
685 | X) 0)) is (reg:N X). */ | |
686 | if (GET_CODE (xop0) == SUBREG | |
687 | && REG_P (SUBREG_REG (xop0)) | |
688 | && (!TRULY_NOOP_TRUNCATION | |
689 | (GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (xop0))), | |
690 | GET_MODE_BITSIZE (op_mode)))) | |
691 | { | |
692 | rtx tem = gen_reg_rtx (op_mode); | |
693 | emit_move_insn (tem, xop0); | |
694 | xop0 = tem; | |
695 | copy_back = true; | |
696 | } | |
697 | ||
44037a66 TG |
698 | /* On big-endian machines, we count bits from the most significant. |
699 | If the bit field insn does not, we must invert. */ | |
700 | ||
f76b9db2 ILT |
701 | if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN) |
702 | xbitpos = unit - bitsize - xbitpos; | |
703 | ||
44037a66 TG |
704 | /* We have been counting XBITPOS within UNIT. |
705 | Count instead within the size of the register. */ | |
3c0cb5de | 706 | if (BITS_BIG_ENDIAN && !MEM_P (xop0)) |
6d7db3c5 | 707 | xbitpos += GET_MODE_BITSIZE (op_mode) - unit; |
f76b9db2 | 708 | |
6d7db3c5 | 709 | unit = GET_MODE_BITSIZE (op_mode); |
44037a66 | 710 | |
6d7db3c5 | 711 | /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */ |
44037a66 | 712 | value1 = value; |
6d7db3c5 | 713 | if (GET_MODE (value) != op_mode) |
44037a66 TG |
714 | { |
715 | if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize) | |
716 | { | |
717 | /* Optimization: Don't bother really extending VALUE | |
f5df292e RS |
718 | if it has all the bits we will actually use. However, |
719 | if we must narrow it, be sure we do it correctly. */ | |
44037a66 | 720 | |
6d7db3c5 | 721 | if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode)) |
c410d49e EC |
722 | { |
723 | rtx tmp; | |
724 | ||
6d7db3c5 | 725 | tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0); |
c410d49e | 726 | if (! tmp) |
6d7db3c5 | 727 | tmp = simplify_gen_subreg (op_mode, |
c410d49e EC |
728 | force_reg (GET_MODE (value), |
729 | value1), | |
730 | GET_MODE (value), 0); | |
731 | value1 = tmp; | |
732 | } | |
f5df292e | 733 | else |
6d7db3c5 | 734 | value1 = gen_lowpart (op_mode, value1); |
44037a66 | 735 | } |
481683e1 | 736 | else if (CONST_INT_P (value)) |
6d7db3c5 | 737 | value1 = gen_int_mode (INTVAL (value), op_mode); |
5b0264cb | 738 | else |
44037a66 TG |
739 | /* Parse phase is supposed to make VALUE's data type |
740 | match that of the component reference, which is a type | |
741 | at least as wide as the field; so VALUE should have | |
742 | a mode that corresponds to that type. */ | |
5b0264cb | 743 | gcc_assert (CONSTANT_P (value)); |
44037a66 TG |
744 | } |
745 | ||
746 | /* If this machine's insv insists on a register, | |
747 | get VALUE1 into a register. */ | |
a995e389 | 748 | if (! ((*insn_data[(int) CODE_FOR_insv].operand[3].predicate) |
6d7db3c5 RS |
749 | (value1, op_mode))) |
750 | value1 = force_reg (op_mode, value1); | |
44037a66 | 751 | |
b1ec3c92 | 752 | pat = gen_insv (xop0, GEN_INT (bitsize), GEN_INT (xbitpos), value1); |
44037a66 | 753 | if (pat) |
6d7db3c5 RS |
754 | { |
755 | emit_insn (pat); | |
0e510b3e | 756 | |
c600a155 AN |
757 | if (copy_back) |
758 | convert_move (op0, xop0, true); | |
6d7db3c5 RS |
759 | return true; |
760 | } | |
761 | delete_insns_since (last); | |
762 | } | |
763 | ||
764 | /* If OP0 is a memory, try copying it to a register and seeing if a | |
765 | cheap register alternative is available. */ | |
766 | if (HAVE_insv && MEM_P (op0)) | |
767 | { | |
768 | enum machine_mode bestmode; | |
769 | ||
770 | /* Get the mode to use for inserting into this field. If OP0 is | |
771 | BLKmode, get the smallest mode consistent with the alignment. If | |
772 | OP0 is a non-BLKmode object that is no wider than OP_MODE, use its | |
773 | mode. Otherwise, use the smallest mode containing the field. */ | |
774 | ||
775 | if (GET_MODE (op0) == BLKmode | |
776 | || (op_mode != MAX_MACHINE_MODE | |
777 | && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (op_mode))) | |
778 | bestmode = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0), | |
779 | (op_mode == MAX_MACHINE_MODE | |
780 | ? VOIDmode : op_mode), | |
781 | MEM_VOLATILE_P (op0)); | |
44037a66 | 782 | else |
6d7db3c5 RS |
783 | bestmode = GET_MODE (op0); |
784 | ||
785 | if (bestmode != VOIDmode | |
786 | && GET_MODE_SIZE (bestmode) >= GET_MODE_SIZE (fieldmode) | |
787 | && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0)) | |
788 | && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0))) | |
0fb7aeda | 789 | { |
6d7db3c5 RS |
790 | rtx last, tempreg, xop0; |
791 | unsigned HOST_WIDE_INT xoffset, xbitpos; | |
792 | ||
793 | last = get_last_insn (); | |
794 | ||
795 | /* Adjust address to point to the containing unit of | |
796 | that mode. Compute the offset as a multiple of this unit, | |
797 | counting in bytes. */ | |
798 | unit = GET_MODE_BITSIZE (bestmode); | |
799 | xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode); | |
800 | xbitpos = bitnum % unit; | |
801 | xop0 = adjust_address (op0, bestmode, xoffset); | |
802 | ||
803 | /* Fetch that unit, store the bitfield in it, then store | |
804 | the unit. */ | |
805 | tempreg = copy_to_reg (xop0); | |
806 | if (store_bit_field_1 (tempreg, bitsize, xbitpos, | |
807 | fieldmode, orig_value, false)) | |
808 | { | |
809 | emit_move_insn (xop0, tempreg); | |
810 | return true; | |
811 | } | |
44037a66 | 812 | delete_insns_since (last); |
44037a66 TG |
813 | } |
814 | } | |
6d7db3c5 RS |
815 | |
816 | if (!fallback_p) | |
817 | return false; | |
818 | ||
819 | store_fixed_bit_field (op0, offset, bitsize, bitpos, value); | |
820 | return true; | |
821 | } | |
822 | ||
823 | /* Generate code to store value from rtx VALUE | |
824 | into a bit-field within structure STR_RTX | |
825 | containing BITSIZE bits starting at bit BITNUM. | |
826 | FIELDMODE is the machine-mode of the FIELD_DECL node for this field. */ | |
827 | ||
828 | void | |
829 | store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize, | |
830 | unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode, | |
831 | rtx value) | |
832 | { | |
833 | if (!store_bit_field_1 (str_rtx, bitsize, bitnum, fieldmode, value, true)) | |
834 | gcc_unreachable (); | |
44037a66 TG |
835 | } |
836 | \f | |
837 | /* Use shifts and boolean operations to store VALUE | |
838 | into a bit field of width BITSIZE | |
839 | in a memory location specified by OP0 except offset by OFFSET bytes. | |
840 | (OFFSET must be 0 if OP0 is a register.) | |
841 | The field starts at position BITPOS within the byte. | |
842 | (If OP0 is a register, it may be a full word or a narrower mode, | |
843 | but BITPOS still counts within a full word, | |
ad76cef8 | 844 | which is significant on bigendian machines.) */ |
44037a66 TG |
845 | |
846 | static void | |
502b8322 AJ |
847 | store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset, |
848 | unsigned HOST_WIDE_INT bitsize, | |
849 | unsigned HOST_WIDE_INT bitpos, rtx value) | |
44037a66 | 850 | { |
b3694847 | 851 | enum machine_mode mode; |
770ae6cc | 852 | unsigned int total_bits = BITS_PER_WORD; |
c505fc06 | 853 | rtx temp; |
44037a66 TG |
854 | int all_zero = 0; |
855 | int all_one = 0; | |
856 | ||
44037a66 TG |
857 | /* There is a case not handled here: |
858 | a structure with a known alignment of just a halfword | |
859 | and a field split across two aligned halfwords within the structure. | |
860 | Or likewise a structure with a known alignment of just a byte | |
861 | and a field split across two bytes. | |
862 | Such cases are not supposed to be able to occur. */ | |
863 | ||
f8cfc6aa | 864 | if (REG_P (op0) || GET_CODE (op0) == SUBREG) |
44037a66 | 865 | { |
5b0264cb | 866 | gcc_assert (!offset); |
44037a66 TG |
867 | /* Special treatment for a bit field split across two registers. */ |
868 | if (bitsize + bitpos > BITS_PER_WORD) | |
869 | { | |
04050c69 | 870 | store_split_bit_field (op0, bitsize, bitpos, value); |
44037a66 TG |
871 | return; |
872 | } | |
873 | } | |
874 | else | |
875 | { | |
876 | /* Get the proper mode to use for this field. We want a mode that | |
877 | includes the entire field. If such a mode would be larger than | |
c410d49e | 878 | a word, we won't be doing the extraction the normal way. |
053a35af | 879 | We don't want a mode bigger than the destination. */ |
44037a66 | 880 | |
053a35af AH |
881 | mode = GET_MODE (op0); |
882 | if (GET_MODE_BITSIZE (mode) == 0 | |
0fb7aeda KH |
883 | || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode)) |
884 | mode = word_mode; | |
6a78b724 DD |
885 | |
886 | if (MEM_VOLATILE_P (op0) | |
887 | && GET_MODE_BITSIZE (GET_MODE (op0)) > 0 | |
888 | && flag_strict_volatile_bitfields > 0) | |
889 | mode = GET_MODE (op0); | |
890 | else | |
891 | mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT, | |
892 | MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0)); | |
44037a66 TG |
893 | |
894 | if (mode == VOIDmode) | |
895 | { | |
896 | /* The only way this should occur is if the field spans word | |
897 | boundaries. */ | |
04050c69 RK |
898 | store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT, |
899 | value); | |
44037a66 TG |
900 | return; |
901 | } | |
902 | ||
903 | total_bits = GET_MODE_BITSIZE (mode); | |
904 | ||
3bd98790 | 905 | /* Make sure bitpos is valid for the chosen mode. Adjust BITPOS to |
38e01259 | 906 | be in the range 0 to total_bits-1, and put any excess bytes in |
3bd98790 JW |
907 | OFFSET. */ |
908 | if (bitpos >= total_bits) | |
909 | { | |
910 | offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT); | |
911 | bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT) | |
912 | * BITS_PER_UNIT); | |
913 | } | |
914 | ||
44037a66 TG |
915 | /* Get ref to an aligned byte, halfword, or word containing the field. |
916 | Adjust BITPOS to be position within a word, | |
917 | and OFFSET to be the offset of that word. | |
918 | Then alter OP0 to refer to that word. */ | |
919 | bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT; | |
920 | offset -= (offset % (total_bits / BITS_PER_UNIT)); | |
f4ef873c | 921 | op0 = adjust_address (op0, mode, offset); |
44037a66 TG |
922 | } |
923 | ||
924 | mode = GET_MODE (op0); | |
925 | ||
926 | /* Now MODE is either some integral mode for a MEM as OP0, | |
927 | or is a full-word for a REG as OP0. TOTAL_BITS corresponds. | |
928 | The bit field is contained entirely within OP0. | |
929 | BITPOS is the starting bit number within OP0. | |
930 | (OP0's mode may actually be narrower than MODE.) */ | |
931 | ||
f76b9db2 ILT |
932 | if (BYTES_BIG_ENDIAN) |
933 | /* BITPOS is the distance between our msb | |
934 | and that of the containing datum. | |
935 | Convert it to the distance from the lsb. */ | |
936 | bitpos = total_bits - bitsize - bitpos; | |
44037a66 | 937 | |
44037a66 TG |
938 | /* Now BITPOS is always the distance between our lsb |
939 | and that of OP0. */ | |
940 | ||
941 | /* Shift VALUE left by BITPOS bits. If VALUE is not constant, | |
942 | we must first convert its mode to MODE. */ | |
943 | ||
481683e1 | 944 | if (CONST_INT_P (value)) |
44037a66 | 945 | { |
b3694847 | 946 | HOST_WIDE_INT v = INTVAL (value); |
44037a66 | 947 | |
b1ec3c92 CH |
948 | if (bitsize < HOST_BITS_PER_WIDE_INT) |
949 | v &= ((HOST_WIDE_INT) 1 << bitsize) - 1; | |
44037a66 TG |
950 | |
951 | if (v == 0) | |
952 | all_zero = 1; | |
b1ec3c92 CH |
953 | else if ((bitsize < HOST_BITS_PER_WIDE_INT |
954 | && v == ((HOST_WIDE_INT) 1 << bitsize) - 1) | |
955 | || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1)) | |
44037a66 TG |
956 | all_one = 1; |
957 | ||
958 | value = lshift_value (mode, value, bitpos, bitsize); | |
959 | } | |
960 | else | |
961 | { | |
962 | int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize | |
963 | && bitpos + bitsize != GET_MODE_BITSIZE (mode)); | |
964 | ||
965 | if (GET_MODE (value) != mode) | |
86cfb27a | 966 | value = convert_to_mode (mode, value, 1); |
44037a66 TG |
967 | |
968 | if (must_and) | |
969 | value = expand_binop (mode, and_optab, value, | |
970 | mask_rtx (mode, 0, bitsize, 0), | |
b1ec3c92 | 971 | NULL_RTX, 1, OPTAB_LIB_WIDEN); |
44037a66 TG |
972 | if (bitpos > 0) |
973 | value = expand_shift (LSHIFT_EXPR, mode, value, | |
7d60be94 | 974 | build_int_cst (NULL_TREE, bitpos), NULL_RTX, 1); |
44037a66 TG |
975 | } |
976 | ||
977 | /* Now clear the chosen bits in OP0, | |
978 | except that if VALUE is -1 we need not bother. */ | |
c505fc06 RS |
979 | /* We keep the intermediates in registers to allow CSE to combine |
980 | consecutive bitfield assignments. */ | |
44037a66 | 981 | |
c505fc06 | 982 | temp = force_reg (mode, op0); |
44037a66 TG |
983 | |
984 | if (! all_one) | |
985 | { | |
c505fc06 | 986 | temp = expand_binop (mode, and_optab, temp, |
44037a66 | 987 | mask_rtx (mode, bitpos, bitsize, 1), |
c505fc06 RS |
988 | NULL_RTX, 1, OPTAB_LIB_WIDEN); |
989 | temp = force_reg (mode, temp); | |
44037a66 | 990 | } |
44037a66 TG |
991 | |
992 | /* Now logical-or VALUE into OP0, unless it is zero. */ | |
993 | ||
994 | if (! all_zero) | |
c505fc06 RS |
995 | { |
996 | temp = expand_binop (mode, ior_optab, temp, value, | |
997 | NULL_RTX, 1, OPTAB_LIB_WIDEN); | |
998 | temp = force_reg (mode, temp); | |
999 | } | |
1000 | ||
44037a66 | 1001 | if (op0 != temp) |
4679504c UB |
1002 | { |
1003 | op0 = copy_rtx (op0); | |
1004 | emit_move_insn (op0, temp); | |
1005 | } | |
44037a66 TG |
1006 | } |
1007 | \f | |
06c94bce | 1008 | /* Store a bit field that is split across multiple accessible memory objects. |
44037a66 | 1009 | |
06c94bce | 1010 | OP0 is the REG, SUBREG or MEM rtx for the first of the objects. |
44037a66 TG |
1011 | BITSIZE is the field width; BITPOS the position of its first bit |
1012 | (within the word). | |
06c94bce | 1013 | VALUE is the value to store. |
06c94bce RS |
1014 | |
1015 | This does not yet handle fields wider than BITS_PER_WORD. */ | |
44037a66 TG |
1016 | |
1017 | static void | |
502b8322 AJ |
1018 | store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize, |
1019 | unsigned HOST_WIDE_INT bitpos, rtx value) | |
44037a66 | 1020 | { |
770ae6cc RK |
1021 | unsigned int unit; |
1022 | unsigned int bitsdone = 0; | |
4ee16841 | 1023 | |
0eb61c19 DE |
1024 | /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that |
1025 | much at a time. */ | |
f8cfc6aa | 1026 | if (REG_P (op0) || GET_CODE (op0) == SUBREG) |
4ee16841 DE |
1027 | unit = BITS_PER_WORD; |
1028 | else | |
04050c69 | 1029 | unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD); |
e54d80d0 | 1030 | |
3d709ff0 RS |
1031 | /* If VALUE is a constant other than a CONST_INT, get it into a register in |
1032 | WORD_MODE. If we can do this using gen_lowpart_common, do so. Note | |
1033 | that VALUE might be a floating-point constant. */ | |
481683e1 | 1034 | if (CONSTANT_P (value) && !CONST_INT_P (value)) |
3d709ff0 RS |
1035 | { |
1036 | rtx word = gen_lowpart_common (word_mode, value); | |
1037 | ||
bc8a0e39 | 1038 | if (word && (value != word)) |
3d709ff0 RS |
1039 | value = word; |
1040 | else | |
1041 | value = gen_lowpart_common (word_mode, | |
d01bc862 DE |
1042 | force_reg (GET_MODE (value) != VOIDmode |
1043 | ? GET_MODE (value) | |
1044 | : word_mode, value)); | |
3d709ff0 | 1045 | } |
44037a66 | 1046 | |
06c94bce | 1047 | while (bitsdone < bitsize) |
44037a66 | 1048 | { |
770ae6cc | 1049 | unsigned HOST_WIDE_INT thissize; |
06c94bce | 1050 | rtx part, word; |
770ae6cc RK |
1051 | unsigned HOST_WIDE_INT thispos; |
1052 | unsigned HOST_WIDE_INT offset; | |
44037a66 | 1053 | |
06c94bce RS |
1054 | offset = (bitpos + bitsdone) / unit; |
1055 | thispos = (bitpos + bitsdone) % unit; | |
44037a66 | 1056 | |
0eb61c19 DE |
1057 | /* THISSIZE must not overrun a word boundary. Otherwise, |
1058 | store_fixed_bit_field will call us again, and we will mutually | |
1059 | recurse forever. */ | |
1060 | thissize = MIN (bitsize - bitsdone, BITS_PER_WORD); | |
1061 | thissize = MIN (thissize, unit - thispos); | |
44037a66 | 1062 | |
f76b9db2 ILT |
1063 | if (BYTES_BIG_ENDIAN) |
1064 | { | |
37811a73 RK |
1065 | int total_bits; |
1066 | ||
1067 | /* We must do an endian conversion exactly the same way as it is | |
1068 | done in extract_bit_field, so that the two calls to | |
1069 | extract_fixed_bit_field will have comparable arguments. */ | |
3c0cb5de | 1070 | if (!MEM_P (value) || GET_MODE (value) == BLKmode) |
37811a73 RK |
1071 | total_bits = BITS_PER_WORD; |
1072 | else | |
1073 | total_bits = GET_MODE_BITSIZE (GET_MODE (value)); | |
1074 | ||
f76b9db2 | 1075 | /* Fetch successively less significant portions. */ |
481683e1 | 1076 | if (CONST_INT_P (value)) |
f76b9db2 ILT |
1077 | part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value)) |
1078 | >> (bitsize - bitsdone - thissize)) | |
1079 | & (((HOST_WIDE_INT) 1 << thissize) - 1)); | |
1080 | else | |
1081 | /* The args are chosen so that the last part includes the | |
1082 | lsb. Give extract_bit_field the value it needs (with | |
04050c69 RK |
1083 | endianness compensation) to fetch the piece we want. */ |
1084 | part = extract_fixed_bit_field (word_mode, value, 0, thissize, | |
1085 | total_bits - bitsize + bitsdone, | |
62519f7f | 1086 | NULL_RTX, 1, false); |
f76b9db2 | 1087 | } |
06c94bce | 1088 | else |
f76b9db2 ILT |
1089 | { |
1090 | /* Fetch successively more significant portions. */ | |
481683e1 | 1091 | if (CONST_INT_P (value)) |
f76b9db2 ILT |
1092 | part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value)) |
1093 | >> bitsdone) | |
1094 | & (((HOST_WIDE_INT) 1 << thissize) - 1)); | |
1095 | else | |
04050c69 | 1096 | part = extract_fixed_bit_field (word_mode, value, 0, thissize, |
62519f7f | 1097 | bitsdone, NULL_RTX, 1, false); |
f76b9db2 | 1098 | } |
44037a66 | 1099 | |
06c94bce | 1100 | /* If OP0 is a register, then handle OFFSET here. |
5f57dff0 JW |
1101 | |
1102 | When handling multiword bitfields, extract_bit_field may pass | |
1103 | down a word_mode SUBREG of a larger REG for a bitfield that actually | |
1104 | crosses a word boundary. Thus, for a SUBREG, we must find | |
1105 | the current word starting from the base register. */ | |
1106 | if (GET_CODE (op0) == SUBREG) | |
1107 | { | |
ddef6bc7 JJ |
1108 | int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset; |
1109 | word = operand_subword_force (SUBREG_REG (op0), word_offset, | |
4ee16841 | 1110 | GET_MODE (SUBREG_REG (op0))); |
5f57dff0 JW |
1111 | offset = 0; |
1112 | } | |
f8cfc6aa | 1113 | else if (REG_P (op0)) |
06c94bce | 1114 | { |
4ee16841 | 1115 | word = operand_subword_force (op0, offset, GET_MODE (op0)); |
06c94bce RS |
1116 | offset = 0; |
1117 | } | |
1118 | else | |
1119 | word = op0; | |
44037a66 | 1120 | |
0eb61c19 DE |
1121 | /* OFFSET is in UNITs, and UNIT is in bits. |
1122 | store_fixed_bit_field wants offset in bytes. */ | |
04050c69 RK |
1123 | store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize, |
1124 | thispos, part); | |
06c94bce RS |
1125 | bitsdone += thissize; |
1126 | } | |
44037a66 TG |
1127 | } |
1128 | \f | |
6d7db3c5 RS |
1129 | /* A subroutine of extract_bit_field_1 that converts return value X |
1130 | to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments | |
1131 | to extract_bit_field. */ | |
44037a66 | 1132 | |
6d7db3c5 RS |
1133 | static rtx |
1134 | convert_extracted_bit_field (rtx x, enum machine_mode mode, | |
1135 | enum machine_mode tmode, bool unsignedp) | |
1136 | { | |
1137 | if (GET_MODE (x) == tmode || GET_MODE (x) == mode) | |
1138 | return x; | |
44037a66 | 1139 | |
6d7db3c5 RS |
1140 | /* If the x mode is not a scalar integral, first convert to the |
1141 | integer mode of that size and then access it as a floating-point | |
1142 | value via a SUBREG. */ | |
1143 | if (!SCALAR_INT_MODE_P (tmode)) | |
1144 | { | |
1145 | enum machine_mode smode; | |
44037a66 | 1146 | |
6d7db3c5 RS |
1147 | smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0); |
1148 | x = convert_to_mode (smode, x, unsignedp); | |
1149 | x = force_reg (smode, x); | |
1150 | return gen_lowpart (tmode, x); | |
1151 | } | |
44037a66 | 1152 | |
6d7db3c5 RS |
1153 | return convert_to_mode (tmode, x, unsignedp); |
1154 | } | |
1155 | ||
1156 | /* A subroutine of extract_bit_field, with the same arguments. | |
1157 | If FALLBACK_P is true, fall back to extract_fixed_bit_field | |
1158 | if we can find no other means of implementing the operation. | |
1159 | If FALLBACK_P is false, return NULL instead. */ | |
1160 | ||
1161 | static rtx | |
1162 | extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize, | |
62519f7f JZ |
1163 | unsigned HOST_WIDE_INT bitnum, |
1164 | int unsignedp, bool packedp, rtx target, | |
6d7db3c5 RS |
1165 | enum machine_mode mode, enum machine_mode tmode, |
1166 | bool fallback_p) | |
44037a66 | 1167 | { |
770ae6cc | 1168 | unsigned int unit |
3c0cb5de | 1169 | = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD; |
2c58f7dd | 1170 | unsigned HOST_WIDE_INT offset, bitpos; |
b3694847 | 1171 | rtx op0 = str_rtx; |
3306eb80 | 1172 | enum machine_mode int_mode; |
6d7db3c5 | 1173 | enum machine_mode ext_mode; |
e98f90d3 | 1174 | enum machine_mode mode1; |
6d7db3c5 | 1175 | enum insn_code icode; |
e98f90d3 | 1176 | int byte_offset; |
44037a66 | 1177 | |
44037a66 TG |
1178 | if (tmode == VOIDmode) |
1179 | tmode = mode; | |
6ca6193b | 1180 | |
44037a66 TG |
1181 | while (GET_CODE (op0) == SUBREG) |
1182 | { | |
2c58f7dd | 1183 | bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT; |
44037a66 TG |
1184 | op0 = SUBREG_REG (op0); |
1185 | } | |
77295dec | 1186 | |
2c58f7dd | 1187 | /* If we have an out-of-bounds access to a register, just return an |
647eea9d | 1188 | uninitialized register of the required mode. This can occur if the |
2c58f7dd RS |
1189 | source code contains an out-of-bounds access to a small array. */ |
1190 | if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0))) | |
1191 | return gen_reg_rtx (tmode); | |
1192 | ||
f8cfc6aa | 1193 | if (REG_P (op0) |
aac280fb DD |
1194 | && mode == GET_MODE (op0) |
1195 | && bitnum == 0 | |
0b69c29f | 1196 | && bitsize == GET_MODE_BITSIZE (GET_MODE (op0))) |
aac280fb | 1197 | { |
0b69c29f | 1198 | /* We're trying to extract a full register from itself. */ |
aac280fb DD |
1199 | return op0; |
1200 | } | |
1201 | ||
0890b981 AP |
1202 | /* See if we can get a better vector mode before extracting. */ |
1203 | if (VECTOR_MODE_P (GET_MODE (op0)) | |
1204 | && !MEM_P (op0) | |
1205 | && GET_MODE_INNER (GET_MODE (op0)) != tmode) | |
1206 | { | |
1207 | enum machine_mode new_mode; | |
1208 | int nunits = GET_MODE_NUNITS (GET_MODE (op0)); | |
1209 | ||
1210 | if (GET_MODE_CLASS (tmode) == MODE_FLOAT) | |
1211 | new_mode = MIN_MODE_VECTOR_FLOAT; | |
325217ed CF |
1212 | else if (GET_MODE_CLASS (tmode) == MODE_FRACT) |
1213 | new_mode = MIN_MODE_VECTOR_FRACT; | |
1214 | else if (GET_MODE_CLASS (tmode) == MODE_UFRACT) | |
1215 | new_mode = MIN_MODE_VECTOR_UFRACT; | |
1216 | else if (GET_MODE_CLASS (tmode) == MODE_ACCUM) | |
1217 | new_mode = MIN_MODE_VECTOR_ACCUM; | |
1218 | else if (GET_MODE_CLASS (tmode) == MODE_UACCUM) | |
1219 | new_mode = MIN_MODE_VECTOR_UACCUM; | |
0890b981 AP |
1220 | else |
1221 | new_mode = MIN_MODE_VECTOR_INT; | |
1222 | ||
1223 | for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode)) | |
1224 | if (GET_MODE_NUNITS (new_mode) == nunits | |
6aebac53 | 1225 | && GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0)) |
0890b981 AP |
1226 | && targetm.vector_mode_supported_p (new_mode)) |
1227 | break; | |
1228 | if (new_mode != VOIDmode) | |
1229 | op0 = gen_lowpart (new_mode, op0); | |
1230 | } | |
1231 | ||
997404de JH |
1232 | /* Use vec_extract patterns for extracting parts of vectors whenever |
1233 | available. */ | |
1234 | if (VECTOR_MODE_P (GET_MODE (op0)) | |
3c0cb5de | 1235 | && !MEM_P (op0) |
947131ba | 1236 | && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing |
b42271d6 JB |
1237 | && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0))) |
1238 | == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0))))) | |
997404de JH |
1239 | { |
1240 | enum machine_mode outermode = GET_MODE (op0); | |
1241 | enum machine_mode innermode = GET_MODE_INNER (outermode); | |
947131ba | 1242 | int icode = (int) optab_handler (vec_extract_optab, outermode); |
b42271d6 | 1243 | unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode); |
997404de JH |
1244 | rtx rtxpos = GEN_INT (pos); |
1245 | rtx src = op0; | |
1246 | rtx dest = NULL, pat, seq; | |
1247 | enum machine_mode mode0 = insn_data[icode].operand[0].mode; | |
1248 | enum machine_mode mode1 = insn_data[icode].operand[1].mode; | |
1249 | enum machine_mode mode2 = insn_data[icode].operand[2].mode; | |
1250 | ||
1251 | if (innermode == tmode || innermode == mode) | |
1252 | dest = target; | |
1253 | ||
1254 | if (!dest) | |
1255 | dest = gen_reg_rtx (innermode); | |
1256 | ||
1257 | start_sequence (); | |
1258 | ||
1259 | if (! (*insn_data[icode].operand[0].predicate) (dest, mode0)) | |
1260 | dest = copy_to_mode_reg (mode0, dest); | |
1261 | ||
1262 | if (! (*insn_data[icode].operand[1].predicate) (src, mode1)) | |
1263 | src = copy_to_mode_reg (mode1, src); | |
1264 | ||
1265 | if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2)) | |
1266 | rtxpos = copy_to_mode_reg (mode1, rtxpos); | |
1267 | ||
1268 | /* We could handle this, but we should always be called with a pseudo | |
1269 | for our targets and all insns should take them as outputs. */ | |
5b0264cb NS |
1270 | gcc_assert ((*insn_data[icode].operand[0].predicate) (dest, mode0) |
1271 | && (*insn_data[icode].operand[1].predicate) (src, mode1) | |
1272 | && (*insn_data[icode].operand[2].predicate) (rtxpos, mode2)); | |
5c64c900 | 1273 | |
997404de JH |
1274 | pat = GEN_FCN (icode) (dest, src, rtxpos); |
1275 | seq = get_insns (); | |
1276 | end_sequence (); | |
1277 | if (pat) | |
1278 | { | |
1279 | emit_insn (seq); | |
1280 | emit_insn (pat); | |
0890b981 AP |
1281 | if (mode0 != mode) |
1282 | return gen_lowpart (tmode, dest); | |
5c64c900 | 1283 | return dest; |
997404de JH |
1284 | } |
1285 | } | |
1286 | ||
d006aa54 RH |
1287 | /* Make sure we are playing with integral modes. Pun with subregs |
1288 | if we aren't. */ | |
1289 | { | |
1290 | enum machine_mode imode = int_mode_for_mode (GET_MODE (op0)); | |
1291 | if (imode != GET_MODE (op0)) | |
1292 | { | |
a6d2976a JDA |
1293 | if (MEM_P (op0)) |
1294 | op0 = adjust_address (op0, imode, 0); | |
7d293b58 | 1295 | else if (imode != BLKmode) |
a6d2976a | 1296 | { |
a6d2976a | 1297 | op0 = gen_lowpart (imode, op0); |
360e3535 | 1298 | |
a6d2976a JDA |
1299 | /* If we got a SUBREG, force it into a register since we |
1300 | aren't going to be able to do another SUBREG on it. */ | |
1301 | if (GET_CODE (op0) == SUBREG) | |
1302 | op0 = force_reg (imode, op0); | |
1303 | } | |
7d293b58 JJ |
1304 | else if (REG_P (op0)) |
1305 | { | |
1306 | rtx reg, subreg; | |
1307 | imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)), | |
1308 | MODE_INT); | |
1309 | reg = gen_reg_rtx (imode); | |
1310 | subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg); | |
1311 | emit_move_insn (subreg, op0); | |
1312 | op0 = reg; | |
1313 | bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT; | |
1314 | } | |
1315 | else | |
1316 | { | |
1317 | rtx mem = assign_stack_temp (GET_MODE (op0), | |
1318 | GET_MODE_SIZE (GET_MODE (op0)), 0); | |
1319 | emit_move_insn (mem, op0); | |
1320 | op0 = adjust_address (mem, BLKmode, 0); | |
1321 | } | |
d006aa54 RH |
1322 | } |
1323 | } | |
1324 | ||
4e9bb42b AH |
1325 | /* We may be accessing data outside the field, which means |
1326 | we can alias adjacent data. */ | |
3c0cb5de | 1327 | if (MEM_P (op0)) |
4e9bb42b AH |
1328 | { |
1329 | op0 = shallow_copy_rtx (op0); | |
1330 | set_mem_alias_set (op0, 0); | |
1331 | set_mem_expr (op0, 0); | |
1332 | } | |
1333 | ||
6ca6193b JDA |
1334 | /* Extraction of a full-word or multi-word value from a structure |
1335 | in a register or aligned memory can be done with just a SUBREG. | |
1336 | A subword value in the least significant part of a register | |
1337 | can also be extracted with a SUBREG. For this, we need the | |
1338 | byte offset of the value in op0. */ | |
1339 | ||
2c58f7dd RS |
1340 | bitpos = bitnum % unit; |
1341 | offset = bitnum / unit; | |
6ca6193b | 1342 | byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD; |
c410d49e | 1343 | |
44037a66 TG |
1344 | /* If OP0 is a register, BITPOS must count within a word. |
1345 | But as we have it, it counts within whatever size OP0 now has. | |
1346 | On a bigendian machine, these are not the same, so convert. */ | |
db3cf6fb | 1347 | if (BYTES_BIG_ENDIAN |
3c0cb5de | 1348 | && !MEM_P (op0) |
f76b9db2 | 1349 | && unit > GET_MODE_BITSIZE (GET_MODE (op0))) |
44037a66 | 1350 | bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0)); |
44037a66 | 1351 | |
6ca6193b JDA |
1352 | /* ??? We currently assume TARGET is at least as big as BITSIZE. |
1353 | If that's wrong, the solution is to test for it and set TARGET to 0 | |
1354 | if needed. */ | |
e98f90d3 | 1355 | |
367d6d0b JW |
1356 | /* Only scalar integer modes can be converted via subregs. There is an |
1357 | additional problem for FP modes here in that they can have a precision | |
1358 | which is different from the size. mode_for_size uses precision, but | |
1359 | we want a mode based on the size, so we must avoid calling it for FP | |
1360 | modes. */ | |
1361 | mode1 = (SCALAR_INT_MODE_P (tmode) | |
1362 | ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0) | |
1363 | : mode); | |
e98f90d3 | 1364 | |
6a78b724 DD |
1365 | /* If the bitfield is volatile, we need to make sure the access |
1366 | remains on a type-aligned boundary. */ | |
1367 | if (GET_CODE (op0) == MEM | |
1368 | && MEM_VOLATILE_P (op0) | |
1369 | && GET_MODE_BITSIZE (GET_MODE (op0)) > 0 | |
1370 | && flag_strict_volatile_bitfields > 0) | |
1371 | goto no_subreg_mode_swap; | |
1372 | ||
0d2f38ee OH |
1373 | if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode) |
1374 | && bitpos % BITS_PER_WORD == 0) | |
367d6d0b | 1375 | || (mode1 != BLKmode |
0d2f38ee OH |
1376 | /* ??? The big endian test here is wrong. This is correct |
1377 | if the value is in a register, and if mode_for_size is not | |
1378 | the same mode as op0. This causes us to get unnecessarily | |
1379 | inefficient code from the Thumb port when -mbig-endian. */ | |
1380 | && (BYTES_BIG_ENDIAN | |
1381 | ? bitpos + bitsize == BITS_PER_WORD | |
1382 | : bitpos == 0))) | |
3c0cb5de | 1383 | && ((!MEM_P (op0) |
86cfb27a | 1384 | && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode1), |
0d2f38ee OH |
1385 | GET_MODE_BITSIZE (GET_MODE (op0))) |
1386 | && GET_MODE_SIZE (mode1) != 0 | |
1387 | && byte_offset % GET_MODE_SIZE (mode1) == 0) | |
3c0cb5de | 1388 | || (MEM_P (op0) |
0d2f38ee OH |
1389 | && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0)) |
1390 | || (offset * BITS_PER_UNIT % bitsize == 0 | |
1391 | && MEM_ALIGN (op0) % bitsize == 0))))) | |
44037a66 | 1392 | { |
8ddcfde1 DJ |
1393 | if (MEM_P (op0)) |
1394 | op0 = adjust_address (op0, mode1, offset); | |
1395 | else if (mode1 != GET_MODE (op0)) | |
c7e33f89 | 1396 | { |
8ddcfde1 DJ |
1397 | rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0), |
1398 | byte_offset); | |
1399 | if (sub == NULL) | |
1400 | goto no_subreg_mode_swap; | |
1401 | op0 = sub; | |
c7e33f89 | 1402 | } |
44037a66 TG |
1403 | if (mode1 != mode) |
1404 | return convert_to_mode (tmode, op0, unsignedp); | |
1405 | return op0; | |
1406 | } | |
28ce94d4 | 1407 | no_subreg_mode_swap: |
44037a66 TG |
1408 | |
1409 | /* Handle fields bigger than a word. */ | |
c410d49e | 1410 | |
44037a66 TG |
1411 | if (bitsize > BITS_PER_WORD) |
1412 | { | |
1413 | /* Here we transfer the words of the field | |
1414 | in the order least significant first. | |
1415 | This is because the most significant word is the one which may | |
1416 | be less than full. */ | |
1417 | ||
770ae6cc RK |
1418 | unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD; |
1419 | unsigned int i; | |
44037a66 | 1420 | |
f8cfc6aa | 1421 | if (target == 0 || !REG_P (target)) |
44037a66 TG |
1422 | target = gen_reg_rtx (mode); |
1423 | ||
34ea783b | 1424 | /* Indicate for flow that the entire target reg is being set. */ |
c41c1387 | 1425 | emit_clobber (target); |
34ea783b | 1426 | |
44037a66 TG |
1427 | for (i = 0; i < nwords; i++) |
1428 | { | |
1429 | /* If I is 0, use the low-order word in both field and target; | |
1430 | if I is 1, use the next to lowest word; and so on. */ | |
77295dec | 1431 | /* Word number in TARGET to use. */ |
770ae6cc RK |
1432 | unsigned int wordnum |
1433 | = (WORDS_BIG_ENDIAN | |
1434 | ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1 | |
1435 | : i); | |
77295dec | 1436 | /* Offset from start of field in OP0. */ |
770ae6cc RK |
1437 | unsigned int bit_offset = (WORDS_BIG_ENDIAN |
1438 | ? MAX (0, ((int) bitsize - ((int) i + 1) | |
75131237 | 1439 | * (int) BITS_PER_WORD)) |
770ae6cc | 1440 | : (int) i * BITS_PER_WORD); |
44037a66 TG |
1441 | rtx target_part = operand_subword (target, wordnum, 1, VOIDmode); |
1442 | rtx result_part | |
1443 | = extract_bit_field (op0, MIN (BITS_PER_WORD, | |
1444 | bitsize - i * BITS_PER_WORD), | |
62519f7f | 1445 | bitnum + bit_offset, 1, false, target_part, mode, |
b3520980 | 1446 | word_mode); |
44037a66 | 1447 | |
5b0264cb | 1448 | gcc_assert (target_part); |
44037a66 TG |
1449 | |
1450 | if (result_part != target_part) | |
1451 | emit_move_insn (target_part, result_part); | |
1452 | } | |
1453 | ||
5f57dff0 | 1454 | if (unsignedp) |
77295dec DE |
1455 | { |
1456 | /* Unless we've filled TARGET, the upper regs in a multi-reg value | |
1457 | need to be zero'd out. */ | |
1458 | if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD) | |
1459 | { | |
770ae6cc | 1460 | unsigned int i, total_words; |
77295dec DE |
1461 | |
1462 | total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD; | |
1463 | for (i = nwords; i < total_words; i++) | |
04050c69 RK |
1464 | emit_move_insn |
1465 | (operand_subword (target, | |
1466 | WORDS_BIG_ENDIAN ? total_words - i - 1 : i, | |
1467 | 1, VOIDmode), | |
1468 | const0_rtx); | |
77295dec DE |
1469 | } |
1470 | return target; | |
1471 | } | |
1472 | ||
5f57dff0 JW |
1473 | /* Signed bit field: sign-extend with two arithmetic shifts. */ |
1474 | target = expand_shift (LSHIFT_EXPR, mode, target, | |
4a90aeeb | 1475 | build_int_cst (NULL_TREE, |
7d60be94 | 1476 | GET_MODE_BITSIZE (mode) - bitsize), |
5f57dff0 JW |
1477 | NULL_RTX, 0); |
1478 | return expand_shift (RSHIFT_EXPR, mode, target, | |
4a90aeeb | 1479 | build_int_cst (NULL_TREE, |
7d60be94 | 1480 | GET_MODE_BITSIZE (mode) - bitsize), |
5f57dff0 | 1481 | NULL_RTX, 0); |
44037a66 | 1482 | } |
c410d49e | 1483 | |
3306eb80 GK |
1484 | /* From here on we know the desired field is smaller than a word. */ |
1485 | ||
1486 | /* Check if there is a correspondingly-sized integer field, so we can | |
1487 | safely extract it as one size of integer, if necessary; then | |
1488 | truncate or extend to the size that is wanted; then use SUBREGs or | |
1489 | convert_to_mode to get one of the modes we really wanted. */ | |
c410d49e | 1490 | |
3306eb80 GK |
1491 | int_mode = int_mode_for_mode (tmode); |
1492 | if (int_mode == BLKmode) | |
1493 | int_mode = int_mode_for_mode (mode); | |
5b0264cb NS |
1494 | /* Should probably push op0 out to memory and then do a load. */ |
1495 | gcc_assert (int_mode != BLKmode); | |
44037a66 TG |
1496 | |
1497 | /* OFFSET is the number of words or bytes (UNIT says which) | |
1498 | from STR_RTX to the first word or byte containing part of the field. */ | |
3c0cb5de | 1499 | if (!MEM_P (op0)) |
44037a66 TG |
1500 | { |
1501 | if (offset != 0 | |
1502 | || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD) | |
470032d7 | 1503 | { |
f8cfc6aa | 1504 | if (!REG_P (op0)) |
470032d7 RH |
1505 | op0 = copy_to_reg (op0); |
1506 | op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0), | |
ddef6bc7 | 1507 | op0, (offset * UNITS_PER_WORD)); |
470032d7 | 1508 | } |
44037a66 TG |
1509 | offset = 0; |
1510 | } | |
44037a66 TG |
1511 | |
1512 | /* Now OFFSET is nonzero only for memory operands. */ | |
6d7db3c5 RS |
1513 | ext_mode = mode_for_extraction (unsignedp ? EP_extzv : EP_extv, 0); |
1514 | icode = unsignedp ? CODE_FOR_extzv : CODE_FOR_extv; | |
1515 | if (ext_mode != MAX_MACHINE_MODE | |
1516 | && bitsize > 0 | |
1517 | && GET_MODE_BITSIZE (ext_mode) >= bitsize | |
1518 | /* If op0 is a register, we need it in EXT_MODE to make it | |
1519 | acceptable to the format of ext(z)v. */ | |
1520 | && !(GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode) | |
1521 | && !((REG_P (op0) || GET_CODE (op0) == SUBREG) | |
1522 | && (bitsize + bitpos > GET_MODE_BITSIZE (ext_mode))) | |
1523 | && check_predicate_volatile_ok (icode, 1, op0, GET_MODE (op0))) | |
44037a66 | 1524 | { |
6d7db3c5 RS |
1525 | unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset; |
1526 | rtx bitsize_rtx, bitpos_rtx; | |
1527 | rtx last = get_last_insn (); | |
1528 | rtx xop0 = op0; | |
1529 | rtx xtarget = target; | |
1530 | rtx xspec_target = target; | |
1531 | rtx xspec_target_subreg = 0; | |
1532 | rtx pat; | |
44037a66 | 1533 | |
6d7db3c5 RS |
1534 | /* If op0 is a register, we need it in EXT_MODE to make it |
1535 | acceptable to the format of ext(z)v. */ | |
1536 | if (REG_P (xop0) && GET_MODE (xop0) != ext_mode) | |
d8a60d24 | 1537 | xop0 = gen_lowpart_SUBREG (ext_mode, xop0); |
6d7db3c5 RS |
1538 | if (MEM_P (xop0)) |
1539 | /* Get ref to first byte containing part of the field. */ | |
1540 | xop0 = adjust_address (xop0, byte_mode, xoffset); | |
44037a66 | 1541 | |
6d7db3c5 RS |
1542 | /* On big-endian machines, we count bits from the most significant. |
1543 | If the bit field insn does not, we must invert. */ | |
1544 | if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN) | |
1545 | xbitpos = unit - bitsize - xbitpos; | |
f76b9db2 | 1546 | |
6d7db3c5 RS |
1547 | /* Now convert from counting within UNIT to counting in EXT_MODE. */ |
1548 | if (BITS_BIG_ENDIAN && !MEM_P (xop0)) | |
1549 | xbitpos += GET_MODE_BITSIZE (ext_mode) - unit; | |
f76b9db2 | 1550 | |
6d7db3c5 | 1551 | unit = GET_MODE_BITSIZE (ext_mode); |
44037a66 | 1552 | |
6d7db3c5 RS |
1553 | if (xtarget == 0) |
1554 | xtarget = xspec_target = gen_reg_rtx (tmode); | |
44037a66 | 1555 | |
6d7db3c5 RS |
1556 | if (GET_MODE (xtarget) != ext_mode) |
1557 | { | |
a990abff AN |
1558 | /* Don't use LHS paradoxical subreg if explicit truncation is needed |
1559 | between the mode of the extraction (word_mode) and the target | |
1560 | mode. Instead, create a temporary and use convert_move to set | |
1561 | the target. */ | |
1562 | if (REG_P (xtarget) | |
1563 | && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (GET_MODE (xtarget)), | |
1564 | GET_MODE_BITSIZE (ext_mode))) | |
44037a66 | 1565 | { |
6d7db3c5 RS |
1566 | xtarget = gen_lowpart (ext_mode, xtarget); |
1567 | if (GET_MODE_SIZE (ext_mode) | |
1568 | > GET_MODE_SIZE (GET_MODE (xspec_target))) | |
1569 | xspec_target_subreg = xtarget; | |
44037a66 | 1570 | } |
6d7db3c5 RS |
1571 | else |
1572 | xtarget = gen_reg_rtx (ext_mode); | |
1573 | } | |
44037a66 | 1574 | |
6d7db3c5 RS |
1575 | /* If this machine's ext(z)v insists on a register target, |
1576 | make sure we have one. */ | |
1577 | if (!insn_data[(int) icode].operand[0].predicate (xtarget, ext_mode)) | |
1578 | xtarget = gen_reg_rtx (ext_mode); | |
44037a66 | 1579 | |
6d7db3c5 RS |
1580 | bitsize_rtx = GEN_INT (bitsize); |
1581 | bitpos_rtx = GEN_INT (xbitpos); | |
44037a66 | 1582 | |
6d7db3c5 RS |
1583 | pat = (unsignedp |
1584 | ? gen_extzv (xtarget, xop0, bitsize_rtx, bitpos_rtx) | |
1585 | : gen_extv (xtarget, xop0, bitsize_rtx, bitpos_rtx)); | |
1586 | if (pat) | |
1587 | { | |
1588 | emit_insn (pat); | |
1589 | if (xtarget == xspec_target) | |
1590 | return xtarget; | |
1591 | if (xtarget == xspec_target_subreg) | |
1592 | return xspec_target; | |
1593 | return convert_extracted_bit_field (xtarget, mode, tmode, unsignedp); | |
44037a66 | 1594 | } |
6d7db3c5 | 1595 | delete_insns_since (last); |
44037a66 | 1596 | } |
f76b9db2 | 1597 | |
6d7db3c5 RS |
1598 | /* If OP0 is a memory, try copying it to a register and seeing if a |
1599 | cheap register alternative is available. */ | |
1600 | if (ext_mode != MAX_MACHINE_MODE && MEM_P (op0)) | |
1601 | { | |
1602 | enum machine_mode bestmode; | |
1603 | ||
1604 | /* Get the mode to use for inserting into this field. If | |
1605 | OP0 is BLKmode, get the smallest mode consistent with the | |
1606 | alignment. If OP0 is a non-BLKmode object that is no | |
1607 | wider than EXT_MODE, use its mode. Otherwise, use the | |
1608 | smallest mode containing the field. */ | |
1609 | ||
1610 | if (GET_MODE (op0) == BLKmode | |
1611 | || (ext_mode != MAX_MACHINE_MODE | |
1612 | && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (ext_mode))) | |
1613 | bestmode = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0), | |
1614 | (ext_mode == MAX_MACHINE_MODE | |
1615 | ? VOIDmode : ext_mode), | |
1616 | MEM_VOLATILE_P (op0)); | |
1617 | else | |
1618 | bestmode = GET_MODE (op0); | |
f76b9db2 | 1619 | |
6d7db3c5 RS |
1620 | if (bestmode != VOIDmode |
1621 | && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0)) | |
1622 | && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0))) | |
1623 | { | |
1624 | unsigned HOST_WIDE_INT xoffset, xbitpos; | |
44037a66 | 1625 | |
6d7db3c5 RS |
1626 | /* Compute the offset as a multiple of this unit, |
1627 | counting in bytes. */ | |
1628 | unit = GET_MODE_BITSIZE (bestmode); | |
1629 | xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode); | |
1630 | xbitpos = bitnum % unit; | |
44037a66 | 1631 | |
6d7db3c5 RS |
1632 | /* Make sure the register is big enough for the whole field. */ |
1633 | if (xoffset * BITS_PER_UNIT + unit | |
1634 | >= offset * BITS_PER_UNIT + bitsize) | |
44037a66 | 1635 | { |
6d7db3c5 | 1636 | rtx last, result, xop0; |
44037a66 | 1637 | |
6d7db3c5 | 1638 | last = get_last_insn (); |
44037a66 | 1639 | |
6d7db3c5 RS |
1640 | /* Fetch it to a register in that size. */ |
1641 | xop0 = adjust_address (op0, bestmode, xoffset); | |
1642 | xop0 = force_reg (bestmode, xop0); | |
1643 | result = extract_bit_field_1 (xop0, bitsize, xbitpos, | |
62519f7f | 1644 | unsignedp, packedp, target, |
6d7db3c5 RS |
1645 | mode, tmode, false); |
1646 | if (result) | |
1647 | return result; | |
44037a66 | 1648 | |
44037a66 | 1649 | delete_insns_since (last); |
44037a66 | 1650 | } |
c410d49e | 1651 | } |
44037a66 | 1652 | } |
562fc702 | 1653 | |
6d7db3c5 RS |
1654 | if (!fallback_p) |
1655 | return NULL; | |
1656 | ||
1657 | target = extract_fixed_bit_field (int_mode, op0, offset, bitsize, | |
62519f7f | 1658 | bitpos, target, unsignedp, packedp); |
6d7db3c5 RS |
1659 | return convert_extracted_bit_field (target, mode, tmode, unsignedp); |
1660 | } | |
1661 | ||
1662 | /* Generate code to extract a byte-field from STR_RTX | |
1663 | containing BITSIZE bits, starting at BITNUM, | |
1664 | and put it in TARGET if possible (if TARGET is nonzero). | |
1665 | Regardless of TARGET, we return the rtx for where the value is placed. | |
1666 | ||
1667 | STR_RTX is the structure containing the byte (a REG or MEM). | |
1668 | UNSIGNEDP is nonzero if this is an unsigned bit field. | |
62519f7f | 1669 | PACKEDP is nonzero if the field has the packed attribute. |
6d7db3c5 RS |
1670 | MODE is the natural mode of the field value once extracted. |
1671 | TMODE is the mode the caller would like the value to have; | |
1672 | but the value may be returned with type MODE instead. | |
1673 | ||
1674 | If a TARGET is specified and we can store in it at no extra cost, | |
1675 | we do so, and return TARGET. | |
1676 | Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred | |
1677 | if they are equally easy. */ | |
1678 | ||
1679 | rtx | |
1680 | extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize, | |
62519f7f JZ |
1681 | unsigned HOST_WIDE_INT bitnum, int unsignedp, bool packedp, |
1682 | rtx target, enum machine_mode mode, enum machine_mode tmode) | |
6d7db3c5 | 1683 | { |
62519f7f | 1684 | return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp, packedp, |
6d7db3c5 | 1685 | target, mode, tmode, true); |
44037a66 TG |
1686 | } |
1687 | \f | |
1688 | /* Extract a bit field using shifts and boolean operations | |
1689 | Returns an rtx to represent the value. | |
1690 | OP0 addresses a register (word) or memory (byte). | |
1691 | BITPOS says which bit within the word or byte the bit field starts in. | |
1692 | OFFSET says how many bytes farther the bit field starts; | |
1693 | it is 0 if OP0 is a register. | |
1694 | BITSIZE says how many bits long the bit field is. | |
1695 | (If OP0 is a register, it may be narrower than a full word, | |
1696 | but BITPOS still counts within a full word, | |
1697 | which is significant on bigendian machines.) | |
1698 | ||
1699 | UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value). | |
62519f7f JZ |
1700 | PACKEDP is true if the field has the packed attribute. |
1701 | ||
44037a66 TG |
1702 | If TARGET is nonzero, attempts to store the value there |
1703 | and return TARGET, but this is not guaranteed. | |
04050c69 | 1704 | If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */ |
44037a66 TG |
1705 | |
1706 | static rtx | |
502b8322 AJ |
1707 | extract_fixed_bit_field (enum machine_mode tmode, rtx op0, |
1708 | unsigned HOST_WIDE_INT offset, | |
1709 | unsigned HOST_WIDE_INT bitsize, | |
1710 | unsigned HOST_WIDE_INT bitpos, rtx target, | |
62519f7f | 1711 | int unsignedp, bool packedp) |
44037a66 | 1712 | { |
770ae6cc | 1713 | unsigned int total_bits = BITS_PER_WORD; |
44037a66 TG |
1714 | enum machine_mode mode; |
1715 | ||
f8cfc6aa | 1716 | if (GET_CODE (op0) == SUBREG || REG_P (op0)) |
44037a66 TG |
1717 | { |
1718 | /* Special treatment for a bit field split across two registers. */ | |
1719 | if (bitsize + bitpos > BITS_PER_WORD) | |
04050c69 | 1720 | return extract_split_bit_field (op0, bitsize, bitpos, unsignedp); |
44037a66 TG |
1721 | } |
1722 | else | |
1723 | { | |
1724 | /* Get the proper mode to use for this field. We want a mode that | |
1725 | includes the entire field. If such a mode would be larger than | |
1726 | a word, we won't be doing the extraction the normal way. */ | |
1727 | ||
6a78b724 DD |
1728 | if (MEM_VOLATILE_P (op0) |
1729 | && flag_strict_volatile_bitfields > 0) | |
1730 | { | |
1731 | if (GET_MODE_BITSIZE (GET_MODE (op0)) > 0) | |
1732 | mode = GET_MODE (op0); | |
1733 | else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0) | |
1734 | mode = GET_MODE (target); | |
1735 | else | |
1736 | mode = tmode; | |
1737 | } | |
1738 | else | |
1739 | mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT, | |
1740 | MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0)); | |
44037a66 TG |
1741 | |
1742 | if (mode == VOIDmode) | |
1743 | /* The only way this should occur is if the field spans word | |
1744 | boundaries. */ | |
1745 | return extract_split_bit_field (op0, bitsize, | |
1746 | bitpos + offset * BITS_PER_UNIT, | |
04050c69 | 1747 | unsignedp); |
44037a66 TG |
1748 | |
1749 | total_bits = GET_MODE_BITSIZE (mode); | |
1750 | ||
401db791 | 1751 | /* Make sure bitpos is valid for the chosen mode. Adjust BITPOS to |
38e01259 | 1752 | be in the range 0 to total_bits-1, and put any excess bytes in |
401db791 JW |
1753 | OFFSET. */ |
1754 | if (bitpos >= total_bits) | |
1755 | { | |
1756 | offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT); | |
1757 | bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT) | |
1758 | * BITS_PER_UNIT); | |
1759 | } | |
1760 | ||
6a78b724 DD |
1761 | /* If we're accessing a volatile MEM, we can't do the next |
1762 | alignment step if it results in a multi-word access where we | |
1763 | otherwise wouldn't have one. So, check for that case | |
1764 | here. */ | |
1765 | if (MEM_P (op0) | |
1766 | && MEM_VOLATILE_P (op0) | |
1767 | && flag_strict_volatile_bitfields > 0 | |
1768 | && bitpos + bitsize <= total_bits | |
1769 | && bitpos + bitsize + (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT > total_bits) | |
1770 | { | |
1771 | if (STRICT_ALIGNMENT) | |
1772 | { | |
1773 | static bool informed_about_misalignment = false; | |
1774 | bool warned; | |
1775 | ||
62519f7f JZ |
1776 | if (packedp) |
1777 | { | |
1778 | if (bitsize == total_bits) | |
1779 | warned = warning_at (input_location, OPT_fstrict_volatile_bitfields, | |
1780 | "multiple accesses to volatile structure member" | |
1781 | " because of packed attribute"); | |
1782 | else | |
1783 | warned = warning_at (input_location, OPT_fstrict_volatile_bitfields, | |
1784 | "multiple accesses to volatile structure bitfield" | |
1785 | " because of packed attribute"); | |
1786 | ||
1787 | return extract_split_bit_field (op0, bitsize, | |
1788 | bitpos + offset * BITS_PER_UNIT, | |
1789 | unsignedp); | |
1790 | } | |
1791 | ||
6a78b724 DD |
1792 | if (bitsize == total_bits) |
1793 | warned = warning_at (input_location, OPT_fstrict_volatile_bitfields, | |
1794 | "mis-aligned access used for structure member"); | |
1795 | else | |
1796 | warned = warning_at (input_location, OPT_fstrict_volatile_bitfields, | |
1797 | "mis-aligned access used for structure bitfield"); | |
1798 | ||
1799 | if (! informed_about_misalignment && warned) | |
1800 | { | |
1801 | informed_about_misalignment = true; | |
1802 | inform (input_location, | |
d8a07487 | 1803 | "when a volatile object spans multiple type-sized locations," |
6a78b724 DD |
1804 | " the compiler must choose between using a single mis-aligned access to" |
1805 | " preserve the volatility, or using multiple aligned accesses to avoid" | |
d8a07487 JM |
1806 | " runtime faults; this code may fail at runtime if the hardware does" |
1807 | " not allow this access"); | |
6a78b724 DD |
1808 | } |
1809 | } | |
1810 | } | |
1811 | else | |
1812 | { | |
1813 | ||
1814 | /* Get ref to an aligned byte, halfword, or word containing the field. | |
1815 | Adjust BITPOS to be position within a word, | |
1816 | and OFFSET to be the offset of that word. | |
1817 | Then alter OP0 to refer to that word. */ | |
1818 | bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT; | |
1819 | offset -= (offset % (total_bits / BITS_PER_UNIT)); | |
1820 | } | |
1821 | ||
f4ef873c | 1822 | op0 = adjust_address (op0, mode, offset); |
44037a66 TG |
1823 | } |
1824 | ||
37811a73 RK |
1825 | mode = GET_MODE (op0); |
1826 | ||
f76b9db2 | 1827 | if (BYTES_BIG_ENDIAN) |
04050c69 RK |
1828 | /* BITPOS is the distance between our msb and that of OP0. |
1829 | Convert it to the distance from the lsb. */ | |
1830 | bitpos = total_bits - bitsize - bitpos; | |
44037a66 | 1831 | |
44037a66 TG |
1832 | /* Now BITPOS is always the distance between the field's lsb and that of OP0. |
1833 | We have reduced the big-endian case to the little-endian case. */ | |
1834 | ||
1835 | if (unsignedp) | |
1836 | { | |
1837 | if (bitpos) | |
1838 | { | |
1839 | /* If the field does not already start at the lsb, | |
1840 | shift it so it does. */ | |
7d60be94 | 1841 | tree amount = build_int_cst (NULL_TREE, bitpos); |
44037a66 TG |
1842 | /* Maybe propagate the target for the shift. */ |
1843 | /* But not if we will return it--could confuse integrate.c. */ | |
f8cfc6aa | 1844 | rtx subtarget = (target != 0 && REG_P (target) ? target : 0); |
44037a66 TG |
1845 | if (tmode != mode) subtarget = 0; |
1846 | op0 = expand_shift (RSHIFT_EXPR, mode, op0, amount, subtarget, 1); | |
1847 | } | |
1848 | /* Convert the value to the desired mode. */ | |
1849 | if (mode != tmode) | |
1850 | op0 = convert_to_mode (tmode, op0, 1); | |
1851 | ||
1852 | /* Unless the msb of the field used to be the msb when we shifted, | |
1853 | mask out the upper bits. */ | |
1854 | ||
c99d986a | 1855 | if (GET_MODE_BITSIZE (mode) != bitpos + bitsize) |
44037a66 TG |
1856 | return expand_binop (GET_MODE (op0), and_optab, op0, |
1857 | mask_rtx (GET_MODE (op0), 0, bitsize, 0), | |
1858 | target, 1, OPTAB_LIB_WIDEN); | |
1859 | return op0; | |
1860 | } | |
1861 | ||
1862 | /* To extract a signed bit-field, first shift its msb to the msb of the word, | |
1863 | then arithmetic-shift its lsb to the lsb of the word. */ | |
1864 | op0 = force_reg (mode, op0); | |
1865 | if (mode != tmode) | |
1866 | target = 0; | |
1867 | ||
1868 | /* Find the narrowest integer mode that contains the field. */ | |
1869 | ||
1870 | for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode; | |
1871 | mode = GET_MODE_WIDER_MODE (mode)) | |
1872 | if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos) | |
1873 | { | |
1874 | op0 = convert_to_mode (mode, op0, 0); | |
1875 | break; | |
1876 | } | |
1877 | ||
1878 | if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos)) | |
1879 | { | |
04050c69 | 1880 | tree amount |
4a90aeeb | 1881 | = build_int_cst (NULL_TREE, |
7d60be94 | 1882 | GET_MODE_BITSIZE (mode) - (bitsize + bitpos)); |
44037a66 | 1883 | /* Maybe propagate the target for the shift. */ |
f8cfc6aa | 1884 | rtx subtarget = (target != 0 && REG_P (target) ? target : 0); |
44037a66 TG |
1885 | op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1); |
1886 | } | |
1887 | ||
1888 | return expand_shift (RSHIFT_EXPR, mode, op0, | |
4a90aeeb | 1889 | build_int_cst (NULL_TREE, |
7d60be94 | 1890 | GET_MODE_BITSIZE (mode) - bitsize), |
44037a66 TG |
1891 | target, 0); |
1892 | } | |
1893 | \f | |
1894 | /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value | |
1895 | of mode MODE with BITSIZE ones followed by BITPOS zeros, or the | |
1896 | complement of that if COMPLEMENT. The mask is truncated if | |
77295dec DE |
1897 | necessary to the width of mode MODE. The mask is zero-extended if |
1898 | BITSIZE+BITPOS is too small for MODE. */ | |
44037a66 TG |
1899 | |
1900 | static rtx | |
502b8322 | 1901 | mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement) |
44037a66 | 1902 | { |
2bd1333d | 1903 | double_int mask; |
44037a66 | 1904 | |
2bd1333d AS |
1905 | mask = double_int_mask (bitsize); |
1906 | mask = double_int_lshift (mask, bitpos, HOST_BITS_PER_DOUBLE_INT, false); | |
44037a66 TG |
1907 | |
1908 | if (complement) | |
2bd1333d | 1909 | mask = double_int_not (mask); |
44037a66 | 1910 | |
54fb1ae0 | 1911 | return immed_double_int_const (mask, mode); |
44037a66 TG |
1912 | } |
1913 | ||
1914 | /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value | |
1915 | VALUE truncated to BITSIZE bits and then shifted left BITPOS bits. */ | |
1916 | ||
1917 | static rtx | |
502b8322 | 1918 | lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize) |
44037a66 | 1919 | { |
2bd1333d AS |
1920 | double_int val; |
1921 | ||
1922 | val = double_int_zext (uhwi_to_double_int (INTVAL (value)), bitsize); | |
1923 | val = double_int_lshift (val, bitpos, HOST_BITS_PER_DOUBLE_INT, false); | |
44037a66 | 1924 | |
54fb1ae0 | 1925 | return immed_double_int_const (val, mode); |
44037a66 TG |
1926 | } |
1927 | \f | |
1928 | /* Extract a bit field that is split across two words | |
1929 | and return an RTX for the result. | |
1930 | ||
1931 | OP0 is the REG, SUBREG or MEM rtx for the first of the two words. | |
1932 | BITSIZE is the field width; BITPOS, position of its first bit, in the word. | |
04050c69 | 1933 | UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend. */ |
44037a66 TG |
1934 | |
1935 | static rtx | |
502b8322 AJ |
1936 | extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize, |
1937 | unsigned HOST_WIDE_INT bitpos, int unsignedp) | |
44037a66 | 1938 | { |
770ae6cc RK |
1939 | unsigned int unit; |
1940 | unsigned int bitsdone = 0; | |
c16ddde3 | 1941 | rtx result = NULL_RTX; |
06c94bce | 1942 | int first = 1; |
44037a66 | 1943 | |
4ee16841 DE |
1944 | /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that |
1945 | much at a time. */ | |
f8cfc6aa | 1946 | if (REG_P (op0) || GET_CODE (op0) == SUBREG) |
4ee16841 DE |
1947 | unit = BITS_PER_WORD; |
1948 | else | |
609023ff | 1949 | unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD); |
4ee16841 | 1950 | |
06c94bce RS |
1951 | while (bitsdone < bitsize) |
1952 | { | |
770ae6cc | 1953 | unsigned HOST_WIDE_INT thissize; |
06c94bce | 1954 | rtx part, word; |
770ae6cc RK |
1955 | unsigned HOST_WIDE_INT thispos; |
1956 | unsigned HOST_WIDE_INT offset; | |
06c94bce RS |
1957 | |
1958 | offset = (bitpos + bitsdone) / unit; | |
1959 | thispos = (bitpos + bitsdone) % unit; | |
1960 | ||
0eb61c19 DE |
1961 | /* THISSIZE must not overrun a word boundary. Otherwise, |
1962 | extract_fixed_bit_field will call us again, and we will mutually | |
1963 | recurse forever. */ | |
1964 | thissize = MIN (bitsize - bitsdone, BITS_PER_WORD); | |
1965 | thissize = MIN (thissize, unit - thispos); | |
06c94bce RS |
1966 | |
1967 | /* If OP0 is a register, then handle OFFSET here. | |
5f57dff0 JW |
1968 | |
1969 | When handling multiword bitfields, extract_bit_field may pass | |
1970 | down a word_mode SUBREG of a larger REG for a bitfield that actually | |
1971 | crosses a word boundary. Thus, for a SUBREG, we must find | |
1972 | the current word starting from the base register. */ | |
1973 | if (GET_CODE (op0) == SUBREG) | |
1974 | { | |
ddef6bc7 JJ |
1975 | int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset; |
1976 | word = operand_subword_force (SUBREG_REG (op0), word_offset, | |
5f57dff0 JW |
1977 | GET_MODE (SUBREG_REG (op0))); |
1978 | offset = 0; | |
1979 | } | |
f8cfc6aa | 1980 | else if (REG_P (op0)) |
06c94bce RS |
1981 | { |
1982 | word = operand_subword_force (op0, offset, GET_MODE (op0)); | |
1983 | offset = 0; | |
1984 | } | |
1985 | else | |
1986 | word = op0; | |
1987 | ||
06c94bce | 1988 | /* Extract the parts in bit-counting order, |
0eb61c19 DE |
1989 | whose meaning is determined by BYTES_PER_UNIT. |
1990 | OFFSET is in UNITs, and UNIT is in bits. | |
1991 | extract_fixed_bit_field wants offset in bytes. */ | |
1992 | part = extract_fixed_bit_field (word_mode, word, | |
1993 | offset * unit / BITS_PER_UNIT, | |
62519f7f | 1994 | thissize, thispos, 0, 1, false); |
06c94bce | 1995 | bitsdone += thissize; |
44037a66 | 1996 | |
06c94bce | 1997 | /* Shift this part into place for the result. */ |
f76b9db2 ILT |
1998 | if (BYTES_BIG_ENDIAN) |
1999 | { | |
2000 | if (bitsize != bitsdone) | |
2001 | part = expand_shift (LSHIFT_EXPR, word_mode, part, | |
7d60be94 NS |
2002 | build_int_cst (NULL_TREE, bitsize - bitsdone), |
2003 | 0, 1); | |
f76b9db2 ILT |
2004 | } |
2005 | else | |
2006 | { | |
2007 | if (bitsdone != thissize) | |
2008 | part = expand_shift (LSHIFT_EXPR, word_mode, part, | |
4a90aeeb | 2009 | build_int_cst (NULL_TREE, |
7d60be94 | 2010 | bitsdone - thissize), 0, 1); |
f76b9db2 | 2011 | } |
44037a66 | 2012 | |
06c94bce RS |
2013 | if (first) |
2014 | result = part; | |
2015 | else | |
2016 | /* Combine the parts with bitwise or. This works | |
2017 | because we extracted each part as an unsigned bit field. */ | |
2018 | result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1, | |
2019 | OPTAB_LIB_WIDEN); | |
2020 | ||
2021 | first = 0; | |
2022 | } | |
44037a66 TG |
2023 | |
2024 | /* Unsigned bit field: we are done. */ | |
2025 | if (unsignedp) | |
2026 | return result; | |
2027 | /* Signed bit field: sign-extend with two arithmetic shifts. */ | |
2028 | result = expand_shift (LSHIFT_EXPR, word_mode, result, | |
7d60be94 | 2029 | build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize), |
b1ec3c92 | 2030 | NULL_RTX, 0); |
44037a66 | 2031 | return expand_shift (RSHIFT_EXPR, word_mode, result, |
7d60be94 NS |
2032 | build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize), |
2033 | NULL_RTX, 0); | |
44037a66 TG |
2034 | } |
2035 | \f | |
18b526e8 RS |
2036 | /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving |
2037 | the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than | |
2038 | MODE, fill the upper bits with zeros. Fail if the layout of either | |
2039 | mode is unknown (as for CC modes) or if the extraction would involve | |
2040 | unprofitable mode punning. Return the value on success, otherwise | |
2041 | return null. | |
2042 | ||
2043 | This is different from gen_lowpart* in these respects: | |
2044 | ||
2045 | - the returned value must always be considered an rvalue | |
2046 | ||
2047 | - when MODE is wider than SRC_MODE, the extraction involves | |
2048 | a zero extension | |
2049 | ||
2050 | - when MODE is smaller than SRC_MODE, the extraction involves | |
2051 | a truncation (and is thus subject to TRULY_NOOP_TRUNCATION). | |
2052 | ||
2053 | In other words, this routine performs a computation, whereas the | |
2054 | gen_lowpart* routines are conceptually lvalue or rvalue subreg | |
2055 | operations. */ | |
2056 | ||
2057 | rtx | |
2058 | extract_low_bits (enum machine_mode mode, enum machine_mode src_mode, rtx src) | |
2059 | { | |
2060 | enum machine_mode int_mode, src_int_mode; | |
2061 | ||
2062 | if (mode == src_mode) | |
2063 | return src; | |
2064 | ||
2065 | if (CONSTANT_P (src)) | |
d898d29b JJ |
2066 | { |
2067 | /* simplify_gen_subreg can't be used here, as if simplify_subreg | |
2068 | fails, it will happily create (subreg (symbol_ref)) or similar | |
2069 | invalid SUBREGs. */ | |
2070 | unsigned int byte = subreg_lowpart_offset (mode, src_mode); | |
2071 | rtx ret = simplify_subreg (mode, src, src_mode, byte); | |
2072 | if (ret) | |
2073 | return ret; | |
2074 | ||
2075 | if (GET_MODE (src) == VOIDmode | |
2076 | || !validate_subreg (mode, src_mode, src, byte)) | |
2077 | return NULL_RTX; | |
2078 | ||
2079 | src = force_reg (GET_MODE (src), src); | |
2080 | return gen_rtx_SUBREG (mode, src, byte); | |
2081 | } | |
18b526e8 RS |
2082 | |
2083 | if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC) | |
2084 | return NULL_RTX; | |
2085 | ||
2086 | if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode) | |
2087 | && MODES_TIEABLE_P (mode, src_mode)) | |
2088 | { | |
2089 | rtx x = gen_lowpart_common (mode, src); | |
2090 | if (x) | |
2091 | return x; | |
2092 | } | |
2093 | ||
2094 | src_int_mode = int_mode_for_mode (src_mode); | |
2095 | int_mode = int_mode_for_mode (mode); | |
2096 | if (src_int_mode == BLKmode || int_mode == BLKmode) | |
2097 | return NULL_RTX; | |
2098 | ||
2099 | if (!MODES_TIEABLE_P (src_int_mode, src_mode)) | |
2100 | return NULL_RTX; | |
2101 | if (!MODES_TIEABLE_P (int_mode, mode)) | |
2102 | return NULL_RTX; | |
2103 | ||
2104 | src = gen_lowpart (src_int_mode, src); | |
2105 | src = convert_modes (int_mode, src_int_mode, src, true); | |
2106 | src = gen_lowpart (mode, src); | |
2107 | return src; | |
2108 | } | |
2109 | \f | |
44037a66 TG |
2110 | /* Add INC into TARGET. */ |
2111 | ||
2112 | void | |
502b8322 | 2113 | expand_inc (rtx target, rtx inc) |
44037a66 TG |
2114 | { |
2115 | rtx value = expand_binop (GET_MODE (target), add_optab, | |
2116 | target, inc, | |
2117 | target, 0, OPTAB_LIB_WIDEN); | |
2118 | if (value != target) | |
2119 | emit_move_insn (target, value); | |
2120 | } | |
2121 | ||
2122 | /* Subtract DEC from TARGET. */ | |
2123 | ||
2124 | void | |
502b8322 | 2125 | expand_dec (rtx target, rtx dec) |
44037a66 TG |
2126 | { |
2127 | rtx value = expand_binop (GET_MODE (target), sub_optab, | |
2128 | target, dec, | |
2129 | target, 0, OPTAB_LIB_WIDEN); | |
2130 | if (value != target) | |
2131 | emit_move_insn (target, value); | |
2132 | } | |
2133 | \f | |
2134 | /* Output a shift instruction for expression code CODE, | |
2135 | with SHIFTED being the rtx for the value to shift, | |
2136 | and AMOUNT the tree for the amount to shift by. | |
2137 | Store the result in the rtx TARGET, if that is convenient. | |
2138 | If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic. | |
2139 | Return the rtx for where the value is. */ | |
2140 | ||
2141 | rtx | |
502b8322 AJ |
2142 | expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted, |
2143 | tree amount, rtx target, int unsignedp) | |
44037a66 | 2144 | { |
b3694847 SS |
2145 | rtx op1, temp = 0; |
2146 | int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR); | |
2147 | int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR); | |
71d46ca5 MM |
2148 | optab lshift_optab = ashl_optab; |
2149 | optab rshift_arith_optab = ashr_optab; | |
2150 | optab rshift_uns_optab = lshr_optab; | |
2151 | optab lrotate_optab = rotl_optab; | |
2152 | optab rrotate_optab = rotr_optab; | |
2153 | enum machine_mode op1_mode; | |
d858f359 | 2154 | int attempt; |
f40751dd | 2155 | bool speed = optimize_insn_for_speed_p (); |
44037a66 | 2156 | |
71d46ca5 MM |
2157 | op1 = expand_normal (amount); |
2158 | op1_mode = GET_MODE (op1); | |
2159 | ||
2160 | /* Determine whether the shift/rotate amount is a vector, or scalar. If the | |
2161 | shift amount is a vector, use the vector/vector shift patterns. */ | |
2162 | if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode)) | |
2163 | { | |
2164 | lshift_optab = vashl_optab; | |
2165 | rshift_arith_optab = vashr_optab; | |
2166 | rshift_uns_optab = vlshr_optab; | |
2167 | lrotate_optab = vrotl_optab; | |
2168 | rrotate_optab = vrotr_optab; | |
2169 | } | |
2170 | ||
44037a66 TG |
2171 | /* Previously detected shift-counts computed by NEGATE_EXPR |
2172 | and shifted in the other direction; but that does not work | |
2173 | on all machines. */ | |
2174 | ||
166cdf4a RH |
2175 | if (SHIFT_COUNT_TRUNCATED) |
2176 | { | |
481683e1 | 2177 | if (CONST_INT_P (op1) |
0fb7aeda | 2178 | && ((unsigned HOST_WIDE_INT) INTVAL (op1) >= |
c84e2712 | 2179 | (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode))) |
0fb7aeda | 2180 | op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1) |
166cdf4a RH |
2181 | % GET_MODE_BITSIZE (mode)); |
2182 | else if (GET_CODE (op1) == SUBREG | |
c1cb09ad AN |
2183 | && subreg_lowpart_p (op1) |
2184 | && INTEGRAL_MODE_P (GET_MODE (SUBREG_REG (op1)))) | |
166cdf4a RH |
2185 | op1 = SUBREG_REG (op1); |
2186 | } | |
2ab0a5c4 | 2187 | |
44037a66 TG |
2188 | if (op1 == const0_rtx) |
2189 | return shifted; | |
2190 | ||
15bad393 RS |
2191 | /* Check whether its cheaper to implement a left shift by a constant |
2192 | bit count by a sequence of additions. */ | |
2193 | if (code == LSHIFT_EXPR | |
481683e1 | 2194 | && CONST_INT_P (op1) |
15bad393 RS |
2195 | && INTVAL (op1) > 0 |
2196 | && INTVAL (op1) < GET_MODE_BITSIZE (mode) | |
cb2eb96f | 2197 | && INTVAL (op1) < MAX_BITS_PER_WORD |
f40751dd JH |
2198 | && shift_cost[speed][mode][INTVAL (op1)] > INTVAL (op1) * add_cost[speed][mode] |
2199 | && shift_cost[speed][mode][INTVAL (op1)] != MAX_COST) | |
15bad393 RS |
2200 | { |
2201 | int i; | |
2202 | for (i = 0; i < INTVAL (op1); i++) | |
2203 | { | |
2204 | temp = force_reg (mode, shifted); | |
2205 | shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX, | |
2206 | unsignedp, OPTAB_LIB_WIDEN); | |
2207 | } | |
2208 | return shifted; | |
2209 | } | |
2210 | ||
d858f359 | 2211 | for (attempt = 0; temp == 0 && attempt < 3; attempt++) |
44037a66 TG |
2212 | { |
2213 | enum optab_methods methods; | |
2214 | ||
d858f359 | 2215 | if (attempt == 0) |
44037a66 | 2216 | methods = OPTAB_DIRECT; |
d858f359 | 2217 | else if (attempt == 1) |
44037a66 TG |
2218 | methods = OPTAB_WIDEN; |
2219 | else | |
2220 | methods = OPTAB_LIB_WIDEN; | |
2221 | ||
2222 | if (rotate) | |
2223 | { | |
2224 | /* Widening does not work for rotation. */ | |
2225 | if (methods == OPTAB_WIDEN) | |
2226 | continue; | |
2227 | else if (methods == OPTAB_LIB_WIDEN) | |
cbec710e | 2228 | { |
39e71615 | 2229 | /* If we have been unable to open-code this by a rotation, |
cbec710e RK |
2230 | do it as the IOR of two shifts. I.e., to rotate A |
2231 | by N bits, compute (A << N) | ((unsigned) A >> (C - N)) | |
2232 | where C is the bitsize of A. | |
2233 | ||
2234 | It is theoretically possible that the target machine might | |
2235 | not be able to perform either shift and hence we would | |
2236 | be making two libcalls rather than just the one for the | |
2237 | shift (similarly if IOR could not be done). We will allow | |
2238 | this extremely unlikely lossage to avoid complicating the | |
2239 | code below. */ | |
2240 | ||
39e71615 | 2241 | rtx subtarget = target == shifted ? 0 : target; |
fa00f91b | 2242 | tree new_amount, other_amount; |
39e71615 RK |
2243 | rtx temp1; |
2244 | tree type = TREE_TYPE (amount); | |
fa00f91b RS |
2245 | if (GET_MODE (op1) != TYPE_MODE (type) |
2246 | && GET_MODE (op1) != VOIDmode) | |
2247 | op1 = convert_to_mode (TYPE_MODE (type), op1, 1); | |
2248 | new_amount = make_tree (type, op1); | |
2249 | other_amount | |
4845b383 KH |
2250 | = fold_build2 (MINUS_EXPR, type, |
2251 | build_int_cst (type, GET_MODE_BITSIZE (mode)), | |
fa00f91b | 2252 | new_amount); |
39e71615 RK |
2253 | |
2254 | shifted = force_reg (mode, shifted); | |
2255 | ||
2256 | temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR, | |
6231646a | 2257 | mode, shifted, new_amount, 0, 1); |
39e71615 | 2258 | temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR, |
6231646a | 2259 | mode, shifted, other_amount, subtarget, 1); |
39e71615 RK |
2260 | return expand_binop (mode, ior_optab, temp, temp1, target, |
2261 | unsignedp, methods); | |
cbec710e | 2262 | } |
44037a66 TG |
2263 | |
2264 | temp = expand_binop (mode, | |
71d46ca5 | 2265 | left ? lrotate_optab : rrotate_optab, |
44037a66 TG |
2266 | shifted, op1, target, unsignedp, methods); |
2267 | } | |
2268 | else if (unsignedp) | |
a34958c9 | 2269 | temp = expand_binop (mode, |
71d46ca5 | 2270 | left ? lshift_optab : rshift_uns_optab, |
a34958c9 | 2271 | shifted, op1, target, unsignedp, methods); |
44037a66 TG |
2272 | |
2273 | /* Do arithmetic shifts. | |
2274 | Also, if we are going to widen the operand, we can just as well | |
2275 | use an arithmetic right-shift instead of a logical one. */ | |
2276 | if (temp == 0 && ! rotate | |
2277 | && (! unsignedp || (! left && methods == OPTAB_WIDEN))) | |
2278 | { | |
2279 | enum optab_methods methods1 = methods; | |
2280 | ||
2281 | /* If trying to widen a log shift to an arithmetic shift, | |
2282 | don't accept an arithmetic shift of the same size. */ | |
2283 | if (unsignedp) | |
2284 | methods1 = OPTAB_MUST_WIDEN; | |
2285 | ||
2286 | /* Arithmetic shift */ | |
2287 | ||
2288 | temp = expand_binop (mode, | |
71d46ca5 | 2289 | left ? lshift_optab : rshift_arith_optab, |
44037a66 TG |
2290 | shifted, op1, target, unsignedp, methods1); |
2291 | } | |
2292 | ||
711a5e64 | 2293 | /* We used to try extzv here for logical right shifts, but that was |
c410d49e | 2294 | only useful for one machine, the VAX, and caused poor code |
711a5e64 RK |
2295 | generation there for lshrdi3, so the code was deleted and a |
2296 | define_expand for lshrsi3 was added to vax.md. */ | |
44037a66 TG |
2297 | } |
2298 | ||
5b0264cb | 2299 | gcc_assert (temp); |
44037a66 TG |
2300 | return temp; |
2301 | } | |
2302 | \f | |
8efc8980 RS |
2303 | /* Indicates the type of fixup needed after a constant multiplication. |
2304 | BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that | |
2305 | the result should be negated, and ADD_VARIANT means that the | |
2306 | multiplicand should be added to the result. */ | |
2307 | enum mult_variant {basic_variant, negate_variant, add_variant}; | |
2308 | ||
41c64ac0 | 2309 | static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT, |
26276705 | 2310 | const struct mult_cost *, enum machine_mode mode); |
8efc8980 | 2311 | static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT, |
f258e38b | 2312 | struct algorithm *, enum mult_variant *, int); |
8efc8980 RS |
2313 | static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx, |
2314 | const struct algorithm *, enum mult_variant); | |
502b8322 | 2315 | static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int, |
e71c0aa7 | 2316 | int, rtx *, int *, int *); |
502b8322 | 2317 | static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int); |
8efc8980 | 2318 | static rtx extract_high_half (enum machine_mode, rtx); |
0d282692 | 2319 | static rtx expand_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int); |
8efc8980 RS |
2320 | static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx, |
2321 | int, int); | |
44037a66 | 2322 | /* Compute and return the best algorithm for multiplying by T. |
7963ac37 RK |
2323 | The algorithm must cost less than cost_limit |
2324 | If retval.cost >= COST_LIMIT, no algorithm was found and all | |
41c64ac0 RS |
2325 | other field of the returned struct are undefined. |
2326 | MODE is the machine mode of the multiplication. */ | |
44037a66 | 2327 | |
819126a6 | 2328 | static void |
502b8322 | 2329 | synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t, |
26276705 | 2330 | const struct mult_cost *cost_limit, enum machine_mode mode) |
44037a66 | 2331 | { |
b2fb324c | 2332 | int m; |
52786026 | 2333 | struct algorithm *alg_in, *best_alg; |
26276705 RS |
2334 | struct mult_cost best_cost; |
2335 | struct mult_cost new_limit; | |
2336 | int op_cost, op_latency; | |
ef268d34 | 2337 | unsigned HOST_WIDE_INT orig_t = t; |
b2fb324c | 2338 | unsigned HOST_WIDE_INT q; |
0792ab19 | 2339 | int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode)); |
7b13ee6b KH |
2340 | int hash_index; |
2341 | bool cache_hit = false; | |
2342 | enum alg_code cache_alg = alg_zero; | |
f40751dd | 2343 | bool speed = optimize_insn_for_speed_p (); |
44037a66 | 2344 | |
7963ac37 RK |
2345 | /* Indicate that no algorithm is yet found. If no algorithm |
2346 | is found, this value will be returned and indicate failure. */ | |
26276705 | 2347 | alg_out->cost.cost = cost_limit->cost + 1; |
3ab0f290 | 2348 | alg_out->cost.latency = cost_limit->latency + 1; |
44037a66 | 2349 | |
26276705 RS |
2350 | if (cost_limit->cost < 0 |
2351 | || (cost_limit->cost == 0 && cost_limit->latency <= 0)) | |
819126a6 | 2352 | return; |
44037a66 | 2353 | |
0792ab19 RS |
2354 | /* Restrict the bits of "t" to the multiplication's mode. */ |
2355 | t &= GET_MODE_MASK (mode); | |
2356 | ||
b385aeda RK |
2357 | /* t == 1 can be done in zero cost. */ |
2358 | if (t == 1) | |
b2fb324c | 2359 | { |
819126a6 | 2360 | alg_out->ops = 1; |
26276705 RS |
2361 | alg_out->cost.cost = 0; |
2362 | alg_out->cost.latency = 0; | |
819126a6 RK |
2363 | alg_out->op[0] = alg_m; |
2364 | return; | |
b2fb324c RK |
2365 | } |
2366 | ||
b385aeda RK |
2367 | /* t == 0 sometimes has a cost. If it does and it exceeds our limit, |
2368 | fail now. */ | |
819126a6 | 2369 | if (t == 0) |
b385aeda | 2370 | { |
f40751dd | 2371 | if (MULT_COST_LESS (cost_limit, zero_cost[speed])) |
819126a6 | 2372 | return; |
b385aeda RK |
2373 | else |
2374 | { | |
819126a6 | 2375 | alg_out->ops = 1; |
f40751dd JH |
2376 | alg_out->cost.cost = zero_cost[speed]; |
2377 | alg_out->cost.latency = zero_cost[speed]; | |
819126a6 RK |
2378 | alg_out->op[0] = alg_zero; |
2379 | return; | |
b385aeda RK |
2380 | } |
2381 | } | |
2382 | ||
52786026 RK |
2383 | /* We'll be needing a couple extra algorithm structures now. */ |
2384 | ||
1b4572a8 KG |
2385 | alg_in = XALLOCA (struct algorithm); |
2386 | best_alg = XALLOCA (struct algorithm); | |
26276705 | 2387 | best_cost = *cost_limit; |
52786026 | 2388 | |
7b13ee6b | 2389 | /* Compute the hash index. */ |
f40751dd | 2390 | hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES; |
7b13ee6b KH |
2391 | |
2392 | /* See if we already know what to do for T. */ | |
2393 | if (alg_hash[hash_index].t == t | |
2394 | && alg_hash[hash_index].mode == mode | |
f40751dd JH |
2395 | && alg_hash[hash_index].mode == mode |
2396 | && alg_hash[hash_index].speed == speed | |
7b13ee6b KH |
2397 | && alg_hash[hash_index].alg != alg_unknown) |
2398 | { | |
7b13ee6b | 2399 | cache_alg = alg_hash[hash_index].alg; |
0178027c KH |
2400 | |
2401 | if (cache_alg == alg_impossible) | |
7b13ee6b | 2402 | { |
0178027c KH |
2403 | /* The cache tells us that it's impossible to synthesize |
2404 | multiplication by T within alg_hash[hash_index].cost. */ | |
2405 | if (!CHEAPER_MULT_COST (&alg_hash[hash_index].cost, cost_limit)) | |
2406 | /* COST_LIMIT is at least as restrictive as the one | |
2407 | recorded in the hash table, in which case we have no | |
2408 | hope of synthesizing a multiplication. Just | |
2409 | return. */ | |
2410 | return; | |
2411 | ||
2412 | /* If we get here, COST_LIMIT is less restrictive than the | |
2413 | one recorded in the hash table, so we may be able to | |
2414 | synthesize a multiplication. Proceed as if we didn't | |
2415 | have the cache entry. */ | |
2416 | } | |
2417 | else | |
2418 | { | |
2419 | if (CHEAPER_MULT_COST (cost_limit, &alg_hash[hash_index].cost)) | |
2420 | /* The cached algorithm shows that this multiplication | |
2421 | requires more cost than COST_LIMIT. Just return. This | |
2422 | way, we don't clobber this cache entry with | |
2423 | alg_impossible but retain useful information. */ | |
2424 | return; | |
7b13ee6b | 2425 | |
0178027c KH |
2426 | cache_hit = true; |
2427 | ||
2428 | switch (cache_alg) | |
2429 | { | |
2430 | case alg_shift: | |
2431 | goto do_alg_shift; | |
7b13ee6b | 2432 | |
0178027c KH |
2433 | case alg_add_t_m2: |
2434 | case alg_sub_t_m2: | |
2435 | goto do_alg_addsub_t_m2; | |
7b13ee6b | 2436 | |
0178027c KH |
2437 | case alg_add_factor: |
2438 | case alg_sub_factor: | |
2439 | goto do_alg_addsub_factor; | |
7b13ee6b | 2440 | |
0178027c KH |
2441 | case alg_add_t2_m: |
2442 | goto do_alg_add_t2_m; | |
7b13ee6b | 2443 | |
0178027c KH |
2444 | case alg_sub_t2_m: |
2445 | goto do_alg_sub_t2_m; | |
2446 | ||
2447 | default: | |
2448 | gcc_unreachable (); | |
2449 | } | |
7b13ee6b KH |
2450 | } |
2451 | } | |
2452 | ||
b385aeda RK |
2453 | /* If we have a group of zero bits at the low-order part of T, try |
2454 | multiplying by the remaining bits and then doing a shift. */ | |
2455 | ||
b2fb324c | 2456 | if ((t & 1) == 0) |
44037a66 | 2457 | { |
7b13ee6b | 2458 | do_alg_shift: |
b2fb324c | 2459 | m = floor_log2 (t & -t); /* m = number of low zero bits */ |
0792ab19 | 2460 | if (m < maxm) |
44037a66 | 2461 | { |
02a65aef | 2462 | q = t >> m; |
15bad393 RS |
2463 | /* The function expand_shift will choose between a shift and |
2464 | a sequence of additions, so the observed cost is given as | |
f40751dd JH |
2465 | MIN (m * add_cost[speed][mode], shift_cost[speed][mode][m]). */ |
2466 | op_cost = m * add_cost[speed][mode]; | |
2467 | if (shift_cost[speed][mode][m] < op_cost) | |
2468 | op_cost = shift_cost[speed][mode][m]; | |
26276705 RS |
2469 | new_limit.cost = best_cost.cost - op_cost; |
2470 | new_limit.latency = best_cost.latency - op_cost; | |
2471 | synth_mult (alg_in, q, &new_limit, mode); | |
2472 | ||
2473 | alg_in->cost.cost += op_cost; | |
2474 | alg_in->cost.latency += op_cost; | |
2475 | if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) | |
02a65aef R |
2476 | { |
2477 | struct algorithm *x; | |
26276705 | 2478 | best_cost = alg_in->cost; |
02a65aef R |
2479 | x = alg_in, alg_in = best_alg, best_alg = x; |
2480 | best_alg->log[best_alg->ops] = m; | |
2481 | best_alg->op[best_alg->ops] = alg_shift; | |
02a65aef | 2482 | } |
ddc2690a KH |
2483 | |
2484 | /* See if treating ORIG_T as a signed number yields a better | |
2485 | sequence. Try this sequence only for a negative ORIG_T | |
2486 | as it would be useless for a non-negative ORIG_T. */ | |
2487 | if ((HOST_WIDE_INT) orig_t < 0) | |
2488 | { | |
2489 | /* Shift ORIG_T as follows because a right shift of a | |
2490 | negative-valued signed type is implementation | |
2491 | defined. */ | |
2492 | q = ~(~orig_t >> m); | |
2493 | /* The function expand_shift will choose between a shift | |
2494 | and a sequence of additions, so the observed cost is | |
2495 | given as MIN (m * add_cost[speed][mode], | |
2496 | shift_cost[speed][mode][m]). */ | |
2497 | op_cost = m * add_cost[speed][mode]; | |
2498 | if (shift_cost[speed][mode][m] < op_cost) | |
2499 | op_cost = shift_cost[speed][mode][m]; | |
2500 | new_limit.cost = best_cost.cost - op_cost; | |
2501 | new_limit.latency = best_cost.latency - op_cost; | |
2502 | synth_mult (alg_in, q, &new_limit, mode); | |
2503 | ||
2504 | alg_in->cost.cost += op_cost; | |
2505 | alg_in->cost.latency += op_cost; | |
2506 | if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) | |
2507 | { | |
2508 | struct algorithm *x; | |
2509 | best_cost = alg_in->cost; | |
2510 | x = alg_in, alg_in = best_alg, best_alg = x; | |
2511 | best_alg->log[best_alg->ops] = m; | |
2512 | best_alg->op[best_alg->ops] = alg_shift; | |
2513 | } | |
2514 | } | |
819126a6 | 2515 | } |
7b13ee6b KH |
2516 | if (cache_hit) |
2517 | goto done; | |
819126a6 RK |
2518 | } |
2519 | ||
2520 | /* If we have an odd number, add or subtract one. */ | |
2521 | if ((t & 1) != 0) | |
2522 | { | |
2523 | unsigned HOST_WIDE_INT w; | |
2524 | ||
7b13ee6b | 2525 | do_alg_addsub_t_m2: |
819126a6 RK |
2526 | for (w = 1; (w & t) != 0; w <<= 1) |
2527 | ; | |
31031edd | 2528 | /* If T was -1, then W will be zero after the loop. This is another |
c410d49e | 2529 | case where T ends with ...111. Handling this with (T + 1) and |
31031edd JL |
2530 | subtract 1 produces slightly better code and results in algorithm |
2531 | selection much faster than treating it like the ...0111 case | |
2532 | below. */ | |
2533 | if (w == 0 | |
2534 | || (w > 2 | |
2535 | /* Reject the case where t is 3. | |
2536 | Thus we prefer addition in that case. */ | |
2537 | && t != 3)) | |
819126a6 RK |
2538 | { |
2539 | /* T ends with ...111. Multiply by (T + 1) and subtract 1. */ | |
2540 | ||
f40751dd | 2541 | op_cost = add_cost[speed][mode]; |
26276705 RS |
2542 | new_limit.cost = best_cost.cost - op_cost; |
2543 | new_limit.latency = best_cost.latency - op_cost; | |
2544 | synth_mult (alg_in, t + 1, &new_limit, mode); | |
b2fb324c | 2545 | |
26276705 RS |
2546 | alg_in->cost.cost += op_cost; |
2547 | alg_in->cost.latency += op_cost; | |
2548 | if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) | |
44037a66 | 2549 | { |
b2fb324c | 2550 | struct algorithm *x; |
26276705 | 2551 | best_cost = alg_in->cost; |
b2fb324c | 2552 | x = alg_in, alg_in = best_alg, best_alg = x; |
819126a6 RK |
2553 | best_alg->log[best_alg->ops] = 0; |
2554 | best_alg->op[best_alg->ops] = alg_sub_t_m2; | |
44037a66 | 2555 | } |
44037a66 | 2556 | } |
819126a6 RK |
2557 | else |
2558 | { | |
2559 | /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */ | |
44037a66 | 2560 | |
f40751dd | 2561 | op_cost = add_cost[speed][mode]; |
26276705 RS |
2562 | new_limit.cost = best_cost.cost - op_cost; |
2563 | new_limit.latency = best_cost.latency - op_cost; | |
2564 | synth_mult (alg_in, t - 1, &new_limit, mode); | |
819126a6 | 2565 | |
26276705 RS |
2566 | alg_in->cost.cost += op_cost; |
2567 | alg_in->cost.latency += op_cost; | |
2568 | if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) | |
819126a6 RK |
2569 | { |
2570 | struct algorithm *x; | |
26276705 | 2571 | best_cost = alg_in->cost; |
819126a6 RK |
2572 | x = alg_in, alg_in = best_alg, best_alg = x; |
2573 | best_alg->log[best_alg->ops] = 0; | |
2574 | best_alg->op[best_alg->ops] = alg_add_t_m2; | |
819126a6 RK |
2575 | } |
2576 | } | |
ef268d34 KH |
2577 | |
2578 | /* We may be able to calculate a * -7, a * -15, a * -31, etc | |
2579 | quickly with a - a * n for some appropriate constant n. */ | |
2580 | m = exact_log2 (-orig_t + 1); | |
2581 | if (m >= 0 && m < maxm) | |
2582 | { | |
2583 | op_cost = shiftsub1_cost[speed][mode][m]; | |
2584 | new_limit.cost = best_cost.cost - op_cost; | |
2585 | new_limit.latency = best_cost.latency - op_cost; | |
2586 | synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m, &new_limit, mode); | |
2587 | ||
2588 | alg_in->cost.cost += op_cost; | |
2589 | alg_in->cost.latency += op_cost; | |
2590 | if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) | |
2591 | { | |
2592 | struct algorithm *x; | |
2593 | best_cost = alg_in->cost; | |
2594 | x = alg_in, alg_in = best_alg, best_alg = x; | |
2595 | best_alg->log[best_alg->ops] = m; | |
2596 | best_alg->op[best_alg->ops] = alg_sub_t_m2; | |
2597 | } | |
2598 | } | |
2599 | ||
7b13ee6b KH |
2600 | if (cache_hit) |
2601 | goto done; | |
819126a6 | 2602 | } |
63610db9 | 2603 | |
44037a66 | 2604 | /* Look for factors of t of the form |
7963ac37 | 2605 | t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)). |
44037a66 | 2606 | If we find such a factor, we can multiply by t using an algorithm that |
7963ac37 | 2607 | multiplies by q, shift the result by m and add/subtract it to itself. |
44037a66 | 2608 | |
7963ac37 RK |
2609 | We search for large factors first and loop down, even if large factors |
2610 | are less probable than small; if we find a large factor we will find a | |
2611 | good sequence quickly, and therefore be able to prune (by decreasing | |
2612 | COST_LIMIT) the search. */ | |
2613 | ||
7b13ee6b | 2614 | do_alg_addsub_factor: |
7963ac37 | 2615 | for (m = floor_log2 (t - 1); m >= 2; m--) |
44037a66 | 2616 | { |
7963ac37 | 2617 | unsigned HOST_WIDE_INT d; |
44037a66 | 2618 | |
7963ac37 | 2619 | d = ((unsigned HOST_WIDE_INT) 1 << m) + 1; |
7b13ee6b KH |
2620 | if (t % d == 0 && t > d && m < maxm |
2621 | && (!cache_hit || cache_alg == alg_add_factor)) | |
44037a66 | 2622 | { |
26276705 RS |
2623 | /* If the target has a cheap shift-and-add instruction use |
2624 | that in preference to a shift insn followed by an add insn. | |
2625 | Assume that the shift-and-add is "atomic" with a latency | |
a37739c1 | 2626 | equal to its cost, otherwise assume that on superscalar |
26276705 RS |
2627 | hardware the shift may be executed concurrently with the |
2628 | earlier steps in the algorithm. */ | |
f40751dd JH |
2629 | op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m]; |
2630 | if (shiftadd_cost[speed][mode][m] < op_cost) | |
26276705 | 2631 | { |
f40751dd | 2632 | op_cost = shiftadd_cost[speed][mode][m]; |
26276705 RS |
2633 | op_latency = op_cost; |
2634 | } | |
2635 | else | |
f40751dd | 2636 | op_latency = add_cost[speed][mode]; |
26276705 RS |
2637 | |
2638 | new_limit.cost = best_cost.cost - op_cost; | |
2639 | new_limit.latency = best_cost.latency - op_latency; | |
2640 | synth_mult (alg_in, t / d, &new_limit, mode); | |
44037a66 | 2641 | |
26276705 RS |
2642 | alg_in->cost.cost += op_cost; |
2643 | alg_in->cost.latency += op_latency; | |
2644 | if (alg_in->cost.latency < op_cost) | |
2645 | alg_in->cost.latency = op_cost; | |
2646 | if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) | |
44037a66 | 2647 | { |
7963ac37 | 2648 | struct algorithm *x; |
26276705 | 2649 | best_cost = alg_in->cost; |
7963ac37 | 2650 | x = alg_in, alg_in = best_alg, best_alg = x; |
b385aeda | 2651 | best_alg->log[best_alg->ops] = m; |
819126a6 | 2652 | best_alg->op[best_alg->ops] = alg_add_factor; |
44037a66 | 2653 | } |
c0b262c1 TG |
2654 | /* Other factors will have been taken care of in the recursion. */ |
2655 | break; | |
44037a66 TG |
2656 | } |
2657 | ||
7963ac37 | 2658 | d = ((unsigned HOST_WIDE_INT) 1 << m) - 1; |
7b13ee6b KH |
2659 | if (t % d == 0 && t > d && m < maxm |
2660 | && (!cache_hit || cache_alg == alg_sub_factor)) | |
44037a66 | 2661 | { |
26276705 RS |
2662 | /* If the target has a cheap shift-and-subtract insn use |
2663 | that in preference to a shift insn followed by a sub insn. | |
2664 | Assume that the shift-and-sub is "atomic" with a latency | |
2665 | equal to it's cost, otherwise assume that on superscalar | |
2666 | hardware the shift may be executed concurrently with the | |
2667 | earlier steps in the algorithm. */ | |
f40751dd | 2668 | op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m]; |
ef268d34 | 2669 | if (shiftsub0_cost[speed][mode][m] < op_cost) |
26276705 | 2670 | { |
ef268d34 | 2671 | op_cost = shiftsub0_cost[speed][mode][m]; |
26276705 RS |
2672 | op_latency = op_cost; |
2673 | } | |
2674 | else | |
f40751dd | 2675 | op_latency = add_cost[speed][mode]; |
26276705 RS |
2676 | |
2677 | new_limit.cost = best_cost.cost - op_cost; | |
417c735c | 2678 | new_limit.latency = best_cost.latency - op_latency; |
26276705 | 2679 | synth_mult (alg_in, t / d, &new_limit, mode); |
44037a66 | 2680 | |
26276705 RS |
2681 | alg_in->cost.cost += op_cost; |
2682 | alg_in->cost.latency += op_latency; | |
2683 | if (alg_in->cost.latency < op_cost) | |
2684 | alg_in->cost.latency = op_cost; | |
2685 | if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) | |
44037a66 | 2686 | { |
7963ac37 | 2687 | struct algorithm *x; |
26276705 | 2688 | best_cost = alg_in->cost; |
7963ac37 | 2689 | x = alg_in, alg_in = best_alg, best_alg = x; |
b385aeda | 2690 | best_alg->log[best_alg->ops] = m; |
819126a6 | 2691 | best_alg->op[best_alg->ops] = alg_sub_factor; |
44037a66 | 2692 | } |
c0b262c1 | 2693 | break; |
44037a66 TG |
2694 | } |
2695 | } | |
7b13ee6b KH |
2696 | if (cache_hit) |
2697 | goto done; | |
44037a66 | 2698 | |
7963ac37 RK |
2699 | /* Try shift-and-add (load effective address) instructions, |
2700 | i.e. do a*3, a*5, a*9. */ | |
2701 | if ((t & 1) != 0) | |
2702 | { | |
7b13ee6b | 2703 | do_alg_add_t2_m: |
7963ac37 RK |
2704 | q = t - 1; |
2705 | q = q & -q; | |
2706 | m = exact_log2 (q); | |
0792ab19 | 2707 | if (m >= 0 && m < maxm) |
b385aeda | 2708 | { |
f40751dd | 2709 | op_cost = shiftadd_cost[speed][mode][m]; |
26276705 RS |
2710 | new_limit.cost = best_cost.cost - op_cost; |
2711 | new_limit.latency = best_cost.latency - op_cost; | |
2712 | synth_mult (alg_in, (t - 1) >> m, &new_limit, mode); | |
2713 | ||
2714 | alg_in->cost.cost += op_cost; | |
2715 | alg_in->cost.latency += op_cost; | |
2716 | if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) | |
5eebe2eb RK |
2717 | { |
2718 | struct algorithm *x; | |
26276705 | 2719 | best_cost = alg_in->cost; |
5eebe2eb RK |
2720 | x = alg_in, alg_in = best_alg, best_alg = x; |
2721 | best_alg->log[best_alg->ops] = m; | |
819126a6 | 2722 | best_alg->op[best_alg->ops] = alg_add_t2_m; |
5eebe2eb | 2723 | } |
7963ac37 | 2724 | } |
7b13ee6b KH |
2725 | if (cache_hit) |
2726 | goto done; | |
44037a66 | 2727 | |
7b13ee6b | 2728 | do_alg_sub_t2_m: |
7963ac37 RK |
2729 | q = t + 1; |
2730 | q = q & -q; | |
2731 | m = exact_log2 (q); | |
0792ab19 | 2732 | if (m >= 0 && m < maxm) |
b385aeda | 2733 | { |
ef268d34 | 2734 | op_cost = shiftsub0_cost[speed][mode][m]; |
26276705 RS |
2735 | new_limit.cost = best_cost.cost - op_cost; |
2736 | new_limit.latency = best_cost.latency - op_cost; | |
2737 | synth_mult (alg_in, (t + 1) >> m, &new_limit, mode); | |
2738 | ||
2739 | alg_in->cost.cost += op_cost; | |
2740 | alg_in->cost.latency += op_cost; | |
2741 | if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) | |
5eebe2eb RK |
2742 | { |
2743 | struct algorithm *x; | |
26276705 | 2744 | best_cost = alg_in->cost; |
5eebe2eb RK |
2745 | x = alg_in, alg_in = best_alg, best_alg = x; |
2746 | best_alg->log[best_alg->ops] = m; | |
819126a6 | 2747 | best_alg->op[best_alg->ops] = alg_sub_t2_m; |
5eebe2eb | 2748 | } |
7963ac37 | 2749 | } |
7b13ee6b KH |
2750 | if (cache_hit) |
2751 | goto done; | |
7963ac37 | 2752 | } |
44037a66 | 2753 | |
7b13ee6b | 2754 | done: |
3ab0f290 DJ |
2755 | /* If best_cost has not decreased, we have not found any algorithm. */ |
2756 | if (!CHEAPER_MULT_COST (&best_cost, cost_limit)) | |
0178027c KH |
2757 | { |
2758 | /* We failed to find an algorithm. Record alg_impossible for | |
2759 | this case (that is, <T, MODE, COST_LIMIT>) so that next time | |
2760 | we are asked to find an algorithm for T within the same or | |
2761 | lower COST_LIMIT, we can immediately return to the | |
2762 | caller. */ | |
2763 | alg_hash[hash_index].t = t; | |
2764 | alg_hash[hash_index].mode = mode; | |
f40751dd | 2765 | alg_hash[hash_index].speed = speed; |
0178027c KH |
2766 | alg_hash[hash_index].alg = alg_impossible; |
2767 | alg_hash[hash_index].cost = *cost_limit; | |
2768 | return; | |
2769 | } | |
3ab0f290 | 2770 | |
7b13ee6b KH |
2771 | /* Cache the result. */ |
2772 | if (!cache_hit) | |
2773 | { | |
2774 | alg_hash[hash_index].t = t; | |
2775 | alg_hash[hash_index].mode = mode; | |
f40751dd | 2776 | alg_hash[hash_index].speed = speed; |
7b13ee6b | 2777 | alg_hash[hash_index].alg = best_alg->op[best_alg->ops]; |
0178027c KH |
2778 | alg_hash[hash_index].cost.cost = best_cost.cost; |
2779 | alg_hash[hash_index].cost.latency = best_cost.latency; | |
7b13ee6b KH |
2780 | } |
2781 | ||
52786026 RK |
2782 | /* If we are getting a too long sequence for `struct algorithm' |
2783 | to record, make this search fail. */ | |
2784 | if (best_alg->ops == MAX_BITS_PER_WORD) | |
2785 | return; | |
2786 | ||
819126a6 RK |
2787 | /* Copy the algorithm from temporary space to the space at alg_out. |
2788 | We avoid using structure assignment because the majority of | |
2789 | best_alg is normally undefined, and this is a critical function. */ | |
2790 | alg_out->ops = best_alg->ops + 1; | |
26276705 | 2791 | alg_out->cost = best_cost; |
4e135bdd KG |
2792 | memcpy (alg_out->op, best_alg->op, |
2793 | alg_out->ops * sizeof *alg_out->op); | |
2794 | memcpy (alg_out->log, best_alg->log, | |
2795 | alg_out->ops * sizeof *alg_out->log); | |
44037a66 TG |
2796 | } |
2797 | \f | |
d1a6adeb | 2798 | /* Find the cheapest way of multiplying a value of mode MODE by VAL. |
8efc8980 RS |
2799 | Try three variations: |
2800 | ||
2801 | - a shift/add sequence based on VAL itself | |
2802 | - a shift/add sequence based on -VAL, followed by a negation | |
2803 | - a shift/add sequence based on VAL - 1, followed by an addition. | |
2804 | ||
f258e38b UW |
2805 | Return true if the cheapest of these cost less than MULT_COST, |
2806 | describing the algorithm in *ALG and final fixup in *VARIANT. */ | |
8efc8980 RS |
2807 | |
2808 | static bool | |
2809 | choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val, | |
f258e38b UW |
2810 | struct algorithm *alg, enum mult_variant *variant, |
2811 | int mult_cost) | |
8efc8980 | 2812 | { |
8efc8980 | 2813 | struct algorithm alg2; |
26276705 RS |
2814 | struct mult_cost limit; |
2815 | int op_cost; | |
f40751dd | 2816 | bool speed = optimize_insn_for_speed_p (); |
8efc8980 | 2817 | |
18eaea7f RS |
2818 | /* Fail quickly for impossible bounds. */ |
2819 | if (mult_cost < 0) | |
2820 | return false; | |
2821 | ||
2822 | /* Ensure that mult_cost provides a reasonable upper bound. | |
2823 | Any constant multiplication can be performed with less | |
2824 | than 2 * bits additions. */ | |
f40751dd | 2825 | op_cost = 2 * GET_MODE_BITSIZE (mode) * add_cost[speed][mode]; |
18eaea7f RS |
2826 | if (mult_cost > op_cost) |
2827 | mult_cost = op_cost; | |
2828 | ||
8efc8980 | 2829 | *variant = basic_variant; |
26276705 RS |
2830 | limit.cost = mult_cost; |
2831 | limit.latency = mult_cost; | |
2832 | synth_mult (alg, val, &limit, mode); | |
8efc8980 RS |
2833 | |
2834 | /* This works only if the inverted value actually fits in an | |
2835 | `unsigned int' */ | |
2836 | if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode)) | |
2837 | { | |
f40751dd | 2838 | op_cost = neg_cost[speed][mode]; |
26276705 RS |
2839 | if (MULT_COST_LESS (&alg->cost, mult_cost)) |
2840 | { | |
2841 | limit.cost = alg->cost.cost - op_cost; | |
2842 | limit.latency = alg->cost.latency - op_cost; | |
2843 | } | |
2844 | else | |
2845 | { | |
2846 | limit.cost = mult_cost - op_cost; | |
2847 | limit.latency = mult_cost - op_cost; | |
2848 | } | |
2849 | ||
2850 | synth_mult (&alg2, -val, &limit, mode); | |
2851 | alg2.cost.cost += op_cost; | |
2852 | alg2.cost.latency += op_cost; | |
2853 | if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost)) | |
8efc8980 RS |
2854 | *alg = alg2, *variant = negate_variant; |
2855 | } | |
2856 | ||
2857 | /* This proves very useful for division-by-constant. */ | |
f40751dd | 2858 | op_cost = add_cost[speed][mode]; |
26276705 RS |
2859 | if (MULT_COST_LESS (&alg->cost, mult_cost)) |
2860 | { | |
2861 | limit.cost = alg->cost.cost - op_cost; | |
2862 | limit.latency = alg->cost.latency - op_cost; | |
2863 | } | |
2864 | else | |
2865 | { | |
2866 | limit.cost = mult_cost - op_cost; | |
2867 | limit.latency = mult_cost - op_cost; | |
2868 | } | |
2869 | ||
2870 | synth_mult (&alg2, val - 1, &limit, mode); | |
2871 | alg2.cost.cost += op_cost; | |
2872 | alg2.cost.latency += op_cost; | |
2873 | if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost)) | |
8efc8980 RS |
2874 | *alg = alg2, *variant = add_variant; |
2875 | ||
26276705 | 2876 | return MULT_COST_LESS (&alg->cost, mult_cost); |
8efc8980 RS |
2877 | } |
2878 | ||
2879 | /* A subroutine of expand_mult, used for constant multiplications. | |
2880 | Multiply OP0 by VAL in mode MODE, storing the result in TARGET if | |
2881 | convenient. Use the shift/add sequence described by ALG and apply | |
2882 | the final fixup specified by VARIANT. */ | |
2883 | ||
2884 | static rtx | |
2885 | expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val, | |
2886 | rtx target, const struct algorithm *alg, | |
2887 | enum mult_variant variant) | |
2888 | { | |
2889 | HOST_WIDE_INT val_so_far; | |
2890 | rtx insn, accum, tem; | |
2891 | int opno; | |
2892 | enum machine_mode nmode; | |
2893 | ||
d448860e JH |
2894 | /* Avoid referencing memory over and over and invalid sharing |
2895 | on SUBREGs. */ | |
2896 | op0 = force_reg (mode, op0); | |
8efc8980 RS |
2897 | |
2898 | /* ACCUM starts out either as OP0 or as a zero, depending on | |
2899 | the first operation. */ | |
2900 | ||
2901 | if (alg->op[0] == alg_zero) | |
2902 | { | |
2903 | accum = copy_to_mode_reg (mode, const0_rtx); | |
2904 | val_so_far = 0; | |
2905 | } | |
2906 | else if (alg->op[0] == alg_m) | |
2907 | { | |
2908 | accum = copy_to_mode_reg (mode, op0); | |
2909 | val_so_far = 1; | |
2910 | } | |
2911 | else | |
5b0264cb | 2912 | gcc_unreachable (); |
8efc8980 RS |
2913 | |
2914 | for (opno = 1; opno < alg->ops; opno++) | |
2915 | { | |
2916 | int log = alg->log[opno]; | |
7c27e184 | 2917 | rtx shift_subtarget = optimize ? 0 : accum; |
8efc8980 RS |
2918 | rtx add_target |
2919 | = (opno == alg->ops - 1 && target != 0 && variant != add_variant | |
7c27e184 | 2920 | && !optimize) |
8efc8980 | 2921 | ? target : 0; |
7c27e184 | 2922 | rtx accum_target = optimize ? 0 : accum; |
8efc8980 RS |
2923 | |
2924 | switch (alg->op[opno]) | |
2925 | { | |
2926 | case alg_shift: | |
4caa21a1 UB |
2927 | tem = expand_shift (LSHIFT_EXPR, mode, accum, |
2928 | build_int_cst (NULL_TREE, log), | |
2929 | NULL_RTX, 0); | |
2930 | /* REG_EQUAL note will be attached to the following insn. */ | |
2931 | emit_move_insn (accum, tem); | |
8efc8980 RS |
2932 | val_so_far <<= log; |
2933 | break; | |
2934 | ||
2935 | case alg_add_t_m2: | |
2936 | tem = expand_shift (LSHIFT_EXPR, mode, op0, | |
7d60be94 | 2937 | build_int_cst (NULL_TREE, log), |
4a90aeeb | 2938 | NULL_RTX, 0); |
8efc8980 RS |
2939 | accum = force_operand (gen_rtx_PLUS (mode, accum, tem), |
2940 | add_target ? add_target : accum_target); | |
2941 | val_so_far += (HOST_WIDE_INT) 1 << log; | |
2942 | break; | |
2943 | ||
2944 | case alg_sub_t_m2: | |
2945 | tem = expand_shift (LSHIFT_EXPR, mode, op0, | |
7d60be94 | 2946 | build_int_cst (NULL_TREE, log), |
4a90aeeb | 2947 | NULL_RTX, 0); |
8efc8980 RS |
2948 | accum = force_operand (gen_rtx_MINUS (mode, accum, tem), |
2949 | add_target ? add_target : accum_target); | |
2950 | val_so_far -= (HOST_WIDE_INT) 1 << log; | |
2951 | break; | |
2952 | ||
2953 | case alg_add_t2_m: | |
2954 | accum = expand_shift (LSHIFT_EXPR, mode, accum, | |
7d60be94 | 2955 | build_int_cst (NULL_TREE, log), |
4a90aeeb | 2956 | shift_subtarget, |
8efc8980 RS |
2957 | 0); |
2958 | accum = force_operand (gen_rtx_PLUS (mode, accum, op0), | |
2959 | add_target ? add_target : accum_target); | |
2960 | val_so_far = (val_so_far << log) + 1; | |
2961 | break; | |
2962 | ||
2963 | case alg_sub_t2_m: | |
2964 | accum = expand_shift (LSHIFT_EXPR, mode, accum, | |
7d60be94 | 2965 | build_int_cst (NULL_TREE, log), |
4a90aeeb | 2966 | shift_subtarget, 0); |
8efc8980 RS |
2967 | accum = force_operand (gen_rtx_MINUS (mode, accum, op0), |
2968 | add_target ? add_target : accum_target); | |
2969 | val_so_far = (val_so_far << log) - 1; | |
2970 | break; | |
2971 | ||
2972 | case alg_add_factor: | |
2973 | tem = expand_shift (LSHIFT_EXPR, mode, accum, | |
7d60be94 | 2974 | build_int_cst (NULL_TREE, log), |
4a90aeeb | 2975 | NULL_RTX, 0); |
8efc8980 RS |
2976 | accum = force_operand (gen_rtx_PLUS (mode, accum, tem), |
2977 | add_target ? add_target : accum_target); | |
2978 | val_so_far += val_so_far << log; | |
2979 | break; | |
2980 | ||
2981 | case alg_sub_factor: | |
2982 | tem = expand_shift (LSHIFT_EXPR, mode, accum, | |
7d60be94 | 2983 | build_int_cst (NULL_TREE, log), |
4a90aeeb | 2984 | NULL_RTX, 0); |
8efc8980 | 2985 | accum = force_operand (gen_rtx_MINUS (mode, tem, accum), |
7c27e184 PB |
2986 | (add_target |
2987 | ? add_target : (optimize ? 0 : tem))); | |
8efc8980 RS |
2988 | val_so_far = (val_so_far << log) - val_so_far; |
2989 | break; | |
2990 | ||
2991 | default: | |
5b0264cb | 2992 | gcc_unreachable (); |
8efc8980 RS |
2993 | } |
2994 | ||
2995 | /* Write a REG_EQUAL note on the last insn so that we can cse | |
2996 | multiplication sequences. Note that if ACCUM is a SUBREG, | |
2997 | we've set the inner register and must properly indicate | |
2998 | that. */ | |
2999 | ||
3000 | tem = op0, nmode = mode; | |
3001 | if (GET_CODE (accum) == SUBREG) | |
3002 | { | |
3003 | nmode = GET_MODE (SUBREG_REG (accum)); | |
3004 | tem = gen_lowpart (nmode, op0); | |
3005 | } | |
3006 | ||
3007 | insn = get_last_insn (); | |
3008 | set_unique_reg_note (insn, REG_EQUAL, | |
d448860e JH |
3009 | gen_rtx_MULT (nmode, tem, |
3010 | GEN_INT (val_so_far))); | |
8efc8980 RS |
3011 | } |
3012 | ||
3013 | if (variant == negate_variant) | |
3014 | { | |
3015 | val_so_far = -val_so_far; | |
3016 | accum = expand_unop (mode, neg_optab, accum, target, 0); | |
3017 | } | |
3018 | else if (variant == add_variant) | |
3019 | { | |
3020 | val_so_far = val_so_far + 1; | |
3021 | accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target); | |
3022 | } | |
3023 | ||
42eb30b5 ZW |
3024 | /* Compare only the bits of val and val_so_far that are significant |
3025 | in the result mode, to avoid sign-/zero-extension confusion. */ | |
3026 | val &= GET_MODE_MASK (mode); | |
3027 | val_so_far &= GET_MODE_MASK (mode); | |
5b0264cb | 3028 | gcc_assert (val == val_so_far); |
8efc8980 RS |
3029 | |
3030 | return accum; | |
3031 | } | |
3032 | ||
44037a66 TG |
3033 | /* Perform a multiplication and return an rtx for the result. |
3034 | MODE is mode of value; OP0 and OP1 are what to multiply (rtx's); | |
3035 | TARGET is a suggestion for where to store the result (an rtx). | |
3036 | ||
3037 | We check specially for a constant integer as OP1. | |
3038 | If you want this check for OP0 as well, then before calling | |
3039 | you should swap the two operands if OP0 would be constant. */ | |
3040 | ||
3041 | rtx | |
f2593a66 RS |
3042 | expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target, |
3043 | int unsignedp) | |
44037a66 | 3044 | { |
8efc8980 RS |
3045 | enum mult_variant variant; |
3046 | struct algorithm algorithm; | |
65dc9350 | 3047 | int max_cost; |
f40751dd | 3048 | bool speed = optimize_insn_for_speed_p (); |
44037a66 | 3049 | |
65dc9350 RS |
3050 | /* Handling const0_rtx here allows us to use zero as a rogue value for |
3051 | coeff below. */ | |
3052 | if (op1 == const0_rtx) | |
3053 | return const0_rtx; | |
3054 | if (op1 == const1_rtx) | |
3055 | return op0; | |
3056 | if (op1 == constm1_rtx) | |
3057 | return expand_unop (mode, | |
3058 | GET_MODE_CLASS (mode) == MODE_INT | |
3059 | && !unsignedp && flag_trapv | |
3060 | ? negv_optab : neg_optab, | |
3061 | op0, target, 0); | |
3062 | ||
3063 | /* These are the operations that are potentially turned into a sequence | |
3064 | of shifts and additions. */ | |
d2348bd5 | 3065 | if (SCALAR_INT_MODE_P (mode) |
f258e38b UW |
3066 | && (unsignedp || !flag_trapv)) |
3067 | { | |
65dc9350 | 3068 | HOST_WIDE_INT coeff = 0; |
5e839bc8 | 3069 | rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1); |
65dc9350 RS |
3070 | |
3071 | /* synth_mult does an `unsigned int' multiply. As long as the mode is | |
3072 | less than or equal in size to `unsigned int' this doesn't matter. | |
3073 | If the mode is larger than `unsigned int', then synth_mult works | |
3074 | only if the constant value exactly fits in an `unsigned int' without | |
3075 | any truncation. This means that multiplying by negative values does | |
3076 | not work; results are off by 2^32 on a 32 bit machine. */ | |
f258e38b | 3077 | |
481683e1 | 3078 | if (CONST_INT_P (op1)) |
58b42e19 | 3079 | { |
65dc9350 RS |
3080 | /* Attempt to handle multiplication of DImode values by negative |
3081 | coefficients, by performing the multiplication by a positive | |
3082 | multiplier and then inverting the result. */ | |
3083 | if (INTVAL (op1) < 0 | |
3084 | && GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT) | |
3085 | { | |
3086 | /* Its safe to use -INTVAL (op1) even for INT_MIN, as the | |
5e839bc8 DE |
3087 | result is interpreted as an unsigned coefficient. |
3088 | Exclude cost of op0 from max_cost to match the cost | |
3089 | calculation of the synth_mult. */ | |
f40751dd JH |
3090 | max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET, speed) |
3091 | - neg_cost[speed][mode]; | |
65dc9350 RS |
3092 | if (max_cost > 0 |
3093 | && choose_mult_variant (mode, -INTVAL (op1), &algorithm, | |
3094 | &variant, max_cost)) | |
3095 | { | |
3096 | rtx temp = expand_mult_const (mode, op0, -INTVAL (op1), | |
3097 | NULL_RTX, &algorithm, | |
3098 | variant); | |
3099 | return expand_unop (mode, neg_optab, temp, target, 0); | |
3100 | } | |
3101 | } | |
3102 | else coeff = INTVAL (op1); | |
3103 | } | |
3104 | else if (GET_CODE (op1) == CONST_DOUBLE) | |
3105 | { | |
3106 | /* If we are multiplying in DImode, it may still be a win | |
3107 | to try to work with shifts and adds. */ | |
aee857a2 L |
3108 | if (CONST_DOUBLE_HIGH (op1) == 0 |
3109 | && CONST_DOUBLE_LOW (op1) > 0) | |
65dc9350 RS |
3110 | coeff = CONST_DOUBLE_LOW (op1); |
3111 | else if (CONST_DOUBLE_LOW (op1) == 0 | |
3112 | && EXACT_POWER_OF_2_OR_ZERO_P (CONST_DOUBLE_HIGH (op1))) | |
3113 | { | |
3114 | int shift = floor_log2 (CONST_DOUBLE_HIGH (op1)) | |
3115 | + HOST_BITS_PER_WIDE_INT; | |
3116 | return expand_shift (LSHIFT_EXPR, mode, op0, | |
3117 | build_int_cst (NULL_TREE, shift), | |
3118 | target, unsignedp); | |
3119 | } | |
3120 | } | |
b8698a0f | 3121 | |
65dc9350 RS |
3122 | /* We used to test optimize here, on the grounds that it's better to |
3123 | produce a smaller program when -O is not used. But this causes | |
3124 | such a terrible slowdown sometimes that it seems better to always | |
3125 | use synth_mult. */ | |
3126 | if (coeff != 0) | |
3127 | { | |
3128 | /* Special case powers of two. */ | |
3129 | if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)) | |
3130 | return expand_shift (LSHIFT_EXPR, mode, op0, | |
3131 | build_int_cst (NULL_TREE, floor_log2 (coeff)), | |
3132 | target, unsignedp); | |
3133 | ||
5e839bc8 DE |
3134 | /* Exclude cost of op0 from max_cost to match the cost |
3135 | calculation of the synth_mult. */ | |
f40751dd | 3136 | max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET, speed); |
65dc9350 RS |
3137 | if (choose_mult_variant (mode, coeff, &algorithm, &variant, |
3138 | max_cost)) | |
3139 | return expand_mult_const (mode, op0, coeff, target, | |
3140 | &algorithm, variant); | |
58b42e19 | 3141 | } |
f258e38b | 3142 | } |
44037a66 | 3143 | |
f2593a66 RS |
3144 | if (GET_CODE (op0) == CONST_DOUBLE) |
3145 | { | |
3146 | rtx temp = op0; | |
3147 | op0 = op1; | |
3148 | op1 = temp; | |
3149 | } | |
3150 | ||
3151 | /* Expand x*2.0 as x+x. */ | |
3152 | if (GET_CODE (op1) == CONST_DOUBLE | |
3d8bf70f | 3153 | && SCALAR_FLOAT_MODE_P (mode)) |
f2593a66 RS |
3154 | { |
3155 | REAL_VALUE_TYPE d; | |
3156 | REAL_VALUE_FROM_CONST_DOUBLE (d, op1); | |
3157 | ||
3158 | if (REAL_VALUES_EQUAL (d, dconst2)) | |
3159 | { | |
3160 | op0 = force_reg (GET_MODE (op0), op0); | |
3161 | return expand_binop (mode, add_optab, op0, op0, | |
3162 | target, unsignedp, OPTAB_LIB_WIDEN); | |
3163 | } | |
3164 | } | |
3165 | ||
819126a6 RK |
3166 | /* This used to use umul_optab if unsigned, but for non-widening multiply |
3167 | there is no difference between signed and unsigned. */ | |
c410d49e | 3168 | op0 = expand_binop (mode, |
91ce572a | 3169 | ! unsignedp |
0fb7aeda KH |
3170 | && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT) |
3171 | ? smulv_optab : smul_optab, | |
44037a66 | 3172 | op0, op1, target, unsignedp, OPTAB_LIB_WIDEN); |
5b0264cb | 3173 | gcc_assert (op0); |
44037a66 TG |
3174 | return op0; |
3175 | } | |
5b58b39b BS |
3176 | |
3177 | /* Perform a widening multiplication and return an rtx for the result. | |
3178 | MODE is mode of value; OP0 and OP1 are what to multiply (rtx's); | |
3179 | TARGET is a suggestion for where to store the result (an rtx). | |
3180 | THIS_OPTAB is the optab we should use, it must be either umul_widen_optab | |
3181 | or smul_widen_optab. | |
3182 | ||
3183 | We check specially for a constant integer as OP1, comparing the | |
3184 | cost of a widening multiply against the cost of a sequence of shifts | |
3185 | and adds. */ | |
3186 | ||
3187 | rtx | |
3188 | expand_widening_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target, | |
3189 | int unsignedp, optab this_optab) | |
3190 | { | |
3191 | bool speed = optimize_insn_for_speed_p (); | |
3192 | ||
3193 | if (CONST_INT_P (op1) | |
3194 | && (INTVAL (op1) >= 0 | |
3195 | || GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)) | |
3196 | { | |
3197 | HOST_WIDE_INT coeff = INTVAL (op1); | |
3198 | int max_cost; | |
3199 | enum mult_variant variant; | |
3200 | struct algorithm algorithm; | |
3201 | ||
3202 | /* Special case powers of two. */ | |
3203 | if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)) | |
3204 | { | |
3205 | op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab); | |
3206 | return expand_shift (LSHIFT_EXPR, mode, op0, | |
3207 | build_int_cst (NULL_TREE, floor_log2 (coeff)), | |
3208 | target, unsignedp); | |
3209 | } | |
3210 | ||
3211 | /* Exclude cost of op0 from max_cost to match the cost | |
3212 | calculation of the synth_mult. */ | |
3213 | max_cost = mul_widen_cost[speed][mode]; | |
3214 | if (choose_mult_variant (mode, coeff, &algorithm, &variant, | |
3215 | max_cost)) | |
3216 | { | |
3217 | op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab); | |
3218 | return expand_mult_const (mode, op0, coeff, target, | |
3219 | &algorithm, variant); | |
3220 | } | |
3221 | } | |
3222 | return expand_binop (mode, this_optab, op0, op1, target, | |
3223 | unsignedp, OPTAB_LIB_WIDEN); | |
3224 | } | |
44037a66 | 3225 | \f |
55c2d311 TG |
3226 | /* Return the smallest n such that 2**n >= X. */ |
3227 | ||
3228 | int | |
502b8322 | 3229 | ceil_log2 (unsigned HOST_WIDE_INT x) |
55c2d311 TG |
3230 | { |
3231 | return floor_log2 (x - 1) + 1; | |
3232 | } | |
3233 | ||
3234 | /* Choose a minimal N + 1 bit approximation to 1/D that can be used to | |
3235 | replace division by D, and put the least significant N bits of the result | |
3236 | in *MULTIPLIER_PTR and return the most significant bit. | |
3237 | ||
3238 | The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the | |
3239 | needed precision is in PRECISION (should be <= N). | |
3240 | ||
3241 | PRECISION should be as small as possible so this function can choose | |
3242 | multiplier more freely. | |
3243 | ||
3244 | The rounded-up logarithm of D is placed in *lgup_ptr. A shift count that | |
3245 | is to be used for a final right shift is placed in *POST_SHIFT_PTR. | |
3246 | ||
3247 | Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR), | |
3248 | where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */ | |
3249 | ||
3250 | static | |
3251 | unsigned HOST_WIDE_INT | |
502b8322 | 3252 | choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision, |
e71c0aa7 | 3253 | rtx *multiplier_ptr, int *post_shift_ptr, int *lgup_ptr) |
55c2d311 | 3254 | { |
f9e158c3 JM |
3255 | HOST_WIDE_INT mhigh_hi, mlow_hi; |
3256 | unsigned HOST_WIDE_INT mhigh_lo, mlow_lo; | |
55c2d311 TG |
3257 | int lgup, post_shift; |
3258 | int pow, pow2; | |
f9e158c3 JM |
3259 | unsigned HOST_WIDE_INT nl, dummy1; |
3260 | HOST_WIDE_INT nh, dummy2; | |
55c2d311 TG |
3261 | |
3262 | /* lgup = ceil(log2(divisor)); */ | |
3263 | lgup = ceil_log2 (d); | |
3264 | ||
5b0264cb | 3265 | gcc_assert (lgup <= n); |
55c2d311 TG |
3266 | |
3267 | pow = n + lgup; | |
3268 | pow2 = n + lgup - precision; | |
3269 | ||
5b0264cb NS |
3270 | /* We could handle this with some effort, but this case is much |
3271 | better handled directly with a scc insn, so rely on caller using | |
3272 | that. */ | |
3273 | gcc_assert (pow != 2 * HOST_BITS_PER_WIDE_INT); | |
55c2d311 TG |
3274 | |
3275 | /* mlow = 2^(N + lgup)/d */ | |
3276 | if (pow >= HOST_BITS_PER_WIDE_INT) | |
3277 | { | |
f9e158c3 | 3278 | nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT); |
55c2d311 TG |
3279 | nl = 0; |
3280 | } | |
3281 | else | |
3282 | { | |
3283 | nh = 0; | |
3284 | nl = (unsigned HOST_WIDE_INT) 1 << pow; | |
3285 | } | |
3286 | div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0, | |
3287 | &mlow_lo, &mlow_hi, &dummy1, &dummy2); | |
3288 | ||
3289 | /* mhigh = (2^(N + lgup) + 2^N + lgup - precision)/d */ | |
3290 | if (pow2 >= HOST_BITS_PER_WIDE_INT) | |
f9e158c3 | 3291 | nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT); |
55c2d311 TG |
3292 | else |
3293 | nl |= (unsigned HOST_WIDE_INT) 1 << pow2; | |
3294 | div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0, | |
3295 | &mhigh_lo, &mhigh_hi, &dummy1, &dummy2); | |
3296 | ||
5b0264cb NS |
3297 | gcc_assert (!mhigh_hi || nh - d < d); |
3298 | gcc_assert (mhigh_hi <= 1 && mlow_hi <= 1); | |
beb235f8 | 3299 | /* Assert that mlow < mhigh. */ |
5b0264cb NS |
3300 | gcc_assert (mlow_hi < mhigh_hi |
3301 | || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo)); | |
55c2d311 TG |
3302 | |
3303 | /* If precision == N, then mlow, mhigh exceed 2^N | |
3304 | (but they do not exceed 2^(N+1)). */ | |
3305 | ||
f9da5064 | 3306 | /* Reduce to lowest terms. */ |
55c2d311 TG |
3307 | for (post_shift = lgup; post_shift > 0; post_shift--) |
3308 | { | |
3309 | unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1); | |
3310 | unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1); | |
3311 | if (ml_lo >= mh_lo) | |
3312 | break; | |
3313 | ||
3314 | mlow_hi = 0; | |
3315 | mlow_lo = ml_lo; | |
3316 | mhigh_hi = 0; | |
3317 | mhigh_lo = mh_lo; | |
3318 | } | |
3319 | ||
3320 | *post_shift_ptr = post_shift; | |
3321 | *lgup_ptr = lgup; | |
3322 | if (n < HOST_BITS_PER_WIDE_INT) | |
3323 | { | |
3324 | unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1; | |
e71c0aa7 | 3325 | *multiplier_ptr = GEN_INT (mhigh_lo & mask); |
55c2d311 TG |
3326 | return mhigh_lo >= mask; |
3327 | } | |
3328 | else | |
3329 | { | |
e71c0aa7 | 3330 | *multiplier_ptr = GEN_INT (mhigh_lo); |
55c2d311 TG |
3331 | return mhigh_hi; |
3332 | } | |
3333 | } | |
3334 | ||
3335 | /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is | |
3336 | congruent to 1 (mod 2**N). */ | |
3337 | ||
3338 | static unsigned HOST_WIDE_INT | |
502b8322 | 3339 | invert_mod2n (unsigned HOST_WIDE_INT x, int n) |
55c2d311 | 3340 | { |
0f41302f | 3341 | /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */ |
55c2d311 TG |
3342 | |
3343 | /* The algorithm notes that the choice y = x satisfies | |
3344 | x*y == 1 mod 2^3, since x is assumed odd. | |
3345 | Each iteration doubles the number of bits of significance in y. */ | |
3346 | ||
3347 | unsigned HOST_WIDE_INT mask; | |
3348 | unsigned HOST_WIDE_INT y = x; | |
3349 | int nbit = 3; | |
3350 | ||
3351 | mask = (n == HOST_BITS_PER_WIDE_INT | |
3352 | ? ~(unsigned HOST_WIDE_INT) 0 | |
3353 | : ((unsigned HOST_WIDE_INT) 1 << n) - 1); | |
3354 | ||
3355 | while (nbit < n) | |
3356 | { | |
3357 | y = y * (2 - x*y) & mask; /* Modulo 2^N */ | |
3358 | nbit *= 2; | |
3359 | } | |
3360 | return y; | |
3361 | } | |
3362 | ||
3363 | /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness | |
3364 | flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the | |
3365 | product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product | |
3366 | to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to | |
3367 | become signed. | |
3368 | ||
3369 | The result is put in TARGET if that is convenient. | |
3370 | ||
3371 | MODE is the mode of operation. */ | |
3372 | ||
3373 | rtx | |
502b8322 AJ |
3374 | expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0, |
3375 | rtx op1, rtx target, int unsignedp) | |
55c2d311 TG |
3376 | { |
3377 | rtx tem; | |
3378 | enum rtx_code adj_code = unsignedp ? PLUS : MINUS; | |
3379 | ||
3380 | tem = expand_shift (RSHIFT_EXPR, mode, op0, | |
7d60be94 | 3381 | build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1), |
55c2d311 | 3382 | NULL_RTX, 0); |
22273300 | 3383 | tem = expand_and (mode, tem, op1, NULL_RTX); |
38a448ca RH |
3384 | adj_operand |
3385 | = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem), | |
3386 | adj_operand); | |
55c2d311 TG |
3387 | |
3388 | tem = expand_shift (RSHIFT_EXPR, mode, op1, | |
7d60be94 | 3389 | build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1), |
55c2d311 | 3390 | NULL_RTX, 0); |
22273300 | 3391 | tem = expand_and (mode, tem, op0, NULL_RTX); |
38a448ca RH |
3392 | target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem), |
3393 | target); | |
55c2d311 TG |
3394 | |
3395 | return target; | |
3396 | } | |
3397 | ||
8efc8980 | 3398 | /* Subroutine of expand_mult_highpart. Return the MODE high part of OP. */ |
55c2d311 | 3399 | |
8efc8980 RS |
3400 | static rtx |
3401 | extract_high_half (enum machine_mode mode, rtx op) | |
3402 | { | |
3403 | enum machine_mode wider_mode; | |
55c2d311 | 3404 | |
8efc8980 RS |
3405 | if (mode == word_mode) |
3406 | return gen_highpart (mode, op); | |
71af73bb | 3407 | |
15ed7b52 JG |
3408 | gcc_assert (!SCALAR_FLOAT_MODE_P (mode)); |
3409 | ||
8efc8980 RS |
3410 | wider_mode = GET_MODE_WIDER_MODE (mode); |
3411 | op = expand_shift (RSHIFT_EXPR, wider_mode, op, | |
7d60be94 | 3412 | build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode)), 0, 1); |
8efc8980 RS |
3413 | return convert_modes (mode, wider_mode, op, 0); |
3414 | } | |
55c2d311 | 3415 | |
8efc8980 RS |
3416 | /* Like expand_mult_highpart, but only consider using a multiplication |
3417 | optab. OP1 is an rtx for the constant operand. */ | |
3418 | ||
3419 | static rtx | |
3420 | expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1, | |
3421 | rtx target, int unsignedp, int max_cost) | |
55c2d311 | 3422 | { |
665acd1e | 3423 | rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode); |
8efc8980 | 3424 | enum machine_mode wider_mode; |
55c2d311 TG |
3425 | optab moptab; |
3426 | rtx tem; | |
8efc8980 | 3427 | int size; |
f40751dd | 3428 | bool speed = optimize_insn_for_speed_p (); |
55c2d311 | 3429 | |
15ed7b52 JG |
3430 | gcc_assert (!SCALAR_FLOAT_MODE_P (mode)); |
3431 | ||
8efc8980 RS |
3432 | wider_mode = GET_MODE_WIDER_MODE (mode); |
3433 | size = GET_MODE_BITSIZE (mode); | |
55c2d311 TG |
3434 | |
3435 | /* Firstly, try using a multiplication insn that only generates the needed | |
3436 | high part of the product, and in the sign flavor of unsignedp. */ | |
f40751dd | 3437 | if (mul_highpart_cost[speed][mode] < max_cost) |
71af73bb | 3438 | { |
8efc8980 | 3439 | moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab; |
665acd1e | 3440 | tem = expand_binop (mode, moptab, op0, narrow_op1, target, |
8efc8980 RS |
3441 | unsignedp, OPTAB_DIRECT); |
3442 | if (tem) | |
3443 | return tem; | |
71af73bb | 3444 | } |
55c2d311 TG |
3445 | |
3446 | /* Secondly, same as above, but use sign flavor opposite of unsignedp. | |
3447 | Need to adjust the result after the multiplication. */ | |
02a65aef | 3448 | if (size - 1 < BITS_PER_WORD |
f40751dd JH |
3449 | && (mul_highpart_cost[speed][mode] + 2 * shift_cost[speed][mode][size-1] |
3450 | + 4 * add_cost[speed][mode] < max_cost)) | |
71af73bb | 3451 | { |
8efc8980 | 3452 | moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab; |
665acd1e | 3453 | tem = expand_binop (mode, moptab, op0, narrow_op1, target, |
8efc8980 RS |
3454 | unsignedp, OPTAB_DIRECT); |
3455 | if (tem) | |
71af73bb | 3456 | /* We used the wrong signedness. Adjust the result. */ |
77278891 | 3457 | return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1, |
8efc8980 | 3458 | tem, unsignedp); |
71af73bb | 3459 | } |
55c2d311 | 3460 | |
71af73bb | 3461 | /* Try widening multiplication. */ |
55c2d311 | 3462 | moptab = unsignedp ? umul_widen_optab : smul_widen_optab; |
947131ba | 3463 | if (optab_handler (moptab, wider_mode) != CODE_FOR_nothing |
f40751dd | 3464 | && mul_widen_cost[speed][wider_mode] < max_cost) |
a295d331 | 3465 | { |
665acd1e | 3466 | tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0, |
8efc8980 RS |
3467 | unsignedp, OPTAB_WIDEN); |
3468 | if (tem) | |
3469 | return extract_high_half (mode, tem); | |
c410d49e | 3470 | } |
71af73bb TG |
3471 | |
3472 | /* Try widening the mode and perform a non-widening multiplication. */ | |
947131ba | 3473 | if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing |
02a65aef | 3474 | && size - 1 < BITS_PER_WORD |
f40751dd | 3475 | && mul_cost[speed][wider_mode] + shift_cost[speed][mode][size-1] < max_cost) |
a295d331 | 3476 | { |
82dfb9a5 RS |
3477 | rtx insns, wop0, wop1; |
3478 | ||
3479 | /* We need to widen the operands, for example to ensure the | |
3480 | constant multiplier is correctly sign or zero extended. | |
3481 | Use a sequence to clean-up any instructions emitted by | |
3482 | the conversions if things don't work out. */ | |
3483 | start_sequence (); | |
3484 | wop0 = convert_modes (wider_mode, mode, op0, unsignedp); | |
3485 | wop1 = convert_modes (wider_mode, mode, op1, unsignedp); | |
3486 | tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0, | |
8efc8980 | 3487 | unsignedp, OPTAB_WIDEN); |
82dfb9a5 RS |
3488 | insns = get_insns (); |
3489 | end_sequence (); | |
3490 | ||
8efc8980 | 3491 | if (tem) |
82dfb9a5 RS |
3492 | { |
3493 | emit_insn (insns); | |
3494 | return extract_high_half (mode, tem); | |
3495 | } | |
a295d331 | 3496 | } |
71af73bb TG |
3497 | |
3498 | /* Try widening multiplication of opposite signedness, and adjust. */ | |
3499 | moptab = unsignedp ? smul_widen_optab : umul_widen_optab; | |
947131ba | 3500 | if (optab_handler (moptab, wider_mode) != CODE_FOR_nothing |
02a65aef | 3501 | && size - 1 < BITS_PER_WORD |
f40751dd JH |
3502 | && (mul_widen_cost[speed][wider_mode] + 2 * shift_cost[speed][mode][size-1] |
3503 | + 4 * add_cost[speed][mode] < max_cost)) | |
55c2d311 | 3504 | { |
665acd1e | 3505 | tem = expand_binop (wider_mode, moptab, op0, narrow_op1, |
71af73bb TG |
3506 | NULL_RTX, ! unsignedp, OPTAB_WIDEN); |
3507 | if (tem != 0) | |
55c2d311 | 3508 | { |
8efc8980 | 3509 | tem = extract_high_half (mode, tem); |
71af73bb | 3510 | /* We used the wrong signedness. Adjust the result. */ |
77278891 | 3511 | return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1, |
71af73bb | 3512 | target, unsignedp); |
55c2d311 | 3513 | } |
55c2d311 TG |
3514 | } |
3515 | ||
71af73bb | 3516 | return 0; |
8efc8980 | 3517 | } |
71af73bb | 3518 | |
0d282692 RS |
3519 | /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant), |
3520 | putting the high half of the result in TARGET if that is convenient, | |
3521 | and return where the result is. If the operation can not be performed, | |
3522 | 0 is returned. | |
55c2d311 | 3523 | |
8efc8980 RS |
3524 | MODE is the mode of operation and result. |
3525 | ||
3526 | UNSIGNEDP nonzero means unsigned multiply. | |
3527 | ||
3528 | MAX_COST is the total allowed cost for the expanded RTL. */ | |
3529 | ||
0d282692 RS |
3530 | static rtx |
3531 | expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1, | |
3532 | rtx target, int unsignedp, int max_cost) | |
8efc8980 | 3533 | { |
f258e38b | 3534 | enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode); |
0d282692 | 3535 | unsigned HOST_WIDE_INT cnst1; |
f258e38b UW |
3536 | int extra_cost; |
3537 | bool sign_adjust = false; | |
8efc8980 RS |
3538 | enum mult_variant variant; |
3539 | struct algorithm alg; | |
0d282692 | 3540 | rtx tem; |
f40751dd | 3541 | bool speed = optimize_insn_for_speed_p (); |
8efc8980 | 3542 | |
15ed7b52 | 3543 | gcc_assert (!SCALAR_FLOAT_MODE_P (mode)); |
8efc8980 | 3544 | /* We can't support modes wider than HOST_BITS_PER_INT. */ |
5b0264cb | 3545 | gcc_assert (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT); |
8efc8980 | 3546 | |
0d282692 | 3547 | cnst1 = INTVAL (op1) & GET_MODE_MASK (mode); |
f258e38b | 3548 | |
b8698a0f L |
3549 | /* We can't optimize modes wider than BITS_PER_WORD. |
3550 | ??? We might be able to perform double-word arithmetic if | |
f258e38b UW |
3551 | mode == word_mode, however all the cost calculations in |
3552 | synth_mult etc. assume single-word operations. */ | |
3553 | if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD) | |
3554 | return expand_mult_highpart_optab (mode, op0, op1, target, | |
3555 | unsignedp, max_cost); | |
3556 | ||
f40751dd | 3557 | extra_cost = shift_cost[speed][mode][GET_MODE_BITSIZE (mode) - 1]; |
f258e38b UW |
3558 | |
3559 | /* Check whether we try to multiply by a negative constant. */ | |
3560 | if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1)) | |
3561 | { | |
3562 | sign_adjust = true; | |
f40751dd | 3563 | extra_cost += add_cost[speed][mode]; |
f258e38b | 3564 | } |
8efc8980 RS |
3565 | |
3566 | /* See whether shift/add multiplication is cheap enough. */ | |
f258e38b UW |
3567 | if (choose_mult_variant (wider_mode, cnst1, &alg, &variant, |
3568 | max_cost - extra_cost)) | |
a295d331 | 3569 | { |
8efc8980 RS |
3570 | /* See whether the specialized multiplication optabs are |
3571 | cheaper than the shift/add version. */ | |
26276705 RS |
3572 | tem = expand_mult_highpart_optab (mode, op0, op1, target, unsignedp, |
3573 | alg.cost.cost + extra_cost); | |
8efc8980 RS |
3574 | if (tem) |
3575 | return tem; | |
3576 | ||
f258e38b UW |
3577 | tem = convert_to_mode (wider_mode, op0, unsignedp); |
3578 | tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant); | |
3579 | tem = extract_high_half (mode, tem); | |
3580 | ||
9cf737f8 | 3581 | /* Adjust result for signedness. */ |
f258e38b UW |
3582 | if (sign_adjust) |
3583 | tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem); | |
3584 | ||
3585 | return tem; | |
a295d331 | 3586 | } |
8efc8980 RS |
3587 | return expand_mult_highpart_optab (mode, op0, op1, target, |
3588 | unsignedp, max_cost); | |
55c2d311 | 3589 | } |
0b55e932 RS |
3590 | |
3591 | ||
3592 | /* Expand signed modulus of OP0 by a power of two D in mode MODE. */ | |
3593 | ||
3594 | static rtx | |
3595 | expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d) | |
3596 | { | |
6e7a355c | 3597 | unsigned HOST_WIDE_INT masklow, maskhigh; |
1c234fcb | 3598 | rtx result, temp, shift, label; |
0b55e932 RS |
3599 | int logd; |
3600 | ||
3601 | logd = floor_log2 (d); | |
3602 | result = gen_reg_rtx (mode); | |
3603 | ||
3604 | /* Avoid conditional branches when they're expensive. */ | |
3a4fd356 | 3605 | if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2 |
22660666 | 3606 | && optimize_insn_for_speed_p ()) |
0b55e932 RS |
3607 | { |
3608 | rtx signmask = emit_store_flag (result, LT, op0, const0_rtx, | |
3609 | mode, 0, -1); | |
3610 | if (signmask) | |
3611 | { | |
3612 | signmask = force_reg (mode, signmask); | |
6e7a355c | 3613 | masklow = ((HOST_WIDE_INT) 1 << logd) - 1; |
1c234fcb RS |
3614 | shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd); |
3615 | ||
3616 | /* Use the rtx_cost of a LSHIFTRT instruction to determine | |
3617 | which instruction sequence to use. If logical right shifts | |
3618 | are expensive the use 2 XORs, 2 SUBs and an AND, otherwise | |
3619 | use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */ | |
6e7a355c | 3620 | |
1c234fcb | 3621 | temp = gen_rtx_LSHIFTRT (mode, result, shift); |
947131ba | 3622 | if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing |
f40751dd | 3623 | || rtx_cost (temp, SET, optimize_insn_for_speed_p ()) > COSTS_N_INSNS (2)) |
1c234fcb RS |
3624 | { |
3625 | temp = expand_binop (mode, xor_optab, op0, signmask, | |
3626 | NULL_RTX, 1, OPTAB_LIB_WIDEN); | |
3627 | temp = expand_binop (mode, sub_optab, temp, signmask, | |
3628 | NULL_RTX, 1, OPTAB_LIB_WIDEN); | |
6e7a355c | 3629 | temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow), |
1c234fcb RS |
3630 | NULL_RTX, 1, OPTAB_LIB_WIDEN); |
3631 | temp = expand_binop (mode, xor_optab, temp, signmask, | |
3632 | NULL_RTX, 1, OPTAB_LIB_WIDEN); | |
3633 | temp = expand_binop (mode, sub_optab, temp, signmask, | |
3634 | NULL_RTX, 1, OPTAB_LIB_WIDEN); | |
3635 | } | |
3636 | else | |
3637 | { | |
3638 | signmask = expand_binop (mode, lshr_optab, signmask, shift, | |
3639 | NULL_RTX, 1, OPTAB_LIB_WIDEN); | |
3640 | signmask = force_reg (mode, signmask); | |
3641 | ||
3642 | temp = expand_binop (mode, add_optab, op0, signmask, | |
3643 | NULL_RTX, 1, OPTAB_LIB_WIDEN); | |
6e7a355c | 3644 | temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow), |
1c234fcb RS |
3645 | NULL_RTX, 1, OPTAB_LIB_WIDEN); |
3646 | temp = expand_binop (mode, sub_optab, temp, signmask, | |
3647 | NULL_RTX, 1, OPTAB_LIB_WIDEN); | |
3648 | } | |
0b55e932 RS |
3649 | return temp; |
3650 | } | |
3651 | } | |
3652 | ||
3653 | /* Mask contains the mode's signbit and the significant bits of the | |
3654 | modulus. By including the signbit in the operation, many targets | |
3655 | can avoid an explicit compare operation in the following comparison | |
3656 | against zero. */ | |
3657 | ||
6e7a355c EB |
3658 | masklow = ((HOST_WIDE_INT) 1 << logd) - 1; |
3659 | if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT) | |
3660 | { | |
3661 | masklow |= (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1); | |
3662 | maskhigh = -1; | |
3663 | } | |
3664 | else | |
3665 | maskhigh = (HOST_WIDE_INT) -1 | |
3666 | << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1); | |
0b55e932 | 3667 | |
6e7a355c EB |
3668 | temp = expand_binop (mode, and_optab, op0, |
3669 | immed_double_const (masklow, maskhigh, mode), | |
3670 | result, 1, OPTAB_LIB_WIDEN); | |
0b55e932 RS |
3671 | if (temp != result) |
3672 | emit_move_insn (result, temp); | |
3673 | ||
3674 | label = gen_label_rtx (); | |
3675 | do_cmp_and_jump (result, const0_rtx, GE, mode, label); | |
3676 | ||
3677 | temp = expand_binop (mode, sub_optab, result, const1_rtx, result, | |
3678 | 0, OPTAB_LIB_WIDEN); | |
6e7a355c EB |
3679 | masklow = (HOST_WIDE_INT) -1 << logd; |
3680 | maskhigh = -1; | |
3681 | temp = expand_binop (mode, ior_optab, temp, | |
3682 | immed_double_const (masklow, maskhigh, mode), | |
3683 | result, 1, OPTAB_LIB_WIDEN); | |
0b55e932 RS |
3684 | temp = expand_binop (mode, add_optab, temp, const1_rtx, result, |
3685 | 0, OPTAB_LIB_WIDEN); | |
3686 | if (temp != result) | |
3687 | emit_move_insn (result, temp); | |
3688 | emit_label (label); | |
3689 | return result; | |
3690 | } | |
39cab019 RS |
3691 | |
3692 | /* Expand signed division of OP0 by a power of two D in mode MODE. | |
3693 | This routine is only called for positive values of D. */ | |
3694 | ||
3695 | static rtx | |
3696 | expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d) | |
3697 | { | |
3698 | rtx temp, label; | |
3699 | tree shift; | |
3700 | int logd; | |
3701 | ||
3702 | logd = floor_log2 (d); | |
7d60be94 | 3703 | shift = build_int_cst (NULL_TREE, logd); |
39cab019 | 3704 | |
3a4fd356 JH |
3705 | if (d == 2 |
3706 | && BRANCH_COST (optimize_insn_for_speed_p (), | |
3707 | false) >= 1) | |
39cab019 RS |
3708 | { |
3709 | temp = gen_reg_rtx (mode); | |
3710 | temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1); | |
3711 | temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX, | |
3712 | 0, OPTAB_LIB_WIDEN); | |
3713 | return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0); | |
3714 | } | |
3715 | ||
fdded401 | 3716 | #ifdef HAVE_conditional_move |
3a4fd356 JH |
3717 | if (BRANCH_COST (optimize_insn_for_speed_p (), false) |
3718 | >= 2) | |
fdded401 RS |
3719 | { |
3720 | rtx temp2; | |
3721 | ||
38636eac EB |
3722 | /* ??? emit_conditional_move forces a stack adjustment via |
3723 | compare_from_rtx so, if the sequence is discarded, it will | |
3724 | be lost. Do it now instead. */ | |
3725 | do_pending_stack_adjust (); | |
3726 | ||
fdded401 RS |
3727 | start_sequence (); |
3728 | temp2 = copy_to_mode_reg (mode, op0); | |
3729 | temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1), | |
3730 | NULL_RTX, 0, OPTAB_LIB_WIDEN); | |
3731 | temp = force_reg (mode, temp); | |
3732 | ||
3733 | /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */ | |
3734 | temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx, | |
3735 | mode, temp, temp2, mode, 0); | |
3736 | if (temp2) | |
3737 | { | |
3738 | rtx seq = get_insns (); | |
3739 | end_sequence (); | |
3740 | emit_insn (seq); | |
3741 | return expand_shift (RSHIFT_EXPR, mode, temp2, shift, NULL_RTX, 0); | |
3742 | } | |
3743 | end_sequence (); | |
3744 | } | |
3745 | #endif | |
3746 | ||
3a4fd356 JH |
3747 | if (BRANCH_COST (optimize_insn_for_speed_p (), |
3748 | false) >= 2) | |
39cab019 RS |
3749 | { |
3750 | int ushift = GET_MODE_BITSIZE (mode) - logd; | |
3751 | ||
3752 | temp = gen_reg_rtx (mode); | |
3753 | temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1); | |
f40751dd | 3754 | if (shift_cost[optimize_insn_for_speed_p ()][mode][ushift] > COSTS_N_INSNS (1)) |
39cab019 RS |
3755 | temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1), |
3756 | NULL_RTX, 0, OPTAB_LIB_WIDEN); | |
3757 | else | |
3758 | temp = expand_shift (RSHIFT_EXPR, mode, temp, | |
7d60be94 | 3759 | build_int_cst (NULL_TREE, ushift), |
4a90aeeb | 3760 | NULL_RTX, 1); |
39cab019 RS |
3761 | temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX, |
3762 | 0, OPTAB_LIB_WIDEN); | |
3763 | return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0); | |
3764 | } | |
3765 | ||
3766 | label = gen_label_rtx (); | |
3767 | temp = copy_to_mode_reg (mode, op0); | |
3768 | do_cmp_and_jump (temp, const0_rtx, GE, mode, label); | |
3769 | expand_inc (temp, GEN_INT (d - 1)); | |
3770 | emit_label (label); | |
3771 | return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0); | |
3772 | } | |
55c2d311 | 3773 | \f |
44037a66 TG |
3774 | /* Emit the code to divide OP0 by OP1, putting the result in TARGET |
3775 | if that is convenient, and returning where the result is. | |
3776 | You may request either the quotient or the remainder as the result; | |
3777 | specify REM_FLAG nonzero to get the remainder. | |
3778 | ||
3779 | CODE is the expression code for which kind of division this is; | |
3780 | it controls how rounding is done. MODE is the machine mode to use. | |
3781 | UNSIGNEDP nonzero means do unsigned division. */ | |
3782 | ||
3783 | /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI | |
3784 | and then correct it by or'ing in missing high bits | |
3785 | if result of ANDI is nonzero. | |
3786 | For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result. | |
3787 | This could optimize to a bfexts instruction. | |
3788 | But C doesn't use these operations, so their optimizations are | |
3789 | left for later. */ | |
5353610b R |
3790 | /* ??? For modulo, we don't actually need the highpart of the first product, |
3791 | the low part will do nicely. And for small divisors, the second multiply | |
3792 | can also be a low-part only multiply or even be completely left out. | |
3793 | E.g. to calculate the remainder of a division by 3 with a 32 bit | |
3794 | multiply, multiply with 0x55555556 and extract the upper two bits; | |
3795 | the result is exact for inputs up to 0x1fffffff. | |
3796 | The input range can be reduced by using cross-sum rules. | |
3797 | For odd divisors >= 3, the following table gives right shift counts | |
09da1532 | 3798 | so that if a number is shifted by an integer multiple of the given |
5353610b R |
3799 | amount, the remainder stays the same: |
3800 | 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20, | |
3801 | 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0, | |
3802 | 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0, | |
3803 | 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33, | |
3804 | 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12 | |
3805 | ||
3806 | Cross-sum rules for even numbers can be derived by leaving as many bits | |
3807 | to the right alone as the divisor has zeros to the right. | |
3808 | E.g. if x is an unsigned 32 bit number: | |
3809 | (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28 | |
3810 | */ | |
44037a66 TG |
3811 | |
3812 | rtx | |
502b8322 AJ |
3813 | expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, |
3814 | rtx op0, rtx op1, rtx target, int unsignedp) | |
44037a66 | 3815 | { |
44037a66 | 3816 | enum machine_mode compute_mode; |
b3694847 | 3817 | rtx tquotient; |
55c2d311 TG |
3818 | rtx quotient = 0, remainder = 0; |
3819 | rtx last; | |
2c414fba | 3820 | int size; |
4e430df8 | 3821 | rtx insn, set; |
44037a66 | 3822 | optab optab1, optab2; |
1c4a429a | 3823 | int op1_is_constant, op1_is_pow2 = 0; |
71af73bb | 3824 | int max_cost, extra_cost; |
9ec36da5 | 3825 | static HOST_WIDE_INT last_div_const = 0; |
1c4a429a | 3826 | static HOST_WIDE_INT ext_op1; |
f40751dd | 3827 | bool speed = optimize_insn_for_speed_p (); |
55c2d311 | 3828 | |
481683e1 | 3829 | op1_is_constant = CONST_INT_P (op1); |
1c4a429a JH |
3830 | if (op1_is_constant) |
3831 | { | |
3832 | ext_op1 = INTVAL (op1); | |
3833 | if (unsignedp) | |
3834 | ext_op1 &= GET_MODE_MASK (mode); | |
3835 | op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1) | |
3836 | || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1)))); | |
3837 | } | |
55c2d311 TG |
3838 | |
3839 | /* | |
3840 | This is the structure of expand_divmod: | |
3841 | ||
3842 | First comes code to fix up the operands so we can perform the operations | |
3843 | correctly and efficiently. | |
3844 | ||
3845 | Second comes a switch statement with code specific for each rounding mode. | |
3846 | For some special operands this code emits all RTL for the desired | |
69f61901 | 3847 | operation, for other cases, it generates only a quotient and stores it in |
55c2d311 TG |
3848 | QUOTIENT. The case for trunc division/remainder might leave quotient = 0, |
3849 | to indicate that it has not done anything. | |
3850 | ||
69f61901 RK |
3851 | Last comes code that finishes the operation. If QUOTIENT is set and |
3852 | REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If | |
3853 | QUOTIENT is not set, it is computed using trunc rounding. | |
44037a66 | 3854 | |
55c2d311 TG |
3855 | We try to generate special code for division and remainder when OP1 is a |
3856 | constant. If |OP1| = 2**n we can use shifts and some other fast | |
3857 | operations. For other values of OP1, we compute a carefully selected | |
3858 | fixed-point approximation m = 1/OP1, and generate code that multiplies OP0 | |
3859 | by m. | |
3860 | ||
3861 | In all cases but EXACT_DIV_EXPR, this multiplication requires the upper | |
3862 | half of the product. Different strategies for generating the product are | |
3863 | implemented in expand_mult_highpart. | |
3864 | ||
3865 | If what we actually want is the remainder, we generate that by another | |
3866 | by-constant multiplication and a subtraction. */ | |
3867 | ||
3868 | /* We shouldn't be called with OP1 == const1_rtx, but some of the | |
3d32ffd1 TW |
3869 | code below will malfunction if we are, so check here and handle |
3870 | the special case if so. */ | |
3871 | if (op1 == const1_rtx) | |
3872 | return rem_flag ? const0_rtx : op0; | |
3873 | ||
91ce572a CC |
3874 | /* When dividing by -1, we could get an overflow. |
3875 | negv_optab can handle overflows. */ | |
3876 | if (! unsignedp && op1 == constm1_rtx) | |
3877 | { | |
3878 | if (rem_flag) | |
0fb7aeda | 3879 | return const0_rtx; |
91ce572a | 3880 | return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT |
0fb7aeda | 3881 | ? negv_optab : neg_optab, op0, target, 0); |
91ce572a CC |
3882 | } |
3883 | ||
bc1c7e93 RK |
3884 | if (target |
3885 | /* Don't use the function value register as a target | |
3886 | since we have to read it as well as write it, | |
3887 | and function-inlining gets confused by this. */ | |
3888 | && ((REG_P (target) && REG_FUNCTION_VALUE_P (target)) | |
3889 | /* Don't clobber an operand while doing a multi-step calculation. */ | |
515dfc7a | 3890 | || ((rem_flag || op1_is_constant) |
bc1c7e93 | 3891 | && (reg_mentioned_p (target, op0) |
3c0cb5de | 3892 | || (MEM_P (op0) && MEM_P (target)))) |
bc1c7e93 | 3893 | || reg_mentioned_p (target, op1) |
3c0cb5de | 3894 | || (MEM_P (op1) && MEM_P (target)))) |
44037a66 TG |
3895 | target = 0; |
3896 | ||
44037a66 TG |
3897 | /* Get the mode in which to perform this computation. Normally it will |
3898 | be MODE, but sometimes we can't do the desired operation in MODE. | |
3899 | If so, pick a wider mode in which we can do the operation. Convert | |
3900 | to that mode at the start to avoid repeated conversions. | |
3901 | ||
3902 | First see what operations we need. These depend on the expression | |
3903 | we are evaluating. (We assume that divxx3 insns exist under the | |
3904 | same conditions as modxx3 insns and that these insns don't normally | |
3905 | fail. If these assumptions are not correct, we may generate less | |
3906 | efficient code in some cases.) | |
3907 | ||
3908 | Then see if we find a mode in which we can open-code that operation | |
3909 | (either a division, modulus, or shift). Finally, check for the smallest | |
3910 | mode for which we can do the operation with a library call. */ | |
3911 | ||
55c2d311 TG |
3912 | /* We might want to refine this now that we have division-by-constant |
3913 | optimization. Since expand_mult_highpart tries so many variants, it is | |
3914 | not straightforward to generalize this. Maybe we should make an array | |
3915 | of possible modes in init_expmed? Save this for GCC 2.7. */ | |
3916 | ||
556a56ac DM |
3917 | optab1 = ((op1_is_pow2 && op1 != const0_rtx) |
3918 | ? (unsignedp ? lshr_optab : ashr_optab) | |
44037a66 | 3919 | : (unsignedp ? udiv_optab : sdiv_optab)); |
556a56ac DM |
3920 | optab2 = ((op1_is_pow2 && op1 != const0_rtx) |
3921 | ? optab1 | |
3922 | : (unsignedp ? udivmod_optab : sdivmod_optab)); | |
44037a66 TG |
3923 | |
3924 | for (compute_mode = mode; compute_mode != VOIDmode; | |
3925 | compute_mode = GET_MODE_WIDER_MODE (compute_mode)) | |
947131ba RS |
3926 | if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing |
3927 | || optab_handler (optab2, compute_mode) != CODE_FOR_nothing) | |
44037a66 TG |
3928 | break; |
3929 | ||
3930 | if (compute_mode == VOIDmode) | |
3931 | for (compute_mode = mode; compute_mode != VOIDmode; | |
3932 | compute_mode = GET_MODE_WIDER_MODE (compute_mode)) | |
8a33f100 JH |
3933 | if (optab_libfunc (optab1, compute_mode) |
3934 | || optab_libfunc (optab2, compute_mode)) | |
44037a66 TG |
3935 | break; |
3936 | ||
535a42b1 NS |
3937 | /* If we still couldn't find a mode, use MODE, but expand_binop will |
3938 | probably die. */ | |
44037a66 TG |
3939 | if (compute_mode == VOIDmode) |
3940 | compute_mode = mode; | |
3941 | ||
55c2d311 TG |
3942 | if (target && GET_MODE (target) == compute_mode) |
3943 | tquotient = target; | |
3944 | else | |
3945 | tquotient = gen_reg_rtx (compute_mode); | |
2c414fba | 3946 | |
55c2d311 TG |
3947 | size = GET_MODE_BITSIZE (compute_mode); |
3948 | #if 0 | |
3949 | /* It should be possible to restrict the precision to GET_MODE_BITSIZE | |
71af73bb TG |
3950 | (mode), and thereby get better code when OP1 is a constant. Do that |
3951 | later. It will require going over all usages of SIZE below. */ | |
55c2d311 TG |
3952 | size = GET_MODE_BITSIZE (mode); |
3953 | #endif | |
bc1c7e93 | 3954 | |
9ec36da5 JL |
3955 | /* Only deduct something for a REM if the last divide done was |
3956 | for a different constant. Then set the constant of the last | |
3957 | divide. */ | |
f40751dd | 3958 | max_cost = unsignedp ? udiv_cost[speed][compute_mode] : sdiv_cost[speed][compute_mode]; |
a28b2ac6 RS |
3959 | if (rem_flag && ! (last_div_const != 0 && op1_is_constant |
3960 | && INTVAL (op1) == last_div_const)) | |
f40751dd | 3961 | max_cost -= mul_cost[speed][compute_mode] + add_cost[speed][compute_mode]; |
9ec36da5 JL |
3962 | |
3963 | last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0; | |
71af73bb | 3964 | |
55c2d311 | 3965 | /* Now convert to the best mode to use. */ |
44037a66 TG |
3966 | if (compute_mode != mode) |
3967 | { | |
55c2d311 | 3968 | op0 = convert_modes (compute_mode, mode, op0, unsignedp); |
81722fa9 | 3969 | op1 = convert_modes (compute_mode, mode, op1, unsignedp); |
e13a25d5 | 3970 | |
e9a25f70 JL |
3971 | /* convert_modes may have placed op1 into a register, so we |
3972 | must recompute the following. */ | |
481683e1 | 3973 | op1_is_constant = CONST_INT_P (op1); |
e13a25d5 DM |
3974 | op1_is_pow2 = (op1_is_constant |
3975 | && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)) | |
3976 | || (! unsignedp | |
e9a25f70 | 3977 | && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ; |
44037a66 TG |
3978 | } |
3979 | ||
55c2d311 | 3980 | /* If one of the operands is a volatile MEM, copy it into a register. */ |
c2a47e48 | 3981 | |
3c0cb5de | 3982 | if (MEM_P (op0) && MEM_VOLATILE_P (op0)) |
55c2d311 | 3983 | op0 = force_reg (compute_mode, op0); |
3c0cb5de | 3984 | if (MEM_P (op1) && MEM_VOLATILE_P (op1)) |
c2a47e48 RK |
3985 | op1 = force_reg (compute_mode, op1); |
3986 | ||
ab0b6581 TG |
3987 | /* If we need the remainder or if OP1 is constant, we need to |
3988 | put OP0 in a register in case it has any queued subexpressions. */ | |
3989 | if (rem_flag || op1_is_constant) | |
3990 | op0 = force_reg (compute_mode, op0); | |
bc1c7e93 | 3991 | |
55c2d311 | 3992 | last = get_last_insn (); |
44037a66 | 3993 | |
9faa82d8 | 3994 | /* Promote floor rounding to trunc rounding for unsigned operations. */ |
55c2d311 | 3995 | if (unsignedp) |
44037a66 | 3996 | { |
55c2d311 TG |
3997 | if (code == FLOOR_DIV_EXPR) |
3998 | code = TRUNC_DIV_EXPR; | |
3999 | if (code == FLOOR_MOD_EXPR) | |
4000 | code = TRUNC_MOD_EXPR; | |
db7cafb0 JL |
4001 | if (code == EXACT_DIV_EXPR && op1_is_pow2) |
4002 | code = TRUNC_DIV_EXPR; | |
55c2d311 | 4003 | } |
bc1c7e93 | 4004 | |
55c2d311 TG |
4005 | if (op1 != const0_rtx) |
4006 | switch (code) | |
4007 | { | |
4008 | case TRUNC_MOD_EXPR: | |
4009 | case TRUNC_DIV_EXPR: | |
34f016ed | 4010 | if (op1_is_constant) |
55c2d311 | 4011 | { |
d8f1376c | 4012 | if (unsignedp) |
55c2d311 | 4013 | { |
e71c0aa7 | 4014 | unsigned HOST_WIDE_INT mh; |
55c2d311 TG |
4015 | int pre_shift, post_shift; |
4016 | int dummy; | |
e71c0aa7 | 4017 | rtx ml; |
1c4a429a JH |
4018 | unsigned HOST_WIDE_INT d = (INTVAL (op1) |
4019 | & GET_MODE_MASK (compute_mode)); | |
55c2d311 TG |
4020 | |
4021 | if (EXACT_POWER_OF_2_OR_ZERO_P (d)) | |
4022 | { | |
4023 | pre_shift = floor_log2 (d); | |
4024 | if (rem_flag) | |
4025 | { | |
db3cf6fb MS |
4026 | remainder |
4027 | = expand_binop (compute_mode, and_optab, op0, | |
4028 | GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1), | |
4029 | remainder, 1, | |
4030 | OPTAB_LIB_WIDEN); | |
55c2d311 | 4031 | if (remainder) |
c8dbc8ca | 4032 | return gen_lowpart (mode, remainder); |
55c2d311 TG |
4033 | } |
4034 | quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0, | |
4a90aeeb | 4035 | build_int_cst (NULL_TREE, |
7d60be94 | 4036 | pre_shift), |
55c2d311 TG |
4037 | tquotient, 1); |
4038 | } | |
34f016ed | 4039 | else if (size <= HOST_BITS_PER_WIDE_INT) |
55c2d311 | 4040 | { |
dc1d6150 | 4041 | if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1))) |
55c2d311 | 4042 | { |
dc1d6150 TG |
4043 | /* Most significant bit of divisor is set; emit an scc |
4044 | insn. */ | |
b45f0e58 PB |
4045 | quotient = emit_store_flag_force (tquotient, GEU, op0, op1, |
4046 | compute_mode, 1, 1); | |
55c2d311 TG |
4047 | } |
4048 | else | |
4049 | { | |
dc1d6150 TG |
4050 | /* Find a suitable multiplier and right shift count |
4051 | instead of multiplying with D. */ | |
4052 | ||
4053 | mh = choose_multiplier (d, size, size, | |
4054 | &ml, &post_shift, &dummy); | |
4055 | ||
4056 | /* If the suggested multiplier is more than SIZE bits, | |
4057 | we can do better for even divisors, using an | |
4058 | initial right shift. */ | |
4059 | if (mh != 0 && (d & 1) == 0) | |
4060 | { | |
4061 | pre_shift = floor_log2 (d & -d); | |
4062 | mh = choose_multiplier (d >> pre_shift, size, | |
4063 | size - pre_shift, | |
4064 | &ml, &post_shift, &dummy); | |
5b0264cb | 4065 | gcc_assert (!mh); |
dc1d6150 TG |
4066 | } |
4067 | else | |
4068 | pre_shift = 0; | |
4069 | ||
4070 | if (mh != 0) | |
4071 | { | |
4072 | rtx t1, t2, t3, t4; | |
4073 | ||
02a65aef R |
4074 | if (post_shift - 1 >= BITS_PER_WORD) |
4075 | goto fail1; | |
4076 | ||
965703ed | 4077 | extra_cost |
f40751dd JH |
4078 | = (shift_cost[speed][compute_mode][post_shift - 1] |
4079 | + shift_cost[speed][compute_mode][1] | |
4080 | + 2 * add_cost[speed][compute_mode]); | |
e71c0aa7 | 4081 | t1 = expand_mult_highpart (compute_mode, op0, ml, |
dc1d6150 TG |
4082 | NULL_RTX, 1, |
4083 | max_cost - extra_cost); | |
4084 | if (t1 == 0) | |
4085 | goto fail1; | |
38a448ca RH |
4086 | t2 = force_operand (gen_rtx_MINUS (compute_mode, |
4087 | op0, t1), | |
dc1d6150 | 4088 | NULL_RTX); |
9a9d280e AS |
4089 | t3 = expand_shift (RSHIFT_EXPR, compute_mode, t2, |
4090 | integer_one_node, NULL_RTX, 1); | |
38a448ca RH |
4091 | t4 = force_operand (gen_rtx_PLUS (compute_mode, |
4092 | t1, t3), | |
dc1d6150 | 4093 | NULL_RTX); |
4a90aeeb NS |
4094 | quotient = expand_shift |
4095 | (RSHIFT_EXPR, compute_mode, t4, | |
7d60be94 | 4096 | build_int_cst (NULL_TREE, post_shift - 1), |
4a90aeeb | 4097 | tquotient, 1); |
dc1d6150 TG |
4098 | } |
4099 | else | |
4100 | { | |
4101 | rtx t1, t2; | |
4102 | ||
02a65aef R |
4103 | if (pre_shift >= BITS_PER_WORD |
4104 | || post_shift >= BITS_PER_WORD) | |
4105 | goto fail1; | |
4106 | ||
4a90aeeb NS |
4107 | t1 = expand_shift |
4108 | (RSHIFT_EXPR, compute_mode, op0, | |
7d60be94 | 4109 | build_int_cst (NULL_TREE, pre_shift), |
4a90aeeb | 4110 | NULL_RTX, 1); |
965703ed | 4111 | extra_cost |
f40751dd JH |
4112 | = (shift_cost[speed][compute_mode][pre_shift] |
4113 | + shift_cost[speed][compute_mode][post_shift]); | |
e71c0aa7 | 4114 | t2 = expand_mult_highpart (compute_mode, t1, ml, |
dc1d6150 TG |
4115 | NULL_RTX, 1, |
4116 | max_cost - extra_cost); | |
4117 | if (t2 == 0) | |
4118 | goto fail1; | |
4a90aeeb NS |
4119 | quotient = expand_shift |
4120 | (RSHIFT_EXPR, compute_mode, t2, | |
7d60be94 | 4121 | build_int_cst (NULL_TREE, post_shift), |
4a90aeeb | 4122 | tquotient, 1); |
dc1d6150 | 4123 | } |
55c2d311 TG |
4124 | } |
4125 | } | |
34f016ed TG |
4126 | else /* Too wide mode to use tricky code */ |
4127 | break; | |
55c2d311 TG |
4128 | |
4129 | insn = get_last_insn (); | |
4e430df8 RK |
4130 | if (insn != last |
4131 | && (set = single_set (insn)) != 0 | |
4132 | && SET_DEST (set) == quotient) | |
c410d49e | 4133 | set_unique_reg_note (insn, |
502b8322 | 4134 | REG_EQUAL, |
7e5bda2c | 4135 | gen_rtx_UDIV (compute_mode, op0, op1)); |
55c2d311 TG |
4136 | } |
4137 | else /* TRUNC_DIV, signed */ | |
4138 | { | |
4139 | unsigned HOST_WIDE_INT ml; | |
4140 | int lgup, post_shift; | |
e71c0aa7 | 4141 | rtx mlr; |
55c2d311 | 4142 | HOST_WIDE_INT d = INTVAL (op1); |
e4c9f3c2 ILT |
4143 | unsigned HOST_WIDE_INT abs_d; |
4144 | ||
093253be ILT |
4145 | /* Since d might be INT_MIN, we have to cast to |
4146 | unsigned HOST_WIDE_INT before negating to avoid | |
4147 | undefined signed overflow. */ | |
6d9c91e9 ILT |
4148 | abs_d = (d >= 0 |
4149 | ? (unsigned HOST_WIDE_INT) d | |
4150 | : - (unsigned HOST_WIDE_INT) d); | |
55c2d311 TG |
4151 | |
4152 | /* n rem d = n rem -d */ | |
4153 | if (rem_flag && d < 0) | |
4154 | { | |
4155 | d = abs_d; | |
2496c7bd | 4156 | op1 = gen_int_mode (abs_d, compute_mode); |
55c2d311 TG |
4157 | } |
4158 | ||
4159 | if (d == 1) | |
4160 | quotient = op0; | |
4161 | else if (d == -1) | |
4162 | quotient = expand_unop (compute_mode, neg_optab, op0, | |
4163 | tquotient, 0); | |
f6c1336c ILT |
4164 | else if (HOST_BITS_PER_WIDE_INT >= size |
4165 | && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1)) | |
f737b132 RK |
4166 | { |
4167 | /* This case is not handled correctly below. */ | |
4168 | quotient = emit_store_flag (tquotient, EQ, op0, op1, | |
4169 | compute_mode, 1, 1); | |
4170 | if (quotient == 0) | |
4171 | goto fail1; | |
4172 | } | |
55c2d311 | 4173 | else if (EXACT_POWER_OF_2_OR_ZERO_P (d) |
0d77cc6c JH |
4174 | && (rem_flag ? smod_pow2_cheap[speed][compute_mode] |
4175 | : sdiv_pow2_cheap[speed][compute_mode]) | |
0b55e932 RS |
4176 | /* We assume that cheap metric is true if the |
4177 | optab has an expander for this mode. */ | |
166cdb08 JH |
4178 | && ((optab_handler ((rem_flag ? smod_optab |
4179 | : sdiv_optab), | |
947131ba | 4180 | compute_mode) |
a8c7e72d | 4181 | != CODE_FOR_nothing) |
947131ba RS |
4182 | || (optab_handler (sdivmod_optab, |
4183 | compute_mode) | |
4184 | != CODE_FOR_nothing))) | |
55c2d311 TG |
4185 | ; |
4186 | else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d)) | |
4187 | { | |
0b55e932 RS |
4188 | if (rem_flag) |
4189 | { | |
4190 | remainder = expand_smod_pow2 (compute_mode, op0, d); | |
4191 | if (remainder) | |
4192 | return gen_lowpart (mode, remainder); | |
4193 | } | |
3d520aaf | 4194 | |
0d77cc6c | 4195 | if (sdiv_pow2_cheap[speed][compute_mode] |
947131ba | 4196 | && ((optab_handler (sdiv_optab, compute_mode) |
3d520aaf | 4197 | != CODE_FOR_nothing) |
947131ba | 4198 | || (optab_handler (sdivmod_optab, compute_mode) |
3d520aaf DE |
4199 | != CODE_FOR_nothing))) |
4200 | quotient = expand_divmod (0, TRUNC_DIV_EXPR, | |
4201 | compute_mode, op0, | |
4202 | gen_int_mode (abs_d, | |
4203 | compute_mode), | |
4204 | NULL_RTX, 0); | |
4205 | else | |
4206 | quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d); | |
55c2d311 | 4207 | |
0b55e932 RS |
4208 | /* We have computed OP0 / abs(OP1). If OP1 is negative, |
4209 | negate the quotient. */ | |
55c2d311 TG |
4210 | if (d < 0) |
4211 | { | |
4212 | insn = get_last_insn (); | |
4e430df8 RK |
4213 | if (insn != last |
4214 | && (set = single_set (insn)) != 0 | |
c8e7fe58 DE |
4215 | && SET_DEST (set) == quotient |
4216 | && abs_d < ((unsigned HOST_WIDE_INT) 1 | |
4217 | << (HOST_BITS_PER_WIDE_INT - 1))) | |
c410d49e | 4218 | set_unique_reg_note (insn, |
502b8322 | 4219 | REG_EQUAL, |
7e5bda2c AM |
4220 | gen_rtx_DIV (compute_mode, |
4221 | op0, | |
69107307 AO |
4222 | GEN_INT |
4223 | (trunc_int_for_mode | |
4224 | (abs_d, | |
4225 | compute_mode)))); | |
55c2d311 TG |
4226 | |
4227 | quotient = expand_unop (compute_mode, neg_optab, | |
4228 | quotient, quotient, 0); | |
4229 | } | |
4230 | } | |
34f016ed | 4231 | else if (size <= HOST_BITS_PER_WIDE_INT) |
55c2d311 TG |
4232 | { |
4233 | choose_multiplier (abs_d, size, size - 1, | |
e71c0aa7 RS |
4234 | &mlr, &post_shift, &lgup); |
4235 | ml = (unsigned HOST_WIDE_INT) INTVAL (mlr); | |
55c2d311 TG |
4236 | if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1)) |
4237 | { | |
4238 | rtx t1, t2, t3; | |
4239 | ||
02a65aef R |
4240 | if (post_shift >= BITS_PER_WORD |
4241 | || size - 1 >= BITS_PER_WORD) | |
4242 | goto fail1; | |
4243 | ||
f40751dd JH |
4244 | extra_cost = (shift_cost[speed][compute_mode][post_shift] |
4245 | + shift_cost[speed][compute_mode][size - 1] | |
4246 | + add_cost[speed][compute_mode]); | |
e71c0aa7 | 4247 | t1 = expand_mult_highpart (compute_mode, op0, mlr, |
71af73bb TG |
4248 | NULL_RTX, 0, |
4249 | max_cost - extra_cost); | |
55c2d311 TG |
4250 | if (t1 == 0) |
4251 | goto fail1; | |
4a90aeeb NS |
4252 | t2 = expand_shift |
4253 | (RSHIFT_EXPR, compute_mode, t1, | |
7d60be94 | 4254 | build_int_cst (NULL_TREE, post_shift), |
4a90aeeb NS |
4255 | NULL_RTX, 0); |
4256 | t3 = expand_shift | |
4257 | (RSHIFT_EXPR, compute_mode, op0, | |
7d60be94 | 4258 | build_int_cst (NULL_TREE, size - 1), |
4a90aeeb | 4259 | NULL_RTX, 0); |
55c2d311 | 4260 | if (d < 0) |
c5c76735 JL |
4261 | quotient |
4262 | = force_operand (gen_rtx_MINUS (compute_mode, | |
4263 | t3, t2), | |
4264 | tquotient); | |
55c2d311 | 4265 | else |
c5c76735 JL |
4266 | quotient |
4267 | = force_operand (gen_rtx_MINUS (compute_mode, | |
4268 | t2, t3), | |
4269 | tquotient); | |
55c2d311 TG |
4270 | } |
4271 | else | |
4272 | { | |
4273 | rtx t1, t2, t3, t4; | |
4274 | ||
02a65aef R |
4275 | if (post_shift >= BITS_PER_WORD |
4276 | || size - 1 >= BITS_PER_WORD) | |
4277 | goto fail1; | |
4278 | ||
55c2d311 | 4279 | ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1); |
e71c0aa7 | 4280 | mlr = gen_int_mode (ml, compute_mode); |
f40751dd JH |
4281 | extra_cost = (shift_cost[speed][compute_mode][post_shift] |
4282 | + shift_cost[speed][compute_mode][size - 1] | |
4283 | + 2 * add_cost[speed][compute_mode]); | |
e71c0aa7 | 4284 | t1 = expand_mult_highpart (compute_mode, op0, mlr, |
71af73bb TG |
4285 | NULL_RTX, 0, |
4286 | max_cost - extra_cost); | |
55c2d311 TG |
4287 | if (t1 == 0) |
4288 | goto fail1; | |
c5c76735 JL |
4289 | t2 = force_operand (gen_rtx_PLUS (compute_mode, |
4290 | t1, op0), | |
55c2d311 | 4291 | NULL_RTX); |
4a90aeeb NS |
4292 | t3 = expand_shift |
4293 | (RSHIFT_EXPR, compute_mode, t2, | |
7d60be94 | 4294 | build_int_cst (NULL_TREE, post_shift), |
4a90aeeb NS |
4295 | NULL_RTX, 0); |
4296 | t4 = expand_shift | |
4297 | (RSHIFT_EXPR, compute_mode, op0, | |
7d60be94 | 4298 | build_int_cst (NULL_TREE, size - 1), |
4a90aeeb | 4299 | NULL_RTX, 0); |
55c2d311 | 4300 | if (d < 0) |
c5c76735 JL |
4301 | quotient |
4302 | = force_operand (gen_rtx_MINUS (compute_mode, | |
4303 | t4, t3), | |
4304 | tquotient); | |
55c2d311 | 4305 | else |
c5c76735 JL |
4306 | quotient |
4307 | = force_operand (gen_rtx_MINUS (compute_mode, | |
4308 | t3, t4), | |
4309 | tquotient); | |
55c2d311 TG |
4310 | } |
4311 | } | |
34f016ed TG |
4312 | else /* Too wide mode to use tricky code */ |
4313 | break; | |
55c2d311 | 4314 | |
4e430df8 RK |
4315 | insn = get_last_insn (); |
4316 | if (insn != last | |
4317 | && (set = single_set (insn)) != 0 | |
4318 | && SET_DEST (set) == quotient) | |
c410d49e | 4319 | set_unique_reg_note (insn, |
502b8322 | 4320 | REG_EQUAL, |
7e5bda2c | 4321 | gen_rtx_DIV (compute_mode, op0, op1)); |
55c2d311 TG |
4322 | } |
4323 | break; | |
4324 | } | |
4325 | fail1: | |
4326 | delete_insns_since (last); | |
4327 | break; | |
44037a66 | 4328 | |
55c2d311 TG |
4329 | case FLOOR_DIV_EXPR: |
4330 | case FLOOR_MOD_EXPR: | |
4331 | /* We will come here only for signed operations. */ | |
4332 | if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size) | |
4333 | { | |
e71c0aa7 | 4334 | unsigned HOST_WIDE_INT mh; |
55c2d311 TG |
4335 | int pre_shift, lgup, post_shift; |
4336 | HOST_WIDE_INT d = INTVAL (op1); | |
e71c0aa7 | 4337 | rtx ml; |
55c2d311 TG |
4338 | |
4339 | if (d > 0) | |
4340 | { | |
4341 | /* We could just as easily deal with negative constants here, | |
4342 | but it does not seem worth the trouble for GCC 2.6. */ | |
4343 | if (EXACT_POWER_OF_2_OR_ZERO_P (d)) | |
4344 | { | |
4345 | pre_shift = floor_log2 (d); | |
4346 | if (rem_flag) | |
4347 | { | |
4348 | remainder = expand_binop (compute_mode, and_optab, op0, | |
4349 | GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1), | |
4350 | remainder, 0, OPTAB_LIB_WIDEN); | |
4351 | if (remainder) | |
c8dbc8ca | 4352 | return gen_lowpart (mode, remainder); |
55c2d311 | 4353 | } |
4a90aeeb NS |
4354 | quotient = expand_shift |
4355 | (RSHIFT_EXPR, compute_mode, op0, | |
7d60be94 | 4356 | build_int_cst (NULL_TREE, pre_shift), |
4a90aeeb | 4357 | tquotient, 0); |
55c2d311 TG |
4358 | } |
4359 | else | |
4360 | { | |
4361 | rtx t1, t2, t3, t4; | |
4362 | ||
4363 | mh = choose_multiplier (d, size, size - 1, | |
4364 | &ml, &post_shift, &lgup); | |
5b0264cb | 4365 | gcc_assert (!mh); |
55c2d311 | 4366 | |
02a65aef R |
4367 | if (post_shift < BITS_PER_WORD |
4368 | && size - 1 < BITS_PER_WORD) | |
55c2d311 | 4369 | { |
4a90aeeb NS |
4370 | t1 = expand_shift |
4371 | (RSHIFT_EXPR, compute_mode, op0, | |
7d60be94 | 4372 | build_int_cst (NULL_TREE, size - 1), |
4a90aeeb | 4373 | NULL_RTX, 0); |
02a65aef R |
4374 | t2 = expand_binop (compute_mode, xor_optab, op0, t1, |
4375 | NULL_RTX, 0, OPTAB_WIDEN); | |
f40751dd JH |
4376 | extra_cost = (shift_cost[speed][compute_mode][post_shift] |
4377 | + shift_cost[speed][compute_mode][size - 1] | |
4378 | + 2 * add_cost[speed][compute_mode]); | |
e71c0aa7 | 4379 | t3 = expand_mult_highpart (compute_mode, t2, ml, |
02a65aef R |
4380 | NULL_RTX, 1, |
4381 | max_cost - extra_cost); | |
4382 | if (t3 != 0) | |
4383 | { | |
4a90aeeb NS |
4384 | t4 = expand_shift |
4385 | (RSHIFT_EXPR, compute_mode, t3, | |
7d60be94 | 4386 | build_int_cst (NULL_TREE, post_shift), |
4a90aeeb | 4387 | NULL_RTX, 1); |
02a65aef R |
4388 | quotient = expand_binop (compute_mode, xor_optab, |
4389 | t4, t1, tquotient, 0, | |
4390 | OPTAB_WIDEN); | |
4391 | } | |
55c2d311 TG |
4392 | } |
4393 | } | |
4394 | } | |
4395 | else | |
4396 | { | |
4397 | rtx nsign, t1, t2, t3, t4; | |
38a448ca RH |
4398 | t1 = force_operand (gen_rtx_PLUS (compute_mode, |
4399 | op0, constm1_rtx), NULL_RTX); | |
55c2d311 TG |
4400 | t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX, |
4401 | 0, OPTAB_WIDEN); | |
4a90aeeb NS |
4402 | nsign = expand_shift |
4403 | (RSHIFT_EXPR, compute_mode, t2, | |
7d60be94 | 4404 | build_int_cst (NULL_TREE, size - 1), |
4a90aeeb | 4405 | NULL_RTX, 0); |
38a448ca | 4406 | t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign), |
55c2d311 TG |
4407 | NULL_RTX); |
4408 | t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1, | |
4409 | NULL_RTX, 0); | |
4410 | if (t4) | |
4411 | { | |
4412 | rtx t5; | |
4413 | t5 = expand_unop (compute_mode, one_cmpl_optab, nsign, | |
4414 | NULL_RTX, 0); | |
38a448ca RH |
4415 | quotient = force_operand (gen_rtx_PLUS (compute_mode, |
4416 | t4, t5), | |
55c2d311 TG |
4417 | tquotient); |
4418 | } | |
4419 | } | |
4420 | } | |
4421 | ||
4422 | if (quotient != 0) | |
4423 | break; | |
4424 | delete_insns_since (last); | |
4425 | ||
4426 | /* Try using an instruction that produces both the quotient and | |
4427 | remainder, using truncation. We can easily compensate the quotient | |
4428 | or remainder to get floor rounding, once we have the remainder. | |
4429 | Notice that we compute also the final remainder value here, | |
4430 | and return the result right away. */ | |
a45cf58c | 4431 | if (target == 0 || GET_MODE (target) != compute_mode) |
55c2d311 | 4432 | target = gen_reg_rtx (compute_mode); |
668443c9 | 4433 | |
55c2d311 TG |
4434 | if (rem_flag) |
4435 | { | |
668443c9 | 4436 | remainder |
f8cfc6aa | 4437 | = REG_P (target) ? target : gen_reg_rtx (compute_mode); |
55c2d311 TG |
4438 | quotient = gen_reg_rtx (compute_mode); |
4439 | } | |
4440 | else | |
4441 | { | |
668443c9 | 4442 | quotient |
f8cfc6aa | 4443 | = REG_P (target) ? target : gen_reg_rtx (compute_mode); |
55c2d311 TG |
4444 | remainder = gen_reg_rtx (compute_mode); |
4445 | } | |
4446 | ||
4447 | if (expand_twoval_binop (sdivmod_optab, op0, op1, | |
4448 | quotient, remainder, 0)) | |
4449 | { | |
4450 | /* This could be computed with a branch-less sequence. | |
4451 | Save that for later. */ | |
4452 | rtx tem; | |
4453 | rtx label = gen_label_rtx (); | |
f5963e61 | 4454 | do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label); |
55c2d311 TG |
4455 | tem = expand_binop (compute_mode, xor_optab, op0, op1, |
4456 | NULL_RTX, 0, OPTAB_WIDEN); | |
f5963e61 | 4457 | do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label); |
55c2d311 TG |
4458 | expand_dec (quotient, const1_rtx); |
4459 | expand_inc (remainder, op1); | |
4460 | emit_label (label); | |
c8dbc8ca | 4461 | return gen_lowpart (mode, rem_flag ? remainder : quotient); |
55c2d311 TG |
4462 | } |
4463 | ||
4464 | /* No luck with division elimination or divmod. Have to do it | |
4465 | by conditionally adjusting op0 *and* the result. */ | |
44037a66 | 4466 | { |
55c2d311 TG |
4467 | rtx label1, label2, label3, label4, label5; |
4468 | rtx adjusted_op0; | |
4469 | rtx tem; | |
4470 | ||
4471 | quotient = gen_reg_rtx (compute_mode); | |
4472 | adjusted_op0 = copy_to_mode_reg (compute_mode, op0); | |
4473 | label1 = gen_label_rtx (); | |
4474 | label2 = gen_label_rtx (); | |
4475 | label3 = gen_label_rtx (); | |
4476 | label4 = gen_label_rtx (); | |
4477 | label5 = gen_label_rtx (); | |
f5963e61 JL |
4478 | do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2); |
4479 | do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1); | |
55c2d311 TG |
4480 | tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1, |
4481 | quotient, 0, OPTAB_LIB_WIDEN); | |
4482 | if (tem != quotient) | |
4483 | emit_move_insn (quotient, tem); | |
4484 | emit_jump_insn (gen_jump (label5)); | |
4485 | emit_barrier (); | |
4486 | emit_label (label1); | |
44037a66 | 4487 | expand_inc (adjusted_op0, const1_rtx); |
55c2d311 TG |
4488 | emit_jump_insn (gen_jump (label4)); |
4489 | emit_barrier (); | |
4490 | emit_label (label2); | |
f5963e61 | 4491 | do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3); |
55c2d311 TG |
4492 | tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1, |
4493 | quotient, 0, OPTAB_LIB_WIDEN); | |
4494 | if (tem != quotient) | |
4495 | emit_move_insn (quotient, tem); | |
4496 | emit_jump_insn (gen_jump (label5)); | |
4497 | emit_barrier (); | |
4498 | emit_label (label3); | |
4499 | expand_dec (adjusted_op0, const1_rtx); | |
4500 | emit_label (label4); | |
4501 | tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1, | |
4502 | quotient, 0, OPTAB_LIB_WIDEN); | |
4503 | if (tem != quotient) | |
4504 | emit_move_insn (quotient, tem); | |
4505 | expand_dec (quotient, const1_rtx); | |
4506 | emit_label (label5); | |
44037a66 | 4507 | } |
55c2d311 | 4508 | break; |
44037a66 | 4509 | |
55c2d311 TG |
4510 | case CEIL_DIV_EXPR: |
4511 | case CEIL_MOD_EXPR: | |
4512 | if (unsignedp) | |
4513 | { | |
9176af2f TG |
4514 | if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))) |
4515 | { | |
4516 | rtx t1, t2, t3; | |
4517 | unsigned HOST_WIDE_INT d = INTVAL (op1); | |
4518 | t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0, | |
7d60be94 | 4519 | build_int_cst (NULL_TREE, floor_log2 (d)), |
412381d9 | 4520 | tquotient, 1); |
9176af2f TG |
4521 | t2 = expand_binop (compute_mode, and_optab, op0, |
4522 | GEN_INT (d - 1), | |
4523 | NULL_RTX, 1, OPTAB_LIB_WIDEN); | |
4524 | t3 = gen_reg_rtx (compute_mode); | |
4525 | t3 = emit_store_flag (t3, NE, t2, const0_rtx, | |
4526 | compute_mode, 1, 1); | |
412381d9 TG |
4527 | if (t3 == 0) |
4528 | { | |
4529 | rtx lab; | |
4530 | lab = gen_label_rtx (); | |
f5963e61 | 4531 | do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab); |
412381d9 TG |
4532 | expand_inc (t1, const1_rtx); |
4533 | emit_label (lab); | |
4534 | quotient = t1; | |
4535 | } | |
4536 | else | |
38a448ca RH |
4537 | quotient = force_operand (gen_rtx_PLUS (compute_mode, |
4538 | t1, t3), | |
412381d9 | 4539 | tquotient); |
9176af2f TG |
4540 | break; |
4541 | } | |
55c2d311 TG |
4542 | |
4543 | /* Try using an instruction that produces both the quotient and | |
4544 | remainder, using truncation. We can easily compensate the | |
4545 | quotient or remainder to get ceiling rounding, once we have the | |
4546 | remainder. Notice that we compute also the final remainder | |
4547 | value here, and return the result right away. */ | |
a45cf58c | 4548 | if (target == 0 || GET_MODE (target) != compute_mode) |
55c2d311 | 4549 | target = gen_reg_rtx (compute_mode); |
668443c9 | 4550 | |
55c2d311 TG |
4551 | if (rem_flag) |
4552 | { | |
f8cfc6aa | 4553 | remainder = (REG_P (target) |
668443c9 | 4554 | ? target : gen_reg_rtx (compute_mode)); |
55c2d311 TG |
4555 | quotient = gen_reg_rtx (compute_mode); |
4556 | } | |
4557 | else | |
4558 | { | |
f8cfc6aa | 4559 | quotient = (REG_P (target) |
668443c9 | 4560 | ? target : gen_reg_rtx (compute_mode)); |
55c2d311 TG |
4561 | remainder = gen_reg_rtx (compute_mode); |
4562 | } | |
4563 | ||
4564 | if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, | |
4565 | remainder, 1)) | |
4566 | { | |
4567 | /* This could be computed with a branch-less sequence. | |
4568 | Save that for later. */ | |
4569 | rtx label = gen_label_rtx (); | |
f5963e61 JL |
4570 | do_cmp_and_jump (remainder, const0_rtx, EQ, |
4571 | compute_mode, label); | |
55c2d311 TG |
4572 | expand_inc (quotient, const1_rtx); |
4573 | expand_dec (remainder, op1); | |
4574 | emit_label (label); | |
c8dbc8ca | 4575 | return gen_lowpart (mode, rem_flag ? remainder : quotient); |
55c2d311 TG |
4576 | } |
4577 | ||
4578 | /* No luck with division elimination or divmod. Have to do it | |
4579 | by conditionally adjusting op0 *and* the result. */ | |
44037a66 | 4580 | { |
55c2d311 TG |
4581 | rtx label1, label2; |
4582 | rtx adjusted_op0, tem; | |
4583 | ||
4584 | quotient = gen_reg_rtx (compute_mode); | |
4585 | adjusted_op0 = copy_to_mode_reg (compute_mode, op0); | |
4586 | label1 = gen_label_rtx (); | |
4587 | label2 = gen_label_rtx (); | |
f5963e61 JL |
4588 | do_cmp_and_jump (adjusted_op0, const0_rtx, NE, |
4589 | compute_mode, label1); | |
55c2d311 TG |
4590 | emit_move_insn (quotient, const0_rtx); |
4591 | emit_jump_insn (gen_jump (label2)); | |
4592 | emit_barrier (); | |
4593 | emit_label (label1); | |
4594 | expand_dec (adjusted_op0, const1_rtx); | |
4595 | tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1, | |
4596 | quotient, 1, OPTAB_LIB_WIDEN); | |
4597 | if (tem != quotient) | |
4598 | emit_move_insn (quotient, tem); | |
4599 | expand_inc (quotient, const1_rtx); | |
4600 | emit_label (label2); | |
44037a66 | 4601 | } |
55c2d311 TG |
4602 | } |
4603 | else /* signed */ | |
4604 | { | |
73f27728 RK |
4605 | if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)) |
4606 | && INTVAL (op1) >= 0) | |
4607 | { | |
4608 | /* This is extremely similar to the code for the unsigned case | |
4609 | above. For 2.7 we should merge these variants, but for | |
4610 | 2.6.1 I don't want to touch the code for unsigned since that | |
4611 | get used in C. The signed case will only be used by other | |
4612 | languages (Ada). */ | |
4613 | ||
4614 | rtx t1, t2, t3; | |
4615 | unsigned HOST_WIDE_INT d = INTVAL (op1); | |
4616 | t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0, | |
7d60be94 | 4617 | build_int_cst (NULL_TREE, floor_log2 (d)), |
73f27728 RK |
4618 | tquotient, 0); |
4619 | t2 = expand_binop (compute_mode, and_optab, op0, | |
4620 | GEN_INT (d - 1), | |
4621 | NULL_RTX, 1, OPTAB_LIB_WIDEN); | |
4622 | t3 = gen_reg_rtx (compute_mode); | |
4623 | t3 = emit_store_flag (t3, NE, t2, const0_rtx, | |
4624 | compute_mode, 1, 1); | |
4625 | if (t3 == 0) | |
4626 | { | |
4627 | rtx lab; | |
4628 | lab = gen_label_rtx (); | |
f5963e61 | 4629 | do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab); |
73f27728 RK |
4630 | expand_inc (t1, const1_rtx); |
4631 | emit_label (lab); | |
4632 | quotient = t1; | |
4633 | } | |
4634 | else | |
38a448ca RH |
4635 | quotient = force_operand (gen_rtx_PLUS (compute_mode, |
4636 | t1, t3), | |
73f27728 RK |
4637 | tquotient); |
4638 | break; | |
4639 | } | |
4640 | ||
55c2d311 TG |
4641 | /* Try using an instruction that produces both the quotient and |
4642 | remainder, using truncation. We can easily compensate the | |
4643 | quotient or remainder to get ceiling rounding, once we have the | |
4644 | remainder. Notice that we compute also the final remainder | |
4645 | value here, and return the result right away. */ | |
a45cf58c | 4646 | if (target == 0 || GET_MODE (target) != compute_mode) |
55c2d311 TG |
4647 | target = gen_reg_rtx (compute_mode); |
4648 | if (rem_flag) | |
4649 | { | |
f8cfc6aa | 4650 | remainder= (REG_P (target) |
668443c9 | 4651 | ? target : gen_reg_rtx (compute_mode)); |
55c2d311 TG |
4652 | quotient = gen_reg_rtx (compute_mode); |
4653 | } | |
4654 | else | |
4655 | { | |
f8cfc6aa | 4656 | quotient = (REG_P (target) |
668443c9 | 4657 | ? target : gen_reg_rtx (compute_mode)); |
55c2d311 TG |
4658 | remainder = gen_reg_rtx (compute_mode); |
4659 | } | |
4660 | ||
4661 | if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, | |
4662 | remainder, 0)) | |
4663 | { | |
4664 | /* This could be computed with a branch-less sequence. | |
4665 | Save that for later. */ | |
4666 | rtx tem; | |
4667 | rtx label = gen_label_rtx (); | |
f5963e61 JL |
4668 | do_cmp_and_jump (remainder, const0_rtx, EQ, |
4669 | compute_mode, label); | |
55c2d311 TG |
4670 | tem = expand_binop (compute_mode, xor_optab, op0, op1, |
4671 | NULL_RTX, 0, OPTAB_WIDEN); | |
f5963e61 | 4672 | do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label); |
55c2d311 TG |
4673 | expand_inc (quotient, const1_rtx); |
4674 | expand_dec (remainder, op1); | |
4675 | emit_label (label); | |
c8dbc8ca | 4676 | return gen_lowpart (mode, rem_flag ? remainder : quotient); |
55c2d311 TG |
4677 | } |
4678 | ||
4679 | /* No luck with division elimination or divmod. Have to do it | |
4680 | by conditionally adjusting op0 *and* the result. */ | |
44037a66 | 4681 | { |
55c2d311 TG |
4682 | rtx label1, label2, label3, label4, label5; |
4683 | rtx adjusted_op0; | |
4684 | rtx tem; | |
4685 | ||
4686 | quotient = gen_reg_rtx (compute_mode); | |
4687 | adjusted_op0 = copy_to_mode_reg (compute_mode, op0); | |
4688 | label1 = gen_label_rtx (); | |
4689 | label2 = gen_label_rtx (); | |
4690 | label3 = gen_label_rtx (); | |
4691 | label4 = gen_label_rtx (); | |
4692 | label5 = gen_label_rtx (); | |
f5963e61 JL |
4693 | do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2); |
4694 | do_cmp_and_jump (adjusted_op0, const0_rtx, GT, | |
4695 | compute_mode, label1); | |
55c2d311 TG |
4696 | tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1, |
4697 | quotient, 0, OPTAB_LIB_WIDEN); | |
4698 | if (tem != quotient) | |
4699 | emit_move_insn (quotient, tem); | |
4700 | emit_jump_insn (gen_jump (label5)); | |
4701 | emit_barrier (); | |
4702 | emit_label (label1); | |
4703 | expand_dec (adjusted_op0, const1_rtx); | |
4704 | emit_jump_insn (gen_jump (label4)); | |
4705 | emit_barrier (); | |
4706 | emit_label (label2); | |
f5963e61 JL |
4707 | do_cmp_and_jump (adjusted_op0, const0_rtx, LT, |
4708 | compute_mode, label3); | |
55c2d311 TG |
4709 | tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1, |
4710 | quotient, 0, OPTAB_LIB_WIDEN); | |
4711 | if (tem != quotient) | |
4712 | emit_move_insn (quotient, tem); | |
4713 | emit_jump_insn (gen_jump (label5)); | |
4714 | emit_barrier (); | |
4715 | emit_label (label3); | |
4716 | expand_inc (adjusted_op0, const1_rtx); | |
4717 | emit_label (label4); | |
4718 | tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1, | |
4719 | quotient, 0, OPTAB_LIB_WIDEN); | |
4720 | if (tem != quotient) | |
4721 | emit_move_insn (quotient, tem); | |
4722 | expand_inc (quotient, const1_rtx); | |
4723 | emit_label (label5); | |
44037a66 | 4724 | } |
55c2d311 TG |
4725 | } |
4726 | break; | |
bc1c7e93 | 4727 | |
55c2d311 TG |
4728 | case EXACT_DIV_EXPR: |
4729 | if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size) | |
4730 | { | |
4731 | HOST_WIDE_INT d = INTVAL (op1); | |
4732 | unsigned HOST_WIDE_INT ml; | |
91ce572a | 4733 | int pre_shift; |
55c2d311 TG |
4734 | rtx t1; |
4735 | ||
91ce572a CC |
4736 | pre_shift = floor_log2 (d & -d); |
4737 | ml = invert_mod2n (d >> pre_shift, size); | |
4738 | t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0, | |
7d60be94 | 4739 | build_int_cst (NULL_TREE, pre_shift), |
4a90aeeb | 4740 | NULL_RTX, unsignedp); |
69107307 | 4741 | quotient = expand_mult (compute_mode, t1, |
2496c7bd | 4742 | gen_int_mode (ml, compute_mode), |
31ff3e0b | 4743 | NULL_RTX, 1); |
55c2d311 TG |
4744 | |
4745 | insn = get_last_insn (); | |
7e5bda2c | 4746 | set_unique_reg_note (insn, |
502b8322 | 4747 | REG_EQUAL, |
7e5bda2c AM |
4748 | gen_rtx_fmt_ee (unsignedp ? UDIV : DIV, |
4749 | compute_mode, | |
4750 | op0, op1)); | |
55c2d311 TG |
4751 | } |
4752 | break; | |
4753 | ||
4754 | case ROUND_DIV_EXPR: | |
4755 | case ROUND_MOD_EXPR: | |
69f61901 RK |
4756 | if (unsignedp) |
4757 | { | |
4758 | rtx tem; | |
4759 | rtx label; | |
4760 | label = gen_label_rtx (); | |
4761 | quotient = gen_reg_rtx (compute_mode); | |
4762 | remainder = gen_reg_rtx (compute_mode); | |
4763 | if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0) | |
4764 | { | |
4765 | rtx tem; | |
4766 | quotient = expand_binop (compute_mode, udiv_optab, op0, op1, | |
4767 | quotient, 1, OPTAB_LIB_WIDEN); | |
4768 | tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1); | |
4769 | remainder = expand_binop (compute_mode, sub_optab, op0, tem, | |
4770 | remainder, 1, OPTAB_LIB_WIDEN); | |
4771 | } | |
4772 | tem = plus_constant (op1, -1); | |
4773 | tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, | |
9a9d280e | 4774 | integer_one_node, NULL_RTX, 1); |
f5963e61 | 4775 | do_cmp_and_jump (remainder, tem, LEU, compute_mode, label); |
69f61901 RK |
4776 | expand_inc (quotient, const1_rtx); |
4777 | expand_dec (remainder, op1); | |
4778 | emit_label (label); | |
4779 | } | |
4780 | else | |
4781 | { | |
4782 | rtx abs_rem, abs_op1, tem, mask; | |
4783 | rtx label; | |
4784 | label = gen_label_rtx (); | |
4785 | quotient = gen_reg_rtx (compute_mode); | |
4786 | remainder = gen_reg_rtx (compute_mode); | |
4787 | if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0) | |
4788 | { | |
4789 | rtx tem; | |
4790 | quotient = expand_binop (compute_mode, sdiv_optab, op0, op1, | |
4791 | quotient, 0, OPTAB_LIB_WIDEN); | |
4792 | tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0); | |
4793 | remainder = expand_binop (compute_mode, sub_optab, op0, tem, | |
4794 | remainder, 0, OPTAB_LIB_WIDEN); | |
4795 | } | |
91ce572a CC |
4796 | abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0); |
4797 | abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0); | |
69f61901 | 4798 | tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem, |
9a9d280e | 4799 | integer_one_node, NULL_RTX, 1); |
f5963e61 | 4800 | do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label); |
69f61901 RK |
4801 | tem = expand_binop (compute_mode, xor_optab, op0, op1, |
4802 | NULL_RTX, 0, OPTAB_WIDEN); | |
4803 | mask = expand_shift (RSHIFT_EXPR, compute_mode, tem, | |
7d60be94 | 4804 | build_int_cst (NULL_TREE, size - 1), |
4a90aeeb | 4805 | NULL_RTX, 0); |
69f61901 RK |
4806 | tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx, |
4807 | NULL_RTX, 0, OPTAB_WIDEN); | |
4808 | tem = expand_binop (compute_mode, sub_optab, tem, mask, | |
4809 | NULL_RTX, 0, OPTAB_WIDEN); | |
4810 | expand_inc (quotient, tem); | |
4811 | tem = expand_binop (compute_mode, xor_optab, mask, op1, | |
4812 | NULL_RTX, 0, OPTAB_WIDEN); | |
4813 | tem = expand_binop (compute_mode, sub_optab, tem, mask, | |
4814 | NULL_RTX, 0, OPTAB_WIDEN); | |
4815 | expand_dec (remainder, tem); | |
4816 | emit_label (label); | |
4817 | } | |
4818 | return gen_lowpart (mode, rem_flag ? remainder : quotient); | |
c410d49e | 4819 | |
e9a25f70 | 4820 | default: |
5b0264cb | 4821 | gcc_unreachable (); |
55c2d311 | 4822 | } |
44037a66 | 4823 | |
55c2d311 | 4824 | if (quotient == 0) |
44037a66 | 4825 | { |
a45cf58c RK |
4826 | if (target && GET_MODE (target) != compute_mode) |
4827 | target = 0; | |
4828 | ||
55c2d311 | 4829 | if (rem_flag) |
44037a66 | 4830 | { |
32fdf36b | 4831 | /* Try to produce the remainder without producing the quotient. |
d6a7951f | 4832 | If we seem to have a divmod pattern that does not require widening, |
b20b352b | 4833 | don't try widening here. We should really have a WIDEN argument |
32fdf36b TG |
4834 | to expand_twoval_binop, since what we'd really like to do here is |
4835 | 1) try a mod insn in compute_mode | |
4836 | 2) try a divmod insn in compute_mode | |
4837 | 3) try a div insn in compute_mode and multiply-subtract to get | |
4838 | remainder | |
4839 | 4) try the same things with widening allowed. */ | |
4840 | remainder | |
4841 | = sign_expand_binop (compute_mode, umod_optab, smod_optab, | |
4842 | op0, op1, target, | |
4843 | unsignedp, | |
947131ba | 4844 | ((optab_handler (optab2, compute_mode) |
32fdf36b TG |
4845 | != CODE_FOR_nothing) |
4846 | ? OPTAB_DIRECT : OPTAB_WIDEN)); | |
55c2d311 | 4847 | if (remainder == 0) |
44037a66 TG |
4848 | { |
4849 | /* No luck there. Can we do remainder and divide at once | |
4850 | without a library call? */ | |
55c2d311 TG |
4851 | remainder = gen_reg_rtx (compute_mode); |
4852 | if (! expand_twoval_binop ((unsignedp | |
4853 | ? udivmod_optab | |
4854 | : sdivmod_optab), | |
4855 | op0, op1, | |
4856 | NULL_RTX, remainder, unsignedp)) | |
4857 | remainder = 0; | |
44037a66 | 4858 | } |
55c2d311 TG |
4859 | |
4860 | if (remainder) | |
4861 | return gen_lowpart (mode, remainder); | |
44037a66 | 4862 | } |
44037a66 | 4863 | |
dc38b292 RK |
4864 | /* Produce the quotient. Try a quotient insn, but not a library call. |
4865 | If we have a divmod in this mode, use it in preference to widening | |
4866 | the div (for this test we assume it will not fail). Note that optab2 | |
4867 | is set to the one of the two optabs that the call below will use. */ | |
4868 | quotient | |
4869 | = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab, | |
4870 | op0, op1, rem_flag ? NULL_RTX : target, | |
4871 | unsignedp, | |
947131ba | 4872 | ((optab_handler (optab2, compute_mode) |
dc38b292 RK |
4873 | != CODE_FOR_nothing) |
4874 | ? OPTAB_DIRECT : OPTAB_WIDEN)); | |
4875 | ||
55c2d311 | 4876 | if (quotient == 0) |
44037a66 TG |
4877 | { |
4878 | /* No luck there. Try a quotient-and-remainder insn, | |
4879 | keeping the quotient alone. */ | |
55c2d311 | 4880 | quotient = gen_reg_rtx (compute_mode); |
44037a66 | 4881 | if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab, |
55c2d311 TG |
4882 | op0, op1, |
4883 | quotient, NULL_RTX, unsignedp)) | |
4884 | { | |
4885 | quotient = 0; | |
4886 | if (! rem_flag) | |
4887 | /* Still no luck. If we are not computing the remainder, | |
4888 | use a library call for the quotient. */ | |
4889 | quotient = sign_expand_binop (compute_mode, | |
4890 | udiv_optab, sdiv_optab, | |
4891 | op0, op1, target, | |
4892 | unsignedp, OPTAB_LIB_WIDEN); | |
4893 | } | |
44037a66 | 4894 | } |
44037a66 TG |
4895 | } |
4896 | ||
44037a66 TG |
4897 | if (rem_flag) |
4898 | { | |
a45cf58c RK |
4899 | if (target && GET_MODE (target) != compute_mode) |
4900 | target = 0; | |
4901 | ||
55c2d311 | 4902 | if (quotient == 0) |
b3f8d95d MM |
4903 | { |
4904 | /* No divide instruction either. Use library for remainder. */ | |
4905 | remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab, | |
4906 | op0, op1, target, | |
4907 | unsignedp, OPTAB_LIB_WIDEN); | |
4908 | /* No remainder function. Try a quotient-and-remainder | |
4909 | function, keeping the remainder. */ | |
4910 | if (!remainder) | |
4911 | { | |
4912 | remainder = gen_reg_rtx (compute_mode); | |
b8698a0f | 4913 | if (!expand_twoval_binop_libfunc |
b3f8d95d MM |
4914 | (unsignedp ? udivmod_optab : sdivmod_optab, |
4915 | op0, op1, | |
4916 | NULL_RTX, remainder, | |
4917 | unsignedp ? UMOD : MOD)) | |
4918 | remainder = NULL_RTX; | |
4919 | } | |
4920 | } | |
44037a66 TG |
4921 | else |
4922 | { | |
4923 | /* We divided. Now finish doing X - Y * (X / Y). */ | |
55c2d311 TG |
4924 | remainder = expand_mult (compute_mode, quotient, op1, |
4925 | NULL_RTX, unsignedp); | |
4926 | remainder = expand_binop (compute_mode, sub_optab, op0, | |
4927 | remainder, target, unsignedp, | |
4928 | OPTAB_LIB_WIDEN); | |
44037a66 TG |
4929 | } |
4930 | } | |
4931 | ||
55c2d311 | 4932 | return gen_lowpart (mode, rem_flag ? remainder : quotient); |
44037a66 TG |
4933 | } |
4934 | \f | |
4935 | /* Return a tree node with data type TYPE, describing the value of X. | |
4dfa0342 | 4936 | Usually this is an VAR_DECL, if there is no obvious better choice. |
44037a66 | 4937 | X may be an expression, however we only support those expressions |
6d2f8887 | 4938 | generated by loop.c. */ |
44037a66 TG |
4939 | |
4940 | tree | |
502b8322 | 4941 | make_tree (tree type, rtx x) |
44037a66 TG |
4942 | { |
4943 | tree t; | |
4944 | ||
4945 | switch (GET_CODE (x)) | |
4946 | { | |
4947 | case CONST_INT: | |
4a90aeeb NS |
4948 | { |
4949 | HOST_WIDE_INT hi = 0; | |
4950 | ||
4951 | if (INTVAL (x) < 0 | |
4952 | && !(TYPE_UNSIGNED (type) | |
4953 | && (GET_MODE_BITSIZE (TYPE_MODE (type)) | |
4954 | < HOST_BITS_PER_WIDE_INT))) | |
4955 | hi = -1; | |
b8698a0f | 4956 | |
7d60be94 | 4957 | t = build_int_cst_wide (type, INTVAL (x), hi); |
b8698a0f | 4958 | |
4a90aeeb NS |
4959 | return t; |
4960 | } | |
b8698a0f | 4961 | |
44037a66 TG |
4962 | case CONST_DOUBLE: |
4963 | if (GET_MODE (x) == VOIDmode) | |
7d60be94 NS |
4964 | t = build_int_cst_wide (type, |
4965 | CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x)); | |
44037a66 TG |
4966 | else |
4967 | { | |
4968 | REAL_VALUE_TYPE d; | |
4969 | ||
4970 | REAL_VALUE_FROM_CONST_DOUBLE (d, x); | |
4971 | t = build_real (type, d); | |
4972 | } | |
4973 | ||
4974 | return t; | |
69ef87e2 AH |
4975 | |
4976 | case CONST_VECTOR: | |
4977 | { | |
b8b7f162 RS |
4978 | int units = CONST_VECTOR_NUNITS (x); |
4979 | tree itype = TREE_TYPE (type); | |
69ef87e2 | 4980 | tree t = NULL_TREE; |
b8b7f162 | 4981 | int i; |
69ef87e2 | 4982 | |
69ef87e2 AH |
4983 | |
4984 | /* Build a tree with vector elements. */ | |
4985 | for (i = units - 1; i >= 0; --i) | |
4986 | { | |
b8b7f162 RS |
4987 | rtx elt = CONST_VECTOR_ELT (x, i); |
4988 | t = tree_cons (NULL_TREE, make_tree (itype, elt), t); | |
69ef87e2 | 4989 | } |
c410d49e | 4990 | |
69ef87e2 AH |
4991 | return build_vector (type, t); |
4992 | } | |
4993 | ||
44037a66 | 4994 | case PLUS: |
4845b383 KH |
4995 | return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)), |
4996 | make_tree (type, XEXP (x, 1))); | |
c410d49e | 4997 | |
44037a66 | 4998 | case MINUS: |
4845b383 KH |
4999 | return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)), |
5000 | make_tree (type, XEXP (x, 1))); | |
c410d49e | 5001 | |
44037a66 | 5002 | case NEG: |
4845b383 | 5003 | return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0))); |
44037a66 TG |
5004 | |
5005 | case MULT: | |
4845b383 KH |
5006 | return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)), |
5007 | make_tree (type, XEXP (x, 1))); | |
c410d49e | 5008 | |
44037a66 | 5009 | case ASHIFT: |
4845b383 KH |
5010 | return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)), |
5011 | make_tree (type, XEXP (x, 1))); | |
c410d49e | 5012 | |
44037a66 | 5013 | case LSHIFTRT: |
ca5ba2a3 | 5014 | t = unsigned_type_for (type); |
aeba6c28 JM |
5015 | return fold_convert (type, build2 (RSHIFT_EXPR, t, |
5016 | make_tree (t, XEXP (x, 0)), | |
5017 | make_tree (type, XEXP (x, 1)))); | |
c410d49e | 5018 | |
44037a66 | 5019 | case ASHIFTRT: |
12753674 | 5020 | t = signed_type_for (type); |
aeba6c28 JM |
5021 | return fold_convert (type, build2 (RSHIFT_EXPR, t, |
5022 | make_tree (t, XEXP (x, 0)), | |
5023 | make_tree (type, XEXP (x, 1)))); | |
c410d49e | 5024 | |
44037a66 TG |
5025 | case DIV: |
5026 | if (TREE_CODE (type) != REAL_TYPE) | |
12753674 | 5027 | t = signed_type_for (type); |
44037a66 TG |
5028 | else |
5029 | t = type; | |
5030 | ||
aeba6c28 JM |
5031 | return fold_convert (type, build2 (TRUNC_DIV_EXPR, t, |
5032 | make_tree (t, XEXP (x, 0)), | |
5033 | make_tree (t, XEXP (x, 1)))); | |
44037a66 | 5034 | case UDIV: |
ca5ba2a3 | 5035 | t = unsigned_type_for (type); |
aeba6c28 JM |
5036 | return fold_convert (type, build2 (TRUNC_DIV_EXPR, t, |
5037 | make_tree (t, XEXP (x, 0)), | |
5038 | make_tree (t, XEXP (x, 1)))); | |
5c45425b RH |
5039 | |
5040 | case SIGN_EXTEND: | |
5041 | case ZERO_EXTEND: | |
ae2bcd98 RS |
5042 | t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)), |
5043 | GET_CODE (x) == ZERO_EXTEND); | |
aeba6c28 | 5044 | return fold_convert (type, make_tree (t, XEXP (x, 0))); |
5c45425b | 5045 | |
84816907 JM |
5046 | case CONST: |
5047 | return make_tree (type, XEXP (x, 0)); | |
5048 | ||
5049 | case SYMBOL_REF: | |
5050 | t = SYMBOL_REF_DECL (x); | |
5051 | if (t) | |
5052 | return fold_convert (type, build_fold_addr_expr (t)); | |
5053 | /* else fall through. */ | |
5054 | ||
4dfa0342 | 5055 | default: |
c2255bc4 | 5056 | t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type); |
d1608933 | 5057 | |
d4ebfa65 BE |
5058 | /* If TYPE is a POINTER_TYPE, we might need to convert X from |
5059 | address mode to pointer mode. */ | |
5ae6cd0d | 5060 | if (POINTER_TYPE_P (type)) |
d4ebfa65 BE |
5061 | x = convert_memory_address_addr_space |
5062 | (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type))); | |
d1608933 | 5063 | |
8a0aa06e RH |
5064 | /* Note that we do *not* use SET_DECL_RTL here, because we do not |
5065 | want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */ | |
820cc88f | 5066 | t->decl_with_rtl.rtl = x; |
4dfa0342 | 5067 | |
44037a66 TG |
5068 | return t; |
5069 | } | |
5070 | } | |
44037a66 TG |
5071 | \f |
5072 | /* Compute the logical-and of OP0 and OP1, storing it in TARGET | |
5073 | and returning TARGET. | |
5074 | ||
5075 | If TARGET is 0, a pseudo-register or constant is returned. */ | |
5076 | ||
5077 | rtx | |
502b8322 | 5078 | expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target) |
44037a66 | 5079 | { |
22273300 | 5080 | rtx tem = 0; |
44037a66 | 5081 | |
22273300 JJ |
5082 | if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode) |
5083 | tem = simplify_binary_operation (AND, mode, op0, op1); | |
5084 | if (tem == 0) | |
44037a66 | 5085 | tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN); |
44037a66 TG |
5086 | |
5087 | if (target == 0) | |
5088 | target = tem; | |
5089 | else if (tem != target) | |
5090 | emit_move_insn (target, tem); | |
5091 | return target; | |
5092 | } | |
495499da | 5093 | |
a41a56b6 RE |
5094 | /* Helper function for emit_store_flag. */ |
5095 | static rtx | |
ef12ae45 PB |
5096 | emit_cstore (rtx target, enum insn_code icode, enum rtx_code code, |
5097 | enum machine_mode mode, enum machine_mode compare_mode, | |
92355a9c PB |
5098 | int unsignedp, rtx x, rtx y, int normalizep, |
5099 | enum machine_mode target_mode) | |
a41a56b6 | 5100 | { |
45475a3f | 5101 | rtx op0, last, comparison, subtarget, pattern; |
45475a3f PB |
5102 | enum machine_mode result_mode = insn_data[(int) icode].operand[0].mode; |
5103 | ||
5104 | last = get_last_insn (); | |
5105 | x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp); | |
5106 | y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp); | |
5107 | comparison = gen_rtx_fmt_ee (code, result_mode, x, y); | |
5108 | if (!x || !y | |
5109 | || !insn_data[icode].operand[2].predicate | |
5110 | (x, insn_data[icode].operand[2].mode) | |
5111 | || !insn_data[icode].operand[3].predicate | |
5112 | (y, insn_data[icode].operand[3].mode) | |
5113 | || !insn_data[icode].operand[1].predicate (comparison, VOIDmode)) | |
5114 | { | |
5115 | delete_insns_since (last); | |
5116 | return NULL_RTX; | |
5117 | } | |
5118 | ||
92355a9c PB |
5119 | if (target_mode == VOIDmode) |
5120 | target_mode = result_mode; | |
5121 | if (!target) | |
5122 | target = gen_reg_rtx (target_mode); | |
b8698a0f | 5123 | |
92355a9c | 5124 | if (optimize |
45475a3f PB |
5125 | || !(insn_data[(int) icode].operand[0].predicate (target, result_mode))) |
5126 | subtarget = gen_reg_rtx (result_mode); | |
5127 | else | |
5128 | subtarget = target; | |
5129 | ||
5130 | pattern = GEN_FCN (icode) (subtarget, comparison, x, y); | |
5131 | if (!pattern) | |
5132 | return NULL_RTX; | |
5133 | emit_insn (pattern); | |
495499da | 5134 | |
a41a56b6 RE |
5135 | /* If we are converting to a wider mode, first convert to |
5136 | TARGET_MODE, then normalize. This produces better combining | |
5137 | opportunities on machines that have a SIGN_EXTRACT when we are | |
5138 | testing a single bit. This mostly benefits the 68k. | |
5139 | ||
5140 | If STORE_FLAG_VALUE does not have the sign bit set when | |
5141 | interpreted in MODE, we can do this conversion as unsigned, which | |
5142 | is usually more efficient. */ | |
45475a3f | 5143 | if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode)) |
a41a56b6 RE |
5144 | { |
5145 | convert_move (target, subtarget, | |
45475a3f | 5146 | (GET_MODE_BITSIZE (result_mode) <= HOST_BITS_PER_WIDE_INT) |
a41a56b6 RE |
5147 | && 0 == (STORE_FLAG_VALUE |
5148 | & ((HOST_WIDE_INT) 1 | |
45475a3f | 5149 | << (GET_MODE_BITSIZE (result_mode) -1)))); |
a41a56b6 | 5150 | op0 = target; |
45475a3f | 5151 | result_mode = target_mode; |
a41a56b6 RE |
5152 | } |
5153 | else | |
5154 | op0 = subtarget; | |
5155 | ||
5156 | /* If we want to keep subexpressions around, don't reuse our last | |
5157 | target. */ | |
5158 | if (optimize) | |
5159 | subtarget = 0; | |
5160 | ||
5161 | /* Now normalize to the proper value in MODE. Sometimes we don't | |
5162 | have to do anything. */ | |
5163 | if (normalizep == 0 || normalizep == STORE_FLAG_VALUE) | |
5164 | ; | |
5165 | /* STORE_FLAG_VALUE might be the most negative number, so write | |
5166 | the comparison this way to avoid a compiler-time warning. */ | |
5167 | else if (- normalizep == STORE_FLAG_VALUE) | |
45475a3f | 5168 | op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0); |
a41a56b6 RE |
5169 | |
5170 | /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes | |
5171 | it hard to use a value of just the sign bit due to ANSI integer | |
5172 | constant typing rules. */ | |
45475a3f | 5173 | else if (GET_MODE_BITSIZE (result_mode) <= HOST_BITS_PER_WIDE_INT |
a41a56b6 | 5174 | && (STORE_FLAG_VALUE |
45475a3f PB |
5175 | & ((HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (result_mode) - 1)))) |
5176 | op0 = expand_shift (RSHIFT_EXPR, result_mode, op0, | |
5177 | size_int (GET_MODE_BITSIZE (result_mode) - 1), subtarget, | |
a41a56b6 RE |
5178 | normalizep == 1); |
5179 | else | |
5180 | { | |
5181 | gcc_assert (STORE_FLAG_VALUE & 1); | |
5182 | ||
45475a3f | 5183 | op0 = expand_and (result_mode, op0, const1_rtx, subtarget); |
a41a56b6 | 5184 | if (normalizep == -1) |
45475a3f | 5185 | op0 = expand_unop (result_mode, neg_optab, op0, op0, 0); |
a41a56b6 RE |
5186 | } |
5187 | ||
5188 | /* If we were converting to a smaller mode, do the conversion now. */ | |
45475a3f | 5189 | if (target_mode != result_mode) |
a41a56b6 RE |
5190 | { |
5191 | convert_move (target, op0, 0); | |
5192 | return target; | |
5193 | } | |
5194 | else | |
5195 | return op0; | |
5196 | } | |
5197 | ||
44037a66 | 5198 | |
ef12ae45 PB |
5199 | /* A subroutine of emit_store_flag only including "tricks" that do not |
5200 | need a recursive call. These are kept separate to avoid infinite | |
5201 | loops. */ | |
44037a66 | 5202 | |
ef12ae45 PB |
5203 | static rtx |
5204 | emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1, | |
92355a9c PB |
5205 | enum machine_mode mode, int unsignedp, int normalizep, |
5206 | enum machine_mode target_mode) | |
44037a66 TG |
5207 | { |
5208 | rtx subtarget; | |
5209 | enum insn_code icode; | |
5210 | enum machine_mode compare_mode; | |
f90b7a5a | 5211 | enum mode_class mclass; |
45475a3f | 5212 | enum rtx_code scode; |
ef12ae45 | 5213 | rtx tem; |
44037a66 | 5214 | |
b30f05db BS |
5215 | if (unsignedp) |
5216 | code = unsigned_condition (code); | |
45475a3f | 5217 | scode = swap_condition (code); |
b30f05db | 5218 | |
c2615a67 RK |
5219 | /* If one operand is constant, make it the second one. Only do this |
5220 | if the other operand is not constant as well. */ | |
5221 | ||
8c9864f3 | 5222 | if (swap_commutative_operands_p (op0, op1)) |
c2615a67 RK |
5223 | { |
5224 | tem = op0; | |
5225 | op0 = op1; | |
5226 | op1 = tem; | |
5227 | code = swap_condition (code); | |
5228 | } | |
5229 | ||
6405e07b DE |
5230 | if (mode == VOIDmode) |
5231 | mode = GET_MODE (op0); | |
5232 | ||
c410d49e | 5233 | /* For some comparisons with 1 and -1, we can convert this to |
44037a66 | 5234 | comparisons with zero. This will often produce more opportunities for |
0f41302f | 5235 | store-flag insns. */ |
44037a66 TG |
5236 | |
5237 | switch (code) | |
5238 | { | |
5239 | case LT: | |
5240 | if (op1 == const1_rtx) | |
5241 | op1 = const0_rtx, code = LE; | |
5242 | break; | |
5243 | case LE: | |
5244 | if (op1 == constm1_rtx) | |
5245 | op1 = const0_rtx, code = LT; | |
5246 | break; | |
5247 | case GE: | |
5248 | if (op1 == const1_rtx) | |
5249 | op1 = const0_rtx, code = GT; | |
5250 | break; | |
5251 | case GT: | |
5252 | if (op1 == constm1_rtx) | |
5253 | op1 = const0_rtx, code = GE; | |
5254 | break; | |
5255 | case GEU: | |
5256 | if (op1 == const1_rtx) | |
5257 | op1 = const0_rtx, code = NE; | |
5258 | break; | |
5259 | case LTU: | |
5260 | if (op1 == const1_rtx) | |
5261 | op1 = const0_rtx, code = EQ; | |
5262 | break; | |
e9a25f70 JL |
5263 | default: |
5264 | break; | |
44037a66 TG |
5265 | } |
5266 | ||
884815aa JB |
5267 | /* If we are comparing a double-word integer with zero or -1, we can |
5268 | convert the comparison into one involving a single word. */ | |
6912b84b RK |
5269 | if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2 |
5270 | && GET_MODE_CLASS (mode) == MODE_INT | |
3c0cb5de | 5271 | && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0))) |
6912b84b | 5272 | { |
884815aa JB |
5273 | if ((code == EQ || code == NE) |
5274 | && (op1 == const0_rtx || op1 == constm1_rtx)) | |
6912b84b | 5275 | { |
1ed20a40 | 5276 | rtx op00, op01; |
8433f113 | 5277 | |
a41a56b6 RE |
5278 | /* Do a logical OR or AND of the two words and compare the |
5279 | result. */ | |
8433f113 RH |
5280 | op00 = simplify_gen_subreg (word_mode, op0, mode, 0); |
5281 | op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD); | |
1ed20a40 PB |
5282 | tem = expand_binop (word_mode, |
5283 | op1 == const0_rtx ? ior_optab : and_optab, | |
5284 | op00, op01, NULL_RTX, unsignedp, | |
5285 | OPTAB_DIRECT); | |
884815aa | 5286 | |
1ed20a40 PB |
5287 | if (tem != 0) |
5288 | tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode, | |
92355a9c | 5289 | unsignedp, normalizep); |
6912b84b | 5290 | } |
884815aa | 5291 | else if ((code == LT || code == GE) && op1 == const0_rtx) |
8433f113 RH |
5292 | { |
5293 | rtx op0h; | |
5294 | ||
5295 | /* If testing the sign bit, can just test on high word. */ | |
5296 | op0h = simplify_gen_subreg (word_mode, op0, mode, | |
a41a56b6 RE |
5297 | subreg_highpart_offset (word_mode, |
5298 | mode)); | |
1ed20a40 PB |
5299 | tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode, |
5300 | unsignedp, normalizep); | |
5301 | } | |
5302 | else | |
5303 | tem = NULL_RTX; | |
5304 | ||
5305 | if (tem) | |
5306 | { | |
92355a9c | 5307 | if (target_mode == VOIDmode || GET_MODE (tem) == target_mode) |
1ed20a40 | 5308 | return tem; |
92355a9c PB |
5309 | if (!target) |
5310 | target = gen_reg_rtx (target_mode); | |
1ed20a40 PB |
5311 | |
5312 | convert_move (target, tem, | |
e9edda23 | 5313 | 0 == ((normalizep ? normalizep : STORE_FLAG_VALUE) |
1ed20a40 PB |
5314 | & ((HOST_WIDE_INT) 1 |
5315 | << (GET_MODE_BITSIZE (word_mode) -1)))); | |
5316 | return target; | |
8433f113 | 5317 | } |
6912b84b RK |
5318 | } |
5319 | ||
44037a66 TG |
5320 | /* If this is A < 0 or A >= 0, we can do this by taking the ones |
5321 | complement of A (for GE) and shifting the sign bit to the low bit. */ | |
5322 | if (op1 == const0_rtx && (code == LT || code == GE) | |
5323 | && GET_MODE_CLASS (mode) == MODE_INT | |
5324 | && (normalizep || STORE_FLAG_VALUE == 1 | |
b1ec3c92 | 5325 | || (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT |
12dd565f | 5326 | && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode)) |
a41a56b6 RE |
5327 | == ((unsigned HOST_WIDE_INT) 1 |
5328 | << (GET_MODE_BITSIZE (mode) - 1)))))) | |
44037a66 | 5329 | { |
8deb7047 | 5330 | subtarget = target; |
44037a66 | 5331 | |
495499da PB |
5332 | if (!target) |
5333 | target_mode = mode; | |
5334 | ||
44037a66 TG |
5335 | /* If the result is to be wider than OP0, it is best to convert it |
5336 | first. If it is to be narrower, it is *incorrect* to convert it | |
5337 | first. */ | |
495499da | 5338 | else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode)) |
44037a66 | 5339 | { |
81722fa9 | 5340 | op0 = convert_modes (target_mode, mode, op0, 0); |
44037a66 TG |
5341 | mode = target_mode; |
5342 | } | |
5343 | ||
5344 | if (target_mode != mode) | |
5345 | subtarget = 0; | |
5346 | ||
5347 | if (code == GE) | |
1d6eaf3d RK |
5348 | op0 = expand_unop (mode, one_cmpl_optab, op0, |
5349 | ((STORE_FLAG_VALUE == 1 || normalizep) | |
5350 | ? 0 : subtarget), 0); | |
44037a66 | 5351 | |
1d6eaf3d | 5352 | if (STORE_FLAG_VALUE == 1 || normalizep) |
44037a66 TG |
5353 | /* If we are supposed to produce a 0/1 value, we want to do |
5354 | a logical shift from the sign bit to the low-order bit; for | |
5355 | a -1/0 value, we do an arithmetic shift. */ | |
5356 | op0 = expand_shift (RSHIFT_EXPR, mode, op0, | |
5357 | size_int (GET_MODE_BITSIZE (mode) - 1), | |
5358 | subtarget, normalizep != -1); | |
5359 | ||
5360 | if (mode != target_mode) | |
c2ec26b8 | 5361 | op0 = convert_modes (target_mode, mode, op0, 0); |
44037a66 TG |
5362 | |
5363 | return op0; | |
5364 | } | |
5365 | ||
f90b7a5a PB |
5366 | mclass = GET_MODE_CLASS (mode); |
5367 | for (compare_mode = mode; compare_mode != VOIDmode; | |
5368 | compare_mode = GET_MODE_WIDER_MODE (compare_mode)) | |
a41a56b6 | 5369 | { |
f90b7a5a | 5370 | enum machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode; |
947131ba | 5371 | icode = optab_handler (cstore_optab, optab_mode); |
f90b7a5a | 5372 | if (icode != CODE_FOR_nothing) |
a41a56b6 | 5373 | { |
a41a56b6 | 5374 | do_pending_stack_adjust (); |
ef12ae45 | 5375 | tem = emit_cstore (target, icode, code, mode, compare_mode, |
92355a9c | 5376 | unsignedp, op0, op1, normalizep, target_mode); |
45475a3f PB |
5377 | if (tem) |
5378 | return tem; | |
44037a66 | 5379 | |
45475a3f | 5380 | if (GET_MODE_CLASS (mode) == MODE_FLOAT) |
44037a66 | 5381 | { |
ef12ae45 | 5382 | tem = emit_cstore (target, icode, scode, mode, compare_mode, |
92355a9c | 5383 | unsignedp, op1, op0, normalizep, target_mode); |
45475a3f PB |
5384 | if (tem) |
5385 | return tem; | |
44037a66 | 5386 | } |
f90b7a5a | 5387 | break; |
44037a66 TG |
5388 | } |
5389 | } | |
5390 | ||
ef12ae45 PB |
5391 | return 0; |
5392 | } | |
5393 | ||
5394 | /* Emit a store-flags instruction for comparison CODE on OP0 and OP1 | |
5395 | and storing in TARGET. Normally return TARGET. | |
5396 | Return 0 if that cannot be done. | |
5397 | ||
5398 | MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If | |
5399 | it is VOIDmode, they cannot both be CONST_INT. | |
5400 | ||
5401 | UNSIGNEDP is for the case where we have to widen the operands | |
5402 | to perform the operation. It says to use zero-extension. | |
5403 | ||
5404 | NORMALIZEP is 1 if we should convert the result to be either zero | |
5405 | or one. Normalize is -1 if we should convert the result to be | |
5406 | either zero or -1. If NORMALIZEP is zero, the result will be left | |
5407 | "raw" out of the scc insn. */ | |
5408 | ||
5409 | rtx | |
5410 | emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1, | |
5411 | enum machine_mode mode, int unsignedp, int normalizep) | |
5412 | { | |
5413 | enum machine_mode target_mode = target ? GET_MODE (target) : VOIDmode; | |
5414 | enum rtx_code rcode; | |
5415 | rtx subtarget; | |
5416 | rtx tem, last, trueval; | |
5417 | ||
92355a9c PB |
5418 | tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep, |
5419 | target_mode); | |
ef12ae45 PB |
5420 | if (tem) |
5421 | return tem; | |
44037a66 | 5422 | |
495499da PB |
5423 | /* If we reached here, we can't do this with a scc insn, however there |
5424 | are some comparisons that can be done in other ways. Don't do any | |
5425 | of these cases if branches are very cheap. */ | |
5426 | if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0) | |
5427 | return 0; | |
5428 | ||
5429 | /* See what we need to return. We can only return a 1, -1, or the | |
5430 | sign bit. */ | |
5431 | ||
5432 | if (normalizep == 0) | |
5433 | { | |
5434 | if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1) | |
5435 | normalizep = STORE_FLAG_VALUE; | |
5436 | ||
5437 | else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT | |
5438 | && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode)) | |
5439 | == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1))) | |
5440 | ; | |
5441 | else | |
5442 | return 0; | |
5443 | } | |
5444 | ||
ef12ae45 PB |
5445 | last = get_last_insn (); |
5446 | ||
7c27e184 PB |
5447 | /* If optimizing, use different pseudo registers for each insn, instead |
5448 | of reusing the same pseudo. This leads to better CSE, but slows | |
5449 | down the compiler, since there are more pseudos */ | |
5450 | subtarget = (!optimize | |
91e66235 | 5451 | && (target_mode == mode)) ? target : NULL_RTX; |
495499da PB |
5452 | trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE); |
5453 | ||
5454 | /* For floating-point comparisons, try the reverse comparison or try | |
5455 | changing the "orderedness" of the comparison. */ | |
5456 | if (GET_MODE_CLASS (mode) == MODE_FLOAT) | |
5457 | { | |
5458 | enum rtx_code first_code; | |
5459 | bool and_them; | |
5460 | ||
5461 | rcode = reverse_condition_maybe_unordered (code); | |
5462 | if (can_compare_p (rcode, mode, ccp_store_flag) | |
5463 | && (code == ORDERED || code == UNORDERED | |
5464 | || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ)) | |
5465 | || (! HONOR_SNANS (mode) && (code == EQ || code == NE)))) | |
5466 | { | |
533d4b99 PB |
5467 | int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1) |
5468 | || (STORE_FLAG_VALUE == -1 && normalizep == 1)); | |
5469 | ||
495499da | 5470 | /* For the reverse comparison, use either an addition or a XOR. */ |
533d4b99 PB |
5471 | if (want_add |
5472 | && rtx_cost (GEN_INT (normalizep), PLUS, | |
5473 | optimize_insn_for_speed_p ()) == 0) | |
495499da | 5474 | { |
ef12ae45 | 5475 | tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0, |
92355a9c | 5476 | STORE_FLAG_VALUE, target_mode); |
495499da PB |
5477 | if (tem) |
5478 | return expand_binop (target_mode, add_optab, tem, | |
5479 | GEN_INT (normalizep), | |
5480 | target, 0, OPTAB_WIDEN); | |
5481 | } | |
533d4b99 PB |
5482 | else if (!want_add |
5483 | && rtx_cost (trueval, XOR, | |
5484 | optimize_insn_for_speed_p ()) == 0) | |
495499da | 5485 | { |
ef12ae45 | 5486 | tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0, |
92355a9c | 5487 | normalizep, target_mode); |
495499da PB |
5488 | if (tem) |
5489 | return expand_binop (target_mode, xor_optab, tem, trueval, | |
5490 | target, INTVAL (trueval) >= 0, OPTAB_WIDEN); | |
5491 | } | |
5492 | } | |
5493 | ||
5494 | delete_insns_since (last); | |
5495 | ||
5496 | /* Cannot split ORDERED and UNORDERED, only try the above trick. */ | |
5497 | if (code == ORDERED || code == UNORDERED) | |
5498 | return 0; | |
b8698a0f | 5499 | |
495499da PB |
5500 | and_them = split_comparison (code, mode, &first_code, &code); |
5501 | ||
5502 | /* If there are no NaNs, the first comparison should always fall through. | |
5503 | Effectively change the comparison to the other one. */ | |
5504 | if (!HONOR_NANS (mode)) | |
5505 | { | |
5506 | gcc_assert (first_code == (and_them ? ORDERED : UNORDERED)); | |
92355a9c PB |
5507 | return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep, |
5508 | target_mode); | |
495499da PB |
5509 | } |
5510 | ||
5511 | #ifdef HAVE_conditional_move | |
5512 | /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a | |
5513 | conditional move. */ | |
92355a9c PB |
5514 | tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0, |
5515 | normalizep, target_mode); | |
495499da PB |
5516 | if (tem == 0) |
5517 | return 0; | |
5518 | ||
5519 | if (and_them) | |
5520 | tem = emit_conditional_move (target, code, op0, op1, mode, | |
5521 | tem, const0_rtx, GET_MODE (tem), 0); | |
5522 | else | |
5523 | tem = emit_conditional_move (target, code, op0, op1, mode, | |
5524 | trueval, tem, GET_MODE (tem), 0); | |
5525 | ||
5526 | if (tem == 0) | |
5527 | delete_insns_since (last); | |
5528 | return tem; | |
5529 | #else | |
5530 | return 0; | |
5531 | #endif | |
5532 | } | |
44037a66 | 5533 | |
495499da PB |
5534 | /* The remaining tricks only apply to integer comparisons. */ |
5535 | ||
5536 | if (GET_MODE_CLASS (mode) != MODE_INT) | |
5537 | return 0; | |
5538 | ||
5539 | /* If this is an equality comparison of integers, we can try to exclusive-or | |
44037a66 TG |
5540 | (or subtract) the two operands and use a recursive call to try the |
5541 | comparison with zero. Don't do any of these cases if branches are | |
5542 | very cheap. */ | |
5543 | ||
495499da | 5544 | if ((code == EQ || code == NE) && op1 != const0_rtx) |
44037a66 TG |
5545 | { |
5546 | tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1, | |
5547 | OPTAB_WIDEN); | |
5548 | ||
5549 | if (tem == 0) | |
5550 | tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1, | |
5551 | OPTAB_WIDEN); | |
5552 | if (tem != 0) | |
a22fb74c AK |
5553 | tem = emit_store_flag (target, code, tem, const0_rtx, |
5554 | mode, unsignedp, normalizep); | |
495499da PB |
5555 | if (tem != 0) |
5556 | return tem; | |
5557 | ||
5558 | delete_insns_since (last); | |
5559 | } | |
5560 | ||
5561 | /* For integer comparisons, try the reverse comparison. However, for | |
5562 | small X and if we'd have anyway to extend, implementing "X != 0" | |
5563 | as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */ | |
5564 | rcode = reverse_condition (code); | |
5565 | if (can_compare_p (rcode, mode, ccp_store_flag) | |
947131ba | 5566 | && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing |
495499da PB |
5567 | && code == NE |
5568 | && GET_MODE_SIZE (mode) < UNITS_PER_WORD | |
5569 | && op1 == const0_rtx)) | |
5570 | { | |
533d4b99 PB |
5571 | int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1) |
5572 | || (STORE_FLAG_VALUE == -1 && normalizep == 1)); | |
5573 | ||
495499da | 5574 | /* Again, for the reverse comparison, use either an addition or a XOR. */ |
533d4b99 PB |
5575 | if (want_add |
5576 | && rtx_cost (GEN_INT (normalizep), PLUS, | |
5577 | optimize_insn_for_speed_p ()) == 0) | |
495499da | 5578 | { |
ef12ae45 | 5579 | tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0, |
92355a9c | 5580 | STORE_FLAG_VALUE, target_mode); |
495499da PB |
5581 | if (tem != 0) |
5582 | tem = expand_binop (target_mode, add_optab, tem, | |
5583 | GEN_INT (normalizep), target, 0, OPTAB_WIDEN); | |
5584 | } | |
533d4b99 PB |
5585 | else if (!want_add |
5586 | && rtx_cost (trueval, XOR, | |
5587 | optimize_insn_for_speed_p ()) == 0) | |
495499da | 5588 | { |
ef12ae45 | 5589 | tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0, |
92355a9c | 5590 | normalizep, target_mode); |
495499da PB |
5591 | if (tem != 0) |
5592 | tem = expand_binop (target_mode, xor_optab, tem, trueval, target, | |
5593 | INTVAL (trueval) >= 0, OPTAB_WIDEN); | |
5594 | } | |
5595 | ||
5596 | if (tem != 0) | |
5597 | return tem; | |
5598 | delete_insns_since (last); | |
44037a66 TG |
5599 | } |
5600 | ||
c410d49e | 5601 | /* Some other cases we can do are EQ, NE, LE, and GT comparisons with |
44037a66 TG |
5602 | the constant zero. Reject all other comparisons at this point. Only |
5603 | do LE and GT if branches are expensive since they are expensive on | |
5604 | 2-operand machines. */ | |
5605 | ||
495499da | 5606 | if (op1 != const0_rtx |
44037a66 | 5607 | || (code != EQ && code != NE |
3a4fd356 JH |
5608 | && (BRANCH_COST (optimize_insn_for_speed_p (), |
5609 | false) <= 1 || (code != LE && code != GT)))) | |
44037a66 TG |
5610 | return 0; |
5611 | ||
44037a66 TG |
5612 | /* Try to put the result of the comparison in the sign bit. Assume we can't |
5613 | do the necessary operation below. */ | |
5614 | ||
5615 | tem = 0; | |
5616 | ||
5617 | /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has | |
5618 | the sign bit set. */ | |
5619 | ||
5620 | if (code == LE) | |
5621 | { | |
5622 | /* This is destructive, so SUBTARGET can't be OP0. */ | |
5623 | if (rtx_equal_p (subtarget, op0)) | |
5624 | subtarget = 0; | |
5625 | ||
5626 | tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0, | |
5627 | OPTAB_WIDEN); | |
5628 | if (tem) | |
5629 | tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0, | |
5630 | OPTAB_WIDEN); | |
5631 | } | |
5632 | ||
5633 | /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the | |
5634 | number of bits in the mode of OP0, minus one. */ | |
5635 | ||
5636 | if (code == GT) | |
5637 | { | |
5638 | if (rtx_equal_p (subtarget, op0)) | |
5639 | subtarget = 0; | |
5640 | ||
5641 | tem = expand_shift (RSHIFT_EXPR, mode, op0, | |
5642 | size_int (GET_MODE_BITSIZE (mode) - 1), | |
5643 | subtarget, 0); | |
5644 | tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0, | |
5645 | OPTAB_WIDEN); | |
5646 | } | |
c410d49e | 5647 | |
44037a66 TG |
5648 | if (code == EQ || code == NE) |
5649 | { | |
5650 | /* For EQ or NE, one way to do the comparison is to apply an operation | |
cc2902df | 5651 | that converts the operand into a positive number if it is nonzero |
44037a66 TG |
5652 | or zero if it was originally zero. Then, for EQ, we subtract 1 and |
5653 | for NE we negate. This puts the result in the sign bit. Then we | |
c410d49e | 5654 | normalize with a shift, if needed. |
44037a66 TG |
5655 | |
5656 | Two operations that can do the above actions are ABS and FFS, so try | |
5657 | them. If that doesn't work, and MODE is smaller than a full word, | |
36d747f6 | 5658 | we can use zero-extension to the wider mode (an unsigned conversion) |
44037a66 TG |
5659 | as the operation. */ |
5660 | ||
c410d49e EC |
5661 | /* Note that ABS doesn't yield a positive number for INT_MIN, but |
5662 | that is compensated by the subsequent overflow when subtracting | |
30f7a378 | 5663 | one / negating. */ |
91ce572a | 5664 | |
947131ba | 5665 | if (optab_handler (abs_optab, mode) != CODE_FOR_nothing) |
44037a66 | 5666 | tem = expand_unop (mode, abs_optab, op0, subtarget, 1); |
947131ba | 5667 | else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing) |
44037a66 TG |
5668 | tem = expand_unop (mode, ffs_optab, op0, subtarget, 1); |
5669 | else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD) | |
5670 | { | |
c2ec26b8 | 5671 | tem = convert_modes (word_mode, mode, op0, 1); |
81722fa9 | 5672 | mode = word_mode; |
44037a66 TG |
5673 | } |
5674 | ||
5675 | if (tem != 0) | |
5676 | { | |
5677 | if (code == EQ) | |
5678 | tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget, | |
5679 | 0, OPTAB_WIDEN); | |
5680 | else | |
5681 | tem = expand_unop (mode, neg_optab, tem, subtarget, 0); | |
5682 | } | |
5683 | ||
5684 | /* If we couldn't do it that way, for NE we can "or" the two's complement | |
5685 | of the value with itself. For EQ, we take the one's complement of | |
5686 | that "or", which is an extra insn, so we only handle EQ if branches | |
5687 | are expensive. */ | |
5688 | ||
3a4fd356 JH |
5689 | if (tem == 0 |
5690 | && (code == NE | |
5691 | || BRANCH_COST (optimize_insn_for_speed_p (), | |
5692 | false) > 1)) | |
44037a66 | 5693 | { |
36d747f6 RS |
5694 | if (rtx_equal_p (subtarget, op0)) |
5695 | subtarget = 0; | |
5696 | ||
44037a66 TG |
5697 | tem = expand_unop (mode, neg_optab, op0, subtarget, 0); |
5698 | tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0, | |
5699 | OPTAB_WIDEN); | |
5700 | ||
5701 | if (tem && code == EQ) | |
5702 | tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0); | |
5703 | } | |
5704 | } | |
5705 | ||
5706 | if (tem && normalizep) | |
5707 | tem = expand_shift (RSHIFT_EXPR, mode, tem, | |
5708 | size_int (GET_MODE_BITSIZE (mode) - 1), | |
91e66235 | 5709 | subtarget, normalizep == 1); |
44037a66 | 5710 | |
91e66235 | 5711 | if (tem) |
44037a66 | 5712 | { |
495499da PB |
5713 | if (!target) |
5714 | ; | |
5715 | else if (GET_MODE (tem) != target_mode) | |
91e66235 MM |
5716 | { |
5717 | convert_move (target, tem, 0); | |
5718 | tem = target; | |
5719 | } | |
5720 | else if (!subtarget) | |
5721 | { | |
5722 | emit_move_insn (target, tem); | |
5723 | tem = target; | |
5724 | } | |
44037a66 | 5725 | } |
91e66235 | 5726 | else |
44037a66 TG |
5727 | delete_insns_since (last); |
5728 | ||
5729 | return tem; | |
5730 | } | |
04a8ee2f TG |
5731 | |
5732 | /* Like emit_store_flag, but always succeeds. */ | |
5733 | ||
5734 | rtx | |
502b8322 AJ |
5735 | emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1, |
5736 | enum machine_mode mode, int unsignedp, int normalizep) | |
04a8ee2f TG |
5737 | { |
5738 | rtx tem, label; | |
495499da | 5739 | rtx trueval, falseval; |
04a8ee2f TG |
5740 | |
5741 | /* First see if emit_store_flag can do the job. */ | |
5742 | tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep); | |
5743 | if (tem != 0) | |
5744 | return tem; | |
5745 | ||
495499da PB |
5746 | if (!target) |
5747 | target = gen_reg_rtx (word_mode); | |
04a8ee2f | 5748 | |
495499da PB |
5749 | /* If this failed, we have to do this with set/compare/jump/set code. |
5750 | For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */ | |
5751 | trueval = normalizep ? GEN_INT (normalizep) : const1_rtx; | |
b8698a0f | 5752 | if (code == NE |
495499da PB |
5753 | && GET_MODE_CLASS (mode) == MODE_INT |
5754 | && REG_P (target) | |
5755 | && op0 == target | |
5756 | && op1 == const0_rtx) | |
5757 | { | |
5758 | label = gen_label_rtx (); | |
5759 | do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, | |
40e90eac | 5760 | mode, NULL_RTX, NULL_RTX, label, -1); |
495499da PB |
5761 | emit_move_insn (target, trueval); |
5762 | emit_label (label); | |
5763 | return target; | |
5764 | } | |
04a8ee2f | 5765 | |
f8cfc6aa | 5766 | if (!REG_P (target) |
04a8ee2f TG |
5767 | || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1)) |
5768 | target = gen_reg_rtx (GET_MODE (target)); | |
5769 | ||
495499da PB |
5770 | /* Jump in the right direction if the target cannot implement CODE |
5771 | but can jump on its reverse condition. */ | |
5772 | falseval = const0_rtx; | |
5773 | if (! can_compare_p (code, mode, ccp_jump) | |
5774 | && (! FLOAT_MODE_P (mode) | |
5775 | || code == ORDERED || code == UNORDERED | |
5776 | || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ)) | |
5777 | || (! HONOR_SNANS (mode) && (code == EQ || code == NE)))) | |
5778 | { | |
5779 | enum rtx_code rcode; | |
5780 | if (FLOAT_MODE_P (mode)) | |
5781 | rcode = reverse_condition_maybe_unordered (code); | |
5782 | else | |
5783 | rcode = reverse_condition (code); | |
5784 | ||
5785 | /* Canonicalize to UNORDERED for the libcall. */ | |
5786 | if (can_compare_p (rcode, mode, ccp_jump) | |
5787 | || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump))) | |
5788 | { | |
5789 | falseval = trueval; | |
5790 | trueval = const0_rtx; | |
5791 | code = rcode; | |
5792 | } | |
5793 | } | |
5794 | ||
5795 | emit_move_insn (target, trueval); | |
04a8ee2f | 5796 | label = gen_label_rtx (); |
d43e0b7d | 5797 | do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, |
40e90eac | 5798 | NULL_RTX, label, -1); |
04a8ee2f | 5799 | |
495499da | 5800 | emit_move_insn (target, falseval); |
44037a66 TG |
5801 | emit_label (label); |
5802 | ||
5803 | return target; | |
5804 | } | |
f5963e61 JL |
5805 | \f |
5806 | /* Perform possibly multi-word comparison and conditional jump to LABEL | |
feb04780 RS |
5807 | if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE. This is |
5808 | now a thin wrapper around do_compare_rtx_and_jump. */ | |
f5963e61 JL |
5809 | |
5810 | static void | |
502b8322 AJ |
5811 | do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode, |
5812 | rtx label) | |
f5963e61 | 5813 | { |
feb04780 RS |
5814 | int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU); |
5815 | do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode, | |
40e90eac | 5816 | NULL_RTX, NULL_RTX, label, -1); |
f5963e61 | 5817 | } |