]>
Commit | Line | Data |
---|---|---|
db96f378 | 1 | /* Medium-level subroutines: convert bit-field store and extract |
2 | and shifts, multiplies and divides to rtl instructions. | |
ddca3e9d | 3 | Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998, |
38ba30bf | 4 | 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, |
49db198b | 5 | 2011, 2012 |
85afca2d | 6 | Free Software Foundation, Inc. |
db96f378 | 7 | |
f12b58b3 | 8 | This file is part of GCC. |
db96f378 | 9 | |
f12b58b3 | 10 | GCC is free software; you can redistribute it and/or modify it under |
11 | the terms of the GNU General Public License as published by the Free | |
8c4c00c1 | 12 | Software Foundation; either version 3, or (at your option) any later |
f12b58b3 | 13 | version. |
db96f378 | 14 | |
f12b58b3 | 15 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
16 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
17 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
18 | for more details. | |
db96f378 | 19 | |
20 | You should have received a copy of the GNU General Public License | |
8c4c00c1 | 21 | along with GCC; see the file COPYING3. If not see |
22 | <http://www.gnu.org/licenses/>. */ | |
db96f378 | 23 | |
24 | ||
25 | #include "config.h" | |
405711de | 26 | #include "system.h" |
805e22b2 | 27 | #include "coretypes.h" |
28 | #include "tm.h" | |
0b205f4c | 29 | #include "diagnostic-core.h" |
db96f378 | 30 | #include "rtl.h" |
31 | #include "tree.h" | |
7953c610 | 32 | #include "tm_p.h" |
db96f378 | 33 | #include "flags.h" |
db96f378 | 34 | #include "insn-config.h" |
35 | #include "expr.h" | |
d8fc4d0b | 36 | #include "optabs.h" |
db96f378 | 37 | #include "recog.h" |
771d21fa | 38 | #include "langhooks.h" |
3072d30e | 39 | #include "df.h" |
8ea8de24 | 40 | #include "target.h" |
6ebe4c69 | 41 | #include "expmed.h" |
42 | ||
43 | struct target_expmed default_target_expmed; | |
44 | #if SWITCHABLE_TARGET | |
45 | struct target_expmed *this_target_expmed = &default_target_expmed; | |
46 | #endif | |
db96f378 | 47 | |
35cb5232 | 48 | static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT, |
49 | unsigned HOST_WIDE_INT, | |
4bb60ec7 | 50 | unsigned HOST_WIDE_INT, |
51 | unsigned HOST_WIDE_INT, | |
52 | unsigned HOST_WIDE_INT, | |
53 | rtx); | |
35cb5232 | 54 | static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT, |
4bb60ec7 | 55 | unsigned HOST_WIDE_INT, |
56 | unsigned HOST_WIDE_INT, | |
57 | unsigned HOST_WIDE_INT, | |
58 | rtx); | |
35cb5232 | 59 | static rtx extract_fixed_bit_field (enum machine_mode, rtx, |
60 | unsigned HOST_WIDE_INT, | |
61 | unsigned HOST_WIDE_INT, | |
8eef3a45 | 62 | unsigned HOST_WIDE_INT, rtx, int, bool); |
35cb5232 | 63 | static rtx mask_rtx (enum machine_mode, int, int, int); |
64 | static rtx lshift_value (enum machine_mode, rtx, int, int); | |
65 | static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT, | |
66 | unsigned HOST_WIDE_INT, int); | |
67 | static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx); | |
41323e11 | 68 | static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT); |
9c423367 | 69 | static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT); |
db96f378 | 70 | |
7a9e3364 | 71 | /* Test whether a value is zero or a power of two. */ |
72 | #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0) | |
73 | ||
ed28fcb6 | 74 | #ifndef SLOW_UNALIGNED_ACCESS |
9439ebf7 | 75 | #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT |
ed28fcb6 | 76 | #endif |
77 | ||
de373123 | 78 | |
8f5c7a6b | 79 | /* Reduce conditional compilation elsewhere. */ |
80 | #ifndef HAVE_insv | |
de7d059c | 81 | #define HAVE_insv 0 |
8f5c7a6b | 82 | #define CODE_FOR_insv CODE_FOR_nothing |
83 | #define gen_insv(a,b,c,d) NULL_RTX | |
84 | #endif | |
85 | #ifndef HAVE_extv | |
de7d059c | 86 | #define HAVE_extv 0 |
8f5c7a6b | 87 | #define CODE_FOR_extv CODE_FOR_nothing |
88 | #define gen_extv(a,b,c,d) NULL_RTX | |
89 | #endif | |
90 | #ifndef HAVE_extzv | |
de7d059c | 91 | #define HAVE_extzv 0 |
8f5c7a6b | 92 | #define CODE_FOR_extzv CODE_FOR_nothing |
93 | #define gen_extzv(a,b,c,d) NULL_RTX | |
94 | #endif | |
95 | ||
/* Scratch RTL templates used by init_expmed to probe the cost of basic
   operations (via set_src_cost) without allocating real rtxes for every
   mode.  Each "struct rtx_def NAME;" immediately followed by a
   "rtunion NAME_fld1;" member lays out a two-operand rtx inline: the
   rtunion supplies storage for the second operand slot, so XEXP (&NAME, 1)
   stays within this object.  NOTE(review): this relies on the struct
   members being laid out contiguously with no padding between the
   rtx_def and its trailing rtunion -- confirm against rtl.h.  */
struct init_expmed_rtl
{
  struct rtx_def reg;		rtunion reg_fld[2];
  struct rtx_def plus;	rtunion plus_fld1;
  struct rtx_def neg;
  struct rtx_def mult;	rtunion mult_fld1;
  struct rtx_def sdiv;	rtunion sdiv_fld1;
  struct rtx_def udiv;	rtunion udiv_fld1;
  struct rtx_def sdiv_32;	rtunion sdiv_32_fld1;
  struct rtx_def smod_32;	rtunion smod_32_fld1;
  struct rtx_def wide_mult;	rtunion wide_mult_fld1;
  struct rtx_def wide_lshr;	rtunion wide_lshr_fld1;
  struct rtx_def wide_trunc;
  struct rtx_def shift;	rtunion shift_fld1;
  struct rtx_def shift_mult;	rtunion shift_mult_fld1;
  struct rtx_def shift_add;	rtunion shift_add_fld1;
  struct rtx_def shift_sub0;	rtunion shift_sub0_fld1;
  struct rtx_def shift_sub1;	rtunion shift_sub1_fld1;
  struct rtx_def zext;
  struct rtx_def trunc;

  /* Pre-made CONST_INTs shared by all modes: pow2[m] is 1<<m and
     cint[m] is m, for m in [1, MAX_BITS_PER_WORD); filled in by
     init_expmed, consumed as shift counts/multipliers in
     init_expmed_one_mode.  */
  rtx pow2[MAX_BITS_PER_WORD];
  rtx cint[MAX_BITS_PER_WORD];
};
120 | ||
573ff301 | 121 | static void |
122 | init_expmed_one_conv (struct init_expmed_rtl *all, enum machine_mode to_mode, | |
123 | enum machine_mode from_mode, bool speed) | |
124 | { | |
125 | int to_size, from_size; | |
126 | rtx which; | |
127 | ||
128 | /* We're given no information about the true size of a partial integer, | |
129 | only the size of the "full" integer it requires for storage. For | |
130 | comparison purposes here, reduce the bit size by one in that case. */ | |
131 | to_size = (GET_MODE_BITSIZE (to_mode) | |
132 | - (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT)); | |
133 | from_size = (GET_MODE_BITSIZE (from_mode) | |
134 | - (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT)); | |
135 | ||
136 | /* Assume cost of zero-extend and sign-extend is the same. */ | |
137 | which = (to_size < from_size ? &all->trunc : &all->zext); | |
138 | ||
139 | PUT_MODE (&all->reg, from_mode); | |
140 | set_convert_cost (to_mode, from_mode, speed, set_src_cost (which, speed)); | |
141 | } | |
142 | ||
/* Fill in the expmed cost tables for a single machine mode MODE under
   the given SPEED setting (0 = optimize for size, 1 = for speed --
   presumably; matches the speed loop in init_expmed).  Retargets the
   shared templates in ALL to MODE, then queries set_src_cost on each
   to record the cost of add, neg, mul, div, shifts and shift-add/sub
   combinations, plus mode-conversion and widening-multiply costs.  */
static void
init_expmed_one_mode (struct init_expmed_rtl *all,
		      enum machine_mode mode, int speed)
{
  int m, n, mode_bitsize;
  enum machine_mode mode_from;

  mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);

  /* Point every template at MODE before asking for costs.  */
  PUT_MODE (&all->reg, mode);
  PUT_MODE (&all->plus, mode);
  PUT_MODE (&all->neg, mode);
  PUT_MODE (&all->mult, mode);
  PUT_MODE (&all->sdiv, mode);
  PUT_MODE (&all->udiv, mode);
  PUT_MODE (&all->sdiv_32, mode);
  PUT_MODE (&all->smod_32, mode);
  PUT_MODE (&all->wide_trunc, mode);
  PUT_MODE (&all->shift, mode);
  PUT_MODE (&all->shift_mult, mode);
  PUT_MODE (&all->shift_add, mode);
  PUT_MODE (&all->shift_sub0, mode);
  PUT_MODE (&all->shift_sub1, mode);
  PUT_MODE (&all->zext, mode);
  PUT_MODE (&all->trunc, mode);

  set_add_cost (speed, mode, set_src_cost (&all->plus, speed));
  set_neg_cost (speed, mode, set_src_cost (&all->neg, speed));
  set_mul_cost (speed, mode, set_src_cost (&all->mult, speed));
  set_sdiv_cost (speed, mode, set_src_cost (&all->sdiv, speed));
  set_udiv_cost (speed, mode, set_src_cost (&all->udiv, speed));

  /* Decide whether dividing/taking the remainder by a power of two is
     cheap enough (relative to a few adds) to prefer the plain division
     over the shift-based expansions.  */
  set_sdiv_pow2_cheap (speed, mode, (set_src_cost (&all->sdiv_32, speed)
				     <= 2 * add_cost (speed, mode)));
  set_smod_pow2_cheap (speed, mode, (set_src_cost (&all->smod_32, speed)
				     <= 4 * add_cost (speed, mode)));

  /* A shift by zero is free; a shift-add/sub by zero costs just the
     add.  */
  set_shift_cost (speed, mode, 0, 0);
  {
    int cost = add_cost (speed, mode);
    set_shiftadd_cost (speed, mode, 0, cost);
    set_shiftsub0_cost (speed, mode, 0, cost);
    set_shiftsub1_cost (speed, mode, 0, cost);
  }

  /* Record costs for each useful shift count M, feeding the templates
     the pre-made constants M (shift) and 1<<M (multiply).  */
  n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
  for (m = 1; m < n; m++)
    {
      XEXP (&all->shift, 1) = all->cint[m];
      XEXP (&all->shift_mult, 1) = all->pow2[m];

      set_shift_cost (speed, mode, m, set_src_cost (&all->shift, speed));
      set_shiftadd_cost (speed, mode, m, set_src_cost (&all->shift_add, speed));
      set_shiftsub0_cost (speed, mode, m, set_src_cost (&all->shift_sub0, speed));
      set_shiftsub1_cost (speed, mode, m, set_src_cost (&all->shift_sub1, speed));
    }

  /* For scalar integer modes, also record conversion costs from every
     other integer mode.  */
  if (SCALAR_INT_MODE_P (mode))
    {
      for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
	   mode_from = (enum machine_mode)(mode_from + 1))
	init_expmed_one_conv (all, mode, mode_from, speed);
    }
  if (GET_MODE_CLASS (mode) == MODE_INT)
    {
      enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
      if (wider_mode != VOIDmode)
	{
	  /* Cost a widening multiply (zext x zext in the wider mode)
	     and a highpart multiply (truncate of the widened product
	     shifted right by the narrow mode's bitsize).  */
	  PUT_MODE (&all->zext, wider_mode);
	  PUT_MODE (&all->wide_mult, wider_mode);
	  PUT_MODE (&all->wide_lshr, wider_mode);
	  XEXP (&all->wide_lshr, 1) = GEN_INT (mode_bitsize);

	  set_mul_widen_cost (speed, wider_mode,
			      set_src_cost (&all->wide_mult, speed));
	  set_mul_highpart_cost (speed, mode,
				 set_src_cost (&all->wide_trunc, speed));
	}
    }
}
223 | ||
/* One-time (per target switch) initialization of the expmed cost
   tables.  Builds a set of template rtxes on the stack -- wired
   together so e.g. wide_trunc contains wide_lshr which contains
   wide_mult -- then walks every integer, partial-integer and
   vector-integer mode for both speed settings, recording operation
   costs via init_expmed_one_mode.  Finally resets (or arms) the
   synthetic-multiply algorithm hash.  */
void
init_expmed (void)
{
  struct init_expmed_rtl all;
  enum machine_mode mode;
  int m, speed;

  memset (&all, 0, sizeof all);
  /* Pre-make the shared CONST_INT operands: cint[m] = m and
     pow2[m] = 1<<m.  */
  for (m = 1; m < MAX_BITS_PER_WORD; m++)
    {
      all.pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
      all.cint[m] = GEN_INT (m);
    }

  PUT_CODE (&all.reg, REG);
  /* Avoid using hard regs in ways which may be unsupported.  */
  SET_REGNO (&all.reg, LAST_VIRTUAL_REGISTER + 1);

  PUT_CODE (&all.plus, PLUS);
  XEXP (&all.plus, 0) = &all.reg;
  XEXP (&all.plus, 1) = &all.reg;

  PUT_CODE (&all.neg, NEG);
  XEXP (&all.neg, 0) = &all.reg;

  PUT_CODE (&all.mult, MULT);
  XEXP (&all.mult, 0) = &all.reg;
  XEXP (&all.mult, 1) = &all.reg;

  PUT_CODE (&all.sdiv, DIV);
  XEXP (&all.sdiv, 0) = &all.reg;
  XEXP (&all.sdiv, 1) = &all.reg;

  PUT_CODE (&all.udiv, UDIV);
  XEXP (&all.udiv, 0) = &all.reg;
  XEXP (&all.udiv, 1) = &all.reg;

  /* reg / 32 and reg % 32: representative power-of-two divisor used by
     the sdiv/smod_pow2_cheap decisions.  Fall back to a fresh
     CONST_INT if 32 is outside the pre-made table.  */
  PUT_CODE (&all.sdiv_32, DIV);
  XEXP (&all.sdiv_32, 0) = &all.reg;
  XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? all.cint[32] : GEN_INT (32);

  PUT_CODE (&all.smod_32, MOD);
  XEXP (&all.smod_32, 0) = &all.reg;
  XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);

  PUT_CODE (&all.zext, ZERO_EXTEND);
  XEXP (&all.zext, 0) = &all.reg;

  /* Widening multiply: (mult (zext reg) (zext reg)).  */
  PUT_CODE (&all.wide_mult, MULT);
  XEXP (&all.wide_mult, 0) = &all.zext;
  XEXP (&all.wide_mult, 1) = &all.zext;

  /* Highpart multiply: (truncate (lshiftrt wide_mult N)); the shift
     count is filled in per mode by init_expmed_one_mode.  */
  PUT_CODE (&all.wide_lshr, LSHIFTRT);
  XEXP (&all.wide_lshr, 0) = &all.wide_mult;

  PUT_CODE (&all.wide_trunc, TRUNCATE);
  XEXP (&all.wide_trunc, 0) = &all.wide_lshr;

  /* Shift and shift-combined-with-add/sub templates; operand 1 of
     shift/shift_mult is filled in per shift count.  */
  PUT_CODE (&all.shift, ASHIFT);
  XEXP (&all.shift, 0) = &all.reg;

  PUT_CODE (&all.shift_mult, MULT);
  XEXP (&all.shift_mult, 0) = &all.reg;

  PUT_CODE (&all.shift_add, PLUS);
  XEXP (&all.shift_add, 0) = &all.shift_mult;
  XEXP (&all.shift_add, 1) = &all.reg;

  PUT_CODE (&all.shift_sub0, MINUS);
  XEXP (&all.shift_sub0, 0) = &all.shift_mult;
  XEXP (&all.shift_sub0, 1) = &all.reg;

  /* shift_sub1 is the mirror image: reg - (reg << n).  */
  PUT_CODE (&all.shift_sub1, MINUS);
  XEXP (&all.shift_sub1, 0) = &all.reg;
  XEXP (&all.shift_sub1, 1) = &all.shift_mult;

  PUT_CODE (&all.trunc, TRUNCATE);
  XEXP (&all.trunc, 0) = &all.reg;

  for (speed = 0; speed < 2; speed++)
    {
      /* Temporarily switch the RTL profile so rtx_cost sees the right
         size/speed setting; restored via default_rtl_profile below.  */
      crtl->maybe_hot_insn_p = speed;
      set_zero_cost (speed, set_src_cost (const0_rtx, speed));

      for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
	   mode = (enum machine_mode)(mode + 1))
	init_expmed_one_mode (&all, mode, speed);

      if (MIN_MODE_PARTIAL_INT != VOIDmode)
	for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
	     mode = (enum machine_mode)(mode + 1))
	  init_expmed_one_mode (&all, mode, speed);

      if (MIN_MODE_VECTOR_INT != VOIDmode)
	for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
	     mode = (enum machine_mode)(mode + 1))
	  init_expmed_one_mode (&all, mode, speed);
    }

  /* Clear any stale synthetic-multiply hash entries from a previous
     target; on first use just mark the hash as live.  */
  if (alg_hash_used_p ())
    {
      struct alg_hash_entry *p = alg_hash_entry_ptr (0);
      memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
    }
  else
    set_alg_hash_used_p (true);
  default_rtl_profile ();
}
332 | ||
333 | /* Return an rtx representing minus the value of X. | |
334 | MODE is the intended mode of the result, | |
335 | useful if X is a CONST_INT. */ | |
336 | ||
337 | rtx | |
35cb5232 | 338 | negate_rtx (enum machine_mode mode, rtx x) |
db96f378 | 339 | { |
2242dc4b | 340 | rtx result = simplify_unary_operation (NEG, mode, x, mode); |
341 | ||
4e57dfc5 | 342 | if (result == 0) |
2242dc4b | 343 | result = expand_unop (mode, neg_optab, x, NULL_RTX, 0); |
344 | ||
345 | return result; | |
db96f378 | 346 | } |
9068af20 | 347 | |
348 | /* Report on the availability of insv/extv/extzv and the desired mode | |
349 | of each of their operands. Returns MAX_MACHINE_MODE if HAVE_foo | |
350 | is false; else the mode of the specified operand. If OPNO is -1, | |
351 | all the caller cares about is whether the insn is available. */ | |
352 | enum machine_mode | |
35cb5232 | 353 | mode_for_extraction (enum extraction_pattern pattern, int opno) |
9068af20 | 354 | { |
f2956fc5 | 355 | const struct insn_data_d *data; |
9068af20 | 356 | |
357 | switch (pattern) | |
358 | { | |
359 | case EP_insv: | |
9068af20 | 360 | if (HAVE_insv) |
361 | { | |
362 | data = &insn_data[CODE_FOR_insv]; | |
363 | break; | |
364 | } | |
9068af20 | 365 | return MAX_MACHINE_MODE; |
366 | ||
367 | case EP_extv: | |
9068af20 | 368 | if (HAVE_extv) |
369 | { | |
370 | data = &insn_data[CODE_FOR_extv]; | |
371 | break; | |
372 | } | |
9068af20 | 373 | return MAX_MACHINE_MODE; |
374 | ||
375 | case EP_extzv: | |
9068af20 | 376 | if (HAVE_extzv) |
377 | { | |
378 | data = &insn_data[CODE_FOR_extzv]; | |
379 | break; | |
380 | } | |
9068af20 | 381 | return MAX_MACHINE_MODE; |
de7d059c | 382 | |
383 | default: | |
611234b4 | 384 | gcc_unreachable (); |
9068af20 | 385 | } |
386 | ||
387 | if (opno == -1) | |
388 | return VOIDmode; | |
389 | ||
390 | /* Everyone who uses this function used to follow it with | |
391 | if (result == VOIDmode) result = word_mode; */ | |
392 | if (data->operand[opno].mode == VOIDmode) | |
393 | return word_mode; | |
394 | return data->operand[opno].mode; | |
395 | } | |
36122326 | 396 | \f |
397 | /* A subroutine of store_bit_field, with the same arguments. Return true | |
398 | if the operation could be implemented. | |
db96f378 | 399 | |
36122326 | 400 | If FALLBACK_P is true, fall back to store_fixed_bit_field if we have |
401 | no other way of implementing the operation. If FALLBACK_P is false, | |
402 | return false instead. */ | |
403 | ||
404 | static bool | |
405 | store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize, | |
4bb60ec7 | 406 | unsigned HOST_WIDE_INT bitnum, |
407 | unsigned HOST_WIDE_INT bitregion_start, | |
408 | unsigned HOST_WIDE_INT bitregion_end, | |
409 | enum machine_mode fieldmode, | |
36122326 | 410 | rtx value, bool fallback_p) |
db96f378 | 411 | { |
02e7a332 | 412 | unsigned int unit |
e16ceb8e | 413 | = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD; |
bc5449fc | 414 | unsigned HOST_WIDE_INT offset, bitpos; |
19cb6b50 | 415 | rtx op0 = str_rtx; |
5e862a70 | 416 | int byte_offset; |
3cc25a48 | 417 | rtx orig_value; |
9068af20 | 418 | |
de7d059c | 419 | enum machine_mode op_mode = mode_for_extraction (EP_insv, 3); |
db96f378 | 420 | |
db96f378 | 421 | while (GET_CODE (op0) == SUBREG) |
422 | { | |
423 | /* The following line once was done only if WORDS_BIG_ENDIAN, | |
424 | but I think that is a mistake. WORDS_BIG_ENDIAN is | |
425 | meaningful at a much higher level; when structures are copied | |
426 | between memory and regs, the higher-numbered regs | |
427 | always get higher addresses. */ | |
217d3a24 | 428 | int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0))); |
429 | int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0)); | |
48e1416a | 430 | |
217d3a24 | 431 | byte_offset = 0; |
432 | ||
433 | /* Paradoxical subregs need special handling on big endian machines. */ | |
434 | if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size) | |
435 | { | |
436 | int difference = inner_mode_size - outer_mode_size; | |
437 | ||
438 | if (WORDS_BIG_ENDIAN) | |
439 | byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD; | |
440 | if (BYTES_BIG_ENDIAN) | |
441 | byte_offset += difference % UNITS_PER_WORD; | |
442 | } | |
443 | else | |
444 | byte_offset = SUBREG_BYTE (op0); | |
445 | ||
446 | bitnum += byte_offset * BITS_PER_UNIT; | |
db96f378 | 447 | op0 = SUBREG_REG (op0); |
448 | } | |
449 | ||
bc5449fc | 450 | /* No action is needed if the target is a register and if the field |
451 | lies completely outside that register. This can occur if the source | |
452 | code contains an out-of-bounds access to a small array. */ | |
453 | if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0))) | |
36122326 | 454 | return true; |
bc5449fc | 455 | |
cb746719 | 456 | /* Use vec_set patterns for inserting parts of vectors whenever |
b8d2bcdd | 457 | available. */ |
458 | if (VECTOR_MODE_P (GET_MODE (op0)) | |
e16ceb8e | 459 | && !MEM_P (op0) |
d6bf3b14 | 460 | && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing |
b8d2bcdd | 461 | && fieldmode == GET_MODE_INNER (GET_MODE (op0)) |
462 | && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0))) | |
463 | && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0))))) | |
464 | { | |
8786db1e | 465 | struct expand_operand ops[3]; |
b8d2bcdd | 466 | enum machine_mode outermode = GET_MODE (op0); |
467 | enum machine_mode innermode = GET_MODE_INNER (outermode); | |
8786db1e | 468 | enum insn_code icode = optab_handler (vec_set_optab, outermode); |
b8d2bcdd | 469 | int pos = bitnum / GET_MODE_BITSIZE (innermode); |
b8d2bcdd | 470 | |
8786db1e | 471 | create_fixed_operand (&ops[0], op0); |
472 | create_input_operand (&ops[1], value, innermode); | |
473 | create_integer_operand (&ops[2], pos); | |
474 | if (maybe_expand_insn (icode, 3, ops)) | |
475 | return true; | |
b8d2bcdd | 476 | } |
477 | ||
9e527b97 | 478 | /* If the target is a register, overwriting the entire object, or storing |
479 | a full-word or multi-word field can be done with just a SUBREG. | |
480 | ||
481 | If the target is memory, storing any naturally aligned field can be | |
482 | done with a simple store. For targets that support fast unaligned | |
bc9d42da | 483 | memory, any naturally sized, unit aligned field can be done directly. */ |
a4194ff7 | 484 | |
bc5449fc | 485 | offset = bitnum / unit; |
486 | bitpos = bitnum % unit; | |
5e862a70 | 487 | byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT |
488 | + (offset * UNITS_PER_WORD); | |
489 | ||
a29e3f42 | 490 | if (bitpos == 0 |
bc9d42da | 491 | && bitsize == GET_MODE_BITSIZE (fieldmode) |
e16ceb8e | 492 | && (!MEM_P (op0) |
5e862a70 | 493 | ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD |
3a175160 | 494 | || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode)) |
495 | && ((GET_MODE (op0) == fieldmode && byte_offset == 0) | |
496 | || validate_subreg (fieldmode, GET_MODE (op0), op0, | |
497 | byte_offset))) | |
2c269e73 | 498 | : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0)) |
13af9900 | 499 | || (offset * BITS_PER_UNIT % bitsize == 0 |
2c269e73 | 500 | && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0)))) |
db96f378 | 501 | { |
06d6b64e | 502 | if (MEM_P (op0)) |
503 | op0 = adjust_address (op0, fieldmode, offset); | |
504 | else if (GET_MODE (op0) != fieldmode) | |
505 | op0 = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0), | |
506 | byte_offset); | |
db96f378 | 507 | emit_move_insn (op0, value); |
36122326 | 508 | return true; |
db96f378 | 509 | } |
510 | ||
b708a05c | 511 | /* Make sure we are playing with integral modes. Pun with subregs |
512 | if we aren't. This must come after the entire register case above, | |
513 | since that case is valid for any mode. The following cases are only | |
514 | valid for integral modes. */ | |
515 | { | |
516 | enum machine_mode imode = int_mode_for_mode (GET_MODE (op0)); | |
517 | if (imode != GET_MODE (op0)) | |
518 | { | |
e16ceb8e | 519 | if (MEM_P (op0)) |
e513d163 | 520 | op0 = adjust_address (op0, imode, 0); |
b708a05c | 521 | else |
611234b4 | 522 | { |
523 | gcc_assert (imode != BLKmode); | |
524 | op0 = gen_lowpart (imode, op0); | |
525 | } | |
b708a05c | 526 | } |
527 | } | |
528 | ||
edc4f1e8 | 529 | /* We may be accessing data outside the field, which means |
530 | we can alias adjacent data. */ | |
4bb60ec7 | 531 | /* ?? not always for C++0x memory model ?? */ |
e16ceb8e | 532 | if (MEM_P (op0)) |
edc4f1e8 | 533 | { |
534 | op0 = shallow_copy_rtx (op0); | |
535 | set_mem_alias_set (op0, 0); | |
536 | set_mem_expr (op0, 0); | |
537 | } | |
538 | ||
a29e3f42 | 539 | /* If OP0 is a register, BITPOS must count within a word. |
540 | But as we have it, it counts within whatever size OP0 now has. | |
541 | On a bigendian machine, these are not the same, so convert. */ | |
542 | if (BYTES_BIG_ENDIAN | |
e16ceb8e | 543 | && !MEM_P (op0) |
a29e3f42 | 544 | && unit > GET_MODE_BITSIZE (GET_MODE (op0))) |
545 | bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0)); | |
546 | ||
db96f378 | 547 | /* Storing an lsb-aligned field in a register |
548 | can be done with a movestrict instruction. */ | |
549 | ||
e16ceb8e | 550 | if (!MEM_P (op0) |
51356f86 | 551 | && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0) |
db96f378 | 552 | && bitsize == GET_MODE_BITSIZE (fieldmode) |
d6bf3b14 | 553 | && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing) |
db96f378 | 554 | { |
8786db1e | 555 | struct expand_operand ops[2]; |
556 | enum insn_code icode = optab_handler (movstrict_optab, fieldmode); | |
c2ef487a | 557 | rtx arg0 = op0; |
3a175160 | 558 | unsigned HOST_WIDE_INT subreg_off; |
ee2ba10c | 559 | |
8786db1e | 560 | if (GET_CODE (arg0) == SUBREG) |
db96f378 | 561 | { |
611234b4 | 562 | /* Else we've got some float mode source being extracted into |
563 | a different float mode destination -- this combination of | |
564 | subregs results in Severe Tire Damage. */ | |
8786db1e | 565 | gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode |
611234b4 | 566 | || GET_MODE_CLASS (fieldmode) == MODE_INT |
567 | || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT); | |
8786db1e | 568 | arg0 = SUBREG_REG (arg0); |
ee2ba10c | 569 | } |
650df5df | 570 | |
3a175160 | 571 | subreg_off = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT |
572 | + (offset * UNITS_PER_WORD); | |
573 | if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off)) | |
574 | { | |
575 | arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off); | |
8786db1e | 576 | |
3a175160 | 577 | create_fixed_operand (&ops[0], arg0); |
578 | /* Shrink the source operand to FIELDMODE. */ | |
579 | create_convert_operand_to (&ops[1], value, fieldmode, false); | |
580 | if (maybe_expand_insn (icode, 2, ops)) | |
581 | return true; | |
582 | } | |
db96f378 | 583 | } |
584 | ||
585 | /* Handle fields bigger than a word. */ | |
586 | ||
587 | if (bitsize > BITS_PER_WORD) | |
588 | { | |
589 | /* Here we transfer the words of the field | |
590 | in the order least significant first. | |
591 | This is because the most significant word is the one which may | |
766e2366 | 592 | be less than full. |
593 | However, only do that if the value is not BLKmode. */ | |
594 | ||
02e7a332 | 595 | unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode; |
596 | unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD; | |
597 | unsigned int i; | |
36122326 | 598 | rtx last; |
db96f378 | 599 | |
600 | /* This is the mode we must force value to, so that there will be enough | |
601 | subwords to extract. Note that fieldmode will often (always?) be | |
602 | VOIDmode, because that is what store_field uses to indicate that this | |
89f18f73 | 603 | is a bit field, but passing VOIDmode to operand_subword_force |
604 | is not allowed. */ | |
7ac3f3c5 | 605 | fieldmode = GET_MODE (value); |
606 | if (fieldmode == VOIDmode) | |
0a0dae2e | 607 | fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT); |
db96f378 | 608 | |
36122326 | 609 | last = get_last_insn (); |
db96f378 | 610 | for (i = 0; i < nwords; i++) |
611 | { | |
766e2366 | 612 | /* If I is 0, use the low-order word in both field and target; |
613 | if I is 1, use the next to lowest word; and so on. */ | |
cefa4c33 | 614 | unsigned int wordnum = (backwards |
615 | ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD | |
616 | - i - 1 | |
617 | : i); | |
02e7a332 | 618 | unsigned int bit_offset = (backwards |
2c269e73 | 619 | ? MAX ((int) bitsize - ((int) i + 1) |
620 | * BITS_PER_WORD, | |
621 | 0) | |
622 | : (int) i * BITS_PER_WORD); | |
36122326 | 623 | rtx value_word = operand_subword_force (value, wordnum, fieldmode); |
4d18c297 | 624 | unsigned HOST_WIDE_INT new_bitsize = |
625 | MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD); | |
626 | ||
627 | /* If the remaining chunk doesn't have full wordsize we have | |
628 | to make sure that for big endian machines the higher order | |
629 | bits are used. */ | |
630 | if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards) | |
631 | value_word = simplify_expand_binop (word_mode, lshr_optab, | |
632 | value_word, | |
633 | GEN_INT (BITS_PER_WORD | |
634 | - new_bitsize), | |
635 | NULL_RTX, true, | |
636 | OPTAB_LIB_WIDEN); | |
637 | ||
638 | if (!store_bit_field_1 (op0, new_bitsize, | |
4bb60ec7 | 639 | bitnum + bit_offset, |
640 | bitregion_start, bitregion_end, | |
641 | word_mode, | |
36122326 | 642 | value_word, fallback_p)) |
643 | { | |
644 | delete_insns_since (last); | |
645 | return false; | |
646 | } | |
db96f378 | 647 | } |
36122326 | 648 | return true; |
db96f378 | 649 | } |
650 | ||
651 | /* From here on we can assume that the field to be stored in is | |
652 | a full-word (whatever type that is), since it is shorter than a word. */ | |
653 | ||
654 | /* OFFSET is the number of words or bytes (UNIT says which) | |
655 | from STR_RTX to the first word or byte containing part of the field. */ | |
656 | ||
e16ceb8e | 657 | if (!MEM_P (op0)) |
db96f378 | 658 | { |
659 | if (offset != 0 | |
660 | || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD) | |
650df5df | 661 | { |
8ad4c111 | 662 | if (!REG_P (op0)) |
edffc66d | 663 | { |
89f18f73 | 664 | /* Since this is a destination (lvalue), we can't copy |
665 | it to a pseudo. We can remove a SUBREG that does not | |
666 | change the size of the operand. Such a SUBREG may | |
667 | have been added above. */ | |
611234b4 | 668 | gcc_assert (GET_CODE (op0) == SUBREG |
669 | && (GET_MODE_SIZE (GET_MODE (op0)) | |
670 | == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0))))); | |
671 | op0 = SUBREG_REG (op0); | |
edffc66d | 672 | } |
650df5df | 673 | op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0), |
701e46d0 | 674 | op0, (offset * UNITS_PER_WORD)); |
650df5df | 675 | } |
db96f378 | 676 | offset = 0; |
677 | } | |
db96f378 | 678 | |
2b701a08 | 679 | /* If VALUE has a floating-point or complex mode, access it as an |
680 | integer of the corresponding size. This can occur on a machine | |
681 | with 64 bit registers that uses SFmode for float. It can also | |
682 | occur for unaligned float or complex fields. */ | |
3cc25a48 | 683 | orig_value = value; |
2b701a08 | 684 | if (GET_MODE (value) != VOIDmode |
685 | && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT | |
ead227c9 | 686 | && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT) |
2b701a08 | 687 | { |
688 | value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value))); | |
689 | emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value); | |
690 | } | |
acb2971d | 691 | |
db96f378 | 692 | /* Now OFFSET is nonzero only if OP0 is memory |
693 | and is therefore always measured in bytes. */ | |
694 | ||
de7d059c | 695 | if (HAVE_insv |
f89c2379 | 696 | && GET_MODE (value) != BLKmode |
02256d9b | 697 | && bitsize > 0 |
698 | && GET_MODE_BITSIZE (op_mode) >= bitsize | |
eb04cafb | 699 | /* Do not use insv for volatile bitfields when |
700 | -fstrict-volatile-bitfields is in effect. */ | |
701 | && !(MEM_P (op0) && MEM_VOLATILE_P (op0) | |
702 | && flag_strict_volatile_bitfields > 0) | |
8ad4c111 | 703 | && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG) |
7e9ba3f3 | 704 | && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode))) |
705 | /* Do not use insv if the bit region is restricted and | |
706 | op_mode integer at offset doesn't fit into the | |
707 | restricted region. */ | |
708 | && !(MEM_P (op0) && bitregion_end | |
709 | && bitnum - bitpos + GET_MODE_BITSIZE (op_mode) | |
710 | > bitregion_end + 1)) | |
db96f378 | 711 | { |
8786db1e | 712 | struct expand_operand ops[4]; |
db96f378 | 713 | int xbitpos = bitpos; |
714 | rtx value1; | |
715 | rtx xop0 = op0; | |
716 | rtx last = get_last_insn (); | |
98e07982 | 717 | bool copy_back = false; |
db96f378 | 718 | |
719 | /* Add OFFSET into OP0's address. */ | |
e16ceb8e | 720 | if (MEM_P (xop0)) |
e513d163 | 721 | xop0 = adjust_address (xop0, byte_mode, offset); |
db96f378 | 722 | |
36122326 | 723 | /* If xop0 is a register, we need it in OP_MODE |
db96f378 | 724 | to make it acceptable to the format of insv. */ |
725 | if (GET_CODE (xop0) == SUBREG) | |
b7c9a162 | 726 | /* We can't just change the mode, because this might clobber op0, |
727 | and we will need the original value of op0 if insv fails. */ | |
36122326 | 728 | xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0)); |
729 | if (REG_P (xop0) && GET_MODE (xop0) != op_mode) | |
42b8780a | 730 | xop0 = gen_lowpart_SUBREG (op_mode, xop0); |
db96f378 | 731 | |
98e07982 | 732 | /* If the destination is a paradoxical subreg such that we need a |
733 | truncate to the inner mode, perform the insertion on a temporary and | |
734 | truncate the result to the original destination. Note that we can't | |
735 | just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N | |
736 | X) 0)) is (reg:N X). */ | |
737 | if (GET_CODE (xop0) == SUBREG | |
738 | && REG_P (SUBREG_REG (xop0)) | |
396f2130 | 739 | && (!TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)), |
740 | op_mode))) | |
98e07982 | 741 | { |
742 | rtx tem = gen_reg_rtx (op_mode); | |
743 | emit_move_insn (tem, xop0); | |
744 | xop0 = tem; | |
745 | copy_back = true; | |
746 | } | |
747 | ||
db96f378 | 748 | /* We have been counting XBITPOS within UNIT. |
749 | Count instead within the size of the register. */ | |
eb04cafb | 750 | if (BYTES_BIG_ENDIAN && !MEM_P (xop0)) |
36122326 | 751 | xbitpos += GET_MODE_BITSIZE (op_mode) - unit; |
51356f86 | 752 | |
36122326 | 753 | unit = GET_MODE_BITSIZE (op_mode); |
db96f378 | 754 | |
eb04cafb | 755 | /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count |
756 | "backwards" from the size of the unit we are inserting into. | |
757 | Otherwise, we count bits from the most significant on a | |
758 | BYTES/BITS_BIG_ENDIAN machine. */ | |
759 | ||
760 | if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN) | |
761 | xbitpos = unit - bitsize - xbitpos; | |
762 | ||
36122326 | 763 | /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */ |
db96f378 | 764 | value1 = value; |
36122326 | 765 | if (GET_MODE (value) != op_mode) |
db96f378 | 766 | { |
767 | if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize) | |
768 | { | |
769 | /* Optimization: Don't bother really extending VALUE | |
0777ab28 | 770 | if it has all the bits we will actually use. However, |
771 | if we must narrow it, be sure we do it correctly. */ | |
db96f378 | 772 | |
36122326 | 773 | if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode)) |
a4194ff7 | 774 | { |
775 | rtx tmp; | |
776 | ||
36122326 | 777 | tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0); |
a4194ff7 | 778 | if (! tmp) |
36122326 | 779 | tmp = simplify_gen_subreg (op_mode, |
a4194ff7 | 780 | force_reg (GET_MODE (value), |
781 | value1), | |
782 | GET_MODE (value), 0); | |
783 | value1 = tmp; | |
784 | } | |
0777ab28 | 785 | else |
36122326 | 786 | value1 = gen_lowpart (op_mode, value1); |
db96f378 | 787 | } |
971ba038 | 788 | else if (CONST_INT_P (value)) |
36122326 | 789 | value1 = gen_int_mode (INTVAL (value), op_mode); |
611234b4 | 790 | else |
db96f378 | 791 | /* Parse phase is supposed to make VALUE's data type |
792 | match that of the component reference, which is a type | |
793 | at least as wide as the field; so VALUE should have | |
794 | a mode that corresponds to that type. */ | |
611234b4 | 795 | gcc_assert (CONSTANT_P (value)); |
db96f378 | 796 | } |
797 | ||
8786db1e | 798 | create_fixed_operand (&ops[0], xop0); |
799 | create_integer_operand (&ops[1], bitsize); | |
800 | create_integer_operand (&ops[2], xbitpos); | |
801 | create_input_operand (&ops[3], value1, op_mode); | |
802 | if (maybe_expand_insn (CODE_FOR_insv, 4, ops)) | |
36122326 | 803 | { |
98e07982 | 804 | if (copy_back) |
805 | convert_move (op0, xop0, true); | |
36122326 | 806 | return true; |
807 | } | |
808 | delete_insns_since (last); | |
809 | } | |
810 | ||
811 | /* If OP0 is a memory, try copying it to a register and seeing if a | |
812 | cheap register alternative is available. */ | |
813 | if (HAVE_insv && MEM_P (op0)) | |
814 | { | |
815 | enum machine_mode bestmode; | |
4bb60ec7 | 816 | unsigned HOST_WIDE_INT maxbits = MAX_FIXED_MODE_SIZE; |
817 | ||
818 | if (bitregion_end) | |
819 | maxbits = bitregion_end - bitregion_start + 1; | |
36122326 | 820 | |
821 | /* Get the mode to use for inserting into this field. If OP0 is | |
822 | BLKmode, get the smallest mode consistent with the alignment. If | |
823 | OP0 is a non-BLKmode object that is no wider than OP_MODE, use its | |
824 | mode. Otherwise, use the smallest mode containing the field. */ | |
825 | ||
826 | if (GET_MODE (op0) == BLKmode | |
4bb60ec7 | 827 | || GET_MODE_BITSIZE (GET_MODE (op0)) > maxbits |
36122326 | 828 | || (op_mode != MAX_MACHINE_MODE |
829 | && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (op_mode))) | |
7e9ba3f3 | 830 | bestmode = get_best_mode (bitsize, bitnum, |
4bb60ec7 | 831 | bitregion_start, bitregion_end, |
832 | MEM_ALIGN (op0), | |
36122326 | 833 | (op_mode == MAX_MACHINE_MODE |
834 | ? VOIDmode : op_mode), | |
835 | MEM_VOLATILE_P (op0)); | |
db96f378 | 836 | else |
36122326 | 837 | bestmode = GET_MODE (op0); |
838 | ||
839 | if (bestmode != VOIDmode | |
840 | && GET_MODE_SIZE (bestmode) >= GET_MODE_SIZE (fieldmode) | |
841 | && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0)) | |
842 | && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0))) | |
ff385626 | 843 | { |
36122326 | 844 | rtx last, tempreg, xop0; |
845 | unsigned HOST_WIDE_INT xoffset, xbitpos; | |
846 | ||
847 | last = get_last_insn (); | |
848 | ||
849 | /* Adjust address to point to the containing unit of | |
850 | that mode. Compute the offset as a multiple of this unit, | |
851 | counting in bytes. */ | |
852 | unit = GET_MODE_BITSIZE (bestmode); | |
853 | xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode); | |
854 | xbitpos = bitnum % unit; | |
855 | xop0 = adjust_address (op0, bestmode, xoffset); | |
856 | ||
857 | /* Fetch that unit, store the bitfield in it, then store | |
858 | the unit. */ | |
859 | tempreg = copy_to_reg (xop0); | |
860 | if (store_bit_field_1 (tempreg, bitsize, xbitpos, | |
4bb60ec7 | 861 | bitregion_start, bitregion_end, |
36122326 | 862 | fieldmode, orig_value, false)) |
863 | { | |
864 | emit_move_insn (xop0, tempreg); | |
865 | return true; | |
866 | } | |
db96f378 | 867 | delete_insns_since (last); |
db96f378 | 868 | } |
869 | } | |
36122326 | 870 | |
871 | if (!fallback_p) | |
872 | return false; | |
873 | ||
4bb60ec7 | 874 | store_fixed_bit_field (op0, offset, bitsize, bitpos, |
875 | bitregion_start, bitregion_end, value); | |
36122326 | 876 | return true; |
877 | } | |
878 | ||
879 | /* Generate code to store value from rtx VALUE | |
880 | into a bit-field within structure STR_RTX | |
881 | containing BITSIZE bits starting at bit BITNUM. | |
4bb60ec7 | 882 | |
883 | BITREGION_START is bitpos of the first bitfield in this region. | |
884 | BITREGION_END is the bitpos of the ending bitfield in this region. | |
885 | These two fields are 0, if the C++ memory model does not apply, | |
886 | or we are not interested in keeping track of bitfield regions. | |
887 | ||
36122326 | 888 | FIELDMODE is the machine-mode of the FIELD_DECL node for this field. */ |
889 | ||
890 | void | |
891 | store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize, | |
4bb60ec7 | 892 | unsigned HOST_WIDE_INT bitnum, |
893 | unsigned HOST_WIDE_INT bitregion_start, | |
894 | unsigned HOST_WIDE_INT bitregion_end, | |
895 | enum machine_mode fieldmode, | |
36122326 | 896 | rtx value) |
897 | { | |
4bb60ec7 | 898 | /* Under the C++0x memory model, we must not touch bits outside the |
899 | bit region. Adjust the address to start at the beginning of the | |
900 | bit region. */ | |
73041e9b | 901 | if (MEM_P (str_rtx) && bitregion_start > 0) |
4bb60ec7 | 902 | { |
903 | enum machine_mode bestmode; | |
904 | enum machine_mode op_mode; | |
905 | unsigned HOST_WIDE_INT offset; | |
906 | ||
907 | op_mode = mode_for_extraction (EP_insv, 3); | |
908 | if (op_mode == MAX_MACHINE_MODE) | |
909 | op_mode = VOIDmode; | |
910 | ||
73041e9b | 911 | gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0); |
912 | ||
4bb60ec7 | 913 | offset = bitregion_start / BITS_PER_UNIT; |
914 | bitnum -= bitregion_start; | |
915 | bitregion_end -= bitregion_start; | |
916 | bitregion_start = 0; | |
917 | bestmode = get_best_mode (bitsize, bitnum, | |
918 | bitregion_start, bitregion_end, | |
919 | MEM_ALIGN (str_rtx), | |
920 | op_mode, | |
921 | MEM_VOLATILE_P (str_rtx)); | |
922 | str_rtx = adjust_address (str_rtx, bestmode, offset); | |
923 | } | |
924 | ||
925 | if (!store_bit_field_1 (str_rtx, bitsize, bitnum, | |
926 | bitregion_start, bitregion_end, | |
927 | fieldmode, value, true)) | |
36122326 | 928 | gcc_unreachable (); |
db96f378 | 929 | } |
930 | \f | |
/* Use shifts and boolean operations to store VALUE
   into a bit field of width BITSIZE
   in a memory location specified by OP0 except offset by OFFSET bytes.
   (OFFSET must be 0 if OP0 is a register.)
   The field starts at position BITPOS within the byte.
   (If OP0 is a register, it may be a full word or a narrower mode,
   but BITPOS still counts within a full word,
   which is significant on bigendian machines.)

   BITREGION_START/BITREGION_END delimit the bit region the C++ memory
   model allows us to touch (both 0 when the model does not apply); they
   are passed through to get_best_mode and store_split_bit_field.  */

static void
store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
		       unsigned HOST_WIDE_INT bitsize,
		       unsigned HOST_WIDE_INT bitpos,
		       unsigned HOST_WIDE_INT bitregion_start,
		       unsigned HOST_WIDE_INT bitregion_end,
		       rtx value)
{
  enum machine_mode mode;
  unsigned int total_bits = BITS_PER_WORD;
  rtx temp;
  int all_zero = 0;
  int all_one = 0;

  /* There is a case not handled here:
     a structure with a known alignment of just a halfword
     and a field split across two aligned halfwords within the structure.
     Or likewise a structure with a known alignment of just a byte
     and a field split across two bytes.
     Such cases are not supposed to be able to occur.  */

  if (REG_P (op0) || GET_CODE (op0) == SUBREG)
    {
      /* For a register destination the caller must not pass a byte
	 offset; positions are counted within the word itself.  */
      gcc_assert (!offset);
      /* Special treatment for a bit field split across two registers.  */
      if (bitsize + bitpos > BITS_PER_WORD)
	{
	  store_split_bit_field (op0, bitsize, bitpos,
				 bitregion_start, bitregion_end,
				 value);
	  return;
	}
    }
  else
    {
      /* MAXBITS caps the access width: either the widest fixed mode,
	 or the bit region when the C++ memory model restricts us.  */
      unsigned HOST_WIDE_INT maxbits = MAX_FIXED_MODE_SIZE;

      if (bitregion_end)
	maxbits = bitregion_end - bitregion_start + 1;

      /* Get the proper mode to use for this field.  We want a mode that
	 includes the entire field.  If such a mode would be larger than
	 a word, we won't be doing the extraction the normal way.
	 We don't want a mode bigger than the destination.  */

      mode = GET_MODE (op0);
      if (GET_MODE_BITSIZE (mode) == 0
	  || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
	mode = word_mode;

      /* Under -fstrict-volatile-bitfields, a volatile MEM must be
	 accessed in exactly its declared mode (when that fits in the
	 permitted region) rather than whatever mode is "best".  */
      if (MEM_VOLATILE_P (op0)
          && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
	  && GET_MODE_BITSIZE (GET_MODE (op0)) <= maxbits
	  && flag_strict_volatile_bitfields > 0)
	mode = GET_MODE (op0);
      else
	mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
			      bitregion_start, bitregion_end,
			      MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));

      if (mode == VOIDmode)
	{
	  /* The only way this should occur is if the field spans word
	     boundaries.  */
	  store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
				 bitregion_start, bitregion_end, value);
	  return;
	}

      total_bits = GET_MODE_BITSIZE (mode);

      /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
	 be in the range 0 to total_bits-1, and put any excess bytes in
	 OFFSET.  */
      if (bitpos >= total_bits)
	{
	  offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
	  bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
		     * BITS_PER_UNIT);
	}

      /* Get ref to an aligned byte, halfword, or word containing the field.
	 Adjust BITPOS to be position within a word,
	 and OFFSET to be the offset of that word.
	 Then alter OP0 to refer to that word.  */
      bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
      offset -= (offset % (total_bits / BITS_PER_UNIT));
      op0 = adjust_address (op0, mode, offset);
    }

  mode = GET_MODE (op0);

  /* Now MODE is either some integral mode for a MEM as OP0,
     or is a full-word for a REG as OP0.  TOTAL_BITS corresponds.
     The bit field is contained entirely within OP0.
     BITPOS is the starting bit number within OP0.
     (OP0's mode may actually be narrower than MODE.)  */

  if (BYTES_BIG_ENDIAN)
    /* BITPOS is the distance between our msb
       and that of the containing datum.
       Convert it to the distance from the lsb.  */
    bitpos = total_bits - bitsize - bitpos;

  /* Now BITPOS is always the distance between our lsb
     and that of OP0.  */

  /* Shift VALUE left by BITPOS bits.  If VALUE is not constant,
     we must first convert its mode to MODE.  */

  if (CONST_INT_P (value))
    {
      HOST_WIDE_INT v = INTVAL (value);

      if (bitsize < HOST_BITS_PER_WIDE_INT)
	v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;

      /* Record all-zeros / all-ones values so the masking AND and the
	 final IOR below can be skipped when they would be no-ops.  */
      if (v == 0)
	all_zero = 1;
      else if ((bitsize < HOST_BITS_PER_WIDE_INT
		&& v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
	       || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
	all_one = 1;

      value = lshift_value (mode, value, bitpos, bitsize);
    }
  else
    {
      /* MUST_AND: the source may carry stray high bits unless its mode
	 exactly matches the field width, or the field reaches the top
	 of MODE (where the shift below discards the excess).  */
      int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
		      && bitpos + bitsize != GET_MODE_BITSIZE (mode));

      if (GET_MODE (value) != mode)
	value = convert_to_mode (mode, value, 1);

      if (must_and)
	value = expand_binop (mode, and_optab, value,
			      mask_rtx (mode, 0, bitsize, 0),
			      NULL_RTX, 1, OPTAB_LIB_WIDEN);
      if (bitpos > 0)
	value = expand_shift (LSHIFT_EXPR, mode, value,
			      bitpos, NULL_RTX, 1);
    }

  /* Now clear the chosen bits in OP0,
     except that if VALUE is -1 we need not bother.  */
  /* We keep the intermediates in registers to allow CSE to combine
     consecutive bitfield assignments.  */

  temp = force_reg (mode, op0);

  if (! all_one)
    {
      temp = expand_binop (mode, and_optab, temp,
			   mask_rtx (mode, bitpos, bitsize, 1),
			   NULL_RTX, 1, OPTAB_LIB_WIDEN);
      temp = force_reg (mode, temp);
    }

  /* Now logical-or VALUE into OP0, unless it is zero.  */

  if (! all_zero)
    {
      temp = expand_binop (mode, ior_optab, temp, value,
			   NULL_RTX, 1, OPTAB_LIB_WIDEN);
      temp = force_reg (mode, temp);
    }

  /* Copy the rtx before storing so we do not share side-effect info
     with the original MEM/REG.  */
  if (op0 != temp)
    {
      op0 = copy_rtx (op0);
      emit_move_insn (op0, temp);
    }
}
1113 | \f | |
/* Store a bit field that is split across multiple accessible memory objects.

   OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
   BITSIZE is the field width; BITPOS the position of its first bit
   (within the word).
   VALUE is the value to store.
   BITREGION_START/BITREGION_END delimit the touchable bit region under
   the C++ memory model (both 0 when it does not apply); they bound how
   wide each partial access may be.

   This does not yet handle fields wider than BITS_PER_WORD.  */

static void
store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
		       unsigned HOST_WIDE_INT bitpos,
		       unsigned HOST_WIDE_INT bitregion_start,
		       unsigned HOST_WIDE_INT bitregion_end,
		       rtx value)
{
  unsigned int unit;
  unsigned int bitsdone = 0;

  /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
     much at a time.  For a MEM the unit is further limited by its known
     alignment, so each piece is an aligned access.  */
  if (REG_P (op0) || GET_CODE (op0) == SUBREG)
    unit = BITS_PER_WORD;
  else
    unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);

  /* If VALUE is a constant other than a CONST_INT, get it into a register in
     WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
     that VALUE might be a floating-point constant.  */
  if (CONSTANT_P (value) && !CONST_INT_P (value))
    {
      rtx word = gen_lowpart_common (word_mode, value);

      if (word && (value != word))
	value = word;
      else
	value = gen_lowpart_common (word_mode,
				    force_reg (GET_MODE (value) != VOIDmode
					       ? GET_MODE (value)
					       : word_mode, value));
    }

  /* Store the field one piece at a time, least significant bits first.  */
  while (bitsdone < bitsize)
    {
      unsigned HOST_WIDE_INT thissize;
      rtx part, word;
      unsigned HOST_WIDE_INT thispos;
      unsigned HOST_WIDE_INT offset;

      offset = (bitpos + bitsdone) / unit;
      thispos = (bitpos + bitsdone) % unit;

      /* When region of bytes we can touch is restricted, decrease
	 UNIT close to the end of the region as needed.  */
      if (bitregion_end
	  && unit > BITS_PER_UNIT
	  && bitpos + bitsdone - thispos + unit > bitregion_end + 1)
	{
	  unit = unit / 2;
	  continue;
	}

      /* THISSIZE must not overrun a word boundary.  Otherwise,
	 store_fixed_bit_field will call us again, and we will mutually
	 recurse forever.  */
      thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
      thissize = MIN (thissize, unit - thispos);

      if (BYTES_BIG_ENDIAN)
	{
	  int total_bits;

	  /* We must do an endian conversion exactly the same way as it is
	     done in extract_bit_field, so that the two calls to
	     extract_fixed_bit_field will have comparable arguments.  */
	  if (!MEM_P (value) || GET_MODE (value) == BLKmode)
	    total_bits = BITS_PER_WORD;
	  else
	    total_bits = GET_MODE_BITSIZE (GET_MODE (value));

	  /* Fetch successively less significant portions.  */
	  if (CONST_INT_P (value))
	    part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
			     >> (bitsize - bitsdone - thissize))
			    & (((HOST_WIDE_INT) 1 << thissize) - 1));
	  else
	    /* The args are chosen so that the last part includes the
	       lsb.  Give extract_bit_field the value it needs (with
	       endianness compensation) to fetch the piece we want.  */
	    part = extract_fixed_bit_field (word_mode, value, 0, thissize,
					    total_bits - bitsize + bitsdone,
					    NULL_RTX, 1, false);
	}
      else
	{
	  /* Fetch successively more significant portions.  */
	  if (CONST_INT_P (value))
	    part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
			     >> bitsdone)
			    & (((HOST_WIDE_INT) 1 << thissize) - 1));
	  else
	    part = extract_fixed_bit_field (word_mode, value, 0, thissize,
					    bitsdone, NULL_RTX, 1, false);
	}

      /* If OP0 is a register, then handle OFFSET here.

	 When handling multiword bitfields, extract_bit_field may pass
	 down a word_mode SUBREG of a larger REG for a bitfield that actually
	 crosses a word boundary.  Thus, for a SUBREG, we must find
	 the current word starting from the base register.  */
      if (GET_CODE (op0) == SUBREG)
	{
	  int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
	  enum machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
	  /* A sub-word inner register has no word WORD_OFFSET > 0; use
	     const0_rtx to flag the out-of-bounds piece (ignored below).  */
	  if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
	    word = word_offset ? const0_rtx : op0;
	  else
	    word = operand_subword_force (SUBREG_REG (op0), word_offset,
					  GET_MODE (SUBREG_REG (op0)));
	  offset = 0;
	}
      else if (REG_P (op0))
	{
	  enum machine_mode op0_mode = GET_MODE (op0);
	  if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
	    word = offset ? const0_rtx : op0;
	  else
	    word = operand_subword_force (op0, offset, GET_MODE (op0));
	  offset = 0;
	}
      else
	word = op0;

      /* OFFSET is in UNITs, and UNIT is in bits.
	 store_fixed_bit_field wants offset in bytes.  If WORD is const0_rtx,
	 it is just an out-of-bounds access.  Ignore it.  */
      if (word != const0_rtx)
	store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
			       thispos, bitregion_start, bitregion_end, part);
      bitsdone += thissize;
    }
}
1257 | \f | |
36122326 | 1258 | /* A subroutine of extract_bit_field_1 that converts return value X |
1259 | to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments | |
1260 | to extract_bit_field. */ | |
db96f378 | 1261 | |
36122326 | 1262 | static rtx |
1263 | convert_extracted_bit_field (rtx x, enum machine_mode mode, | |
1264 | enum machine_mode tmode, bool unsignedp) | |
1265 | { | |
1266 | if (GET_MODE (x) == tmode || GET_MODE (x) == mode) | |
1267 | return x; | |
db96f378 | 1268 | |
36122326 | 1269 | /* If the x mode is not a scalar integral, first convert to the |
1270 | integer mode of that size and then access it as a floating-point | |
1271 | value via a SUBREG. */ | |
1272 | if (!SCALAR_INT_MODE_P (tmode)) | |
1273 | { | |
1274 | enum machine_mode smode; | |
db96f378 | 1275 | |
36122326 | 1276 | smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0); |
1277 | x = convert_to_mode (smode, x, unsignedp); | |
1278 | x = force_reg (smode, x); | |
1279 | return gen_lowpart (tmode, x); | |
1280 | } | |
db96f378 | 1281 | |
36122326 | 1282 | return convert_to_mode (tmode, x, unsignedp); |
1283 | } | |
1284 | ||
1285 | /* A subroutine of extract_bit_field, with the same arguments. | |
1286 | If FALLBACK_P is true, fall back to extract_fixed_bit_field | |
1287 | if we can find no other means of implementing the operation. | |
1288 | if FALLBACK_P is false, return NULL instead. */ | |
1289 | ||
1290 | static rtx | |
1291 | extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize, | |
8eef3a45 | 1292 | unsigned HOST_WIDE_INT bitnum, |
1293 | int unsignedp, bool packedp, rtx target, | |
36122326 | 1294 | enum machine_mode mode, enum machine_mode tmode, |
1295 | bool fallback_p) | |
db96f378 | 1296 | { |
02e7a332 | 1297 | unsigned int unit |
e16ceb8e | 1298 | = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD; |
bc5449fc | 1299 | unsigned HOST_WIDE_INT offset, bitpos; |
19cb6b50 | 1300 | rtx op0 = str_rtx; |
49776ff6 | 1301 | enum machine_mode int_mode; |
36122326 | 1302 | enum machine_mode ext_mode; |
ce7c9eb1 | 1303 | enum machine_mode mode1; |
1304 | int byte_offset; | |
db96f378 | 1305 | |
db96f378 | 1306 | if (tmode == VOIDmode) |
1307 | tmode = mode; | |
804e9c91 | 1308 | |
db96f378 | 1309 | while (GET_CODE (op0) == SUBREG) |
1310 | { | |
bc5449fc | 1311 | bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT; |
db96f378 | 1312 | op0 = SUBREG_REG (op0); |
1313 | } | |
c88df841 | 1314 | |
bc5449fc | 1315 | /* If we have an out-of-bounds access to a register, just return an |
2fb89879 | 1316 | uninitialized register of the required mode. This can occur if the |
bc5449fc | 1317 | source code contains an out-of-bounds access to a small array. */ |
1318 | if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0))) | |
1319 | return gen_reg_rtx (tmode); | |
1320 | ||
8ad4c111 | 1321 | if (REG_P (op0) |
3664abd2 | 1322 | && mode == GET_MODE (op0) |
1323 | && bitnum == 0 | |
bc9d42da | 1324 | && bitsize == GET_MODE_BITSIZE (GET_MODE (op0))) |
3664abd2 | 1325 | { |
bc9d42da | 1326 | /* We're trying to extract a full register from itself. */ |
3664abd2 | 1327 | return op0; |
1328 | } | |
1329 | ||
8ea8de24 | 1330 | /* See if we can get a better vector mode before extracting. */ |
1331 | if (VECTOR_MODE_P (GET_MODE (op0)) | |
1332 | && !MEM_P (op0) | |
1333 | && GET_MODE_INNER (GET_MODE (op0)) != tmode) | |
1334 | { | |
1335 | enum machine_mode new_mode; | |
8ea8de24 | 1336 | |
1337 | if (GET_MODE_CLASS (tmode) == MODE_FLOAT) | |
1338 | new_mode = MIN_MODE_VECTOR_FLOAT; | |
06f0b99c | 1339 | else if (GET_MODE_CLASS (tmode) == MODE_FRACT) |
1340 | new_mode = MIN_MODE_VECTOR_FRACT; | |
1341 | else if (GET_MODE_CLASS (tmode) == MODE_UFRACT) | |
1342 | new_mode = MIN_MODE_VECTOR_UFRACT; | |
1343 | else if (GET_MODE_CLASS (tmode) == MODE_ACCUM) | |
1344 | new_mode = MIN_MODE_VECTOR_ACCUM; | |
1345 | else if (GET_MODE_CLASS (tmode) == MODE_UACCUM) | |
1346 | new_mode = MIN_MODE_VECTOR_UACCUM; | |
8ea8de24 | 1347 | else |
1348 | new_mode = MIN_MODE_VECTOR_INT; | |
1349 | ||
1350 | for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode)) | |
d5f2f2c4 | 1351 | if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0)) |
8ea8de24 | 1352 | && targetm.vector_mode_supported_p (new_mode)) |
1353 | break; | |
1354 | if (new_mode != VOIDmode) | |
1355 | op0 = gen_lowpart (new_mode, op0); | |
1356 | } | |
1357 | ||
b8d2bcdd | 1358 | /* Use vec_extract patterns for extracting parts of vectors whenever |
1359 | available. */ | |
1360 | if (VECTOR_MODE_P (GET_MODE (op0)) | |
e16ceb8e | 1361 | && !MEM_P (op0) |
d6bf3b14 | 1362 | && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing |
cb746719 | 1363 | && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0))) |
1364 | == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0))))) | |
b8d2bcdd | 1365 | { |
8786db1e | 1366 | struct expand_operand ops[3]; |
b8d2bcdd | 1367 | enum machine_mode outermode = GET_MODE (op0); |
1368 | enum machine_mode innermode = GET_MODE_INNER (outermode); | |
8786db1e | 1369 | enum insn_code icode = optab_handler (vec_extract_optab, outermode); |
cb746719 | 1370 | unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode); |
b8d2bcdd | 1371 | |
8786db1e | 1372 | create_output_operand (&ops[0], target, innermode); |
1373 | create_input_operand (&ops[1], op0, outermode); | |
1374 | create_integer_operand (&ops[2], pos); | |
1375 | if (maybe_expand_insn (icode, 3, ops)) | |
b8d2bcdd | 1376 | { |
8786db1e | 1377 | target = ops[0].value; |
1378 | if (GET_MODE (target) != mode) | |
1379 | return gen_lowpart (tmode, target); | |
1380 | return target; | |
b8d2bcdd | 1381 | } |
1382 | } | |
1383 | ||
86cde393 | 1384 | /* Make sure we are playing with integral modes. Pun with subregs |
1385 | if we aren't. */ | |
1386 | { | |
1387 | enum machine_mode imode = int_mode_for_mode (GET_MODE (op0)); | |
1388 | if (imode != GET_MODE (op0)) | |
1389 | { | |
8061fd40 | 1390 | if (MEM_P (op0)) |
1391 | op0 = adjust_address (op0, imode, 0); | |
4675df53 | 1392 | else if (imode != BLKmode) |
8061fd40 | 1393 | { |
8061fd40 | 1394 | op0 = gen_lowpart (imode, op0); |
f6d8f384 | 1395 | |
8061fd40 | 1396 | /* If we got a SUBREG, force it into a register since we |
1397 | aren't going to be able to do another SUBREG on it. */ | |
1398 | if (GET_CODE (op0) == SUBREG) | |
1399 | op0 = force_reg (imode, op0); | |
1400 | } | |
4675df53 | 1401 | else if (REG_P (op0)) |
1402 | { | |
1403 | rtx reg, subreg; | |
1404 | imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)), | |
1405 | MODE_INT); | |
1406 | reg = gen_reg_rtx (imode); | |
1407 | subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg); | |
1408 | emit_move_insn (subreg, op0); | |
1409 | op0 = reg; | |
1410 | bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT; | |
1411 | } | |
1412 | else | |
1413 | { | |
1414 | rtx mem = assign_stack_temp (GET_MODE (op0), | |
0ab48139 | 1415 | GET_MODE_SIZE (GET_MODE (op0))); |
4675df53 | 1416 | emit_move_insn (mem, op0); |
1417 | op0 = adjust_address (mem, BLKmode, 0); | |
1418 | } | |
86cde393 | 1419 | } |
1420 | } | |
1421 | ||
edc4f1e8 | 1422 | /* We may be accessing data outside the field, which means |
1423 | we can alias adjacent data. */ | |
e16ceb8e | 1424 | if (MEM_P (op0)) |
edc4f1e8 | 1425 | { |
1426 | op0 = shallow_copy_rtx (op0); | |
1427 | set_mem_alias_set (op0, 0); | |
1428 | set_mem_expr (op0, 0); | |
1429 | } | |
1430 | ||
804e9c91 | 1431 | /* Extraction of a full-word or multi-word value from a structure |
1432 | in a register or aligned memory can be done with just a SUBREG. | |
1433 | A subword value in the least significant part of a register | |
1434 | can also be extracted with a SUBREG. For this, we need the | |
1435 | byte offset of the value in op0. */ | |
1436 | ||
bc5449fc | 1437 | bitpos = bitnum % unit; |
1438 | offset = bitnum / unit; | |
804e9c91 | 1439 | byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD; |
a4194ff7 | 1440 | |
db96f378 | 1441 | /* If OP0 is a register, BITPOS must count within a word. |
1442 | But as we have it, it counts within whatever size OP0 now has. | |
1443 | On a bigendian machine, these are not the same, so convert. */ | |
0bc644e0 | 1444 | if (BYTES_BIG_ENDIAN |
e16ceb8e | 1445 | && !MEM_P (op0) |
51356f86 | 1446 | && unit > GET_MODE_BITSIZE (GET_MODE (op0))) |
db96f378 | 1447 | bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0)); |
db96f378 | 1448 | |
804e9c91 | 1449 | /* ??? We currently assume TARGET is at least as big as BITSIZE. |
1450 | If that's wrong, the solution is to test for it and set TARGET to 0 | |
1451 | if needed. */ | |
ce7c9eb1 | 1452 | |
b97d1165 | 1453 | /* Only scalar integer modes can be converted via subregs. There is an |
1454 | additional problem for FP modes here in that they can have a precision | |
1455 | which is different from the size. mode_for_size uses precision, but | |
1456 | we want a mode based on the size, so we must avoid calling it for FP | |
1457 | modes. */ | |
1458 | mode1 = (SCALAR_INT_MODE_P (tmode) | |
1459 | ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0) | |
1460 | : mode); | |
ce7c9eb1 | 1461 | |
a420d927 | 1462 | /* If the bitfield is volatile, we need to make sure the access |
1463 | remains on a type-aligned boundary. */ | |
1464 | if (GET_CODE (op0) == MEM | |
1465 | && MEM_VOLATILE_P (op0) | |
1466 | && GET_MODE_BITSIZE (GET_MODE (op0)) > 0 | |
1467 | && flag_strict_volatile_bitfields > 0) | |
1468 | goto no_subreg_mode_swap; | |
1469 | ||
4007ab72 | 1470 | if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode) |
1471 | && bitpos % BITS_PER_WORD == 0) | |
b97d1165 | 1472 | || (mode1 != BLKmode |
4007ab72 | 1473 | /* ??? The big endian test here is wrong. This is correct |
1474 | if the value is in a register, and if mode_for_size is not | |
1475 | the same mode as op0. This causes us to get unnecessarily | |
1476 | inefficient code from the Thumb port when -mbig-endian. */ | |
1477 | && (BYTES_BIG_ENDIAN | |
1478 | ? bitpos + bitsize == BITS_PER_WORD | |
1479 | : bitpos == 0))) | |
e16ceb8e | 1480 | && ((!MEM_P (op0) |
396f2130 | 1481 | && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0)) |
4007ab72 | 1482 | && GET_MODE_SIZE (mode1) != 0 |
1483 | && byte_offset % GET_MODE_SIZE (mode1) == 0) | |
e16ceb8e | 1484 | || (MEM_P (op0) |
4007ab72 | 1485 | && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0)) |
1486 | || (offset * BITS_PER_UNIT % bitsize == 0 | |
1487 | && MEM_ALIGN (op0) % bitsize == 0))))) | |
db96f378 | 1488 | { |
05b481df | 1489 | if (MEM_P (op0)) |
1490 | op0 = adjust_address (op0, mode1, offset); | |
1491 | else if (mode1 != GET_MODE (op0)) | |
ed28fcb6 | 1492 | { |
05b481df | 1493 | rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0), |
1494 | byte_offset); | |
1495 | if (sub == NULL) | |
1496 | goto no_subreg_mode_swap; | |
1497 | op0 = sub; | |
ed28fcb6 | 1498 | } |
db96f378 | 1499 | if (mode1 != mode) |
1500 | return convert_to_mode (tmode, op0, unsignedp); | |
1501 | return op0; | |
1502 | } | |
c601dd7d | 1503 | no_subreg_mode_swap: |
db96f378 | 1504 | |
1505 | /* Handle fields bigger than a word. */ | |
a4194ff7 | 1506 | |
db96f378 | 1507 | if (bitsize > BITS_PER_WORD) |
1508 | { | |
1509 | /* Here we transfer the words of the field | |
1510 | in the order least significant first. | |
1511 | This is because the most significant word is the one which may | |
1512 | be less than full. */ | |
1513 | ||
02e7a332 | 1514 | unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD; |
1515 | unsigned int i; | |
db96f378 | 1516 | |
e09c2930 | 1517 | if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target)) |
db96f378 | 1518 | target = gen_reg_rtx (mode); |
1519 | ||
625d6efb | 1520 | /* Indicate for flow that the entire target reg is being set. */ |
18b42941 | 1521 | emit_clobber (target); |
625d6efb | 1522 | |
db96f378 | 1523 | for (i = 0; i < nwords; i++) |
1524 | { | |
1525 | /* If I is 0, use the low-order word in both field and target; | |
1526 | if I is 1, use the next to lowest word; and so on. */ | |
c88df841 | 1527 | /* Word number in TARGET to use. */ |
02e7a332 | 1528 | unsigned int wordnum |
1529 | = (WORDS_BIG_ENDIAN | |
1530 | ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1 | |
1531 | : i); | |
c88df841 | 1532 | /* Offset from start of field in OP0. */ |
02e7a332 | 1533 | unsigned int bit_offset = (WORDS_BIG_ENDIAN |
1534 | ? MAX (0, ((int) bitsize - ((int) i + 1) | |
0eacd412 | 1535 | * (int) BITS_PER_WORD)) |
02e7a332 | 1536 | : (int) i * BITS_PER_WORD); |
db96f378 | 1537 | rtx target_part = operand_subword (target, wordnum, 1, VOIDmode); |
1538 | rtx result_part | |
1539 | = extract_bit_field (op0, MIN (BITS_PER_WORD, | |
1540 | bitsize - i * BITS_PER_WORD), | |
8eef3a45 | 1541 | bitnum + bit_offset, 1, false, target_part, mode, |
1445ea5b | 1542 | word_mode); |
db96f378 | 1543 | |
611234b4 | 1544 | gcc_assert (target_part); |
db96f378 | 1545 | |
1546 | if (result_part != target_part) | |
1547 | emit_move_insn (target_part, result_part); | |
1548 | } | |
1549 | ||
6d292981 | 1550 | if (unsignedp) |
c88df841 | 1551 | { |
1552 | /* Unless we've filled TARGET, the upper regs in a multi-reg value | |
1553 | need to be zero'd out. */ | |
1554 | if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD) | |
1555 | { | |
02e7a332 | 1556 | unsigned int i, total_words; |
c88df841 | 1557 | |
1558 | total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD; | |
1559 | for (i = nwords; i < total_words; i++) | |
2c269e73 | 1560 | emit_move_insn |
1561 | (operand_subword (target, | |
1562 | WORDS_BIG_ENDIAN ? total_words - i - 1 : i, | |
1563 | 1, VOIDmode), | |
1564 | const0_rtx); | |
c88df841 | 1565 | } |
1566 | return target; | |
1567 | } | |
1568 | ||
6d292981 | 1569 | /* Signed bit field: sign-extend with two arithmetic shifts. */ |
1570 | target = expand_shift (LSHIFT_EXPR, mode, target, | |
f5ff0b21 | 1571 | GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0); |
6d292981 | 1572 | return expand_shift (RSHIFT_EXPR, mode, target, |
f5ff0b21 | 1573 | GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0); |
db96f378 | 1574 | } |
a4194ff7 | 1575 | |
49776ff6 | 1576 | /* From here on we know the desired field is smaller than a word. */ |
1577 | ||
1578 | /* Check if there is a correspondingly-sized integer field, so we can | |
1579 | safely extract it as one size of integer, if necessary; then | |
1580 | truncate or extend to the size that is wanted; then use SUBREGs or | |
1581 | convert_to_mode to get one of the modes we really wanted. */ | |
a4194ff7 | 1582 | |
49776ff6 | 1583 | int_mode = int_mode_for_mode (tmode); |
1584 | if (int_mode == BLKmode) | |
1585 | int_mode = int_mode_for_mode (mode); | |
611234b4 | 1586 | /* Should probably push op0 out to memory and then do a load. */ |
1587 | gcc_assert (int_mode != BLKmode); | |
db96f378 | 1588 | |
1589 | /* OFFSET is the number of words or bytes (UNIT says which) | |
1590 | from STR_RTX to the first word or byte containing part of the field. */ | |
e16ceb8e | 1591 | if (!MEM_P (op0)) |
db96f378 | 1592 | { |
1593 | if (offset != 0 | |
1594 | || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD) | |
650df5df | 1595 | { |
8ad4c111 | 1596 | if (!REG_P (op0)) |
650df5df | 1597 | op0 = copy_to_reg (op0); |
1598 | op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0), | |
701e46d0 | 1599 | op0, (offset * UNITS_PER_WORD)); |
650df5df | 1600 | } |
db96f378 | 1601 | offset = 0; |
1602 | } | |
db96f378 | 1603 | |
1604 | /* Now OFFSET is nonzero only for memory operands. */ | |
36122326 | 1605 | ext_mode = mode_for_extraction (unsignedp ? EP_extzv : EP_extv, 0); |
36122326 | 1606 | if (ext_mode != MAX_MACHINE_MODE |
1607 | && bitsize > 0 | |
1608 | && GET_MODE_BITSIZE (ext_mode) >= bitsize | |
eb04cafb | 1609 | /* Do not use extv/extzv for volatile bitfields when |
1610 | -fstrict-volatile-bitfields is in effect. */ | |
1611 | && !(MEM_P (op0) && MEM_VOLATILE_P (op0) | |
1612 | && flag_strict_volatile_bitfields > 0) | |
36122326 | 1613 | /* If op0 is a register, we need it in EXT_MODE to make it |
1614 | acceptable to the format of ext(z)v. */ | |
1615 | && !(GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode) | |
1616 | && !((REG_P (op0) || GET_CODE (op0) == SUBREG) | |
8786db1e | 1617 | && (bitsize + bitpos > GET_MODE_BITSIZE (ext_mode)))) |
db96f378 | 1618 | { |
8786db1e | 1619 | struct expand_operand ops[4]; |
36122326 | 1620 | unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset; |
36122326 | 1621 | rtx xop0 = op0; |
1622 | rtx xtarget = target; | |
1623 | rtx xspec_target = target; | |
1624 | rtx xspec_target_subreg = 0; | |
db96f378 | 1625 | |
36122326 | 1626 | /* If op0 is a register, we need it in EXT_MODE to make it |
1627 | acceptable to the format of ext(z)v. */ | |
1628 | if (REG_P (xop0) && GET_MODE (xop0) != ext_mode) | |
42b8780a | 1629 | xop0 = gen_lowpart_SUBREG (ext_mode, xop0); |
36122326 | 1630 | if (MEM_P (xop0)) |
1631 | /* Get ref to first byte containing part of the field. */ | |
1632 | xop0 = adjust_address (xop0, byte_mode, xoffset); | |
db96f378 | 1633 | |
36122326 | 1634 | /* Now convert from counting within UNIT to counting in EXT_MODE. */ |
eb04cafb | 1635 | if (BYTES_BIG_ENDIAN && !MEM_P (xop0)) |
36122326 | 1636 | xbitpos += GET_MODE_BITSIZE (ext_mode) - unit; |
51356f86 | 1637 | |
36122326 | 1638 | unit = GET_MODE_BITSIZE (ext_mode); |
db96f378 | 1639 | |
eb04cafb | 1640 | /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count |
1641 | "backwards" from the size of the unit we are extracting from. | |
1642 | Otherwise, we count bits from the most significant on a | |
1643 | BYTES/BITS_BIG_ENDIAN machine. */ | |
1644 | ||
1645 | if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN) | |
1646 | xbitpos = unit - bitsize - xbitpos; | |
1647 | ||
36122326 | 1648 | if (xtarget == 0) |
1649 | xtarget = xspec_target = gen_reg_rtx (tmode); | |
db96f378 | 1650 | |
36122326 | 1651 | if (GET_MODE (xtarget) != ext_mode) |
1652 | { | |
fbf78e3d | 1653 | /* Don't use LHS paradoxical subreg if explicit truncation is needed |
1654 | between the mode of the extraction (word_mode) and the target | |
1655 | mode. Instead, create a temporary and use convert_move to set | |
1656 | the target. */ | |
1657 | if (REG_P (xtarget) | |
396f2130 | 1658 | && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (xtarget), ext_mode)) |
db96f378 | 1659 | { |
36122326 | 1660 | xtarget = gen_lowpart (ext_mode, xtarget); |
b537bfdb | 1661 | if (GET_MODE_PRECISION (ext_mode) |
1662 | > GET_MODE_PRECISION (GET_MODE (xspec_target))) | |
36122326 | 1663 | xspec_target_subreg = xtarget; |
db96f378 | 1664 | } |
36122326 | 1665 | else |
1666 | xtarget = gen_reg_rtx (ext_mode); | |
1667 | } | |
db96f378 | 1668 | |
8786db1e | 1669 | create_output_operand (&ops[0], xtarget, ext_mode); |
1670 | create_fixed_operand (&ops[1], xop0); | |
1671 | create_integer_operand (&ops[2], bitsize); | |
1672 | create_integer_operand (&ops[3], xbitpos); | |
1673 | if (maybe_expand_insn (unsignedp ? CODE_FOR_extzv : CODE_FOR_extv, | |
1674 | 4, ops)) | |
36122326 | 1675 | { |
8786db1e | 1676 | xtarget = ops[0].value; |
36122326 | 1677 | if (xtarget == xspec_target) |
1678 | return xtarget; | |
1679 | if (xtarget == xspec_target_subreg) | |
1680 | return xspec_target; | |
1681 | return convert_extracted_bit_field (xtarget, mode, tmode, unsignedp); | |
db96f378 | 1682 | } |
db96f378 | 1683 | } |
51356f86 | 1684 | |
36122326 | 1685 | /* If OP0 is a memory, try copying it to a register and seeing if a |
1686 | cheap register alternative is available. */ | |
1687 | if (ext_mode != MAX_MACHINE_MODE && MEM_P (op0)) | |
1688 | { | |
1689 | enum machine_mode bestmode; | |
1690 | ||
1691 | /* Get the mode to use for inserting into this field. If | |
1692 | OP0 is BLKmode, get the smallest mode consistent with the | |
1693 | alignment. If OP0 is a non-BLKmode object that is no | |
1694 | wider than EXT_MODE, use its mode. Otherwise, use the | |
1695 | smallest mode containing the field. */ | |
1696 | ||
1697 | if (GET_MODE (op0) == BLKmode | |
1698 | || (ext_mode != MAX_MACHINE_MODE | |
1699 | && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (ext_mode))) | |
4bb60ec7 | 1700 | bestmode = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0), |
36122326 | 1701 | (ext_mode == MAX_MACHINE_MODE |
1702 | ? VOIDmode : ext_mode), | |
1703 | MEM_VOLATILE_P (op0)); | |
1704 | else | |
1705 | bestmode = GET_MODE (op0); | |
51356f86 | 1706 | |
36122326 | 1707 | if (bestmode != VOIDmode |
1708 | && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0)) | |
1709 | && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0))) | |
1710 | { | |
1711 | unsigned HOST_WIDE_INT xoffset, xbitpos; | |
db96f378 | 1712 | |
36122326 | 1713 | /* Compute the offset as a multiple of this unit, |
1714 | counting in bytes. */ | |
1715 | unit = GET_MODE_BITSIZE (bestmode); | |
1716 | xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode); | |
1717 | xbitpos = bitnum % unit; | |
db96f378 | 1718 | |
36122326 | 1719 | /* Make sure the register is big enough for the whole field. */ |
1720 | if (xoffset * BITS_PER_UNIT + unit | |
1721 | >= offset * BITS_PER_UNIT + bitsize) | |
db96f378 | 1722 | { |
36122326 | 1723 | rtx last, result, xop0; |
db96f378 | 1724 | |
36122326 | 1725 | last = get_last_insn (); |
db96f378 | 1726 | |
36122326 | 1727 | /* Fetch it to a register in that size. */ |
1728 | xop0 = adjust_address (op0, bestmode, xoffset); | |
1729 | xop0 = force_reg (bestmode, xop0); | |
1730 | result = extract_bit_field_1 (xop0, bitsize, xbitpos, | |
8eef3a45 | 1731 | unsignedp, packedp, target, |
36122326 | 1732 | mode, tmode, false); |
1733 | if (result) | |
1734 | return result; | |
db96f378 | 1735 | |
db96f378 | 1736 | delete_insns_since (last); |
db96f378 | 1737 | } |
a4194ff7 | 1738 | } |
db96f378 | 1739 | } |
4b6262b1 | 1740 | |
36122326 | 1741 | if (!fallback_p) |
1742 | return NULL; | |
1743 | ||
1744 | target = extract_fixed_bit_field (int_mode, op0, offset, bitsize, | |
8eef3a45 | 1745 | bitpos, target, unsignedp, packedp); |
36122326 | 1746 | return convert_extracted_bit_field (target, mode, tmode, unsignedp); |
1747 | } | |
1748 | ||
1749 | /* Generate code to extract a byte-field from STR_RTX | |
1750 | containing BITSIZE bits, starting at BITNUM, | |
1751 | and put it in TARGET if possible (if TARGET is nonzero). | |
1752 | Regardless of TARGET, we return the rtx for where the value is placed. | |
1753 | ||
1754 | STR_RTX is the structure containing the byte (a REG or MEM). | |
1755 | UNSIGNEDP is nonzero if this is an unsigned bit field. | |
8eef3a45 | 1756 | PACKEDP is nonzero if the field has the packed attribute. |
36122326 | 1757 | MODE is the natural mode of the field value once extracted. |
1758 | TMODE is the mode the caller would like the value to have; | |
1759 | but the value may be returned with type MODE instead. | |
1760 | ||
1761 | If a TARGET is specified and we can store in it at no extra cost, | |
1762 | we do so, and return TARGET. | |
1763 | Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred | |
1764 | if they are equally easy. */ | |
1765 | ||
1766 | rtx | |
1767 | extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize, | |
8eef3a45 | 1768 | unsigned HOST_WIDE_INT bitnum, int unsignedp, bool packedp, |
1769 | rtx target, enum machine_mode mode, enum machine_mode tmode) | |
36122326 | 1770 | { |
8eef3a45 | 1771 | return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp, packedp, |
36122326 | 1772 | target, mode, tmode, true); |
db96f378 | 1773 | } |
1774 | \f | |
1775 | /* Extract a bit field using shifts and boolean operations | |
1776 | Returns an rtx to represent the value. | |
1777 | OP0 addresses a register (word) or memory (byte). | |
1778 | BITPOS says which bit within the word or byte the bit field starts in. | |
1779 | OFFSET says how many bytes farther the bit field starts; | |
1780 | it is 0 if OP0 is a register. | |
1781 | BITSIZE says how many bits long the bit field is. | |
1782 | (If OP0 is a register, it may be narrower than a full word, | |
1783 | but BITPOS still counts within a full word, | |
1784 | which is significant on bigendian machines.) | |
1785 | ||
1786 | UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value). | |
8eef3a45 | 1787 | PACKEDP is true if the field has the packed attribute. |
1788 | ||
db96f378 | 1789 | If TARGET is nonzero, attempts to store the value there |
1790 | and return TARGET, but this is not guaranteed. | |
2c269e73 | 1791 | If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */ |
db96f378 | 1792 | |
1793 | static rtx | |
35cb5232 | 1794 | extract_fixed_bit_field (enum machine_mode tmode, rtx op0, |
1795 | unsigned HOST_WIDE_INT offset, | |
1796 | unsigned HOST_WIDE_INT bitsize, | |
1797 | unsigned HOST_WIDE_INT bitpos, rtx target, | |
8eef3a45 | 1798 | int unsignedp, bool packedp) |
db96f378 | 1799 | { |
02e7a332 | 1800 | unsigned int total_bits = BITS_PER_WORD; |
db96f378 | 1801 | enum machine_mode mode; |
1802 | ||
8ad4c111 | 1803 | if (GET_CODE (op0) == SUBREG || REG_P (op0)) |
db96f378 | 1804 | { |
1805 | /* Special treatment for a bit field split across two registers. */ | |
1806 | if (bitsize + bitpos > BITS_PER_WORD) | |
2c269e73 | 1807 | return extract_split_bit_field (op0, bitsize, bitpos, unsignedp); |
db96f378 | 1808 | } |
1809 | else | |
1810 | { | |
1811 | /* Get the proper mode to use for this field. We want a mode that | |
1812 | includes the entire field. If such a mode would be larger than | |
1813 | a word, we won't be doing the extraction the normal way. */ | |
1814 | ||
a420d927 | 1815 | if (MEM_VOLATILE_P (op0) |
1816 | && flag_strict_volatile_bitfields > 0) | |
1817 | { | |
1818 | if (GET_MODE_BITSIZE (GET_MODE (op0)) > 0) | |
1819 | mode = GET_MODE (op0); | |
1820 | else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0) | |
1821 | mode = GET_MODE (target); | |
1822 | else | |
1823 | mode = tmode; | |
1824 | } | |
1825 | else | |
4bb60ec7 | 1826 | mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT, 0, 0, |
a420d927 | 1827 | MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0)); |
db96f378 | 1828 | |
1829 | if (mode == VOIDmode) | |
1830 | /* The only way this should occur is if the field spans word | |
1831 | boundaries. */ | |
1832 | return extract_split_bit_field (op0, bitsize, | |
1833 | bitpos + offset * BITS_PER_UNIT, | |
2c269e73 | 1834 | unsignedp); |
db96f378 | 1835 | |
1836 | total_bits = GET_MODE_BITSIZE (mode); | |
1837 | ||
35140538 | 1838 | /* Make sure bitpos is valid for the chosen mode. Adjust BITPOS to |
3398e91d | 1839 | be in the range 0 to total_bits-1, and put any excess bytes in |
35140538 | 1840 | OFFSET. */ |
1841 | if (bitpos >= total_bits) | |
1842 | { | |
1843 | offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT); | |
1844 | bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT) | |
1845 | * BITS_PER_UNIT); | |
1846 | } | |
1847 | ||
a420d927 | 1848 | /* If we're accessing a volatile MEM, we can't do the next |
1849 | alignment step if it results in a multi-word access where we | |
1850 | otherwise wouldn't have one. So, check for that case | |
1851 | here. */ | |
1852 | if (MEM_P (op0) | |
1853 | && MEM_VOLATILE_P (op0) | |
1854 | && flag_strict_volatile_bitfields > 0 | |
1855 | && bitpos + bitsize <= total_bits | |
1856 | && bitpos + bitsize + (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT > total_bits) | |
1857 | { | |
1858 | if (STRICT_ALIGNMENT) | |
1859 | { | |
1860 | static bool informed_about_misalignment = false; | |
1861 | bool warned; | |
1862 | ||
8eef3a45 | 1863 | if (packedp) |
1864 | { | |
1865 | if (bitsize == total_bits) | |
1866 | warned = warning_at (input_location, OPT_fstrict_volatile_bitfields, | |
1867 | "multiple accesses to volatile structure member" | |
1868 | " because of packed attribute"); | |
1869 | else | |
1870 | warned = warning_at (input_location, OPT_fstrict_volatile_bitfields, | |
1871 | "multiple accesses to volatile structure bitfield" | |
1872 | " because of packed attribute"); | |
1873 | ||
1874 | return extract_split_bit_field (op0, bitsize, | |
1875 | bitpos + offset * BITS_PER_UNIT, | |
1876 | unsignedp); | |
1877 | } | |
1878 | ||
a420d927 | 1879 | if (bitsize == total_bits) |
1880 | warned = warning_at (input_location, OPT_fstrict_volatile_bitfields, | |
1881 | "mis-aligned access used for structure member"); | |
1882 | else | |
1883 | warned = warning_at (input_location, OPT_fstrict_volatile_bitfields, | |
1884 | "mis-aligned access used for structure bitfield"); | |
1885 | ||
1886 | if (! informed_about_misalignment && warned) | |
1887 | { | |
1888 | informed_about_misalignment = true; | |
1889 | inform (input_location, | |
bf776685 | 1890 | "when a volatile object spans multiple type-sized locations," |
a420d927 | 1891 | " the compiler must choose between using a single mis-aligned access to" |
1892 | " preserve the volatility, or using multiple aligned accesses to avoid" | |
bf776685 | 1893 | " runtime faults; this code may fail at runtime if the hardware does" |
1894 | " not allow this access"); | |
a420d927 | 1895 | } |
1896 | } | |
1897 | } | |
1898 | else | |
1899 | { | |
1900 | ||
1901 | /* Get ref to an aligned byte, halfword, or word containing the field. | |
1902 | Adjust BITPOS to be position within a word, | |
1903 | and OFFSET to be the offset of that word. | |
1904 | Then alter OP0 to refer to that word. */ | |
1905 | bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT; | |
1906 | offset -= (offset % (total_bits / BITS_PER_UNIT)); | |
1907 | } | |
1908 | ||
e513d163 | 1909 | op0 = adjust_address (op0, mode, offset); |
db96f378 | 1910 | } |
1911 | ||
052251d0 | 1912 | mode = GET_MODE (op0); |
1913 | ||
51356f86 | 1914 | if (BYTES_BIG_ENDIAN) |
2c269e73 | 1915 | /* BITPOS is the distance between our msb and that of OP0. |
1916 | Convert it to the distance from the lsb. */ | |
1917 | bitpos = total_bits - bitsize - bitpos; | |
db96f378 | 1918 | |
db96f378 | 1919 | /* Now BITPOS is always the distance between the field's lsb and that of OP0. |
1920 | We have reduced the big-endian case to the little-endian case. */ | |
1921 | ||
1922 | if (unsignedp) | |
1923 | { | |
1924 | if (bitpos) | |
1925 | { | |
1926 | /* If the field does not already start at the lsb, | |
1927 | shift it so it does. */ | |
db96f378 | 1928 | /* Maybe propagate the target for the shift. */ |
8ad4c111 | 1929 | rtx subtarget = (target != 0 && REG_P (target) ? target : 0); |
ea1760a3 | 1930 | if (tmode != mode) |
1931 | subtarget = 0; | |
f5ff0b21 | 1932 | op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitpos, subtarget, 1); |
db96f378 | 1933 | } |
1934 | /* Convert the value to the desired mode. */ | |
1935 | if (mode != tmode) | |
1936 | op0 = convert_to_mode (tmode, op0, 1); | |
1937 | ||
1938 | /* Unless the msb of the field used to be the msb when we shifted, | |
1939 | mask out the upper bits. */ | |
1940 | ||
ab89ec1b | 1941 | if (GET_MODE_BITSIZE (mode) != bitpos + bitsize) |
db96f378 | 1942 | return expand_binop (GET_MODE (op0), and_optab, op0, |
1943 | mask_rtx (GET_MODE (op0), 0, bitsize, 0), | |
1944 | target, 1, OPTAB_LIB_WIDEN); | |
1945 | return op0; | |
1946 | } | |
1947 | ||
1948 | /* To extract a signed bit-field, first shift its msb to the msb of the word, | |
1949 | then arithmetic-shift its lsb to the lsb of the word. */ | |
1950 | op0 = force_reg (mode, op0); | |
db96f378 | 1951 | |
1952 | /* Find the narrowest integer mode that contains the field. */ | |
1953 | ||
1954 | for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode; | |
1955 | mode = GET_MODE_WIDER_MODE (mode)) | |
1956 | if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos) | |
1957 | { | |
1958 | op0 = convert_to_mode (mode, op0, 0); | |
1959 | break; | |
1960 | } | |
1961 | ||
fcae9057 | 1962 | if (mode != tmode) |
1963 | target = 0; | |
1964 | ||
db96f378 | 1965 | if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos)) |
1966 | { | |
f5ff0b21 | 1967 | int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitpos); |
db96f378 | 1968 | /* Maybe propagate the target for the shift. */ |
8ad4c111 | 1969 | rtx subtarget = (target != 0 && REG_P (target) ? target : 0); |
db96f378 | 1970 | op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1); |
1971 | } | |
1972 | ||
1973 | return expand_shift (RSHIFT_EXPR, mode, op0, | |
f5ff0b21 | 1974 | GET_MODE_BITSIZE (mode) - bitsize, target, 0); |
db96f378 | 1975 | } |
1976 | \f | |
1977 | /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value | |
1978 | of mode MODE with BITSIZE ones followed by BITPOS zeros, or the | |
1979 | complement of that if COMPLEMENT. The mask is truncated if | |
c88df841 | 1980 | necessary to the width of mode MODE. The mask is zero-extended if |
1981 | BITSIZE+BITPOS is too small for MODE. */ | |
db96f378 | 1982 | |
1983 | static rtx | |
35cb5232 | 1984 | mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement) |
db96f378 | 1985 | { |
41283922 | 1986 | double_int mask; |
db96f378 | 1987 | |
41283922 | 1988 | mask = double_int_mask (bitsize); |
1989 | mask = double_int_lshift (mask, bitpos, HOST_BITS_PER_DOUBLE_INT, false); | |
db96f378 | 1990 | |
1991 | if (complement) | |
41283922 | 1992 | mask = double_int_not (mask); |
db96f378 | 1993 | |
3e052aec | 1994 | return immed_double_int_const (mask, mode); |
db96f378 | 1995 | } |
1996 | ||
1997 | /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value | |
1998 | VALUE truncated to BITSIZE bits and then shifted left BITPOS bits. */ | |
1999 | ||
2000 | static rtx | |
35cb5232 | 2001 | lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize) |
db96f378 | 2002 | { |
41283922 | 2003 | double_int val; |
2004 | ||
2005 | val = double_int_zext (uhwi_to_double_int (INTVAL (value)), bitsize); | |
2006 | val = double_int_lshift (val, bitpos, HOST_BITS_PER_DOUBLE_INT, false); | |
db96f378 | 2007 | |
3e052aec | 2008 | return immed_double_int_const (val, mode); |
db96f378 | 2009 | } |
2010 | \f | |
2011 | /* Extract a bit field that is split across two words | |
2012 | and return an RTX for the result. | |
2013 | ||
2014 | OP0 is the REG, SUBREG or MEM rtx for the first of the two words. | |
2015 | BITSIZE is the field width; BITPOS, position of its first bit, in the word. | |
2c269e73 | 2016 | UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend. */ |
db96f378 | 2017 | |
2018 | static rtx | |
35cb5232 | 2019 | extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize, |
2020 | unsigned HOST_WIDE_INT bitpos, int unsignedp) | |
db96f378 | 2021 | { |
02e7a332 | 2022 | unsigned int unit; |
2023 | unsigned int bitsdone = 0; | |
b1924c4b | 2024 | rtx result = NULL_RTX; |
e9782169 | 2025 | int first = 1; |
db96f378 | 2026 | |
e81f2e56 | 2027 | /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that |
2028 | much at a time. */ | |
8ad4c111 | 2029 | if (REG_P (op0) || GET_CODE (op0) == SUBREG) |
e81f2e56 | 2030 | unit = BITS_PER_WORD; |
2031 | else | |
6b2813fb | 2032 | unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD); |
e81f2e56 | 2033 | |
e9782169 | 2034 | while (bitsdone < bitsize) |
2035 | { | |
02e7a332 | 2036 | unsigned HOST_WIDE_INT thissize; |
e9782169 | 2037 | rtx part, word; |
02e7a332 | 2038 | unsigned HOST_WIDE_INT thispos; |
2039 | unsigned HOST_WIDE_INT offset; | |
e9782169 | 2040 | |
2041 | offset = (bitpos + bitsdone) / unit; | |
2042 | thispos = (bitpos + bitsdone) % unit; | |
2043 | ||
ba860eb2 | 2044 | /* THISSIZE must not overrun a word boundary. Otherwise, |
2045 | extract_fixed_bit_field will call us again, and we will mutually | |
2046 | recurse forever. */ | |
2047 | thissize = MIN (bitsize - bitsdone, BITS_PER_WORD); | |
2048 | thissize = MIN (thissize, unit - thispos); | |
e9782169 | 2049 | |
2050 | /* If OP0 is a register, then handle OFFSET here. | |
6d292981 | 2051 | |
2052 | When handling multiword bitfields, extract_bit_field may pass | |
2053 | down a word_mode SUBREG of a larger REG for a bitfield that actually | |
2054 | crosses a word boundary. Thus, for a SUBREG, we must find | |
2055 | the current word starting from the base register. */ | |
2056 | if (GET_CODE (op0) == SUBREG) | |
2057 | { | |
701e46d0 | 2058 | int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset; |
2059 | word = operand_subword_force (SUBREG_REG (op0), word_offset, | |
6d292981 | 2060 | GET_MODE (SUBREG_REG (op0))); |
2061 | offset = 0; | |
2062 | } | |
8ad4c111 | 2063 | else if (REG_P (op0)) |
e9782169 | 2064 | { |
2065 | word = operand_subword_force (op0, offset, GET_MODE (op0)); | |
2066 | offset = 0; | |
2067 | } | |
2068 | else | |
2069 | word = op0; | |
2070 | ||
e9782169 | 2071 | /* Extract the parts in bit-counting order, |
ba860eb2 | 2072 | whose meaning is determined by BYTES_PER_UNIT. |
2073 | OFFSET is in UNITs, and UNIT is in bits. | |
2074 | extract_fixed_bit_field wants offset in bytes. */ | |
2075 | part = extract_fixed_bit_field (word_mode, word, | |
2076 | offset * unit / BITS_PER_UNIT, | |
8eef3a45 | 2077 | thissize, thispos, 0, 1, false); |
e9782169 | 2078 | bitsdone += thissize; |
db96f378 | 2079 | |
e9782169 | 2080 | /* Shift this part into place for the result. */ |
51356f86 | 2081 | if (BYTES_BIG_ENDIAN) |
2082 | { | |
2083 | if (bitsize != bitsdone) | |
2084 | part = expand_shift (LSHIFT_EXPR, word_mode, part, | |
f5ff0b21 | 2085 | bitsize - bitsdone, 0, 1); |
51356f86 | 2086 | } |
2087 | else | |
2088 | { | |
2089 | if (bitsdone != thissize) | |
2090 | part = expand_shift (LSHIFT_EXPR, word_mode, part, | |
f5ff0b21 | 2091 | bitsdone - thissize, 0, 1); |
51356f86 | 2092 | } |
db96f378 | 2093 | |
e9782169 | 2094 | if (first) |
2095 | result = part; | |
2096 | else | |
2097 | /* Combine the parts with bitwise or. This works | |
2098 | because we extracted each part as an unsigned bit field. */ | |
2099 | result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1, | |
2100 | OPTAB_LIB_WIDEN); | |
2101 | ||
2102 | first = 0; | |
2103 | } | |
db96f378 | 2104 | |
2105 | /* Unsigned bit field: we are done. */ | |
2106 | if (unsignedp) | |
2107 | return result; | |
2108 | /* Signed bit field: sign-extend with two arithmetic shifts. */ | |
2109 | result = expand_shift (LSHIFT_EXPR, word_mode, result, | |
f5ff0b21 | 2110 | BITS_PER_WORD - bitsize, NULL_RTX, 0); |
db96f378 | 2111 | return expand_shift (RSHIFT_EXPR, word_mode, result, |
f5ff0b21 | 2112 | BITS_PER_WORD - bitsize, NULL_RTX, 0); |
db96f378 | 2113 | } |
2114 | \f | |
10d4de0e | 2115 | /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving |
2116 | the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than | |
2117 | MODE, fill the upper bits with zeros. Fail if the layout of either | |
2118 | mode is unknown (as for CC modes) or if the extraction would involve | |
2119 | unprofitable mode punning. Return the value on success, otherwise | |
2120 | return null. | |
2121 | ||
2122 | This is different from gen_lowpart* in these respects: | |
2123 | ||
2124 | - the returned value must always be considered an rvalue | |
2125 | ||
2126 | - when MODE is wider than SRC_MODE, the extraction involves | |
2127 | a zero extension | |
2128 | ||
2129 | - when MODE is smaller than SRC_MODE, the extraction involves | |
2130 | a truncation (and is thus subject to TRULY_NOOP_TRUNCATION). | |
2131 | ||
2132 | In other words, this routine performs a computation, whereas the | |
2133 | gen_lowpart* routines are conceptually lvalue or rvalue subreg | |
2134 | operations. */ | |
2135 | ||
2136 | rtx | |
2137 | extract_low_bits (enum machine_mode mode, enum machine_mode src_mode, rtx src) | |
2138 | { | |
2139 | enum machine_mode int_mode, src_int_mode; | |
2140 | ||
2141 | if (mode == src_mode) | |
2142 | return src; | |
2143 | ||
2144 | if (CONSTANT_P (src)) | |
171557e8 | 2145 | { |
2146 | /* simplify_gen_subreg can't be used here, as if simplify_subreg | |
2147 | fails, it will happily create (subreg (symbol_ref)) or similar | |
2148 | invalid SUBREGs. */ | |
2149 | unsigned int byte = subreg_lowpart_offset (mode, src_mode); | |
2150 | rtx ret = simplify_subreg (mode, src, src_mode, byte); | |
2151 | if (ret) | |
2152 | return ret; | |
2153 | ||
2154 | if (GET_MODE (src) == VOIDmode | |
2155 | || !validate_subreg (mode, src_mode, src, byte)) | |
2156 | return NULL_RTX; | |
2157 | ||
2158 | src = force_reg (GET_MODE (src), src); | |
2159 | return gen_rtx_SUBREG (mode, src, byte); | |
2160 | } | |
10d4de0e | 2161 | |
2162 | if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC) | |
2163 | return NULL_RTX; | |
2164 | ||
2165 | if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode) | |
2166 | && MODES_TIEABLE_P (mode, src_mode)) | |
2167 | { | |
2168 | rtx x = gen_lowpart_common (mode, src); | |
2169 | if (x) | |
2170 | return x; | |
2171 | } | |
2172 | ||
2173 | src_int_mode = int_mode_for_mode (src_mode); | |
2174 | int_mode = int_mode_for_mode (mode); | |
2175 | if (src_int_mode == BLKmode || int_mode == BLKmode) | |
2176 | return NULL_RTX; | |
2177 | ||
2178 | if (!MODES_TIEABLE_P (src_int_mode, src_mode)) | |
2179 | return NULL_RTX; | |
2180 | if (!MODES_TIEABLE_P (int_mode, mode)) | |
2181 | return NULL_RTX; | |
2182 | ||
2183 | src = gen_lowpart (src_int_mode, src); | |
2184 | src = convert_modes (int_mode, src_int_mode, src, true); | |
2185 | src = gen_lowpart (mode, src); | |
2186 | return src; | |
2187 | } | |
2188 | \f | |
db96f378 | 2189 | /* Add INC into TARGET. */ |
2190 | ||
2191 | void | |
35cb5232 | 2192 | expand_inc (rtx target, rtx inc) |
db96f378 | 2193 | { |
2194 | rtx value = expand_binop (GET_MODE (target), add_optab, | |
2195 | target, inc, | |
2196 | target, 0, OPTAB_LIB_WIDEN); | |
2197 | if (value != target) | |
2198 | emit_move_insn (target, value); | |
2199 | } | |
2200 | ||
2201 | /* Subtract DEC from TARGET. */ | |
2202 | ||
2203 | void | |
35cb5232 | 2204 | expand_dec (rtx target, rtx dec) |
db96f378 | 2205 | { |
2206 | rtx value = expand_binop (GET_MODE (target), sub_optab, | |
2207 | target, dec, | |
2208 | target, 0, OPTAB_LIB_WIDEN); | |
2209 | if (value != target) | |
2210 | emit_move_insn (target, value); | |
2211 | } | |
2212 | \f | |
/* Output a shift instruction for expression code CODE,
   with SHIFTED being the rtx for the value to shift,
   and AMOUNT the rtx for the amount to shift by.
   Store the result in the rtx TARGET, if that is convenient.
   If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
   Return the rtx for where the value is.

   CODE is one of LSHIFT_EXPR, RSHIFT_EXPR, LROTATE_EXPR or RROTATE_EXPR.
   Three expansion strategies are tried in order: a direct insn, a
   widening insn, and finally a library call (with rotates open-coded
   as a pair of shifts ORed together, since widening/library rotates
   do not exist).  */

static rtx
expand_shift_1 (enum tree_code code, enum machine_mode mode, rtx shifted,
		rtx amount, rtx target, int unsignedp)
{
  rtx op1, temp = 0;
  int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
  int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
  /* Scalar optabs by default; replaced by the vector/vector variants
     below when both the value and the shift amount are vectors.  */
  optab lshift_optab = ashl_optab;
  optab rshift_arith_optab = ashr_optab;
  optab rshift_uns_optab = lshr_optab;
  optab lrotate_optab = rotl_optab;
  optab rrotate_optab = rotr_optab;
  enum machine_mode op1_mode;
  int attempt;
  bool speed = optimize_insn_for_speed_p ();

  op1 = amount;
  op1_mode = GET_MODE (op1);

  /* Determine whether the shift/rotate amount is a vector, or scalar.  If the
     shift amount is a vector, use the vector/vector shift patterns.  */
  if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
    {
      lshift_optab = vashl_optab;
      rshift_arith_optab = vashr_optab;
      rshift_uns_optab = vlshr_optab;
      lrotate_optab = vrotl_optab;
      rrotate_optab = vrotr_optab;
    }

  /* Previously detected shift-counts computed by NEGATE_EXPR
     and shifted in the other direction; but that does not work
     on all machines.  */

  if (SHIFT_COUNT_TRUNCATED)
    {
      /* The target truncates shift counts, so reduce a constant count
	 mod the bitsize, and look through a lowpart SUBREG of an
	 integral count (the discarded high part cannot matter).  */
      if (CONST_INT_P (op1)
	  && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
	      (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
	op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
		       % GET_MODE_BITSIZE (mode));
      else if (GET_CODE (op1) == SUBREG
	       && subreg_lowpart_p (op1)
	       && INTEGRAL_MODE_P (GET_MODE (SUBREG_REG (op1))))
	op1 = SUBREG_REG (op1);
    }

  /* Shifting by zero is a no-op.  */
  if (op1 == const0_rtx)
    return shifted;

  /* Check whether its cheaper to implement a left shift by a constant
     bit count by a sequence of additions.  */
  if (code == LSHIFT_EXPR
      && CONST_INT_P (op1)
      && INTVAL (op1) > 0
      && INTVAL (op1) < GET_MODE_PRECISION (mode)
      && INTVAL (op1) < MAX_BITS_PER_WORD
      && (shift_cost (speed, mode, INTVAL (op1))
	  > INTVAL (op1) * add_cost (speed, mode))
      && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
    {
      int i;
      /* Each self-addition doubles the value, i.e. shifts left by 1.  */
      for (i = 0; i < INTVAL (op1); i++)
	{
	  temp = force_reg (mode, shifted);
	  shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
				  unsignedp, OPTAB_LIB_WIDEN);
	}
      return shifted;
    }

  /* Three attempts, with progressively more permissive optab methods.  */
  for (attempt = 0; temp == 0 && attempt < 3; attempt++)
    {
      enum optab_methods methods;

      if (attempt == 0)
	methods = OPTAB_DIRECT;
      else if (attempt == 1)
	methods = OPTAB_WIDEN;
      else
	methods = OPTAB_LIB_WIDEN;

      if (rotate)
	{
	  /* Widening does not work for rotation.  */
	  if (methods == OPTAB_WIDEN)
	    continue;
	  else if (methods == OPTAB_LIB_WIDEN)
	    {
	      /* If we have been unable to open-code this by a rotation,
		 do it as the IOR of two shifts.  I.e., to rotate A
		 by N bits, compute (A << N) | ((unsigned) A >> (C - N))
		 where C is the bitsize of A.

		 It is theoretically possible that the target machine might
		 not be able to perform either shift and hence we would
		 be making two libcalls rather than just the one for the
		 shift (similarly if IOR could not be done).  We will allow
		 this extremely unlikely lossage to avoid complicating the
		 code below.  */

	      rtx subtarget = target == shifted ? 0 : target;
	      rtx new_amount, other_amount;
	      rtx temp1;

	      new_amount = op1;
	      /* NOTE(review): the constant path uses GET_MODE_BITSIZE
		 while the variable path uses GET_MODE_PRECISION; these
		 differ for partial-integer modes — confirm intended.  */
	      if (CONST_INT_P (op1))
		other_amount = GEN_INT (GET_MODE_BITSIZE (mode)
					- INTVAL (op1));
	      else
		other_amount
		  = simplify_gen_binary (MINUS, GET_MODE (op1),
					 GEN_INT (GET_MODE_PRECISION (mode)),
					 op1);

	      /* Force to a register so both shifts read the same value.  */
	      shifted = force_reg (mode, shifted);

	      temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
				     mode, shifted, new_amount, 0, 1);
	      temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
				      mode, shifted, other_amount,
				      subtarget, 1);
	      return expand_binop (mode, ior_optab, temp, temp1, target,
				   unsignedp, methods);
	    }

	  temp = expand_binop (mode,
			       left ? lrotate_optab : rrotate_optab,
			       shifted, op1, target, unsignedp, methods);
	}
      else if (unsignedp)
	temp = expand_binop (mode,
			     left ? lshift_optab : rshift_uns_optab,
			     shifted, op1, target, unsignedp, methods);

      /* Do arithmetic shifts.
	 Also, if we are going to widen the operand, we can just as well
	 use an arithmetic right-shift instead of a logical one.  */
      if (temp == 0 && ! rotate
	  && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
	{
	  enum optab_methods methods1 = methods;

	  /* If trying to widen a log shift to an arithmetic shift,
	     don't accept an arithmetic shift of the same size.  */
	  if (unsignedp)
	    methods1 = OPTAB_MUST_WIDEN;

	  /* Arithmetic shift */

	  temp = expand_binop (mode,
			       left ? lshift_optab : rshift_arith_optab,
			       shifted, op1, target, unsignedp, methods1);
	}

      /* We used to try extzv here for logical right shifts, but that was
	 only useful for one machine, the VAX, and caused poor code
	 generation there for lshrdi3, so the code was deleted and a
	 define_expand for lshrsi3 was added to vax.md.  */
    }

  /* One of the three attempts must have succeeded.  */
  gcc_assert (temp);
  return temp;
}
f5ff0b21 | 2384 | |
2385 | /* Output a shift instruction for expression code CODE, | |
2386 | with SHIFTED being the rtx for the value to shift, | |
2387 | and AMOUNT the amount to shift by. | |
2388 | Store the result in the rtx TARGET, if that is convenient. | |
2389 | If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic. | |
2390 | Return the rtx for where the value is. */ | |
2391 | ||
2392 | rtx | |
2393 | expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted, | |
2394 | int amount, rtx target, int unsignedp) | |
2395 | { | |
32d37219 | 2396 | return expand_shift_1 (code, mode, |
2397 | shifted, GEN_INT (amount), target, unsignedp); | |
2398 | } | |
2399 | ||
2400 | /* Output a shift instruction for expression code CODE, | |
2401 | with SHIFTED being the rtx for the value to shift, | |
2402 | and AMOUNT the tree for the amount to shift by. | |
2403 | Store the result in the rtx TARGET, if that is convenient. | |
2404 | If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic. | |
2405 | Return the rtx for where the value is. */ | |
2406 | ||
2407 | rtx | |
2408 | expand_variable_shift (enum tree_code code, enum machine_mode mode, rtx shifted, | |
2409 | tree amount, rtx target, int unsignedp) | |
2410 | { | |
2411 | return expand_shift_1 (code, mode, | |
2412 | shifted, expand_normal (amount), target, unsignedp); | |
f5ff0b21 | 2413 | } |
32d37219 | 2414 | |
db96f378 | 2415 | \f |
/* Indicates the type of fixup needed after a constant multiplication.
   BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
   the result should be negated, and ADD_VARIANT means that the
   multiplicand should be added to the result.  */
enum mult_variant {basic_variant, negate_variant, add_variant};

/* Forward declarations for the synthetic multiply/divide machinery
   defined below.  */
static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
			const struct mult_cost *, enum machine_mode mode);
static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
				 struct algorithm *, enum mult_variant *, int);
static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
			      const struct algorithm *, enum mult_variant);
static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
static rtx extract_high_half (enum machine_mode, rtx);
static rtx expmed_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
static rtx expmed_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
				       int, int);
db96f378 | 2433 | /* Compute and return the best algorithm for multiplying by T. |
4b780351 | 2434 | The algorithm must cost less than cost_limit |
2435 | If retval.cost >= COST_LIMIT, no algorithm was found and all | |
49931967 | 2436 | other field of the returned struct are undefined. |
2437 | MODE is the machine mode of the multiplication. */ | |
db96f378 | 2438 | |
1e401f10 | 2439 | static void |
35cb5232 | 2440 | synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t, |
d9154849 | 2441 | const struct mult_cost *cost_limit, enum machine_mode mode) |
db96f378 | 2442 | { |
183a33d2 | 2443 | int m; |
709f9009 | 2444 | struct algorithm *alg_in, *best_alg; |
d9154849 | 2445 | struct mult_cost best_cost; |
2446 | struct mult_cost new_limit; | |
2447 | int op_cost, op_latency; | |
b592bb50 | 2448 | unsigned HOST_WIDE_INT orig_t = t; |
183a33d2 | 2449 | unsigned HOST_WIDE_INT q; |
49db198b | 2450 | int maxm, hash_index; |
7fe4cfe2 | 2451 | bool cache_hit = false; |
2452 | enum alg_code cache_alg = alg_zero; | |
f529eb25 | 2453 | bool speed = optimize_insn_for_speed_p (); |
49db198b | 2454 | enum machine_mode imode; |
49a71e58 | 2455 | struct alg_hash_entry *entry_ptr; |
db96f378 | 2456 | |
4b780351 | 2457 | /* Indicate that no algorithm is yet found. If no algorithm |
2458 | is found, this value will be returned and indicate failure. */ | |
d9154849 | 2459 | alg_out->cost.cost = cost_limit->cost + 1; |
eddf2705 | 2460 | alg_out->cost.latency = cost_limit->latency + 1; |
db96f378 | 2461 | |
d9154849 | 2462 | if (cost_limit->cost < 0 |
2463 | || (cost_limit->cost == 0 && cost_limit->latency <= 0)) | |
1e401f10 | 2464 | return; |
db96f378 | 2465 | |
49db198b | 2466 | /* Be prepared for vector modes. */ |
2467 | imode = GET_MODE_INNER (mode); | |
2468 | if (imode == VOIDmode) | |
2469 | imode = mode; | |
2470 | ||
2471 | maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode)); | |
2472 | ||
83df06d0 | 2473 | /* Restrict the bits of "t" to the multiplication's mode. */ |
49db198b | 2474 | t &= GET_MODE_MASK (imode); |
83df06d0 | 2475 | |
798c4e27 | 2476 | /* t == 1 can be done in zero cost. */ |
2477 | if (t == 1) | |
183a33d2 | 2478 | { |
1e401f10 | 2479 | alg_out->ops = 1; |
d9154849 | 2480 | alg_out->cost.cost = 0; |
2481 | alg_out->cost.latency = 0; | |
1e401f10 | 2482 | alg_out->op[0] = alg_m; |
2483 | return; | |
183a33d2 | 2484 | } |
2485 | ||
798c4e27 | 2486 | /* t == 0 sometimes has a cost. If it does and it exceeds our limit, |
2487 | fail now. */ | |
1e401f10 | 2488 | if (t == 0) |
798c4e27 | 2489 | { |
49a71e58 | 2490 | if (MULT_COST_LESS (cost_limit, zero_cost (speed))) |
1e401f10 | 2491 | return; |
798c4e27 | 2492 | else |
2493 | { | |
1e401f10 | 2494 | alg_out->ops = 1; |
49a71e58 | 2495 | alg_out->cost.cost = zero_cost (speed); |
2496 | alg_out->cost.latency = zero_cost (speed); | |
1e401f10 | 2497 | alg_out->op[0] = alg_zero; |
2498 | return; | |
798c4e27 | 2499 | } |
2500 | } | |
2501 | ||
709f9009 | 2502 | /* We'll be needing a couple extra algorithm structures now. */ |
2503 | ||
2457c754 | 2504 | alg_in = XALLOCA (struct algorithm); |
2505 | best_alg = XALLOCA (struct algorithm); | |
d9154849 | 2506 | best_cost = *cost_limit; |
709f9009 | 2507 | |
7fe4cfe2 | 2508 | /* Compute the hash index. */ |
f529eb25 | 2509 | hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES; |
7fe4cfe2 | 2510 | |
2511 | /* See if we already know what to do for T. */ | |
49a71e58 | 2512 | entry_ptr = alg_hash_entry_ptr (hash_index); |
2513 | if (entry_ptr->t == t | |
2514 | && entry_ptr->mode == mode | |
2515 | && entry_ptr->mode == mode | |
2516 | && entry_ptr->speed == speed | |
2517 | && entry_ptr->alg != alg_unknown) | |
7fe4cfe2 | 2518 | { |
49a71e58 | 2519 | cache_alg = entry_ptr->alg; |
44a03d75 | 2520 | |
2521 | if (cache_alg == alg_impossible) | |
7fe4cfe2 | 2522 | { |
44a03d75 | 2523 | /* The cache tells us that it's impossible to synthesize |
49a71e58 | 2524 | multiplication by T within entry_ptr->cost. */ |
2525 | if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit)) | |
44a03d75 | 2526 | /* COST_LIMIT is at least as restrictive as the one |
2527 | recorded in the hash table, in which case we have no | |
2528 | hope of synthesizing a multiplication. Just | |
2529 | return. */ | |
2530 | return; | |
2531 | ||
2532 | /* If we get here, COST_LIMIT is less restrictive than the | |
2533 | one recorded in the hash table, so we may be able to | |
2534 | synthesize a multiplication. Proceed as if we didn't | |
2535 | have the cache entry. */ | |
2536 | } | |
2537 | else | |
2538 | { | |
49a71e58 | 2539 | if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost)) |
44a03d75 | 2540 | /* The cached algorithm shows that this multiplication |
2541 | requires more cost than COST_LIMIT. Just return. This | |
2542 | way, we don't clobber this cache entry with | |
2543 | alg_impossible but retain useful information. */ | |
2544 | return; | |
7fe4cfe2 | 2545 | |
44a03d75 | 2546 | cache_hit = true; |
2547 | ||
2548 | switch (cache_alg) | |
2549 | { | |
2550 | case alg_shift: | |
2551 | goto do_alg_shift; | |
7fe4cfe2 | 2552 | |
44a03d75 | 2553 | case alg_add_t_m2: |
2554 | case alg_sub_t_m2: | |
2555 | goto do_alg_addsub_t_m2; | |
7fe4cfe2 | 2556 | |
44a03d75 | 2557 | case alg_add_factor: |
2558 | case alg_sub_factor: | |
2559 | goto do_alg_addsub_factor; | |
7fe4cfe2 | 2560 | |
44a03d75 | 2561 | case alg_add_t2_m: |
2562 | goto do_alg_add_t2_m; | |
7fe4cfe2 | 2563 | |
44a03d75 | 2564 | case alg_sub_t2_m: |
2565 | goto do_alg_sub_t2_m; | |
2566 | ||
2567 | default: | |
2568 | gcc_unreachable (); | |
2569 | } | |
7fe4cfe2 | 2570 | } |
2571 | } | |
2572 | ||
798c4e27 | 2573 | /* If we have a group of zero bits at the low-order part of T, try |
2574 | multiplying by the remaining bits and then doing a shift. */ | |
2575 | ||
183a33d2 | 2576 | if ((t & 1) == 0) |
db96f378 | 2577 | { |
7fe4cfe2 | 2578 | do_alg_shift: |
183a33d2 | 2579 | m = floor_log2 (t & -t); /* m = number of low zero bits */ |
83df06d0 | 2580 | if (m < maxm) |
db96f378 | 2581 | { |
84ab528e | 2582 | q = t >> m; |
617cc55d | 2583 | /* The function expand_shift will choose between a shift and |
2584 | a sequence of additions, so the observed cost is given as | |
49a71e58 | 2585 | MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */ |
2586 | op_cost = m * add_cost (speed, mode); | |
2587 | if (shift_cost (speed, mode, m) < op_cost) | |
2588 | op_cost = shift_cost (speed, mode, m); | |
d9154849 | 2589 | new_limit.cost = best_cost.cost - op_cost; |
2590 | new_limit.latency = best_cost.latency - op_cost; | |
2591 | synth_mult (alg_in, q, &new_limit, mode); | |
2592 | ||
2593 | alg_in->cost.cost += op_cost; | |
2594 | alg_in->cost.latency += op_cost; | |
2595 | if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) | |
84ab528e | 2596 | { |
2597 | struct algorithm *x; | |
d9154849 | 2598 | best_cost = alg_in->cost; |
84ab528e | 2599 | x = alg_in, alg_in = best_alg, best_alg = x; |
2600 | best_alg->log[best_alg->ops] = m; | |
2601 | best_alg->op[best_alg->ops] = alg_shift; | |
84ab528e | 2602 | } |
5521b4c8 | 2603 | |
2604 | /* See if treating ORIG_T as a signed number yields a better | |
2605 | sequence. Try this sequence only for a negative ORIG_T | |
2606 | as it would be useless for a non-negative ORIG_T. */ | |
2607 | if ((HOST_WIDE_INT) orig_t < 0) | |
2608 | { | |
2609 | /* Shift ORIG_T as follows because a right shift of a | |
2610 | negative-valued signed type is implementation | |
2611 | defined. */ | |
2612 | q = ~(~orig_t >> m); | |
2613 | /* The function expand_shift will choose between a shift | |
2614 | and a sequence of additions, so the observed cost is | |
49a71e58 | 2615 | given as MIN (m * add_cost(speed, mode), |
2616 | shift_cost(speed, mode, m)). */ | |
2617 | op_cost = m * add_cost (speed, mode); | |
2618 | if (shift_cost (speed, mode, m) < op_cost) | |
2619 | op_cost = shift_cost (speed, mode, m); | |
5521b4c8 | 2620 | new_limit.cost = best_cost.cost - op_cost; |
2621 | new_limit.latency = best_cost.latency - op_cost; | |
2622 | synth_mult (alg_in, q, &new_limit, mode); | |
2623 | ||
2624 | alg_in->cost.cost += op_cost; | |
2625 | alg_in->cost.latency += op_cost; | |
2626 | if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) | |
2627 | { | |
2628 | struct algorithm *x; | |
2629 | best_cost = alg_in->cost; | |
2630 | x = alg_in, alg_in = best_alg, best_alg = x; | |
2631 | best_alg->log[best_alg->ops] = m; | |
2632 | best_alg->op[best_alg->ops] = alg_shift; | |
2633 | } | |
2634 | } | |
1e401f10 | 2635 | } |
7fe4cfe2 | 2636 | if (cache_hit) |
2637 | goto done; | |
1e401f10 | 2638 | } |
2639 | ||
2640 | /* If we have an odd number, add or subtract one. */ | |
2641 | if ((t & 1) != 0) | |
2642 | { | |
2643 | unsigned HOST_WIDE_INT w; | |
2644 | ||
7fe4cfe2 | 2645 | do_alg_addsub_t_m2: |
1e401f10 | 2646 | for (w = 1; (w & t) != 0; w <<= 1) |
2647 | ; | |
68215e49 | 2648 | /* If T was -1, then W will be zero after the loop. This is another |
a4194ff7 | 2649 | case where T ends with ...111. Handling this with (T + 1) and |
68215e49 | 2650 | subtract 1 produces slightly better code and results in algorithm |
2651 | selection much faster than treating it like the ...0111 case | |
2652 | below. */ | |
2653 | if (w == 0 | |
2654 | || (w > 2 | |
2655 | /* Reject the case where t is 3. | |
2656 | Thus we prefer addition in that case. */ | |
2657 | && t != 3)) | |
1e401f10 | 2658 | { |
2659 | /* T ends with ...111. Multiply by (T + 1) and subtract 1. */ | |
2660 | ||
49a71e58 | 2661 | op_cost = add_cost (speed, mode); |
d9154849 | 2662 | new_limit.cost = best_cost.cost - op_cost; |
2663 | new_limit.latency = best_cost.latency - op_cost; | |
2664 | synth_mult (alg_in, t + 1, &new_limit, mode); | |
183a33d2 | 2665 | |
d9154849 | 2666 | alg_in->cost.cost += op_cost; |
2667 | alg_in->cost.latency += op_cost; | |
2668 | if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) | |
db96f378 | 2669 | { |
183a33d2 | 2670 | struct algorithm *x; |
d9154849 | 2671 | best_cost = alg_in->cost; |
183a33d2 | 2672 | x = alg_in, alg_in = best_alg, best_alg = x; |
1e401f10 | 2673 | best_alg->log[best_alg->ops] = 0; |
2674 | best_alg->op[best_alg->ops] = alg_sub_t_m2; | |
db96f378 | 2675 | } |
db96f378 | 2676 | } |
1e401f10 | 2677 | else |
2678 | { | |
2679 | /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */ | |
db96f378 | 2680 | |
49a71e58 | 2681 | op_cost = add_cost (speed, mode); |
d9154849 | 2682 | new_limit.cost = best_cost.cost - op_cost; |
2683 | new_limit.latency = best_cost.latency - op_cost; | |
2684 | synth_mult (alg_in, t - 1, &new_limit, mode); | |
1e401f10 | 2685 | |
d9154849 | 2686 | alg_in->cost.cost += op_cost; |
2687 | alg_in->cost.latency += op_cost; | |
2688 | if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) | |
1e401f10 | 2689 | { |
2690 | struct algorithm *x; | |
d9154849 | 2691 | best_cost = alg_in->cost; |
1e401f10 | 2692 | x = alg_in, alg_in = best_alg, best_alg = x; |
2693 | best_alg->log[best_alg->ops] = 0; | |
2694 | best_alg->op[best_alg->ops] = alg_add_t_m2; | |
1e401f10 | 2695 | } |
2696 | } | |
b592bb50 | 2697 | |
2698 | /* We may be able to calculate a * -7, a * -15, a * -31, etc | |
2699 | quickly with a - a * n for some appropriate constant n. */ | |
2700 | m = exact_log2 (-orig_t + 1); | |
2701 | if (m >= 0 && m < maxm) | |
2702 | { | |
49a71e58 | 2703 | op_cost = shiftsub1_cost (speed, mode, m); |
b592bb50 | 2704 | new_limit.cost = best_cost.cost - op_cost; |
2705 | new_limit.latency = best_cost.latency - op_cost; | |
49db198b | 2706 | synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m, |
2707 | &new_limit, mode); | |
b592bb50 | 2708 | |
2709 | alg_in->cost.cost += op_cost; | |
2710 | alg_in->cost.latency += op_cost; | |
2711 | if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) | |
2712 | { | |
2713 | struct algorithm *x; | |
2714 | best_cost = alg_in->cost; | |
2715 | x = alg_in, alg_in = best_alg, best_alg = x; | |
2716 | best_alg->log[best_alg->ops] = m; | |
2717 | best_alg->op[best_alg->ops] = alg_sub_t_m2; | |
2718 | } | |
2719 | } | |
2720 | ||
7fe4cfe2 | 2721 | if (cache_hit) |
2722 | goto done; | |
1e401f10 | 2723 | } |
4e6e0ee8 | 2724 | |
db96f378 | 2725 | /* Look for factors of t of the form |
4b780351 | 2726 | t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)). |
db96f378 | 2727 | If we find such a factor, we can multiply by t using an algorithm that |
4b780351 | 2728 | multiplies by q, shift the result by m and add/subtract it to itself. |
db96f378 | 2729 | |
4b780351 | 2730 | We search for large factors first and loop down, even if large factors |
2731 | are less probable than small; if we find a large factor we will find a | |
2732 | good sequence quickly, and therefore be able to prune (by decreasing | |
2733 | COST_LIMIT) the search. */ | |
2734 | ||
7fe4cfe2 | 2735 | do_alg_addsub_factor: |
4b780351 | 2736 | for (m = floor_log2 (t - 1); m >= 2; m--) |
db96f378 | 2737 | { |
4b780351 | 2738 | unsigned HOST_WIDE_INT d; |
db96f378 | 2739 | |
4b780351 | 2740 | d = ((unsigned HOST_WIDE_INT) 1 << m) + 1; |
7fe4cfe2 | 2741 | if (t % d == 0 && t > d && m < maxm |
2742 | && (!cache_hit || cache_alg == alg_add_factor)) | |
db96f378 | 2743 | { |
d9154849 | 2744 | /* If the target has a cheap shift-and-add instruction use |
2745 | that in preference to a shift insn followed by an add insn. | |
2746 | Assume that the shift-and-add is "atomic" with a latency | |
1861ff83 | 2747 | equal to its cost, otherwise assume that on superscalar |
d9154849 | 2748 | hardware the shift may be executed concurrently with the |
2749 | earlier steps in the algorithm. */ | |
49a71e58 | 2750 | op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m); |
2751 | if (shiftadd_cost (speed, mode, m) < op_cost) | |
d9154849 | 2752 | { |
49a71e58 | 2753 | op_cost = shiftadd_cost (speed, mode, m); |
d9154849 | 2754 | op_latency = op_cost; |
2755 | } | |
2756 | else | |
49a71e58 | 2757 | op_latency = add_cost (speed, mode); |
d9154849 | 2758 | |
2759 | new_limit.cost = best_cost.cost - op_cost; | |
2760 | new_limit.latency = best_cost.latency - op_latency; | |
2761 | synth_mult (alg_in, t / d, &new_limit, mode); | |
db96f378 | 2762 | |
d9154849 | 2763 | alg_in->cost.cost += op_cost; |
2764 | alg_in->cost.latency += op_latency; | |
2765 | if (alg_in->cost.latency < op_cost) | |
2766 | alg_in->cost.latency = op_cost; | |
2767 | if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) | |
db96f378 | 2768 | { |
4b780351 | 2769 | struct algorithm *x; |
d9154849 | 2770 | best_cost = alg_in->cost; |
4b780351 | 2771 | x = alg_in, alg_in = best_alg, best_alg = x; |
798c4e27 | 2772 | best_alg->log[best_alg->ops] = m; |
1e401f10 | 2773 | best_alg->op[best_alg->ops] = alg_add_factor; |
db96f378 | 2774 | } |
04ba236f | 2775 | /* Other factors will have been taken care of in the recursion. */ |
2776 | break; | |
db96f378 | 2777 | } |
2778 | ||
4b780351 | 2779 | d = ((unsigned HOST_WIDE_INT) 1 << m) - 1; |
7fe4cfe2 | 2780 | if (t % d == 0 && t > d && m < maxm |
2781 | && (!cache_hit || cache_alg == alg_sub_factor)) | |
db96f378 | 2782 | { |
d9154849 | 2783 | /* If the target has a cheap shift-and-subtract insn use |
2784 | that in preference to a shift insn followed by a sub insn. | |
2785 | Assume that the shift-and-sub is "atomic" with a latency | |
2786 | equal to it's cost, otherwise assume that on superscalar | |
2787 | hardware the shift may be executed concurrently with the | |
2788 | earlier steps in the algorithm. */ | |
49a71e58 | 2789 | op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m); |
2790 | if (shiftsub0_cost (speed, mode, m) < op_cost) | |
d9154849 | 2791 | { |
49a71e58 | 2792 | op_cost = shiftsub0_cost (speed, mode, m); |
d9154849 | 2793 | op_latency = op_cost; |
2794 | } | |
2795 | else | |
49a71e58 | 2796 | op_latency = add_cost (speed, mode); |
d9154849 | 2797 | |
2798 | new_limit.cost = best_cost.cost - op_cost; | |
379eaa7a | 2799 | new_limit.latency = best_cost.latency - op_latency; |
d9154849 | 2800 | synth_mult (alg_in, t / d, &new_limit, mode); |
db96f378 | 2801 | |
d9154849 | 2802 | alg_in->cost.cost += op_cost; |
2803 | alg_in->cost.latency += op_latency; | |
2804 | if (alg_in->cost.latency < op_cost) | |
2805 | alg_in->cost.latency = op_cost; | |
2806 | if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) | |
db96f378 | 2807 | { |
4b780351 | 2808 | struct algorithm *x; |
d9154849 | 2809 | best_cost = alg_in->cost; |
4b780351 | 2810 | x = alg_in, alg_in = best_alg, best_alg = x; |
798c4e27 | 2811 | best_alg->log[best_alg->ops] = m; |
1e401f10 | 2812 | best_alg->op[best_alg->ops] = alg_sub_factor; |
db96f378 | 2813 | } |
04ba236f | 2814 | break; |
db96f378 | 2815 | } |
2816 | } | |
7fe4cfe2 | 2817 | if (cache_hit) |
2818 | goto done; | |
db96f378 | 2819 | |
4b780351 | 2820 | /* Try shift-and-add (load effective address) instructions, |
2821 | i.e. do a*3, a*5, a*9. */ | |
2822 | if ((t & 1) != 0) | |
2823 | { | |
7fe4cfe2 | 2824 | do_alg_add_t2_m: |
4b780351 | 2825 | q = t - 1; |
2826 | q = q & -q; | |
2827 | m = exact_log2 (q); | |
83df06d0 | 2828 | if (m >= 0 && m < maxm) |
798c4e27 | 2829 | { |
49a71e58 | 2830 | op_cost = shiftadd_cost (speed, mode, m); |
d9154849 | 2831 | new_limit.cost = best_cost.cost - op_cost; |
2832 | new_limit.latency = best_cost.latency - op_cost; | |
2833 | synth_mult (alg_in, (t - 1) >> m, &new_limit, mode); | |
2834 | ||
2835 | alg_in->cost.cost += op_cost; | |
2836 | alg_in->cost.latency += op_cost; | |
2837 | if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) | |
010b6a23 | 2838 | { |
2839 | struct algorithm *x; | |
d9154849 | 2840 | best_cost = alg_in->cost; |
010b6a23 | 2841 | x = alg_in, alg_in = best_alg, best_alg = x; |
2842 | best_alg->log[best_alg->ops] = m; | |
1e401f10 | 2843 | best_alg->op[best_alg->ops] = alg_add_t2_m; |
010b6a23 | 2844 | } |
4b780351 | 2845 | } |
7fe4cfe2 | 2846 | if (cache_hit) |
2847 | goto done; | |
db96f378 | 2848 | |
7fe4cfe2 | 2849 | do_alg_sub_t2_m: |
4b780351 | 2850 | q = t + 1; |
2851 | q = q & -q; | |
2852 | m = exact_log2 (q); | |
83df06d0 | 2853 | if (m >= 0 && m < maxm) |
798c4e27 | 2854 | { |
49a71e58 | 2855 | op_cost = shiftsub0_cost (speed, mode, m); |
d9154849 | 2856 | new_limit.cost = best_cost.cost - op_cost; |
2857 | new_limit.latency = best_cost.latency - op_cost; | |
2858 | synth_mult (alg_in, (t + 1) >> m, &new_limit, mode); | |
2859 | ||
2860 | alg_in->cost.cost += op_cost; | |
2861 | alg_in->cost.latency += op_cost; | |
2862 | if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) | |
010b6a23 | 2863 | { |
2864 | struct algorithm *x; | |
d9154849 | 2865 | best_cost = alg_in->cost; |
010b6a23 | 2866 | x = alg_in, alg_in = best_alg, best_alg = x; |
2867 | best_alg->log[best_alg->ops] = m; | |
1e401f10 | 2868 | best_alg->op[best_alg->ops] = alg_sub_t2_m; |
010b6a23 | 2869 | } |
4b780351 | 2870 | } |
7fe4cfe2 | 2871 | if (cache_hit) |
2872 | goto done; | |
4b780351 | 2873 | } |
db96f378 | 2874 | |
7fe4cfe2 | 2875 | done: |
eddf2705 | 2876 | /* If best_cost has not decreased, we have not found any algorithm. */ |
2877 | if (!CHEAPER_MULT_COST (&best_cost, cost_limit)) | |
44a03d75 | 2878 | { |
2879 | /* We failed to find an algorithm. Record alg_impossible for | |
2880 | this case (that is, <T, MODE, COST_LIMIT>) so that next time | |
2881 | we are asked to find an algorithm for T within the same or | |
2882 | lower COST_LIMIT, we can immediately return to the | |
2883 | caller. */ | |
49a71e58 | 2884 | entry_ptr->t = t; |
2885 | entry_ptr->mode = mode; | |
2886 | entry_ptr->speed = speed; | |
2887 | entry_ptr->alg = alg_impossible; | |
2888 | entry_ptr->cost = *cost_limit; | |
44a03d75 | 2889 | return; |
2890 | } | |
eddf2705 | 2891 | |
7fe4cfe2 | 2892 | /* Cache the result. */ |
2893 | if (!cache_hit) | |
2894 | { | |
49a71e58 | 2895 | entry_ptr->t = t; |
2896 | entry_ptr->mode = mode; | |
2897 | entry_ptr->speed = speed; | |
2898 | entry_ptr->alg = best_alg->op[best_alg->ops]; | |
2899 | entry_ptr->cost.cost = best_cost.cost; | |
2900 | entry_ptr->cost.latency = best_cost.latency; | |
7fe4cfe2 | 2901 | } |
2902 | ||
709f9009 | 2903 | /* If we are getting a too long sequence for `struct algorithm' |
2904 | to record, make this search fail. */ | |
2905 | if (best_alg->ops == MAX_BITS_PER_WORD) | |
2906 | return; | |
2907 | ||
1e401f10 | 2908 | /* Copy the algorithm from temporary space to the space at alg_out. |
2909 | We avoid using structure assignment because the majority of | |
2910 | best_alg is normally undefined, and this is a critical function. */ | |
2911 | alg_out->ops = best_alg->ops + 1; | |
d9154849 | 2912 | alg_out->cost = best_cost; |
b1b63592 | 2913 | memcpy (alg_out->op, best_alg->op, |
2914 | alg_out->ops * sizeof *alg_out->op); | |
2915 | memcpy (alg_out->log, best_alg->log, | |
2916 | alg_out->ops * sizeof *alg_out->log); | |
db96f378 | 2917 | } |
2918 | \f | |
/* Find the cheapest way of multiplying a value of mode MODE by VAL.
   Try three variations:

       - a shift/add sequence based on VAL itself
       - a shift/add sequence based on -VAL, followed by a negation
       - a shift/add sequence based on VAL - 1, followed by an addition.

   Return true if the cheapest of these cost less than MULT_COST,
   describing the algorithm in *ALG and final fixup in *VARIANT.  */

static bool
choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
		     struct algorithm *alg, enum mult_variant *variant,
		     int mult_cost)
{
  struct algorithm alg2;
  struct mult_cost limit;
  int op_cost;
  bool speed = optimize_insn_for_speed_p ();

  /* Fail quickly for impossible bounds.  */
  if (mult_cost < 0)
    return false;

  /* Ensure that mult_cost provides a reasonable upper bound.
     Any constant multiplication can be performed with less
     than 2 * bits additions.  */
  op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
  if (mult_cost > op_cost)
    mult_cost = op_cost;

  /* First try the plain shift/add decomposition of VAL itself.  */
  *variant = basic_variant;
  limit.cost = mult_cost;
  limit.latency = mult_cost;
  synth_mult (alg, val, &limit, mode);

  /* Second, try a decomposition of -VAL followed by a negation.
     This works only if the inverted value actually fits in an
     `unsigned int' */
  if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
    {
      op_cost = neg_cost(speed, mode);
      /* Budget for the recursive search: whatever is cheaper of the
	 algorithm found so far and the caller's bound, minus the cost
	 of the final negation we will have to add back.  */
      if (MULT_COST_LESS (&alg->cost, mult_cost))
	{
	  limit.cost = alg->cost.cost - op_cost;
	  limit.latency = alg->cost.latency - op_cost;
	}
      else
	{
	  limit.cost = mult_cost - op_cost;
	  limit.latency = mult_cost - op_cost;
	}

      synth_mult (&alg2, -val, &limit, mode);
      alg2.cost.cost += op_cost;
      alg2.cost.latency += op_cost;
      if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
	*alg = alg2, *variant = negate_variant;
    }

  /* Third, try a decomposition of VAL - 1 followed by an addition of OP0.
     This proves very useful for division-by-constant.  */
  op_cost = add_cost (speed, mode);
  if (MULT_COST_LESS (&alg->cost, mult_cost))
    {
      limit.cost = alg->cost.cost - op_cost;
      limit.latency = alg->cost.latency - op_cost;
    }
  else
    {
      limit.cost = mult_cost - op_cost;
      limit.latency = mult_cost - op_cost;
    }

  synth_mult (&alg2, val - 1, &limit, mode);
  alg2.cost.cost += op_cost;
  alg2.cost.latency += op_cost;
  if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
    *alg = alg2, *variant = add_variant;

  return MULT_COST_LESS (&alg->cost, mult_cost);
}
2999 | ||
/* A subroutine of expand_mult, used for constant multiplications.
   Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
   convenient.  Use the shift/add sequence described by ALG and apply
   the final fixup specified by VARIANT.  */

static rtx
expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
		   rtx target, const struct algorithm *alg,
		   enum mult_variant variant)
{
  /* VAL_SO_FAR tracks the coefficient the emitted sequence has
     multiplied by so far; it is checked against VAL at the end.  */
  HOST_WIDE_INT val_so_far;
  rtx insn, accum, tem;
  int opno;
  enum machine_mode nmode;

  /* Avoid referencing memory over and over and invalid sharing
     on SUBREGs.  */
  op0 = force_reg (mode, op0);

  /* ACCUM starts out either as OP0 or as a zero, depending on
     the first operation.  */

  if (alg->op[0] == alg_zero)
    {
      accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
      val_so_far = 0;
    }
  else if (alg->op[0] == alg_m)
    {
      accum = copy_to_mode_reg (mode, op0);
      val_so_far = 1;
    }
  else
    gcc_unreachable ();

  for (opno = 1; opno < alg->ops; opno++)
    {
      int log = alg->log[opno];
      /* When not optimizing, reuse ACCUM as the target of each step to
	 keep register pressure down; when optimizing, let the expander
	 pick fresh pseudos.  */
      rtx shift_subtarget = optimize ? 0 : accum;
      rtx add_target
	= (opno == alg->ops - 1 && target != 0 && variant != add_variant
	   && !optimize)
	  ? target : 0;
      rtx accum_target = optimize ? 0 : accum;
      rtx accum_inner;

      switch (alg->op[opno])
	{
	case alg_shift:
	  /* ACCUM <<= LOG.  */
	  tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
	  /* REG_EQUAL note will be attached to the following insn.  */
	  emit_move_insn (accum, tem);
	  val_so_far <<= log;
	  break;

	case alg_add_t_m2:
	  /* ACCUM += OP0 << LOG.  */
	  tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
	  accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
				 add_target ? add_target : accum_target);
	  val_so_far += (HOST_WIDE_INT) 1 << log;
	  break;

	case alg_sub_t_m2:
	  /* ACCUM -= OP0 << LOG.  */
	  tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
	  accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
				 add_target ? add_target : accum_target);
	  val_so_far -= (HOST_WIDE_INT) 1 << log;
	  break;

	case alg_add_t2_m:
	  /* ACCUM = (ACCUM << LOG) + OP0.  */
	  accum = expand_shift (LSHIFT_EXPR, mode, accum,
				log, shift_subtarget, 0);
	  accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
				 add_target ? add_target : accum_target);
	  val_so_far = (val_so_far << log) + 1;
	  break;

	case alg_sub_t2_m:
	  /* ACCUM = (ACCUM << LOG) - OP0.  */
	  accum = expand_shift (LSHIFT_EXPR, mode, accum,
				log, shift_subtarget, 0);
	  accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
				 add_target ? add_target : accum_target);
	  val_so_far = (val_so_far << log) - 1;
	  break;

	case alg_add_factor:
	  /* ACCUM += ACCUM << LOG, i.e. multiply by (1 << LOG) + 1.  */
	  tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
	  accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
				 add_target ? add_target : accum_target);
	  val_so_far += val_so_far << log;
	  break;

	case alg_sub_factor:
	  /* ACCUM = (ACCUM << LOG) - ACCUM, i.e. multiply by
	     (1 << LOG) - 1.  */
	  tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
	  accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
				 (add_target
				  ? add_target : (optimize ? 0 : tem)));
	  val_so_far = (val_so_far << log) - val_so_far;
	  break;

	default:
	  gcc_unreachable ();
	}

      if (SCALAR_INT_MODE_P (mode))
	{
	  /* Write a REG_EQUAL note on the last insn so that we can cse
	     multiplication sequences.  Note that if ACCUM is a SUBREG,
	     we've set the inner register and must properly indicate that.  */
	  tem = op0, nmode = mode;
	  accum_inner = accum;
	  if (GET_CODE (accum) == SUBREG)
	    {
	      accum_inner = SUBREG_REG (accum);
	      nmode = GET_MODE (accum_inner);
	      tem = gen_lowpart (nmode, op0);
	    }

	  insn = get_last_insn ();
	  set_dst_reg_note (insn, REG_EQUAL,
			    gen_rtx_MULT (nmode, tem, GEN_INT (val_so_far)),
			    accum_inner);
	}
    }

  /* Apply the final fixup chosen by choose_mult_variant.  */
  if (variant == negate_variant)
    {
      val_so_far = -val_so_far;
      accum = expand_unop (mode, neg_optab, accum, target, 0);
    }
  else if (variant == add_variant)
    {
      val_so_far = val_so_far + 1;
      accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
    }

  /* Compare only the bits of val and val_so_far that are significant
     in the result mode, to avoid sign-/zero-extension confusion.  */
  nmode = GET_MODE_INNER (mode);
  if (nmode == VOIDmode)
    nmode = mode;
  val &= GET_MODE_MASK (nmode);
  val_so_far &= GET_MODE_MASK (nmode);
  gcc_assert (val == val_so_far);

  return accum;
}
3147 | ||
/* Perform a multiplication and return an rtx for the result.
   MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
   TARGET is a suggestion for where to store the result (an rtx).

   We check specially for a constant integer as OP1.
   If you want this check for OP0 as well, then before calling
   you should swap the two operands if OP0 would be constant.  */

rtx
expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
	     int unsignedp)
{
  enum mult_variant variant;
  struct algorithm algorithm;
  rtx scalar_op1;
  int max_cost;
  bool speed = optimize_insn_for_speed_p ();
  /* Trapping-on-overflow signed multiply must use the checking optabs
     and cannot be synthesized from shifts and adds.  */
  bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;

  /* Canonicalize: keep any constant in OP1.  */
  if (CONSTANT_P (op0))
    {
      rtx temp = op0;
      op0 = op1;
      op1 = temp;
    }

  /* For vectors, there are several simplifications that can be made if
     all elements of the vector constant are identical.  */
  scalar_op1 = op1;
  if (GET_CODE (op1) == CONST_VECTOR)
    {
      int i, n = CONST_VECTOR_NUNITS (op1);
      scalar_op1 = CONST_VECTOR_ELT (op1, 0);
      for (i = 1; i < n; ++i)
	if (!rtx_equal_p (scalar_op1, CONST_VECTOR_ELT (op1, i)))
	  goto skip_scalar;
    }

  if (INTEGRAL_MODE_P (mode))
    {
      rtx fake_reg;
      HOST_WIDE_INT coeff;
      bool is_neg;
      int mode_bitsize;

      /* Trivial coefficients: 0, 1 and -1.  */
      if (op1 == CONST0_RTX (mode))
	return op1;
      if (op1 == CONST1_RTX (mode))
	return op0;
      if (op1 == CONSTM1_RTX (mode))
	return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
			    op0, target, 0);

      if (do_trapv)
	goto skip_synth;

      /* These are the operations that are potentially turned into
	 a sequence of shifts and additions.  */
      mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);

      /* synth_mult does an `unsigned int' multiply.  As long as the mode is
	 less than or equal in size to `unsigned int' this doesn't matter.
	 If the mode is larger than `unsigned int', then synth_mult works
	 only if the constant value exactly fits in an `unsigned int' without
	 any truncation.  This means that multiplying by negative values does
	 not work; results are off by 2^32 on a 32 bit machine.  */

      if (CONST_INT_P (scalar_op1))
	{
	  coeff = INTVAL (scalar_op1);
	  is_neg = coeff < 0;
	}
      else if (CONST_DOUBLE_P (scalar_op1))
	{
	  /* If we are multiplying in DImode, it may still be a win
	     to try to work with shifts and adds.  */
	  if (CONST_DOUBLE_HIGH (scalar_op1) == 0
	      && CONST_DOUBLE_LOW (scalar_op1) > 0)
	    {
	      coeff = CONST_DOUBLE_LOW (scalar_op1);
	      is_neg = false;
	    }
	  else if (CONST_DOUBLE_LOW (scalar_op1) == 0)
	    {
	      /* Only the high word is set: a power of two times 2^HWI
		 can still be done with a single wider shift.  */
	      coeff = CONST_DOUBLE_HIGH (scalar_op1);
	      if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
		{
		  int shift = floor_log2 (coeff) + HOST_BITS_PER_WIDE_INT;
		  if (shift < HOST_BITS_PER_DOUBLE_INT - 1
		      || mode_bitsize <= HOST_BITS_PER_DOUBLE_INT)
		    return expand_shift (LSHIFT_EXPR, mode, op0,
					 shift, target, unsignedp);
		}
	      goto skip_synth;
	    }
	  else
	    goto skip_synth;
	}
      else
	goto skip_synth;

      /* We used to test optimize here, on the grounds that it's better to
	 produce a smaller program when -O is not used.  But this causes
	 such a terrible slowdown sometimes that it seems better to always
	 use synth_mult.  */

      /* Special case powers of two.  */
      if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
	return expand_shift (LSHIFT_EXPR, mode, op0,
			     floor_log2 (coeff), target, unsignedp);

      /* FAKE_REG stands in for OP0 in the cost computation so that the
	 cost of OP0 itself is not counted.  */
      fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);

      /* Attempt to handle multiplication of DImode values by negative
	 coefficients, by performing the multiplication by a positive
	 multiplier and then inverting the result.  */
      /* ??? How is this not slightly redundant with the neg variant?  */
      if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
	{
	  /* Its safe to use -coeff even for INT_MIN, as the
	     result is interpreted as an unsigned coefficient.
	     Exclude cost of op0 from max_cost to match the cost
	     calculation of the synth_mult.  */
	  max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed)
		      - neg_cost(speed, mode));
	  if (max_cost > 0
	      && choose_mult_variant (mode, -coeff, &algorithm,
				      &variant, max_cost))
	    {
	      rtx temp = expand_mult_const (mode, op0, -coeff, NULL_RTX,
					    &algorithm, variant);
	      return expand_unop (mode, neg_optab, temp, target, 0);
	    }
	}

      /* Exclude cost of op0 from max_cost to match the cost
	 calculation of the synth_mult.  */
      max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed);
      if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
	return expand_mult_const (mode, op0, coeff, target,
				  &algorithm, variant);
    }
 skip_synth:

  /* Expand x*2.0 as x+x.  */
  if (GET_CODE (scalar_op1) == CONST_DOUBLE && FLOAT_MODE_P (mode))
    {
      REAL_VALUE_TYPE d;
      REAL_VALUE_FROM_CONST_DOUBLE (d, scalar_op1);

      if (REAL_VALUES_EQUAL (d, dconst2))
	{
	  op0 = force_reg (GET_MODE (op0), op0);
	  return expand_binop (mode, add_optab, op0, op0,
			       target, unsignedp, OPTAB_LIB_WIDEN);
	}
    }
 skip_scalar:

  /* This used to use umul_optab if unsigned, but for non-widening multiply
     there is no difference between signed and unsigned.  */
  op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
		      op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
  gcc_assert (op0);
  return op0;
}
62be004c | 3314 | |
72655676 | 3315 | /* Return a cost estimate for multiplying a register by the given |
3316 | COEFFicient in the given MODE and SPEED. */ | |
3317 | ||
3318 | int | |
3319 | mult_by_coeff_cost (HOST_WIDE_INT coeff, enum machine_mode mode, bool speed) | |
3320 | { | |
3321 | int max_cost; | |
3322 | struct algorithm algorithm; | |
3323 | enum mult_variant variant; | |
3324 | ||
3325 | rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1); | |
3326 | max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg), speed); | |
3327 | if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost)) | |
3328 | return algorithm.cost.cost; | |
3329 | else | |
3330 | return max_cost; | |
3331 | } | |
3332 | ||
/* Perform a widening multiplication and return an rtx for the result.
   MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
   TARGET is a suggestion for where to store the result (an rtx).
   THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
   or smul_widen_optab.

   We check specially for a constant integer as OP1, comparing the
   cost of a widening multiply against the cost of a sequence of shifts
   and adds.  */

rtx
expand_widening_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
		      int unsignedp, optab this_optab)
{
  bool speed = optimize_insn_for_speed_p ();
  rtx cop1;

  /* Try a shift/add synthesis only when OP1 is a constant that survives
     conversion to the wider MODE unchanged, and either is non-negative
     or MODE fits in a host wide int (so synth_mult's arithmetic is
     exact).  Note the assignment to COP1 inside the condition.  */
  if (CONST_INT_P (op1)
      && GET_MODE (op0) != VOIDmode
      && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
				this_optab == umul_widen_optab))
      && CONST_INT_P (cop1)
      && (INTVAL (cop1) >= 0
	  || HWI_COMPUTABLE_MODE_P (mode)))
    {
      HOST_WIDE_INT coeff = INTVAL (cop1);
      int max_cost;
      enum mult_variant variant;
      struct algorithm algorithm;

      /* Special case powers of two.  */
      if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
	{
	  op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
	  return expand_shift (LSHIFT_EXPR, mode, op0,
			       floor_log2 (coeff), target, unsignedp);
	}

      /* Exclude cost of op0 from max_cost to match the cost
	 calculation of the synth_mult.  */
      max_cost = mul_widen_cost (speed, mode);
      if (choose_mult_variant (mode, coeff, &algorithm, &variant,
			       max_cost))
	{
	  op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
	  return expand_mult_const (mode, op0, coeff, target,
				    &algorithm, variant);
	}
    }
  /* Fall back to a real widening multiply instruction or libcall.  */
  return expand_binop (mode, this_optab, op0, op1, target,
		       unsignedp, OPTAB_LIB_WIDEN);
}
db96f378 | 3385 | \f |
/* Choose a minimal N + 1 bit approximation to 1/D that can be used to
   replace division by D, and put the least significant N bits of the result
   in *MULTIPLIER_PTR and return the most significant bit.

   The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
   needed precision is in PRECISION (should be <= N).

   PRECISION should be as small as possible so this function can choose
   multiplier more freely.

   The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
   is to be used for a final right shift is placed in *POST_SHIFT_PTR.

   Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
   where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */

unsigned HOST_WIDE_INT
choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
		   unsigned HOST_WIDE_INT *multiplier_ptr,
		   int *post_shift_ptr, int *lgup_ptr)
{
  /* mlow/mhigh bracket the usable multipliers; both are up to N+1 bits
     wide, so each is kept as a high/low pair of host wide ints.  */
  HOST_WIDE_INT mhigh_hi, mlow_hi;
  unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
  int lgup, post_shift;
  int pow, pow2;
  unsigned HOST_WIDE_INT nl, dummy1;
  HOST_WIDE_INT nh, dummy2;

  /* lgup = ceil(log2(divisor)); */
  lgup = ceil_log2 (d);

  gcc_assert (lgup <= n);

  pow = n + lgup;
  pow2 = n + lgup - precision;

  /* We could handle this with some effort, but this case is much
     better handled directly with a scc insn, so rely on caller using
     that.  */
  gcc_assert (pow != HOST_BITS_PER_DOUBLE_INT);

  /* mlow = 2^(N + lgup)/d */
  if (pow >= HOST_BITS_PER_WIDE_INT)
    {
      nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
      nl = 0;
    }
  else
    {
      nh = 0;
      nl = (unsigned HOST_WIDE_INT) 1 << pow;
    }
  div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
			&mlow_lo, &mlow_hi, &dummy1, &dummy2);

  /* mhigh = (2^(N + lgup) + 2^N + lgup - precision)/d */
  if (pow2 >= HOST_BITS_PER_WIDE_INT)
    nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
  else
    nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
  div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
			&mhigh_lo, &mhigh_hi, &dummy1, &dummy2);

  gcc_assert (!mhigh_hi || nh - d < d);
  gcc_assert (mhigh_hi <= 1 && mlow_hi <= 1);
  /* Assert that mlow < mhigh.  */
  gcc_assert (mlow_hi < mhigh_hi
	      || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo));

  /* If precision == N, then mlow, mhigh exceed 2^N
     (but they do not exceed 2^(N+1)).  */

  /* Reduce to lowest terms.  Halve both bounds while they still differ;
     each successful halving lets us drop one bit of post shift.  */
  for (post_shift = lgup; post_shift > 0; post_shift--)
    {
      unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
      unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
      if (ml_lo >= mh_lo)
	break;

      mlow_hi = 0;
      mlow_lo = ml_lo;
      mhigh_hi = 0;
      mhigh_lo = mh_lo;
    }

  /* Return the chosen multiplier (mhigh): low N bits through
     *MULTIPLIER_PTR, the (N+1)-st bit as the return value.  */
  *post_shift_ptr = post_shift;
  *lgup_ptr = lgup;
  if (n < HOST_BITS_PER_WIDE_INT)
    {
      unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
      *multiplier_ptr = mhigh_lo & mask;
      return mhigh_lo >= mask;
    }
  else
    {
      *multiplier_ptr = mhigh_lo;
      return mhigh_hi;
    }
}
3486 | ||
3487 | /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is | |
3488 | congruent to 1 (mod 2**N). */ | |
3489 | ||
3490 | static unsigned HOST_WIDE_INT | |
35cb5232 | 3491 | invert_mod2n (unsigned HOST_WIDE_INT x, int n) |
d2fa4ea5 | 3492 | { |
a92771b8 | 3493 | /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */ |
d2fa4ea5 | 3494 | |
3495 | /* The algorithm notes that the choice y = x satisfies | |
3496 | x*y == 1 mod 2^3, since x is assumed odd. | |
3497 | Each iteration doubles the number of bits of significance in y. */ | |
3498 | ||
3499 | unsigned HOST_WIDE_INT mask; | |
3500 | unsigned HOST_WIDE_INT y = x; | |
3501 | int nbit = 3; | |
3502 | ||
3503 | mask = (n == HOST_BITS_PER_WIDE_INT | |
3504 | ? ~(unsigned HOST_WIDE_INT) 0 | |
3505 | : ((unsigned HOST_WIDE_INT) 1 << n) - 1); | |
3506 | ||
3507 | while (nbit < n) | |
3508 | { | |
3509 | y = y * (2 - x*y) & mask; /* Modulo 2^N */ | |
3510 | nbit *= 2; | |
3511 | } | |
3512 | return y; | |
3513 | } | |
3514 | ||
3515 | /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness | |
3516 | flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the | |
3517 | product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product | |
3518 | to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to | |
3519 | become signed. | |
3520 | ||
3521 | The result is put in TARGET if that is convenient. | |
3522 | ||
3523 | MODE is the mode of operation. */ | |
3524 | ||
3525 | rtx | |
35cb5232 | 3526 | expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0, |
3527 | rtx op1, rtx target, int unsignedp) | |
d2fa4ea5 | 3528 | { |
3529 | rtx tem; | |
3530 | enum rtx_code adj_code = unsignedp ? PLUS : MINUS; | |
3531 | ||
3532 | tem = expand_shift (RSHIFT_EXPR, mode, op0, | |
f5ff0b21 | 3533 | GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0); |
6de9716c | 3534 | tem = expand_and (mode, tem, op1, NULL_RTX); |
941522d6 | 3535 | adj_operand |
3536 | = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem), | |
3537 | adj_operand); | |
d2fa4ea5 | 3538 | |
3539 | tem = expand_shift (RSHIFT_EXPR, mode, op1, | |
f5ff0b21 | 3540 | GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0); |
6de9716c | 3541 | tem = expand_and (mode, tem, op0, NULL_RTX); |
941522d6 | 3542 | target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem), |
3543 | target); | |
d2fa4ea5 | 3544 | |
3545 | return target; | |
3546 | } | |
3547 | ||
ebf4f764 | 3548 | /* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */ |
d2fa4ea5 | 3549 | |
27588b0f | 3550 | static rtx |
3551 | extract_high_half (enum machine_mode mode, rtx op) | |
3552 | { | |
3553 | enum machine_mode wider_mode; | |
d2fa4ea5 | 3554 | |
27588b0f | 3555 | if (mode == word_mode) |
3556 | return gen_highpart (mode, op); | |
33183a3c | 3557 | |
069b07bf | 3558 | gcc_assert (!SCALAR_FLOAT_MODE_P (mode)); |
3559 | ||
27588b0f | 3560 | wider_mode = GET_MODE_WIDER_MODE (mode); |
3561 | op = expand_shift (RSHIFT_EXPR, wider_mode, op, | |
f5ff0b21 | 3562 | GET_MODE_BITSIZE (mode), 0, 1); |
27588b0f | 3563 | return convert_modes (mode, wider_mode, op, 0); |
3564 | } | |
d2fa4ea5 | 3565 | |
/* Like expmed_mult_highpart, but only consider using a multiplication
   optab.  OP1 is an rtx for the constant operand.  */

static rtx
expmed_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
			    rtx target, int unsignedp, int max_cost)
{
  rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
  enum machine_mode wider_mode;
  optab moptab;
  rtx tem;
  int size;
  bool speed = optimize_insn_for_speed_p ();

  gcc_assert (!SCALAR_FLOAT_MODE_P (mode));

  wider_mode = GET_MODE_WIDER_MODE (mode);
  size = GET_MODE_BITSIZE (mode);

  /* Each strategy below is attempted only when its estimated cost is
     below MAX_COST, in order from most to least direct.  */

  /* Firstly, try using a multiplication insn that only generates the needed
     high part of the product, and in the sign flavor of unsignedp.  */
  if (mul_highpart_cost (speed, mode) < max_cost)
    {
      moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
      tem = expand_binop (mode, moptab, op0, narrow_op1, target,
			  unsignedp, OPTAB_DIRECT);
      if (tem)
	return tem;
    }

  /* Secondly, same as above, but use sign flavor opposite of unsignedp.
     Need to adjust the result after the multiplication.  */
  if (size - 1 < BITS_PER_WORD
      && (mul_highpart_cost (speed, mode)
	  + 2 * shift_cost (speed, mode, size-1)
	  + 4 * add_cost (speed, mode) < max_cost))
    {
      moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
      tem = expand_binop (mode, moptab, op0, narrow_op1, target,
			  unsignedp, OPTAB_DIRECT);
      if (tem)
	/* We used the wrong signedness.  Adjust the result.  */
	return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
					    tem, unsignedp);
    }

  /* Try widening multiplication.  */
  moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
  if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
      && mul_widen_cost (speed, wider_mode) < max_cost)
    {
      tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
			  unsignedp, OPTAB_WIDEN);
      if (tem)
	return extract_high_half (mode, tem);
    }

  /* Try widening the mode and perform a non-widening multiplication.  */
  if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
      && size - 1 < BITS_PER_WORD
      && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
	  < max_cost))
    {
      rtx insns, wop0, wop1;

      /* We need to widen the operands, for example to ensure the
	 constant multiplier is correctly sign or zero extended.
	 Use a sequence to clean-up any instructions emitted by
	 the conversions if things don't work out.  */
      start_sequence ();
      wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
      wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
      tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
			  unsignedp, OPTAB_WIDEN);
      insns = get_insns ();
      end_sequence ();

      if (tem)
	{
	  emit_insn (insns);
	  return extract_high_half (mode, tem);
	}
    }

  /* Try widening multiplication of opposite signedness, and adjust.  */
  moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
  if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
      && size - 1 < BITS_PER_WORD
      && (mul_widen_cost (speed, wider_mode)
	  + 2 * shift_cost (speed, mode, size-1)
	  + 4 * add_cost (speed, mode) < max_cost))
    {
      tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
			  NULL_RTX, ! unsignedp, OPTAB_WIDEN);
      if (tem != 0)
	{
	  tem = extract_high_half (mode, tem);
	  /* We used the wrong signedness.  Adjust the result.  */
	  return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
					      target, unsignedp);
	}
    }

  /* No strategy was both available and cheap enough.  */
  return 0;
}
33183a3c | 3671 | |
/* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
   putting the high half of the result in TARGET if that is convenient,
   and return where the result is.  If the operation can not be performed,
   0 is returned.

   MODE is the mode of operation and result.

   UNSIGNEDP nonzero means unsigned multiply.

   MAX_COST is the total allowed cost for the expanded RTL.

   Strategy: if the mode is narrow enough for the synth_mult machinery
   (i.e. WIDER_MODE still fits in a word), compare the cost of a
   shift/add synthesized multiply in WIDER_MODE against the specialized
   highpart-multiply optabs, and emit whichever is cheaper within
   MAX_COST; otherwise fall back to the optab-only helper.  */

static rtx
expmed_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
		      rtx target, int unsignedp, int max_cost)
{
  enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
  unsigned HOST_WIDE_INT cnst1;
  int extra_cost;
  bool sign_adjust = false;
  enum mult_variant variant;
  struct algorithm alg;
  rtx tem;
  bool speed = optimize_insn_for_speed_p ();

  gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
  /* We can't support modes wider than HOST_BITS_PER_WIDE_INT.  */
  gcc_assert (HWI_COMPUTABLE_MODE_P (mode));

  /* OP1 must be a CONST_INT here; reduce it to MODE's precision.  */
  cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);

  /* We can't optimize modes wider than BITS_PER_WORD.
     ??? We might be able to perform double-word arithmetic if
     mode == word_mode, however all the cost calculations in
     synth_mult etc. assume single-word operations.  */
  if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
    return expmed_mult_highpart_optab (mode, op0, op1, target,
				       unsignedp, max_cost);

  /* Extracting the high half of the wide product costs one shift by
     (bitsize - 1); account for it up front.  */
  extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);

  /* Check whether we try to multiply by a negative constant.  */
  if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
    {
      /* A signed multiply by a constant with the sign bit set needs a
	 correction step (a subtract), so budget one extra add.  */
      sign_adjust = true;
      extra_cost += add_cost (speed, mode);
    }

  /* See whether shift/add multiplication is cheap enough.  */
  if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
			   max_cost - extra_cost))
    {
      /* See whether the specialized multiplication optabs are
	 cheaper than the shift/add version.  */
      tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
					alg.cost.cost + extra_cost);
      if (tem)
	return tem;

      /* Synthesize the full product in WIDER_MODE, then keep only the
	 high half.  */
      tem = convert_to_mode (wider_mode, op0, unsignedp);
      tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
      tem = extract_high_half (mode, tem);

      /* Adjust result for signedness.  */
      if (sign_adjust)
	tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);

      return tem;
    }
  /* Shift/add synthesis was too expensive; let the optab helper use
     the full cost budget.  */
  return expmed_mult_highpart_optab (mode, op0, op1, target,
				     unsignedp, max_cost);
}
41323e11 | 3743 | |
3744 | ||
3745 | /* Expand signed modulus of OP0 by a power of two D in mode MODE. */ | |
3746 | ||
3747 | static rtx | |
3748 | expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d) | |
3749 | { | |
4b05206e | 3750 | unsigned HOST_WIDE_INT masklow, maskhigh; |
8b908ec4 | 3751 | rtx result, temp, shift, label; |
41323e11 | 3752 | int logd; |
3753 | ||
3754 | logd = floor_log2 (d); | |
3755 | result = gen_reg_rtx (mode); | |
3756 | ||
3757 | /* Avoid conditional branches when they're expensive. */ | |
4a9d7ef7 | 3758 | if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2 |
a0d18cec | 3759 | && optimize_insn_for_speed_p ()) |
41323e11 | 3760 | { |
3761 | rtx signmask = emit_store_flag (result, LT, op0, const0_rtx, | |
3762 | mode, 0, -1); | |
3763 | if (signmask) | |
3764 | { | |
3765 | signmask = force_reg (mode, signmask); | |
4b05206e | 3766 | masklow = ((HOST_WIDE_INT) 1 << logd) - 1; |
8b908ec4 | 3767 | shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd); |
3768 | ||
3769 | /* Use the rtx_cost of a LSHIFTRT instruction to determine | |
3770 | which instruction sequence to use. If logical right shifts | |
3771 | are expensive the use 2 XORs, 2 SUBs and an AND, otherwise | |
3772 | use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */ | |
4b05206e | 3773 | |
8b908ec4 | 3774 | temp = gen_rtx_LSHIFTRT (mode, result, shift); |
d6bf3b14 | 3775 | if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing |
7013e87c | 3776 | || (set_src_cost (temp, optimize_insn_for_speed_p ()) |
3777 | > COSTS_N_INSNS (2))) | |
8b908ec4 | 3778 | { |
3779 | temp = expand_binop (mode, xor_optab, op0, signmask, | |
3780 | NULL_RTX, 1, OPTAB_LIB_WIDEN); | |
3781 | temp = expand_binop (mode, sub_optab, temp, signmask, | |
3782 | NULL_RTX, 1, OPTAB_LIB_WIDEN); | |
4b05206e | 3783 | temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow), |
8b908ec4 | 3784 | NULL_RTX, 1, OPTAB_LIB_WIDEN); |
3785 | temp = expand_binop (mode, xor_optab, temp, signmask, | |
3786 | NULL_RTX, 1, OPTAB_LIB_WIDEN); | |
3787 | temp = expand_binop (mode, sub_optab, temp, signmask, | |
3788 | NULL_RTX, 1, OPTAB_LIB_WIDEN); | |
3789 | } | |
3790 | else | |
3791 | { | |
3792 | signmask = expand_binop (mode, lshr_optab, signmask, shift, | |
3793 | NULL_RTX, 1, OPTAB_LIB_WIDEN); | |
3794 | signmask = force_reg (mode, signmask); | |
3795 | ||
3796 | temp = expand_binop (mode, add_optab, op0, signmask, | |
3797 | NULL_RTX, 1, OPTAB_LIB_WIDEN); | |
4b05206e | 3798 | temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow), |
8b908ec4 | 3799 | NULL_RTX, 1, OPTAB_LIB_WIDEN); |
3800 | temp = expand_binop (mode, sub_optab, temp, signmask, | |
3801 | NULL_RTX, 1, OPTAB_LIB_WIDEN); | |
3802 | } | |
41323e11 | 3803 | return temp; |
3804 | } | |
3805 | } | |
3806 | ||
3807 | /* Mask contains the mode's signbit and the significant bits of the | |
3808 | modulus. By including the signbit in the operation, many targets | |
3809 | can avoid an explicit compare operation in the following comparison | |
3810 | against zero. */ | |
3811 | ||
4b05206e | 3812 | masklow = ((HOST_WIDE_INT) 1 << logd) - 1; |
3813 | if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT) | |
3814 | { | |
3815 | masklow |= (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1); | |
3816 | maskhigh = -1; | |
3817 | } | |
3818 | else | |
3819 | maskhigh = (HOST_WIDE_INT) -1 | |
3820 | << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1); | |
41323e11 | 3821 | |
4b05206e | 3822 | temp = expand_binop (mode, and_optab, op0, |
3823 | immed_double_const (masklow, maskhigh, mode), | |
3824 | result, 1, OPTAB_LIB_WIDEN); | |
41323e11 | 3825 | if (temp != result) |
3826 | emit_move_insn (result, temp); | |
3827 | ||
3828 | label = gen_label_rtx (); | |
3829 | do_cmp_and_jump (result, const0_rtx, GE, mode, label); | |
3830 | ||
3831 | temp = expand_binop (mode, sub_optab, result, const1_rtx, result, | |
3832 | 0, OPTAB_LIB_WIDEN); | |
4b05206e | 3833 | masklow = (HOST_WIDE_INT) -1 << logd; |
3834 | maskhigh = -1; | |
3835 | temp = expand_binop (mode, ior_optab, temp, | |
3836 | immed_double_const (masklow, maskhigh, mode), | |
3837 | result, 1, OPTAB_LIB_WIDEN); | |
41323e11 | 3838 | temp = expand_binop (mode, add_optab, temp, const1_rtx, result, |
3839 | 0, OPTAB_LIB_WIDEN); | |
3840 | if (temp != result) | |
3841 | emit_move_insn (result, temp); | |
3842 | emit_label (label); | |
3843 | return result; | |
3844 | } | |
9c423367 | 3845 | |
/* Expand signed division of OP0 by a power of two D in mode MODE.
   This routine is only called for positive values of D.

   Returns an rtx holding the truncating quotient OP0 / D.  The common
   idea across all variants is to bias negative dividends by D - 1
   before the arithmetic right shift, so the shift rounds toward zero
   rather than toward negative infinity.  Which biasing sequence is
   emitted depends on BRANCH_COST and target capabilities, from
   cheapest-branchless down to an explicit compare-and-branch.  */

static rtx
expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
{
  rtx temp, label;
  int logd;

  logd = floor_log2 (d);

  /* D == 2: the bias is just (OP0 < 0), available directly from a
     store-flag instruction.  */
  if (d == 2
      && BRANCH_COST (optimize_insn_for_speed_p (),
		      false) >= 1)
    {
      temp = gen_reg_rtx (mode);
      temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
      temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
			   0, OPTAB_LIB_WIDEN);
      return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
    }

#ifdef HAVE_conditional_move
  /* Use a conditional move to select between OP0 and OP0 + (D - 1).  */
  if (BRANCH_COST (optimize_insn_for_speed_p (), false)
      >= 2)
    {
      rtx temp2;

      /* ??? emit_conditional_move forces a stack adjustment via
	 compare_from_rtx so, if the sequence is discarded, it will
	 be lost.  Do it now instead.  */
      do_pending_stack_adjust ();

      /* Build the sequence tentatively; it is only emitted if the
	 conditional move succeeds.  */
      start_sequence ();
      temp2 = copy_to_mode_reg (mode, op0);
      temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
			   NULL_RTX, 0, OPTAB_LIB_WIDEN);
      temp = force_reg (mode, temp);

      /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
      temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
				     mode, temp, temp2, mode, 0);
      if (temp2)
	{
	  rtx seq = get_insns ();
	  end_sequence ();
	  emit_insn (seq);
	  return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
	}
      /* Conditional move not available; discard the sequence and fall
	 through to the next strategy.  */
      end_sequence ();
    }
#endif

  /* Branchless variant via store-flag: TEMP is -1 if OP0 < 0, else 0;
     turn it into the bias D - 1 either by masking or by a logical
     right shift, whichever the cost model prefers.  */
  if (BRANCH_COST (optimize_insn_for_speed_p (),
		   false) >= 2)
    {
      int ushift = GET_MODE_BITSIZE (mode) - logd;

      temp = gen_reg_rtx (mode);
      temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
      if (shift_cost (optimize_insn_for_speed_p (), mode, ushift)
	  > COSTS_N_INSNS (1))
	temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
			     NULL_RTX, 0, OPTAB_LIB_WIDEN);
      else
	temp = expand_shift (RSHIFT_EXPR, mode, temp,
			     ushift, NULL_RTX, 1);
      temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
			   0, OPTAB_LIB_WIDEN);
      return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
    }

  /* Fallback: explicit branch around the bias increment.  */
  label = gen_label_rtx ();
  temp = copy_to_mode_reg (mode, op0);
  do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
  expand_inc (temp, GEN_INT (d - 1));
  emit_label (label);
  return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
}
d2fa4ea5 | 3925 | \f |
db96f378 | 3926 | /* Emit the code to divide OP0 by OP1, putting the result in TARGET |
3927 | if that is convenient, and returning where the result is. | |
3928 | You may request either the quotient or the remainder as the result; | |
3929 | specify REM_FLAG nonzero to get the remainder. | |
3930 | ||
3931 | CODE is the expression code for which kind of division this is; | |
3932 | it controls how rounding is done. MODE is the machine mode to use. | |
3933 | UNSIGNEDP nonzero means do unsigned division. */ | |
3934 | ||
3935 | /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI | |
3936 | and then correct it by or'ing in missing high bits | |
3937 | if result of ANDI is nonzero. | |
3938 | For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result. | |
3939 | This could optimize to a bfexts instruction. | |
3940 | But C doesn't use these operations, so their optimizations are | |
3941 | left for later. */ | |
a490489b | 3942 | /* ??? For modulo, we don't actually need the highpart of the first product, |
3943 | the low part will do nicely. And for small divisors, the second multiply | |
3944 | can also be a low-part only multiply or even be completely left out. | |
3945 | E.g. to calculate the remainder of a division by 3 with a 32 bit | |
3946 | multiply, multiply with 0x55555556 and extract the upper two bits; | |
3947 | the result is exact for inputs up to 0x1fffffff. | |
3948 | The input range can be reduced by using cross-sum rules. | |
3949 | For odd divisors >= 3, the following table gives right shift counts | |
edc2a478 | 3950 | so that if a number is shifted by an integer multiple of the given |
a490489b | 3951 | amount, the remainder stays the same: |
3952 | 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20, | |
3953 | 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0, | |
3954 | 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0, | |
3955 | 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33, | |
3956 | 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12 | |
3957 | ||
3958 | Cross-sum rules for even numbers can be derived by leaving as many bits | |
3959 | to the right alone as the divisor has zeros to the right. | |
3960 | E.g. if x is an unsigned 32 bit number: | |
3961 | (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28 | |
3962 | */ | |
db96f378 | 3963 | |
3964 | rtx | |
35cb5232 | 3965 | expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, |
3966 | rtx op0, rtx op1, rtx target, int unsignedp) | |
db96f378 | 3967 | { |
db96f378 | 3968 | enum machine_mode compute_mode; |
19cb6b50 | 3969 | rtx tquotient; |
d2fa4ea5 | 3970 | rtx quotient = 0, remainder = 0; |
3971 | rtx last; | |
ccc4d85f | 3972 | int size; |
41cf444a | 3973 | rtx insn; |
db96f378 | 3974 | optab optab1, optab2; |
ae01b312 | 3975 | int op1_is_constant, op1_is_pow2 = 0; |
33183a3c | 3976 | int max_cost, extra_cost; |
9e042f31 | 3977 | static HOST_WIDE_INT last_div_const = 0; |
ae01b312 | 3978 | static HOST_WIDE_INT ext_op1; |
f529eb25 | 3979 | bool speed = optimize_insn_for_speed_p (); |
d2fa4ea5 | 3980 | |
971ba038 | 3981 | op1_is_constant = CONST_INT_P (op1); |
ae01b312 | 3982 | if (op1_is_constant) |
3983 | { | |
3984 | ext_op1 = INTVAL (op1); | |
3985 | if (unsignedp) | |
3986 | ext_op1 &= GET_MODE_MASK (mode); | |
3987 | op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1) | |
3988 | || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1)))); | |
3989 | } | |
d2fa4ea5 | 3990 | |
3991 | /* | |
3992 | This is the structure of expand_divmod: | |
3993 | ||
3994 | First comes code to fix up the operands so we can perform the operations | |
3995 | correctly and efficiently. | |
3996 | ||
3997 | Second comes a switch statement with code specific for each rounding mode. | |
3998 | For some special operands this code emits all RTL for the desired | |
c3118728 | 3999 | operation, for other cases, it generates only a quotient and stores it in |
d2fa4ea5 | 4000 | QUOTIENT. The case for trunc division/remainder might leave quotient = 0, |
4001 | to indicate that it has not done anything. | |
4002 | ||
c3118728 | 4003 | Last comes code that finishes the operation. If QUOTIENT is set and |
4004 | REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If | |
4005 | QUOTIENT is not set, it is computed using trunc rounding. | |
db96f378 | 4006 | |
d2fa4ea5 | 4007 | We try to generate special code for division and remainder when OP1 is a |
4008 | constant. If |OP1| = 2**n we can use shifts and some other fast | |
4009 | operations. For other values of OP1, we compute a carefully selected | |
4010 | fixed-point approximation m = 1/OP1, and generate code that multiplies OP0 | |
4011 | by m. | |
4012 | ||
4013 | In all cases but EXACT_DIV_EXPR, this multiplication requires the upper | |
4014 | half of the product. Different strategies for generating the product are | |
ebf4f764 | 4015 | implemented in expmed_mult_highpart. |
d2fa4ea5 | 4016 | |
4017 | If what we actually want is the remainder, we generate that by another | |
4018 | by-constant multiplication and a subtraction. */ | |
4019 | ||
4020 | /* We shouldn't be called with OP1 == const1_rtx, but some of the | |
1b05ead8 | 4021 | code below will malfunction if we are, so check here and handle |
4022 | the special case if so. */ | |
4023 | if (op1 == const1_rtx) | |
4024 | return rem_flag ? const0_rtx : op0; | |
4025 | ||
bec2d490 | 4026 | /* When dividing by -1, we could get an overflow. |
4027 | negv_optab can handle overflows. */ | |
4028 | if (! unsignedp && op1 == constm1_rtx) | |
4029 | { | |
4030 | if (rem_flag) | |
ff385626 | 4031 | return const0_rtx; |
bec2d490 | 4032 | return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT |
ff385626 | 4033 | ? negv_optab : neg_optab, op0, target, 0); |
bec2d490 | 4034 | } |
4035 | ||
64e50eaa | 4036 | if (target |
4037 | /* Don't use the function value register as a target | |
4038 | since we have to read it as well as write it, | |
4039 | and function-inlining gets confused by this. */ | |
4040 | && ((REG_P (target) && REG_FUNCTION_VALUE_P (target)) | |
4041 | /* Don't clobber an operand while doing a multi-step calculation. */ | |
eb55662f | 4042 | || ((rem_flag || op1_is_constant) |
64e50eaa | 4043 | && (reg_mentioned_p (target, op0) |
e16ceb8e | 4044 | || (MEM_P (op0) && MEM_P (target)))) |
64e50eaa | 4045 | || reg_mentioned_p (target, op1) |
e16ceb8e | 4046 | || (MEM_P (op1) && MEM_P (target)))) |
db96f378 | 4047 | target = 0; |
4048 | ||
db96f378 | 4049 | /* Get the mode in which to perform this computation. Normally it will |
4050 | be MODE, but sometimes we can't do the desired operation in MODE. | |
4051 | If so, pick a wider mode in which we can do the operation. Convert | |
4052 | to that mode at the start to avoid repeated conversions. | |
4053 | ||
4054 | First see what operations we need. These depend on the expression | |
4055 | we are evaluating. (We assume that divxx3 insns exist under the | |
4056 | same conditions that modxx3 insns and that these insns don't normally | |
4057 | fail. If these assumptions are not correct, we may generate less | |
4058 | efficient code in some cases.) | |
4059 | ||
4060 | Then see if we find a mode in which we can open-code that operation | |
4061 | (either a division, modulus, or shift). Finally, check for the smallest | |
4062 | mode for which we can do the operation with a library call. */ | |
4063 | ||
d2fa4ea5 | 4064 | /* We might want to refine this now that we have division-by-constant |
ebf4f764 | 4065 | optimization. Since expmed_mult_highpart tries so many variants, it is |
d2fa4ea5 | 4066 | not straightforward to generalize this. Maybe we should make an array |
4067 | of possible modes in init_expmed? Save this for GCC 2.7. */ | |
4068 | ||
0ceee13a | 4069 | optab1 = ((op1_is_pow2 && op1 != const0_rtx) |
4070 | ? (unsignedp ? lshr_optab : ashr_optab) | |
db96f378 | 4071 | : (unsignedp ? udiv_optab : sdiv_optab)); |
0ceee13a | 4072 | optab2 = ((op1_is_pow2 && op1 != const0_rtx) |
4073 | ? optab1 | |
4074 | : (unsignedp ? udivmod_optab : sdivmod_optab)); | |
db96f378 | 4075 | |
4076 | for (compute_mode = mode; compute_mode != VOIDmode; | |
4077 | compute_mode = GET_MODE_WIDER_MODE (compute_mode)) | |
d6bf3b14 | 4078 | if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing |
4079 | || optab_handler (optab2, compute_mode) != CODE_FOR_nothing) | |
db96f378 | 4080 | break; |
4081 | ||
4082 | if (compute_mode == VOIDmode) | |
4083 | for (compute_mode = mode; compute_mode != VOIDmode; | |
4084 | compute_mode = GET_MODE_WIDER_MODE (compute_mode)) | |
f36b9f69 | 4085 | if (optab_libfunc (optab1, compute_mode) |
4086 | || optab_libfunc (optab2, compute_mode)) | |
db96f378 | 4087 | break; |
4088 | ||
89f18f73 | 4089 | /* If we still couldn't find a mode, use MODE, but expand_binop will |
4090 | probably die. */ | |
db96f378 | 4091 | if (compute_mode == VOIDmode) |
4092 | compute_mode = mode; | |
4093 | ||
d2fa4ea5 | 4094 | if (target && GET_MODE (target) == compute_mode) |
4095 | tquotient = target; | |
4096 | else | |
4097 | tquotient = gen_reg_rtx (compute_mode); | |
ccc4d85f | 4098 | |
d2fa4ea5 | 4099 | size = GET_MODE_BITSIZE (compute_mode); |
4100 | #if 0 | |
4101 | /* It should be possible to restrict the precision to GET_MODE_BITSIZE | |
33183a3c | 4102 | (mode), and thereby get better code when OP1 is a constant. Do that |
4103 | later. It will require going over all usages of SIZE below. */ | |
d2fa4ea5 | 4104 | size = GET_MODE_BITSIZE (mode); |
4105 | #endif | |
64e50eaa | 4106 | |
9e042f31 | 4107 | /* Only deduct something for a REM if the last divide done was |
4108 | for a different constant. Then set the constant of the last | |
4109 | divide. */ | |
49a71e58 | 4110 | max_cost = (unsignedp |
4111 | ? udiv_cost (speed, compute_mode) | |
4112 | : sdiv_cost (speed, compute_mode)); | |
1facc8d7 | 4113 | if (rem_flag && ! (last_div_const != 0 && op1_is_constant |
4114 | && INTVAL (op1) == last_div_const)) | |
49a71e58 | 4115 | max_cost -= (mul_cost (speed, compute_mode) |
4116 | + add_cost (speed, compute_mode)); | |
9e042f31 | 4117 | |
4118 | last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0; | |
33183a3c | 4119 | |
d2fa4ea5 | 4120 | /* Now convert to the best mode to use. */ |
db96f378 | 4121 | if (compute_mode != mode) |
4122 | { | |
d2fa4ea5 | 4123 | op0 = convert_modes (compute_mode, mode, op0, unsignedp); |
72467481 | 4124 | op1 = convert_modes (compute_mode, mode, op1, unsignedp); |
6d9d382f | 4125 | |
0dbd1c74 | 4126 | /* convert_modes may have placed op1 into a register, so we |
4127 | must recompute the following. */ | |
971ba038 | 4128 | op1_is_constant = CONST_INT_P (op1); |
6d9d382f | 4129 | op1_is_pow2 = (op1_is_constant |
4130 | && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)) | |
4131 | || (! unsignedp | |
0dbd1c74 | 4132 | && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ; |
db96f378 | 4133 | } |
4134 | ||
d2fa4ea5 | 4135 | /* If one of the operands is a volatile MEM, copy it into a register. */ |
8cdd0f84 | 4136 | |
e16ceb8e | 4137 | if (MEM_P (op0) && MEM_VOLATILE_P (op0)) |
d2fa4ea5 | 4138 | op0 = force_reg (compute_mode, op0); |
e16ceb8e | 4139 | if (MEM_P (op1) && MEM_VOLATILE_P (op1)) |
8cdd0f84 | 4140 | op1 = force_reg (compute_mode, op1); |
4141 | ||
008862a8 | 4142 | /* If we need the remainder or if OP1 is constant, we need to |
4143 | put OP0 in a register in case it has any queued subexpressions. */ | |
4144 | if (rem_flag || op1_is_constant) | |
4145 | op0 = force_reg (compute_mode, op0); | |
64e50eaa | 4146 | |
d2fa4ea5 | 4147 | last = get_last_insn (); |
db96f378 | 4148 | |
c3418f42 | 4149 | /* Promote floor rounding to trunc rounding for unsigned operations. */ |
d2fa4ea5 | 4150 | if (unsignedp) |
db96f378 | 4151 | { |
d2fa4ea5 | 4152 | if (code == FLOOR_DIV_EXPR) |
4153 | code = TRUNC_DIV_EXPR; | |
4154 | if (code == FLOOR_MOD_EXPR) | |
4155 | code = TRUNC_MOD_EXPR; | |
03b70ee3 | 4156 | if (code == EXACT_DIV_EXPR && op1_is_pow2) |
4157 | code = TRUNC_DIV_EXPR; | |
d2fa4ea5 | 4158 | } |
64e50eaa | 4159 | |
d2fa4ea5 | 4160 | if (op1 != const0_rtx) |
4161 | switch (code) | |
4162 | { | |
4163 | case TRUNC_MOD_EXPR: | |
4164 | case TRUNC_DIV_EXPR: | |
61e477c7 | 4165 | if (op1_is_constant) |
d2fa4ea5 | 4166 | { |
210ba7c8 | 4167 | if (unsignedp) |
d2fa4ea5 | 4168 | { |
127cb1cd | 4169 | unsigned HOST_WIDE_INT mh, ml; |
d2fa4ea5 | 4170 | int pre_shift, post_shift; |
4171 | int dummy; | |
ae01b312 | 4172 | unsigned HOST_WIDE_INT d = (INTVAL (op1) |
4173 | & GET_MODE_MASK (compute_mode)); | |
d2fa4ea5 | 4174 | |
4175 | if (EXACT_POWER_OF_2_OR_ZERO_P (d)) | |
4176 | { | |
4177 | pre_shift = floor_log2 (d); | |
4178 | if (rem_flag) | |
4179 | { | |
0bc644e0 | 4180 | remainder |
4181 | = expand_binop (compute_mode, and_optab, op0, | |
4182 | GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1), | |
4183 | remainder, 1, | |
4184 | OPTAB_LIB_WIDEN); | |
d2fa4ea5 | 4185 | if (remainder) |
436b0397 | 4186 | return gen_lowpart (mode, remainder); |
d2fa4ea5 | 4187 | } |
4188 | quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0, | |
f5ff0b21 | 4189 | pre_shift, tquotient, 1); |
d2fa4ea5 | 4190 | } |
61e477c7 | 4191 | else if (size <= HOST_BITS_PER_WIDE_INT) |
d2fa4ea5 | 4192 | { |
75ff336e | 4193 | if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1))) |
d2fa4ea5 | 4194 | { |
75ff336e | 4195 | /* Most significant bit of divisor is set; emit an scc |
4196 | insn. */ | |
dab963fb | 4197 | quotient = emit_store_flag_force (tquotient, GEU, op0, op1, |
4198 | compute_mode, 1, 1); | |
d2fa4ea5 | 4199 | } |
4200 | else | |
4201 | { | |
75ff336e | 4202 | /* Find a suitable multiplier and right shift count |
4203 | instead of multiplying with D. */ | |
4204 | ||
4205 | mh = choose_multiplier (d, size, size, | |
4206 | &ml, &post_shift, &dummy); | |
4207 | ||
4208 | /* If the suggested multiplier is more than SIZE bits, | |
4209 | we can do better for even divisors, using an | |
4210 | initial right shift. */ | |
4211 | if (mh != 0 && (d & 1) == 0) | |
4212 | { | |
4213 | pre_shift = floor_log2 (d & -d); | |
4214 | mh = choose_multiplier (d >> pre_shift, size, | |
4215 | size - pre_shift, | |
4216 | &ml, &post_shift, &dummy); | |
611234b4 | 4217 | gcc_assert (!mh); |
75ff336e | 4218 | } |
4219 | else | |
4220 | pre_shift = 0; | |
4221 | ||
4222 | if (mh != 0) | |
4223 | { | |
4224 | rtx t1, t2, t3, t4; | |
4225 | ||
84ab528e | 4226 | if (post_shift - 1 >= BITS_PER_WORD) |
4227 | goto fail1; | |
4228 | ||
e56afeb2 | 4229 | extra_cost |
49a71e58 | 4230 | = (shift_cost (speed, compute_mode, post_shift - 1) |
4231 | + shift_cost (speed, compute_mode, 1) | |
4232 | + 2 * add_cost (speed, compute_mode)); | |
ebf4f764 | 4233 | t1 = expmed_mult_highpart (compute_mode, op0, |
127cb1cd | 4234 | GEN_INT (ml), |
75ff336e | 4235 | NULL_RTX, 1, |
4236 | max_cost - extra_cost); | |
4237 | if (t1 == 0) | |
4238 | goto fail1; | |
941522d6 | 4239 | t2 = force_operand (gen_rtx_MINUS (compute_mode, |
4240 | op0, t1), | |
75ff336e | 4241 | NULL_RTX); |
f5ff0b21 | 4242 | t3 = expand_shift (RSHIFT_EXPR, compute_mode, |
4243 | t2, 1, NULL_RTX, 1); | |
941522d6 | 4244 | t4 = force_operand (gen_rtx_PLUS (compute_mode, |
4245 | t1, t3), | |
75ff336e | 4246 | NULL_RTX); |
7c446c95 | 4247 | quotient = expand_shift |
4248 | (RSHIFT_EXPR, compute_mode, t4, | |
f5ff0b21 | 4249 | post_shift - 1, tquotient, 1); |
75ff336e | 4250 | } |
4251 | else | |
4252 | { | |
4253 | rtx t1, t2; | |
4254 | ||
84ab528e | 4255 | if (pre_shift >= BITS_PER_WORD |
4256 | || post_shift >= BITS_PER_WORD) | |
4257 | goto fail1; | |
4258 | ||
7c446c95 | 4259 | t1 = expand_shift |
4260 | (RSHIFT_EXPR, compute_mode, op0, | |
f5ff0b21 | 4261 | pre_shift, NULL_RTX, 1); |
e56afeb2 | 4262 | extra_cost |
49a71e58 | 4263 | = (shift_cost (speed, compute_mode, pre_shift) |
4264 | + shift_cost (speed, compute_mode, post_shift)); | |
ebf4f764 | 4265 | t2 = expmed_mult_highpart (compute_mode, t1, |
127cb1cd | 4266 | GEN_INT (ml), |
75ff336e | 4267 | NULL_RTX, 1, |
4268 | max_cost - extra_cost); | |
4269 | if (t2 == 0) | |
4270 | goto fail1; | |
7c446c95 | 4271 | quotient = expand_shift |
4272 | (RSHIFT_EXPR, compute_mode, t2, | |
f5ff0b21 | 4273 | post_shift, tquotient, 1); |
75ff336e | 4274 | } |
d2fa4ea5 | 4275 | } |
4276 | } | |
61e477c7 | 4277 | else /* Too wide mode to use tricky code */ |
4278 | break; | |
d2fa4ea5 | 4279 | |
4280 | insn = get_last_insn (); | |
41cf444a | 4281 | if (insn != last) |
4282 | set_dst_reg_note (insn, REG_EQUAL, | |
4283 | gen_rtx_UDIV (compute_mode, op0, op1), | |
4284 | quotient); | |
d2fa4ea5 | 4285 | } |
4286 | else /* TRUNC_DIV, signed */ | |
4287 | { | |
4288 | unsigned HOST_WIDE_INT ml; | |
4289 | int lgup, post_shift; | |
4de52edf | 4290 | rtx mlr; |
d2fa4ea5 | 4291 | HOST_WIDE_INT d = INTVAL (op1); |
f74f4e04 | 4292 | unsigned HOST_WIDE_INT abs_d; |
4293 | ||
3d77819c | 4294 | /* Since d might be INT_MIN, we have to cast to |
4295 | unsigned HOST_WIDE_INT before negating to avoid | |
4296 | undefined signed overflow. */ | |
34f60736 | 4297 | abs_d = (d >= 0 |
4298 | ? (unsigned HOST_WIDE_INT) d | |
4299 | : - (unsigned HOST_WIDE_INT) d); | |
d2fa4ea5 | 4300 | |
4301 | /* n rem d = n rem -d */ | |
4302 | if (rem_flag && d < 0) | |
4303 | { | |
4304 | d = abs_d; | |
2d232d05 | 4305 | op1 = gen_int_mode (abs_d, compute_mode); |
d2fa4ea5 | 4306 | } |
4307 | ||
4308 | if (d == 1) | |
4309 | quotient = op0; | |
4310 | else if (d == -1) | |
4311 | quotient = expand_unop (compute_mode, neg_optab, op0, | |
4312 | tquotient, 0); | |
6115f243 | 4313 | else if (HOST_BITS_PER_WIDE_INT >= size |
4314 | && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1)) | |
7676164c | 4315 | { |
4316 | /* This case is not handled correctly below. */ | |
4317 | quotient = emit_store_flag (tquotient, EQ, op0, op1, | |
4318 | compute_mode, 1, 1); | |
4319 | if (quotient == 0) | |
4320 | goto fail1; | |
4321 | } | |
d2fa4ea5 | 4322 | else if (EXACT_POWER_OF_2_OR_ZERO_P (d) |
49a71e58 | 4323 | && (rem_flag |
4324 | ? smod_pow2_cheap (speed, compute_mode) | |
4325 | : sdiv_pow2_cheap (speed, compute_mode)) | |
41323e11 | 4326 | /* We assume that cheap metric is true if the |
4327 | optab has an expander for this mode. */ | |
99bdde56 | 4328 | && ((optab_handler ((rem_flag ? smod_optab |
4329 | : sdiv_optab), | |
d6bf3b14 | 4330 | compute_mode) |
9884e77f | 4331 | != CODE_FOR_nothing) |
d6bf3b14 | 4332 | || (optab_handler (sdivmod_optab, |
4333 | compute_mode) | |
4334 | != CODE_FOR_nothing))) | |
d2fa4ea5 | 4335 | ; |
4336 | else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d)) | |
4337 | { | |
41323e11 | 4338 | if (rem_flag) |
4339 | { | |
4340 | remainder = expand_smod_pow2 (compute_mode, op0, d); | |
4341 | if (remainder) | |
4342 | return gen_lowpart (mode, remainder); | |
4343 | } | |
cb2e141e | 4344 | |
49a71e58 | 4345 | if (sdiv_pow2_cheap (speed, compute_mode) |
d6bf3b14 | 4346 | && ((optab_handler (sdiv_optab, compute_mode) |
cb2e141e | 4347 | != CODE_FOR_nothing) |
d6bf3b14 | 4348 | || (optab_handler (sdivmod_optab, compute_mode) |
cb2e141e | 4349 | != CODE_FOR_nothing))) |
4350 | quotient = expand_divmod (0, TRUNC_DIV_EXPR, | |
4351 | compute_mode, op0, | |
4352 | gen_int_mode (abs_d, | |
4353 | compute_mode), | |
4354 | NULL_RTX, 0); | |
4355 | else | |
4356 | quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d); | |
d2fa4ea5 | 4357 | |
41323e11 | 4358 | /* We have computed OP0 / abs(OP1). If OP1 is negative, |
4359 | negate the quotient. */ | |
d2fa4ea5 | 4360 | if (d < 0) |
4361 | { | |
4362 | insn = get_last_insn (); | |
38457527 | 4363 | if (insn != last |
dd192c2d | 4364 | && abs_d < ((unsigned HOST_WIDE_INT) 1 |
4365 | << (HOST_BITS_PER_WIDE_INT - 1))) | |
41cf444a | 4366 | set_dst_reg_note (insn, REG_EQUAL, |
4367 | gen_rtx_DIV (compute_mode, op0, | |
f62058c3 | 4368 | gen_int_mode |
4369 | (abs_d, | |
4370 | compute_mode)), | |
41cf444a | 4371 | quotient); |
d2fa4ea5 | 4372 | |
4373 | quotient = expand_unop (compute_mode, neg_optab, | |
4374 | quotient, quotient, 0); | |
4375 | } | |
4376 | } | |
61e477c7 | 4377 | else if (size <= HOST_BITS_PER_WIDE_INT) |
d2fa4ea5 | 4378 | { |
4379 | choose_multiplier (abs_d, size, size - 1, | |
127cb1cd | 4380 | &ml, &post_shift, &lgup); |
d2fa4ea5 | 4381 | if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1)) |
4382 | { | |
4383 | rtx t1, t2, t3; | |
4384 | ||
84ab528e | 4385 | if (post_shift >= BITS_PER_WORD |
4386 | || size - 1 >= BITS_PER_WORD) | |
4387 | goto fail1; | |
4388 | ||
49a71e58 | 4389 | extra_cost = (shift_cost (speed, compute_mode, post_shift) |
4390 | + shift_cost (speed, compute_mode, size - 1) | |
4391 | + add_cost (speed, compute_mode)); | |
ebf4f764 | 4392 | t1 = expmed_mult_highpart (compute_mode, op0, |
127cb1cd | 4393 | GEN_INT (ml), NULL_RTX, 0, |
33183a3c | 4394 | max_cost - extra_cost); |
d2fa4ea5 | 4395 | if (t1 == 0) |
4396 | goto fail1; | |
7c446c95 | 4397 | t2 = expand_shift |
4398 | (RSHIFT_EXPR, compute_mode, t1, | |
f5ff0b21 | 4399 | post_shift, NULL_RTX, 0); |
7c446c95 | 4400 | t3 = expand_shift |
4401 | (RSHIFT_EXPR, compute_mode, op0, | |
f5ff0b21 | 4402 | size - 1, NULL_RTX, 0); |
d2fa4ea5 | 4403 | if (d < 0) |
7014838c | 4404 | quotient |
4405 | = force_operand (gen_rtx_MINUS (compute_mode, | |
4406 | t3, t2), | |
4407 | tquotient); | |
d2fa4ea5 | 4408 | else |
7014838c | 4409 | quotient |
4410 | = force_operand (gen_rtx_MINUS (compute_mode, | |
4411 | t2, t3), | |
4412 | tquotient); | |
d2fa4ea5 | 4413 | } |
4414 | else | |
4415 | { | |
4416 | rtx t1, t2, t3, t4; | |
4417 | ||
84ab528e | 4418 | if (post_shift >= BITS_PER_WORD |
4419 | || size - 1 >= BITS_PER_WORD) | |
4420 | goto fail1; | |
4421 | ||
d2fa4ea5 | 4422 | ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1); |
4de52edf | 4423 | mlr = gen_int_mode (ml, compute_mode); |
49a71e58 | 4424 | extra_cost = (shift_cost (speed, compute_mode, post_shift) |
4425 | + shift_cost (speed, compute_mode, size - 1) | |
4426 | + 2 * add_cost (speed, compute_mode)); | |
ebf4f764 | 4427 | t1 = expmed_mult_highpart (compute_mode, op0, mlr, |
33183a3c | 4428 | NULL_RTX, 0, |
4429 | max_cost - extra_cost); | |
d2fa4ea5 | 4430 | if (t1 == 0) |
4431 | goto fail1; | |
7014838c | 4432 | t2 = force_operand (gen_rtx_PLUS (compute_mode, |
4433 | t1, op0), | |
d2fa4ea5 | 4434 | NULL_RTX); |
7c446c95 | 4435 | t3 = expand_shift |
4436 | (RSHIFT_EXPR, compute_mode, t2, | |
f5ff0b21 | 4437 | post_shift, NULL_RTX, 0); |
7c446c95 | 4438 | t4 = expand_shift |
4439 | (RSHIFT_EXPR, compute_mode, op0, | |
f5ff0b21 | 4440 | size - 1, NULL_RTX, 0); |
d2fa4ea5 | 4441 | if (d < 0) |
7014838c | 4442 | quotient |
4443 | = force_operand (gen_rtx_MINUS (compute_mode, | |
4444 | t4, t3), | |
4445 | tquotient); | |
d2fa4ea5 | 4446 | else |
7014838c | 4447 | quotient |
4448 | = force_operand (gen_rtx_MINUS (compute_mode, | |
4449 | t3, t4), | |
4450 | tquotient); | |
d2fa4ea5 | 4451 | } |
4452 | } | |
61e477c7 | 4453 | else /* Too wide mode to use tricky code */ |
4454 | break; | |
d2fa4ea5 | 4455 | |
38457527 | 4456 | insn = get_last_insn (); |
41cf444a | 4457 | if (insn != last) |
4458 | set_dst_reg_note (insn, REG_EQUAL, | |
4459 | gen_rtx_DIV (compute_mode, op0, op1), | |
4460 | quotient); | |
d2fa4ea5 | 4461 | } |
4462 | break; | |
4463 | } | |
4464 | fail1: | |
4465 | delete_insns_since (last); | |
4466 | break; | |
db96f378 | 4467 | |
d2fa4ea5 | 4468 | case FLOOR_DIV_EXPR: |
4469 | case FLOOR_MOD_EXPR: | |
4470 | /* We will come here only for signed operations. */ | |
4471 | if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size) | |
4472 | { | |
127cb1cd | 4473 | unsigned HOST_WIDE_INT mh, ml; |
d2fa4ea5 | 4474 | int pre_shift, lgup, post_shift; |
4475 | HOST_WIDE_INT d = INTVAL (op1); | |
4476 | ||
4477 | if (d > 0) | |
4478 | { | |
4479 | /* We could just as easily deal with negative constants here, | |
4480 | but it does not seem worth the trouble for GCC 2.6. */ | |
4481 | if (EXACT_POWER_OF_2_OR_ZERO_P (d)) | |
4482 | { | |
4483 | pre_shift = floor_log2 (d); | |
4484 | if (rem_flag) | |
4485 | { | |
4486 | remainder = expand_binop (compute_mode, and_optab, op0, | |
4487 | GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1), | |
4488 | remainder, 0, OPTAB_LIB_WIDEN); | |
4489 | if (remainder) | |
436b0397 | 4490 | return gen_lowpart (mode, remainder); |
d2fa4ea5 | 4491 | } |
7c446c95 | 4492 | quotient = expand_shift |
4493 | (RSHIFT_EXPR, compute_mode, op0, | |
f5ff0b21 | 4494 | pre_shift, tquotient, 0); |
d2fa4ea5 | 4495 | } |
4496 | else | |
4497 | { | |
4498 | rtx t1, t2, t3, t4; | |
4499 | ||
4500 | mh = choose_multiplier (d, size, size - 1, | |
4501 | &ml, &post_shift, &lgup); | |
611234b4 | 4502 | gcc_assert (!mh); |
d2fa4ea5 | 4503 | |
84ab528e | 4504 | if (post_shift < BITS_PER_WORD |
4505 | && size - 1 < BITS_PER_WORD) | |
d2fa4ea5 | 4506 | { |
7c446c95 | 4507 | t1 = expand_shift |
4508 | (RSHIFT_EXPR, compute_mode, op0, | |
f5ff0b21 | 4509 | size - 1, NULL_RTX, 0); |
84ab528e | 4510 | t2 = expand_binop (compute_mode, xor_optab, op0, t1, |
4511 | NULL_RTX, 0, OPTAB_WIDEN); | |
49a71e58 | 4512 | extra_cost = (shift_cost (speed, compute_mode, post_shift) |
4513 | + shift_cost (speed, compute_mode, size - 1) | |
4514 | + 2 * add_cost (speed, compute_mode)); | |
ebf4f764 | 4515 | t3 = expmed_mult_highpart (compute_mode, t2, |
127cb1cd | 4516 | GEN_INT (ml), NULL_RTX, 1, |
84ab528e | 4517 | max_cost - extra_cost); |
4518 | if (t3 != 0) | |
4519 | { | |
7c446c95 | 4520 | t4 = expand_shift |
4521 | (RSHIFT_EXPR, compute_mode, t3, | |
f5ff0b21 | 4522 | post_shift, NULL_RTX, 1); |
84ab528e | 4523 | quotient = expand_binop (compute_mode, xor_optab, |
4524 | t4, t1, tquotient, 0, | |
4525 | OPTAB_WIDEN); | |
4526 | } | |
d2fa4ea5 | 4527 | } |
4528 | } | |
4529 | } | |
4530 | else | |
4531 | { | |
4532 | rtx nsign, t1, t2, t3, t4; | |
941522d6 | 4533 | t1 = force_operand (gen_rtx_PLUS (compute_mode, |
4534 | op0, constm1_rtx), NULL_RTX); | |
d2fa4ea5 | 4535 | t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX, |
4536 | 0, OPTAB_WIDEN); | |
7c446c95 | 4537 | nsign = expand_shift |
4538 | (RSHIFT_EXPR, compute_mode, t2, | |
f5ff0b21 | 4539 | size - 1, NULL_RTX, 0); |
941522d6 | 4540 | t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign), |
d2fa4ea5 | 4541 | NULL_RTX); |
4542 | t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1, | |
4543 | NULL_RTX, 0); | |
4544 | if (t4) | |
4545 | { | |
4546 | rtx t5; | |
4547 | t5 = expand_unop (compute_mode, one_cmpl_optab, nsign, | |
4548 | NULL_RTX, 0); | |
941522d6 | 4549 | quotient = force_operand (gen_rtx_PLUS (compute_mode, |
4550 | t4, t5), | |
d2fa4ea5 | 4551 | tquotient); |
4552 | } | |
4553 | } | |
4554 | } | |
4555 | ||
4556 | if (quotient != 0) | |
4557 | break; | |
4558 | delete_insns_since (last); | |
4559 | ||
4560 | /* Try using an instruction that produces both the quotient and | |
4561 | remainder, using truncation. We can easily compensate the quotient | |
4562 | or remainder to get floor rounding, once we have the remainder. | |
4563 | Notice that we compute also the final remainder value here, | |
4564 | and return the result right away. */ | |
36db22a0 | 4565 | if (target == 0 || GET_MODE (target) != compute_mode) |
d2fa4ea5 | 4566 | target = gen_reg_rtx (compute_mode); |
e324608c | 4567 | |
d2fa4ea5 | 4568 | if (rem_flag) |
4569 | { | |
e324608c | 4570 | remainder |
8ad4c111 | 4571 | = REG_P (target) ? target : gen_reg_rtx (compute_mode); |
d2fa4ea5 | 4572 | quotient = gen_reg_rtx (compute_mode); |
4573 | } | |
4574 | else | |
4575 | { | |
e324608c | 4576 | quotient |
8ad4c111 | 4577 | = REG_P (target) ? target : gen_reg_rtx (compute_mode); |
d2fa4ea5 | 4578 | remainder = gen_reg_rtx (compute_mode); |
4579 | } | |
4580 | ||
4581 | if (expand_twoval_binop (sdivmod_optab, op0, op1, | |
4582 | quotient, remainder, 0)) | |
4583 | { | |
4584 | /* This could be computed with a branch-less sequence. | |
4585 | Save that for later. */ | |
4586 | rtx tem; | |
4587 | rtx label = gen_label_rtx (); | |
c5aa1e92 | 4588 | do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label); |
d2fa4ea5 | 4589 | tem = expand_binop (compute_mode, xor_optab, op0, op1, |
4590 | NULL_RTX, 0, OPTAB_WIDEN); | |
c5aa1e92 | 4591 | do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label); |
d2fa4ea5 | 4592 | expand_dec (quotient, const1_rtx); |
4593 | expand_inc (remainder, op1); | |
4594 | emit_label (label); | |
436b0397 | 4595 | return gen_lowpart (mode, rem_flag ? remainder : quotient); |
d2fa4ea5 | 4596 | } |
4597 | ||
4598 | /* No luck with division elimination or divmod. Have to do it | |
4599 | by conditionally adjusting op0 *and* the result. */ | |
db96f378 | 4600 | { |
d2fa4ea5 | 4601 | rtx label1, label2, label3, label4, label5; |
4602 | rtx adjusted_op0; | |
4603 | rtx tem; | |
4604 | ||
4605 | quotient = gen_reg_rtx (compute_mode); | |
4606 | adjusted_op0 = copy_to_mode_reg (compute_mode, op0); | |
4607 | label1 = gen_label_rtx (); | |
4608 | label2 = gen_label_rtx (); | |
4609 | label3 = gen_label_rtx (); | |
4610 | label4 = gen_label_rtx (); | |
4611 | label5 = gen_label_rtx (); | |
c5aa1e92 | 4612 | do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2); |
4613 | do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1); | |
d2fa4ea5 | 4614 | tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1, |
4615 | quotient, 0, OPTAB_LIB_WIDEN); | |
4616 | if (tem != quotient) | |
4617 | emit_move_insn (quotient, tem); | |
4618 | emit_jump_insn (gen_jump (label5)); | |
4619 | emit_barrier (); | |
4620 | emit_label (label1); | |
db96f378 | 4621 | expand_inc (adjusted_op0, const1_rtx); |
d2fa4ea5 | 4622 | emit_jump_insn (gen_jump (label4)); |
4623 | emit_barrier (); | |
4624 | emit_label (label2); | |
c5aa1e92 | 4625 | do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3); |
d2fa4ea5 | 4626 | tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1, |
4627 | quotient, 0, OPTAB_LIB_WIDEN); | |
4628 | if (tem != quotient) | |
4629 | emit_move_insn (quotient, tem); | |
4630 | emit_jump_insn (gen_jump (label5)); | |
4631 | emit_barrier (); | |
4632 | emit_label (label3); | |
4633 | expand_dec (adjusted_op0, const1_rtx); | |
4634 | emit_label (label4); | |
4635 | tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1, | |
4636 | quotient, 0, OPTAB_LIB_WIDEN); | |
4637 | if (tem != quotient) | |
4638 | emit_move_insn (quotient, tem); | |
4639 | expand_dec (quotient, const1_rtx); | |
4640 | emit_label (label5); | |
db96f378 | 4641 | } |
d2fa4ea5 | 4642 | break; |
db96f378 | 4643 | |
d2fa4ea5 | 4644 | case CEIL_DIV_EXPR: |
4645 | case CEIL_MOD_EXPR: | |
4646 | if (unsignedp) | |
4647 | { | |
3f4d178c | 4648 | if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))) |
4649 | { | |
4650 | rtx t1, t2, t3; | |
4651 | unsigned HOST_WIDE_INT d = INTVAL (op1); | |
4652 | t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0, | |
f5ff0b21 | 4653 | floor_log2 (d), tquotient, 1); |
3f4d178c | 4654 | t2 = expand_binop (compute_mode, and_optab, op0, |
4655 | GEN_INT (d - 1), | |
4656 | NULL_RTX, 1, OPTAB_LIB_WIDEN); | |
4657 | t3 = gen_reg_rtx (compute_mode); | |
4658 | t3 = emit_store_flag (t3, NE, t2, const0_rtx, | |
4659 | compute_mode, 1, 1); | |
9d7a4e0b | 4660 | if (t3 == 0) |
4661 | { | |
4662 | rtx lab; | |
4663 | lab = gen_label_rtx (); | |
c5aa1e92 | 4664 | do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab); |
9d7a4e0b | 4665 | expand_inc (t1, const1_rtx); |
4666 | emit_label (lab); | |
4667 | quotient = t1; | |
4668 | } | |
4669 | else | |
941522d6 | 4670 | quotient = force_operand (gen_rtx_PLUS (compute_mode, |
4671 | t1, t3), | |
9d7a4e0b | 4672 | tquotient); |
3f4d178c | 4673 | break; |
4674 | } | |
d2fa4ea5 | 4675 | |
4676 | /* Try using an instruction that produces both the quotient and | |
4677 | remainder, using truncation. We can easily compensate the | |
4678 | quotient or remainder to get ceiling rounding, once we have the | |
4679 | remainder. Notice that we compute also the final remainder | |
4680 | value here, and return the result right away. */ | |
36db22a0 | 4681 | if (target == 0 || GET_MODE (target) != compute_mode) |
d2fa4ea5 | 4682 | target = gen_reg_rtx (compute_mode); |
e324608c | 4683 | |
d2fa4ea5 | 4684 | if (rem_flag) |
4685 | { | |
8ad4c111 | 4686 | remainder = (REG_P (target) |
e324608c | 4687 | ? target : gen_reg_rtx (compute_mode)); |
d2fa4ea5 | 4688 | quotient = gen_reg_rtx (compute_mode); |
4689 | } | |
4690 | else | |
4691 | { | |
8ad4c111 | 4692 | quotient = (REG_P (target) |
e324608c | 4693 | ? target : gen_reg_rtx (compute_mode)); |
d2fa4ea5 | 4694 | remainder = gen_reg_rtx (compute_mode); |
4695 | } | |
4696 | ||
4697 | if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, | |
4698 | remainder, 1)) | |
4699 | { | |
4700 | /* This could be computed with a branch-less sequence. | |
4701 | Save that for later. */ | |
4702 | rtx label = gen_label_rtx (); | |
c5aa1e92 | 4703 | do_cmp_and_jump (remainder, const0_rtx, EQ, |
4704 | compute_mode, label); | |
d2fa4ea5 | 4705 | expand_inc (quotient, const1_rtx); |
4706 | expand_dec (remainder, op1); | |
4707 | emit_label (label); | |
436b0397 | 4708 | return gen_lowpart (mode, rem_flag ? remainder : quotient); |
d2fa4ea5 | 4709 | } |
4710 | ||
4711 | /* No luck with division elimination or divmod. Have to do it | |
4712 | by conditionally adjusting op0 *and* the result. */ | |
db96f378 | 4713 | { |
d2fa4ea5 | 4714 | rtx label1, label2; |
4715 | rtx adjusted_op0, tem; | |
4716 | ||
4717 | quotient = gen_reg_rtx (compute_mode); | |
4718 | adjusted_op0 = copy_to_mode_reg (compute_mode, op0); | |
4719 | label1 = gen_label_rtx (); | |
4720 | label2 = gen_label_rtx (); | |
c5aa1e92 | 4721 | do_cmp_and_jump (adjusted_op0, const0_rtx, NE, |
4722 | compute_mode, label1); | |
d2fa4ea5 | 4723 | emit_move_insn (quotient, const0_rtx); |
4724 | emit_jump_insn (gen_jump (label2)); | |
4725 | emit_barrier (); | |
4726 | emit_label (label1); | |
4727 | expand_dec (adjusted_op0, const1_rtx); | |
4728 | tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1, | |
4729 | quotient, 1, OPTAB_LIB_WIDEN); | |
4730 | if (tem != quotient) | |
4731 | emit_move_insn (quotient, tem); | |
4732 | expand_inc (quotient, const1_rtx); | |
4733 | emit_label (label2); | |
db96f378 | 4734 | } |
d2fa4ea5 | 4735 | } |
4736 | else /* signed */ | |
4737 | { | |
2b10064a | 4738 | if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)) |
4739 | && INTVAL (op1) >= 0) | |
4740 | { | |
4741 | /* This is extremely similar to the code for the unsigned case | |
4742 | above. For 2.7 we should merge these variants, but for | |
4743 | 2.6.1 I don't want to touch the code for unsigned since that | |
4744 | get used in C. The signed case will only be used by other | |
4745 | languages (Ada). */ | |
4746 | ||
4747 | rtx t1, t2, t3; | |
4748 | unsigned HOST_WIDE_INT d = INTVAL (op1); | |
4749 | t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0, | |
f5ff0b21 | 4750 | floor_log2 (d), tquotient, 0); |
2b10064a | 4751 | t2 = expand_binop (compute_mode, and_optab, op0, |
4752 | GEN_INT (d - 1), | |
4753 | NULL_RTX, 1, OPTAB_LIB_WIDEN); | |
4754 | t3 = gen_reg_rtx (compute_mode); | |
4755 | t3 = emit_store_flag (t3, NE, t2, const0_rtx, | |
4756 | compute_mode, 1, 1); | |
4757 | if (t3 == 0) | |
4758 | { | |
4759 | rtx lab; | |
4760 | lab = gen_label_rtx (); | |
c5aa1e92 | 4761 | do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab); |
2b10064a | 4762 | expand_inc (t1, const1_rtx); |
4763 | emit_label (lab); | |
4764 | quotient = t1; | |
4765 | } | |
4766 | else | |
941522d6 | 4767 | quotient = force_operand (gen_rtx_PLUS (compute_mode, |
4768 | t1, t3), | |
2b10064a | 4769 | tquotient); |
4770 | break; | |
4771 | } | |
4772 | ||
d2fa4ea5 | 4773 | /* Try using an instruction that produces both the quotient and |
4774 | remainder, using truncation. We can easily compensate the | |
4775 | quotient or remainder to get ceiling rounding, once we have the | |
4776 | remainder. Notice that we compute also the final remainder | |
4777 | value here, and return the result right away. */ | |
36db22a0 | 4778 | if (target == 0 || GET_MODE (target) != compute_mode) |
d2fa4ea5 | 4779 | target = gen_reg_rtx (compute_mode); |
4780 | if (rem_flag) | |
4781 | { | |
8ad4c111 | 4782 | remainder= (REG_P (target) |
e324608c | 4783 | ? target : gen_reg_rtx (compute_mode)); |
d2fa4ea5 | 4784 | quotient = gen_reg_rtx (compute_mode); |
4785 | } | |
4786 | else | |
4787 | { | |
8ad4c111 | 4788 | quotient = (REG_P (target) |
e324608c | 4789 | ? target : gen_reg_rtx (compute_mode)); |
d2fa4ea5 | 4790 | remainder = gen_reg_rtx (compute_mode); |
4791 | } | |
4792 | ||
4793 | if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, | |
4794 | remainder, 0)) | |
4795 | { | |
4796 | /* This could be computed with a branch-less sequence. | |
4797 | Save that for later. */ | |
4798 | rtx tem; | |
4799 | rtx label = gen_label_rtx (); | |
c5aa1e92 | 4800 | do_cmp_and_jump (remainder, const0_rtx, EQ, |
4801 | compute_mode, label); | |
d2fa4ea5 | 4802 | tem = expand_binop (compute_mode, xor_optab, op0, op1, |
4803 | NULL_RTX, 0, OPTAB_WIDEN); | |
c5aa1e92 | 4804 | do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label); |
d2fa4ea5 | 4805 | expand_inc (quotient, const1_rtx); |
4806 | expand_dec (remainder, op1); | |
4807 | emit_label (label); | |
436b0397 | 4808 | return gen_lowpart (mode, rem_flag ? remainder : quotient); |
d2fa4ea5 | 4809 | } |
4810 | ||
4811 | /* No luck with division elimination or divmod. Have to do it | |
4812 | by conditionally adjusting op0 *and* the result. */ | |
db96f378 | 4813 | { |
d2fa4ea5 | 4814 | rtx label1, label2, label3, label4, label5; |
4815 | rtx adjusted_op0; | |
4816 | rtx tem; | |
4817 | ||
4818 | quotient = gen_reg_rtx (compute_mode); | |
4819 | adjusted_op0 = copy_to_mode_reg (compute_mode, op0); | |
4820 | label1 = gen_label_rtx (); | |
4821 | label2 = gen_label_rtx (); | |
4822 | label3 = gen_label_rtx (); | |
4823 | label4 = gen_label_rtx (); | |
4824 | label5 = gen_label_rtx (); | |
c5aa1e92 | 4825 | do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2); |
4826 | do_cmp_and_jump (adjusted_op0, const0_rtx, GT, | |
4827 | compute_mode, label1); | |
d2fa4ea5 | 4828 | tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1, |
4829 | quotient, 0, OPTAB_LIB_WIDEN); | |
4830 | if (tem != quotient) | |
4831 | emit_move_insn (quotient, tem); | |
4832 | emit_jump_insn (gen_jump (label5)); | |
4833 | emit_barrier (); | |
4834 | emit_label (label1); | |
4835 | expand_dec (adjusted_op0, const1_rtx); | |
4836 | emit_jump_insn (gen_jump (label4)); | |
4837 | emit_barrier (); | |
4838 | emit_label (label2); | |
c5aa1e92 | 4839 | do_cmp_and_jump (adjusted_op0, const0_rtx, LT, |
4840 | compute_mode, label3); | |
d2fa4ea5 | 4841 | tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1, |
4842 | quotient, 0, OPTAB_LIB_WIDEN); | |
4843 | if (tem != quotient) | |
4844 | emit_move_insn (quotient, tem); | |
4845 | emit_jump_insn (gen_jump (label5)); | |
4846 | emit_barrier (); | |
4847 | emit_label (label3); | |
4848 | expand_inc (adjusted_op0, const1_rtx); | |
4849 | emit_label (label4); | |
4850 | tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1, | |
4851 | quotient, 0, OPTAB_LIB_WIDEN); | |
4852 | if (tem != quotient) | |
4853 | emit_move_insn (quotient, tem); | |
4854 | expand_inc (quotient, const1_rtx); | |
4855 | emit_label (label5); | |
db96f378 | 4856 | } |
d2fa4ea5 | 4857 | } |
4858 | break; | |
64e50eaa | 4859 | |
d2fa4ea5 | 4860 | case EXACT_DIV_EXPR: |
4861 | if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size) | |
4862 | { | |
4863 | HOST_WIDE_INT d = INTVAL (op1); | |
4864 | unsigned HOST_WIDE_INT ml; | |
bec2d490 | 4865 | int pre_shift; |
d2fa4ea5 | 4866 | rtx t1; |
4867 | ||
bec2d490 | 4868 | pre_shift = floor_log2 (d & -d); |
4869 | ml = invert_mod2n (d >> pre_shift, size); | |
4870 | t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0, | |
f5ff0b21 | 4871 | pre_shift, NULL_RTX, unsignedp); |
213b27c9 | 4872 | quotient = expand_mult (compute_mode, t1, |
2d232d05 | 4873 | gen_int_mode (ml, compute_mode), |
273014f4 | 4874 | NULL_RTX, 1); |
d2fa4ea5 | 4875 | |
4876 | insn = get_last_insn (); | |
41cf444a | 4877 | set_dst_reg_note (insn, REG_EQUAL, |
4878 | gen_rtx_fmt_ee (unsignedp ? UDIV : DIV, | |
4879 | compute_mode, op0, op1), | |
4880 | quotient); | |
d2fa4ea5 | 4881 | } |
4882 | break; | |
4883 | ||
4884 | case ROUND_DIV_EXPR: | |
4885 | case ROUND_MOD_EXPR: | |
c3118728 | 4886 | if (unsignedp) |
4887 | { | |
4888 | rtx tem; | |
4889 | rtx label; | |
4890 | label = gen_label_rtx (); | |
4891 | quotient = gen_reg_rtx (compute_mode); | |
4892 | remainder = gen_reg_rtx (compute_mode); | |
4893 | if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0) | |
4894 | { | |
4895 | rtx tem; | |
4896 | quotient = expand_binop (compute_mode, udiv_optab, op0, op1, | |
4897 | quotient, 1, OPTAB_LIB_WIDEN); | |
4898 | tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1); | |
4899 | remainder = expand_binop (compute_mode, sub_optab, op0, tem, | |
4900 | remainder, 1, OPTAB_LIB_WIDEN); | |
4901 | } | |
29c05e22 | 4902 | tem = plus_constant (compute_mode, op1, -1); |
f5ff0b21 | 4903 | tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1); |
c5aa1e92 | 4904 | do_cmp_and_jump (remainder, tem, LEU, compute_mode, label); |
c3118728 | 4905 | expand_inc (quotient, const1_rtx); |
4906 | expand_dec (remainder, op1); | |
4907 | emit_label (label); | |
4908 | } | |
4909 | else | |
4910 | { | |
4911 | rtx abs_rem, abs_op1, tem, mask; | |
4912 | rtx label; | |
4913 | label = gen_label_rtx (); | |
4914 | quotient = gen_reg_rtx (compute_mode); | |
4915 | remainder = gen_reg_rtx (compute_mode); | |
4916 | if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0) | |
4917 | { | |
4918 | rtx tem; | |
4919 | quotient = expand_binop (compute_mode, sdiv_optab, op0, op1, | |
4920 | quotient, 0, OPTAB_LIB_WIDEN); | |
4921 | tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0); | |
4922 | remainder = expand_binop (compute_mode, sub_optab, op0, tem, | |
4923 | remainder, 0, OPTAB_LIB_WIDEN); | |
4924 | } | |
bec2d490 | 4925 | abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0); |
4926 | abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0); | |
c3118728 | 4927 | tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem, |
f5ff0b21 | 4928 | 1, NULL_RTX, 1); |
c5aa1e92 | 4929 | do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label); |
c3118728 | 4930 | tem = expand_binop (compute_mode, xor_optab, op0, op1, |
4931 | NULL_RTX, 0, OPTAB_WIDEN); | |
4932 | mask = expand_shift (RSHIFT_EXPR, compute_mode, tem, | |
f5ff0b21 | 4933 | size - 1, NULL_RTX, 0); |
c3118728 | 4934 | tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx, |
4935 | NULL_RTX, 0, OPTAB_WIDEN); | |
4936 | tem = expand_binop (compute_mode, sub_optab, tem, mask, | |
4937 | NULL_RTX, 0, OPTAB_WIDEN); | |
4938 | expand_inc (quotient, tem); | |
4939 | tem = expand_binop (compute_mode, xor_optab, mask, op1, | |
4940 | NULL_RTX, 0, OPTAB_WIDEN); | |
4941 | tem = expand_binop (compute_mode, sub_optab, tem, mask, | |
4942 | NULL_RTX, 0, OPTAB_WIDEN); | |
4943 | expand_dec (remainder, tem); | |
4944 | emit_label (label); | |
4945 | } | |
4946 | return gen_lowpart (mode, rem_flag ? remainder : quotient); | |
a4194ff7 | 4947 | |
0dbd1c74 | 4948 | default: |
611234b4 | 4949 | gcc_unreachable (); |
d2fa4ea5 | 4950 | } |
db96f378 | 4951 | |
d2fa4ea5 | 4952 | if (quotient == 0) |
db96f378 | 4953 | { |
36db22a0 | 4954 | if (target && GET_MODE (target) != compute_mode) |
4955 | target = 0; | |
4956 | ||
d2fa4ea5 | 4957 | if (rem_flag) |
db96f378 | 4958 | { |
d6567151 | 4959 | /* Try to produce the remainder without producing the quotient. |
cb0ccc1e | 4960 | If we seem to have a divmod pattern that does not require widening, |
df07c3ae | 4961 | don't try widening here. We should really have a WIDEN argument |
d6567151 | 4962 | to expand_twoval_binop, since what we'd really like to do here is |
4963 | 1) try a mod insn in compute_mode | |
4964 | 2) try a divmod insn in compute_mode | |
4965 | 3) try a div insn in compute_mode and multiply-subtract to get | |
4966 | remainder | |
4967 | 4) try the same things with widening allowed. */ | |
4968 | remainder | |
4969 | = sign_expand_binop (compute_mode, umod_optab, smod_optab, | |
4970 | op0, op1, target, | |
4971 | unsignedp, | |
d6bf3b14 | 4972 | ((optab_handler (optab2, compute_mode) |
d6567151 | 4973 | != CODE_FOR_nothing) |
4974 | ? OPTAB_DIRECT : OPTAB_WIDEN)); | |
d2fa4ea5 | 4975 | if (remainder == 0) |
db96f378 | 4976 | { |
4977 | /* No luck there. Can we do remainder and divide at once | |
4978 | without a library call? */ | |
d2fa4ea5 | 4979 | remainder = gen_reg_rtx (compute_mode); |
4980 | if (! expand_twoval_binop ((unsignedp | |
4981 | ? udivmod_optab | |
4982 | : sdivmod_optab), | |
4983 | op0, op1, | |
4984 | NULL_RTX, remainder, unsignedp)) | |
4985 | remainder = 0; | |
db96f378 | 4986 | } |
d2fa4ea5 | 4987 | |
4988 | if (remainder) | |
4989 | return gen_lowpart (mode, remainder); | |
db96f378 | 4990 | } |
db96f378 | 4991 | |
22971e4a | 4992 | /* Produce the quotient. Try a quotient insn, but not a library call. |
4993 | If we have a divmod in this mode, use it in preference to widening | |
4994 | the div (for this test we assume it will not fail). Note that optab2 | |
4995 | is set to the one of the two optabs that the call below will use. */ | |
4996 | quotient | |
4997 | = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab, | |
4998 | op0, op1, rem_flag ? NULL_RTX : target, | |
4999 | unsignedp, | |
d6bf3b14 | 5000 | ((optab_handler (optab2, compute_mode) |
22971e4a | 5001 | != CODE_FOR_nothing) |
5002 | ? OPTAB_DIRECT : OPTAB_WIDEN)); | |
5003 | ||
d2fa4ea5 | 5004 | if (quotient == 0) |
db96f378 | 5005 | { |
5006 | /* No luck there. Try a quotient-and-remainder insn, | |
5007 | keeping the quotient alone. */ | |
d2fa4ea5 | 5008 | quotient = gen_reg_rtx (compute_mode); |
db96f378 | 5009 | if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab, |
d2fa4ea5 | 5010 | op0, op1, |
5011 | quotient, NULL_RTX, unsignedp)) | |
5012 | { | |
5013 | quotient = 0; | |
5014 | if (! rem_flag) | |
5015 | /* Still no luck. If we are not computing the remainder, | |
5016 | use a library call for the quotient. */ | |
5017 | quotient = sign_expand_binop (compute_mode, | |
5018 | udiv_optab, sdiv_optab, | |
5019 | op0, op1, target, | |
5020 | unsignedp, OPTAB_LIB_WIDEN); | |
5021 | } | |
db96f378 | 5022 | } |
db96f378 | 5023 | } |
5024 | ||
db96f378 | 5025 | if (rem_flag) |
5026 | { | |
36db22a0 | 5027 | if (target && GET_MODE (target) != compute_mode) |
5028 | target = 0; | |
5029 | ||
d2fa4ea5 | 5030 | if (quotient == 0) |
30e9913f | 5031 | { |
5032 | /* No divide instruction either. Use library for remainder. */ | |
5033 | remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab, | |
5034 | op0, op1, target, | |
5035 | unsignedp, OPTAB_LIB_WIDEN); | |
5036 | /* No remainder function. Try a quotient-and-remainder | |
5037 | function, keeping the remainder. */ | |
5038 | if (!remainder) | |
5039 | { | |
5040 | remainder = gen_reg_rtx (compute_mode); | |
48e1416a | 5041 | if (!expand_twoval_binop_libfunc |
30e9913f | 5042 | (unsignedp ? udivmod_optab : sdivmod_optab, |
5043 | op0, op1, | |
5044 | NULL_RTX, remainder, | |
5045 | unsignedp ? UMOD : MOD)) | |
5046 | remainder = NULL_RTX; | |
5047 | } | |
5048 | } | |
db96f378 | 5049 | else |
5050 | { | |
5051 | /* We divided. Now finish doing X - Y * (X / Y). */ | |
d2fa4ea5 | 5052 | remainder = expand_mult (compute_mode, quotient, op1, |
5053 | NULL_RTX, unsignedp); | |
5054 | remainder = expand_binop (compute_mode, sub_optab, op0, | |
5055 | remainder, target, unsignedp, | |
5056 | OPTAB_LIB_WIDEN); | |
db96f378 | 5057 | } |
5058 | } | |
5059 | ||
d2fa4ea5 | 5060 | return gen_lowpart (mode, rem_flag ? remainder : quotient); |
db96f378 | 5061 | } |
5062 | \f | |
5063 | /* Return a tree node with data type TYPE, describing the value of X. | |
735f4358 | 5064 | Usually this is an VAR_DECL, if there is no obvious better choice. |
db96f378 | 5065 | X may be an expression, however we only support those expressions |
1e625a2e | 5066 | generated by loop.c. */ |
db96f378 | 5067 | |
5068 | tree | |
35cb5232 | 5069 | make_tree (tree type, rtx x) |
db96f378 | 5070 | { |
5071 | tree t; | |
5072 | ||
5073 | switch (GET_CODE (x)) | |
5074 | { | |
5075 | case CONST_INT: | |
7c446c95 | 5076 | { |
5077 | HOST_WIDE_INT hi = 0; | |
5078 | ||
5079 | if (INTVAL (x) < 0 | |
5080 | && !(TYPE_UNSIGNED (type) | |
5081 | && (GET_MODE_BITSIZE (TYPE_MODE (type)) | |
5082 | < HOST_BITS_PER_WIDE_INT))) | |
5083 | hi = -1; | |
48e1416a | 5084 | |
7016c612 | 5085 | t = build_int_cst_wide (type, INTVAL (x), hi); |
48e1416a | 5086 | |
7c446c95 | 5087 | return t; |
5088 | } | |
48e1416a | 5089 | |
db96f378 | 5090 | case CONST_DOUBLE: |
5091 | if (GET_MODE (x) == VOIDmode) | |
7016c612 | 5092 | t = build_int_cst_wide (type, |
5093 | CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x)); | |
db96f378 | 5094 | else |
5095 | { | |
5096 | REAL_VALUE_TYPE d; | |
5097 | ||
5098 | REAL_VALUE_FROM_CONST_DOUBLE (d, x); | |
5099 | t = build_real (type, d); | |
5100 | } | |
5101 | ||
5102 | return t; | |
886cfd4f | 5103 | |
5104 | case CONST_VECTOR: | |
5105 | { | |
aae57ecf | 5106 | int units = CONST_VECTOR_NUNITS (x); |
5107 | tree itype = TREE_TYPE (type); | |
fadf62f4 | 5108 | tree *elts; |
aae57ecf | 5109 | int i; |
886cfd4f | 5110 | |
886cfd4f | 5111 | /* Build a tree with vector elements. */ |
fadf62f4 | 5112 | elts = XALLOCAVEC (tree, units); |
886cfd4f | 5113 | for (i = units - 1; i >= 0; --i) |
5114 | { | |
aae57ecf | 5115 | rtx elt = CONST_VECTOR_ELT (x, i); |
fadf62f4 | 5116 | elts[i] = make_tree (itype, elt); |
886cfd4f | 5117 | } |
a4194ff7 | 5118 | |
fadf62f4 | 5119 | return build_vector (type, elts); |
886cfd4f | 5120 | } |
5121 | ||
db96f378 | 5122 | case PLUS: |
faa43f85 | 5123 | return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)), |
5124 | make_tree (type, XEXP (x, 1))); | |
a4194ff7 | 5125 | |
db96f378 | 5126 | case MINUS: |
faa43f85 | 5127 | return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)), |
5128 | make_tree (type, XEXP (x, 1))); | |
a4194ff7 | 5129 | |
db96f378 | 5130 | case NEG: |
faa43f85 | 5131 | return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0))); |
db96f378 | 5132 | |
5133 | case MULT: | |
faa43f85 | 5134 | return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)), |
5135 | make_tree (type, XEXP (x, 1))); | |
a4194ff7 | 5136 | |
db96f378 | 5137 | case ASHIFT: |
faa43f85 | 5138 | return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)), |
5139 | make_tree (type, XEXP (x, 1))); | |
a4194ff7 | 5140 | |
db96f378 | 5141 | case LSHIFTRT: |
71eea85c | 5142 | t = unsigned_type_for (type); |
37e8021c | 5143 | return fold_convert (type, build2 (RSHIFT_EXPR, t, |
5144 | make_tree (t, XEXP (x, 0)), | |
5145 | make_tree (type, XEXP (x, 1)))); | |
a4194ff7 | 5146 | |
db96f378 | 5147 | case ASHIFTRT: |
11773141 | 5148 | t = signed_type_for (type); |
37e8021c | 5149 | return fold_convert (type, build2 (RSHIFT_EXPR, t, |
5150 | make_tree (t, XEXP (x, 0)), | |
5151 | make_tree (type, XEXP (x, 1)))); | |
a4194ff7 | 5152 | |
db96f378 | 5153 | case DIV: |
5154 | if (TREE_CODE (type) != REAL_TYPE) | |
11773141 | 5155 | t = signed_type_for (type); |
db96f378 | 5156 | else |
5157 | t = type; | |
5158 | ||
37e8021c | 5159 | return fold_convert (type, build2 (TRUNC_DIV_EXPR, t, |
5160 | make_tree (t, XEXP (x, 0)), | |
5161 | make_tree (t, XEXP (x, 1)))); | |
db96f378 | 5162 | case UDIV: |
71eea85c | 5163 | t = unsigned_type_for (type); |
37e8021c | 5164 | return fold_convert (type, build2 (TRUNC_DIV_EXPR, t, |
5165 | make_tree (t, XEXP (x, 0)), | |
5166 | make_tree (t, XEXP (x, 1)))); | |
513fac1b | 5167 | |
5168 | case SIGN_EXTEND: | |
5169 | case ZERO_EXTEND: | |
dc24ddbd | 5170 | t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)), |
5171 | GET_CODE (x) == ZERO_EXTEND); | |
37e8021c | 5172 | return fold_convert (type, make_tree (t, XEXP (x, 0))); |
513fac1b | 5173 | |
96d5c2e2 | 5174 | case CONST: |
5175 | return make_tree (type, XEXP (x, 0)); | |
5176 | ||
5177 | case SYMBOL_REF: | |
5178 | t = SYMBOL_REF_DECL (x); | |
5179 | if (t) | |
5180 | return fold_convert (type, build_fold_addr_expr (t)); | |
5181 | /* else fall through. */ | |
5182 | ||
735f4358 | 5183 | default: |
e60a6f7b | 5184 | t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type); |
c54c9422 | 5185 | |
98155838 | 5186 | /* If TYPE is a POINTER_TYPE, we might need to convert X from |
5187 | address mode to pointer mode. */ | |
85d654dd | 5188 | if (POINTER_TYPE_P (type)) |
98155838 | 5189 | x = convert_memory_address_addr_space |
5190 | (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type))); | |
c54c9422 | 5191 | |
9fabac44 | 5192 | /* Note that we do *not* use SET_DECL_RTL here, because we do not |
5193 | want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */ | |
5ded8c6f | 5194 | t->decl_with_rtl.rtl = x; |
735f4358 | 5195 | |
db96f378 | 5196 | return t; |
5197 | } | |
5198 | } | |
db96f378 | 5199 | \f |
5200 | /* Compute the logical-and of OP0 and OP1, storing it in TARGET | |
5201 | and returning TARGET. | |
5202 | ||
5203 | If TARGET is 0, a pseudo-register or constant is returned. */ | |
5204 | ||
5205 | rtx | |
35cb5232 | 5206 | expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target) |
db96f378 | 5207 | { |
6de9716c | 5208 | rtx tem = 0; |
db96f378 | 5209 | |
6de9716c | 5210 | if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode) |
5211 | tem = simplify_binary_operation (AND, mode, op0, op1); | |
5212 | if (tem == 0) | |
db96f378 | 5213 | tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN); |
db96f378 | 5214 | |
5215 | if (target == 0) | |
5216 | target = tem; | |
5217 | else if (tem != target) | |
5218 | emit_move_insn (target, tem); | |
5219 | return target; | |
5220 | } | |
80e1bfa1 | 5221 | |
/* Helper function for emit_store_flag.  Try to use cstore pattern ICODE
   to compute the comparison "X CODE Y" and store the (possibly
   normalized) result in TARGET.

   MODE is the mode of X and Y; COMPARE_MODE is the mode the comparison
   is actually performed in; UNSIGNEDP says to zero-extend when widening
   operands.  TARGET_MODE is the desired result mode, or VOIDmode to use
   the pattern's natural output mode.  NORMALIZEP is as for
   emit_store_flag: 1 for a 0/1 result, -1 for a 0/-1 result, 0 to leave
   the raw STORE_FLAG_VALUE result.

   Returns the result rtx, or NULL_RTX (after deleting any partially
   emitted insns) if the pattern's operand predicates reject the
   operands or expansion fails.  */
static rtx
emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
	     enum machine_mode mode, enum machine_mode compare_mode,
	     int unsignedp, rtx x, rtx y, int normalizep,
	     enum machine_mode target_mode)
{
  struct expand_operand ops[4];
  rtx op0, last, comparison, subtarget;
  /* The mode the pattern produces its result in (operand 0).  */
  enum machine_mode result_mode = insn_data[(int) icode].operand[0].mode;

  /* Remember where we are so a failed expansion can be rolled back.  */
  last = get_last_insn ();
  x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
  y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
  if (!x || !y)
    {
      delete_insns_since (last);
      return NULL_RTX;
    }

  if (target_mode == VOIDmode)
    target_mode = result_mode;
  if (!target)
    target = gen_reg_rtx (target_mode);

  comparison = gen_rtx_fmt_ee (code, result_mode, x, y);

  /* When optimizing, let the expander pick a fresh pseudo for the
     output rather than forcing TARGET (better CSE).  */
  create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
  create_fixed_operand (&ops[1], comparison);
  create_fixed_operand (&ops[2], x);
  create_fixed_operand (&ops[3], y);
  if (!maybe_expand_insn (icode, 4, ops))
    {
      delete_insns_since (last);
      return NULL_RTX;
    }
  subtarget = ops[0].value;

  /* If we are converting to a wider mode, first convert to
     TARGET_MODE, then normalize.  This produces better combining
     opportunities on machines that have a SIGN_EXTRACT when we are
     testing a single bit.  This mostly benefits the 68k.

     If STORE_FLAG_VALUE does not have the sign bit set when
     interpreted in MODE, we can do this conversion as unsigned, which
     is usually more efficient.  */
  if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
    {
      convert_move (target, subtarget,
		    val_signbit_known_clear_p (result_mode,
					       STORE_FLAG_VALUE));
      op0 = target;
      result_mode = target_mode;
    }
  else
    op0 = subtarget;

  /* If we want to keep subexpressions around, don't reuse our last
     target.  */
  if (optimize)
    subtarget = 0;

  /* Now normalize to the proper value in MODE.  Sometimes we don't
     have to do anything.  */
  if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
    ;
  /* STORE_FLAG_VALUE might be the most negative number, so write
     the comparison this way to avoid a compiler-time warning.  */
  else if (- normalizep == STORE_FLAG_VALUE)
    op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);

  /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
     it hard to use a value of just the sign bit due to ANSI integer
     constant typing rules.  */
  else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
    /* Shift the sign bit down to bit 0; arithmetic shift gives 0/-1,
       logical shift gives 0/1.  */
    op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
			GET_MODE_BITSIZE (result_mode) - 1, subtarget,
			normalizep == 1);
  else
    {
      /* STORE_FLAG_VALUE is odd: isolate bit 0, then negate for a
	 0/-1 result.  */
      gcc_assert (STORE_FLAG_VALUE & 1);

      op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
      if (normalizep == -1)
	op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
    }

  /* If we were converting to a smaller mode, do the conversion now.  */
  if (target_mode != result_mode)
    {
      convert_move (target, op0, 0);
      return target;
    }
  else
    return op0;
}
5318 | ||
db96f378 | 5319 | |
/* A subroutine of emit_store_flag only including "tricks" that do not
   need a recursive call.  These are kept separate to avoid infinite
   loops.  (The double-word case below does call emit_store_flag, but
   only at the narrower word_mode, so it cannot recurse back here
   indefinitely.)

   Arguments are as for emit_store_flag, plus TARGET_MODE, the desired
   mode of the result (VOIDmode to take the natural mode).  Returns the
   result rtx or 0 if no non-recursive strategy applied.  */

static rtx
emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
		   enum machine_mode mode, int unsignedp, int normalizep,
		   enum machine_mode target_mode)
{
  rtx subtarget;
  enum insn_code icode;
  enum machine_mode compare_mode;
  enum mode_class mclass;
  enum rtx_code scode;	/* CODE with operands swapped, for float retry.  */
  rtx tem;

  if (unsignedp)
    code = unsigned_condition (code);
  scode = swap_condition (code);

  /* If one operand is constant, make it the second one.  Only do this
     if the other operand is not constant as well.  */

  if (swap_commutative_operands_p (op0, op1))
    {
      tem = op0;
      op0 = op1;
      op1 = tem;
      code = swap_condition (code);
    }

  if (mode == VOIDmode)
    mode = GET_MODE (op0);

  /* For some comparisons with 1 and -1, we can convert this to
     comparisons with zero.  This will often produce more opportunities for
     store-flag insns.  */

  switch (code)
    {
    case LT:
      if (op1 == const1_rtx)
	op1 = const0_rtx, code = LE;
      break;
    case LE:
      if (op1 == constm1_rtx)
	op1 = const0_rtx, code = LT;
      break;
    case GE:
      if (op1 == const1_rtx)
	op1 = const0_rtx, code = GT;
      break;
    case GT:
      if (op1 == constm1_rtx)
	op1 = const0_rtx, code = GE;
      break;
    case GEU:
      if (op1 == const1_rtx)
	op1 = const0_rtx, code = NE;
      break;
    case LTU:
      if (op1 == const1_rtx)
	op1 = const0_rtx, code = EQ;
      break;
    default:
      break;
    }

  /* If we are comparing a double-word integer with zero or -1, we can
     convert the comparison into one involving a single word.  */
  if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
      && GET_MODE_CLASS (mode) == MODE_INT
      && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
    {
      if ((code == EQ || code == NE)
	  && (op1 == const0_rtx || op1 == constm1_rtx))
	{
	  rtx op00, op01;

	  /* Do a logical OR or AND of the two words and compare the
	     result.  (OR for == 0 / != 0, AND for == -1 / != -1.)  */
	  op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
	  op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
	  tem = expand_binop (word_mode,
			      op1 == const0_rtx ? ior_optab : and_optab,
			      op00, op01, NULL_RTX, unsignedp,
			      OPTAB_DIRECT);

	  if (tem != 0)
	    tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
				   unsignedp, normalizep);
	}
      else if ((code == LT || code == GE) && op1 == const0_rtx)
	{
	  rtx op0h;

	  /* If testing the sign bit, can just test on high word.  */
	  op0h = simplify_gen_subreg (word_mode, op0, mode,
				      subreg_highpart_offset (word_mode,
							      mode));
	  tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
				 unsignedp, normalizep);
	}
      else
	tem = NULL_RTX;

      if (tem)
	{
	  if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
	    return tem;
	  if (!target)
	    target = gen_reg_rtx (target_mode);

	  /* Sign-extend only if the flag value could have its sign bit
	     set in word_mode.  */
	  convert_move (target, tem,
			!val_signbit_known_set_p (word_mode,
						  (normalizep ? normalizep
						   : STORE_FLAG_VALUE)));
	  return target;
	}
    }

  /* If this is A < 0 or A >= 0, we can do this by taking the ones
     complement of A (for GE) and shifting the sign bit to the low bit.  */
  if (op1 == const0_rtx && (code == LT || code == GE)
      && GET_MODE_CLASS (mode) == MODE_INT
      && (normalizep || STORE_FLAG_VALUE == 1
	  || val_signbit_p (mode, STORE_FLAG_VALUE)))
    {
      subtarget = target;

      if (!target)
	target_mode = mode;

      /* If the result is to be wider than OP0, it is best to convert it
	 first.  If it is to be narrower, it is *incorrect* to convert it
	 first.  */
      else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
	{
	  op0 = convert_modes (target_mode, mode, op0, 0);
	  mode = target_mode;
	}

      if (target_mode != mode)
	subtarget = 0;

      if (code == GE)
	op0 = expand_unop (mode, one_cmpl_optab, op0,
			   ((STORE_FLAG_VALUE == 1 || normalizep)
			    ? 0 : subtarget), 0);

      if (STORE_FLAG_VALUE == 1 || normalizep)
	/* If we are supposed to produce a 0/1 value, we want to do
	   a logical shift from the sign bit to the low-order bit; for
	   a -1/0 value, we do an arithmetic shift.  */
	op0 = expand_shift (RSHIFT_EXPR, mode, op0,
			    GET_MODE_BITSIZE (mode) - 1,
			    subtarget, normalizep != -1);

      if (mode != target_mode)
	op0 = convert_modes (target_mode, mode, op0, 0);

      return op0;
    }

  /* Finally, look for a cstore pattern, widening the compare mode as
     needed.  For MODE_CC comparisons the optab is indexed by CCmode.  */
  mclass = GET_MODE_CLASS (mode);
  for (compare_mode = mode; compare_mode != VOIDmode;
       compare_mode = GET_MODE_WIDER_MODE (compare_mode))
    {
      enum machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
      icode = optab_handler (cstore_optab, optab_mode);
      if (icode != CODE_FOR_nothing)
	{
	  do_pending_stack_adjust ();
	  tem = emit_cstore (target, icode, code, mode, compare_mode,
			     unsignedp, op0, op1, normalizep, target_mode);
	  if (tem)
	    return tem;

	  /* For floats, also try the swapped comparison.  */
	  if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	    {
	      tem = emit_cstore (target, icode, scode, mode, compare_mode,
				 unsignedp, op1, op0, normalizep, target_mode);
	      if (tem)
		return tem;
	    }
	  break;
	}
    }

  return 0;
}
5511 | ||
/* Emit a store-flags instruction for comparison CODE on OP0 and OP1
   and storing in TARGET.  Normally return TARGET.
   Return 0 if that cannot be done.

   MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
   it is VOIDmode, they cannot both be CONST_INT.

   UNSIGNEDP is for the case where we have to widen the operands
   to perform the operation.  It says to use zero-extension.

   NORMALIZEP is 1 if we should convert the result to be either zero
   or one.  Normalize is -1 if we should convert the result to be
   either zero or -1.  If NORMALIZEP is zero, the result will be left
   "raw" out of the scc insn.  */

rtx
emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
		 enum machine_mode mode, int unsignedp, int normalizep)
{
  enum machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
  enum rtx_code rcode;
  rtx subtarget;
  rtx tem, last, trueval;

  /* First try the non-recursive strategies (constant canonicalization,
     double-word splitting, sign-bit shifts, cstore patterns).  */
  tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
			   target_mode);
  if (tem)
    return tem;

  /* If we reached here, we can't do this with a scc insn, however there
     are some comparisons that can be done in other ways.  Don't do any
     of these cases if branches are very cheap.  */
  if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
    return 0;

  /* See what we need to return.  We can only return a 1, -1, or the
     sign bit.  */

  if (normalizep == 0)
    {
      if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
	normalizep = STORE_FLAG_VALUE;

      else if (val_signbit_p (mode, STORE_FLAG_VALUE))
	;
      else
	return 0;
    }

  /* Checkpoint for rolling back failed attempts below.  */
  last = get_last_insn ();

  /* If optimizing, use different pseudo registers for each insn, instead
     of reusing the same pseudo.  This leads to better CSE, but slows
     down the compiler, since there are more pseudos */
  subtarget = (!optimize
	       && (target_mode == mode)) ? target : NULL_RTX;
  /* The value the flag result should take when the comparison holds.  */
  trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);

  /* For floating-point comparisons, try the reverse comparison or try
     changing the "orderedness" of the comparison.  */
  if (GET_MODE_CLASS (mode) == MODE_FLOAT)
    {
      enum rtx_code first_code;
      bool and_them;

      rcode = reverse_condition_maybe_unordered (code);
      if (can_compare_p (rcode, mode, ccp_store_flag)
	  && (code == ORDERED || code == UNORDERED
	      || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
	      || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
	{
	  /* The reversed flag can be fixed up with "+ normalizep" when
	     flag and wanted values are 1/-1 opposites ...  */
	  int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
			  || (STORE_FLAG_VALUE == -1 && normalizep == 1));

	  /* For the reverse comparison, use either an addition or a XOR.
	     Only bother when the fix-up constant is free (rtx_cost 0).  */
	  if (want_add
	      && rtx_cost (GEN_INT (normalizep), PLUS, 1,
			   optimize_insn_for_speed_p ()) == 0)
	    {
	      tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
				       STORE_FLAG_VALUE, target_mode);
	      if (tem)
		return expand_binop (target_mode, add_optab, tem,
				     GEN_INT (normalizep),
				     target, 0, OPTAB_WIDEN);
	    }
	  else if (!want_add
		   && rtx_cost (trueval, XOR, 1,
				optimize_insn_for_speed_p ()) == 0)
	    {
	      tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
				       normalizep, target_mode);
	      if (tem)
		return expand_binop (target_mode, xor_optab, tem, trueval,
				     target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
	    }
	}

      delete_insns_since (last);

      /* Cannot split ORDERED and UNORDERED, only try the above trick.  */
      if (code == ORDERED || code == UNORDERED)
	return 0;

      and_them = split_comparison (code, mode, &first_code, &code);

      /* If there are no NaNs, the first comparison should always fall through.
	 Effectively change the comparison to the other one.  */
      if (!HONOR_NANS (mode))
	{
	  gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
	  return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
				    target_mode);
	}

#ifdef HAVE_conditional_move
      /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
	 conditional move.  */
      tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
			       normalizep, target_mode);
      if (tem == 0)
	return 0;

      if (and_them)
	tem = emit_conditional_move (target, code, op0, op1, mode,
				     tem, const0_rtx, GET_MODE (tem), 0);
      else
	tem = emit_conditional_move (target, code, op0, op1, mode,
				     trueval, tem, GET_MODE (tem), 0);

      if (tem == 0)
	delete_insns_since (last);
      return tem;
#else
      return 0;
#endif
    }

  /* The remaining tricks only apply to integer comparisons.  */

  if (GET_MODE_CLASS (mode) != MODE_INT)
    return 0;

  /* If this is an equality comparison of integers, we can try to exclusive-or
     (or subtract) the two operands and use a recursive call to try the
     comparison with zero.  Don't do any of these cases if branches are
     very cheap.  */

  if ((code == EQ || code == NE) && op1 != const0_rtx)
    {
      tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
			  OPTAB_WIDEN);

      if (tem == 0)
	tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
			    OPTAB_WIDEN);
      if (tem != 0)
	tem = emit_store_flag (target, code, tem, const0_rtx,
			       mode, unsignedp, normalizep);
      if (tem != 0)
	return tem;

      delete_insns_since (last);
    }

  /* For integer comparisons, try the reverse comparison.  However, for
     small X and if we'd have anyway to extend, implementing "X != 0"
     as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0".  */
  rcode = reverse_condition (code);
  if (can_compare_p (rcode, mode, ccp_store_flag)
      && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
	    && code == NE
	    && GET_MODE_SIZE (mode) < UNITS_PER_WORD
	    && op1 == const0_rtx))
    {
      int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
		      || (STORE_FLAG_VALUE == -1 && normalizep == 1));

      /* Again, for the reverse comparison, use either an addition or a XOR.  */
      if (want_add
	  && rtx_cost (GEN_INT (normalizep), PLUS, 1,
		       optimize_insn_for_speed_p ()) == 0)
	{
	  tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
				   STORE_FLAG_VALUE, target_mode);
	  if (tem != 0)
	    tem = expand_binop (target_mode, add_optab, tem,
				GEN_INT (normalizep), target, 0, OPTAB_WIDEN);
	}
      else if (!want_add
	       && rtx_cost (trueval, XOR, 1,
			    optimize_insn_for_speed_p ()) == 0)
	{
	  tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
				   normalizep, target_mode);
	  if (tem != 0)
	    tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
				INTVAL (trueval) >= 0, OPTAB_WIDEN);
	}

      if (tem != 0)
	return tem;
      delete_insns_since (last);
    }

  /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
     the constant zero.  Reject all other comparisons at this point.  Only
     do LE and GT if branches are expensive since they are expensive on
     2-operand machines.  */

  if (op1 != const0_rtx
      || (code != EQ && code != NE
	  && (BRANCH_COST (optimize_insn_for_speed_p (),
			   false) <= 1 || (code != LE && code != GT))))
    return 0;

  /* Try to put the result of the comparison in the sign bit.  Assume we can't
     do the necessary operation below.  */

  tem = 0;

  /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
     the sign bit set.  */

  if (code == LE)
    {
      /* This is destructive, so SUBTARGET can't be OP0.  */
      if (rtx_equal_p (subtarget, op0))
	subtarget = 0;

      tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
			  OPTAB_WIDEN);
      if (tem)
	tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
			    OPTAB_WIDEN);
    }

  /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
     number of bits in the mode of OP0, minus one.  */

  if (code == GT)
    {
      if (rtx_equal_p (subtarget, op0))
	subtarget = 0;

      tem = expand_shift (RSHIFT_EXPR, mode, op0,
			  GET_MODE_BITSIZE (mode) - 1,
			  subtarget, 0);
      tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
			  OPTAB_WIDEN);
    }

  if (code == EQ || code == NE)
    {
      /* For EQ or NE, one way to do the comparison is to apply an operation
	 that converts the operand into a positive number if it is nonzero
	 or zero if it was originally zero.  Then, for EQ, we subtract 1 and
	 for NE we negate.  This puts the result in the sign bit.  Then we
	 normalize with a shift, if needed.

	 Two operations that can do the above actions are ABS and FFS, so try
	 them.  If that doesn't work, and MODE is smaller than a full word,
	 we can use zero-extension to the wider mode (an unsigned conversion)
	 as the operation.  */

      /* Note that ABS doesn't yield a positive number for INT_MIN, but
	 that is compensated by the subsequent overflow when subtracting
	 one / negating.  */

      if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
	tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
      else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
	tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
      else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
	{
	  tem = convert_modes (word_mode, mode, op0, 1);
	  mode = word_mode;
	}

      if (tem != 0)
	{
	  if (code == EQ)
	    tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
				0, OPTAB_WIDEN);
	  else
	    tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
	}

      /* If we couldn't do it that way, for NE we can "or" the two's complement
	 of the value with itself.  For EQ, we take the one's complement of
	 that "or", which is an extra insn, so we only handle EQ if branches
	 are expensive.  */

      if (tem == 0
	  && (code == NE
	      || BRANCH_COST (optimize_insn_for_speed_p (),
			      false) > 1))
	{
	  if (rtx_equal_p (subtarget, op0))
	    subtarget = 0;

	  tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
	  tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
			      OPTAB_WIDEN);

	  if (tem && code == EQ)
	    tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
	}
    }

  /* The result is now in the sign bit; shift it down to bit 0,
     logically for 0/1 and arithmetically for 0/-1.  */
  if (tem && normalizep)
    tem = expand_shift (RSHIFT_EXPR, mode, tem,
			GET_MODE_BITSIZE (mode) - 1,
			subtarget, normalizep == 1);

  if (tem)
    {
      if (!target)
	;
      else if (GET_MODE (tem) != target_mode)
	{
	  convert_move (target, tem, 0);
	  tem = target;
	}
      else if (!subtarget)
	{
	  emit_move_insn (target, tem);
	  tem = target;
	}
    }
  else
    delete_insns_since (last);

  return tem;
}
469b49fd | 5847 | |
/* Like emit_store_flag, but always succeeds.  If emit_store_flag
   cannot expand the comparison directly, fall back to an explicit
   move/compare/jump/move sequence.  Arguments are as for
   emit_store_flag; the result is TARGET (or a fresh pseudo when
   TARGET is 0).  */

rtx
emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
		       enum machine_mode mode, int unsignedp, int normalizep)
{
  rtx tem, label;
  rtx trueval, falseval;

  /* First see if emit_store_flag can do the job.  */
  tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
  if (tem != 0)
    return tem;

  if (!target)
    target = gen_reg_rtx (word_mode);

  /* If this failed, we have to do this with set/compare/jump/set code.
     For foo != 0, if foo is in OP0, just replace it with 1 if nonzero.  */
  trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
  if (code == NE
      && GET_MODE_CLASS (mode) == MODE_INT
      && REG_P (target)
      && op0 == target
      && op1 == const0_rtx)
    {
      /* TARGET already holds OP0; only overwrite it when it is nonzero,
	 by jumping around the move when it equals zero.  */
      label = gen_label_rtx ();
      do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp,
			       mode, NULL_RTX, NULL_RTX, label, -1);
      emit_move_insn (target, trueval);
      emit_label (label);
      return target;
    }

  /* The sequence below writes TARGET before the compare, so it must not
     overlap the operands; use a fresh pseudo if it might.  */
  if (!REG_P (target)
      || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
    target = gen_reg_rtx (GET_MODE (target));

  /* Jump in the right direction if the target cannot implement CODE
     but can jump on its reverse condition.  */
  falseval = const0_rtx;
  if (! can_compare_p (code, mode, ccp_jump)
      && (! FLOAT_MODE_P (mode)
	  || code == ORDERED || code == UNORDERED
	  || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
	  || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
    {
      enum rtx_code rcode;
      if (FLOAT_MODE_P (mode))
	rcode = reverse_condition_maybe_unordered (code);
      else
	rcode = reverse_condition (code);

      /* Canonicalize to UNORDERED for the libcall.  */
      if (can_compare_p (rcode, mode, ccp_jump)
	  || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
	{
	  /* Using the reversed condition swaps the roles of the two
	     result values.  */
	  falseval = trueval;
	  trueval = const0_rtx;
	  code = rcode;
	}
    }

  /* Emit: TARGET = trueval; if (!cond) TARGET = falseval;  */
  emit_move_insn (target, trueval);
  label = gen_label_rtx ();
  do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
			   NULL_RTX, label, -1);

  emit_move_insn (target, falseval);
  emit_label (label);

  return target;
}
c5aa1e92 | 5921 | \f |
5922 | /* Perform possibly multi-word comparison and conditional jump to LABEL | |
85afca2d | 5923 | if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE. This is |
5924 | now a thin wrapper around do_compare_rtx_and_jump. */ | |
c5aa1e92 | 5925 | |
5926 | static void | |
35cb5232 | 5927 | do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode, |
5928 | rtx label) | |
c5aa1e92 | 5929 | { |
85afca2d | 5930 | int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU); |
5931 | do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode, | |
79ab74cc | 5932 | NULL_RTX, NULL_RTX, label, -1); |
c5aa1e92 | 5933 | } |