]>
Commit | Line | Data |
---|---|---|
db96f378 | 1 | /* Medium-level subroutines: convert bit-field store and extract |
2 | and shifts, multiplies and divides to rtl instructions. | |
aad93da1 | 3 | Copyright (C) 1987-2017 Free Software Foundation, Inc. |
db96f378 | 4 | |
f12b58b3 | 5 | This file is part of GCC. |
db96f378 | 6 | |
f12b58b3 | 7 | GCC is free software; you can redistribute it and/or modify it under |
8 | the terms of the GNU General Public License as published by the Free | |
8c4c00c1 | 9 | Software Foundation; either version 3, or (at your option) any later |
f12b58b3 | 10 | version. |
db96f378 | 11 | |
f12b58b3 | 12 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
13 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
14 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
15 | for more details. | |
db96f378 | 16 | |
17 | You should have received a copy of the GNU General Public License | |
8c4c00c1 | 18 | along with GCC; see the file COPYING3. If not see |
19 | <http://www.gnu.org/licenses/>. */ | |
db96f378 | 20 | |
21 | ||
22 | #include "config.h" | |
405711de | 23 | #include "system.h" |
805e22b2 | 24 | #include "coretypes.h" |
9ef16211 | 25 | #include "backend.h" |
7c29e30e | 26 | #include "target.h" |
db96f378 | 27 | #include "rtl.h" |
7c29e30e | 28 | #include "tree.h" |
29 | #include "predict.h" | |
ad7b10a2 | 30 | #include "memmodel.h" |
7c29e30e | 31 | #include "tm_p.h" |
32 | #include "expmed.h" | |
33 | #include "optabs.h" | |
61f54514 | 34 | #include "regs.h" |
7c29e30e | 35 | #include "emit-rtl.h" |
9ef16211 | 36 | #include "diagnostic-core.h" |
b20a8bb4 | 37 | #include "fold-const.h" |
9ed99284 | 38 | #include "stor-layout.h" |
d53441c8 | 39 | #include "dojump.h" |
40 | #include "explow.h" | |
db96f378 | 41 | #include "expr.h" |
771d21fa | 42 | #include "langhooks.h" |
6ebe4c69 | 43 | |
44 | struct target_expmed default_target_expmed; | |
45 | #if SWITCHABLE_TARGET | |
46 | struct target_expmed *this_target_expmed = &default_target_expmed; | |
47 | #endif | |
db96f378 | 48 | |
5e864fc6 | 49 | static void store_fixed_bit_field (rtx, opt_scalar_int_mode, |
35cb5232 | 50 | unsigned HOST_WIDE_INT, |
4bb60ec7 | 51 | unsigned HOST_WIDE_INT, |
52 | unsigned HOST_WIDE_INT, | |
5e864fc6 | 53 | unsigned HOST_WIDE_INT, |
54 | rtx, scalar_int_mode, bool); | |
55 | static void store_fixed_bit_field_1 (rtx, scalar_int_mode, | |
56 | unsigned HOST_WIDE_INT, | |
08b277ff | 57 | unsigned HOST_WIDE_INT, |
5e864fc6 | 58 | rtx, scalar_int_mode, bool); |
59 | static void store_split_bit_field (rtx, opt_scalar_int_mode, | |
4bb60ec7 | 60 | unsigned HOST_WIDE_INT, |
61 | unsigned HOST_WIDE_INT, | |
62 | unsigned HOST_WIDE_INT, | |
5e864fc6 | 63 | unsigned HOST_WIDE_INT, |
64 | rtx, scalar_int_mode, bool); | |
65 | static rtx extract_fixed_bit_field (machine_mode, rtx, opt_scalar_int_mode, | |
35cb5232 | 66 | unsigned HOST_WIDE_INT, |
292237f3 | 67 | unsigned HOST_WIDE_INT, rtx, int, bool); |
5e864fc6 | 68 | static rtx extract_fixed_bit_field_1 (machine_mode, rtx, scalar_int_mode, |
b4d02378 | 69 | unsigned HOST_WIDE_INT, |
292237f3 | 70 | unsigned HOST_WIDE_INT, rtx, int, bool); |
3754d046 | 71 | static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int); |
5e864fc6 | 72 | static rtx extract_split_bit_field (rtx, opt_scalar_int_mode, |
73 | unsigned HOST_WIDE_INT, | |
292237f3 | 74 | unsigned HOST_WIDE_INT, int, bool); |
3754d046 | 75 | static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *); |
f77c4496 | 76 | static rtx expand_smod_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT); |
77 | static rtx expand_sdiv_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT); | |
db96f378 | 78 | |
e913b5cd | 79 | /* Return a constant integer mask value of mode MODE with BITSIZE ones |
80 | followed by BITPOS zeros, or the complement of that if COMPLEMENT. | |
81 | The mask is truncated if necessary to the width of mode MODE. The | |
82 | mask is zero-extended if BITSIZE+BITPOS is too small for MODE. */ | |
83 | ||
ddb1be65 | 84 | static inline rtx |
f77c4496 | 85 | mask_rtx (scalar_int_mode mode, int bitpos, int bitsize, bool complement) |
e913b5cd | 86 | { |
ddb1be65 | 87 | return immed_wide_int_const |
796b6678 | 88 | (wi::shifted_mask (bitpos, bitsize, complement, |
89 | GET_MODE_PRECISION (mode)), mode); | |
e913b5cd | 90 | } |
91 | ||
7a9e3364 | 92 | /* Test whether a value is zero of a power of two. */ |
af0907da | 93 | #define EXACT_POWER_OF_2_OR_ZERO_P(x) \ |
edc19fd0 | 94 | (((x) & ((x) - HOST_WIDE_INT_1U)) == 0) |
7a9e3364 | 95 | |
49db198b | 96 | struct init_expmed_rtl |
db96f378 | 97 | { |
e10a2a58 | 98 | rtx reg; |
99 | rtx plus; | |
100 | rtx neg; | |
101 | rtx mult; | |
102 | rtx sdiv; | |
103 | rtx udiv; | |
104 | rtx sdiv_32; | |
105 | rtx smod_32; | |
106 | rtx wide_mult; | |
107 | rtx wide_lshr; | |
108 | rtx wide_trunc; | |
109 | rtx shift; | |
110 | rtx shift_mult; | |
111 | rtx shift_add; | |
112 | rtx shift_sub0; | |
113 | rtx shift_sub1; | |
114 | rtx zext; | |
115 | rtx trunc; | |
649e81fd | 116 | |
e56afeb2 | 117 | rtx pow2[MAX_BITS_PER_WORD]; |
118 | rtx cint[MAX_BITS_PER_WORD]; | |
49db198b | 119 | }; |
120 | ||
573ff301 | 121 | static void |
f77c4496 | 122 | init_expmed_one_conv (struct init_expmed_rtl *all, scalar_int_mode to_mode, |
123 | scalar_int_mode from_mode, bool speed) | |
573ff301 | 124 | { |
125 | int to_size, from_size; | |
126 | rtx which; | |
127 | ||
4765975c | 128 | to_size = GET_MODE_PRECISION (to_mode); |
129 | from_size = GET_MODE_PRECISION (from_mode); | |
130 | ||
131 | /* Most partial integers have a precision less than the "full" | |
132 | integer it requires for storage. In case one doesn't, for | |
133 | comparison purposes here, reduce the bit size by one in that | |
134 | case. */ | |
135 | if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT | |
ac29ece2 | 136 | && pow2p_hwi (to_size)) |
4765975c | 137 | to_size --; |
138 | if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT | |
ac29ece2 | 139 | && pow2p_hwi (from_size)) |
4765975c | 140 | from_size --; |
573ff301 | 141 | |
142 | /* Assume cost of zero-extend and sign-extend is the same. */ | |
e10a2a58 | 143 | which = (to_size < from_size ? all->trunc : all->zext); |
573ff301 | 144 | |
e10a2a58 | 145 | PUT_MODE (all->reg, from_mode); |
5ae4887d | 146 | set_convert_cost (to_mode, from_mode, speed, |
147 | set_src_cost (which, to_mode, speed)); | |
573ff301 | 148 | } |
149 | ||
49db198b | 150 | static void |
151 | init_expmed_one_mode (struct init_expmed_rtl *all, | |
3754d046 | 152 | machine_mode mode, int speed) |
49db198b | 153 | { |
154 | int m, n, mode_bitsize; | |
3754d046 | 155 | machine_mode mode_from; |
db96f378 | 156 | |
49db198b | 157 | mode_bitsize = GET_MODE_UNIT_BITSIZE (mode); |
941522d6 | 158 | |
e10a2a58 | 159 | PUT_MODE (all->reg, mode); |
160 | PUT_MODE (all->plus, mode); | |
161 | PUT_MODE (all->neg, mode); | |
162 | PUT_MODE (all->mult, mode); | |
163 | PUT_MODE (all->sdiv, mode); | |
164 | PUT_MODE (all->udiv, mode); | |
165 | PUT_MODE (all->sdiv_32, mode); | |
166 | PUT_MODE (all->smod_32, mode); | |
167 | PUT_MODE (all->wide_trunc, mode); | |
168 | PUT_MODE (all->shift, mode); | |
169 | PUT_MODE (all->shift_mult, mode); | |
170 | PUT_MODE (all->shift_add, mode); | |
171 | PUT_MODE (all->shift_sub0, mode); | |
172 | PUT_MODE (all->shift_sub1, mode); | |
173 | PUT_MODE (all->zext, mode); | |
174 | PUT_MODE (all->trunc, mode); | |
175 | ||
5ae4887d | 176 | set_add_cost (speed, mode, set_src_cost (all->plus, mode, speed)); |
177 | set_neg_cost (speed, mode, set_src_cost (all->neg, mode, speed)); | |
178 | set_mul_cost (speed, mode, set_src_cost (all->mult, mode, speed)); | |
179 | set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, mode, speed)); | |
180 | set_udiv_cost (speed, mode, set_src_cost (all->udiv, mode, speed)); | |
e10a2a58 | 181 | |
5ae4887d | 182 | set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, mode, speed) |
49a71e58 | 183 | <= 2 * add_cost (speed, mode))); |
5ae4887d | 184 | set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, mode, speed) |
49a71e58 | 185 | <= 4 * add_cost (speed, mode))); |
186 | ||
187 | set_shift_cost (speed, mode, 0, 0); | |
188 | { | |
189 | int cost = add_cost (speed, mode); | |
190 | set_shiftadd_cost (speed, mode, 0, cost); | |
191 | set_shiftsub0_cost (speed, mode, 0, cost); | |
192 | set_shiftsub1_cost (speed, mode, 0, cost); | |
193 | } | |
49db198b | 194 | |
195 | n = MIN (MAX_BITS_PER_WORD, mode_bitsize); | |
196 | for (m = 1; m < n; m++) | |
197 | { | |
e10a2a58 | 198 | XEXP (all->shift, 1) = all->cint[m]; |
199 | XEXP (all->shift_mult, 1) = all->pow2[m]; | |
49db198b | 200 | |
5ae4887d | 201 | set_shift_cost (speed, mode, m, set_src_cost (all->shift, mode, speed)); |
202 | set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, mode, | |
203 | speed)); | |
204 | set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, mode, | |
205 | speed)); | |
206 | set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, mode, | |
207 | speed)); | |
49db198b | 208 | } |
209 | ||
8974b7a3 | 210 | scalar_int_mode int_mode_to; |
211 | if (is_a <scalar_int_mode> (mode, &int_mode_to)) | |
e56afeb2 | 212 | { |
573ff301 | 213 | for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT; |
3754d046 | 214 | mode_from = (machine_mode)(mode_from + 1)) |
7a6aeeed | 215 | init_expmed_one_conv (all, int_mode_to, |
216 | as_a <scalar_int_mode> (mode_from), speed); | |
28ebc73c | 217 | |
8974b7a3 | 218 | scalar_int_mode wider_mode; |
219 | if (GET_MODE_CLASS (int_mode_to) == MODE_INT | |
220 | && GET_MODE_WIDER_MODE (int_mode_to).exists (&wider_mode)) | |
49db198b | 221 | { |
e10a2a58 | 222 | PUT_MODE (all->zext, wider_mode); |
223 | PUT_MODE (all->wide_mult, wider_mode); | |
224 | PUT_MODE (all->wide_lshr, wider_mode); | |
225 | XEXP (all->wide_lshr, 1) = GEN_INT (mode_bitsize); | |
49db198b | 226 | |
573ff301 | 227 | set_mul_widen_cost (speed, wider_mode, |
5ae4887d | 228 | set_src_cost (all->wide_mult, wider_mode, speed)); |
8974b7a3 | 229 | set_mul_highpart_cost (speed, int_mode_to, |
230 | set_src_cost (all->wide_trunc, | |
231 | int_mode_to, speed)); | |
49db198b | 232 | } |
e56afeb2 | 233 | } |
49db198b | 234 | } |
235 | ||
236 | void | |
237 | init_expmed (void) | |
238 | { | |
239 | struct init_expmed_rtl all; | |
3754d046 | 240 | machine_mode mode = QImode; |
49db198b | 241 | int m, speed; |
242 | ||
649e81fd | 243 | memset (&all, 0, sizeof all); |
49db198b | 244 | for (m = 1; m < MAX_BITS_PER_WORD; m++) |
245 | { | |
edc19fd0 | 246 | all.pow2[m] = GEN_INT (HOST_WIDE_INT_1 << m); |
49db198b | 247 | all.cint[m] = GEN_INT (m); |
248 | } | |
649e81fd | 249 | |
15a79151 | 250 | /* Avoid using hard regs in ways which may be unsupported. */ |
15183fd2 | 251 | all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1); |
e10a2a58 | 252 | all.plus = gen_rtx_PLUS (mode, all.reg, all.reg); |
253 | all.neg = gen_rtx_NEG (mode, all.reg); | |
254 | all.mult = gen_rtx_MULT (mode, all.reg, all.reg); | |
255 | all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg); | |
256 | all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg); | |
257 | all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]); | |
258 | all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]); | |
259 | all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg); | |
260 | all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext); | |
261 | all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg); | |
262 | all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr); | |
263 | all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg); | |
264 | all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg); | |
265 | all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg); | |
266 | all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg); | |
267 | all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult); | |
268 | all.trunc = gen_rtx_TRUNCATE (mode, all.reg); | |
72655676 | 269 | |
f529eb25 | 270 | for (speed = 0; speed < 2; speed++) |
33183a3c | 271 | { |
f529eb25 | 272 | crtl->maybe_hot_insn_p = speed; |
5ae4887d | 273 | set_zero_cost (speed, set_src_cost (const0_rtx, mode, speed)); |
649e81fd | 274 | |
573ff301 | 275 | for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT; |
3754d046 | 276 | mode = (machine_mode)(mode + 1)) |
49db198b | 277 | init_expmed_one_mode (&all, mode, speed); |
649e81fd | 278 | |
573ff301 | 279 | if (MIN_MODE_PARTIAL_INT != VOIDmode) |
280 | for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT; | |
3754d046 | 281 | mode = (machine_mode)(mode + 1)) |
573ff301 | 282 | init_expmed_one_mode (&all, mode, speed); |
283 | ||
284 | if (MIN_MODE_VECTOR_INT != VOIDmode) | |
285 | for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT; | |
3754d046 | 286 | mode = (machine_mode)(mode + 1)) |
573ff301 | 287 | init_expmed_one_mode (&all, mode, speed); |
649e81fd | 288 | } |
49db198b | 289 | |
49a71e58 | 290 | if (alg_hash_used_p ()) |
291 | { | |
292 | struct alg_hash_entry *p = alg_hash_entry_ptr (0); | |
293 | memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES); | |
294 | } | |
92358f62 | 295 | else |
49a71e58 | 296 | set_alg_hash_used_p (true); |
f529eb25 | 297 | default_rtl_profile (); |
e10a2a58 | 298 | |
299 | ggc_free (all.trunc); | |
300 | ggc_free (all.shift_sub1); | |
301 | ggc_free (all.shift_sub0); | |
302 | ggc_free (all.shift_add); | |
303 | ggc_free (all.shift_mult); | |
304 | ggc_free (all.shift); | |
305 | ggc_free (all.wide_trunc); | |
306 | ggc_free (all.wide_lshr); | |
307 | ggc_free (all.wide_mult); | |
308 | ggc_free (all.zext); | |
309 | ggc_free (all.smod_32); | |
310 | ggc_free (all.sdiv_32); | |
311 | ggc_free (all.udiv); | |
312 | ggc_free (all.sdiv); | |
313 | ggc_free (all.mult); | |
314 | ggc_free (all.neg); | |
315 | ggc_free (all.plus); | |
316 | ggc_free (all.reg); | |
db96f378 | 317 | } |
318 | ||
319 | /* Return an rtx representing minus the value of X. | |
320 | MODE is the intended mode of the result, | |
321 | useful if X is a CONST_INT. */ | |
322 | ||
323 | rtx | |
3754d046 | 324 | negate_rtx (machine_mode mode, rtx x) |
db96f378 | 325 | { |
2242dc4b | 326 | rtx result = simplify_unary_operation (NEG, mode, x, mode); |
327 | ||
4e57dfc5 | 328 | if (result == 0) |
2242dc4b | 329 | result = expand_unop (mode, neg_optab, x, NULL_RTX, 0); |
330 | ||
331 | return result; | |
db96f378 | 332 | } |
9068af20 | 333 | |
292237f3 | 334 | /* Whether reverse storage order is supported on the target. */ |
335 | static int reverse_storage_order_supported = -1; | |
336 | ||
337 | /* Check whether reverse storage order is supported on the target. */ | |
338 | ||
339 | static void | |
340 | check_reverse_storage_order_support (void) | |
341 | { | |
342 | if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN) | |
343 | { | |
344 | reverse_storage_order_supported = 0; | |
345 | sorry ("reverse scalar storage order"); | |
346 | } | |
347 | else | |
348 | reverse_storage_order_supported = 1; | |
349 | } | |
350 | ||
351 | /* Whether reverse FP storage order is supported on the target. */ | |
352 | static int reverse_float_storage_order_supported = -1; | |
353 | ||
354 | /* Check whether reverse FP storage order is supported on the target. */ | |
355 | ||
356 | static void | |
357 | check_reverse_float_storage_order_support (void) | |
358 | { | |
359 | if (FLOAT_WORDS_BIG_ENDIAN != WORDS_BIG_ENDIAN) | |
360 | { | |
361 | reverse_float_storage_order_supported = 0; | |
362 | sorry ("reverse floating-point scalar storage order"); | |
363 | } | |
364 | else | |
365 | reverse_float_storage_order_supported = 1; | |
366 | } | |
367 | ||
368 | /* Return an rtx representing value of X with reverse storage order. | |
369 | MODE is the intended mode of the result, | |
370 | useful if X is a CONST_INT. */ | |
371 | ||
372 | rtx | |
582adad1 | 373 | flip_storage_order (machine_mode mode, rtx x) |
292237f3 | 374 | { |
44504d18 | 375 | scalar_int_mode int_mode; |
292237f3 | 376 | rtx result; |
377 | ||
378 | if (mode == QImode) | |
379 | return x; | |
380 | ||
381 | if (COMPLEX_MODE_P (mode)) | |
382 | { | |
383 | rtx real = read_complex_part (x, false); | |
384 | rtx imag = read_complex_part (x, true); | |
385 | ||
386 | real = flip_storage_order (GET_MODE_INNER (mode), real); | |
387 | imag = flip_storage_order (GET_MODE_INNER (mode), imag); | |
388 | ||
389 | return gen_rtx_CONCAT (mode, real, imag); | |
390 | } | |
391 | ||
392 | if (__builtin_expect (reverse_storage_order_supported < 0, 0)) | |
393 | check_reverse_storage_order_support (); | |
394 | ||
44504d18 | 395 | if (!is_a <scalar_int_mode> (mode, &int_mode)) |
292237f3 | 396 | { |
397 | if (FLOAT_MODE_P (mode) | |
398 | && __builtin_expect (reverse_float_storage_order_supported < 0, 0)) | |
399 | check_reverse_float_storage_order_support (); | |
400 | ||
44504d18 | 401 | if (!int_mode_for_size (GET_MODE_PRECISION (mode), 0).exists (&int_mode)) |
292237f3 | 402 | { |
403 | sorry ("reverse storage order for %smode", GET_MODE_NAME (mode)); | |
404 | return x; | |
405 | } | |
406 | x = gen_lowpart (int_mode, x); | |
407 | } | |
408 | ||
409 | result = simplify_unary_operation (BSWAP, int_mode, x, int_mode); | |
410 | if (result == 0) | |
411 | result = expand_unop (int_mode, bswap_optab, x, NULL_RTX, 1); | |
412 | ||
413 | if (int_mode != mode) | |
414 | result = gen_lowpart (mode, result); | |
415 | ||
416 | return result; | |
417 | } | |
418 | ||
54fea56d | 419 | /* If MODE is set, adjust bitfield memory MEM so that it points to the |
420 | first unit of mode MODE that contains a bitfield of size BITSIZE at | |
421 | bit position BITNUM. If MODE is not set, return a BLKmode reference | |
422 | to every byte in the bitfield. Set *NEW_BITNUM to the bit position | |
423 | of the field within the new memory. */ | |
97b62a50 | 424 | |
425 | static rtx | |
54fea56d | 426 | narrow_bit_field_mem (rtx mem, opt_scalar_int_mode mode, |
97b62a50 | 427 | unsigned HOST_WIDE_INT bitsize, |
428 | unsigned HOST_WIDE_INT bitnum, | |
429 | unsigned HOST_WIDE_INT *new_bitnum) | |
430 | { | |
54fea56d | 431 | scalar_int_mode imode; |
432 | if (mode.exists (&imode)) | |
433 | { | |
434 | unsigned int unit = GET_MODE_BITSIZE (imode); | |
435 | *new_bitnum = bitnum % unit; | |
436 | HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT; | |
437 | return adjust_bitfield_address (mem, imode, offset); | |
438 | } | |
439 | else | |
97b62a50 | 440 | { |
441 | *new_bitnum = bitnum % BITS_PER_UNIT; | |
442 | HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT; | |
443 | HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1) | |
444 | / BITS_PER_UNIT); | |
54fea56d | 445 | return adjust_bitfield_address_size (mem, BLKmode, offset, size); |
97b62a50 | 446 | } |
447 | } | |
448 | ||
26427966 | 449 | /* The caller wants to perform insertion or extraction PATTERN on a |
450 | bitfield of size BITSIZE at BITNUM bits into memory operand OP0. | |
451 | BITREGION_START and BITREGION_END are as for store_bit_field | |
452 | and FIELDMODE is the natural mode of the field. | |
453 | ||
454 | Search for a mode that is compatible with the memory access | |
455 | restrictions and (where applicable) with a register insertion or | |
456 | extraction. Return the new memory on success, storing the adjusted | |
457 | bit position in *NEW_BITNUM. Return null otherwise. */ | |
458 | ||
459 | static rtx | |
460 | adjust_bit_field_mem_for_reg (enum extraction_pattern pattern, | |
461 | rtx op0, HOST_WIDE_INT bitsize, | |
462 | HOST_WIDE_INT bitnum, | |
463 | unsigned HOST_WIDE_INT bitregion_start, | |
464 | unsigned HOST_WIDE_INT bitregion_end, | |
3754d046 | 465 | machine_mode fieldmode, |
26427966 | 466 | unsigned HOST_WIDE_INT *new_bitnum) |
467 | { | |
468 | bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start, | |
469 | bitregion_end, MEM_ALIGN (op0), | |
470 | MEM_VOLATILE_P (op0)); | |
1572c088 | 471 | scalar_int_mode best_mode; |
26427966 | 472 | if (iter.next_mode (&best_mode)) |
473 | { | |
474 | /* We can use a memory in BEST_MODE. See whether this is true for | |
475 | any wider modes. All other things being equal, we prefer to | |
476 | use the widest mode possible because it tends to expose more | |
477 | CSE opportunities. */ | |
478 | if (!iter.prefer_smaller_modes ()) | |
479 | { | |
480 | /* Limit the search to the mode required by the corresponding | |
481 | register insertion or extraction instruction, if any. */ | |
f77c4496 | 482 | scalar_int_mode limit_mode = word_mode; |
26427966 | 483 | extraction_insn insn; |
484 | if (get_best_reg_extraction_insn (&insn, pattern, | |
485 | GET_MODE_BITSIZE (best_mode), | |
486 | fieldmode)) | |
487 | limit_mode = insn.field_mode; | |
488 | ||
1572c088 | 489 | scalar_int_mode wider_mode; |
26427966 | 490 | while (iter.next_mode (&wider_mode) |
491 | && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode)) | |
492 | best_mode = wider_mode; | |
493 | } | |
494 | return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum, | |
495 | new_bitnum); | |
496 | } | |
497 | return NULL_RTX; | |
498 | } | |
499 | ||
5e81ea64 | 500 | /* Return true if a bitfield of size BITSIZE at bit number BITNUM within |
501 | a structure of mode STRUCT_MODE represents a lowpart subreg. The subreg | |
502 | offset is then BITNUM / BITS_PER_UNIT. */ | |
503 | ||
504 | static bool | |
505 | lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum, | |
506 | unsigned HOST_WIDE_INT bitsize, | |
3754d046 | 507 | machine_mode struct_mode) |
5e81ea64 | 508 | { |
44ce7b27 | 509 | unsigned HOST_WIDE_INT regsize = REGMODE_NATURAL_SIZE (struct_mode); |
5e81ea64 | 510 | if (BYTES_BIG_ENDIAN) |
d7d98104 | 511 | return (bitnum % BITS_PER_UNIT == 0 |
5e81ea64 | 512 | && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode) |
44ce7b27 | 513 | || (bitnum + bitsize) % (regsize * BITS_PER_UNIT) == 0)); |
5e81ea64 | 514 | else |
44ce7b27 | 515 | return bitnum % (regsize * BITS_PER_UNIT) == 0; |
5e81ea64 | 516 | } |
06bedae0 | 517 | |
e534aceb | 518 | /* Return true if -fstrict-volatile-bitfields applies to an access of OP0 |
b4d02378 | 519 | containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE. |
520 | Return false if the access would touch memory outside the range | |
521 | BITREGION_START to BITREGION_END for conformance to the C++ memory | |
522 | model. */ | |
72e9011e | 523 | |
524 | static bool | |
525 | strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize, | |
526 | unsigned HOST_WIDE_INT bitnum, | |
819eaef1 | 527 | scalar_int_mode fieldmode, |
b4d02378 | 528 | unsigned HOST_WIDE_INT bitregion_start, |
529 | unsigned HOST_WIDE_INT bitregion_end) | |
72e9011e | 530 | { |
531 | unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode); | |
532 | ||
533 | /* -fstrict-volatile-bitfields must be enabled and we must have a | |
534 | volatile MEM. */ | |
535 | if (!MEM_P (op0) | |
536 | || !MEM_VOLATILE_P (op0) | |
537 | || flag_strict_volatile_bitfields <= 0) | |
538 | return false; | |
539 | ||
72e9011e | 540 | /* The bit size must not be larger than the field mode, and |
541 | the field mode must not be larger than a word. */ | |
542 | if (bitsize > modesize || modesize > BITS_PER_WORD) | |
543 | return false; | |
544 | ||
545 | /* Check for cases of unaligned fields that must be split. */ | |
dda86e31 | 546 | if (bitnum % modesize + bitsize > modesize) |
547 | return false; | |
548 | ||
549 | /* The memory must be sufficiently aligned for a MODESIZE access. | |
550 | This condition guarantees, that the memory access will not | |
551 | touch anything after the end of the structure. */ | |
552 | if (MEM_ALIGN (op0) < modesize) | |
72e9011e | 553 | return false; |
554 | ||
b4d02378 | 555 | /* Check for cases where the C++ memory model applies. */ |
556 | if (bitregion_end != 0 | |
557 | && (bitnum - bitnum % modesize < bitregion_start | |
f3715c39 | 558 | || bitnum - bitnum % modesize + modesize - 1 > bitregion_end)) |
b4d02378 | 559 | return false; |
560 | ||
72e9011e | 561 | return true; |
562 | } | |
563 | ||
06bedae0 | 564 | /* Return true if OP is a memory and if a bitfield of size BITSIZE at |
565 | bit number BITNUM can be treated as a simple value of mode MODE. */ | |
566 | ||
567 | static bool | |
568 | simple_mem_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize, | |
3754d046 | 569 | unsigned HOST_WIDE_INT bitnum, machine_mode mode) |
06bedae0 | 570 | { |
571 | return (MEM_P (op0) | |
572 | && bitnum % BITS_PER_UNIT == 0 | |
573 | && bitsize == GET_MODE_BITSIZE (mode) | |
dfdced85 | 574 | && (!targetm.slow_unaligned_access (mode, MEM_ALIGN (op0)) |
06bedae0 | 575 | || (bitnum % GET_MODE_ALIGNMENT (mode) == 0 |
576 | && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode)))); | |
577 | } | |
36122326 | 578 | \f |
26427966 | 579 | /* Try to use instruction INSV to store VALUE into a field of OP0. |
5e864fc6 | 580 | If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is a |
581 | BLKmode MEM. VALUE_MODE is the mode of VALUE. BITSIZE and BITNUM | |
582 | are as for store_bit_field. */ | |
ba8869c8 | 583 | |
584 | static bool | |
26427966 | 585 | store_bit_field_using_insv (const extraction_insn *insv, rtx op0, |
5e864fc6 | 586 | opt_scalar_int_mode op0_mode, |
26427966 | 587 | unsigned HOST_WIDE_INT bitsize, |
e534aceb | 588 | unsigned HOST_WIDE_INT bitnum, |
5e864fc6 | 589 | rtx value, scalar_int_mode value_mode) |
ba8869c8 | 590 | { |
591 | struct expand_operand ops[4]; | |
592 | rtx value1; | |
593 | rtx xop0 = op0; | |
89ca96ce | 594 | rtx_insn *last = get_last_insn (); |
ba8869c8 | 595 | bool copy_back = false; |
596 | ||
5e864fc6 | 597 | scalar_int_mode op_mode = insv->field_mode; |
ba8869c8 | 598 | unsigned int unit = GET_MODE_BITSIZE (op_mode); |
599 | if (bitsize == 0 || bitsize > unit) | |
600 | return false; | |
601 | ||
602 | if (MEM_P (xop0)) | |
97b62a50 | 603 | /* Get a reference to the first byte of the field. */ |
26427966 | 604 | xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum, |
605 | &bitnum); | |
ba8869c8 | 606 | else |
607 | { | |
608 | /* Convert from counting within OP0 to counting in OP_MODE. */ | |
609 | if (BYTES_BIG_ENDIAN) | |
5e864fc6 | 610 | bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ()); |
ba8869c8 | 611 | |
612 | /* If xop0 is a register, we need it in OP_MODE | |
613 | to make it acceptable to the format of insv. */ | |
614 | if (GET_CODE (xop0) == SUBREG) | |
615 | /* We can't just change the mode, because this might clobber op0, | |
616 | and we will need the original value of op0 if insv fails. */ | |
617 | xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0)); | |
618 | if (REG_P (xop0) && GET_MODE (xop0) != op_mode) | |
619 | xop0 = gen_lowpart_SUBREG (op_mode, xop0); | |
620 | } | |
621 | ||
622 | /* If the destination is a paradoxical subreg such that we need a | |
623 | truncate to the inner mode, perform the insertion on a temporary and | |
624 | truncate the result to the original destination. Note that we can't | |
625 | just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N | |
626 | X) 0)) is (reg:N X). */ | |
627 | if (GET_CODE (xop0) == SUBREG | |
628 | && REG_P (SUBREG_REG (xop0)) | |
629 | && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)), | |
630 | op_mode)) | |
631 | { | |
632 | rtx tem = gen_reg_rtx (op_mode); | |
633 | emit_move_insn (tem, xop0); | |
634 | xop0 = tem; | |
635 | copy_back = true; | |
636 | } | |
637 | ||
d2d18163 | 638 | /* There are similar overflow check at the start of store_bit_field_1, |
639 | but that only check the situation where the field lies completely | |
640 | outside the register, while there do have situation where the field | |
641 | lies partialy in the register, we need to adjust bitsize for this | |
642 | partial overflow situation. Without this fix, pr48335-2.c on big-endian | |
643 | will broken on those arch support bit insert instruction, like arm, aarch64 | |
644 | etc. */ | |
645 | if (bitsize + bitnum > unit && bitnum < unit) | |
646 | { | |
11581eb7 | 647 | warning (OPT_Wextra, "write of %wu-bit data outside the bound of " |
648 | "destination object, data truncated into %wu-bit", | |
649 | bitsize, unit - bitnum); | |
d2d18163 | 650 | bitsize = unit - bitnum; |
651 | } | |
652 | ||
ba8869c8 | 653 | /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count |
654 | "backwards" from the size of the unit we are inserting into. | |
655 | Otherwise, we count bits from the most significant on a | |
656 | BYTES/BITS_BIG_ENDIAN machine. */ | |
657 | ||
658 | if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN) | |
659 | bitnum = unit - bitsize - bitnum; | |
660 | ||
661 | /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */ | |
662 | value1 = value; | |
5e864fc6 | 663 | if (value_mode != op_mode) |
ba8869c8 | 664 | { |
5e864fc6 | 665 | if (GET_MODE_BITSIZE (value_mode) >= bitsize) |
ba8869c8 | 666 | { |
6f9a7425 | 667 | rtx tmp; |
ba8869c8 | 668 | /* Optimization: Don't bother really extending VALUE |
669 | if it has all the bits we will actually use. However, | |
670 | if we must narrow it, be sure we do it correctly. */ | |
671 | ||
5e864fc6 | 672 | if (GET_MODE_SIZE (value_mode) < GET_MODE_SIZE (op_mode)) |
ba8869c8 | 673 | { |
5e864fc6 | 674 | tmp = simplify_subreg (op_mode, value1, value_mode, 0); |
ba8869c8 | 675 | if (! tmp) |
676 | tmp = simplify_gen_subreg (op_mode, | |
5e864fc6 | 677 | force_reg (value_mode, value1), |
678 | value_mode, 0); | |
ba8869c8 | 679 | } |
680 | else | |
6f9a7425 | 681 | { |
682 | tmp = gen_lowpart_if_possible (op_mode, value1); | |
683 | if (! tmp) | |
5e864fc6 | 684 | tmp = gen_lowpart (op_mode, force_reg (value_mode, value1)); |
6f9a7425 | 685 | } |
686 | value1 = tmp; | |
ba8869c8 | 687 | } |
688 | else if (CONST_INT_P (value)) | |
689 | value1 = gen_int_mode (INTVAL (value), op_mode); | |
690 | else | |
691 | /* Parse phase is supposed to make VALUE's data type | |
692 | match that of the component reference, which is a type | |
693 | at least as wide as the field; so VALUE should have | |
694 | a mode that corresponds to that type. */ | |
695 | gcc_assert (CONSTANT_P (value)); | |
696 | } | |
697 | ||
698 | create_fixed_operand (&ops[0], xop0); | |
699 | create_integer_operand (&ops[1], bitsize); | |
700 | create_integer_operand (&ops[2], bitnum); | |
701 | create_input_operand (&ops[3], value1, op_mode); | |
26427966 | 702 | if (maybe_expand_insn (insv->icode, 4, ops)) |
ba8869c8 | 703 | { |
704 | if (copy_back) | |
705 | convert_move (op0, xop0, true); | |
706 | return true; | |
707 | } | |
708 | delete_insns_since (last); | |
709 | return false; | |
710 | } | |
711 | ||
/* A subroutine of store_bit_field, with the same arguments.  Return true
   if the operation could be implemented.

   If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
   no other way of implementing the operation.  If FALLBACK_P is false,
   return false instead.  */

static bool
store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
		   unsigned HOST_WIDE_INT bitnum,
		   unsigned HOST_WIDE_INT bitregion_start,
		   unsigned HOST_WIDE_INT bitregion_end,
		   machine_mode fieldmode,
		   rtx value, bool reverse, bool fallback_p)
{
  rtx op0 = str_rtx;
  rtx orig_value;

  /* Strip any SUBREGs from the destination, folding their byte offset
     into BITNUM so that we operate on the underlying object.  */
  while (GET_CODE (op0) == SUBREG)
    {
      bitnum += subreg_memory_offset (op0) * BITS_PER_UNIT;
      op0 = SUBREG_REG (op0);
    }

  /* No action is needed if the target is a register and if the field
     lies completely outside that register.  This can occur if the source
     code contains an out-of-bounds access to a small array.  */
  if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
    return true;

  /* Use vec_set patterns for inserting parts of vectors whenever
     available.  */
  machine_mode outermode = GET_MODE (op0);
  scalar_mode innermode = GET_MODE_INNER (outermode);
  if (VECTOR_MODE_P (outermode)
      && !MEM_P (op0)
      && optab_handler (vec_set_optab, outermode) != CODE_FOR_nothing
      && fieldmode == innermode
      && bitsize == GET_MODE_BITSIZE (innermode)
      && !(bitnum % GET_MODE_BITSIZE (innermode)))
    {
      struct expand_operand ops[3];
      enum insn_code icode = optab_handler (vec_set_optab, outermode);
      int pos = bitnum / GET_MODE_BITSIZE (innermode);

      create_fixed_operand (&ops[0], op0);
      create_input_operand (&ops[1], value, innermode);
      create_integer_operand (&ops[2], pos);
      if (maybe_expand_insn (icode, 3, ops))
	return true;
    }

  /* If the target is a register, overwriting the entire object, or storing
     a full-word or multi-word field can be done with just a SUBREG.  */
  if (!MEM_P (op0)
      && bitsize == GET_MODE_BITSIZE (fieldmode)
      && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
	  || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
    {
      /* Use the subreg machinery either to narrow OP0 to the required
	 words or to cope with mode punning between equal-sized modes.
	 In the latter case, use subreg on the rhs side, not lhs.  */
      rtx sub;

      if (bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
	{
	  sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
	  if (sub)
	    {
	      if (reverse)
		sub = flip_storage_order (GET_MODE (op0), sub);
	      emit_move_insn (op0, sub);
	      return true;
	    }
	}
      else
	{
	  sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
				     bitnum / BITS_PER_UNIT);
	  if (sub)
	    {
	      if (reverse)
		value = flip_storage_order (fieldmode, value);
	      emit_move_insn (sub, value);
	      return true;
	    }
	}
    }

  /* If the target is memory, storing any naturally aligned field can be
     done with a simple store.  For targets that support fast unaligned
     memory, any naturally sized, unit aligned field can be done directly.  */
  if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode))
    {
      op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
      if (reverse)
	value = flip_storage_order (fieldmode, value);
      emit_move_insn (op0, value);
      return true;
    }

  /* Make sure we are playing with integral modes.  Pun with subregs
     if we aren't.  This must come after the entire register case above,
     since that case is valid for any mode.  The following cases are only
     valid for integral modes.  */
  opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
  scalar_int_mode imode;
  if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
    {
      if (MEM_P (op0))
	op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
					    0, MEM_SIZE (op0));
      else
	op0 = gen_lowpart (op0_mode.require (), op0);
    }

  /* Storing an lsb-aligned field in a register
     can be done with a movstrict instruction.  */

  if (!MEM_P (op0)
      && !reverse
      && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
      && bitsize == GET_MODE_BITSIZE (fieldmode)
      && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
    {
      struct expand_operand ops[2];
      enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
      rtx arg0 = op0;
      unsigned HOST_WIDE_INT subreg_off;

      if (GET_CODE (arg0) == SUBREG)
	{
	  /* Else we've got some float mode source being extracted into
	     a different float mode destination -- this combination of
	     subregs results in Severe Tire Damage.  */
	  gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
		      || GET_MODE_CLASS (fieldmode) == MODE_INT
		      || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
	  arg0 = SUBREG_REG (arg0);
	}

      subreg_off = bitnum / BITS_PER_UNIT;
      if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
	{
	  arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);

	  create_fixed_operand (&ops[0], arg0);
	  /* Shrink the source operand to FIELDMODE.  */
	  create_convert_operand_to (&ops[1], value, fieldmode, false);
	  if (maybe_expand_insn (icode, 2, ops))
	    return true;
	}
    }

  /* Handle fields bigger than a word.  */

  if (bitsize > BITS_PER_WORD)
    {
      /* Here we transfer the words of the field
	 in the order least significant first.
	 This is because the most significant word is the one which may
	 be less than full.
	 However, only do that if the value is not BLKmode.  */

      const bool backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
      unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
      unsigned int i;
      rtx_insn *last;

      /* This is the mode we must force value to, so that there will be enough
	 subwords to extract.  Note that fieldmode will often (always?) be
	 VOIDmode, because that is what store_field uses to indicate that this
	 is a bit field, but passing VOIDmode to operand_subword_force
	 is not allowed.  */
      fieldmode = GET_MODE (value);
      if (fieldmode == VOIDmode)
	fieldmode = smallest_int_mode_for_size (nwords * BITS_PER_WORD);

      last = get_last_insn ();
      for (i = 0; i < nwords; i++)
	{
	  /* If I is 0, use the low-order word in both field and target;
	     if I is 1, use the next to lowest word; and so on.  */
	  unsigned int wordnum = (backwards
				  ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
				  - i - 1
				  : i);
	  unsigned int bit_offset = (backwards ^ reverse
				     ? MAX ((int) bitsize - ((int) i + 1)
					    * BITS_PER_WORD,
					    0)
				     : (int) i * BITS_PER_WORD);
	  rtx value_word = operand_subword_force (value, wordnum, fieldmode);
	  unsigned HOST_WIDE_INT new_bitsize =
	    MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);

	  /* If the remaining chunk doesn't have full wordsize we have
	     to make sure that for big-endian machines the higher order
	     bits are used.  */
	  if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
	    value_word = simplify_expand_binop (word_mode, lshr_optab,
						value_word,
						GEN_INT (BITS_PER_WORD
							 - new_bitsize),
						NULL_RTX, true,
						OPTAB_LIB_WIDEN);

	  /* Recurse on each word-sized (or smaller) piece; on failure,
	     roll back everything emitted for earlier pieces.  */
	  if (!store_bit_field_1 (op0, new_bitsize,
				  bitnum + bit_offset,
				  bitregion_start, bitregion_end,
				  word_mode,
				  value_word, reverse, fallback_p))
	    {
	      delete_insns_since (last);
	      return false;
	    }
	}
      return true;
    }

  /* If VALUE has a floating-point or complex mode, access it as an
     integer of the corresponding size.  This can occur on a machine
     with 64 bit registers that uses SFmode for float.  It can also
     occur for unaligned float or complex fields.  */
  orig_value = value;
  scalar_int_mode value_mode;
  if (GET_MODE (value) == VOIDmode)
    /* By this point we've dealt with values that are bigger than a word,
       so word_mode is a conservatively correct choice.  */
    value_mode = word_mode;
  else if (!is_a <scalar_int_mode> (GET_MODE (value), &value_mode))
    {
      value_mode = int_mode_for_mode (GET_MODE (value)).require ();
      value = gen_reg_rtx (value_mode);
      emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
    }

  /* If OP0 is a multi-word register, narrow it to the affected word.
     If the region spans two words, defer to store_split_bit_field.
     Don't do this if op0 is a single hard register wider than word
     such as a float or vector register.  */
  if (!MEM_P (op0)
      && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD
      && (!REG_P (op0)
	  || !HARD_REGISTER_P (op0)
	  || hard_regno_nregs (REGNO (op0), op0_mode.require ()) != 1))
    {
      if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
	{
	  if (!fallback_p)
	    return false;

	  store_split_bit_field (op0, op0_mode, bitsize, bitnum,
				 bitregion_start, bitregion_end,
				 value, value_mode, reverse);
	  return true;
	}
      op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (),
				 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
      gcc_assert (op0);
      op0_mode = word_mode;
      bitnum %= BITS_PER_WORD;
    }

  /* From here on we can assume that the field to be stored in fits
     within a word.  If the destination is a register, it too fits
     in a word.  */

  extraction_insn insv;
  if (!MEM_P (op0)
      && !reverse
      && get_best_reg_extraction_insn (&insv, EP_insv,
				       GET_MODE_BITSIZE (op0_mode.require ()),
				       fieldmode)
      && store_bit_field_using_insv (&insv, op0, op0_mode,
				     bitsize, bitnum, value, value_mode))
    return true;

  /* If OP0 is a memory, try copying it to a register and seeing if a
     cheap register alternative is available.  */
  if (MEM_P (op0) && !reverse)
    {
      if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
					fieldmode)
	  && store_bit_field_using_insv (&insv, op0, op0_mode,
					 bitsize, bitnum, value, value_mode))
	return true;

      rtx_insn *last = get_last_insn ();

      /* Try loading part of OP0 into a register, inserting the bitfield
	 into that, and then copying the result back to OP0.  */
      unsigned HOST_WIDE_INT bitpos;
      rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
					       bitregion_start, bitregion_end,
					       fieldmode, &bitpos);
      if (xop0)
	{
	  rtx tempreg = copy_to_reg (xop0);
	  /* Note FALLBACK_P is false here: if the register strategy fails
	     we discard the emitted insns and fall through to the general
	     fixed-bit-field code below instead.  */
	  if (store_bit_field_1 (tempreg, bitsize, bitpos,
				 bitregion_start, bitregion_end,
				 fieldmode, orig_value, reverse, false))
	    {
	      emit_move_insn (xop0, tempreg);
	      return true;
	    }
	  delete_insns_since (last);
	}
    }

  if (!fallback_p)
    return false;

  /* Last resort: shifts and logical operations.  */
  store_fixed_bit_field (op0, op0_mode, bitsize, bitnum, bitregion_start,
			 bitregion_end, value, value_mode, reverse);
  return true;
}
1029 | ||
/* Generate code to store value from rtx VALUE
   into a bit-field within structure STR_RTX
   containing BITSIZE bits starting at bit BITNUM.

   BITREGION_START is bitpos of the first bitfield in this region.
   BITREGION_END is the bitpos of the ending bitfield in this region.
   These two fields are 0, if the C++ memory model does not apply,
   or we are not interested in keeping track of bitfield regions.

   FIELDMODE is the machine-mode of the FIELD_DECL node for this field.

   If REVERSE is true, the store is to be done in reverse order.  */

void
store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
		 unsigned HOST_WIDE_INT bitnum,
		 unsigned HOST_WIDE_INT bitregion_start,
		 unsigned HOST_WIDE_INT bitregion_end,
		 machine_mode fieldmode,
		 rtx value, bool reverse)
{
  /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
  scalar_int_mode int_mode;
  if (is_a <scalar_int_mode> (fieldmode, &int_mode)
      && strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, int_mode,
				     bitregion_start, bitregion_end))
    {
      /* Storing of a full word can be done with a simple store.
	 We know here that the field can be accessed with one single
	 instruction.  For targets that support unaligned memory,
	 an unaligned access may be necessary.  */
      if (bitsize == GET_MODE_BITSIZE (int_mode))
	{
	  str_rtx = adjust_bitfield_address (str_rtx, int_mode,
					     bitnum / BITS_PER_UNIT);
	  if (reverse)
	    value = flip_storage_order (int_mode, value);
	  gcc_assert (bitnum % BITS_PER_UNIT == 0);
	  emit_move_insn (str_rtx, value);
	}
      else
	{
	  rtx temp;

	  /* Partial field: load the containing word into a register,
	     insert the field there, and store the whole word back so
	     the access width is exactly INT_MODE.  */
	  str_rtx = narrow_bit_field_mem (str_rtx, int_mode, bitsize, bitnum,
					  &bitnum);
	  gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (int_mode));
	  temp = copy_to_reg (str_rtx);
	  if (!store_bit_field_1 (temp, bitsize, bitnum, 0, 0,
				  int_mode, value, reverse, true))
	    gcc_unreachable ();

	  emit_move_insn (str_rtx, temp);
	}

      return;
    }

  /* Under the C++0x memory model, we must not touch bits outside the
     bit region.  Adjust the address to start at the beginning of the
     bit region.  */
  if (MEM_P (str_rtx) && bitregion_start > 0)
    {
      scalar_int_mode best_mode;
      machine_mode addr_mode = VOIDmode;
      HOST_WIDE_INT offset, size;

      gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);

      offset = bitregion_start / BITS_PER_UNIT;
      bitnum -= bitregion_start;
      size = (bitnum + bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT;
      bitregion_end -= bitregion_start;
      bitregion_start = 0;
      if (get_best_mode (bitsize, bitnum,
			 bitregion_start, bitregion_end,
			 MEM_ALIGN (str_rtx), INT_MAX,
			 MEM_VOLATILE_P (str_rtx), &best_mode))
	addr_mode = best_mode;
      str_rtx = adjust_bitfield_address_size (str_rtx, addr_mode,
					      offset, size);
    }

  /* FALLBACK_P is true, so the worker must succeed.  */
  if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
			  bitregion_start, bitregion_end,
			  fieldmode, value, reverse, true))
    gcc_unreachable ();
}
1118 | \f | |
/* Use shifts and boolean operations to store VALUE into a bit field of
   width BITSIZE in OP0, starting at bit BITNUM.  If OP0_MODE is defined,
   it is the mode of OP0, otherwise OP0 is a BLKmode MEM.  VALUE_MODE is
   the mode of VALUE.

   If REVERSE is true, the store is to be done in reverse order.  */

static void
store_fixed_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
		       unsigned HOST_WIDE_INT bitsize,
		       unsigned HOST_WIDE_INT bitnum,
		       unsigned HOST_WIDE_INT bitregion_start,
		       unsigned HOST_WIDE_INT bitregion_end,
		       rtx value, scalar_int_mode value_mode, bool reverse)
{
  /* There is a case not handled here:
     a structure with a known alignment of just a halfword
     and a field split across two aligned halfwords within the structure.
     Or likewise a structure with a known alignment of just a byte
     and a field split across two bytes.
     Such cases are not supposed to be able to occur.  */

  scalar_int_mode best_mode;
  if (MEM_P (op0))
    {
      /* Cap the access width at a word, and at OP0's own mode if it is
	 narrower than a word.  */
      unsigned int max_bitsize = BITS_PER_WORD;
      scalar_int_mode imode;
      if (op0_mode.exists (&imode) && GET_MODE_BITSIZE (imode) < max_bitsize)
	max_bitsize = GET_MODE_BITSIZE (imode);

      if (!get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
			  MEM_ALIGN (op0), max_bitsize, MEM_VOLATILE_P (op0),
			  &best_mode))
	{
	  /* The only way this should occur is if the field spans word
	     boundaries.  */
	  store_split_bit_field (op0, op0_mode, bitsize, bitnum,
				 bitregion_start, bitregion_end,
				 value, value_mode, reverse);
	  return;
	}

      op0 = narrow_bit_field_mem (op0, best_mode, bitsize, bitnum, &bitnum);
    }
  else
    /* For a register destination, OP0_MODE must be defined and is used
       directly as the access mode.  */
    best_mode = op0_mode.require ();

  store_fixed_bit_field_1 (op0, best_mode, bitsize, bitnum,
			   value, value_mode, reverse);
}
1169 | ||
/* Helper function for store_fixed_bit_field, stores
   the bit field always using MODE, which is the mode of OP0.  The other
   arguments are as for store_fixed_bit_field.  */

static void
store_fixed_bit_field_1 (rtx op0, scalar_int_mode mode,
			 unsigned HOST_WIDE_INT bitsize,
			 unsigned HOST_WIDE_INT bitnum,
			 rtx value, scalar_int_mode value_mode, bool reverse)
{
  rtx temp;
  /* Set below when VALUE is a constant whose low BITSIZE bits are all
     zeros / all ones; they let us skip the IOR / AND step respectively.  */
  int all_zero = 0;
  int all_one = 0;

  /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
     for invalid input, such as f5 from gcc.dg/pr48335-2.c.  */

  if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
    /* BITNUM is the distance between our msb
       and that of the containing datum.
       Convert it to the distance from the lsb.  */
    bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;

  /* Now BITNUM is always the distance between our lsb
     and that of OP0.  */

  /* Shift VALUE left by BITNUM bits.  If VALUE is not constant,
     we must first convert its mode to MODE.  */

  if (CONST_INT_P (value))
    {
      unsigned HOST_WIDE_INT v = UINTVAL (value);

      if (bitsize < HOST_BITS_PER_WIDE_INT)
	v &= (HOST_WIDE_INT_1U << bitsize) - 1;

      if (v == 0)
	all_zero = 1;
      else if ((bitsize < HOST_BITS_PER_WIDE_INT
		&& v == (HOST_WIDE_INT_1U << bitsize) - 1)
	       || (bitsize == HOST_BITS_PER_WIDE_INT
		   && v == HOST_WIDE_INT_M1U))
	all_one = 1;

      value = lshift_value (mode, v, bitnum);
    }
  else
    {
      /* Masking is only needed when VALUE may have stray high bits and
	 the field does not extend to the top of MODE.  */
      int must_and = (GET_MODE_BITSIZE (value_mode) != bitsize
		      && bitnum + bitsize != GET_MODE_BITSIZE (mode));

      if (value_mode != mode)
	value = convert_to_mode (mode, value, 1);

      if (must_and)
	value = expand_binop (mode, and_optab, value,
			      mask_rtx (mode, 0, bitsize, 0),
			      NULL_RTX, 1, OPTAB_LIB_WIDEN);
      if (bitnum > 0)
	value = expand_shift (LSHIFT_EXPR, mode, value,
			      bitnum, NULL_RTX, 1);
    }

  if (reverse)
    value = flip_storage_order (mode, value);

  /* Now clear the chosen bits in OP0,
     except that if VALUE is -1 we need not bother.  */
  /* We keep the intermediates in registers to allow CSE to combine
     consecutive bitfield assignments.  */

  temp = force_reg (mode, op0);

  if (! all_one)
    {
      rtx mask = mask_rtx (mode, bitnum, bitsize, 1);
      if (reverse)
	mask = flip_storage_order (mode, mask);
      temp = expand_binop (mode, and_optab, temp, mask,
			   NULL_RTX, 1, OPTAB_LIB_WIDEN);
      temp = force_reg (mode, temp);
    }

  /* Now logical-or VALUE into OP0, unless it is zero.  */

  if (! all_zero)
    {
      temp = expand_binop (mode, ior_optab, temp, value,
			   NULL_RTX, 1, OPTAB_LIB_WIDEN);
      temp = force_reg (mode, temp);
    }

  if (op0 != temp)
    {
      op0 = copy_rtx (op0);
      emit_move_insn (op0, temp);
    }
}
1268 | \f | |
/* Store a bit field that is split across multiple accessible memory objects.

   OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
   BITSIZE is the field width; BITPOS the position of its first bit
   (within the word).
   VALUE is the value to store, which has mode VALUE_MODE.
   If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
   a BLKmode MEM.

   If REVERSE is true, the store is to be done in reverse order.

   This does not yet handle fields wider than BITS_PER_WORD.  */

static void
store_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
		       unsigned HOST_WIDE_INT bitsize,
		       unsigned HOST_WIDE_INT bitpos,
		       unsigned HOST_WIDE_INT bitregion_start,
		       unsigned HOST_WIDE_INT bitregion_end,
		       rtx value, scalar_int_mode value_mode, bool reverse)
{
  unsigned int unit, total_bits, bitsdone = 0;

  /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
     much at a time.  */
  if (REG_P (op0) || GET_CODE (op0) == SUBREG)
    unit = BITS_PER_WORD;
  else
    unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);

  /* If OP0 is a memory with a mode, then UNIT must not be larger than
     OP0's mode as well.  Otherwise, store_fixed_bit_field will call us
     again, and we will mutually recurse forever.  */
  if (MEM_P (op0) && op0_mode.exists ())
    unit = MIN (unit, GET_MODE_BITSIZE (op0_mode.require ()));

  /* If VALUE is a constant other than a CONST_INT, get it into a register in
     WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
     that VALUE might be a floating-point constant.  */
  if (CONSTANT_P (value) && !CONST_INT_P (value))
    {
      rtx word = gen_lowpart_common (word_mode, value);

      if (word && (value != word))
	value = word;
      else
	value = gen_lowpart_common (word_mode, force_reg (value_mode, value));
      value_mode = word_mode;
    }

  total_bits = GET_MODE_BITSIZE (value_mode);

  while (bitsdone < bitsize)
    {
      unsigned HOST_WIDE_INT thissize;
      unsigned HOST_WIDE_INT thispos;
      unsigned HOST_WIDE_INT offset;
      /* PART holds the chunk of VALUE stored in this iteration.  */
      rtx part;

      offset = (bitpos + bitsdone) / unit;
      thispos = (bitpos + bitsdone) % unit;

      /* When region of bytes we can touch is restricted, decrease
	 UNIT close to the end of the region as needed.  If op0 is a REG
	 or SUBREG of REG, don't do this, as there can't be data races
	 on a register and we can expand shorter code in some cases.  */
      if (bitregion_end
	  && unit > BITS_PER_UNIT
	  && bitpos + bitsdone - thispos + unit > bitregion_end + 1
	  && !REG_P (op0)
	  && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
	{
	  unit = unit / 2;
	  continue;
	}

      /* THISSIZE must not overrun a word boundary.  Otherwise,
	 store_fixed_bit_field will call us again, and we will mutually
	 recurse forever.  */
      thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
      thissize = MIN (thissize, unit - thispos);

      if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
	{
	  /* Fetch successively less significant portions.  */
	  if (CONST_INT_P (value))
	    part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
			     >> (bitsize - bitsdone - thissize))
			    & ((HOST_WIDE_INT_1 << thissize) - 1));
	  /* Likewise, but the source is little-endian.  */
	  else if (reverse)
	    part = extract_fixed_bit_field (word_mode, value, value_mode,
					    thissize,
					    bitsize - bitsdone - thissize,
					    NULL_RTX, 1, false);
	  else
	    /* The args are chosen so that the last part includes the
	       lsb.  Give extract_bit_field the value it needs (with
	       endianness compensation) to fetch the piece we want.  */
	    part = extract_fixed_bit_field (word_mode, value, value_mode,
					    thissize,
					    total_bits - bitsize + bitsdone,
					    NULL_RTX, 1, false);
	}
      else
	{
	  /* Fetch successively more significant portions.  */
	  if (CONST_INT_P (value))
	    part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
			     >> bitsdone)
			    & ((HOST_WIDE_INT_1 << thissize) - 1));
	  /* Likewise, but the source is big-endian.  */
	  else if (reverse)
	    part = extract_fixed_bit_field (word_mode, value, value_mode,
					    thissize,
					    total_bits - bitsdone - thissize,
					    NULL_RTX, 1, false);
	  else
	    part = extract_fixed_bit_field (word_mode, value, value_mode,
					    thissize, bitsdone, NULL_RTX,
					    1, false);
	}

      /* If OP0 is a register, then handle OFFSET here.  */
      rtx op0_piece = op0;
      opt_scalar_int_mode op0_piece_mode = op0_mode;
      if (SUBREG_P (op0) || REG_P (op0))
	{
	  scalar_int_mode imode;
	  if (op0_mode.exists (&imode)
	      && GET_MODE_SIZE (imode) < UNITS_PER_WORD)
	    {
	      /* A sub-word register has no second word: a non-zero OFFSET
		 is an out-of-bounds access, flagged with const0_rtx.  */
	      if (offset)
		op0_piece = const0_rtx;
	    }
	  else
	    {
	      op0_piece = operand_subword_force (op0,
						 offset * unit / BITS_PER_WORD,
						 GET_MODE (op0));
	      op0_piece_mode = word_mode;
	    }
	  offset &= BITS_PER_WORD / unit - 1;
	}

      /* OFFSET is in UNITs, and UNIT is in bits.  If WORD is const0_rtx,
	 it is just an out-of-bounds access.  Ignore it.  */
      if (op0_piece != const0_rtx)
	store_fixed_bit_field (op0_piece, op0_piece_mode, thissize,
			       offset * unit + thispos, bitregion_start,
			       bitregion_end, part, word_mode, reverse);
      bitsdone += thissize;
    }
}
1423 | \f | |
36122326 | 1424 | /* A subroutine of extract_bit_field_1 that converts return value X |
1425 | to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments | |
1426 | to extract_bit_field. */ | |
db96f378 | 1427 | |
36122326 | 1428 | static rtx |
3754d046 | 1429 | convert_extracted_bit_field (rtx x, machine_mode mode, |
1430 | machine_mode tmode, bool unsignedp) | |
36122326 | 1431 | { |
1432 | if (GET_MODE (x) == tmode || GET_MODE (x) == mode) | |
1433 | return x; | |
db96f378 | 1434 | |
36122326 | 1435 | /* If the x mode is not a scalar integral, first convert to the |
1436 | integer mode of that size and then access it as a floating-point | |
1437 | value via a SUBREG. */ | |
1438 | if (!SCALAR_INT_MODE_P (tmode)) | |
1439 | { | |
2cf1bb25 | 1440 | scalar_int_mode int_mode = int_mode_for_mode (tmode).require (); |
44504d18 | 1441 | x = convert_to_mode (int_mode, x, unsignedp); |
1442 | x = force_reg (int_mode, x); | |
36122326 | 1443 | return gen_lowpart (tmode, x); |
1444 | } | |
db96f378 | 1445 | |
36122326 | 1446 | return convert_to_mode (tmode, x, unsignedp); |
1447 | } | |
1448 | ||
/* Try to use an ext(z)v pattern to extract a field from OP0.
   Return the extracted value on success, otherwise return null.
   EXTV describes the extraction instruction to use.  If OP0_MODE
   is defined, it is the mode of OP0, otherwise OP0 is a BLKmode MEM.
   UNSIGNEDP controls how the result is converted to the caller's
   requested mode.  The other arguments are as for extract_bit_field.  */

static rtx
extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
			      opt_scalar_int_mode op0_mode,
			      unsigned HOST_WIDE_INT bitsize,
			      unsigned HOST_WIDE_INT bitnum,
			      int unsignedp, rtx target,
			      machine_mode mode, machine_mode tmode)
{
  struct expand_operand ops[4];
  rtx spec_target = target;
  rtx spec_target_subreg = 0;
  scalar_int_mode ext_mode = extv->field_mode;
  unsigned unit = GET_MODE_BITSIZE (ext_mode);

  /* The pattern can only handle fields that fit within EXT_MODE.  */
  if (bitsize == 0 || unit < bitsize)
    return NULL_RTX;

  if (MEM_P (op0))
    /* Get a reference to the first byte of the field.  */
    op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
				&bitnum);
  else
    {
      /* Convert from counting within OP0 to counting in EXT_MODE.  */
      if (BYTES_BIG_ENDIAN)
	bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());

      /* If op0 is a register, we need it in EXT_MODE to make it
	 acceptable to the format of ext(z)v.  */
      if (GET_CODE (op0) == SUBREG && op0_mode.require () != ext_mode)
	return NULL_RTX;
      if (REG_P (op0) && op0_mode.require () != ext_mode)
	op0 = gen_lowpart_SUBREG (ext_mode, op0);
    }

  /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
     "backwards" from the size of the unit we are extracting from.
     Otherwise, we count bits from the most significant on a
     BYTES/BITS_BIG_ENDIAN machine.  */

  if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
    bitnum = unit - bitsize - bitnum;

  if (target == 0)
    target = spec_target = gen_reg_rtx (tmode);

  if (GET_MODE (target) != ext_mode)
    {
      /* Don't use LHS paradoxical subreg if explicit truncation is needed
	 between the mode of the extraction (word_mode) and the target
	 mode.  Instead, create a temporary and use convert_move to set
	 the target.  */
      if (REG_P (target)
	  && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
	{
	  target = gen_lowpart (ext_mode, target);
	  /* Remember this subreg: filling it is equivalent to filling
	     SPEC_TARGET, which we report back to the caller below.  */
	  if (partial_subreg_p (GET_MODE (spec_target), ext_mode))
	    spec_target_subreg = target;
	}
      else
	target = gen_reg_rtx (ext_mode);
    }

  /* Operands: output field, structure, field width, starting bit.  */
  create_output_operand (&ops[0], target, ext_mode);
  create_fixed_operand (&ops[1], op0);
  create_integer_operand (&ops[2], bitsize);
  create_integer_operand (&ops[3], bitnum);
  if (maybe_expand_insn (extv->icode, 4, ops))
    {
      target = ops[0].value;
      if (target == spec_target)
	return target;
      if (target == spec_target_subreg)
	return spec_target;
      /* Otherwise convert the extracted value to the requested mode.  */
      return convert_extracted_bit_field (target, mode, tmode, unsignedp);
    }
  return NULL_RTX;
}
1533 | ||
36122326 | 1534 | /* A subroutine of extract_bit_field, with the same arguments. |
1535 | If FALLBACK_P is true, fall back to extract_fixed_bit_field | |
1536 | if we can find no other means of implementing the operation. | |
1537 | if FALLBACK_P is false, return NULL instead. */ | |
1538 | ||
1539 | static rtx | |
1540 | extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize, | |
3f71db40 | 1541 | unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target, |
3754d046 | 1542 | machine_mode mode, machine_mode tmode, |
5d77cce2 | 1543 | bool reverse, bool fallback_p, rtx *alt_rtl) |
db96f378 | 1544 | { |
19cb6b50 | 1545 | rtx op0 = str_rtx; |
3754d046 | 1546 | machine_mode mode1; |
db96f378 | 1547 | |
db96f378 | 1548 | if (tmode == VOIDmode) |
1549 | tmode = mode; | |
804e9c91 | 1550 | |
db96f378 | 1551 | while (GET_CODE (op0) == SUBREG) |
1552 | { | |
bc5449fc | 1553 | bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT; |
db96f378 | 1554 | op0 = SUBREG_REG (op0); |
1555 | } | |
c88df841 | 1556 | |
bc5449fc | 1557 | /* If we have an out-of-bounds access to a register, just return an |
2fb89879 | 1558 | uninitialized register of the required mode. This can occur if the |
bc5449fc | 1559 | source code contains an out-of-bounds access to a small array. */ |
1560 | if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0))) | |
1561 | return gen_reg_rtx (tmode); | |
1562 | ||
8ad4c111 | 1563 | if (REG_P (op0) |
3664abd2 | 1564 | && mode == GET_MODE (op0) |
1565 | && bitnum == 0 | |
bc9d42da | 1566 | && bitsize == GET_MODE_BITSIZE (GET_MODE (op0))) |
3664abd2 | 1567 | { |
292237f3 | 1568 | if (reverse) |
1569 | op0 = flip_storage_order (mode, op0); | |
bc9d42da | 1570 | /* We're trying to extract a full register from itself. */ |
3664abd2 | 1571 | return op0; |
1572 | } | |
1573 | ||
447443f5 | 1574 | /* First try to check for vector from vector extractions. */ |
1575 | if (VECTOR_MODE_P (GET_MODE (op0)) | |
1576 | && !MEM_P (op0) | |
1577 | && VECTOR_MODE_P (tmode) | |
1578 | && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (tmode)) | |
1579 | { | |
1580 | machine_mode new_mode = GET_MODE (op0); | |
1581 | if (GET_MODE_INNER (new_mode) != GET_MODE_INNER (tmode)) | |
1582 | { | |
ab53cba7 | 1583 | scalar_mode inner_mode = GET_MODE_INNER (tmode); |
1584 | unsigned int nunits = (GET_MODE_BITSIZE (GET_MODE (op0)) | |
1585 | / GET_MODE_UNIT_BITSIZE (tmode)); | |
1586 | if (!mode_for_vector (inner_mode, nunits).exists (&new_mode) | |
1587 | || !VECTOR_MODE_P (new_mode) | |
447443f5 | 1588 | || GET_MODE_SIZE (new_mode) != GET_MODE_SIZE (GET_MODE (op0)) |
1589 | || GET_MODE_INNER (new_mode) != GET_MODE_INNER (tmode) | |
1590 | || !targetm.vector_mode_supported_p (new_mode)) | |
1591 | new_mode = VOIDmode; | |
1592 | } | |
1593 | if (new_mode != VOIDmode | |
1594 | && (convert_optab_handler (vec_extract_optab, new_mode, tmode) | |
1595 | != CODE_FOR_nothing) | |
1596 | && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (tmode) | |
1597 | == bitnum / GET_MODE_BITSIZE (tmode))) | |
1598 | { | |
1599 | struct expand_operand ops[3]; | |
1600 | machine_mode outermode = new_mode; | |
1601 | machine_mode innermode = tmode; | |
1602 | enum insn_code icode | |
1603 | = convert_optab_handler (vec_extract_optab, outermode, innermode); | |
1604 | unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode); | |
1605 | ||
1606 | if (new_mode != GET_MODE (op0)) | |
1607 | op0 = gen_lowpart (new_mode, op0); | |
1608 | create_output_operand (&ops[0], target, innermode); | |
1609 | ops[0].target = 1; | |
1610 | create_input_operand (&ops[1], op0, outermode); | |
1611 | create_integer_operand (&ops[2], pos); | |
1612 | if (maybe_expand_insn (icode, 3, ops)) | |
1613 | { | |
1614 | if (alt_rtl && ops[0].target) | |
1615 | *alt_rtl = target; | |
1616 | target = ops[0].value; | |
1617 | if (GET_MODE (target) != mode) | |
1618 | return gen_lowpart (tmode, target); | |
1619 | return target; | |
1620 | } | |
1621 | } | |
1622 | } | |
1623 | ||
8ea8de24 | 1624 | /* See if we can get a better vector mode before extracting. */ |
1625 | if (VECTOR_MODE_P (GET_MODE (op0)) | |
1626 | && !MEM_P (op0) | |
1627 | && GET_MODE_INNER (GET_MODE (op0)) != tmode) | |
1628 | { | |
3754d046 | 1629 | machine_mode new_mode; |
8ea8de24 | 1630 | |
1631 | if (GET_MODE_CLASS (tmode) == MODE_FLOAT) | |
1632 | new_mode = MIN_MODE_VECTOR_FLOAT; | |
06f0b99c | 1633 | else if (GET_MODE_CLASS (tmode) == MODE_FRACT) |
1634 | new_mode = MIN_MODE_VECTOR_FRACT; | |
1635 | else if (GET_MODE_CLASS (tmode) == MODE_UFRACT) | |
1636 | new_mode = MIN_MODE_VECTOR_UFRACT; | |
1637 | else if (GET_MODE_CLASS (tmode) == MODE_ACCUM) | |
1638 | new_mode = MIN_MODE_VECTOR_ACCUM; | |
1639 | else if (GET_MODE_CLASS (tmode) == MODE_UACCUM) | |
1640 | new_mode = MIN_MODE_VECTOR_UACCUM; | |
8ea8de24 | 1641 | else |
1642 | new_mode = MIN_MODE_VECTOR_INT; | |
1643 | ||
19a4dce4 | 1644 | FOR_EACH_MODE_FROM (new_mode, new_mode) |
d5f2f2c4 | 1645 | if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0)) |
55143495 | 1646 | && GET_MODE_UNIT_SIZE (new_mode) == GET_MODE_SIZE (tmode) |
8ea8de24 | 1647 | && targetm.vector_mode_supported_p (new_mode)) |
1648 | break; | |
1649 | if (new_mode != VOIDmode) | |
1650 | op0 = gen_lowpart (new_mode, op0); | |
1651 | } | |
1652 | ||
b8d2bcdd | 1653 | /* Use vec_extract patterns for extracting parts of vectors whenever |
1654 | available. */ | |
9fcae33e | 1655 | machine_mode outermode = GET_MODE (op0); |
1656 | scalar_mode innermode = GET_MODE_INNER (outermode); | |
1657 | if (VECTOR_MODE_P (outermode) | |
e16ceb8e | 1658 | && !MEM_P (op0) |
9fcae33e | 1659 | && (convert_optab_handler (vec_extract_optab, outermode, innermode) |
447443f5 | 1660 | != CODE_FOR_nothing) |
9fcae33e | 1661 | && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (innermode) |
1662 | == bitnum / GET_MODE_BITSIZE (innermode))) | |
b8d2bcdd | 1663 | { |
8786db1e | 1664 | struct expand_operand ops[3]; |
447443f5 | 1665 | enum insn_code icode |
1666 | = convert_optab_handler (vec_extract_optab, outermode, innermode); | |
cb746719 | 1667 | unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode); |
b8d2bcdd | 1668 | |
8786db1e | 1669 | create_output_operand (&ops[0], target, innermode); |
5d77cce2 | 1670 | ops[0].target = 1; |
8786db1e | 1671 | create_input_operand (&ops[1], op0, outermode); |
1672 | create_integer_operand (&ops[2], pos); | |
1673 | if (maybe_expand_insn (icode, 3, ops)) | |
b8d2bcdd | 1674 | { |
5d77cce2 | 1675 | if (alt_rtl && ops[0].target) |
1676 | *alt_rtl = target; | |
8786db1e | 1677 | target = ops[0].value; |
1678 | if (GET_MODE (target) != mode) | |
1679 | return gen_lowpart (tmode, target); | |
1680 | return target; | |
b8d2bcdd | 1681 | } |
1682 | } | |
1683 | ||
86cde393 | 1684 | /* Make sure we are playing with integral modes. Pun with subregs |
1685 | if we aren't. */ | |
5e864fc6 | 1686 | opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0)); |
2cf1bb25 | 1687 | scalar_int_mode imode; |
5e864fc6 | 1688 | if (!op0_mode.exists (&imode) || imode != GET_MODE (op0)) |
2cf1bb25 | 1689 | { |
1690 | if (MEM_P (op0)) | |
5e864fc6 | 1691 | op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (), |
2cf1bb25 | 1692 | 0, MEM_SIZE (op0)); |
5e864fc6 | 1693 | else if (op0_mode.exists (&imode)) |
2cf1bb25 | 1694 | { |
1695 | op0 = gen_lowpart (imode, op0); | |
1696 | ||
1697 | /* If we got a SUBREG, force it into a register since we | |
1698 | aren't going to be able to do another SUBREG on it. */ | |
1699 | if (GET_CODE (op0) == SUBREG) | |
1700 | op0 = force_reg (imode, op0); | |
1701 | } | |
1702 | else | |
1703 | { | |
1704 | HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (op0)); | |
1705 | rtx mem = assign_stack_temp (GET_MODE (op0), size); | |
1706 | emit_move_insn (mem, op0); | |
1707 | op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size); | |
1708 | } | |
1709 | } | |
86cde393 | 1710 | |
804e9c91 | 1711 | /* ??? We currently assume TARGET is at least as big as BITSIZE. |
1712 | If that's wrong, the solution is to test for it and set TARGET to 0 | |
1713 | if needed. */ | |
ce7c9eb1 | 1714 | |
72e9011e | 1715 | /* Get the mode of the field to use for atomic access or subreg |
1716 | conversion. */ | |
ef379746 | 1717 | if (!SCALAR_INT_MODE_P (tmode) |
1718 | || !mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0).exists (&mode1)) | |
1719 | mode1 = mode; | |
ebf8dd06 | 1720 | gcc_assert (mode1 != BLKmode); |
1721 | ||
1722 | /* Extraction of a full MODE1 value can be done with a subreg as long | |
1723 | as the least significant bit of the value is the least significant | |
1724 | bit of either OP0 or a word of OP0. */ | |
1725 | if (!MEM_P (op0) | |
292237f3 | 1726 | && !reverse |
5e864fc6 | 1727 | && lowpart_bit_field_p (bitnum, bitsize, op0_mode.require ()) |
ebf8dd06 | 1728 | && bitsize == GET_MODE_BITSIZE (mode1) |
5e864fc6 | 1729 | && TRULY_NOOP_TRUNCATION_MODES_P (mode1, op0_mode.require ())) |
ebf8dd06 | 1730 | { |
5e864fc6 | 1731 | rtx sub = simplify_gen_subreg (mode1, op0, op0_mode.require (), |
ebf8dd06 | 1732 | bitnum / BITS_PER_UNIT); |
1733 | if (sub) | |
1734 | return convert_extracted_bit_field (sub, mode, tmode, unsignedp); | |
1735 | } | |
1736 | ||
1737 | /* Extraction of a full MODE1 value can be done with a load as long as | |
1738 | the field is on a byte boundary and is sufficiently aligned. */ | |
06bedae0 | 1739 | if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1)) |
ebf8dd06 | 1740 | { |
1741 | op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT); | |
292237f3 | 1742 | if (reverse) |
1743 | op0 = flip_storage_order (mode1, op0); | |
ebf8dd06 | 1744 | return convert_extracted_bit_field (op0, mode, tmode, unsignedp); |
db96f378 | 1745 | } |
ebf8dd06 | 1746 | |
db96f378 | 1747 | /* Handle fields bigger than a word. */ |
a4194ff7 | 1748 | |
db96f378 | 1749 | if (bitsize > BITS_PER_WORD) |
1750 | { | |
1751 | /* Here we transfer the words of the field | |
1752 | in the order least significant first. | |
1753 | This is because the most significant word is the one which may | |
1754 | be less than full. */ | |
1755 | ||
292237f3 | 1756 | const bool backwards = WORDS_BIG_ENDIAN; |
02e7a332 | 1757 | unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD; |
1758 | unsigned int i; | |
89ca96ce | 1759 | rtx_insn *last; |
db96f378 | 1760 | |
e09c2930 | 1761 | if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target)) |
db96f378 | 1762 | target = gen_reg_rtx (mode); |
1763 | ||
d342df18 | 1764 | /* In case we're about to clobber a base register or something |
1765 | (see gcc.c-torture/execute/20040625-1.c). */ | |
1766 | if (reg_mentioned_p (target, str_rtx)) | |
1767 | target = gen_reg_rtx (mode); | |
1768 | ||
625d6efb | 1769 | /* Indicate for flow that the entire target reg is being set. */ |
18b42941 | 1770 | emit_clobber (target); |
625d6efb | 1771 | |
2d0fd66d | 1772 | last = get_last_insn (); |
db96f378 | 1773 | for (i = 0; i < nwords; i++) |
1774 | { | |
1775 | /* If I is 0, use the low-order word in both field and target; | |
1776 | if I is 1, use the next to lowest word; and so on. */ | |
c88df841 | 1777 | /* Word number in TARGET to use. */ |
02e7a332 | 1778 | unsigned int wordnum |
8744b4c5 | 1779 | = (backwards |
02e7a332 | 1780 | ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1 |
1781 | : i); | |
c88df841 | 1782 | /* Offset from start of field in OP0. */ |
292237f3 | 1783 | unsigned int bit_offset = (backwards ^ reverse |
8744b4c5 | 1784 | ? MAX ((int) bitsize - ((int) i + 1) |
1785 | * BITS_PER_WORD, | |
1786 | 0) | |
02e7a332 | 1787 | : (int) i * BITS_PER_WORD); |
db96f378 | 1788 | rtx target_part = operand_subword (target, wordnum, 1, VOIDmode); |
1789 | rtx result_part | |
2d0fd66d | 1790 | = extract_bit_field_1 (op0, MIN (BITS_PER_WORD, |
1791 | bitsize - i * BITS_PER_WORD), | |
3f71db40 | 1792 | bitnum + bit_offset, 1, target_part, |
5d77cce2 | 1793 | mode, word_mode, reverse, fallback_p, NULL); |
db96f378 | 1794 | |
611234b4 | 1795 | gcc_assert (target_part); |
2d0fd66d | 1796 | if (!result_part) |
1797 | { | |
1798 | delete_insns_since (last); | |
1799 | return NULL; | |
1800 | } | |
db96f378 | 1801 | |
1802 | if (result_part != target_part) | |
1803 | emit_move_insn (target_part, result_part); | |
1804 | } | |
1805 | ||
6d292981 | 1806 | if (unsignedp) |
c88df841 | 1807 | { |
1808 | /* Unless we've filled TARGET, the upper regs in a multi-reg value | |
1809 | need to be zero'd out. */ | |
1810 | if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD) | |
1811 | { | |
02e7a332 | 1812 | unsigned int i, total_words; |
c88df841 | 1813 | |
1814 | total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD; | |
1815 | for (i = nwords; i < total_words; i++) | |
2c269e73 | 1816 | emit_move_insn |
1817 | (operand_subword (target, | |
8744b4c5 | 1818 | backwards ? total_words - i - 1 : i, |
2c269e73 | 1819 | 1, VOIDmode), |
1820 | const0_rtx); | |
c88df841 | 1821 | } |
1822 | return target; | |
1823 | } | |
1824 | ||
6d292981 | 1825 | /* Signed bit field: sign-extend with two arithmetic shifts. */ |
1826 | target = expand_shift (LSHIFT_EXPR, mode, target, | |
f5ff0b21 | 1827 | GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0); |
6d292981 | 1828 | return expand_shift (RSHIFT_EXPR, mode, target, |
f5ff0b21 | 1829 | GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0); |
db96f378 | 1830 | } |
a4194ff7 | 1831 | |
ebf8dd06 | 1832 | /* If OP0 is a multi-word register, narrow it to the affected word. |
1833 | If the region spans two words, defer to extract_split_bit_field. */ | |
5e864fc6 | 1834 | if (!MEM_P (op0) && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD) |
db96f378 | 1835 | { |
22e14902 | 1836 | if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD) |
650df5df | 1837 | { |
ebf8dd06 | 1838 | if (!fallback_p) |
1839 | return NULL_RTX; | |
5e864fc6 | 1840 | target = extract_split_bit_field (op0, op0_mode, bitsize, bitnum, |
1841 | unsignedp, reverse); | |
ebf8dd06 | 1842 | return convert_extracted_bit_field (target, mode, tmode, unsignedp); |
650df5df | 1843 | } |
5e864fc6 | 1844 | op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (), |
22e14902 | 1845 | bitnum / BITS_PER_WORD * UNITS_PER_WORD); |
5e864fc6 | 1846 | op0_mode = word_mode; |
22e14902 | 1847 | bitnum %= BITS_PER_WORD; |
db96f378 | 1848 | } |
db96f378 | 1849 | |
ebf8dd06 | 1850 | /* From here on we know the desired field is smaller than a word. |
1851 | If OP0 is a register, it too fits within a word. */ | |
26427966 | 1852 | enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv; |
1853 | extraction_insn extv; | |
1854 | if (!MEM_P (op0) | |
292237f3 | 1855 | && !reverse |
bd23a499 | 1856 | /* ??? We could limit the structure size to the part of OP0 that |
1857 | contains the field, with appropriate checks for endianness | |
050dd610 | 1858 | and TARGET_TRULY_NOOP_TRUNCATION. */ |
bd23a499 | 1859 | && get_best_reg_extraction_insn (&extv, pattern, |
5e864fc6 | 1860 | GET_MODE_BITSIZE (op0_mode.require ()), |
26427966 | 1861 | tmode)) |
db96f378 | 1862 | { |
5e864fc6 | 1863 | rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode, |
1864 | bitsize, bitnum, | |
ba8869c8 | 1865 | unsignedp, target, mode, |
26427966 | 1866 | tmode); |
ba8869c8 | 1867 | if (result) |
1868 | return result; | |
db96f378 | 1869 | } |
51356f86 | 1870 | |
36122326 | 1871 | /* If OP0 is a memory, try copying it to a register and seeing if a |
1872 | cheap register alternative is available. */ | |
292237f3 | 1873 | if (MEM_P (op0) & !reverse) |
36122326 | 1874 | { |
72e9011e | 1875 | if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum, |
1876 | tmode)) | |
f77dcefa | 1877 | { |
5e864fc6 | 1878 | rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode, |
1879 | bitsize, bitnum, | |
1880 | unsignedp, target, mode, | |
26427966 | 1881 | tmode); |
f77dcefa | 1882 | if (result) |
1883 | return result; | |
1884 | } | |
1885 | ||
89ca96ce | 1886 | rtx_insn *last = get_last_insn (); |
51356f86 | 1887 | |
26427966 | 1888 | /* Try loading part of OP0 into a register and extracting the |
1889 | bitfield from that. */ | |
1890 | unsigned HOST_WIDE_INT bitpos; | |
1891 | rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum, | |
1892 | 0, 0, tmode, &bitpos); | |
1893 | if (xop0) | |
36122326 | 1894 | { |
26427966 | 1895 | xop0 = copy_to_reg (xop0); |
1896 | rtx result = extract_bit_field_1 (xop0, bitsize, bitpos, | |
3f71db40 | 1897 | unsignedp, target, |
5d77cce2 | 1898 | mode, tmode, reverse, false, NULL); |
26427966 | 1899 | if (result) |
1900 | return result; | |
1901 | delete_insns_since (last); | |
a4194ff7 | 1902 | } |
db96f378 | 1903 | } |
4b6262b1 | 1904 | |
36122326 | 1905 | if (!fallback_p) |
1906 | return NULL; | |
1907 | ||
ebf8dd06 | 1908 | /* Find a correspondingly-sized integer field, so we can apply |
1909 | shifts and masks to it. */ | |
2cf1bb25 | 1910 | scalar_int_mode int_mode; |
1911 | if (!int_mode_for_mode (tmode).exists (&int_mode)) | |
1912 | /* If this fails, we should probably push op0 out to memory and then | |
1913 | do a load. */ | |
1914 | int_mode = int_mode_for_mode (mode).require (); | |
ebf8dd06 | 1915 | |
5e864fc6 | 1916 | target = extract_fixed_bit_field (int_mode, op0, op0_mode, bitsize, |
1917 | bitnum, target, unsignedp, reverse); | |
292237f3 | 1918 | |
1919 | /* Complex values must be reversed piecewise, so we need to undo the global | |
1920 | reversal, convert to the complex mode and reverse again. */ | |
1921 | if (reverse && COMPLEX_MODE_P (tmode)) | |
1922 | { | |
1923 | target = flip_storage_order (int_mode, target); | |
1924 | target = convert_extracted_bit_field (target, mode, tmode, unsignedp); | |
1925 | target = flip_storage_order (tmode, target); | |
1926 | } | |
1927 | else | |
1928 | target = convert_extracted_bit_field (target, mode, tmode, unsignedp); | |
1929 | ||
1930 | return target; | |
36122326 | 1931 | } |
1932 | ||
/* Generate code to extract a byte-field from STR_RTX
   containing BITSIZE bits, starting at BITNUM,
   and put it in TARGET if possible (if TARGET is nonzero).
   Regardless of TARGET, we return the rtx for where the value is placed.

   STR_RTX is the structure containing the byte (a REG or MEM).
   UNSIGNEDP is nonzero if this is an unsigned bit field.
   MODE is the natural mode of the field value once extracted.
   TMODE is the mode the caller would like the value to have;
   but the value may be returned with type MODE instead.

   If REVERSE is true, the extraction is to be done in reverse order.

   If a TARGET is specified and we can store in it at no extra cost,
   we do so, and return TARGET.
   Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
   if they are equally easy.  */

rtx
extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
		   unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
		   machine_mode mode, machine_mode tmode, bool reverse,
		   rtx *alt_rtl)
{
  machine_mode mode1;

  /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
  if (GET_MODE_BITSIZE (GET_MODE (str_rtx)) > 0)
    mode1 = GET_MODE (str_rtx);
  else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
    mode1 = GET_MODE (target);
  else
    mode1 = tmode;

  scalar_int_mode int_mode;
  if (is_a <scalar_int_mode> (mode1, &int_mode)
      && strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, int_mode, 0, 0))
    {
      /* Extraction of a full INT_MODE value can be done with a simple load.
	 We know here that the field can be accessed with one single
	 instruction.  For targets that support unaligned memory,
	 an unaligned access may be necessary.  */
      if (bitsize == GET_MODE_BITSIZE (int_mode))
	{
	  rtx result = adjust_bitfield_address (str_rtx, int_mode,
						bitnum / BITS_PER_UNIT);
	  if (reverse)
	    result = flip_storage_order (int_mode, result);
	  gcc_assert (bitnum % BITS_PER_UNIT == 0);
	  return convert_extracted_bit_field (result, mode, tmode, unsignedp);
	}

      /* Otherwise narrow the memory reference to the INT_MODE chunk that
	 contains the whole field and copy it to a register, so the
	 shifting and masking below operate on the register copy.  */
      str_rtx = narrow_bit_field_mem (str_rtx, int_mode, bitsize, bitnum,
				      &bitnum);
      gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (int_mode));
      str_rtx = copy_to_reg (str_rtx);
    }

  return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
			      target, mode, tmode, reverse, true, alt_rtl);
}
1994 | \f | |
ebf8dd06 | 1995 | /* Use shifts and boolean operations to extract a field of BITSIZE bits |
5e864fc6 | 1996 | from bit BITNUM of OP0. If OP0_MODE is defined, it is the mode of OP0, |
1997 | otherwise OP0 is a BLKmode MEM. | |
db96f378 | 1998 | |
1999 | UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value). | |
292237f3 | 2000 | If REVERSE is true, the extraction is to be done in reverse order. |
2001 | ||
db96f378 | 2002 | If TARGET is nonzero, attempts to store the value there |
2003 | and return TARGET, but this is not guaranteed. | |
2c269e73 | 2004 | If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */ |
db96f378 | 2005 | |
2006 | static rtx | |
3754d046 | 2007 | extract_fixed_bit_field (machine_mode tmode, rtx op0, |
5e864fc6 | 2008 | opt_scalar_int_mode op0_mode, |
35cb5232 | 2009 | unsigned HOST_WIDE_INT bitsize, |
ebf8dd06 | 2010 | unsigned HOST_WIDE_INT bitnum, rtx target, |
292237f3 | 2011 | int unsignedp, bool reverse) |
db96f378 | 2012 | { |
5e864fc6 | 2013 | scalar_int_mode mode; |
ebf8dd06 | 2014 | if (MEM_P (op0)) |
db96f378 | 2015 | { |
1572c088 | 2016 | if (!get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0), |
2017 | BITS_PER_WORD, MEM_VOLATILE_P (op0), &mode)) | |
db96f378 | 2018 | /* The only way this should occur is if the field spans word |
2019 | boundaries. */ | |
5e864fc6 | 2020 | return extract_split_bit_field (op0, op0_mode, bitsize, bitnum, |
2021 | unsignedp, reverse); | |
db96f378 | 2022 | |
72e9011e | 2023 | op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum); |
db96f378 | 2024 | } |
5e864fc6 | 2025 | else |
2026 | mode = op0_mode.require (); | |
db96f378 | 2027 | |
5e864fc6 | 2028 | return extract_fixed_bit_field_1 (tmode, op0, mode, bitsize, bitnum, |
292237f3 | 2029 | target, unsignedp, reverse); |
b4d02378 | 2030 | } |
2031 | ||
/* Helper function for extract_fixed_bit_field, extracts
   the bit field always using MODE, which is the mode of OP0.
   The other arguments are as for extract_fixed_bit_field.  */

static rtx
extract_fixed_bit_field_1 (machine_mode tmode, rtx op0, scalar_int_mode mode,
			   unsigned HOST_WIDE_INT bitsize,
			   unsigned HOST_WIDE_INT bitnum, rtx target,
			   int unsignedp, bool reverse)
{
  /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
     for invalid input, such as extract equivalent of f5 from
     gcc.dg/pr48335-2.c.  */

  if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
    /* BITNUM is the distance between our msb and that of OP0.
       Convert it to the distance from the lsb.  */
    bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;

  /* Now BITNUM is always the distance between the field's lsb and that of OP0.
     We have reduced the big-endian case to the little-endian case.  */
  if (reverse)
    op0 = flip_storage_order (mode, op0);

  if (unsignedp)
    {
      if (bitnum)
	{
	  /* If the field does not already start at the lsb,
	     shift it so it does.  */
	  /* Maybe propagate the target for the shift.  */
	  rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
	  if (tmode != mode)
	    subtarget = 0;
	  op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
	}
      /* Convert the value to the desired mode.  TMODE must also be a
	 scalar integer for this conversion to make sense, since we
	 shouldn't reinterpret the bits.  */
      scalar_int_mode new_mode = as_a <scalar_int_mode> (tmode);
      if (mode != new_mode)
	op0 = convert_to_mode (new_mode, op0, 1);

      /* Unless the msb of the field used to be the msb when we shifted,
	 mask out the upper bits.  */

      if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
	return expand_binop (new_mode, and_optab, op0,
			     mask_rtx (new_mode, 0, bitsize, 0),
			     target, 1, OPTAB_LIB_WIDEN);
      return op0;
    }

  /* To extract a signed bit-field, first shift its msb to the msb of the word,
     then arithmetic-shift its lsb to the lsb of the word.  */
  op0 = force_reg (mode, op0);

  /* Find the narrowest integer mode that contains the field.  */

  opt_scalar_int_mode mode_iter;
  FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_INT)
    if (GET_MODE_BITSIZE (mode_iter.require ()) >= bitsize + bitnum)
      break;

  mode = mode_iter.require ();
  op0 = convert_to_mode (mode, op0, 0);

  /* The shifts below produce their result in MODE; TARGET is only
     directly usable if it has that mode.  */
  if (mode != tmode)
    target = 0;

  if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
    {
      int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
      /* Maybe propagate the target for the shift.  */
      rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
      op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
    }

  return expand_shift (RSHIFT_EXPR, mode, op0,
		       GET_MODE_BITSIZE (mode) - bitsize, target, 0);
}
db96f378 | 2113 | |
/* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
   VALUE << BITPOS.  */

static rtx
lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
	      int bitpos)
{
  /* Do the shift with the wide-int routines so that bits shifted beyond
     HOST_WIDE_INT width are handled for any MODE.  */
  return immed_wide_int_const (wi::lshift (value, bitpos), mode);
}
2123 | \f | |
/* Extract a bit field that is split across two words
   and return an RTX for the result.

   OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
   BITSIZE is the field width; BITPOS, position of its first bit, in the word.
   UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.
   If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
   a BLKmode MEM.

   If REVERSE is true, the extraction is to be done in reverse order.

   Strategy: pull the field out one word-bounded piece at a time as an
   unsigned bit field, shift each piece into its final position, and IOR
   the pieces together.  Sign extension, if requested, is done at the end
   with a left/right arithmetic shift pair.  */

static rtx
extract_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
			 unsigned HOST_WIDE_INT bitsize,
			 unsigned HOST_WIDE_INT bitpos, int unsignedp,
			 bool reverse)
{
  unsigned int unit;		/* Size in bits of each chunk we step by.  */
  unsigned int bitsdone = 0;	/* Bits of the field extracted so far.  */
  rtx result = NULL_RTX;
  int first = 1;

  /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
     much at a time.  For a MEM, the alignment further limits how much we
     may safely access in one go.  */
  if (REG_P (op0) || GET_CODE (op0) == SUBREG)
    unit = BITS_PER_WORD;
  else
    unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);

  while (bitsdone < bitsize)
    {
      unsigned HOST_WIDE_INT thissize;
      rtx part;
      unsigned HOST_WIDE_INT thispos;
      unsigned HOST_WIDE_INT offset;

      /* Position of the next piece, as a UNIT index plus a bit offset.  */
      offset = (bitpos + bitsdone) / unit;
      thispos = (bitpos + bitsdone) % unit;

      /* THISSIZE must not overrun a word boundary.  Otherwise,
	 extract_fixed_bit_field will call us again, and we will mutually
	 recurse forever.  */
      thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
      thissize = MIN (thissize, unit - thispos);

      /* If OP0 is a register, then handle OFFSET here.  */
      rtx op0_piece = op0;
      opt_scalar_int_mode op0_piece_mode = op0_mode;
      if (SUBREG_P (op0) || REG_P (op0))
	{
	  /* Materialize the word containing this piece; OFFSET is then
	     folded into the piece and reset to zero.  */
	  op0_piece = operand_subword_force (op0, offset, op0_mode.require ());
	  op0_piece_mode = word_mode;
	  offset = 0;
	}

      /* Extract the parts in bit-counting order,
	 whose meaning is determined by BYTES_PER_UNIT.
	 OFFSET is in UNITs, and UNIT is in bits.
	 Each piece is extracted zero-extended (unsignedp = 1) so the
	 later IOR combination is correct.  */
      part = extract_fixed_bit_field (word_mode, op0_piece, op0_piece_mode,
				      thissize, offset * unit + thispos,
				      0, 1, reverse);
      bitsdone += thissize;

      /* Shift this part into place for the result.  The shift amount
	 depends on whether pieces are gathered from the most- or
	 least-significant end (endianness, possibly reversed).  */
      if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
	{
	  if (bitsize != bitsdone)
	    part = expand_shift (LSHIFT_EXPR, word_mode, part,
				 bitsize - bitsdone, 0, 1);
	}
      else
	{
	  if (bitsdone != thissize)
	    part = expand_shift (LSHIFT_EXPR, word_mode, part,
				 bitsdone - thissize, 0, 1);
	}

      if (first)
	result = part;
      else
	/* Combine the parts with bitwise or.  This works
	   because we extracted each part as an unsigned bit field.  */
	result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
			       OPTAB_LIB_WIDEN);

      first = 0;
    }

  /* Unsigned bit field: we are done.  */
  if (unsignedp)
    return result;
  /* Signed bit field: sign-extend with two arithmetic shifts.  */
  result = expand_shift (LSHIFT_EXPR, word_mode, result,
			 BITS_PER_WORD - bitsize, NULL_RTX, 0);
  return expand_shift (RSHIFT_EXPR, word_mode, result,
		       BITS_PER_WORD - bitsize, NULL_RTX, 0);
}
2221 | \f | |
/* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
   the bit pattern.  SRC_MODE is the mode of SRC; if this is smaller than
   MODE, fill the upper bits with zeros.  Fail if the layout of either
   mode is unknown (as for CC modes) or if the extraction would involve
   unprofitable mode punning.  Return the value on success, otherwise
   return null.

   This is different from gen_lowpart* in these respects:

     - the returned value must always be considered an rvalue

     - when MODE is wider than SRC_MODE, the extraction involves
       a zero extension

     - when MODE is smaller than SRC_MODE, the extraction involves
       a truncation (and is thus subject to TARGET_TRULY_NOOP_TRUNCATION).

   In other words, this routine performs a computation, whereas the
   gen_lowpart* routines are conceptually lvalue or rvalue subreg
   operations.  */

rtx
extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
{
  scalar_int_mode int_mode, src_int_mode;

  /* Trivial case: no mode change requested.  */
  if (mode == src_mode)
    return src;

  if (CONSTANT_P (src))
    {
      /* simplify_gen_subreg can't be used here, as if simplify_subreg
	 fails, it will happily create (subreg (symbol_ref)) or similar
	 invalid SUBREGs.  */
      unsigned int byte = subreg_lowpart_offset (mode, src_mode);
      rtx ret = simplify_subreg (mode, src, src_mode, byte);
      if (ret)
	return ret;

      if (GET_MODE (src) == VOIDmode
	  || !validate_subreg (mode, src_mode, src, byte))
	return NULL_RTX;

      /* Force the constant into a register so a SUBREG of it is valid.  */
      src = force_reg (GET_MODE (src), src);
      return gen_rtx_SUBREG (mode, src, byte);
    }

  /* CC modes have no defined bit layout, so the extraction is
     meaningless.  */
  if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
    return NULL_RTX;

  /* Same-sized, tieable modes: a plain lowpart preserves the bits.  */
  if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
      && targetm.modes_tieable_p (mode, src_mode))
    {
      rtx x = gen_lowpart_common (mode, src);
      if (x)
	return x;
    }

  /* Otherwise go through integer modes of the same sizes: lowpart to
     SRC_INT_MODE, zero-extend/truncate to INT_MODE, lowpart to MODE.
     Fail when no such integer modes exist...  */
  if (!int_mode_for_mode (src_mode).exists (&src_int_mode)
      || !int_mode_for_mode (mode).exists (&int_mode))
    return NULL_RTX;

  /* ...or when either mode punning step would be unprofitable on the
     target.  */
  if (!targetm.modes_tieable_p (src_int_mode, src_mode))
    return NULL_RTX;
  if (!targetm.modes_tieable_p (int_mode, mode))
    return NULL_RTX;

  src = gen_lowpart (src_int_mode, src);
  src = convert_modes (int_mode, src_int_mode, src, true);
  src = gen_lowpart (mode, src);
  return src;
}
2294 | \f | |
db96f378 | 2295 | /* Add INC into TARGET. */ |
2296 | ||
2297 | void | |
35cb5232 | 2298 | expand_inc (rtx target, rtx inc) |
db96f378 | 2299 | { |
2300 | rtx value = expand_binop (GET_MODE (target), add_optab, | |
2301 | target, inc, | |
2302 | target, 0, OPTAB_LIB_WIDEN); | |
2303 | if (value != target) | |
2304 | emit_move_insn (target, value); | |
2305 | } | |
2306 | ||
2307 | /* Subtract DEC from TARGET. */ | |
2308 | ||
2309 | void | |
35cb5232 | 2310 | expand_dec (rtx target, rtx dec) |
db96f378 | 2311 | { |
2312 | rtx value = expand_binop (GET_MODE (target), sub_optab, | |
2313 | target, dec, | |
2314 | target, 0, OPTAB_LIB_WIDEN); | |
2315 | if (value != target) | |
2316 | emit_move_insn (target, value); | |
2317 | } | |
2318 | \f | |
/* Output a shift instruction for expression code CODE,
   with SHIFTED being the rtx for the value to shift,
   and AMOUNT the rtx for the amount to shift by.
   Store the result in the rtx TARGET, if that is convenient.
   If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
   Return the rtx for where the value is.
   If that cannot be done, abort the compilation unless MAY_FAIL is true,
   in which case 0 is returned.

   Overall plan: canonicalize the shift count, try a few cheap special
   cases (bswap for a 16-bit rotate by 8, a sequence of additions for a
   cheap-enough constant left shift), then try the appropriate optab with
   progressively more permissive methods (direct, widening, library).  A
   rotate that cannot be open-coded is synthesized as the IOR of two
   shifts.  */

static rtx
expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
		rtx amount, rtx target, int unsignedp, bool may_fail = false)
{
  rtx op1, temp = 0;
  int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
  int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
  optab lshift_optab = ashl_optab;
  optab rshift_arith_optab = ashr_optab;
  optab rshift_uns_optab = lshr_optab;
  optab lrotate_optab = rotl_optab;
  optab rrotate_optab = rotr_optab;
  machine_mode op1_mode;
  /* For vector shifts, sizes below refer to the element mode.  */
  scalar_mode scalar_mode = GET_MODE_INNER (mode);
  int attempt;
  bool speed = optimize_insn_for_speed_p ();

  op1 = amount;
  op1_mode = GET_MODE (op1);

  /* Determine whether the shift/rotate amount is a vector, or scalar.  If the
     shift amount is a vector, use the vector/vector shift patterns.  */
  if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
    {
      lshift_optab = vashl_optab;
      rshift_arith_optab = vashr_optab;
      rshift_uns_optab = vlshr_optab;
      lrotate_optab = vrotl_optab;
      rrotate_optab = vrotr_optab;
    }

  /* Previously detected shift-counts computed by NEGATE_EXPR
     and shifted in the other direction; but that does not work
     on all machines.  */

  if (SHIFT_COUNT_TRUNCATED)
    {
      /* The target truncates shift counts, so reduce a constant count
	 modulo the bitsize, and look through a lowpart SUBREG of the
	 count for the same reason.  */
      if (CONST_INT_P (op1)
	  && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
	      (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
	op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
		       % GET_MODE_BITSIZE (scalar_mode));
      else if (GET_CODE (op1) == SUBREG
	       && subreg_lowpart_p (op1)
	       && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
	       && SCALAR_INT_MODE_P (GET_MODE (op1)))
	op1 = SUBREG_REG (op1);
    }

  /* Canonicalize rotates by constant amount.  If op1 is bitsize / 2,
     prefer left rotation, if op1 is from bitsize / 2 + 1 to
     bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1
     amount instead.  */
  if (rotate
      && CONST_INT_P (op1)
      && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
		   GET_MODE_BITSIZE (scalar_mode) - 1))
    {
      op1 = GEN_INT (GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
      left = !left;
      code = left ? LROTATE_EXPR : RROTATE_EXPR;
    }

  /* Rotation of 16bit values by 8 bits is effectively equivalent to a bswaphi.
     Note that this is not the case for bigger values.  For instance a rotation
     of 0x01020304 by 16 bits gives 0x03040102 which is different from
     0x04030201 (bswapsi).  */
  if (rotate
      && CONST_INT_P (op1)
      && INTVAL (op1) == BITS_PER_UNIT
      && GET_MODE_SIZE (scalar_mode) == 2
      && optab_handler (bswap_optab, HImode) != CODE_FOR_nothing)
    return expand_unop (HImode, bswap_optab, shifted, NULL_RTX,
			unsignedp);

  /* Shifting by zero is a no-op.  */
  if (op1 == const0_rtx)
    return shifted;

  /* Check whether its cheaper to implement a left shift by a constant
     bit count by a sequence of additions.  */
  if (code == LSHIFT_EXPR
      && CONST_INT_P (op1)
      && INTVAL (op1) > 0
      && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
      && INTVAL (op1) < MAX_BITS_PER_WORD
      && (shift_cost (speed, mode, INTVAL (op1))
	  > INTVAL (op1) * add_cost (speed, mode))
      && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
    {
      int i;
      /* Each iteration doubles SHIFTED, i.e. shifts left by one bit.  */
      for (i = 0; i < INTVAL (op1); i++)
	{
	  temp = force_reg (mode, shifted);
	  shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
				  unsignedp, OPTAB_LIB_WIDEN);
	}
      return shifted;
    }

  /* Try progressively more permissive expansion methods until one
     succeeds: direct insn, widening, then library call.  */
  for (attempt = 0; temp == 0 && attempt < 3; attempt++)
    {
      enum optab_methods methods;

      if (attempt == 0)
	methods = OPTAB_DIRECT;
      else if (attempt == 1)
	methods = OPTAB_WIDEN;
      else
	methods = OPTAB_LIB_WIDEN;

      if (rotate)
	{
	  /* Widening does not work for rotation.  */
	  if (methods == OPTAB_WIDEN)
	    continue;
	  else if (methods == OPTAB_LIB_WIDEN)
	    {
	      /* If we have been unable to open-code this by a rotation,
		 do it as the IOR of two shifts.  I.e., to rotate A
		 by N bits, compute
		 (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
		 where C is the bitsize of A.

		 It is theoretically possible that the target machine might
		 not be able to perform either shift and hence we would
		 be making two libcalls rather than just the one for the
		 shift (similarly if IOR could not be done).  We will allow
		 this extremely unlikely lossage to avoid complicating the
		 code below.  */

	      rtx subtarget = target == shifted ? 0 : target;
	      rtx new_amount, other_amount;
	      rtx temp1;

	      new_amount = op1;
	      if (op1 == const0_rtx)
		return shifted;
	      else if (CONST_INT_P (op1))
		other_amount = GEN_INT (GET_MODE_BITSIZE (scalar_mode)
					- INTVAL (op1));
	      else
		{
		  /* Variable count: compute (-N) & (C - 1) at runtime.  */
		  other_amount
		    = simplify_gen_unary (NEG, GET_MODE (op1),
					  op1, GET_MODE (op1));
		  HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
		  other_amount
		    = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
					   gen_int_mode (mask,
							 GET_MODE (op1)));
		}

	      /* SHIFTED is used twice below, so make sure it is a
		 register.  */
	      shifted = force_reg (mode, shifted);

	      temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
				     mode, shifted, new_amount, 0, 1);
	      temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
				      mode, shifted, other_amount,
				      subtarget, 1);
	      return expand_binop (mode, ior_optab, temp, temp1, target,
				   unsignedp, methods);
	    }

	  temp = expand_binop (mode,
			       left ? lrotate_optab : rrotate_optab,
			       shifted, op1, target, unsignedp, methods);
	}
      else if (unsignedp)
	temp = expand_binop (mode,
			     left ? lshift_optab : rshift_uns_optab,
			     shifted, op1, target, unsignedp, methods);

      /* Do arithmetic shifts.
	 Also, if we are going to widen the operand, we can just as well
	 use an arithmetic right-shift instead of a logical one.  */
      if (temp == 0 && ! rotate
	  && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
	{
	  enum optab_methods methods1 = methods;

	  /* If trying to widen a log shift to an arithmetic shift,
	     don't accept an arithmetic shift of the same size.  */
	  if (unsignedp)
	    methods1 = OPTAB_MUST_WIDEN;

	  /* Arithmetic shift */

	  temp = expand_binop (mode,
			       left ? lshift_optab : rshift_arith_optab,
			       shifted, op1, target, unsignedp, methods1);
	}

      /* We used to try extzv here for logical right shifts, but that was
	 only useful for one machine, the VAX, and caused poor code
	 generation there for lshrdi3, so the code was deleted and a
	 define_expand for lshrsi3 was added to vax.md.  */
    }

  gcc_assert (temp != NULL_RTX || may_fail);
  return temp;
}
f5ff0b21 | 2528 | |
2529 | /* Output a shift instruction for expression code CODE, | |
2530 | with SHIFTED being the rtx for the value to shift, | |
2531 | and AMOUNT the amount to shift by. | |
2532 | Store the result in the rtx TARGET, if that is convenient. | |
2533 | If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic. | |
2534 | Return the rtx for where the value is. */ | |
2535 | ||
2536 | rtx | |
3754d046 | 2537 | expand_shift (enum tree_code code, machine_mode mode, rtx shifted, |
f5ff0b21 | 2538 | int amount, rtx target, int unsignedp) |
2539 | { | |
32d37219 | 2540 | return expand_shift_1 (code, mode, |
2541 | shifted, GEN_INT (amount), target, unsignedp); | |
2542 | } | |
2543 | ||
82f4e127 | 2544 | /* Likewise, but return 0 if that cannot be done. */ |
2545 | ||
2546 | static rtx | |
2547 | maybe_expand_shift (enum tree_code code, machine_mode mode, rtx shifted, | |
2548 | int amount, rtx target, int unsignedp) | |
2549 | { | |
2550 | return expand_shift_1 (code, mode, | |
2551 | shifted, GEN_INT (amount), target, unsignedp, true); | |
2552 | } | |
2553 | ||
32d37219 | 2554 | /* Output a shift instruction for expression code CODE, |
2555 | with SHIFTED being the rtx for the value to shift, | |
2556 | and AMOUNT the tree for the amount to shift by. | |
2557 | Store the result in the rtx TARGET, if that is convenient. | |
2558 | If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic. | |
2559 | Return the rtx for where the value is. */ | |
2560 | ||
2561 | rtx | |
3754d046 | 2562 | expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted, |
32d37219 | 2563 | tree amount, rtx target, int unsignedp) |
2564 | { | |
2565 | return expand_shift_1 (code, mode, | |
2566 | shifted, expand_normal (amount), target, unsignedp); | |
f5ff0b21 | 2567 | } |
32d37219 | 2568 | |
db96f378 | 2569 | \f |
49931967 | 2570 | static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT, |
3754d046 | 2571 | const struct mult_cost *, machine_mode mode); |
3754d046 | 2572 | static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx, |
27588b0f | 2573 | const struct algorithm *, enum mult_variant); |
35cb5232 | 2574 | static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int); |
f77c4496 | 2575 | static rtx extract_high_half (scalar_int_mode, rtx); |
2576 | static rtx expmed_mult_highpart (scalar_int_mode, rtx, rtx, rtx, int, int); | |
2577 | static rtx expmed_mult_highpart_optab (scalar_int_mode, rtx, rtx, rtx, | |
27588b0f | 2578 | int, int); |
db96f378 | 2579 | /* Compute and return the best algorithm for multiplying by T. |
4b780351 | 2580 | The algorithm must cost less than cost_limit |
2581 | If retval.cost >= COST_LIMIT, no algorithm was found and all | |
49931967 | 2582 | other field of the returned struct are undefined. |
2583 | MODE is the machine mode of the multiplication. */ | |
db96f378 | 2584 | |
1e401f10 | 2585 | static void |
35cb5232 | 2586 | synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t, |
3754d046 | 2587 | const struct mult_cost *cost_limit, machine_mode mode) |
db96f378 | 2588 | { |
183a33d2 | 2589 | int m; |
709f9009 | 2590 | struct algorithm *alg_in, *best_alg; |
d9154849 | 2591 | struct mult_cost best_cost; |
2592 | struct mult_cost new_limit; | |
2593 | int op_cost, op_latency; | |
b592bb50 | 2594 | unsigned HOST_WIDE_INT orig_t = t; |
183a33d2 | 2595 | unsigned HOST_WIDE_INT q; |
49db198b | 2596 | int maxm, hash_index; |
7fe4cfe2 | 2597 | bool cache_hit = false; |
2598 | enum alg_code cache_alg = alg_zero; | |
f529eb25 | 2599 | bool speed = optimize_insn_for_speed_p (); |
7a6aeeed | 2600 | scalar_int_mode imode; |
49a71e58 | 2601 | struct alg_hash_entry *entry_ptr; |
db96f378 | 2602 | |
4b780351 | 2603 | /* Indicate that no algorithm is yet found. If no algorithm |
2604 | is found, this value will be returned and indicate failure. */ | |
d9154849 | 2605 | alg_out->cost.cost = cost_limit->cost + 1; |
eddf2705 | 2606 | alg_out->cost.latency = cost_limit->latency + 1; |
db96f378 | 2607 | |
d9154849 | 2608 | if (cost_limit->cost < 0 |
2609 | || (cost_limit->cost == 0 && cost_limit->latency <= 0)) | |
1e401f10 | 2610 | return; |
db96f378 | 2611 | |
49db198b | 2612 | /* Be prepared for vector modes. */ |
7a6aeeed | 2613 | imode = as_a <scalar_int_mode> (GET_MODE_INNER (mode)); |
49db198b | 2614 | |
2615 | maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode)); | |
2616 | ||
83df06d0 | 2617 | /* Restrict the bits of "t" to the multiplication's mode. */ |
49db198b | 2618 | t &= GET_MODE_MASK (imode); |
83df06d0 | 2619 | |
798c4e27 | 2620 | /* t == 1 can be done in zero cost. */ |
2621 | if (t == 1) | |
183a33d2 | 2622 | { |
1e401f10 | 2623 | alg_out->ops = 1; |
d9154849 | 2624 | alg_out->cost.cost = 0; |
2625 | alg_out->cost.latency = 0; | |
1e401f10 | 2626 | alg_out->op[0] = alg_m; |
2627 | return; | |
183a33d2 | 2628 | } |
2629 | ||
798c4e27 | 2630 | /* t == 0 sometimes has a cost. If it does and it exceeds our limit, |
2631 | fail now. */ | |
1e401f10 | 2632 | if (t == 0) |
798c4e27 | 2633 | { |
49a71e58 | 2634 | if (MULT_COST_LESS (cost_limit, zero_cost (speed))) |
1e401f10 | 2635 | return; |
798c4e27 | 2636 | else |
2637 | { | |
1e401f10 | 2638 | alg_out->ops = 1; |
49a71e58 | 2639 | alg_out->cost.cost = zero_cost (speed); |
2640 | alg_out->cost.latency = zero_cost (speed); | |
1e401f10 | 2641 | alg_out->op[0] = alg_zero; |
2642 | return; | |
798c4e27 | 2643 | } |
2644 | } | |
2645 | ||
709f9009 | 2646 | /* We'll be needing a couple extra algorithm structures now. */ |
2647 | ||
2457c754 | 2648 | alg_in = XALLOCA (struct algorithm); |
2649 | best_alg = XALLOCA (struct algorithm); | |
d9154849 | 2650 | best_cost = *cost_limit; |
709f9009 | 2651 | |
7fe4cfe2 | 2652 | /* Compute the hash index. */ |
f529eb25 | 2653 | hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES; |
7fe4cfe2 | 2654 | |
2655 | /* See if we already know what to do for T. */ | |
49a71e58 | 2656 | entry_ptr = alg_hash_entry_ptr (hash_index); |
2657 | if (entry_ptr->t == t | |
49a71e58 | 2658 | && entry_ptr->mode == mode |
2659 | && entry_ptr->speed == speed | |
2660 | && entry_ptr->alg != alg_unknown) | |
7fe4cfe2 | 2661 | { |
49a71e58 | 2662 | cache_alg = entry_ptr->alg; |
44a03d75 | 2663 | |
2664 | if (cache_alg == alg_impossible) | |
7fe4cfe2 | 2665 | { |
44a03d75 | 2666 | /* The cache tells us that it's impossible to synthesize |
49a71e58 | 2667 | multiplication by T within entry_ptr->cost. */ |
2668 | if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit)) | |
44a03d75 | 2669 | /* COST_LIMIT is at least as restrictive as the one |
2670 | recorded in the hash table, in which case we have no | |
2671 | hope of synthesizing a multiplication. Just | |
2672 | return. */ | |
2673 | return; | |
2674 | ||
2675 | /* If we get here, COST_LIMIT is less restrictive than the | |
2676 | one recorded in the hash table, so we may be able to | |
2677 | synthesize a multiplication. Proceed as if we didn't | |
2678 | have the cache entry. */ | |
2679 | } | |
2680 | else | |
2681 | { | |
49a71e58 | 2682 | if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost)) |
44a03d75 | 2683 | /* The cached algorithm shows that this multiplication |
2684 | requires more cost than COST_LIMIT. Just return. This | |
2685 | way, we don't clobber this cache entry with | |
2686 | alg_impossible but retain useful information. */ | |
2687 | return; | |
7fe4cfe2 | 2688 | |
44a03d75 | 2689 | cache_hit = true; |
2690 | ||
2691 | switch (cache_alg) | |
2692 | { | |
2693 | case alg_shift: | |
2694 | goto do_alg_shift; | |
7fe4cfe2 | 2695 | |
44a03d75 | 2696 | case alg_add_t_m2: |
2697 | case alg_sub_t_m2: | |
2698 | goto do_alg_addsub_t_m2; | |
7fe4cfe2 | 2699 | |
44a03d75 | 2700 | case alg_add_factor: |
2701 | case alg_sub_factor: | |
2702 | goto do_alg_addsub_factor; | |
7fe4cfe2 | 2703 | |
44a03d75 | 2704 | case alg_add_t2_m: |
2705 | goto do_alg_add_t2_m; | |
7fe4cfe2 | 2706 | |
44a03d75 | 2707 | case alg_sub_t2_m: |
2708 | goto do_alg_sub_t2_m; | |
2709 | ||
2710 | default: | |
2711 | gcc_unreachable (); | |
2712 | } | |
7fe4cfe2 | 2713 | } |
2714 | } | |
2715 | ||
798c4e27 | 2716 | /* If we have a group of zero bits at the low-order part of T, try |
2717 | multiplying by the remaining bits and then doing a shift. */ | |
2718 | ||
183a33d2 | 2719 | if ((t & 1) == 0) |
db96f378 | 2720 | { |
7fe4cfe2 | 2721 | do_alg_shift: |
ac29ece2 | 2722 | m = ctz_or_zero (t); /* m = number of low zero bits */ |
83df06d0 | 2723 | if (m < maxm) |
db96f378 | 2724 | { |
84ab528e | 2725 | q = t >> m; |
617cc55d | 2726 | /* The function expand_shift will choose between a shift and |
2727 | a sequence of additions, so the observed cost is given as | |
49a71e58 | 2728 | MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */ |
2729 | op_cost = m * add_cost (speed, mode); | |
2730 | if (shift_cost (speed, mode, m) < op_cost) | |
2731 | op_cost = shift_cost (speed, mode, m); | |
d9154849 | 2732 | new_limit.cost = best_cost.cost - op_cost; |
2733 | new_limit.latency = best_cost.latency - op_cost; | |
2734 | synth_mult (alg_in, q, &new_limit, mode); | |
2735 | ||
2736 | alg_in->cost.cost += op_cost; | |
2737 | alg_in->cost.latency += op_cost; | |
2738 | if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) | |
84ab528e | 2739 | { |
d9154849 | 2740 | best_cost = alg_in->cost; |
68eaff07 | 2741 | std::swap (alg_in, best_alg); |
84ab528e | 2742 | best_alg->log[best_alg->ops] = m; |
2743 | best_alg->op[best_alg->ops] = alg_shift; | |
84ab528e | 2744 | } |
5521b4c8 | 2745 | |
2746 | /* See if treating ORIG_T as a signed number yields a better | |
2747 | sequence. Try this sequence only for a negative ORIG_T | |
2748 | as it would be useless for a non-negative ORIG_T. */ | |
2749 | if ((HOST_WIDE_INT) orig_t < 0) | |
2750 | { | |
2751 | /* Shift ORIG_T as follows because a right shift of a | |
2752 | negative-valued signed type is implementation | |
2753 | defined. */ | |
2754 | q = ~(~orig_t >> m); | |
2755 | /* The function expand_shift will choose between a shift | |
2756 | and a sequence of additions, so the observed cost is | |
49a71e58 | 2757 | given as MIN (m * add_cost(speed, mode), |
2758 | shift_cost(speed, mode, m)). */ | |
2759 | op_cost = m * add_cost (speed, mode); | |
2760 | if (shift_cost (speed, mode, m) < op_cost) | |
2761 | op_cost = shift_cost (speed, mode, m); | |
5521b4c8 | 2762 | new_limit.cost = best_cost.cost - op_cost; |
2763 | new_limit.latency = best_cost.latency - op_cost; | |
2764 | synth_mult (alg_in, q, &new_limit, mode); | |
2765 | ||
2766 | alg_in->cost.cost += op_cost; | |
2767 | alg_in->cost.latency += op_cost; | |
2768 | if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) | |
2769 | { | |
5521b4c8 | 2770 | best_cost = alg_in->cost; |
68eaff07 | 2771 | std::swap (alg_in, best_alg); |
5521b4c8 | 2772 | best_alg->log[best_alg->ops] = m; |
2773 | best_alg->op[best_alg->ops] = alg_shift; | |
2774 | } | |
2775 | } | |
1e401f10 | 2776 | } |
7fe4cfe2 | 2777 | if (cache_hit) |
2778 | goto done; | |
1e401f10 | 2779 | } |
2780 | ||
2781 | /* If we have an odd number, add or subtract one. */ | |
2782 | if ((t & 1) != 0) | |
2783 | { | |
2784 | unsigned HOST_WIDE_INT w; | |
2785 | ||
7fe4cfe2 | 2786 | do_alg_addsub_t_m2: |
1e401f10 | 2787 | for (w = 1; (w & t) != 0; w <<= 1) |
2788 | ; | |
68215e49 | 2789 | /* If T was -1, then W will be zero after the loop. This is another |
a4194ff7 | 2790 | case where T ends with ...111. Handling this with (T + 1) and |
68215e49 | 2791 | subtract 1 produces slightly better code and results in algorithm |
2792 | selection much faster than treating it like the ...0111 case | |
2793 | below. */ | |
2794 | if (w == 0 | |
2795 | || (w > 2 | |
2796 | /* Reject the case where t is 3. | |
2797 | Thus we prefer addition in that case. */ | |
2798 | && t != 3)) | |
1e401f10 | 2799 | { |
bd725039 | 2800 | /* T ends with ...111. Multiply by (T + 1) and subtract T. */ |
1e401f10 | 2801 | |
49a71e58 | 2802 | op_cost = add_cost (speed, mode); |
d9154849 | 2803 | new_limit.cost = best_cost.cost - op_cost; |
2804 | new_limit.latency = best_cost.latency - op_cost; | |
2805 | synth_mult (alg_in, t + 1, &new_limit, mode); | |
183a33d2 | 2806 | |
d9154849 | 2807 | alg_in->cost.cost += op_cost; |
2808 | alg_in->cost.latency += op_cost; | |
2809 | if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) | |
db96f378 | 2810 | { |
d9154849 | 2811 | best_cost = alg_in->cost; |
68eaff07 | 2812 | std::swap (alg_in, best_alg); |
1e401f10 | 2813 | best_alg->log[best_alg->ops] = 0; |
2814 | best_alg->op[best_alg->ops] = alg_sub_t_m2; | |
db96f378 | 2815 | } |
db96f378 | 2816 | } |
1e401f10 | 2817 | else |
2818 | { | |
bd725039 | 2819 | /* T ends with ...01 or ...011. Multiply by (T - 1) and add T. */ |
db96f378 | 2820 | |
49a71e58 | 2821 | op_cost = add_cost (speed, mode); |
d9154849 | 2822 | new_limit.cost = best_cost.cost - op_cost; |
2823 | new_limit.latency = best_cost.latency - op_cost; | |
2824 | synth_mult (alg_in, t - 1, &new_limit, mode); | |
1e401f10 | 2825 | |
d9154849 | 2826 | alg_in->cost.cost += op_cost; |
2827 | alg_in->cost.latency += op_cost; | |
2828 | if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) | |
1e401f10 | 2829 | { |
d9154849 | 2830 | best_cost = alg_in->cost; |
68eaff07 | 2831 | std::swap (alg_in, best_alg); |
1e401f10 | 2832 | best_alg->log[best_alg->ops] = 0; |
2833 | best_alg->op[best_alg->ops] = alg_add_t_m2; | |
1e401f10 | 2834 | } |
2835 | } | |
b592bb50 | 2836 | |
2837 | /* We may be able to calculate a * -7, a * -15, a * -31, etc | |
2838 | quickly with a - a * n for some appropriate constant n. */ | |
2839 | m = exact_log2 (-orig_t + 1); | |
2840 | if (m >= 0 && m < maxm) | |
2841 | { | |
59699fd3 | 2842 | op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m); |
2843 | /* If the target has a cheap shift-and-subtract insn use | |
2844 | that in preference to a shift insn followed by a sub insn. | |
2845 | Assume that the shift-and-sub is "atomic" with a latency | |
2846 | equal to it's cost, otherwise assume that on superscalar | |
2847 | hardware the shift may be executed concurrently with the | |
2848 | earlier steps in the algorithm. */ | |
2849 | if (shiftsub1_cost (speed, mode, m) <= op_cost) | |
2850 | { | |
2851 | op_cost = shiftsub1_cost (speed, mode, m); | |
2852 | op_latency = op_cost; | |
2853 | } | |
2854 | else | |
2855 | op_latency = add_cost (speed, mode); | |
2856 | ||
b592bb50 | 2857 | new_limit.cost = best_cost.cost - op_cost; |
59699fd3 | 2858 | new_limit.latency = best_cost.latency - op_latency; |
49db198b | 2859 | synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m, |
2860 | &new_limit, mode); | |
b592bb50 | 2861 | |
2862 | alg_in->cost.cost += op_cost; | |
59699fd3 | 2863 | alg_in->cost.latency += op_latency; |
b592bb50 | 2864 | if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) |
2865 | { | |
b592bb50 | 2866 | best_cost = alg_in->cost; |
68eaff07 | 2867 | std::swap (alg_in, best_alg); |
b592bb50 | 2868 | best_alg->log[best_alg->ops] = m; |
2869 | best_alg->op[best_alg->ops] = alg_sub_t_m2; | |
2870 | } | |
2871 | } | |
2872 | ||
7fe4cfe2 | 2873 | if (cache_hit) |
2874 | goto done; | |
1e401f10 | 2875 | } |
4e6e0ee8 | 2876 | |
db96f378 | 2877 | /* Look for factors of t of the form |
4b780351 | 2878 | t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)). |
db96f378 | 2879 | If we find such a factor, we can multiply by t using an algorithm that |
4b780351 | 2880 | multiplies by q, shift the result by m and add/subtract it to itself. |
db96f378 | 2881 | |
4b780351 | 2882 | We search for large factors first and loop down, even if large factors |
2883 | are less probable than small; if we find a large factor we will find a | |
2884 | good sequence quickly, and therefore be able to prune (by decreasing | |
2885 | COST_LIMIT) the search. */ | |
2886 | ||
7fe4cfe2 | 2887 | do_alg_addsub_factor: |
4b780351 | 2888 | for (m = floor_log2 (t - 1); m >= 2; m--) |
db96f378 | 2889 | { |
4b780351 | 2890 | unsigned HOST_WIDE_INT d; |
db96f378 | 2891 | |
edc19fd0 | 2892 | d = (HOST_WIDE_INT_1U << m) + 1; |
7fe4cfe2 | 2893 | if (t % d == 0 && t > d && m < maxm |
2894 | && (!cache_hit || cache_alg == alg_add_factor)) | |
db96f378 | 2895 | { |
49a71e58 | 2896 | op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m); |
59699fd3 | 2897 | if (shiftadd_cost (speed, mode, m) <= op_cost) |
2898 | op_cost = shiftadd_cost (speed, mode, m); | |
2899 | ||
2900 | op_latency = op_cost; | |
2901 | ||
d9154849 | 2902 | |
2903 | new_limit.cost = best_cost.cost - op_cost; | |
2904 | new_limit.latency = best_cost.latency - op_latency; | |
2905 | synth_mult (alg_in, t / d, &new_limit, mode); | |
db96f378 | 2906 | |
d9154849 | 2907 | alg_in->cost.cost += op_cost; |
2908 | alg_in->cost.latency += op_latency; | |
2909 | if (alg_in->cost.latency < op_cost) | |
2910 | alg_in->cost.latency = op_cost; | |
2911 | if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) | |
db96f378 | 2912 | { |
d9154849 | 2913 | best_cost = alg_in->cost; |
68eaff07 | 2914 | std::swap (alg_in, best_alg); |
798c4e27 | 2915 | best_alg->log[best_alg->ops] = m; |
1e401f10 | 2916 | best_alg->op[best_alg->ops] = alg_add_factor; |
db96f378 | 2917 | } |
04ba236f | 2918 | /* Other factors will have been taken care of in the recursion. */ |
2919 | break; | |
db96f378 | 2920 | } |
2921 | ||
edc19fd0 | 2922 | d = (HOST_WIDE_INT_1U << m) - 1; |
7fe4cfe2 | 2923 | if (t % d == 0 && t > d && m < maxm |
2924 | && (!cache_hit || cache_alg == alg_sub_factor)) | |
db96f378 | 2925 | { |
49a71e58 | 2926 | op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m); |
59699fd3 | 2927 | if (shiftsub0_cost (speed, mode, m) <= op_cost) |
2928 | op_cost = shiftsub0_cost (speed, mode, m); | |
2929 | ||
2930 | op_latency = op_cost; | |
d9154849 | 2931 | |
2932 | new_limit.cost = best_cost.cost - op_cost; | |
379eaa7a | 2933 | new_limit.latency = best_cost.latency - op_latency; |
d9154849 | 2934 | synth_mult (alg_in, t / d, &new_limit, mode); |
db96f378 | 2935 | |
d9154849 | 2936 | alg_in->cost.cost += op_cost; |
2937 | alg_in->cost.latency += op_latency; | |
2938 | if (alg_in->cost.latency < op_cost) | |
2939 | alg_in->cost.latency = op_cost; | |
2940 | if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) | |
db96f378 | 2941 | { |
d9154849 | 2942 | best_cost = alg_in->cost; |
68eaff07 | 2943 | std::swap (alg_in, best_alg); |
798c4e27 | 2944 | best_alg->log[best_alg->ops] = m; |
1e401f10 | 2945 | best_alg->op[best_alg->ops] = alg_sub_factor; |
db96f378 | 2946 | } |
04ba236f | 2947 | break; |
db96f378 | 2948 | } |
2949 | } | |
7fe4cfe2 | 2950 | if (cache_hit) |
2951 | goto done; | |
db96f378 | 2952 | |
4b780351 | 2953 | /* Try shift-and-add (load effective address) instructions, |
2954 | i.e. do a*3, a*5, a*9. */ | |
2955 | if ((t & 1) != 0) | |
2956 | { | |
7fe4cfe2 | 2957 | do_alg_add_t2_m: |
4b780351 | 2958 | q = t - 1; |
ac29ece2 | 2959 | m = ctz_hwi (q); |
2960 | if (q && m < maxm) | |
798c4e27 | 2961 | { |
49a71e58 | 2962 | op_cost = shiftadd_cost (speed, mode, m); |
d9154849 | 2963 | new_limit.cost = best_cost.cost - op_cost; |
2964 | new_limit.latency = best_cost.latency - op_cost; | |
2965 | synth_mult (alg_in, (t - 1) >> m, &new_limit, mode); | |
2966 | ||
2967 | alg_in->cost.cost += op_cost; | |
2968 | alg_in->cost.latency += op_cost; | |
2969 | if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) | |
010b6a23 | 2970 | { |
d9154849 | 2971 | best_cost = alg_in->cost; |
68eaff07 | 2972 | std::swap (alg_in, best_alg); |
010b6a23 | 2973 | best_alg->log[best_alg->ops] = m; |
1e401f10 | 2974 | best_alg->op[best_alg->ops] = alg_add_t2_m; |
010b6a23 | 2975 | } |
4b780351 | 2976 | } |
7fe4cfe2 | 2977 | if (cache_hit) |
2978 | goto done; | |
db96f378 | 2979 | |
7fe4cfe2 | 2980 | do_alg_sub_t2_m: |
4b780351 | 2981 | q = t + 1; |
ac29ece2 | 2982 | m = ctz_hwi (q); |
2983 | if (q && m < maxm) | |
798c4e27 | 2984 | { |
49a71e58 | 2985 | op_cost = shiftsub0_cost (speed, mode, m); |
d9154849 | 2986 | new_limit.cost = best_cost.cost - op_cost; |
2987 | new_limit.latency = best_cost.latency - op_cost; | |
2988 | synth_mult (alg_in, (t + 1) >> m, &new_limit, mode); | |
2989 | ||
2990 | alg_in->cost.cost += op_cost; | |
2991 | alg_in->cost.latency += op_cost; | |
2992 | if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) | |
010b6a23 | 2993 | { |
d9154849 | 2994 | best_cost = alg_in->cost; |
68eaff07 | 2995 | std::swap (alg_in, best_alg); |
010b6a23 | 2996 | best_alg->log[best_alg->ops] = m; |
1e401f10 | 2997 | best_alg->op[best_alg->ops] = alg_sub_t2_m; |
010b6a23 | 2998 | } |
4b780351 | 2999 | } |
7fe4cfe2 | 3000 | if (cache_hit) |
3001 | goto done; | |
4b780351 | 3002 | } |
db96f378 | 3003 | |
7fe4cfe2 | 3004 | done: |
eddf2705 | 3005 | /* If best_cost has not decreased, we have not found any algorithm. */ |
3006 | if (!CHEAPER_MULT_COST (&best_cost, cost_limit)) | |
44a03d75 | 3007 | { |
3008 | /* We failed to find an algorithm. Record alg_impossible for | |
3009 | this case (that is, <T, MODE, COST_LIMIT>) so that next time | |
3010 | we are asked to find an algorithm for T within the same or | |
3011 | lower COST_LIMIT, we can immediately return to the | |
3012 | caller. */ | |
49a71e58 | 3013 | entry_ptr->t = t; |
3014 | entry_ptr->mode = mode; | |
3015 | entry_ptr->speed = speed; | |
3016 | entry_ptr->alg = alg_impossible; | |
3017 | entry_ptr->cost = *cost_limit; | |
44a03d75 | 3018 | return; |
3019 | } | |
eddf2705 | 3020 | |
7fe4cfe2 | 3021 | /* Cache the result. */ |
3022 | if (!cache_hit) | |
3023 | { | |
49a71e58 | 3024 | entry_ptr->t = t; |
3025 | entry_ptr->mode = mode; | |
3026 | entry_ptr->speed = speed; | |
3027 | entry_ptr->alg = best_alg->op[best_alg->ops]; | |
3028 | entry_ptr->cost.cost = best_cost.cost; | |
3029 | entry_ptr->cost.latency = best_cost.latency; | |
7fe4cfe2 | 3030 | } |
3031 | ||
709f9009 | 3032 | /* If we are getting a too long sequence for `struct algorithm' |
3033 | to record, make this search fail. */ | |
3034 | if (best_alg->ops == MAX_BITS_PER_WORD) | |
3035 | return; | |
3036 | ||
1e401f10 | 3037 | /* Copy the algorithm from temporary space to the space at alg_out. |
3038 | We avoid using structure assignment because the majority of | |
3039 | best_alg is normally undefined, and this is a critical function. */ | |
3040 | alg_out->ops = best_alg->ops + 1; | |
d9154849 | 3041 | alg_out->cost = best_cost; |
b1b63592 | 3042 | memcpy (alg_out->op, best_alg->op, |
3043 | alg_out->ops * sizeof *alg_out->op); | |
3044 | memcpy (alg_out->log, best_alg->log, | |
3045 | alg_out->ops * sizeof *alg_out->log); | |
db96f378 | 3046 | } |
3047 | \f | |
/* Find the cheapest way of multiplying a value of mode MODE by VAL.
   Try three variations:

       - a shift/add sequence based on VAL itself
       - a shift/add sequence based on -VAL, followed by a negation
       - a shift/add sequence based on VAL - 1, followed by an addition.

   Return true if the cheapest of these cost less than MULT_COST,
   describing the algorithm in *ALG and final fixup in *VARIANT.  */

bool
choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
		     struct algorithm *alg, enum mult_variant *variant,
		     int mult_cost)
{
  struct algorithm alg2;
  struct mult_cost limit;
  int op_cost;
  bool speed = optimize_insn_for_speed_p ();

  /* Fail quickly for impossible bounds.  */
  if (mult_cost < 0)
    return false;

  /* Ensure that mult_cost provides a reasonable upper bound.
     Any constant multiplication can be performed with less
     than 2 * bits additions.  */
  op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
  if (mult_cost > op_cost)
    mult_cost = op_cost;

  /* First candidate: synthesize VAL directly.  synth_mult fills *ALG
     and prunes its search using LIMIT.  */
  *variant = basic_variant;
  limit.cost = mult_cost;
  limit.latency = mult_cost;
  synth_mult (alg, val, &limit, mode);

  /* Second candidate: synthesize -VAL and negate the result.
     This works only if the inverted value actually fits in an
     `unsigned int' */
  if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
    {
      op_cost = neg_cost (speed, mode);
      /* Budget for the recursive synthesis: whatever is left after
	 paying for the final negation, measured against either the
	 best algorithm found so far or the caller's limit.  */
      if (MULT_COST_LESS (&alg->cost, mult_cost))
	{
	  limit.cost = alg->cost.cost - op_cost;
	  limit.latency = alg->cost.latency - op_cost;
	}
      else
	{
	  limit.cost = mult_cost - op_cost;
	  limit.latency = mult_cost - op_cost;
	}

      synth_mult (&alg2, -val, &limit, mode);
      alg2.cost.cost += op_cost;
      alg2.cost.latency += op_cost;
      if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
	*alg = alg2, *variant = negate_variant;
    }

  /* Third candidate: synthesize VAL - 1 and add OP0 once afterwards.
     This proves very useful for division-by-constant.  */
  op_cost = add_cost (speed, mode);
  if (MULT_COST_LESS (&alg->cost, mult_cost))
    {
      limit.cost = alg->cost.cost - op_cost;
      limit.latency = alg->cost.latency - op_cost;
    }
  else
    {
      limit.cost = mult_cost - op_cost;
      limit.latency = mult_cost - op_cost;
    }

  synth_mult (&alg2, val - 1, &limit, mode);
  alg2.cost.cost += op_cost;
  alg2.cost.latency += op_cost;
  if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
    *alg = alg2, *variant = add_variant;

  return MULT_COST_LESS (&alg->cost, mult_cost);
}
3128 | ||
/* A subroutine of expand_mult, used for constant multiplications.
   Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
   convenient.  Use the shift/add sequence described by ALG and apply
   the final fixup specified by VARIANT.  */

static rtx
expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
		   rtx target, const struct algorithm *alg,
		   enum mult_variant variant)
{
  /* VAL_SO_FAR tracks the multiplier represented by the instructions
     emitted so far; it feeds the REG_EQUAL notes and the final
     consistency assert.  */
  unsigned HOST_WIDE_INT val_so_far;
  rtx_insn *insn;
  rtx accum, tem;
  int opno;
  machine_mode nmode;

  /* Avoid referencing memory over and over and invalid sharing
     on SUBREGs.  */
  op0 = force_reg (mode, op0);

  /* ACCUM starts out either as OP0 or as a zero, depending on
     the first operation.  */

  if (alg->op[0] == alg_zero)
    {
      accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
      val_so_far = 0;
    }
  else if (alg->op[0] == alg_m)
    {
      accum = copy_to_mode_reg (mode, op0);
      val_so_far = 1;
    }
  else
    gcc_unreachable ();

  for (opno = 1; opno < alg->ops; opno++)
    {
      int log = alg->log[opno];
      /* When not optimizing, reuse ACCUM as the subtarget of each step
	 to limit register pressure; when optimizing, let the expanders
	 pick fresh pseudos.  */
      rtx shift_subtarget = optimize ? 0 : accum;
      /* TARGET may be used directly only for the last step, and only
	 when no final VARIANT fixup will follow.  */
      rtx add_target
	= (opno == alg->ops - 1 && target != 0 && variant != add_variant
	   && !optimize)
	  ? target : 0;
      rtx accum_target = optimize ? 0 : accum;
      rtx accum_inner;

      switch (alg->op[opno])
	{
	case alg_shift:
	  /* accum <<= log  */
	  tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
	  /* REG_EQUAL note will be attached to the following insn.  */
	  emit_move_insn (accum, tem);
	  val_so_far <<= log;
	  break;

	case alg_add_t_m2:
	  /* accum += op0 << log  */
	  tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
	  accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
				 add_target ? add_target : accum_target);
	  val_so_far += HOST_WIDE_INT_1U << log;
	  break;

	case alg_sub_t_m2:
	  /* accum -= op0 << log  */
	  tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
	  accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
				 add_target ? add_target : accum_target);
	  val_so_far -= HOST_WIDE_INT_1U << log;
	  break;

	case alg_add_t2_m:
	  /* accum = (accum << log) + op0  */
	  accum = expand_shift (LSHIFT_EXPR, mode, accum,
				log, shift_subtarget, 0);
	  accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
				 add_target ? add_target : accum_target);
	  val_so_far = (val_so_far << log) + 1;
	  break;

	case alg_sub_t2_m:
	  /* accum = (accum << log) - op0  */
	  accum = expand_shift (LSHIFT_EXPR, mode, accum,
				log, shift_subtarget, 0);
	  accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
				 add_target ? add_target : accum_target);
	  val_so_far = (val_so_far << log) - 1;
	  break;

	case alg_add_factor:
	  /* accum += accum << log, i.e. accum *= 2^log + 1  */
	  tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
	  accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
				 add_target ? add_target : accum_target);
	  val_so_far += val_so_far << log;
	  break;

	case alg_sub_factor:
	  /* accum = (accum << log) - accum, i.e. accum *= 2^log - 1  */
	  tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
	  accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
				 (add_target
				  ? add_target : (optimize ? 0 : tem)));
	  val_so_far = (val_so_far << log) - val_so_far;
	  break;

	default:
	  gcc_unreachable ();
	}

      if (SCALAR_INT_MODE_P (mode))
	{
	  /* Write a REG_EQUAL note on the last insn so that we can cse
	     multiplication sequences.  Note that if ACCUM is a SUBREG,
	     we've set the inner register and must properly indicate that.  */
	  tem = op0, nmode = mode;
	  accum_inner = accum;
	  if (GET_CODE (accum) == SUBREG)
	    {
	      accum_inner = SUBREG_REG (accum);
	      nmode = GET_MODE (accum_inner);
	      tem = gen_lowpart (nmode, op0);
	    }

	  insn = get_last_insn ();
	  set_dst_reg_note (insn, REG_EQUAL,
			    gen_rtx_MULT (nmode, tem,
					  gen_int_mode (val_so_far, nmode)),
			    accum_inner);
	}
    }

  /* Apply the final fixup chosen by choose_mult_variant.  */
  if (variant == negate_variant)
    {
      val_so_far = -val_so_far;
      accum = expand_unop (mode, neg_optab, accum, target, 0);
    }
  else if (variant == add_variant)
    {
      val_so_far = val_so_far + 1;
      accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
    }

  /* Compare only the bits of val and val_so_far that are significant
     in the result mode, to avoid sign-/zero-extension confusion.  */
  nmode = GET_MODE_INNER (mode);
  val &= GET_MODE_MASK (nmode);
  val_so_far &= GET_MODE_MASK (nmode);
  gcc_assert (val == (HOST_WIDE_INT) val_so_far);

  return accum;
}
3276 | ||
/* Perform a multiplication and return an rtx for the result.
   MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
   TARGET is a suggestion for where to store the result (an rtx).
   UNSIGNEDP is nonzero for an unsigned multiply; NO_LIBCALL forbids
   falling back to a library call (the result may then be NULL_RTX).

   We check specially for a constant integer as OP1.
   If you want this check for OP0 as well, then before calling
   you should swap the two operands if OP0 would be constant.  */

rtx
expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
	     int unsignedp, bool no_libcall)
{
  enum mult_variant variant;
  struct algorithm algorithm;
  rtx scalar_op1;
  int max_cost;
  bool speed = optimize_insn_for_speed_p ();
  /* -ftrapv signed multiply must use the overflow-trapping optabs.  */
  bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;

  if (CONSTANT_P (op0))
    std::swap (op0, op1);

  /* For vectors, there are several simplifications that can be made if
     all elements of the vector constant are identical.  */
  scalar_op1 = unwrap_const_vec_duplicate (op1);

  if (INTEGRAL_MODE_P (mode))
    {
      rtx fake_reg;
      HOST_WIDE_INT coeff;
      bool is_neg;
      int mode_bitsize;

      /* Trivial multipliers: 0, 1 and -1.  */
      if (op1 == CONST0_RTX (mode))
	return op1;
      if (op1 == CONST1_RTX (mode))
	return op0;
      if (op1 == CONSTM1_RTX (mode))
	return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
			    op0, target, 0);

      if (do_trapv)
	goto skip_synth;

      /* If mode is integer vector mode, check if the backend supports
	 vector lshift (by scalar or vector) at all.  If not, we can't use
	 synthetized multiply.  */
      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
	  && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
	  && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
	goto skip_synth;

      /* These are the operations that are potentially turned into
	 a sequence of shifts and additions.  */
      mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);

      /* synth_mult does an `unsigned int' multiply.  As long as the mode is
	 less than or equal in size to `unsigned int' this doesn't matter.
	 If the mode is larger than `unsigned int', then synth_mult works
	 only if the constant value exactly fits in an `unsigned int' without
	 any truncation.  This means that multiplying by negative values does
	 not work; results are off by 2^32 on a 32 bit machine.  */
      if (CONST_INT_P (scalar_op1))
	{
	  coeff = INTVAL (scalar_op1);
	  is_neg = coeff < 0;
	}
#if TARGET_SUPPORTS_WIDE_INT
      else if (CONST_WIDE_INT_P (scalar_op1))
#else
      else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
#endif
	{
	  int shift = wi::exact_log2 (rtx_mode_t (scalar_op1, mode));
	  /* Perfect power of 2 (other than 1, which is handled above).  */
	  if (shift > 0)
	    return expand_shift (LSHIFT_EXPR, mode, op0,
				 shift, target, unsignedp);
	  else
	    goto skip_synth;
	}
      else
	goto skip_synth;

      /* We used to test optimize here, on the grounds that it's better to
	 produce a smaller program when -O is not used.  But this causes
	 such a terrible slowdown sometimes that it seems better to always
	 use synth_mult.  */

      /* Special case powers of two.  */
      if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
	  && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
	return expand_shift (LSHIFT_EXPR, mode, op0,
			     floor_log2 (coeff), target, unsignedp);

      /* A placeholder register used only for rtx-cost queries below.  */
      fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);

      /* Attempt to handle multiplication of DImode values by negative
	 coefficients, by performing the multiplication by a positive
	 multiplier and then inverting the result.  */
      if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
	{
	  /* Its safe to use -coeff even for INT_MIN, as the
	     result is interpreted as an unsigned coefficient.
	     Exclude cost of op0 from max_cost to match the cost
	     calculation of the synth_mult.  */
	  coeff = -(unsigned HOST_WIDE_INT) coeff;
	  max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
				    mode, speed)
		      - neg_cost (speed, mode));
	  if (max_cost <= 0)
	    goto skip_synth;

	  /* Special case powers of two.  */
	  if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
	    {
	      rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
				       floor_log2 (coeff), target, unsignedp);
	      return expand_unop (mode, neg_optab, temp, target, 0);
	    }

	  if (choose_mult_variant (mode, coeff, &algorithm, &variant,
				   max_cost))
	    {
	      rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
					    &algorithm, variant);
	      return expand_unop (mode, neg_optab, temp, target, 0);
	    }
	  goto skip_synth;
	}

      /* Exclude cost of op0 from max_cost to match the cost
	 calculation of the synth_mult.  */
      max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), mode, speed);
      if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
	return expand_mult_const (mode, op0, coeff, target,
				  &algorithm, variant);
    }
 skip_synth:

  /* Expand x*2.0 as x+x.  */
  if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1)
      && real_equal (CONST_DOUBLE_REAL_VALUE (scalar_op1), &dconst2))
    {
      op0 = force_reg (GET_MODE (op0), op0);
      return expand_binop (mode, add_optab, op0, op0,
			   target, unsignedp,
			   no_libcall ? OPTAB_WIDEN : OPTAB_LIB_WIDEN);
    }

  /* This used to use umul_optab if unsigned, but for non-widening multiply
     there is no difference between signed and unsigned.  */
  op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
		      op0, op1, target, unsignedp,
		      no_libcall ? OPTAB_WIDEN : OPTAB_LIB_WIDEN);
  gcc_assert (op0 || no_libcall);
  return op0;
}
62be004c | 3435 | |
72655676 | 3436 | /* Return a cost estimate for multiplying a register by the given |
3437 | COEFFicient in the given MODE and SPEED. */ | |
3438 | ||
3439 | int | |
3754d046 | 3440 | mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed) |
72655676 | 3441 | { |
3442 | int max_cost; | |
3443 | struct algorithm algorithm; | |
3444 | enum mult_variant variant; | |
3445 | ||
3446 | rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1); | |
5ae4887d | 3447 | max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg), |
3448 | mode, speed); | |
72655676 | 3449 | if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost)) |
3450 | return algorithm.cost.cost; | |
3451 | else | |
3452 | return max_cost; | |
3453 | } | |
3454 | ||
/* Perform a widening multiplication and return an rtx for the result.
   MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
   TARGET is a suggestion for where to store the result (an rtx).
   THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
   or smul_widen_optab.

   We check specially for a constant integer as OP1, comparing the
   cost of a widening multiply against the cost of a sequence of shifts
   and adds.  */

rtx
expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
		      int unsignedp, optab this_optab)
{
  bool speed = optimize_insn_for_speed_p ();
  rtx cop1;

  /* The shift/add path applies only when OP1 is a constant that
     survives conversion to the wider MODE, and (for the signed case)
     when its value is representable in a host-wide int.  Note the
     assignment to COP1 inside the condition.  */
  if (CONST_INT_P (op1)
      && GET_MODE (op0) != VOIDmode
      && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
				this_optab == umul_widen_optab))
      && CONST_INT_P (cop1)
      && (INTVAL (cop1) >= 0
	  || HWI_COMPUTABLE_MODE_P (mode)))
    {
      HOST_WIDE_INT coeff = INTVAL (cop1);
      int max_cost;
      enum mult_variant variant;
      struct algorithm algorithm;

      if (coeff == 0)
	return CONST0_RTX (mode);

      /* Special case powers of two.  */
      if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
	{
	  op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
	  return expand_shift (LSHIFT_EXPR, mode, op0,
			       floor_log2 (coeff), target, unsignedp);
	}

      /* Exclude cost of op0 from max_cost to match the cost
	 calculation of the synth_mult.  */
      max_cost = mul_widen_cost (speed, mode);
      if (choose_mult_variant (mode, coeff, &algorithm, &variant,
			       max_cost))
	{
	  /* Widen OP0 first, then run the synthesized sequence in the
	     wide mode.  */
	  op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
	  return expand_mult_const (mode, op0, coeff, target,
				    &algorithm, variant);
	}
    }
  /* Fall back to the widening-multiply instruction or libcall.  */
  return expand_binop (mode, this_optab, op0, op1, target,
		       unsignedp, OPTAB_LIB_WIDEN);
}
db96f378 | 3510 | \f |
/* Choose a minimal N + 1 bit approximation to 1/D that can be used to
   replace division by D, and put the least significant N bits of the result
   in *MULTIPLIER_PTR and return the most significant bit.

   The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
   needed precision is in PRECISION (should be <= N).

   PRECISION should be as small as possible so this function can choose
   multiplier more freely.

   The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
   is to be used for a final right shift is placed in *POST_SHIFT_PTR.

   Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
   where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */

unsigned HOST_WIDE_INT
choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
		   unsigned HOST_WIDE_INT *multiplier_ptr,
		   int *post_shift_ptr, int *lgup_ptr)
{
  int lgup, post_shift;
  int pow, pow2;

  /* lgup = ceil(log2(divisor)); */
  lgup = ceil_log2 (d);

  gcc_assert (lgup <= n);

  pow = n + lgup;
  pow2 = n + lgup - precision;

  /* mlow = 2^(N + lgup)/d */
  wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
  wide_int mlow = wi::udiv_trunc (val, d);

  /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
  val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
  wide_int mhigh = wi::udiv_trunc (val, d);

  /* If precision == N, then mlow, mhigh exceed 2^N
     (but they do not exceed 2^(N+1)).  */

  /* Reduce to lowest terms.  While the halved approximations still
     bracket a valid multiplier (ml_lo < mh_lo), drop one bit from
     both and decrease the post-shift accordingly.  */
  for (post_shift = lgup; post_shift > 0; post_shift--)
    {
      unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
						       HOST_BITS_PER_WIDE_INT);
      unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
						       HOST_BITS_PER_WIDE_INT);
      if (ml_lo >= mh_lo)
	break;

      mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
      mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
    }

  *post_shift_ptr = post_shift;
  *lgup_ptr = lgup;
  if (n < HOST_BITS_PER_WIDE_INT)
    {
      /* Return the (N+1)-st bit of the multiplier; the low N bits go
	 through *MULTIPLIER_PTR.  */
      unsigned HOST_WIDE_INT mask = (HOST_WIDE_INT_1U << n) - 1;
      *multiplier_ptr = mhigh.to_uhwi () & mask;
      return mhigh.to_uhwi () >= mask;
    }
  else
    {
      *multiplier_ptr = mhigh.to_uhwi ();
      return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
    }
}
3582 | ||
3583 | /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is | |
3584 | congruent to 1 (mod 2**N). */ | |
3585 | ||
3586 | static unsigned HOST_WIDE_INT | |
35cb5232 | 3587 | invert_mod2n (unsigned HOST_WIDE_INT x, int n) |
d2fa4ea5 | 3588 | { |
a92771b8 | 3589 | /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */ |
d2fa4ea5 | 3590 | |
3591 | /* The algorithm notes that the choice y = x satisfies | |
3592 | x*y == 1 mod 2^3, since x is assumed odd. | |
3593 | Each iteration doubles the number of bits of significance in y. */ | |
3594 | ||
3595 | unsigned HOST_WIDE_INT mask; | |
3596 | unsigned HOST_WIDE_INT y = x; | |
3597 | int nbit = 3; | |
3598 | ||
3599 | mask = (n == HOST_BITS_PER_WIDE_INT | |
7097b942 | 3600 | ? HOST_WIDE_INT_M1U |
edc19fd0 | 3601 | : (HOST_WIDE_INT_1U << n) - 1); |
d2fa4ea5 | 3602 | |
3603 | while (nbit < n) | |
3604 | { | |
3605 | y = y * (2 - x*y) & mask; /* Modulo 2^N */ | |
3606 | nbit *= 2; | |
3607 | } | |
3608 | return y; | |
3609 | } | |
3610 | ||
/* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
   flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
   product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
   to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
   become signed.

   The result is put in TARGET if that is convenient.

   MODE is the mode of operation.  */

rtx
expand_mult_highpart_adjust (scalar_int_mode mode, rtx adj_operand, rtx op0,
			     rtx op1, rtx target, int unsignedp)
{
  rtx tem;
  enum rtx_code adj_code = unsignedp ? PLUS : MINUS;

  /* Arithmetic right shift by width-1 yields an all-ones mask when
     OP0 is negative, zero otherwise; ANDing with OP1 gives the
     correction term (op0 < 0 ? op1 : 0), which is added or subtracted
     depending on the direction of the adjustment.  */
  tem = expand_shift (RSHIFT_EXPR, mode, op0,
		      GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
  tem = expand_and (mode, tem, op1, NULL_RTX);
  adj_operand
    = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
		     adj_operand);

  /* Likewise for the symmetric term (op1 < 0 ? op0 : 0).  */
  tem = expand_shift (RSHIFT_EXPR, mode, op1,
		      GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
  tem = expand_and (mode, tem, op0, NULL_RTX);
  target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
			  target);

  return target;
}
3643 | ||
ebf4f764 | 3644 | /* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */ |
d2fa4ea5 | 3645 | |
27588b0f | 3646 | static rtx |
f77c4496 | 3647 | extract_high_half (scalar_int_mode mode, rtx op) |
27588b0f | 3648 | { |
27588b0f | 3649 | if (mode == word_mode) |
3650 | return gen_highpart (mode, op); | |
33183a3c | 3651 | |
cc9f5108 | 3652 | scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require (); |
069b07bf | 3653 | |
27588b0f | 3654 | op = expand_shift (RSHIFT_EXPR, wider_mode, op, |
f5ff0b21 | 3655 | GET_MODE_BITSIZE (mode), 0, 1); |
27588b0f | 3656 | return convert_modes (mode, wider_mode, op, 0); |
3657 | } | |
d2fa4ea5 | 3658 | |
/* Like expmed_mult_highpart, but only consider using a multiplication
   optab.  OP1 is an rtx for the constant operand.

   Four strategies are tried in order, each gated by a cost estimate
   against MAX_COST:
     1. a direct [us]mul_highpart instruction;
     2. the opposite-signedness highpart multiply, adjusted afterwards;
     3. a widening multiply followed by extracting the high half;
     4. an opposite-signedness widening multiply, adjusted afterwards.
   Returns 0 if none of these is available or cheap enough.  */

static rtx
expmed_mult_highpart_optab (scalar_int_mode mode, rtx op0, rtx op1,
			    rtx target, int unsignedp, int max_cost)
{
  rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
  optab moptab;
  rtx tem;
  int size;
  bool speed = optimize_insn_for_speed_p ();

  scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();

  size = GET_MODE_BITSIZE (mode);

  /* Firstly, try using a multiplication insn that only generates the needed
     high part of the product, and in the sign flavor of unsignedp.  */
  if (mul_highpart_cost (speed, mode) < max_cost)
    {
      moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
      tem = expand_binop (mode, moptab, op0, narrow_op1, target,
			  unsignedp, OPTAB_DIRECT);
      if (tem)
	return tem;
    }

  /* Secondly, same as above, but use sign flavor opposite of unsignedp.
     Need to adjust the result after the multiplication.  The adjustment
     costs two shifts and up to four adds, hence the extra terms in the
     cost comparison.  (The SIZE - 1 < BITS_PER_WORD guard keeps the
     shift-cost lookup in range -- presumably the cost tables only cover
     single-word shift counts.)  */
  if (size - 1 < BITS_PER_WORD
      && (mul_highpart_cost (speed, mode)
	  + 2 * shift_cost (speed, mode, size-1)
	  + 4 * add_cost (speed, mode) < max_cost))
    {
      moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
      tem = expand_binop (mode, moptab, op0, narrow_op1, target,
			  unsignedp, OPTAB_DIRECT);
      if (tem)
	/* We used the wrong signedness.  Adjust the result.  */
	return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
					    tem, unsignedp);
    }

  /* Try widening multiplication.  */
  moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
  if (convert_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
      && mul_widen_cost (speed, wider_mode) < max_cost)
    {
      tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
			  unsignedp, OPTAB_WIDEN);
      if (tem)
	return extract_high_half (mode, tem);
    }

  /* Try widening the mode and perform a non-widening multiplication.  */
  if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
      && size - 1 < BITS_PER_WORD
      && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
	  < max_cost))
    {
      rtx_insn *insns;
      rtx wop0, wop1;

      /* We need to widen the operands, for example to ensure the
	 constant multiplier is correctly sign or zero extended.
	 Use a sequence to clean-up any instructions emitted by
	 the conversions if things don't work out.  */
      start_sequence ();
      wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
      wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
      tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
			  unsignedp, OPTAB_WIDEN);
      insns = get_insns ();
      end_sequence ();

      if (tem)
	{
	  /* Only commit the conversion/multiply sequence on success.  */
	  emit_insn (insns);
	  return extract_high_half (mode, tem);
	}
    }

  /* Try widening multiplication of opposite signedness, and adjust.  */
  moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
  if (convert_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
      && size - 1 < BITS_PER_WORD
      && (mul_widen_cost (speed, wider_mode)
	  + 2 * shift_cost (speed, mode, size-1)
	  + 4 * add_cost (speed, mode) < max_cost))
    {
      tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
			  NULL_RTX, ! unsignedp, OPTAB_WIDEN);
      if (tem != 0)
	{
	  tem = extract_high_half (mode, tem);
	  /* We used the wrong signedness.  Adjust the result.  */
	  return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
					      target, unsignedp);
	}
    }

  return 0;
}
33183a3c | 3763 | |
/* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
   putting the high half of the result in TARGET if that is convenient,
   and return where the result is.  If the operation can not be performed,
   0 is returned.

   MODE is the mode of operation and result.

   UNSIGNEDP nonzero means unsigned multiply.

   MAX_COST is the total allowed cost for the expanded RTL.  */

static rtx
expmed_mult_highpart (scalar_int_mode mode, rtx op0, rtx op1,
		      rtx target, int unsignedp, int max_cost)
{
  unsigned HOST_WIDE_INT cnst1;
  int extra_cost;
  bool sign_adjust = false;
  enum mult_variant variant;
  struct algorithm alg;
  rtx tem;
  bool speed = optimize_insn_for_speed_p ();

  /* We can't support modes wider than HOST_BITS_PER_WIDE_INT.  */
  gcc_assert (HWI_COMPUTABLE_MODE_P (mode));

  cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);

  /* We can't optimize modes wider than BITS_PER_WORD.
     ??? We might be able to perform double-word arithmetic if
     mode == word_mode, however all the cost calculations in
     synth_mult etc. assume single-word operations.  */
  scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
  if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
    return expmed_mult_highpart_optab (mode, op0, op1, target,
				       unsignedp, max_cost);

  /* Extracting the high half after a shift/add synthesis costs one
     full-width shift ...  */
  extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);

  /* Check whether we try to multiply by a negative constant.  */
  if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
    {
      /* ... plus one subtraction to fix up the sign (see below).  */
      sign_adjust = true;
      extra_cost += add_cost (speed, mode);
    }

  /* See whether shift/add multiplication is cheap enough.  */
  if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
			   max_cost - extra_cost))
    {
      /* See whether the specialized multiplication optabs are
	 cheaper than the shift/add version.  */
      tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
					alg.cost.cost + extra_cost);
      if (tem)
	return tem;

      /* Synthesize the full product in the wider mode and keep only
	 its high half.  */
      tem = convert_to_mode (wider_mode, op0, unsignedp);
      tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
      tem = extract_high_half (mode, tem);

      /* Adjust result for signedness.  */
      if (sign_adjust)
	tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);

      return tem;
    }
  return expmed_mult_highpart_optab (mode, op0, op1, target,
				     unsignedp, max_cost);
}
41323e11 | 3834 | |
3835 | ||
/* Expand signed modulus of OP0 by a power of two D in mode MODE.
   D must be a positive power of two; the result obeys the sign of
   OP0 (TRUNC_MOD semantics).  */

static rtx
expand_smod_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
{
  rtx result, temp, shift;
  rtx_code_label *label;
  int logd;
  int prec = GET_MODE_PRECISION (mode);

  logd = floor_log2 (d);
  result = gen_reg_rtx (mode);

  /* Avoid conditional branches when they're expensive.  */
  if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
      && optimize_insn_for_speed_p ())
    {
      /* SIGNMASK is -1 when OP0 is negative, else 0.  */
      rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
				      mode, 0, -1);
      if (signmask)
	{
	  HOST_WIDE_INT masklow = (HOST_WIDE_INT_1 << logd) - 1;
	  signmask = force_reg (mode, signmask);
	  shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);

	  /* Use the rtx_cost of a LSHIFTRT instruction to determine
	     which instruction sequence to use.  If logical right shifts
	     are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
	     use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */

	  temp = gen_rtx_LSHIFTRT (mode, result, shift);
	  if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
	      || (set_src_cost (temp, mode, optimize_insn_for_speed_p ())
		  > COSTS_N_INSNS (2)))
	    {
	      /* Conditional negate, mask, negate back:
		 (x ^ s) - s is -x when s == -1 and x when s == 0, so
		 this computes -((-OP0) & MASKLOW) for negative OP0 and
		 OP0 & MASKLOW otherwise.  */
	      temp = expand_binop (mode, xor_optab, op0, signmask,
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      temp = expand_binop (mode, sub_optab, temp, signmask,
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      temp = expand_binop (mode, and_optab, temp,
				   gen_int_mode (masklow, mode),
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      temp = expand_binop (mode, xor_optab, temp, signmask,
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      temp = expand_binop (mode, sub_optab, temp, signmask,
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	    }
	  else
	    {
	      /* Shrink SIGNMASK to the low LOGD bits: D - 1 when OP0 is
		 negative, 0 otherwise.  Then use the bias identity
		 ((OP0 + bias) & MASKLOW) - bias.  */
	      signmask = expand_binop (mode, lshr_optab, signmask, shift,
				       NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      signmask = force_reg (mode, signmask);

	      temp = expand_binop (mode, add_optab, op0, signmask,
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      temp = expand_binop (mode, and_optab, temp,
				   gen_int_mode (masklow, mode),
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      temp = expand_binop (mode, sub_optab, temp, signmask,
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	    }
	  return temp;
	}
    }

  /* Branching variant.  Mask contains the mode's signbit and the
     significant bits of the modulus.  By including the signbit in the
     operation, many targets can avoid an explicit compare operation in
     the following comparison against zero.  */
  wide_int mask = wi::mask (logd, false, prec);
  mask = wi::set_bit (mask, prec - 1);

  temp = expand_binop (mode, and_optab, op0,
		       immed_wide_int_const (mask, mode),
		       result, 1, OPTAB_LIB_WIDEN);
  if (temp != result)
    emit_move_insn (result, temp);

  label = gen_label_rtx ();
  do_cmp_and_jump (result, const0_rtx, GE, mode, label);

  /* Negative case: sign-extend the low LOGD bits of RESULT via
     ((RESULT - 1) | ~MASKLOW) + 1.  */
  temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
		       0, OPTAB_LIB_WIDEN);

  mask = wi::mask (logd, true, prec);
  temp = expand_binop (mode, ior_optab, temp,
		       immed_wide_int_const (mask, mode),
		       result, 1, OPTAB_LIB_WIDEN);
  temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
		       0, OPTAB_LIB_WIDEN);
  if (temp != result)
    emit_move_insn (result, temp);
  emit_label (label);
  return result;
}
9c423367 | 3931 | |
/* Expand signed division of OP0 by a power of two D in mode MODE.
   This routine is only called for positive values of D.

   The common idea of all variants below: add D - 1 to OP0 when OP0 is
   negative (so the subsequent arithmetic right shift rounds toward
   zero instead of toward -infinity), then shift right by log2 (D).
   Which variant wins depends on BRANCH_COST and the available insns.  */

static rtx
expand_sdiv_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
{
  rtx temp;
  rtx_code_label *label;
  int logd;

  logd = floor_log2 (d);

  /* For D == 2 the bias is just the sign bit:
     (OP0 + (OP0 < 0)) >> 1.  */
  if (d == 2
      && BRANCH_COST (optimize_insn_for_speed_p (),
		      false) >= 1)
    {
      temp = gen_reg_rtx (mode);
      temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
      temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
			   0, OPTAB_LIB_WIDEN);
      return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
    }

  /* With a conditional move, select between OP0 and OP0 + D - 1
     without a branch.  */
  if (HAVE_conditional_move
      && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
    {
      rtx temp2;

      /* Build the sequence tentatively; commit it only if the
	 conditional move succeeds.  */
      start_sequence ();
      temp2 = copy_to_mode_reg (mode, op0);
      temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
			   NULL_RTX, 0, OPTAB_LIB_WIDEN);
      temp = force_reg (mode, temp);

      /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
      temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
				     mode, temp, temp2, mode, 0);
      if (temp2)
	{
	  rtx_insn *seq = get_insns ();
	  end_sequence ();
	  emit_insn (seq);
	  return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
	}
      end_sequence ();
    }

  /* Branchless scc variant: TEMP is -1 for negative OP0, else 0;
     turn it into the bias D - 1 either by masking with D - 1 or by a
     logical right shift, whichever is cheaper.  */
  if (BRANCH_COST (optimize_insn_for_speed_p (),
		   false) >= 2)
    {
      int ushift = GET_MODE_BITSIZE (mode) - logd;

      temp = gen_reg_rtx (mode);
      temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
      if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
	  || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
	     > COSTS_N_INSNS (1))
	temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode),
			     NULL_RTX, 0, OPTAB_LIB_WIDEN);
      else
	temp = expand_shift (RSHIFT_EXPR, mode, temp,
			     ushift, NULL_RTX, 1);
      temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
			   0, OPTAB_LIB_WIDEN);
      return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
    }

  /* Fallback: branch around the bias addition.  */
  label = gen_label_rtx ();
  temp = copy_to_mode_reg (mode, op0);
  do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
  expand_inc (temp, gen_int_mode (d - 1, mode));
  emit_label (label);
  return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
}
d2fa4ea5 | 4006 | \f |
db96f378 | 4007 | /* Emit the code to divide OP0 by OP1, putting the result in TARGET |
4008 | if that is convenient, and returning where the result is. | |
4009 | You may request either the quotient or the remainder as the result; | |
4010 | specify REM_FLAG nonzero to get the remainder. | |
4011 | ||
4012 | CODE is the expression code for which kind of division this is; | |
4013 | it controls how rounding is done. MODE is the machine mode to use. | |
4014 | UNSIGNEDP nonzero means do unsigned division. */ | |
4015 | ||
4016 | /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI | |
4017 | and then correct it by or'ing in missing high bits | |
4018 | if result of ANDI is nonzero. | |
4019 | For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result. | |
4020 | This could optimize to a bfexts instruction. | |
4021 | But C doesn't use these operations, so their optimizations are | |
4022 | left for later. */ | |
a490489b | 4023 | /* ??? For modulo, we don't actually need the highpart of the first product, |
4024 | the low part will do nicely. And for small divisors, the second multiply | |
4025 | can also be a low-part only multiply or even be completely left out. | |
4026 | E.g. to calculate the remainder of a division by 3 with a 32 bit | |
4027 | multiply, multiply with 0x55555556 and extract the upper two bits; | |
4028 | the result is exact for inputs up to 0x1fffffff. | |
4029 | The input range can be reduced by using cross-sum rules. | |
4030 | For odd divisors >= 3, the following table gives right shift counts | |
edc2a478 | 4031 | so that if a number is shifted by an integer multiple of the given |
a490489b | 4032 | amount, the remainder stays the same: |
4033 | 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20, | |
4034 | 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0, | |
4035 | 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0, | |
4036 | 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33, | |
4037 | 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12 | |
4038 | ||
4039 | Cross-sum rules for even numbers can be derived by leaving as many bits | |
4040 | to the right alone as the divisor has zeros to the right. | |
4041 | E.g. if x is an unsigned 32 bit number: | |
4042 | (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28 | |
4043 | */ | |
db96f378 | 4044 | |
4045 | rtx | |
3754d046 | 4046 | expand_divmod (int rem_flag, enum tree_code code, machine_mode mode, |
35cb5232 | 4047 | rtx op0, rtx op1, rtx target, int unsignedp) |
db96f378 | 4048 | { |
3754d046 | 4049 | machine_mode compute_mode; |
19cb6b50 | 4050 | rtx tquotient; |
d2fa4ea5 | 4051 | rtx quotient = 0, remainder = 0; |
89ca96ce | 4052 | rtx_insn *last; |
89ca96ce | 4053 | rtx_insn *insn; |
db96f378 | 4054 | optab optab1, optab2; |
ae01b312 | 4055 | int op1_is_constant, op1_is_pow2 = 0; |
33183a3c | 4056 | int max_cost, extra_cost; |
9e042f31 | 4057 | static HOST_WIDE_INT last_div_const = 0; |
f529eb25 | 4058 | bool speed = optimize_insn_for_speed_p (); |
d2fa4ea5 | 4059 | |
971ba038 | 4060 | op1_is_constant = CONST_INT_P (op1); |
ae01b312 | 4061 | if (op1_is_constant) |
4062 | { | |
ab104738 | 4063 | wide_int ext_op1 = rtx_mode_t (op1, mode); |
4064 | op1_is_pow2 = (wi::popcount (ext_op1) == 1 | |
4065 | || (! unsignedp | |
4066 | && wi::popcount (wi::neg (ext_op1)) == 1)); | |
ae01b312 | 4067 | } |
d2fa4ea5 | 4068 | |
4069 | /* | |
4070 | This is the structure of expand_divmod: | |
4071 | ||
4072 | First comes code to fix up the operands so we can perform the operations | |
4073 | correctly and efficiently. | |
4074 | ||
4075 | Second comes a switch statement with code specific for each rounding mode. | |
4076 | For some special operands this code emits all RTL for the desired | |
c3118728 | 4077 | operation, for other cases, it generates only a quotient and stores it in |
d2fa4ea5 | 4078 | QUOTIENT. The case for trunc division/remainder might leave quotient = 0, |
4079 | to indicate that it has not done anything. | |
4080 | ||
c3118728 | 4081 | Last comes code that finishes the operation. If QUOTIENT is set and |
4082 | REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If | |
4083 | QUOTIENT is not set, it is computed using trunc rounding. | |
db96f378 | 4084 | |
d2fa4ea5 | 4085 | We try to generate special code for division and remainder when OP1 is a |
4086 | constant. If |OP1| = 2**n we can use shifts and some other fast | |
4087 | operations. For other values of OP1, we compute a carefully selected | |
4088 | fixed-point approximation m = 1/OP1, and generate code that multiplies OP0 | |
4089 | by m. | |
4090 | ||
4091 | In all cases but EXACT_DIV_EXPR, this multiplication requires the upper | |
4092 | half of the product. Different strategies for generating the product are | |
ebf4f764 | 4093 | implemented in expmed_mult_highpart. |
d2fa4ea5 | 4094 | |
4095 | If what we actually want is the remainder, we generate that by another | |
4096 | by-constant multiplication and a subtraction. */ | |
4097 | ||
4098 | /* We shouldn't be called with OP1 == const1_rtx, but some of the | |
1b05ead8 | 4099 | code below will malfunction if we are, so check here and handle |
4100 | the special case if so. */ | |
4101 | if (op1 == const1_rtx) | |
4102 | return rem_flag ? const0_rtx : op0; | |
4103 | ||
bec2d490 | 4104 | /* When dividing by -1, we could get an overflow. |
4105 | negv_optab can handle overflows. */ | |
4106 | if (! unsignedp && op1 == constm1_rtx) | |
4107 | { | |
4108 | if (rem_flag) | |
ff385626 | 4109 | return const0_rtx; |
9af5ce0c | 4110 | return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT |
ff385626 | 4111 | ? negv_optab : neg_optab, op0, target, 0); |
bec2d490 | 4112 | } |
4113 | ||
64e50eaa | 4114 | if (target |
4115 | /* Don't use the function value register as a target | |
4116 | since we have to read it as well as write it, | |
4117 | and function-inlining gets confused by this. */ | |
4118 | && ((REG_P (target) && REG_FUNCTION_VALUE_P (target)) | |
4119 | /* Don't clobber an operand while doing a multi-step calculation. */ | |
eb55662f | 4120 | || ((rem_flag || op1_is_constant) |
64e50eaa | 4121 | && (reg_mentioned_p (target, op0) |
e16ceb8e | 4122 | || (MEM_P (op0) && MEM_P (target)))) |
64e50eaa | 4123 | || reg_mentioned_p (target, op1) |
e16ceb8e | 4124 | || (MEM_P (op1) && MEM_P (target)))) |
db96f378 | 4125 | target = 0; |
4126 | ||
db96f378 | 4127 | /* Get the mode in which to perform this computation. Normally it will |
4128 | be MODE, but sometimes we can't do the desired operation in MODE. | |
4129 | If so, pick a wider mode in which we can do the operation. Convert | |
4130 | to that mode at the start to avoid repeated conversions. | |
4131 | ||
4132 | First see what operations we need. These depend on the expression | |
4133 | we are evaluating. (We assume that divxx3 insns exist under the | |
4134 | same conditions that modxx3 insns and that these insns don't normally | |
4135 | fail. If these assumptions are not correct, we may generate less | |
4136 | efficient code in some cases.) | |
4137 | ||
4138 | Then see if we find a mode in which we can open-code that operation | |
4139 | (either a division, modulus, or shift). Finally, check for the smallest | |
4140 | mode for which we can do the operation with a library call. */ | |
4141 | ||
d2fa4ea5 | 4142 | /* We might want to refine this now that we have division-by-constant |
ebf4f764 | 4143 | optimization. Since expmed_mult_highpart tries so many variants, it is |
d2fa4ea5 | 4144 | not straightforward to generalize this. Maybe we should make an array |
4145 | of possible modes in init_expmed? Save this for GCC 2.7. */ | |
4146 | ||
ab104738 | 4147 | optab1 = (op1_is_pow2 |
0ceee13a | 4148 | ? (unsignedp ? lshr_optab : ashr_optab) |
db96f378 | 4149 | : (unsignedp ? udiv_optab : sdiv_optab)); |
ab104738 | 4150 | optab2 = (op1_is_pow2 ? optab1 |
0ceee13a | 4151 | : (unsignedp ? udivmod_optab : sdivmod_optab)); |
db96f378 | 4152 | |
19a4dce4 | 4153 | FOR_EACH_MODE_FROM (compute_mode, mode) |
d6bf3b14 | 4154 | if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing |
4155 | || optab_handler (optab2, compute_mode) != CODE_FOR_nothing) | |
db96f378 | 4156 | break; |
4157 | ||
4158 | if (compute_mode == VOIDmode) | |
19a4dce4 | 4159 | FOR_EACH_MODE_FROM (compute_mode, mode) |
f36b9f69 | 4160 | if (optab_libfunc (optab1, compute_mode) |
4161 | || optab_libfunc (optab2, compute_mode)) | |
db96f378 | 4162 | break; |
4163 | ||
89f18f73 | 4164 | /* If we still couldn't find a mode, use MODE, but expand_binop will |
4165 | probably die. */ | |
db96f378 | 4166 | if (compute_mode == VOIDmode) |
4167 | compute_mode = mode; | |
4168 | ||
d2fa4ea5 | 4169 | if (target && GET_MODE (target) == compute_mode) |
4170 | tquotient = target; | |
4171 | else | |
4172 | tquotient = gen_reg_rtx (compute_mode); | |
ccc4d85f | 4173 | |
d2fa4ea5 | 4174 | #if 0 |
4175 | /* It should be possible to restrict the precision to GET_MODE_BITSIZE | |
33183a3c | 4176 | (mode), and thereby get better code when OP1 is a constant. Do that |
4177 | later. It will require going over all usages of SIZE below. */ | |
d2fa4ea5 | 4178 | size = GET_MODE_BITSIZE (mode); |
4179 | #endif | |
64e50eaa | 4180 | |
9e042f31 | 4181 | /* Only deduct something for a REM if the last divide done was |
4182 | for a different constant. Then set the constant of the last | |
4183 | divide. */ | |
49a71e58 | 4184 | max_cost = (unsignedp |
4185 | ? udiv_cost (speed, compute_mode) | |
4186 | : sdiv_cost (speed, compute_mode)); | |
1facc8d7 | 4187 | if (rem_flag && ! (last_div_const != 0 && op1_is_constant |
4188 | && INTVAL (op1) == last_div_const)) | |
49a71e58 | 4189 | max_cost -= (mul_cost (speed, compute_mode) |
4190 | + add_cost (speed, compute_mode)); | |
9e042f31 | 4191 | |
4192 | last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0; | |
33183a3c | 4193 | |
d2fa4ea5 | 4194 | /* Now convert to the best mode to use. */ |
db96f378 | 4195 | if (compute_mode != mode) |
4196 | { | |
d2fa4ea5 | 4197 | op0 = convert_modes (compute_mode, mode, op0, unsignedp); |
72467481 | 4198 | op1 = convert_modes (compute_mode, mode, op1, unsignedp); |
6d9d382f | 4199 | |
0dbd1c74 | 4200 | /* convert_modes may have placed op1 into a register, so we |
4201 | must recompute the following. */ | |
971ba038 | 4202 | op1_is_constant = CONST_INT_P (op1); |
ab104738 | 4203 | if (op1_is_constant) |
4204 | { | |
4205 | wide_int ext_op1 = rtx_mode_t (op1, compute_mode); | |
4206 | op1_is_pow2 = (wi::popcount (ext_op1) == 1 | |
4207 | || (! unsignedp | |
4208 | && wi::popcount (wi::neg (ext_op1)) == 1)); | |
4209 | } | |
4210 | else | |
4211 | op1_is_pow2 = 0; | |
db96f378 | 4212 | } |
4213 | ||
d2fa4ea5 | 4214 | /* If one of the operands is a volatile MEM, copy it into a register. */ |
8cdd0f84 | 4215 | |
e16ceb8e | 4216 | if (MEM_P (op0) && MEM_VOLATILE_P (op0)) |
d2fa4ea5 | 4217 | op0 = force_reg (compute_mode, op0); |
e16ceb8e | 4218 | if (MEM_P (op1) && MEM_VOLATILE_P (op1)) |
8cdd0f84 | 4219 | op1 = force_reg (compute_mode, op1); |
4220 | ||
008862a8 | 4221 | /* If we need the remainder or if OP1 is constant, we need to |
4222 | put OP0 in a register in case it has any queued subexpressions. */ | |
4223 | if (rem_flag || op1_is_constant) | |
4224 | op0 = force_reg (compute_mode, op0); | |
64e50eaa | 4225 | |
d2fa4ea5 | 4226 | last = get_last_insn (); |
db96f378 | 4227 | |
c3418f42 | 4228 | /* Promote floor rounding to trunc rounding for unsigned operations. */ |
d2fa4ea5 | 4229 | if (unsignedp) |
db96f378 | 4230 | { |
d2fa4ea5 | 4231 | if (code == FLOOR_DIV_EXPR) |
4232 | code = TRUNC_DIV_EXPR; | |
4233 | if (code == FLOOR_MOD_EXPR) | |
4234 | code = TRUNC_MOD_EXPR; | |
03b70ee3 | 4235 | if (code == EXACT_DIV_EXPR && op1_is_pow2) |
4236 | code = TRUNC_DIV_EXPR; | |
d2fa4ea5 | 4237 | } |
64e50eaa | 4238 | |
d2fa4ea5 | 4239 | if (op1 != const0_rtx) |
4240 | switch (code) | |
4241 | { | |
4242 | case TRUNC_MOD_EXPR: | |
4243 | case TRUNC_DIV_EXPR: | |
61e477c7 | 4244 | if (op1_is_constant) |
d2fa4ea5 | 4245 | { |
7a6aeeed | 4246 | scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode); |
4247 | int size = GET_MODE_BITSIZE (int_mode); | |
210ba7c8 | 4248 | if (unsignedp) |
d2fa4ea5 | 4249 | { |
127cb1cd | 4250 | unsigned HOST_WIDE_INT mh, ml; |
d2fa4ea5 | 4251 | int pre_shift, post_shift; |
4252 | int dummy; | |
7a6aeeed | 4253 | wide_int wd = rtx_mode_t (op1, int_mode); |
ab104738 | 4254 | unsigned HOST_WIDE_INT d = wd.to_uhwi (); |
d2fa4ea5 | 4255 | |
ab104738 | 4256 | if (wi::popcount (wd) == 1) |
d2fa4ea5 | 4257 | { |
4258 | pre_shift = floor_log2 (d); | |
4259 | if (rem_flag) | |
4260 | { | |
0359f9f5 | 4261 | unsigned HOST_WIDE_INT mask |
edc19fd0 | 4262 | = (HOST_WIDE_INT_1U << pre_shift) - 1; |
0bc644e0 | 4263 | remainder |
7a6aeeed | 4264 | = expand_binop (int_mode, and_optab, op0, |
4265 | gen_int_mode (mask, int_mode), | |
0bc644e0 | 4266 | remainder, 1, |
4267 | OPTAB_LIB_WIDEN); | |
d2fa4ea5 | 4268 | if (remainder) |
436b0397 | 4269 | return gen_lowpart (mode, remainder); |
d2fa4ea5 | 4270 | } |
7a6aeeed | 4271 | quotient = expand_shift (RSHIFT_EXPR, int_mode, op0, |
f5ff0b21 | 4272 | pre_shift, tquotient, 1); |
d2fa4ea5 | 4273 | } |
61e477c7 | 4274 | else if (size <= HOST_BITS_PER_WIDE_INT) |
d2fa4ea5 | 4275 | { |
edc19fd0 | 4276 | if (d >= (HOST_WIDE_INT_1U << (size - 1))) |
d2fa4ea5 | 4277 | { |
75ff336e | 4278 | /* Most significant bit of divisor is set; emit an scc |
4279 | insn. */ | |
dab963fb | 4280 | quotient = emit_store_flag_force (tquotient, GEU, op0, op1, |
7a6aeeed | 4281 | int_mode, 1, 1); |
d2fa4ea5 | 4282 | } |
4283 | else | |
4284 | { | |
75ff336e | 4285 | /* Find a suitable multiplier and right shift count |
4286 | instead of multiplying with D. */ | |
4287 | ||
4288 | mh = choose_multiplier (d, size, size, | |
4289 | &ml, &post_shift, &dummy); | |
4290 | ||
4291 | /* If the suggested multiplier is more than SIZE bits, | |
4292 | we can do better for even divisors, using an | |
4293 | initial right shift. */ | |
4294 | if (mh != 0 && (d & 1) == 0) | |
4295 | { | |
ac29ece2 | 4296 | pre_shift = ctz_or_zero (d); |
75ff336e | 4297 | mh = choose_multiplier (d >> pre_shift, size, |
4298 | size - pre_shift, | |
4299 | &ml, &post_shift, &dummy); | |
611234b4 | 4300 | gcc_assert (!mh); |
75ff336e | 4301 | } |
4302 | else | |
4303 | pre_shift = 0; | |
4304 | ||
4305 | if (mh != 0) | |
4306 | { | |
4307 | rtx t1, t2, t3, t4; | |
4308 | ||
84ab528e | 4309 | if (post_shift - 1 >= BITS_PER_WORD) |
4310 | goto fail1; | |
4311 | ||
e56afeb2 | 4312 | extra_cost |
7a6aeeed | 4313 | = (shift_cost (speed, int_mode, post_shift - 1) |
4314 | + shift_cost (speed, int_mode, 1) | |
4315 | + 2 * add_cost (speed, int_mode)); | |
0359f9f5 | 4316 | t1 = expmed_mult_highpart |
7a6aeeed | 4317 | (int_mode, op0, gen_int_mode (ml, int_mode), |
0359f9f5 | 4318 | NULL_RTX, 1, max_cost - extra_cost); |
75ff336e | 4319 | if (t1 == 0) |
4320 | goto fail1; | |
7a6aeeed | 4321 | t2 = force_operand (gen_rtx_MINUS (int_mode, |
941522d6 | 4322 | op0, t1), |
75ff336e | 4323 | NULL_RTX); |
7a6aeeed | 4324 | t3 = expand_shift (RSHIFT_EXPR, int_mode, |
f5ff0b21 | 4325 | t2, 1, NULL_RTX, 1); |
7a6aeeed | 4326 | t4 = force_operand (gen_rtx_PLUS (int_mode, |
941522d6 | 4327 | t1, t3), |
75ff336e | 4328 | NULL_RTX); |
7c446c95 | 4329 | quotient = expand_shift |
7a6aeeed | 4330 | (RSHIFT_EXPR, int_mode, t4, |
f5ff0b21 | 4331 | post_shift - 1, tquotient, 1); |
75ff336e | 4332 | } |
4333 | else | |
4334 | { | |
4335 | rtx t1, t2; | |
4336 | ||
84ab528e | 4337 | if (pre_shift >= BITS_PER_WORD |
4338 | || post_shift >= BITS_PER_WORD) | |
4339 | goto fail1; | |
4340 | ||
7c446c95 | 4341 | t1 = expand_shift |
7a6aeeed | 4342 | (RSHIFT_EXPR, int_mode, op0, |
f5ff0b21 | 4343 | pre_shift, NULL_RTX, 1); |
e56afeb2 | 4344 | extra_cost |
7a6aeeed | 4345 | = (shift_cost (speed, int_mode, pre_shift) |
4346 | + shift_cost (speed, int_mode, post_shift)); | |
0359f9f5 | 4347 | t2 = expmed_mult_highpart |
7a6aeeed | 4348 | (int_mode, t1, |
4349 | gen_int_mode (ml, int_mode), | |
0359f9f5 | 4350 | NULL_RTX, 1, max_cost - extra_cost); |
75ff336e | 4351 | if (t2 == 0) |
4352 | goto fail1; | |
7c446c95 | 4353 | quotient = expand_shift |
7a6aeeed | 4354 | (RSHIFT_EXPR, int_mode, t2, |
f5ff0b21 | 4355 | post_shift, tquotient, 1); |
75ff336e | 4356 | } |
d2fa4ea5 | 4357 | } |
4358 | } | |
61e477c7 | 4359 | else /* Too wide mode to use tricky code */ |
4360 | break; | |
d2fa4ea5 | 4361 | |
4362 | insn = get_last_insn (); | |
41cf444a | 4363 | if (insn != last) |
4364 | set_dst_reg_note (insn, REG_EQUAL, | |
7a6aeeed | 4365 | gen_rtx_UDIV (int_mode, op0, op1), |
41cf444a | 4366 | quotient); |
d2fa4ea5 | 4367 | } |
4368 | else /* TRUNC_DIV, signed */ | |
4369 | { | |
4370 | unsigned HOST_WIDE_INT ml; | |
4371 | int lgup, post_shift; | |
4de52edf | 4372 | rtx mlr; |
d2fa4ea5 | 4373 | HOST_WIDE_INT d = INTVAL (op1); |
f74f4e04 | 4374 | unsigned HOST_WIDE_INT abs_d; |
4375 | ||
3d77819c | 4376 | /* Since d might be INT_MIN, we have to cast to |
4377 | unsigned HOST_WIDE_INT before negating to avoid | |
4378 | undefined signed overflow. */ | |
34f60736 | 4379 | abs_d = (d >= 0 |
4380 | ? (unsigned HOST_WIDE_INT) d | |
4381 | : - (unsigned HOST_WIDE_INT) d); | |
d2fa4ea5 | 4382 | |
4383 | /* n rem d = n rem -d */ | |
4384 | if (rem_flag && d < 0) | |
4385 | { | |
4386 | d = abs_d; | |
7a6aeeed | 4387 | op1 = gen_int_mode (abs_d, int_mode); |
d2fa4ea5 | 4388 | } |
4389 | ||
4390 | if (d == 1) | |
4391 | quotient = op0; | |
4392 | else if (d == -1) | |
7a6aeeed | 4393 | quotient = expand_unop (int_mode, neg_optab, op0, |
d2fa4ea5 | 4394 | tquotient, 0); |
ab104738 | 4395 | else if (size <= HOST_BITS_PER_WIDE_INT |
edc19fd0 | 4396 | && abs_d == HOST_WIDE_INT_1U << (size - 1)) |
7676164c | 4397 | { |
4398 | /* This case is not handled correctly below. */ | |
4399 | quotient = emit_store_flag (tquotient, EQ, op0, op1, | |
7a6aeeed | 4400 | int_mode, 1, 1); |
7676164c | 4401 | if (quotient == 0) |
4402 | goto fail1; | |
4403 | } | |
d2fa4ea5 | 4404 | else if (EXACT_POWER_OF_2_OR_ZERO_P (d) |
ab104738 | 4405 | && (size <= HOST_BITS_PER_WIDE_INT || d >= 0) |
49a71e58 | 4406 | && (rem_flag |
7a6aeeed | 4407 | ? smod_pow2_cheap (speed, int_mode) |
4408 | : sdiv_pow2_cheap (speed, int_mode)) | |
41323e11 | 4409 | /* We assume that cheap metric is true if the |
4410 | optab has an expander for this mode. */ | |
99bdde56 | 4411 | && ((optab_handler ((rem_flag ? smod_optab |
4412 | : sdiv_optab), | |
7a6aeeed | 4413 | int_mode) |
9884e77f | 4414 | != CODE_FOR_nothing) |
7a6aeeed | 4415 | || (optab_handler (sdivmod_optab, int_mode) |
d6bf3b14 | 4416 | != CODE_FOR_nothing))) |
d2fa4ea5 | 4417 | ; |
ab104738 | 4418 | else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d) |
4419 | && (size <= HOST_BITS_PER_WIDE_INT | |
4420 | || abs_d != (unsigned HOST_WIDE_INT) d)) | |
d2fa4ea5 | 4421 | { |
41323e11 | 4422 | if (rem_flag) |
4423 | { | |
7a6aeeed | 4424 | remainder = expand_smod_pow2 (int_mode, op0, d); |
41323e11 | 4425 | if (remainder) |
4426 | return gen_lowpart (mode, remainder); | |
4427 | } | |
cb2e141e | 4428 | |
7a6aeeed | 4429 | if (sdiv_pow2_cheap (speed, int_mode) |
4430 | && ((optab_handler (sdiv_optab, int_mode) | |
cb2e141e | 4431 | != CODE_FOR_nothing) |
7a6aeeed | 4432 | || (optab_handler (sdivmod_optab, int_mode) |
cb2e141e | 4433 | != CODE_FOR_nothing))) |
4434 | quotient = expand_divmod (0, TRUNC_DIV_EXPR, | |
7a6aeeed | 4435 | int_mode, op0, |
cb2e141e | 4436 | gen_int_mode (abs_d, |
7a6aeeed | 4437 | int_mode), |
cb2e141e | 4438 | NULL_RTX, 0); |
4439 | else | |
7a6aeeed | 4440 | quotient = expand_sdiv_pow2 (int_mode, op0, abs_d); |
d2fa4ea5 | 4441 | |
41323e11 | 4442 | /* We have computed OP0 / abs(OP1). If OP1 is negative, |
4443 | negate the quotient. */ | |
d2fa4ea5 | 4444 | if (d < 0) |
4445 | { | |
4446 | insn = get_last_insn (); | |
38457527 | 4447 | if (insn != last |
edc19fd0 | 4448 | && abs_d < (HOST_WIDE_INT_1U |
dd192c2d | 4449 | << (HOST_BITS_PER_WIDE_INT - 1))) |
41cf444a | 4450 | set_dst_reg_note (insn, REG_EQUAL, |
7a6aeeed | 4451 | gen_rtx_DIV (int_mode, op0, |
f62058c3 | 4452 | gen_int_mode |
4453 | (abs_d, | |
7a6aeeed | 4454 | int_mode)), |
41cf444a | 4455 | quotient); |
d2fa4ea5 | 4456 | |
7a6aeeed | 4457 | quotient = expand_unop (int_mode, neg_optab, |
d2fa4ea5 | 4458 | quotient, quotient, 0); |
4459 | } | |
4460 | } | |
61e477c7 | 4461 | else if (size <= HOST_BITS_PER_WIDE_INT) |
d2fa4ea5 | 4462 | { |
4463 | choose_multiplier (abs_d, size, size - 1, | |
127cb1cd | 4464 | &ml, &post_shift, &lgup); |
edc19fd0 | 4465 | if (ml < HOST_WIDE_INT_1U << (size - 1)) |
d2fa4ea5 | 4466 | { |
4467 | rtx t1, t2, t3; | |
4468 | ||
84ab528e | 4469 | if (post_shift >= BITS_PER_WORD |
4470 | || size - 1 >= BITS_PER_WORD) | |
4471 | goto fail1; | |
4472 | ||
7a6aeeed | 4473 | extra_cost = (shift_cost (speed, int_mode, post_shift) |
4474 | + shift_cost (speed, int_mode, size - 1) | |
4475 | + add_cost (speed, int_mode)); | |
0359f9f5 | 4476 | t1 = expmed_mult_highpart |
7a6aeeed | 4477 | (int_mode, op0, gen_int_mode (ml, int_mode), |
0359f9f5 | 4478 | NULL_RTX, 0, max_cost - extra_cost); |
d2fa4ea5 | 4479 | if (t1 == 0) |
4480 | goto fail1; | |
7c446c95 | 4481 | t2 = expand_shift |
7a6aeeed | 4482 | (RSHIFT_EXPR, int_mode, t1, |
f5ff0b21 | 4483 | post_shift, NULL_RTX, 0); |
7c446c95 | 4484 | t3 = expand_shift |
7a6aeeed | 4485 | (RSHIFT_EXPR, int_mode, op0, |
f5ff0b21 | 4486 | size - 1, NULL_RTX, 0); |
d2fa4ea5 | 4487 | if (d < 0) |
7014838c | 4488 | quotient |
7a6aeeed | 4489 | = force_operand (gen_rtx_MINUS (int_mode, t3, t2), |
7014838c | 4490 | tquotient); |
d2fa4ea5 | 4491 | else |
7014838c | 4492 | quotient |
7a6aeeed | 4493 | = force_operand (gen_rtx_MINUS (int_mode, t2, t3), |
7014838c | 4494 | tquotient); |
d2fa4ea5 | 4495 | } |
4496 | else | |
4497 | { | |
4498 | rtx t1, t2, t3, t4; | |
4499 | ||
84ab528e | 4500 | if (post_shift >= BITS_PER_WORD |
4501 | || size - 1 >= BITS_PER_WORD) | |
4502 | goto fail1; | |
4503 | ||
7097b942 | 4504 | ml |= HOST_WIDE_INT_M1U << (size - 1); |
7a6aeeed | 4505 | mlr = gen_int_mode (ml, int_mode); |
4506 | extra_cost = (shift_cost (speed, int_mode, post_shift) | |
4507 | + shift_cost (speed, int_mode, size - 1) | |
4508 | + 2 * add_cost (speed, int_mode)); | |
4509 | t1 = expmed_mult_highpart (int_mode, op0, mlr, | |
33183a3c | 4510 | NULL_RTX, 0, |
4511 | max_cost - extra_cost); | |
d2fa4ea5 | 4512 | if (t1 == 0) |
4513 | goto fail1; | |
7a6aeeed | 4514 | t2 = force_operand (gen_rtx_PLUS (int_mode, t1, op0), |
d2fa4ea5 | 4515 | NULL_RTX); |
7c446c95 | 4516 | t3 = expand_shift |
7a6aeeed | 4517 | (RSHIFT_EXPR, int_mode, t2, |
f5ff0b21 | 4518 | post_shift, NULL_RTX, 0); |
7c446c95 | 4519 | t4 = expand_shift |
7a6aeeed | 4520 | (RSHIFT_EXPR, int_mode, op0, |
f5ff0b21 | 4521 | size - 1, NULL_RTX, 0); |
d2fa4ea5 | 4522 | if (d < 0) |
7014838c | 4523 | quotient |
7a6aeeed | 4524 | = force_operand (gen_rtx_MINUS (int_mode, t4, t3), |
7014838c | 4525 | tquotient); |
d2fa4ea5 | 4526 | else |
7014838c | 4527 | quotient |
7a6aeeed | 4528 | = force_operand (gen_rtx_MINUS (int_mode, t3, t4), |
7014838c | 4529 | tquotient); |
d2fa4ea5 | 4530 | } |
4531 | } | |
61e477c7 | 4532 | else /* Too wide mode to use tricky code */ |
4533 | break; | |
d2fa4ea5 | 4534 | |
38457527 | 4535 | insn = get_last_insn (); |
41cf444a | 4536 | if (insn != last) |
4537 | set_dst_reg_note (insn, REG_EQUAL, | |
7a6aeeed | 4538 | gen_rtx_DIV (int_mode, op0, op1), |
41cf444a | 4539 | quotient); |
d2fa4ea5 | 4540 | } |
4541 | break; | |
4542 | } | |
4543 | fail1: | |
4544 | delete_insns_since (last); | |
4545 | break; | |
db96f378 | 4546 | |
d2fa4ea5 | 4547 | case FLOOR_DIV_EXPR: |
4548 | case FLOOR_MOD_EXPR: | |
4549 | /* We will come here only for signed operations. */ | |
7a6aeeed | 4550 | if (op1_is_constant && HWI_COMPUTABLE_MODE_P (compute_mode)) |
d2fa4ea5 | 4551 | { |
7a6aeeed | 4552 | scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode); |
4553 | int size = GET_MODE_BITSIZE (int_mode); | |
127cb1cd | 4554 | unsigned HOST_WIDE_INT mh, ml; |
d2fa4ea5 | 4555 | int pre_shift, lgup, post_shift; |
4556 | HOST_WIDE_INT d = INTVAL (op1); | |
4557 | ||
4558 | if (d > 0) | |
4559 | { | |
4560 | /* We could just as easily deal with negative constants here, | |
4561 | but it does not seem worth the trouble for GCC 2.6. */ | |
4562 | if (EXACT_POWER_OF_2_OR_ZERO_P (d)) | |
4563 | { | |
4564 | pre_shift = floor_log2 (d); | |
4565 | if (rem_flag) | |
4566 | { | |
0359f9f5 | 4567 | unsigned HOST_WIDE_INT mask |
edc19fd0 | 4568 | = (HOST_WIDE_INT_1U << pre_shift) - 1; |
0359f9f5 | 4569 | remainder = expand_binop |
7a6aeeed | 4570 | (int_mode, and_optab, op0, |
4571 | gen_int_mode (mask, int_mode), | |
0359f9f5 | 4572 | remainder, 0, OPTAB_LIB_WIDEN); |
d2fa4ea5 | 4573 | if (remainder) |
436b0397 | 4574 | return gen_lowpart (mode, remainder); |
d2fa4ea5 | 4575 | } |
7c446c95 | 4576 | quotient = expand_shift |
7a6aeeed | 4577 | (RSHIFT_EXPR, int_mode, op0, |
f5ff0b21 | 4578 | pre_shift, tquotient, 0); |
d2fa4ea5 | 4579 | } |
4580 | else | |
4581 | { | |
4582 | rtx t1, t2, t3, t4; | |
4583 | ||
4584 | mh = choose_multiplier (d, size, size - 1, | |
4585 | &ml, &post_shift, &lgup); | |
611234b4 | 4586 | gcc_assert (!mh); |
d2fa4ea5 | 4587 | |
84ab528e | 4588 | if (post_shift < BITS_PER_WORD |
4589 | && size - 1 < BITS_PER_WORD) | |
d2fa4ea5 | 4590 | { |
7c446c95 | 4591 | t1 = expand_shift |
7a6aeeed | 4592 | (RSHIFT_EXPR, int_mode, op0, |
f5ff0b21 | 4593 | size - 1, NULL_RTX, 0); |
7a6aeeed | 4594 | t2 = expand_binop (int_mode, xor_optab, op0, t1, |
84ab528e | 4595 | NULL_RTX, 0, OPTAB_WIDEN); |
7a6aeeed | 4596 | extra_cost = (shift_cost (speed, int_mode, post_shift) |
4597 | + shift_cost (speed, int_mode, size - 1) | |
4598 | + 2 * add_cost (speed, int_mode)); | |
0359f9f5 | 4599 | t3 = expmed_mult_highpart |
7a6aeeed | 4600 | (int_mode, t2, gen_int_mode (ml, int_mode), |
0359f9f5 | 4601 | NULL_RTX, 1, max_cost - extra_cost); |
84ab528e | 4602 | if (t3 != 0) |
4603 | { | |
7c446c95 | 4604 | t4 = expand_shift |
7a6aeeed | 4605 | (RSHIFT_EXPR, int_mode, t3, |
f5ff0b21 | 4606 | post_shift, NULL_RTX, 1); |
7a6aeeed | 4607 | quotient = expand_binop (int_mode, xor_optab, |
84ab528e | 4608 | t4, t1, tquotient, 0, |
4609 | OPTAB_WIDEN); | |
4610 | } | |
d2fa4ea5 | 4611 | } |
4612 | } | |
4613 | } | |
4614 | else | |
4615 | { | |
4616 | rtx nsign, t1, t2, t3, t4; | |
7a6aeeed | 4617 | t1 = force_operand (gen_rtx_PLUS (int_mode, |
941522d6 | 4618 | op0, constm1_rtx), NULL_RTX); |
7a6aeeed | 4619 | t2 = expand_binop (int_mode, ior_optab, op0, t1, NULL_RTX, |
d2fa4ea5 | 4620 | 0, OPTAB_WIDEN); |
7a6aeeed | 4621 | nsign = expand_shift (RSHIFT_EXPR, int_mode, t2, |
ab104738 | 4622 | size - 1, NULL_RTX, 0); |
7a6aeeed | 4623 | t3 = force_operand (gen_rtx_MINUS (int_mode, t1, nsign), |
d2fa4ea5 | 4624 | NULL_RTX); |
7a6aeeed | 4625 | t4 = expand_divmod (0, TRUNC_DIV_EXPR, int_mode, t3, op1, |
d2fa4ea5 | 4626 | NULL_RTX, 0); |
4627 | if (t4) | |
4628 | { | |
4629 | rtx t5; | |
7a6aeeed | 4630 | t5 = expand_unop (int_mode, one_cmpl_optab, nsign, |
d2fa4ea5 | 4631 | NULL_RTX, 0); |
7a6aeeed | 4632 | quotient = force_operand (gen_rtx_PLUS (int_mode, t4, t5), |
d2fa4ea5 | 4633 | tquotient); |
4634 | } | |
4635 | } | |
4636 | } | |
4637 | ||
4638 | if (quotient != 0) | |
4639 | break; | |
4640 | delete_insns_since (last); | |
4641 | ||
4642 | /* Try using an instruction that produces both the quotient and | |
4643 | remainder, using truncation. We can easily compensate the quotient | |
4644 | or remainder to get floor rounding, once we have the remainder. | |
4645 | Notice that we compute also the final remainder value here, | |
4646 | and return the result right away. */ | |
36db22a0 | 4647 | if (target == 0 || GET_MODE (target) != compute_mode) |
d2fa4ea5 | 4648 | target = gen_reg_rtx (compute_mode); |
e324608c | 4649 | |
d2fa4ea5 | 4650 | if (rem_flag) |
4651 | { | |
e324608c | 4652 | remainder |
8ad4c111 | 4653 | = REG_P (target) ? target : gen_reg_rtx (compute_mode); |
d2fa4ea5 | 4654 | quotient = gen_reg_rtx (compute_mode); |
4655 | } | |
4656 | else | |
4657 | { | |
e324608c | 4658 | quotient |
8ad4c111 | 4659 | = REG_P (target) ? target : gen_reg_rtx (compute_mode); |
d2fa4ea5 | 4660 | remainder = gen_reg_rtx (compute_mode); |
4661 | } | |
4662 | ||
4663 | if (expand_twoval_binop (sdivmod_optab, op0, op1, | |
4664 | quotient, remainder, 0)) | |
4665 | { | |
4666 | /* This could be computed with a branch-less sequence. | |
4667 | Save that for later. */ | |
4668 | rtx tem; | |
89ca96ce | 4669 | rtx_code_label *label = gen_label_rtx (); |
c5aa1e92 | 4670 | do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label); |
d2fa4ea5 | 4671 | tem = expand_binop (compute_mode, xor_optab, op0, op1, |
4672 | NULL_RTX, 0, OPTAB_WIDEN); | |
c5aa1e92 | 4673 | do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label); |
d2fa4ea5 | 4674 | expand_dec (quotient, const1_rtx); |
4675 | expand_inc (remainder, op1); | |
4676 | emit_label (label); | |
436b0397 | 4677 | return gen_lowpart (mode, rem_flag ? remainder : quotient); |
d2fa4ea5 | 4678 | } |
4679 | ||
4680 | /* No luck with division elimination or divmod. Have to do it | |
4681 | by conditionally adjusting op0 *and* the result. */ | |
db96f378 | 4682 | { |
89ca96ce | 4683 | rtx_code_label *label1, *label2, *label3, *label4, *label5; |
d2fa4ea5 | 4684 | rtx adjusted_op0; |
4685 | rtx tem; | |
4686 | ||
4687 | quotient = gen_reg_rtx (compute_mode); | |
4688 | adjusted_op0 = copy_to_mode_reg (compute_mode, op0); | |
4689 | label1 = gen_label_rtx (); | |
4690 | label2 = gen_label_rtx (); | |
4691 | label3 = gen_label_rtx (); | |
4692 | label4 = gen_label_rtx (); | |
4693 | label5 = gen_label_rtx (); | |
c5aa1e92 | 4694 | do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2); |
4695 | do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1); | |
d2fa4ea5 | 4696 | tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1, |
4697 | quotient, 0, OPTAB_LIB_WIDEN); | |
4698 | if (tem != quotient) | |
4699 | emit_move_insn (quotient, tem); | |
1d5ad681 | 4700 | emit_jump_insn (targetm.gen_jump (label5)); |
d2fa4ea5 | 4701 | emit_barrier (); |
4702 | emit_label (label1); | |
db96f378 | 4703 | expand_inc (adjusted_op0, const1_rtx); |
1d5ad681 | 4704 | emit_jump_insn (targetm.gen_jump (label4)); |
d2fa4ea5 | 4705 | emit_barrier (); |
4706 | emit_label (label2); | |
c5aa1e92 | 4707 | do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3); |
d2fa4ea5 | 4708 | tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1, |
4709 | quotient, 0, OPTAB_LIB_WIDEN); | |
4710 | if (tem != quotient) | |
4711 | emit_move_insn (quotient, tem); | |
1d5ad681 | 4712 | emit_jump_insn (targetm.gen_jump (label5)); |
d2fa4ea5 | 4713 | emit_barrier (); |
4714 | emit_label (label3); | |
4715 | expand_dec (adjusted_op0, const1_rtx); | |
4716 | emit_label (label4); | |
4717 | tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1, | |
4718 | quotient, 0, OPTAB_LIB_WIDEN); | |
4719 | if (tem != quotient) | |
4720 | emit_move_insn (quotient, tem); | |
4721 | expand_dec (quotient, const1_rtx); | |
4722 | emit_label (label5); | |
db96f378 | 4723 | } |
d2fa4ea5 | 4724 | break; |
db96f378 | 4725 | |
d2fa4ea5 | 4726 | case CEIL_DIV_EXPR: |
4727 | case CEIL_MOD_EXPR: | |
4728 | if (unsignedp) | |
4729 | { | |
ab104738 | 4730 | if (op1_is_constant |
4731 | && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)) | |
7a6aeeed | 4732 | && (HWI_COMPUTABLE_MODE_P (compute_mode) |
ab104738 | 4733 | || INTVAL (op1) >= 0)) |
3f4d178c | 4734 | { |
7a6aeeed | 4735 | scalar_int_mode int_mode |
4736 | = as_a <scalar_int_mode> (compute_mode); | |
3f4d178c | 4737 | rtx t1, t2, t3; |
4738 | unsigned HOST_WIDE_INT d = INTVAL (op1); | |
7a6aeeed | 4739 | t1 = expand_shift (RSHIFT_EXPR, int_mode, op0, |
f5ff0b21 | 4740 | floor_log2 (d), tquotient, 1); |
7a6aeeed | 4741 | t2 = expand_binop (int_mode, and_optab, op0, |
4742 | gen_int_mode (d - 1, int_mode), | |
3f4d178c | 4743 | NULL_RTX, 1, OPTAB_LIB_WIDEN); |
7a6aeeed | 4744 | t3 = gen_reg_rtx (int_mode); |
4745 | t3 = emit_store_flag (t3, NE, t2, const0_rtx, int_mode, 1, 1); | |
9d7a4e0b | 4746 | if (t3 == 0) |
4747 | { | |
89ca96ce | 4748 | rtx_code_label *lab; |
9d7a4e0b | 4749 | lab = gen_label_rtx (); |
7a6aeeed | 4750 | do_cmp_and_jump (t2, const0_rtx, EQ, int_mode, lab); |
9d7a4e0b | 4751 | expand_inc (t1, const1_rtx); |
4752 | emit_label (lab); | |
4753 | quotient = t1; | |
4754 | } | |
4755 | else | |
7a6aeeed | 4756 | quotient = force_operand (gen_rtx_PLUS (int_mode, t1, t3), |
9d7a4e0b | 4757 | tquotient); |
3f4d178c | 4758 | break; |
4759 | } | |
d2fa4ea5 | 4760 | |
4761 | /* Try using an instruction that produces both the quotient and | |
4762 | remainder, using truncation. We can easily compensate the | |
4763 | quotient or remainder to get ceiling rounding, once we have the | |
4764 | remainder. Notice that we compute also the final remainder | |
4765 | value here, and return the result right away. */ | |
36db22a0 | 4766 | if (target == 0 || GET_MODE (target) != compute_mode) |
d2fa4ea5 | 4767 | target = gen_reg_rtx (compute_mode); |
e324608c | 4768 | |
d2fa4ea5 | 4769 | if (rem_flag) |
4770 | { | |
8ad4c111 | 4771 | remainder = (REG_P (target) |
e324608c | 4772 | ? target : gen_reg_rtx (compute_mode)); |
d2fa4ea5 | 4773 | quotient = gen_reg_rtx (compute_mode); |
4774 | } | |
4775 | else | |
4776 | { | |
8ad4c111 | 4777 | quotient = (REG_P (target) |
e324608c | 4778 | ? target : gen_reg_rtx (compute_mode)); |
d2fa4ea5 | 4779 | remainder = gen_reg_rtx (compute_mode); |
4780 | } | |
4781 | ||
4782 | if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, | |
4783 | remainder, 1)) | |
4784 | { | |
4785 | /* This could be computed with a branch-less sequence. | |
4786 | Save that for later. */ | |
89ca96ce | 4787 | rtx_code_label *label = gen_label_rtx (); |
c5aa1e92 | 4788 | do_cmp_and_jump (remainder, const0_rtx, EQ, |
4789 | compute_mode, label); | |
d2fa4ea5 | 4790 | expand_inc (quotient, const1_rtx); |
4791 | expand_dec (remainder, op1); | |
4792 | emit_label (label); | |
436b0397 | 4793 | return gen_lowpart (mode, rem_flag ? remainder : quotient); |
d2fa4ea5 | 4794 | } |
4795 | ||
4796 | /* No luck with division elimination or divmod. Have to do it | |
4797 | by conditionally adjusting op0 *and* the result. */ | |
db96f378 | 4798 | { |
89ca96ce | 4799 | rtx_code_label *label1, *label2; |
d2fa4ea5 | 4800 | rtx adjusted_op0, tem; |
4801 | ||
4802 | quotient = gen_reg_rtx (compute_mode); | |
4803 | adjusted_op0 = copy_to_mode_reg (compute_mode, op0); | |
4804 | label1 = gen_label_rtx (); | |
4805 | label2 = gen_label_rtx (); | |
c5aa1e92 | 4806 | do_cmp_and_jump (adjusted_op0, const0_rtx, NE, |
4807 | compute_mode, label1); | |
d2fa4ea5 | 4808 | emit_move_insn (quotient, const0_rtx); |
1d5ad681 | 4809 | emit_jump_insn (targetm.gen_jump (label2)); |
d2fa4ea5 | 4810 | emit_barrier (); |
4811 | emit_label (label1); | |
4812 | expand_dec (adjusted_op0, const1_rtx); | |
4813 | tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1, | |
4814 | quotient, 1, OPTAB_LIB_WIDEN); | |
4815 | if (tem != quotient) | |
4816 | emit_move_insn (quotient, tem); | |
4817 | expand_inc (quotient, const1_rtx); | |
4818 | emit_label (label2); | |
db96f378 | 4819 | } |
d2fa4ea5 | 4820 | } |
4821 | else /* signed */ | |
4822 | { | |
2b10064a | 4823 | if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)) |
4824 | && INTVAL (op1) >= 0) | |
4825 | { | |
4826 | /* This is extremely similar to the code for the unsigned case | |
4827 | above. For 2.7 we should merge these variants, but for | |
4828 | 2.6.1 I don't want to touch the code for unsigned since that | |
4829 | get used in C. The signed case will only be used by other | |
4830 | languages (Ada). */ | |
4831 | ||
4832 | rtx t1, t2, t3; | |
4833 | unsigned HOST_WIDE_INT d = INTVAL (op1); | |
4834 | t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0, | |
f5ff0b21 | 4835 | floor_log2 (d), tquotient, 0); |
2b10064a | 4836 | t2 = expand_binop (compute_mode, and_optab, op0, |
0359f9f5 | 4837 | gen_int_mode (d - 1, compute_mode), |
2b10064a | 4838 | NULL_RTX, 1, OPTAB_LIB_WIDEN); |
4839 | t3 = gen_reg_rtx (compute_mode); | |
4840 | t3 = emit_store_flag (t3, NE, t2, const0_rtx, | |
4841 | compute_mode, 1, 1); | |
4842 | if (t3 == 0) | |
4843 | { | |
89ca96ce | 4844 | rtx_code_label *lab; |
2b10064a | 4845 | lab = gen_label_rtx (); |
c5aa1e92 | 4846 | do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab); |
2b10064a | 4847 | expand_inc (t1, const1_rtx); |
4848 | emit_label (lab); | |
4849 | quotient = t1; | |
4850 | } | |
4851 | else | |
941522d6 | 4852 | quotient = force_operand (gen_rtx_PLUS (compute_mode, |
4853 | t1, t3), | |
2b10064a | 4854 | tquotient); |
4855 | break; | |
4856 | } | |
4857 | ||
d2fa4ea5 | 4858 | /* Try using an instruction that produces both the quotient and |
4859 | remainder, using truncation. We can easily compensate the | |
4860 | quotient or remainder to get ceiling rounding, once we have the | |
4861 | remainder. Notice that we compute also the final remainder | |
4862 | value here, and return the result right away. */ | |
36db22a0 | 4863 | if (target == 0 || GET_MODE (target) != compute_mode) |
d2fa4ea5 | 4864 | target = gen_reg_rtx (compute_mode); |
4865 | if (rem_flag) | |
4866 | { | |
8ad4c111 | 4867 | remainder= (REG_P (target) |
e324608c | 4868 | ? target : gen_reg_rtx (compute_mode)); |
d2fa4ea5 | 4869 | quotient = gen_reg_rtx (compute_mode); |
4870 | } | |
4871 | else | |
4872 | { | |
8ad4c111 | 4873 | quotient = (REG_P (target) |
e324608c | 4874 | ? target : gen_reg_rtx (compute_mode)); |
d2fa4ea5 | 4875 | remainder = gen_reg_rtx (compute_mode); |
4876 | } | |
4877 | ||
4878 | if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, | |
4879 | remainder, 0)) | |
4880 | { | |
4881 | /* This could be computed with a branch-less sequence. | |
4882 | Save that for later. */ | |
4883 | rtx tem; | |
89ca96ce | 4884 | rtx_code_label *label = gen_label_rtx (); |
c5aa1e92 | 4885 | do_cmp_and_jump (remainder, const0_rtx, EQ, |
4886 | compute_mode, label); | |
d2fa4ea5 | 4887 | tem = expand_binop (compute_mode, xor_optab, op0, op1, |
4888 | NULL_RTX, 0, OPTAB_WIDEN); | |
c5aa1e92 | 4889 | do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label); |
d2fa4ea5 | 4890 | expand_inc (quotient, const1_rtx); |
4891 | expand_dec (remainder, op1); | |
4892 | emit_label (label); | |
436b0397 | 4893 | return gen_lowpart (mode, rem_flag ? remainder : quotient); |
d2fa4ea5 | 4894 | } |
4895 | ||
4896 | /* No luck with division elimination or divmod. Have to do it | |
4897 | by conditionally adjusting op0 *and* the result. */ | |
db96f378 | 4898 | { |
89ca96ce | 4899 | rtx_code_label *label1, *label2, *label3, *label4, *label5; |
d2fa4ea5 | 4900 | rtx adjusted_op0; |
4901 | rtx tem; | |
4902 | ||
4903 | quotient = gen_reg_rtx (compute_mode); | |
4904 | adjusted_op0 = copy_to_mode_reg (compute_mode, op0); | |
4905 | label1 = gen_label_rtx (); | |
4906 | label2 = gen_label_rtx (); | |
4907 | label3 = gen_label_rtx (); | |
4908 | label4 = gen_label_rtx (); | |
4909 | label5 = gen_label_rtx (); | |
c5aa1e92 | 4910 | do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2); |
4911 | do_cmp_and_jump (adjusted_op0, const0_rtx, GT, | |
4912 | compute_mode, label1); | |
d2fa4ea5 | 4913 | tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1, |
4914 | quotient, 0, OPTAB_LIB_WIDEN); | |
4915 | if (tem != quotient) | |
4916 | emit_move_insn (quotient, tem); | |
1d5ad681 | 4917 | emit_jump_insn (targetm.gen_jump (label5)); |
d2fa4ea5 | 4918 | emit_barrier (); |
4919 | emit_label (label1); | |
4920 | expand_dec (adjusted_op0, const1_rtx); | |
1d5ad681 | 4921 | emit_jump_insn (targetm.gen_jump (label4)); |
d2fa4ea5 | 4922 | emit_barrier (); |
4923 | emit_label (label2); | |
c5aa1e92 | 4924 | do_cmp_and_jump (adjusted_op0, const0_rtx, LT, |
4925 | compute_mode, label3); | |
d2fa4ea5 | 4926 | tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1, |
4927 | quotient, 0, OPTAB_LIB_WIDEN); | |
4928 | if (tem != quotient) | |
4929 | emit_move_insn (quotient, tem); | |
1d5ad681 | 4930 | emit_jump_insn (targetm.gen_jump (label5)); |
d2fa4ea5 | 4931 | emit_barrier (); |
4932 | emit_label (label3); | |
4933 | expand_inc (adjusted_op0, const1_rtx); | |
4934 | emit_label (label4); | |
4935 | tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1, | |
4936 | quotient, 0, OPTAB_LIB_WIDEN); | |
4937 | if (tem != quotient) | |
4938 | emit_move_insn (quotient, tem); | |
4939 | expand_inc (quotient, const1_rtx); | |
4940 | emit_label (label5); | |
db96f378 | 4941 | } |
d2fa4ea5 | 4942 | } |
4943 | break; | |
64e50eaa | 4944 | |
d2fa4ea5 | 4945 | case EXACT_DIV_EXPR: |
7a6aeeed | 4946 | if (op1_is_constant && HWI_COMPUTABLE_MODE_P (compute_mode)) |
d2fa4ea5 | 4947 | { |
7a6aeeed | 4948 | scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode); |
4949 | int size = GET_MODE_BITSIZE (int_mode); | |
d2fa4ea5 | 4950 | HOST_WIDE_INT d = INTVAL (op1); |
4951 | unsigned HOST_WIDE_INT ml; | |
bec2d490 | 4952 | int pre_shift; |
d2fa4ea5 | 4953 | rtx t1; |
4954 | ||
ac29ece2 | 4955 | pre_shift = ctz_or_zero (d); |
bec2d490 | 4956 | ml = invert_mod2n (d >> pre_shift, size); |
7a6aeeed | 4957 | t1 = expand_shift (RSHIFT_EXPR, int_mode, op0, |
f5ff0b21 | 4958 | pre_shift, NULL_RTX, unsignedp); |
7a6aeeed | 4959 | quotient = expand_mult (int_mode, t1, gen_int_mode (ml, int_mode), |
273014f4 | 4960 | NULL_RTX, 1); |
d2fa4ea5 | 4961 | |
4962 | insn = get_last_insn (); | |
41cf444a | 4963 | set_dst_reg_note (insn, REG_EQUAL, |
4964 | gen_rtx_fmt_ee (unsignedp ? UDIV : DIV, | |
7a6aeeed | 4965 | int_mode, op0, op1), |
41cf444a | 4966 | quotient); |
d2fa4ea5 | 4967 | } |
4968 | break; | |
4969 | ||
4970 | case ROUND_DIV_EXPR: | |
4971 | case ROUND_MOD_EXPR: | |
c3118728 | 4972 | if (unsignedp) |
4973 | { | |
7a6aeeed | 4974 | scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode); |
c3118728 | 4975 | rtx tem; |
89ca96ce | 4976 | rtx_code_label *label; |
c3118728 | 4977 | label = gen_label_rtx (); |
7a6aeeed | 4978 | quotient = gen_reg_rtx (int_mode); |
4979 | remainder = gen_reg_rtx (int_mode); | |
c3118728 | 4980 | if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0) |
4981 | { | |
4982 | rtx tem; | |
7a6aeeed | 4983 | quotient = expand_binop (int_mode, udiv_optab, op0, op1, |
c3118728 | 4984 | quotient, 1, OPTAB_LIB_WIDEN); |
7a6aeeed | 4985 | tem = expand_mult (int_mode, quotient, op1, NULL_RTX, 1); |
4986 | remainder = expand_binop (int_mode, sub_optab, op0, tem, | |
c3118728 | 4987 | remainder, 1, OPTAB_LIB_WIDEN); |
4988 | } | |
7a6aeeed | 4989 | tem = plus_constant (int_mode, op1, -1); |
4990 | tem = expand_shift (RSHIFT_EXPR, int_mode, tem, 1, NULL_RTX, 1); | |
4991 | do_cmp_and_jump (remainder, tem, LEU, int_mode, label); | |
c3118728 | 4992 | expand_inc (quotient, const1_rtx); |
4993 | expand_dec (remainder, op1); | |
4994 | emit_label (label); | |
4995 | } | |
4996 | else | |
4997 | { | |
7a6aeeed | 4998 | scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode); |
4999 | int size = GET_MODE_BITSIZE (int_mode); | |
c3118728 | 5000 | rtx abs_rem, abs_op1, tem, mask; |
89ca96ce | 5001 | rtx_code_label *label; |
c3118728 | 5002 | label = gen_label_rtx (); |
7a6aeeed | 5003 | quotient = gen_reg_rtx (int_mode); |
5004 | remainder = gen_reg_rtx (int_mode); | |
c3118728 | 5005 | if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0) |
5006 | { | |
5007 | rtx tem; | |
7a6aeeed | 5008 | quotient = expand_binop (int_mode, sdiv_optab, op0, op1, |
c3118728 | 5009 | quotient, 0, OPTAB_LIB_WIDEN); |
7a6aeeed | 5010 | tem = expand_mult (int_mode, quotient, op1, NULL_RTX, 0); |
5011 | remainder = expand_binop (int_mode, sub_optab, op0, tem, | |
c3118728 | 5012 | remainder, 0, OPTAB_LIB_WIDEN); |
5013 | } | |
7a6aeeed | 5014 | abs_rem = expand_abs (int_mode, remainder, NULL_RTX, 1, 0); |
5015 | abs_op1 = expand_abs (int_mode, op1, NULL_RTX, 1, 0); | |
5016 | tem = expand_shift (LSHIFT_EXPR, int_mode, abs_rem, | |
f5ff0b21 | 5017 | 1, NULL_RTX, 1); |
7a6aeeed | 5018 | do_cmp_and_jump (tem, abs_op1, LTU, int_mode, label); |
5019 | tem = expand_binop (int_mode, xor_optab, op0, op1, | |
c3118728 | 5020 | NULL_RTX, 0, OPTAB_WIDEN); |
7a6aeeed | 5021 | mask = expand_shift (RSHIFT_EXPR, int_mode, tem, |
f5ff0b21 | 5022 | size - 1, NULL_RTX, 0); |
7a6aeeed | 5023 | tem = expand_binop (int_mode, xor_optab, mask, const1_rtx, |
c3118728 | 5024 | NULL_RTX, 0, OPTAB_WIDEN); |
7a6aeeed | 5025 | tem = expand_binop (int_mode, sub_optab, tem, mask, |
c3118728 | 5026 | NULL_RTX, 0, OPTAB_WIDEN); |
5027 | expand_inc (quotient, tem); | |
7a6aeeed | 5028 | tem = expand_binop (int_mode, xor_optab, mask, op1, |
c3118728 | 5029 | NULL_RTX, 0, OPTAB_WIDEN); |
7a6aeeed | 5030 | tem = expand_binop (int_mode, sub_optab, tem, mask, |
c3118728 | 5031 | NULL_RTX, 0, OPTAB_WIDEN); |
5032 | expand_dec (remainder, tem); | |
5033 | emit_label (label); | |
5034 | } | |
5035 | return gen_lowpart (mode, rem_flag ? remainder : quotient); | |
a4194ff7 | 5036 | |
0dbd1c74 | 5037 | default: |
611234b4 | 5038 | gcc_unreachable (); |
d2fa4ea5 | 5039 | } |
db96f378 | 5040 | |
d2fa4ea5 | 5041 | if (quotient == 0) |
db96f378 | 5042 | { |
36db22a0 | 5043 | if (target && GET_MODE (target) != compute_mode) |
5044 | target = 0; | |
5045 | ||
d2fa4ea5 | 5046 | if (rem_flag) |
db96f378 | 5047 | { |
d6567151 | 5048 | /* Try to produce the remainder without producing the quotient. |
cb0ccc1e | 5049 | If we seem to have a divmod pattern that does not require widening, |
df07c3ae | 5050 | don't try widening here. We should really have a WIDEN argument |
d6567151 | 5051 | to expand_twoval_binop, since what we'd really like to do here is |
5052 | 1) try a mod insn in compute_mode | |
5053 | 2) try a divmod insn in compute_mode | |
5054 | 3) try a div insn in compute_mode and multiply-subtract to get | |
5055 | remainder | |
5056 | 4) try the same things with widening allowed. */ | |
5057 | remainder | |
5058 | = sign_expand_binop (compute_mode, umod_optab, smod_optab, | |
5059 | op0, op1, target, | |
5060 | unsignedp, | |
d6bf3b14 | 5061 | ((optab_handler (optab2, compute_mode) |
d6567151 | 5062 | != CODE_FOR_nothing) |
5063 | ? OPTAB_DIRECT : OPTAB_WIDEN)); | |
d2fa4ea5 | 5064 | if (remainder == 0) |
db96f378 | 5065 | { |
5066 | /* No luck there. Can we do remainder and divide at once | |
5067 | without a library call? */ | |
d2fa4ea5 | 5068 | remainder = gen_reg_rtx (compute_mode); |
5069 | if (! expand_twoval_binop ((unsignedp | |
5070 | ? udivmod_optab | |
5071 | : sdivmod_optab), | |
5072 | op0, op1, | |
5073 | NULL_RTX, remainder, unsignedp)) | |
5074 | remainder = 0; | |
db96f378 | 5075 | } |
d2fa4ea5 | 5076 | |
5077 | if (remainder) | |
5078 | return gen_lowpart (mode, remainder); | |
db96f378 | 5079 | } |
db96f378 | 5080 | |
22971e4a | 5081 | /* Produce the quotient. Try a quotient insn, but not a library call. |
5082 | If we have a divmod in this mode, use it in preference to widening | |
5083 | the div (for this test we assume it will not fail). Note that optab2 | |
5084 | is set to the one of the two optabs that the call below will use. */ | |
5085 | quotient | |
5086 | = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab, | |
5087 | op0, op1, rem_flag ? NULL_RTX : target, | |
5088 | unsignedp, | |
d6bf3b14 | 5089 | ((optab_handler (optab2, compute_mode) |
22971e4a | 5090 | != CODE_FOR_nothing) |
5091 | ? OPTAB_DIRECT : OPTAB_WIDEN)); | |
5092 | ||
d2fa4ea5 | 5093 | if (quotient == 0) |
db96f378 | 5094 | { |
5095 | /* No luck there. Try a quotient-and-remainder insn, | |
5096 | keeping the quotient alone. */ | |
d2fa4ea5 | 5097 | quotient = gen_reg_rtx (compute_mode); |
db96f378 | 5098 | if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab, |
d2fa4ea5 | 5099 | op0, op1, |
5100 | quotient, NULL_RTX, unsignedp)) | |
5101 | { | |
5102 | quotient = 0; | |
5103 | if (! rem_flag) | |
5104 | /* Still no luck. If we are not computing the remainder, | |
5105 | use a library call for the quotient. */ | |
5106 | quotient = sign_expand_binop (compute_mode, | |
5107 | udiv_optab, sdiv_optab, | |
5108 | op0, op1, target, | |
5109 | unsignedp, OPTAB_LIB_WIDEN); | |
5110 | } | |
db96f378 | 5111 | } |
db96f378 | 5112 | } |
5113 | ||
db96f378 | 5114 | if (rem_flag) |
5115 | { | |
36db22a0 | 5116 | if (target && GET_MODE (target) != compute_mode) |
5117 | target = 0; | |
5118 | ||
d2fa4ea5 | 5119 | if (quotient == 0) |
30e9913f | 5120 | { |
5121 | /* No divide instruction either. Use library for remainder. */ | |
5122 | remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab, | |
5123 | op0, op1, target, | |
5124 | unsignedp, OPTAB_LIB_WIDEN); | |
5125 | /* No remainder function. Try a quotient-and-remainder | |
5126 | function, keeping the remainder. */ | |
5127 | if (!remainder) | |
5128 | { | |
5129 | remainder = gen_reg_rtx (compute_mode); | |
48e1416a | 5130 | if (!expand_twoval_binop_libfunc |
30e9913f | 5131 | (unsignedp ? udivmod_optab : sdivmod_optab, |
5132 | op0, op1, | |
5133 | NULL_RTX, remainder, | |
5134 | unsignedp ? UMOD : MOD)) | |
5135 | remainder = NULL_RTX; | |
5136 | } | |
5137 | } | |
db96f378 | 5138 | else |
5139 | { | |
5140 | /* We divided. Now finish doing X - Y * (X / Y). */ | |
d2fa4ea5 | 5141 | remainder = expand_mult (compute_mode, quotient, op1, |
5142 | NULL_RTX, unsignedp); | |
5143 | remainder = expand_binop (compute_mode, sub_optab, op0, | |
5144 | remainder, target, unsignedp, | |
5145 | OPTAB_LIB_WIDEN); | |
db96f378 | 5146 | } |
5147 | } | |
5148 | ||
d2fa4ea5 | 5149 | return gen_lowpart (mode, rem_flag ? remainder : quotient); |
db96f378 | 5150 | } |
5151 | \f | |
/* Return a tree node with data type TYPE, describing the value of X.
   Usually this is a VAR_DECL, if there is no obvious better choice.
   X may be an expression, however we only support those expressions
   generated by loop.c.  */

tree
make_tree (tree type, rtx x)
{
  tree t;

  switch (GET_CODE (x))
    {
    case CONST_INT:
    case CONST_WIDE_INT:
      /* Integer constants map directly to INTEGER_CSTs, interpreting the
	 bits in TYPE's mode.  */
      t = wide_int_to_tree (type, rtx_mode_t (x, TYPE_MODE (type)));
      return t;

    case CONST_DOUBLE:
      STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
      /* A VOIDmode CONST_DOUBLE is a legacy double-word integer constant
	 (only possible when the target does not use CONST_WIDE_INT);
	 otherwise CONST_DOUBLE holds a floating-point value.  */
      if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
	t = wide_int_to_tree (type,
			      wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
						    HOST_BITS_PER_WIDE_INT * 2));
      else
	t = build_real (type, *CONST_DOUBLE_REAL_VALUE (x));

      return t;

    case CONST_VECTOR:
      {
	int units = CONST_VECTOR_NUNITS (x);
	tree itype = TREE_TYPE (type);
	int i;

	/* Build a tree with vector elements, converting each RTL element
	   recursively to the vector's element type.  */
	auto_vec<tree, 32> elts (units);
	for (i = 0; i < units; ++i)
	  {
	    rtx elt = CONST_VECTOR_ELT (x, i);
	    elts.quick_push (make_tree (itype, elt));
	  }

	return build_vector (type, elts);
      }

    case PLUS:
      return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
			  make_tree (type, XEXP (x, 1)));

    case MINUS:
      return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
			  make_tree (type, XEXP (x, 1)));

    case NEG:
      return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));

    case MULT:
      return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
			  make_tree (type, XEXP (x, 1)));

    case ASHIFT:
      return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
			  make_tree (type, XEXP (x, 1)));

    case LSHIFTRT:
      /* RTL shifts have no signedness; an unsigned tree type makes
	 RSHIFT_EXPR a logical shift.  */
      t = unsigned_type_for (type);
      return fold_convert (type, build2 (RSHIFT_EXPR, t,
					 make_tree (t, XEXP (x, 0)),
					 make_tree (type, XEXP (x, 1))));

    case ASHIFTRT:
      /* A signed tree type makes RSHIFT_EXPR an arithmetic shift.  */
      t = signed_type_for (type);
      return fold_convert (type, build2 (RSHIFT_EXPR, t,
					 make_tree (t, XEXP (x, 0)),
					 make_tree (type, XEXP (x, 1))));

    case DIV:
      /* DIV is signed division for integer modes; real division keeps
	 TYPE unchanged.  */
      if (TREE_CODE (type) != REAL_TYPE)
	t = signed_type_for (type);
      else
	t = type;

      return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
					 make_tree (t, XEXP (x, 0)),
					 make_tree (t, XEXP (x, 1))));
    case UDIV:
      t = unsigned_type_for (type);
      return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
					 make_tree (t, XEXP (x, 0)),
					 make_tree (t, XEXP (x, 1))));

    case SIGN_EXTEND:
    case ZERO_EXTEND:
      /* Convert the operand in its own (narrower) mode with the matching
	 signedness, then widen via fold_convert.  */
      t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
					  GET_CODE (x) == ZERO_EXTEND);
      return fold_convert (type, make_tree (t, XEXP (x, 0)));

    case CONST:
      {
	rtx op = XEXP (x, 0);
	/* A constant VEC_DUPLICATE becomes a uniform vector constant.  */
	if (GET_CODE (op) == VEC_DUPLICATE)
	  {
	    tree elt_tree = make_tree (TREE_TYPE (type), XEXP (op, 0));
	    return build_vector_from_val (type, elt_tree);
	  }
	return make_tree (type, op);
      }

    case SYMBOL_REF:
      t = SYMBOL_REF_DECL (x);
      if (t)
	return fold_convert (type, build_fold_addr_expr (t));
      /* fall through.  */

    default:
      /* No direct tree equivalent: wrap X in an anonymous VAR_DECL whose
	 DECL_RTL is X itself.  */
      t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);

      /* If TYPE is a POINTER_TYPE, we might need to convert X from
	 address mode to pointer mode.  */
      if (POINTER_TYPE_P (type))
	x = convert_memory_address_addr_space
	  (SCALAR_INT_TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));

      /* Note that we do *not* use SET_DECL_RTL here, because we do not
	 want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
      t->decl_with_rtl.rtl = x;

      return t;
    }
}
db96f378 | 5282 | \f |
5283 | /* Compute the logical-and of OP0 and OP1, storing it in TARGET | |
5284 | and returning TARGET. | |
5285 | ||
5286 | If TARGET is 0, a pseudo-register or constant is returned. */ | |
5287 | ||
5288 | rtx | |
3754d046 | 5289 | expand_and (machine_mode mode, rtx op0, rtx op1, rtx target) |
db96f378 | 5290 | { |
6de9716c | 5291 | rtx tem = 0; |
db96f378 | 5292 | |
6de9716c | 5293 | if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode) |
5294 | tem = simplify_binary_operation (AND, mode, op0, op1); | |
5295 | if (tem == 0) | |
db96f378 | 5296 | tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN); |
db96f378 | 5297 | |
5298 | if (target == 0) | |
5299 | target = tem; | |
5300 | else if (tem != target) | |
5301 | emit_move_insn (target, tem); | |
5302 | return target; | |
5303 | } | |
80e1bfa1 | 5304 | |
/* Helper function for emit_store_flag: emit a single cstore insn and
   normalize its result.

   ICODE is the cstore pattern to use and CODE the rtx comparison code
   applied to operands X and Y, which have mode MODE; COMPARE_MODE is
   the mode in which the comparison itself is performed.  UNSIGNEDP is
   nonzero for an unsigned comparison.  NORMALIZEP is as for
   emit_store_flag: 1 for a 0/1 result, -1 for a 0/-1 result, 0 to
   leave the pattern's raw result.  TARGET, if nonnull, receives the
   result; TARGET_MODE is the desired result mode, or VOIDmode to use
   the pattern's natural result mode.

   Returns the result rtx, or NULL_RTX (after deleting any insns
   emitted so far) if the operands could not be prepared or the insn
   could not be generated.  */
rtx
emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
	     machine_mode mode, machine_mode compare_mode,
	     int unsignedp, rtx x, rtx y, int normalizep,
	     machine_mode target_mode)
{
  struct expand_operand ops[4];
  rtx op0, comparison, subtarget;
  rtx_insn *last;
  scalar_int_mode result_mode = targetm.cstore_mode (icode);
  scalar_int_mode int_target_mode;

  /* Remember the rollback point so a failed expansion leaves no insns.  */
  last = get_last_insn ();
  x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
  y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
  if (!x || !y)
    {
      delete_insns_since (last);
      return NULL_RTX;
    }

  if (target_mode == VOIDmode)
    int_target_mode = result_mode;
  else
    int_target_mode = as_a <scalar_int_mode> (target_mode);
  if (!target)
    target = gen_reg_rtx (int_target_mode);

  comparison = gen_rtx_fmt_ee (code, result_mode, x, y);

  create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
  create_fixed_operand (&ops[1], comparison);
  create_fixed_operand (&ops[2], x);
  create_fixed_operand (&ops[3], y);
  if (!maybe_expand_insn (icode, 4, ops))
    {
      delete_insns_since (last);
      return NULL_RTX;
    }
  subtarget = ops[0].value;

  /* If we are converting to a wider mode, first convert to
     INT_TARGET_MODE, then normalize.  This produces better combining
     opportunities on machines that have a SIGN_EXTRACT when we are
     testing a single bit.  This mostly benefits the 68k.

     If STORE_FLAG_VALUE does not have the sign bit set when
     interpreted in MODE, we can do this conversion as unsigned, which
     is usually more efficient.  */
  if (GET_MODE_SIZE (int_target_mode) > GET_MODE_SIZE (result_mode))
    {
      convert_move (target, subtarget,
		    val_signbit_known_clear_p (result_mode,
					       STORE_FLAG_VALUE));
      op0 = target;
      result_mode = int_target_mode;
    }
  else
    op0 = subtarget;

  /* If we want to keep subexpressions around, don't reuse our last
     target.  */
  if (optimize)
    subtarget = 0;

  /* Now normalize to the proper value in MODE.  Sometimes we don't
     have to do anything.  */
  if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
    ;
  /* STORE_FLAG_VALUE might be the most negative number, so write
     the comparison this way to avoid a compiler-time warning.  */
  else if (- normalizep == STORE_FLAG_VALUE)
    op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);

  /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
     it hard to use a value of just the sign bit due to ANSI integer
     constant typing rules.  */
  else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
    op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
			GET_MODE_BITSIZE (result_mode) - 1, subtarget,
			normalizep == 1);
  else
    {
      gcc_assert (STORE_FLAG_VALUE & 1);

      /* Mask down to the low bit, then negate if a 0/-1 result is
	 wanted.  */
      op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
      if (normalizep == -1)
	op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
    }

  /* If we were converting to a smaller mode, do the conversion now.  */
  if (int_target_mode != result_mode)
    {
      convert_move (target, op0, 0);
      return target;
    }
  else
    return op0;
}
5405 | ||
db96f378 | 5406 | |
/* A subroutine of emit_store_flag only including "tricks" that do not
   need a recursive call.  These are kept separate to avoid infinite
   loops.

   Arguments are as for emit_store_flag; TARGET_MODE is the mode of
   the result (VOIDmode when TARGET is null).  Returns the flag value
   rtx, or 0 if none of the strategies here applied.  */

static rtx
emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
		   machine_mode mode, int unsignedp, int normalizep,
		   machine_mode target_mode)
{
  rtx subtarget;
  enum insn_code icode;
  machine_mode compare_mode;
  enum mode_class mclass;
  enum rtx_code scode;

  if (unsignedp)
    code = unsigned_condition (code);
  /* SCODE is the comparison with operands swapped; used for the
     floating-point retry in the cstore loop below.  */
  scode = swap_condition (code);

  /* If one operand is constant, make it the second one.  Only do this
     if the other operand is not constant as well.  */

  if (swap_commutative_operands_p (op0, op1))
    {
      std::swap (op0, op1);
      code = swap_condition (code);
    }

  if (mode == VOIDmode)
    mode = GET_MODE (op0);

  /* For some comparisons with 1 and -1, we can convert this to
     comparisons with zero.  This will often produce more opportunities for
     store-flag insns.  */

  switch (code)
    {
    case LT:
      if (op1 == const1_rtx)
	op1 = const0_rtx, code = LE;
      break;
    case LE:
      if (op1 == constm1_rtx)
	op1 = const0_rtx, code = LT;
      break;
    case GE:
      if (op1 == const1_rtx)
	op1 = const0_rtx, code = GT;
      break;
    case GT:
      if (op1 == constm1_rtx)
	op1 = const0_rtx, code = GE;
      break;
    case GEU:
      if (op1 == const1_rtx)
	op1 = const0_rtx, code = NE;
      break;
    case LTU:
      if (op1 == const1_rtx)
	op1 = const0_rtx, code = EQ;
      break;
    default:
      break;
    }

  /* If we are comparing a double-word integer with zero or -1, we can
     convert the comparison into one involving a single word.  */
  scalar_int_mode int_mode;
  if (is_int_mode (mode, &int_mode)
      && GET_MODE_BITSIZE (int_mode) == BITS_PER_WORD * 2
      && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
    {
      rtx tem;
      if ((code == EQ || code == NE)
	  && (op1 == const0_rtx || op1 == constm1_rtx))
	{
	  rtx op00, op01;

	  /* Do a logical OR or AND of the two words and compare the
	     result: OR against zero (both words zero iff OR is zero),
	     AND against -1 (both words all-ones iff AND is all-ones).  */
	  op00 = simplify_gen_subreg (word_mode, op0, int_mode, 0);
	  op01 = simplify_gen_subreg (word_mode, op0, int_mode, UNITS_PER_WORD);
	  tem = expand_binop (word_mode,
			      op1 == const0_rtx ? ior_optab : and_optab,
			      op00, op01, NULL_RTX, unsignedp,
			      OPTAB_DIRECT);

	  if (tem != 0)
	    tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
				   unsignedp, normalizep);
	}
      else if ((code == LT || code == GE) && op1 == const0_rtx)
	{
	  rtx op0h;

	  /* If testing the sign bit, can just test on high word.  */
	  op0h = simplify_gen_subreg (word_mode, op0, int_mode,
				      subreg_highpart_offset (word_mode,
							      int_mode));
	  tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
				 unsignedp, normalizep);
	}
      else
	tem = NULL_RTX;

      if (tem)
	{
	  if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
	    return tem;
	  if (!target)
	    target = gen_reg_rtx (target_mode);

	  /* Widen to TARGET_MODE; sign-extend only when the flag value's
	     sign bit may be set.  */
	  convert_move (target, tem,
			!val_signbit_known_set_p (word_mode,
						  (normalizep ? normalizep
						   : STORE_FLAG_VALUE)));
	  return target;
	}
    }

  /* If this is A < 0 or A >= 0, we can do this by taking the ones
     complement of A (for GE) and shifting the sign bit to the low bit.  */
  if (op1 == const0_rtx && (code == LT || code == GE)
      && is_int_mode (mode, &int_mode)
      && (normalizep || STORE_FLAG_VALUE == 1
	  || val_signbit_p (int_mode, STORE_FLAG_VALUE)))
    {
      scalar_int_mode int_target_mode;
      subtarget = target;

      if (!target)
	int_target_mode = int_mode;
      else
	{
	  /* If the result is to be wider than OP0, it is best to convert it
	     first.  If it is to be narrower, it is *incorrect* to convert it
	     first.  */
	  int_target_mode = as_a <scalar_int_mode> (target_mode);
	  if (GET_MODE_SIZE (int_target_mode) > GET_MODE_SIZE (int_mode))
	    {
	      op0 = convert_modes (int_target_mode, int_mode, op0, 0);
	      int_mode = int_target_mode;
	    }
	}

      /* SUBTARGET has the wrong mode for the intermediate work below.  */
      if (int_target_mode != int_mode)
	subtarget = 0;

      if (code == GE)
	op0 = expand_unop (int_mode, one_cmpl_optab, op0,
			   ((STORE_FLAG_VALUE == 1 || normalizep)
			    ? 0 : subtarget), 0);

      if (STORE_FLAG_VALUE == 1 || normalizep)
	/* If we are supposed to produce a 0/1 value, we want to do
	   a logical shift from the sign bit to the low-order bit; for
	   a -1/0 value, we do an arithmetic shift.  */
	op0 = expand_shift (RSHIFT_EXPR, int_mode, op0,
			    GET_MODE_BITSIZE (int_mode) - 1,
			    subtarget, normalizep != -1);

      if (int_mode != int_target_mode)
	op0 = convert_modes (int_target_mode, int_mode, op0, 0);

      return op0;
    }

  /* Finally, try a cstore pattern in MODE or any wider mode (for
     MODE_CC, the pattern is looked up in CCmode).  */
  mclass = GET_MODE_CLASS (mode);
  FOR_EACH_MODE_FROM (compare_mode, mode)
    {
      machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
      icode = optab_handler (cstore_optab, optab_mode);
      if (icode != CODE_FOR_nothing)
	{
	  do_pending_stack_adjust ();
	  rtx tem = emit_cstore (target, icode, code, mode, compare_mode,
				 unsignedp, op0, op1, normalizep, target_mode);
	  if (tem)
	    return tem;

	  /* For floating point, also try the same comparison with the
	     operands swapped.  */
	  if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	    {
	      tem = emit_cstore (target, icode, scode, mode, compare_mode,
				 unsignedp, op1, op0, normalizep, target_mode);
	      if (tem)
		return tem;
	    }
	  break;
	}
    }

  return 0;
}
5600 | ||
/* Subroutine of emit_store_flag that handles cases in which the operands
   are scalar integers.  SUBTARGET is the target to use for temporary
   operations and TRUEVAL is the value to store when the condition is
   true.  All other arguments are as for emit_store_flag.

   Returns the flag value rtx, or 0 if no strategy here succeeded (any
   partially emitted insns are deleted before returning 0).  */

rtx
emit_store_flag_int (rtx target, rtx subtarget, enum rtx_code code, rtx op0,
		     rtx op1, scalar_int_mode mode, int unsignedp,
		     int normalizep, rtx trueval)
{
  machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
  /* Rollback point: failed attempts below delete back to here.  */
  rtx_insn *last = get_last_insn ();

  /* If this is an equality comparison of integers, we can try to exclusive-or
     (or subtract) the two operands and use a recursive call to try the
     comparison with zero.  Don't do any of these cases if branches are
     very cheap.  */

  if ((code == EQ || code == NE) && op1 != const0_rtx)
    {
      rtx tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
			      OPTAB_WIDEN);

      if (tem == 0)
	tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
			    OPTAB_WIDEN);
      if (tem != 0)
	tem = emit_store_flag (target, code, tem, const0_rtx,
			       mode, unsignedp, normalizep);
      if (tem != 0)
	return tem;

      delete_insns_since (last);
    }

  /* For integer comparisons, try the reverse comparison.  However, for
     small X and if we'd have anyway to extend, implementing "X != 0"
     as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0".  */
  rtx_code rcode = reverse_condition (code);
  if (can_compare_p (rcode, mode, ccp_store_flag)
      && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
	    && code == NE
	    && GET_MODE_SIZE (mode) < UNITS_PER_WORD
	    && op1 == const0_rtx))
    {
      /* The reversed flag value can be corrected to the requested
	 NORMALIZEP either by adding NORMALIZEP (when the flag is the
	 negation of the goal) or by XORing with TRUEVAL.  */
      int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
		      || (STORE_FLAG_VALUE == -1 && normalizep == 1));

      /* Again, for the reverse comparison, use either an addition or a XOR.  */
      if (want_add
	  && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
		       optimize_insn_for_speed_p ()) == 0)
	{
	  rtx tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
				       STORE_FLAG_VALUE, target_mode);
	  if (tem != 0)
	    tem = expand_binop (target_mode, add_optab, tem,
				gen_int_mode (normalizep, target_mode),
				target, 0, OPTAB_WIDEN);
	  if (tem != 0)
	    return tem;
	}
      else if (!want_add
	       && rtx_cost (trueval, mode, XOR, 1,
			    optimize_insn_for_speed_p ()) == 0)
	{
	  rtx tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
				       normalizep, target_mode);
	  if (tem != 0)
	    tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
				INTVAL (trueval) >= 0, OPTAB_WIDEN);
	  if (tem != 0)
	    return tem;
	}

      delete_insns_since (last);
    }

  /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
     the constant zero.  Reject all other comparisons at this point.  Only
     do LE and GT if branches are expensive since they are expensive on
     2-operand machines.  */

  if (op1 != const0_rtx
      || (code != EQ && code != NE
	  && (BRANCH_COST (optimize_insn_for_speed_p (),
			   false) <= 1 || (code != LE && code != GT))))
    return 0;

  /* Try to put the result of the comparison in the sign bit.  Assume we can't
     do the necessary operation below.  */

  rtx tem = 0;

  /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
     the sign bit set.  */

  if (code == LE)
    {
      /* This is destructive, so SUBTARGET can't be OP0.  */
      if (rtx_equal_p (subtarget, op0))
	subtarget = 0;

      tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
			  OPTAB_WIDEN);
      if (tem)
	tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
			    OPTAB_WIDEN);
    }

  /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
     number of bits in the mode of OP0, minus one.
     NOTE(review): the code below actually computes (A >> (BITS)) - A with
     an arithmetic shift, which leaves the sign bit set exactly when
     A > 0 -- the comment appears to predate the implementation.  */

  if (code == GT)
    {
      if (rtx_equal_p (subtarget, op0))
	subtarget = 0;

      tem = maybe_expand_shift (RSHIFT_EXPR, mode, op0,
				GET_MODE_BITSIZE (mode) - 1,
				subtarget, 0);
      if (tem)
	tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
			    OPTAB_WIDEN);
    }

  if (code == EQ || code == NE)
    {
      /* For EQ or NE, one way to do the comparison is to apply an operation
	 that converts the operand into a positive number if it is nonzero
	 or zero if it was originally zero.  Then, for EQ, we subtract 1 and
	 for NE we negate.  This puts the result in the sign bit.  Then we
	 normalize with a shift, if needed.

	 Two operations that can do the above actions are ABS and FFS, so try
	 them.  If that doesn't work, and MODE is smaller than a full word,
	 we can use zero-extension to the wider mode (an unsigned conversion)
	 as the operation.  */

      /* Note that ABS doesn't yield a positive number for INT_MIN, but
	 that is compensated by the subsequent overflow when subtracting
	 one / negating.  */

      if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
	tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
      else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
	tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
      else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
	{
	  tem = convert_modes (word_mode, mode, op0, 1);
	  mode = word_mode;
	}

      if (tem != 0)
	{
	  if (code == EQ)
	    tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
				0, OPTAB_WIDEN);
	  else
	    tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
	}

      /* If we couldn't do it that way, for NE we can "or" the two's complement
	 of the value with itself.  For EQ, we take the one's complement of
	 that "or", which is an extra insn, so we only handle EQ if branches
	 are expensive.  */

      if (tem == 0
	  && (code == NE
	      || BRANCH_COST (optimize_insn_for_speed_p (),
			      false) > 1))
	{
	  if (rtx_equal_p (subtarget, op0))
	    subtarget = 0;

	  tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
	  tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
			      OPTAB_WIDEN);

	  if (tem && code == EQ)
	    tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
	}
    }

  /* The flag value is now in the sign bit; move it to the low bit,
     producing 0/1 (logical shift) or 0/-1 (arithmetic shift).  */
  if (tem && normalizep)
    tem = maybe_expand_shift (RSHIFT_EXPR, mode, tem,
			      GET_MODE_BITSIZE (mode) - 1,
			      subtarget, normalizep == 1);

  if (tem)
    {
      if (!target)
	;
      else if (GET_MODE (tem) != target_mode)
	{
	  convert_move (target, tem, 0);
	  tem = target;
	}
      else if (!subtarget)
	{
	  emit_move_insn (target, tem);
	  tem = target;
	}
    }
  else
    delete_insns_since (last);

  return tem;
}
469b49fd | 5810 | |
/* Emit a store-flags instruction for comparison CODE on OP0 and OP1
   and storing in TARGET.  Normally return TARGET.
   Return 0 if that cannot be done.

   MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
   it is VOIDmode, they cannot both be CONST_INT.

   UNSIGNEDP is for the case where we have to widen the operands
   to perform the operation.  It says to use zero-extension.

   NORMALIZEP is 1 if we should convert the result to be either zero
   or one.  Normalize is -1 if we should convert the result to be
   either zero or -1.  If NORMALIZEP is zero, the result will be left
   "raw" out of the scc insn.  */

rtx
emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
		 machine_mode mode, int unsignedp, int normalizep)
{
  machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
  enum rtx_code rcode;
  rtx subtarget;
  rtx tem, trueval;
  rtx_insn *last;

  /* If we compare constants, we shouldn't use a store-flag operation,
     but a constant load.  We can get there via the vanilla route that
     usually generates a compare-branch sequence, but will in this case
     fold the comparison to a constant, and thus elide the branch.  */
  if (CONSTANT_P (op0) && CONSTANT_P (op1))
    return NULL_RTX;

  /* First try the straightforward scc expansion.  */
  tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
			   target_mode);
  if (tem)
    return tem;

  /* If we reached here, we can't do this with a scc insn, however there
     are some comparisons that can be done in other ways.  Don't do any
     of these cases if branches are very cheap.  */
  if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
    return 0;

  /* See what we need to return.  We can only return a 1, -1, or the
     sign bit.  */

  if (normalizep == 0)
    {
      if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
	normalizep = STORE_FLAG_VALUE;

      else if (val_signbit_p (mode, STORE_FLAG_VALUE))
	;
      else
	return 0;
    }

  /* Remember the insn stream position so any partially emitted attempt
     below can be rolled back with delete_insns_since.  */
  last = get_last_insn ();

  /* If optimizing, use different pseudo registers for each insn, instead
     of reusing the same pseudo.  This leads to better CSE, but slows
     down the compiler, since there are more pseudos.  */
  subtarget = (!optimize
	       && (target_mode == mode)) ? target : NULL_RTX;
  trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);

  /* For floating-point comparisons, try the reverse comparison or try
     changing the "orderedness" of the comparison.  */
  if (GET_MODE_CLASS (mode) == MODE_FLOAT)
    {
      enum rtx_code first_code;
      bool and_them;

      rcode = reverse_condition_maybe_unordered (code);
      /* Reversing is only valid when it cannot change the result in the
	 presence of NaNs for the codes checked here.  */
      if (can_compare_p (rcode, mode, ccp_store_flag)
	  && (code == ORDERED || code == UNORDERED
	      || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
	      || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
	{
	  int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
			  || (STORE_FLAG_VALUE == -1 && normalizep == 1));

	  /* For the reverse comparison, use either an addition or a XOR.
	     Either fixup is only attempted when its constant is free
	     (rtx_cost of 0) for this mode.  */
	  if (want_add
	      && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
			   optimize_insn_for_speed_p ()) == 0)
	    {
	      tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
				       STORE_FLAG_VALUE, target_mode);
	      if (tem)
		return expand_binop (target_mode, add_optab, tem,
				     gen_int_mode (normalizep, target_mode),
				     target, 0, OPTAB_WIDEN);
	    }
	  else if (!want_add
		   && rtx_cost (trueval, mode, XOR, 1,
				optimize_insn_for_speed_p ()) == 0)
	    {
	      tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
				       normalizep, target_mode);
	      if (tem)
		return expand_binop (target_mode, xor_optab, tem, trueval,
				     target, INTVAL (trueval) >= 0,
				     OPTAB_WIDEN);
	    }
	}

      /* Discard any insns emitted by the failed attempts above.  */
      delete_insns_since (last);

      /* Cannot split ORDERED and UNORDERED, only try the above trick.  */
      if (code == ORDERED || code == UNORDERED)
	return 0;

      /* NOTE: split_comparison rewrites CODE in place to the second half
	 of the split pair; FIRST_CODE receives the ordered/unordered
	 test.  */
      and_them = split_comparison (code, mode, &first_code, &code);

      /* If there are no NaNs, the first comparison should always fall through.
	 Effectively change the comparison to the other one.  */
      if (!HONOR_NANS (mode))
	{
	  gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
	  return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
				    target_mode);
	}

      if (!HAVE_conditional_move)
	return 0;

      /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
	 conditional move.  */
      tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
			       normalizep, target_mode);
      if (tem == 0)
	return 0;

      if (and_them)
	tem = emit_conditional_move (target, code, op0, op1, mode,
				     tem, const0_rtx, GET_MODE (tem), 0);
      else
	tem = emit_conditional_move (target, code, op0, op1, mode,
				     trueval, tem, GET_MODE (tem), 0);

      /* On failure, roll back the setcc emitted above as well.  */
      if (tem == 0)
	delete_insns_since (last);
      return tem;
    }

  /* The remaining tricks only apply to integer comparisons.  */

  scalar_int_mode int_mode;
  if (is_int_mode (mode, &int_mode))
    return emit_store_flag_int (target, subtarget, code, op0, op1, int_mode,
				unsignedp, normalizep, trueval);

  return 0;
}
5966 | ||
/* Like emit_store_flag, but always succeeds.

   Falls back to a compare-and-branch sequence (set TARGET to one value,
   conditionally jump over a move of the other) when no scc-style
   expansion is available.  Arguments are as for emit_store_flag.  */

rtx
emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
		       machine_mode mode, int unsignedp, int normalizep)
{
  rtx tem;
  rtx_code_label *label;
  rtx trueval, falseval;

  /* First see if emit_store_flag can do the job.  */
  tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
  if (tem != 0)
    return tem;

  if (!target)
    target = gen_reg_rtx (word_mode);

  /* If this failed, we have to do this with set/compare/jump/set code.
     For foo != 0, if foo is in OP0, just replace it with 1 if nonzero.  */
  trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
  if (code == NE
      && GET_MODE_CLASS (mode) == MODE_INT
      && REG_P (target)
      && op0 == target
      && op1 == const0_rtx)
    {
      /* TARGET already holds OP0: skip the store of TRUEVAL when it is
	 zero, otherwise overwrite it with TRUEVAL.  */
      label = gen_label_rtx ();
      do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, mode,
			       NULL_RTX, NULL, label,
			       profile_probability::uninitialized ());
      emit_move_insn (target, trueval);
      emit_label (label);
      return target;
    }

  /* Use a fresh pseudo if TARGET is not a register or overlaps the
     operands, since we store into it before the comparison is done.  */
  if (!REG_P (target)
      || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
    target = gen_reg_rtx (GET_MODE (target));

  /* Jump in the right direction if the target cannot implement CODE
     but can jump on its reverse condition.  */
  falseval = const0_rtx;
  if (! can_compare_p (code, mode, ccp_jump)
      && (! FLOAT_MODE_P (mode)
	  || code == ORDERED || code == UNORDERED
	  || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
	  || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
    {
      enum rtx_code rcode;
      if (FLOAT_MODE_P (mode))
	rcode = reverse_condition_maybe_unordered (code);
      else
	rcode = reverse_condition (code);

      /* Canonicalize to UNORDERED for the libcall.  */
      if (can_compare_p (rcode, mode, ccp_jump)
	  || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
	{
	  /* Reversing the jump condition swaps the roles of the two
	     stored values.  */
	  falseval = trueval;
	  trueval = const0_rtx;
	  code = rcode;
	}
    }

  /* Store TRUEVAL, then branch over the FALSEVAL store when the
     comparison holds.  */
  emit_move_insn (target, trueval);
  label = gen_label_rtx ();
  do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, NULL,
			   label, profile_probability::uninitialized ());

  emit_move_insn (target, falseval);
  emit_label (label);

  return target;
}
c5aa1e92 | 6042 | \f |
6043 | /* Perform possibly multi-word comparison and conditional jump to LABEL | |
85afca2d | 6044 | if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE. This is |
6045 | now a thin wrapper around do_compare_rtx_and_jump. */ | |
c5aa1e92 | 6046 | |
6047 | static void | |
3754d046 | 6048 | do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode, |
89ca96ce | 6049 | rtx_code_label *label) |
c5aa1e92 | 6050 | { |
85afca2d | 6051 | int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU); |
f9a00e9e | 6052 | do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode, NULL_RTX, |
720cfc43 | 6053 | NULL, label, profile_probability::uninitialized ()); |
c5aa1e92 | 6054 | } |