]>
Commit | Line | Data |
---|---|---|
44037a66 TG |
1 | /* Medium-level subroutines: convert bit-field store and extract |
2 | and shifts, multiplies and divides to rtl instructions. | |
cbe34bb5 | 3 | Copyright (C) 1987-2017 Free Software Foundation, Inc. |
44037a66 | 4 | |
1322177d | 5 | This file is part of GCC. |
44037a66 | 6 | |
1322177d LB |
7 | GCC is free software; you can redistribute it and/or modify it under |
8 | the terms of the GNU General Public License as published by the Free | |
9dcd6f09 | 9 | Software Foundation; either version 3, or (at your option) any later |
1322177d | 10 | version. |
44037a66 | 11 | |
1322177d LB |
12 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
13 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
14 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
15 | for more details. | |
44037a66 TG |
16 | |
17 | You should have received a copy of the GNU General Public License | |
9dcd6f09 NC |
18 | along with GCC; see the file COPYING3. If not see |
19 | <http://www.gnu.org/licenses/>. */ | |
44037a66 TG |
20 | |
21 | ||
22 | #include "config.h" | |
670ee920 | 23 | #include "system.h" |
4977bab6 | 24 | #include "coretypes.h" |
c7131fb2 | 25 | #include "backend.h" |
957060b5 | 26 | #include "target.h" |
44037a66 | 27 | #include "rtl.h" |
957060b5 AM |
28 | #include "tree.h" |
29 | #include "predict.h" | |
4d0cdd0c | 30 | #include "memmodel.h" |
957060b5 AM |
31 | #include "tm_p.h" |
32 | #include "expmed.h" | |
33 | #include "optabs.h" | |
34 | #include "emit-rtl.h" | |
c7131fb2 | 35 | #include "diagnostic-core.h" |
40e23961 | 36 | #include "fold-const.h" |
d8a2d370 | 37 | #include "stor-layout.h" |
36566b39 PK |
38 | #include "dojump.h" |
39 | #include "explow.h" | |
44037a66 | 40 | #include "expr.h" |
b0c48229 | 41 | #include "langhooks.h" |
462f85ce RS |
42 | |
43 | struct target_expmed default_target_expmed; | |
44 | #if SWITCHABLE_TARGET | |
45 | struct target_expmed *this_target_expmed = &default_target_expmed; | |
46 | #endif | |
44037a66 | 47 | |
502b8322 AJ |
48 | static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT, |
49 | unsigned HOST_WIDE_INT, | |
1169e45d AH |
50 | unsigned HOST_WIDE_INT, |
51 | unsigned HOST_WIDE_INT, | |
ee45a32d | 52 | rtx, bool); |
ebb99f96 BE |
53 | static void store_fixed_bit_field_1 (rtx, unsigned HOST_WIDE_INT, |
54 | unsigned HOST_WIDE_INT, | |
ee45a32d | 55 | rtx, bool); |
502b8322 | 56 | static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT, |
1169e45d AH |
57 | unsigned HOST_WIDE_INT, |
58 | unsigned HOST_WIDE_INT, | |
59 | unsigned HOST_WIDE_INT, | |
ee45a32d | 60 | rtx, bool); |
ef4bddc2 | 61 | static rtx extract_fixed_bit_field (machine_mode, rtx, |
502b8322 | 62 | unsigned HOST_WIDE_INT, |
ee45a32d | 63 | unsigned HOST_WIDE_INT, rtx, int, bool); |
ef4bddc2 | 64 | static rtx extract_fixed_bit_field_1 (machine_mode, rtx, |
6f4e9cf8 | 65 | unsigned HOST_WIDE_INT, |
ee45a32d | 66 | unsigned HOST_WIDE_INT, rtx, int, bool); |
ef4bddc2 | 67 | static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int); |
502b8322 | 68 | static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT, |
ee45a32d | 69 | unsigned HOST_WIDE_INT, int, bool); |
ef4bddc2 RS |
70 | static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *); |
71 | static rtx expand_smod_pow2 (machine_mode, rtx, HOST_WIDE_INT); | |
72 | static rtx expand_sdiv_pow2 (machine_mode, rtx, HOST_WIDE_INT); | |
44037a66 | 73 | |
807e902e KZ |
74 | /* Return a constant integer mask value of mode MODE with BITSIZE ones |
75 | followed by BITPOS zeros, or the complement of that if COMPLEMENT. | |
76 | The mask is truncated if necessary to the width of mode MODE. The | |
77 | mask is zero-extended if BITSIZE+BITPOS is too small for MODE. */ | |
78 | ||
79 | static inline rtx | |
ef4bddc2 | 80 | mask_rtx (machine_mode mode, int bitpos, int bitsize, bool complement) |
807e902e KZ |
81 | { |
82 | return immed_wide_int_const | |
83 | (wi::shifted_mask (bitpos, bitsize, complement, | |
84 | GET_MODE_PRECISION (mode)), mode); | |
85 | } | |
86 | ||
58b42e19 | 87 | /* Test whether a value is zero of a power of two. */ |
be63b77d | 88 | #define EXACT_POWER_OF_2_OR_ZERO_P(x) \ |
fecfbfa4 | 89 | (((x) & ((x) - HOST_WIDE_INT_1U)) == 0) |
58b42e19 | 90 | |
84ddb681 | 91 | struct init_expmed_rtl |
44037a66 | 92 | { |
c83cf304 JJ |
93 | rtx reg; |
94 | rtx plus; | |
95 | rtx neg; | |
96 | rtx mult; | |
97 | rtx sdiv; | |
98 | rtx udiv; | |
99 | rtx sdiv_32; | |
100 | rtx smod_32; | |
101 | rtx wide_mult; | |
102 | rtx wide_lshr; | |
103 | rtx wide_trunc; | |
104 | rtx shift; | |
105 | rtx shift_mult; | |
106 | rtx shift_add; | |
107 | rtx shift_sub0; | |
108 | rtx shift_sub1; | |
109 | rtx zext; | |
110 | rtx trunc; | |
79b4a8dc | 111 | |
965703ed RS |
112 | rtx pow2[MAX_BITS_PER_WORD]; |
113 | rtx cint[MAX_BITS_PER_WORD]; | |
84ddb681 RH |
114 | }; |
115 | ||
91f8035e | 116 | static void |
ef4bddc2 RS |
117 | init_expmed_one_conv (struct init_expmed_rtl *all, machine_mode to_mode, |
118 | machine_mode from_mode, bool speed) | |
91f8035e RH |
119 | { |
120 | int to_size, from_size; | |
121 | rtx which; | |
122 | ||
50b6ee8b DD |
123 | to_size = GET_MODE_PRECISION (to_mode); |
124 | from_size = GET_MODE_PRECISION (from_mode); | |
125 | ||
126 | /* Most partial integers have a precision less than the "full" | |
127 | integer it requires for storage. In case one doesn't, for | |
128 | comparison purposes here, reduce the bit size by one in that | |
129 | case. */ | |
130 | if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT | |
146ec50f | 131 | && pow2p_hwi (to_size)) |
50b6ee8b DD |
132 | to_size --; |
133 | if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT | |
146ec50f | 134 | && pow2p_hwi (from_size)) |
50b6ee8b | 135 | from_size --; |
91f8035e RH |
136 | |
137 | /* Assume cost of zero-extend and sign-extend is the same. */ | |
c83cf304 | 138 | which = (to_size < from_size ? all->trunc : all->zext); |
91f8035e | 139 | |
c83cf304 | 140 | PUT_MODE (all->reg, from_mode); |
e548c9df AM |
141 | set_convert_cost (to_mode, from_mode, speed, |
142 | set_src_cost (which, to_mode, speed)); | |
91f8035e RH |
143 | } |
144 | ||
84ddb681 RH |
145 | static void |
146 | init_expmed_one_mode (struct init_expmed_rtl *all, | |
ef4bddc2 | 147 | machine_mode mode, int speed) |
84ddb681 RH |
148 | { |
149 | int m, n, mode_bitsize; | |
ef4bddc2 | 150 | machine_mode mode_from; |
44037a66 | 151 | |
84ddb681 | 152 | mode_bitsize = GET_MODE_UNIT_BITSIZE (mode); |
38a448ca | 153 | |
c83cf304 JJ |
154 | PUT_MODE (all->reg, mode); |
155 | PUT_MODE (all->plus, mode); | |
156 | PUT_MODE (all->neg, mode); | |
157 | PUT_MODE (all->mult, mode); | |
158 | PUT_MODE (all->sdiv, mode); | |
159 | PUT_MODE (all->udiv, mode); | |
160 | PUT_MODE (all->sdiv_32, mode); | |
161 | PUT_MODE (all->smod_32, mode); | |
162 | PUT_MODE (all->wide_trunc, mode); | |
163 | PUT_MODE (all->shift, mode); | |
164 | PUT_MODE (all->shift_mult, mode); | |
165 | PUT_MODE (all->shift_add, mode); | |
166 | PUT_MODE (all->shift_sub0, mode); | |
167 | PUT_MODE (all->shift_sub1, mode); | |
168 | PUT_MODE (all->zext, mode); | |
169 | PUT_MODE (all->trunc, mode); | |
170 | ||
e548c9df AM |
171 | set_add_cost (speed, mode, set_src_cost (all->plus, mode, speed)); |
172 | set_neg_cost (speed, mode, set_src_cost (all->neg, mode, speed)); | |
173 | set_mul_cost (speed, mode, set_src_cost (all->mult, mode, speed)); | |
174 | set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, mode, speed)); | |
175 | set_udiv_cost (speed, mode, set_src_cost (all->udiv, mode, speed)); | |
c83cf304 | 176 | |
e548c9df | 177 | set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, mode, speed) |
5322d07e | 178 | <= 2 * add_cost (speed, mode))); |
e548c9df | 179 | set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, mode, speed) |
5322d07e NF |
180 | <= 4 * add_cost (speed, mode))); |
181 | ||
182 | set_shift_cost (speed, mode, 0, 0); | |
183 | { | |
184 | int cost = add_cost (speed, mode); | |
185 | set_shiftadd_cost (speed, mode, 0, cost); | |
186 | set_shiftsub0_cost (speed, mode, 0, cost); | |
187 | set_shiftsub1_cost (speed, mode, 0, cost); | |
188 | } | |
84ddb681 RH |
189 | |
190 | n = MIN (MAX_BITS_PER_WORD, mode_bitsize); | |
191 | for (m = 1; m < n; m++) | |
192 | { | |
c83cf304 JJ |
193 | XEXP (all->shift, 1) = all->cint[m]; |
194 | XEXP (all->shift_mult, 1) = all->pow2[m]; | |
84ddb681 | 195 | |
e548c9df AM |
196 | set_shift_cost (speed, mode, m, set_src_cost (all->shift, mode, speed)); |
197 | set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, mode, | |
198 | speed)); | |
199 | set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, mode, | |
200 | speed)); | |
201 | set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, mode, | |
202 | speed)); | |
84ddb681 RH |
203 | } |
204 | ||
205 | if (SCALAR_INT_MODE_P (mode)) | |
965703ed | 206 | { |
91f8035e | 207 | for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT; |
ef4bddc2 | 208 | mode_from = (machine_mode)(mode_from + 1)) |
91f8035e | 209 | init_expmed_one_conv (all, mode, mode_from, speed); |
490d0f6c RS |
210 | |
211 | machine_mode wider_mode; | |
212 | if (GET_MODE_CLASS (mode) == MODE_INT | |
213 | && GET_MODE_WIDER_MODE (mode).exists (&wider_mode)) | |
84ddb681 | 214 | { |
c83cf304 JJ |
215 | PUT_MODE (all->zext, wider_mode); |
216 | PUT_MODE (all->wide_mult, wider_mode); | |
217 | PUT_MODE (all->wide_lshr, wider_mode); | |
218 | XEXP (all->wide_lshr, 1) = GEN_INT (mode_bitsize); | |
84ddb681 | 219 | |
91f8035e | 220 | set_mul_widen_cost (speed, wider_mode, |
e548c9df | 221 | set_src_cost (all->wide_mult, wider_mode, speed)); |
91f8035e | 222 | set_mul_highpart_cost (speed, mode, |
e548c9df | 223 | set_src_cost (all->wide_trunc, mode, speed)); |
84ddb681 | 224 | } |
965703ed | 225 | } |
84ddb681 RH |
226 | } |
227 | ||
228 | void | |
229 | init_expmed (void) | |
230 | { | |
231 | struct init_expmed_rtl all; | |
ef4bddc2 | 232 | machine_mode mode = QImode; |
84ddb681 RH |
233 | int m, speed; |
234 | ||
79b4a8dc | 235 | memset (&all, 0, sizeof all); |
84ddb681 RH |
236 | for (m = 1; m < MAX_BITS_PER_WORD; m++) |
237 | { | |
fecfbfa4 | 238 | all.pow2[m] = GEN_INT (HOST_WIDE_INT_1 << m); |
84ddb681 RH |
239 | all.cint[m] = GEN_INT (m); |
240 | } | |
79b4a8dc | 241 | |
1d27fed4 | 242 | /* Avoid using hard regs in ways which may be unsupported. */ |
9fccb335 | 243 | all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1); |
c83cf304 JJ |
244 | all.plus = gen_rtx_PLUS (mode, all.reg, all.reg); |
245 | all.neg = gen_rtx_NEG (mode, all.reg); | |
246 | all.mult = gen_rtx_MULT (mode, all.reg, all.reg); | |
247 | all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg); | |
248 | all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg); | |
249 | all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]); | |
250 | all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]); | |
251 | all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg); | |
252 | all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext); | |
253 | all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg); | |
254 | all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr); | |
255 | all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg); | |
256 | all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg); | |
257 | all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg); | |
258 | all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg); | |
259 | all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult); | |
260 | all.trunc = gen_rtx_TRUNCATE (mode, all.reg); | |
6dd8f4bb | 261 | |
f40751dd | 262 | for (speed = 0; speed < 2; speed++) |
71af73bb | 263 | { |
f40751dd | 264 | crtl->maybe_hot_insn_p = speed; |
e548c9df | 265 | set_zero_cost (speed, set_src_cost (const0_rtx, mode, speed)); |
79b4a8dc | 266 | |
91f8035e | 267 | for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT; |
ef4bddc2 | 268 | mode = (machine_mode)(mode + 1)) |
84ddb681 | 269 | init_expmed_one_mode (&all, mode, speed); |
79b4a8dc | 270 | |
91f8035e RH |
271 | if (MIN_MODE_PARTIAL_INT != VOIDmode) |
272 | for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT; | |
ef4bddc2 | 273 | mode = (machine_mode)(mode + 1)) |
91f8035e RH |
274 | init_expmed_one_mode (&all, mode, speed); |
275 | ||
276 | if (MIN_MODE_VECTOR_INT != VOIDmode) | |
277 | for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT; | |
ef4bddc2 | 278 | mode = (machine_mode)(mode + 1)) |
91f8035e | 279 | init_expmed_one_mode (&all, mode, speed); |
79b4a8dc | 280 | } |
84ddb681 | 281 | |
5322d07e NF |
282 | if (alg_hash_used_p ()) |
283 | { | |
284 | struct alg_hash_entry *p = alg_hash_entry_ptr (0); | |
285 | memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES); | |
286 | } | |
c371bb73 | 287 | else |
5322d07e | 288 | set_alg_hash_used_p (true); |
f40751dd | 289 | default_rtl_profile (); |
c83cf304 JJ |
290 | |
291 | ggc_free (all.trunc); | |
292 | ggc_free (all.shift_sub1); | |
293 | ggc_free (all.shift_sub0); | |
294 | ggc_free (all.shift_add); | |
295 | ggc_free (all.shift_mult); | |
296 | ggc_free (all.shift); | |
297 | ggc_free (all.wide_trunc); | |
298 | ggc_free (all.wide_lshr); | |
299 | ggc_free (all.wide_mult); | |
300 | ggc_free (all.zext); | |
301 | ggc_free (all.smod_32); | |
302 | ggc_free (all.sdiv_32); | |
303 | ggc_free (all.udiv); | |
304 | ggc_free (all.sdiv); | |
305 | ggc_free (all.mult); | |
306 | ggc_free (all.neg); | |
307 | ggc_free (all.plus); | |
308 | ggc_free (all.reg); | |
44037a66 TG |
309 | } |
310 | ||
311 | /* Return an rtx representing minus the value of X. | |
312 | MODE is the intended mode of the result, | |
313 | useful if X is a CONST_INT. */ | |
314 | ||
315 | rtx | |
ef4bddc2 | 316 | negate_rtx (machine_mode mode, rtx x) |
44037a66 | 317 | { |
a39a7484 RK |
318 | rtx result = simplify_unary_operation (NEG, mode, x, mode); |
319 | ||
fdb5537f | 320 | if (result == 0) |
a39a7484 RK |
321 | result = expand_unop (mode, neg_optab, x, NULL_RTX, 0); |
322 | ||
323 | return result; | |
44037a66 | 324 | } |
da920570 | 325 | |
ee45a32d EB |
326 | /* Whether reverse storage order is supported on the target. */ |
327 | static int reverse_storage_order_supported = -1; | |
328 | ||
329 | /* Check whether reverse storage order is supported on the target. */ | |
330 | ||
331 | static void | |
332 | check_reverse_storage_order_support (void) | |
333 | { | |
334 | if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN) | |
335 | { | |
336 | reverse_storage_order_supported = 0; | |
337 | sorry ("reverse scalar storage order"); | |
338 | } | |
339 | else | |
340 | reverse_storage_order_supported = 1; | |
341 | } | |
342 | ||
343 | /* Whether reverse FP storage order is supported on the target. */ | |
344 | static int reverse_float_storage_order_supported = -1; | |
345 | ||
346 | /* Check whether reverse FP storage order is supported on the target. */ | |
347 | ||
348 | static void | |
349 | check_reverse_float_storage_order_support (void) | |
350 | { | |
351 | if (FLOAT_WORDS_BIG_ENDIAN != WORDS_BIG_ENDIAN) | |
352 | { | |
353 | reverse_float_storage_order_supported = 0; | |
354 | sorry ("reverse floating-point scalar storage order"); | |
355 | } | |
356 | else | |
357 | reverse_float_storage_order_supported = 1; | |
358 | } | |
359 | ||
360 | /* Return an rtx representing value of X with reverse storage order. | |
361 | MODE is the intended mode of the result, | |
362 | useful if X is a CONST_INT. */ | |
363 | ||
364 | rtx | |
b8506a8a | 365 | flip_storage_order (machine_mode mode, rtx x) |
ee45a32d | 366 | { |
b8506a8a | 367 | machine_mode int_mode; |
ee45a32d EB |
368 | rtx result; |
369 | ||
370 | if (mode == QImode) | |
371 | return x; | |
372 | ||
373 | if (COMPLEX_MODE_P (mode)) | |
374 | { | |
375 | rtx real = read_complex_part (x, false); | |
376 | rtx imag = read_complex_part (x, true); | |
377 | ||
378 | real = flip_storage_order (GET_MODE_INNER (mode), real); | |
379 | imag = flip_storage_order (GET_MODE_INNER (mode), imag); | |
380 | ||
381 | return gen_rtx_CONCAT (mode, real, imag); | |
382 | } | |
383 | ||
384 | if (__builtin_expect (reverse_storage_order_supported < 0, 0)) | |
385 | check_reverse_storage_order_support (); | |
386 | ||
387 | if (SCALAR_INT_MODE_P (mode)) | |
388 | int_mode = mode; | |
389 | else | |
390 | { | |
391 | if (FLOAT_MODE_P (mode) | |
392 | && __builtin_expect (reverse_float_storage_order_supported < 0, 0)) | |
393 | check_reverse_float_storage_order_support (); | |
394 | ||
395 | int_mode = mode_for_size (GET_MODE_PRECISION (mode), MODE_INT, 0); | |
396 | if (int_mode == BLKmode) | |
397 | { | |
398 | sorry ("reverse storage order for %smode", GET_MODE_NAME (mode)); | |
399 | return x; | |
400 | } | |
401 | x = gen_lowpart (int_mode, x); | |
402 | } | |
403 | ||
404 | result = simplify_unary_operation (BSWAP, int_mode, x, int_mode); | |
405 | if (result == 0) | |
406 | result = expand_unop (int_mode, bswap_optab, x, NULL_RTX, 1); | |
407 | ||
408 | if (int_mode != mode) | |
409 | result = gen_lowpart (mode, result); | |
410 | ||
411 | return result; | |
412 | } | |
413 | ||
26f8b976 RS |
414 | /* Adjust bitfield memory MEM so that it points to the first unit of mode |
415 | MODE that contains a bitfield of size BITSIZE at bit position BITNUM. | |
416 | If MODE is BLKmode, return a reference to every byte in the bitfield. | |
417 | Set *NEW_BITNUM to the bit position of the field within the new memory. */ | |
418 | ||
419 | static rtx | |
ef4bddc2 | 420 | narrow_bit_field_mem (rtx mem, machine_mode mode, |
26f8b976 RS |
421 | unsigned HOST_WIDE_INT bitsize, |
422 | unsigned HOST_WIDE_INT bitnum, | |
423 | unsigned HOST_WIDE_INT *new_bitnum) | |
424 | { | |
425 | if (mode == BLKmode) | |
426 | { | |
427 | *new_bitnum = bitnum % BITS_PER_UNIT; | |
428 | HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT; | |
429 | HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1) | |
430 | / BITS_PER_UNIT); | |
431 | return adjust_bitfield_address_size (mem, mode, offset, size); | |
432 | } | |
433 | else | |
434 | { | |
435 | unsigned int unit = GET_MODE_BITSIZE (mode); | |
436 | *new_bitnum = bitnum % unit; | |
437 | HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT; | |
438 | return adjust_bitfield_address (mem, mode, offset); | |
439 | } | |
440 | } | |
441 | ||
fcdd52b7 RS |
442 | /* The caller wants to perform insertion or extraction PATTERN on a |
443 | bitfield of size BITSIZE at BITNUM bits into memory operand OP0. | |
444 | BITREGION_START and BITREGION_END are as for store_bit_field | |
445 | and FIELDMODE is the natural mode of the field. | |
446 | ||
447 | Search for a mode that is compatible with the memory access | |
448 | restrictions and (where applicable) with a register insertion or | |
449 | extraction. Return the new memory on success, storing the adjusted | |
450 | bit position in *NEW_BITNUM. Return null otherwise. */ | |
451 | ||
452 | static rtx | |
453 | adjust_bit_field_mem_for_reg (enum extraction_pattern pattern, | |
454 | rtx op0, HOST_WIDE_INT bitsize, | |
455 | HOST_WIDE_INT bitnum, | |
456 | unsigned HOST_WIDE_INT bitregion_start, | |
457 | unsigned HOST_WIDE_INT bitregion_end, | |
ef4bddc2 | 458 | machine_mode fieldmode, |
fcdd52b7 RS |
459 | unsigned HOST_WIDE_INT *new_bitnum) |
460 | { | |
461 | bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start, | |
462 | bitregion_end, MEM_ALIGN (op0), | |
463 | MEM_VOLATILE_P (op0)); | |
ef4bddc2 | 464 | machine_mode best_mode; |
fcdd52b7 RS |
465 | if (iter.next_mode (&best_mode)) |
466 | { | |
467 | /* We can use a memory in BEST_MODE. See whether this is true for | |
468 | any wider modes. All other things being equal, we prefer to | |
469 | use the widest mode possible because it tends to expose more | |
470 | CSE opportunities. */ | |
471 | if (!iter.prefer_smaller_modes ()) | |
472 | { | |
473 | /* Limit the search to the mode required by the corresponding | |
474 | register insertion or extraction instruction, if any. */ | |
ef4bddc2 | 475 | machine_mode limit_mode = word_mode; |
fcdd52b7 RS |
476 | extraction_insn insn; |
477 | if (get_best_reg_extraction_insn (&insn, pattern, | |
478 | GET_MODE_BITSIZE (best_mode), | |
479 | fieldmode)) | |
480 | limit_mode = insn.field_mode; | |
481 | ||
ef4bddc2 | 482 | machine_mode wider_mode; |
fcdd52b7 RS |
483 | while (iter.next_mode (&wider_mode) |
484 | && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode)) | |
485 | best_mode = wider_mode; | |
486 | } | |
487 | return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum, | |
488 | new_bitnum); | |
489 | } | |
490 | return NULL_RTX; | |
491 | } | |
492 | ||
bebf0797 RS |
493 | /* Return true if a bitfield of size BITSIZE at bit number BITNUM within |
494 | a structure of mode STRUCT_MODE represents a lowpart subreg. The subreg | |
495 | offset is then BITNUM / BITS_PER_UNIT. */ | |
496 | ||
497 | static bool | |
498 | lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum, | |
499 | unsigned HOST_WIDE_INT bitsize, | |
ef4bddc2 | 500 | machine_mode struct_mode) |
bebf0797 RS |
501 | { |
502 | if (BYTES_BIG_ENDIAN) | |
c1a4d0b5 | 503 | return (bitnum % BITS_PER_UNIT == 0 |
bebf0797 RS |
504 | && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode) |
505 | || (bitnum + bitsize) % BITS_PER_WORD == 0)); | |
506 | else | |
507 | return bitnum % BITS_PER_WORD == 0; | |
508 | } | |
00efe3ea | 509 | |
548cfdc2 | 510 | /* Return true if -fstrict-volatile-bitfields applies to an access of OP0 |
6f4e9cf8 BE |
511 | containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE. |
512 | Return false if the access would touch memory outside the range | |
513 | BITREGION_START to BITREGION_END for conformance to the C++ memory | |
514 | model. */ | |
f5d4f18c SL |
515 | |
516 | static bool | |
517 | strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize, | |
518 | unsigned HOST_WIDE_INT bitnum, | |
ef4bddc2 | 519 | machine_mode fieldmode, |
6f4e9cf8 BE |
520 | unsigned HOST_WIDE_INT bitregion_start, |
521 | unsigned HOST_WIDE_INT bitregion_end) | |
f5d4f18c SL |
522 | { |
523 | unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode); | |
524 | ||
525 | /* -fstrict-volatile-bitfields must be enabled and we must have a | |
526 | volatile MEM. */ | |
527 | if (!MEM_P (op0) | |
528 | || !MEM_VOLATILE_P (op0) | |
529 | || flag_strict_volatile_bitfields <= 0) | |
530 | return false; | |
531 | ||
532 | /* Non-integral modes likely only happen with packed structures. | |
533 | Punt. */ | |
534 | if (!SCALAR_INT_MODE_P (fieldmode)) | |
535 | return false; | |
536 | ||
537 | /* The bit size must not be larger than the field mode, and | |
538 | the field mode must not be larger than a word. */ | |
539 | if (bitsize > modesize || modesize > BITS_PER_WORD) | |
540 | return false; | |
541 | ||
542 | /* Check for cases of unaligned fields that must be split. */ | |
b6dd42a9 BE |
543 | if (bitnum % modesize + bitsize > modesize) |
544 | return false; | |
545 | ||
546 | /* The memory must be sufficiently aligned for a MODESIZE access. | |
547 | This condition guarantees, that the memory access will not | |
548 | touch anything after the end of the structure. */ | |
549 | if (MEM_ALIGN (op0) < modesize) | |
f5d4f18c SL |
550 | return false; |
551 | ||
6f4e9cf8 BE |
552 | /* Check for cases where the C++ memory model applies. */ |
553 | if (bitregion_end != 0 | |
554 | && (bitnum - bitnum % modesize < bitregion_start | |
40f94f7d | 555 | || bitnum - bitnum % modesize + modesize - 1 > bitregion_end)) |
6f4e9cf8 BE |
556 | return false; |
557 | ||
f5d4f18c SL |
558 | return true; |
559 | } | |
560 | ||
00efe3ea RS |
561 | /* Return true if OP is a memory and if a bitfield of size BITSIZE at |
562 | bit number BITNUM can be treated as a simple value of mode MODE. */ | |
563 | ||
564 | static bool | |
565 | simple_mem_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize, | |
ef4bddc2 | 566 | unsigned HOST_WIDE_INT bitnum, machine_mode mode) |
00efe3ea RS |
567 | { |
568 | return (MEM_P (op0) | |
569 | && bitnum % BITS_PER_UNIT == 0 | |
570 | && bitsize == GET_MODE_BITSIZE (mode) | |
571 | && (!SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0)) | |
572 | || (bitnum % GET_MODE_ALIGNMENT (mode) == 0 | |
573 | && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode)))); | |
574 | } | |
6d7db3c5 | 575 | \f |
fcdd52b7 RS |
576 | /* Try to use instruction INSV to store VALUE into a field of OP0. |
577 | BITSIZE and BITNUM are as for store_bit_field. */ | |
a20556e4 RS |
578 | |
579 | static bool | |
fcdd52b7 RS |
580 | store_bit_field_using_insv (const extraction_insn *insv, rtx op0, |
581 | unsigned HOST_WIDE_INT bitsize, | |
548cfdc2 EB |
582 | unsigned HOST_WIDE_INT bitnum, |
583 | rtx value) | |
a20556e4 RS |
584 | { |
585 | struct expand_operand ops[4]; | |
586 | rtx value1; | |
587 | rtx xop0 = op0; | |
f3f6fb16 | 588 | rtx_insn *last = get_last_insn (); |
a20556e4 RS |
589 | bool copy_back = false; |
590 | ||
ef4bddc2 | 591 | machine_mode op_mode = insv->field_mode; |
a20556e4 RS |
592 | unsigned int unit = GET_MODE_BITSIZE (op_mode); |
593 | if (bitsize == 0 || bitsize > unit) | |
594 | return false; | |
595 | ||
596 | if (MEM_P (xop0)) | |
26f8b976 | 597 | /* Get a reference to the first byte of the field. */ |
fcdd52b7 RS |
598 | xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum, |
599 | &bitnum); | |
a20556e4 RS |
600 | else |
601 | { | |
602 | /* Convert from counting within OP0 to counting in OP_MODE. */ | |
603 | if (BYTES_BIG_ENDIAN) | |
604 | bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0)); | |
605 | ||
606 | /* If xop0 is a register, we need it in OP_MODE | |
607 | to make it acceptable to the format of insv. */ | |
608 | if (GET_CODE (xop0) == SUBREG) | |
609 | /* We can't just change the mode, because this might clobber op0, | |
610 | and we will need the original value of op0 if insv fails. */ | |
611 | xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0)); | |
612 | if (REG_P (xop0) && GET_MODE (xop0) != op_mode) | |
613 | xop0 = gen_lowpart_SUBREG (op_mode, xop0); | |
614 | } | |
615 | ||
616 | /* If the destination is a paradoxical subreg such that we need a | |
617 | truncate to the inner mode, perform the insertion on a temporary and | |
618 | truncate the result to the original destination. Note that we can't | |
619 | just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N | |
620 | X) 0)) is (reg:N X). */ | |
621 | if (GET_CODE (xop0) == SUBREG | |
622 | && REG_P (SUBREG_REG (xop0)) | |
623 | && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)), | |
624 | op_mode)) | |
625 | { | |
626 | rtx tem = gen_reg_rtx (op_mode); | |
627 | emit_move_insn (tem, xop0); | |
628 | xop0 = tem; | |
629 | copy_back = true; | |
630 | } | |
631 | ||
4ae9783e JW |
632 | /* There are similar overflow check at the start of store_bit_field_1, |
633 | but that only check the situation where the field lies completely | |
634 | outside the register, while there do have situation where the field | |
635 | lies partialy in the register, we need to adjust bitsize for this | |
636 | partial overflow situation. Without this fix, pr48335-2.c on big-endian | |
637 | will broken on those arch support bit insert instruction, like arm, aarch64 | |
638 | etc. */ | |
639 | if (bitsize + bitnum > unit && bitnum < unit) | |
640 | { | |
e623cedf JW |
641 | warning (OPT_Wextra, "write of %wu-bit data outside the bound of " |
642 | "destination object, data truncated into %wu-bit", | |
643 | bitsize, unit - bitnum); | |
4ae9783e JW |
644 | bitsize = unit - bitnum; |
645 | } | |
646 | ||
a20556e4 RS |
647 | /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count |
648 | "backwards" from the size of the unit we are inserting into. | |
649 | Otherwise, we count bits from the most significant on a | |
650 | BYTES/BITS_BIG_ENDIAN machine. */ | |
651 | ||
652 | if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN) | |
653 | bitnum = unit - bitsize - bitnum; | |
654 | ||
655 | /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */ | |
656 | value1 = value; | |
657 | if (GET_MODE (value) != op_mode) | |
658 | { | |
659 | if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize) | |
660 | { | |
686d390a | 661 | rtx tmp; |
a20556e4 RS |
662 | /* Optimization: Don't bother really extending VALUE |
663 | if it has all the bits we will actually use. However, | |
664 | if we must narrow it, be sure we do it correctly. */ | |
665 | ||
666 | if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode)) | |
667 | { | |
a20556e4 RS |
668 | tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0); |
669 | if (! tmp) | |
670 | tmp = simplify_gen_subreg (op_mode, | |
671 | force_reg (GET_MODE (value), | |
672 | value1), | |
673 | GET_MODE (value), 0); | |
a20556e4 RS |
674 | } |
675 | else | |
686d390a JJ |
676 | { |
677 | tmp = gen_lowpart_if_possible (op_mode, value1); | |
678 | if (! tmp) | |
679 | tmp = gen_lowpart (op_mode, force_reg (GET_MODE (value), | |
680 | value1)); | |
681 | } | |
682 | value1 = tmp; | |
a20556e4 RS |
683 | } |
684 | else if (CONST_INT_P (value)) | |
685 | value1 = gen_int_mode (INTVAL (value), op_mode); | |
686 | else | |
687 | /* Parse phase is supposed to make VALUE's data type | |
688 | match that of the component reference, which is a type | |
689 | at least as wide as the field; so VALUE should have | |
690 | a mode that corresponds to that type. */ | |
691 | gcc_assert (CONSTANT_P (value)); | |
692 | } | |
693 | ||
694 | create_fixed_operand (&ops[0], xop0); | |
695 | create_integer_operand (&ops[1], bitsize); | |
696 | create_integer_operand (&ops[2], bitnum); | |
697 | create_input_operand (&ops[3], value1, op_mode); | |
fcdd52b7 | 698 | if (maybe_expand_insn (insv->icode, 4, ops)) |
a20556e4 RS |
699 | { |
700 | if (copy_back) | |
701 | convert_move (op0, xop0, true); | |
702 | return true; | |
703 | } | |
704 | delete_insns_since (last); | |
705 | return false; | |
706 | } | |
707 | ||
6d7db3c5 RS |
708 | /* A subroutine of store_bit_field, with the same arguments. Return true |
709 | if the operation could be implemented. | |
44037a66 | 710 | |
6d7db3c5 RS |
711 | If FALLBACK_P is true, fall back to store_fixed_bit_field if we have |
712 | no other way of implementing the operation. If FALLBACK_P is false, | |
713 | return false instead. */ | |
714 | ||
715 | static bool | |
716 | store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize, | |
1169e45d AH |
717 | unsigned HOST_WIDE_INT bitnum, |
718 | unsigned HOST_WIDE_INT bitregion_start, | |
719 | unsigned HOST_WIDE_INT bitregion_end, | |
ef4bddc2 | 720 | machine_mode fieldmode, |
ee45a32d | 721 | rtx value, bool reverse, bool fallback_p) |
44037a66 | 722 | { |
b3694847 | 723 | rtx op0 = str_rtx; |
28526e20 | 724 | rtx orig_value; |
da920570 | 725 | |
44037a66 TG |
726 | while (GET_CODE (op0) == SUBREG) |
727 | { | |
728 | /* The following line once was done only if WORDS_BIG_ENDIAN, | |
729 | but I think that is a mistake. WORDS_BIG_ENDIAN is | |
730 | meaningful at a much higher level; when structures are copied | |
731 | between memory and regs, the higher-numbered regs | |
732 | always get higher addresses. */ | |
495db1a1 AK |
733 | int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0))); |
734 | int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0)); | |
bebf0797 | 735 | int byte_offset = 0; |
495db1a1 | 736 | |
ee45a32d | 737 | /* Paradoxical subregs need special handling on big-endian machines. */ |
03a95621 | 738 | if (paradoxical_subreg_p (op0)) |
495db1a1 AK |
739 | { |
740 | int difference = inner_mode_size - outer_mode_size; | |
741 | ||
742 | if (WORDS_BIG_ENDIAN) | |
743 | byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD; | |
744 | if (BYTES_BIG_ENDIAN) | |
745 | byte_offset += difference % UNITS_PER_WORD; | |
746 | } | |
747 | else | |
748 | byte_offset = SUBREG_BYTE (op0); | |
749 | ||
750 | bitnum += byte_offset * BITS_PER_UNIT; | |
44037a66 TG |
751 | op0 = SUBREG_REG (op0); |
752 | } | |
753 | ||
2c58f7dd RS |
754 | /* No action is needed if the target is a register and if the field |
755 | lies completely outside that register. This can occur if the source | |
756 | code contains an out-of-bounds access to a small array. */ | |
757 | if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0))) | |
6d7db3c5 | 758 | return true; |
2c58f7dd | 759 | |
b42271d6 | 760 | /* Use vec_set patterns for inserting parts of vectors whenever |
997404de JH |
761 | available. */ |
762 | if (VECTOR_MODE_P (GET_MODE (op0)) | |
3c0cb5de | 763 | && !MEM_P (op0) |
947131ba | 764 | && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing |
997404de | 765 | && fieldmode == GET_MODE_INNER (GET_MODE (op0)) |
6c825cd4 DS |
766 | && bitsize == GET_MODE_UNIT_BITSIZE (GET_MODE (op0)) |
767 | && !(bitnum % GET_MODE_UNIT_BITSIZE (GET_MODE (op0)))) | |
997404de | 768 | { |
a5c7d693 | 769 | struct expand_operand ops[3]; |
ef4bddc2 RS |
770 | machine_mode outermode = GET_MODE (op0); |
771 | machine_mode innermode = GET_MODE_INNER (outermode); | |
a5c7d693 | 772 | enum insn_code icode = optab_handler (vec_set_optab, outermode); |
997404de | 773 | int pos = bitnum / GET_MODE_BITSIZE (innermode); |
997404de | 774 | |
a5c7d693 RS |
775 | create_fixed_operand (&ops[0], op0); |
776 | create_input_operand (&ops[1], value, innermode); | |
777 | create_integer_operand (&ops[2], pos); | |
778 | if (maybe_expand_insn (icode, 3, ops)) | |
779 | return true; | |
997404de JH |
780 | } |
781 | ||
308ecea0 | 782 | /* If the target is a register, overwriting the entire object, or storing |
bebf0797 RS |
783 | a full-word or multi-word field can be done with just a SUBREG. */ |
784 | if (!MEM_P (op0) | |
785 | && bitsize == GET_MODE_BITSIZE (fieldmode) | |
786 | && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0) | |
787 | || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0))) | |
788 | { | |
789 | /* Use the subreg machinery either to narrow OP0 to the required | |
d8c84975 JJ |
790 | words or to cope with mode punning between equal-sized modes. |
791 | In the latter case, use subreg on the rhs side, not lhs. */ | |
792 | rtx sub; | |
793 | ||
794 | if (bitsize == GET_MODE_BITSIZE (GET_MODE (op0))) | |
795 | { | |
796 | sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0); | |
797 | if (sub) | |
798 | { | |
ee45a32d EB |
799 | if (reverse) |
800 | sub = flip_storage_order (GET_MODE (op0), sub); | |
d8c84975 JJ |
801 | emit_move_insn (op0, sub); |
802 | return true; | |
803 | } | |
804 | } | |
805 | else | |
bebf0797 | 806 | { |
d8c84975 JJ |
807 | sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0), |
808 | bitnum / BITS_PER_UNIT); | |
809 | if (sub) | |
810 | { | |
ee45a32d EB |
811 | if (reverse) |
812 | value = flip_storage_order (fieldmode, value); | |
d8c84975 JJ |
813 | emit_move_insn (sub, value); |
814 | return true; | |
815 | } | |
bebf0797 RS |
816 | } |
817 | } | |
308ecea0 | 818 | |
bebf0797 | 819 | /* If the target is memory, storing any naturally aligned field can be |
308ecea0 | 820 | done with a simple store. For targets that support fast unaligned |
0b69c29f | 821 | memory, any naturally sized, unit aligned field can be done directly. */ |
00efe3ea | 822 | if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode)) |
44037a66 | 823 | { |
bebf0797 | 824 | op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT); |
ee45a32d EB |
825 | if (reverse) |
826 | value = flip_storage_order (fieldmode, value); | |
44037a66 | 827 | emit_move_insn (op0, value); |
6d7db3c5 | 828 | return true; |
44037a66 TG |
829 | } |
830 | ||
a8ca7756 JW |
831 | /* Make sure we are playing with integral modes. Pun with subregs |
832 | if we aren't. This must come after the entire register case above, | |
833 | since that case is valid for any mode. The following cases are only | |
834 | valid for integral modes. */ | |
835 | { | |
ef4bddc2 | 836 | machine_mode imode = int_mode_for_mode (GET_MODE (op0)); |
a8ca7756 JW |
837 | if (imode != GET_MODE (op0)) |
838 | { | |
3c0cb5de | 839 | if (MEM_P (op0)) |
e98fc6de | 840 | op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0)); |
a8ca7756 | 841 | else |
5b0264cb NS |
842 | { |
843 | gcc_assert (imode != BLKmode); | |
844 | op0 = gen_lowpart (imode, op0); | |
845 | } | |
a8ca7756 JW |
846 | } |
847 | } | |
848 | ||
44037a66 | 849 | /* Storing an lsb-aligned field in a register |
bebf0797 | 850 | can be done with a movstrict instruction. */ |
44037a66 | 851 | |
3c0cb5de | 852 | if (!MEM_P (op0) |
ee45a32d | 853 | && !reverse |
bebf0797 | 854 | && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0)) |
44037a66 | 855 | && bitsize == GET_MODE_BITSIZE (fieldmode) |
947131ba | 856 | && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing) |
44037a66 | 857 | { |
a5c7d693 RS |
858 | struct expand_operand ops[2]; |
859 | enum insn_code icode = optab_handler (movstrict_optab, fieldmode); | |
5d560619 | 860 | rtx arg0 = op0; |
19228b93 | 861 | unsigned HOST_WIDE_INT subreg_off; |
5e4900c7 | 862 | |
a5c7d693 | 863 | if (GET_CODE (arg0) == SUBREG) |
44037a66 | 864 | { |
5b0264cb NS |
865 | /* Else we've got some float mode source being extracted into |
866 | a different float mode destination -- this combination of | |
867 | subregs results in Severe Tire Damage. */ | |
a5c7d693 | 868 | gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode |
5b0264cb NS |
869 | || GET_MODE_CLASS (fieldmode) == MODE_INT |
870 | || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT); | |
a5c7d693 | 871 | arg0 = SUBREG_REG (arg0); |
5e4900c7 | 872 | } |
470032d7 | 873 | |
bebf0797 | 874 | subreg_off = bitnum / BITS_PER_UNIT; |
19228b93 JJ |
875 | if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off)) |
876 | { | |
877 | arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off); | |
a5c7d693 | 878 | |
19228b93 JJ |
879 | create_fixed_operand (&ops[0], arg0); |
880 | /* Shrink the source operand to FIELDMODE. */ | |
881 | create_convert_operand_to (&ops[1], value, fieldmode, false); | |
882 | if (maybe_expand_insn (icode, 2, ops)) | |
883 | return true; | |
884 | } | |
44037a66 TG |
885 | } |
886 | ||
887 | /* Handle fields bigger than a word. */ | |
888 | ||
889 | if (bitsize > BITS_PER_WORD) | |
890 | { | |
891 | /* Here we transfer the words of the field | |
892 | in the order least significant first. | |
893 | This is because the most significant word is the one which may | |
ad83e87b PB |
894 | be less than full. |
895 | However, only do that if the value is not BLKmode. */ | |
896 | ||
ee45a32d | 897 | const bool backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode; |
770ae6cc RK |
898 | unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD; |
899 | unsigned int i; | |
f3f6fb16 | 900 | rtx_insn *last; |
44037a66 TG |
901 | |
902 | /* This is the mode we must force value to, so that there will be enough | |
903 | subwords to extract. Note that fieldmode will often (always?) be | |
904 | VOIDmode, because that is what store_field uses to indicate that this | |
535a42b1 NS |
905 | is a bit field, but passing VOIDmode to operand_subword_force |
906 | is not allowed. */ | |
9f5e2e11 RS |
907 | fieldmode = GET_MODE (value); |
908 | if (fieldmode == VOIDmode) | |
6f83092f | 909 | fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT); |
44037a66 | 910 | |
6d7db3c5 | 911 | last = get_last_insn (); |
44037a66 TG |
912 | for (i = 0; i < nwords; i++) |
913 | { | |
ad83e87b PB |
914 | /* If I is 0, use the low-order word in both field and target; |
915 | if I is 1, use the next to lowest word; and so on. */ | |
00d6b19a AB |
916 | unsigned int wordnum = (backwards |
917 | ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD | |
918 | - i - 1 | |
919 | : i); | |
ee45a32d | 920 | unsigned int bit_offset = (backwards ^ reverse |
04050c69 RK |
921 | ? MAX ((int) bitsize - ((int) i + 1) |
922 | * BITS_PER_WORD, | |
923 | 0) | |
924 | : (int) i * BITS_PER_WORD); | |
6d7db3c5 | 925 | rtx value_word = operand_subword_force (value, wordnum, fieldmode); |
3bdb97b8 AK |
926 | unsigned HOST_WIDE_INT new_bitsize = |
927 | MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD); | |
928 | ||
929 | /* If the remaining chunk doesn't have full wordsize we have | |
ee45a32d | 930 | to make sure that for big-endian machines the higher order |
3bdb97b8 AK |
931 | bits are used. */ |
932 | if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards) | |
933 | value_word = simplify_expand_binop (word_mode, lshr_optab, | |
934 | value_word, | |
935 | GEN_INT (BITS_PER_WORD | |
936 | - new_bitsize), | |
937 | NULL_RTX, true, | |
938 | OPTAB_LIB_WIDEN); | |
939 | ||
940 | if (!store_bit_field_1 (op0, new_bitsize, | |
1169e45d AH |
941 | bitnum + bit_offset, |
942 | bitregion_start, bitregion_end, | |
943 | word_mode, | |
ee45a32d | 944 | value_word, reverse, fallback_p)) |
6d7db3c5 RS |
945 | { |
946 | delete_insns_since (last); | |
947 | return false; | |
948 | } | |
44037a66 | 949 | } |
6d7db3c5 | 950 | return true; |
44037a66 TG |
951 | } |
952 | ||
4f1da2e9 RS |
953 | /* If VALUE has a floating-point or complex mode, access it as an |
954 | integer of the corresponding size. This can occur on a machine | |
955 | with 64 bit registers that uses SFmode for float. It can also | |
956 | occur for unaligned float or complex fields. */ | |
28526e20 | 957 | orig_value = value; |
4f1da2e9 RS |
958 | if (GET_MODE (value) != VOIDmode |
959 | && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT | |
32b069d3 | 960 | && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT) |
4f1da2e9 RS |
961 | { |
962 | value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value))); | |
963 | emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value); | |
964 | } | |
2305bcad | 965 | |
bebf0797 | 966 | /* If OP0 is a multi-word register, narrow it to the affected word. |
7d790165 JJ |
967 | If the region spans two words, defer to store_split_bit_field. |
968 | Don't do this if op0 is a single hard register wider than word | |
969 | such as a float or vector register. */ | |
970 | if (!MEM_P (op0) | |
971 | && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD | |
972 | && (!REG_P (op0) | |
973 | || !HARD_REGISTER_P (op0) | |
974 | || HARD_REGNO_NREGS (REGNO (op0), GET_MODE (op0)) != 1)) | |
bebf0797 | 975 | { |
867a0126 | 976 | if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD) |
bebf0797 RS |
977 | { |
978 | if (!fallback_p) | |
979 | return false; | |
980 | ||
981 | store_split_bit_field (op0, bitsize, bitnum, bitregion_start, | |
ee45a32d | 982 | bitregion_end, value, reverse); |
bebf0797 RS |
983 | return true; |
984 | } | |
867a0126 RS |
985 | op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0), |
986 | bitnum / BITS_PER_WORD * UNITS_PER_WORD); | |
987 | gcc_assert (op0); | |
988 | bitnum %= BITS_PER_WORD; | |
bebf0797 RS |
989 | } |
990 | ||
991 | /* From here on we can assume that the field to be stored in fits | |
992 | within a word. If the destination is a register, it too fits | |
993 | in a word. */ | |
44037a66 | 994 | |
fcdd52b7 RS |
995 | extraction_insn insv; |
996 | if (!MEM_P (op0) | |
ee45a32d | 997 | && !reverse |
fcdd52b7 RS |
998 | && get_best_reg_extraction_insn (&insv, EP_insv, |
999 | GET_MODE_BITSIZE (GET_MODE (op0)), | |
1000 | fieldmode) | |
1001 | && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value)) | |
a20556e4 | 1002 | return true; |
6d7db3c5 RS |
1003 | |
1004 | /* If OP0 is a memory, try copying it to a register and seeing if a | |
1005 | cheap register alternative is available. */ | |
ee45a32d | 1006 | if (MEM_P (op0) && !reverse) |
6d7db3c5 | 1007 | { |
f5d4f18c SL |
1008 | if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum, |
1009 | fieldmode) | |
fcdd52b7 | 1010 | && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value)) |
17a73ba0 RS |
1011 | return true; |
1012 | ||
f3f6fb16 | 1013 | rtx_insn *last = get_last_insn (); |
6d7db3c5 | 1014 | |
fcdd52b7 RS |
1015 | /* Try loading part of OP0 into a register, inserting the bitfield |
1016 | into that, and then copying the result back to OP0. */ | |
1017 | unsigned HOST_WIDE_INT bitpos; | |
1018 | rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum, | |
1019 | bitregion_start, bitregion_end, | |
1020 | fieldmode, &bitpos); | |
1021 | if (xop0) | |
0fb7aeda | 1022 | { |
fcdd52b7 | 1023 | rtx tempreg = copy_to_reg (xop0); |
bebf0797 | 1024 | if (store_bit_field_1 (tempreg, bitsize, bitpos, |
1169e45d | 1025 | bitregion_start, bitregion_end, |
ee45a32d | 1026 | fieldmode, orig_value, reverse, false)) |
6d7db3c5 RS |
1027 | { |
1028 | emit_move_insn (xop0, tempreg); | |
1029 | return true; | |
1030 | } | |
44037a66 | 1031 | delete_insns_since (last); |
44037a66 TG |
1032 | } |
1033 | } | |
6d7db3c5 RS |
1034 | |
1035 | if (!fallback_p) | |
1036 | return false; | |
1037 | ||
bebf0797 | 1038 | store_fixed_bit_field (op0, bitsize, bitnum, bitregion_start, |
ee45a32d | 1039 | bitregion_end, value, reverse); |
6d7db3c5 RS |
1040 | return true; |
1041 | } | |
1042 | ||
1043 | /* Generate code to store value from rtx VALUE | |
1044 | into a bit-field within structure STR_RTX | |
1045 | containing BITSIZE bits starting at bit BITNUM. | |
1169e45d AH |
1046 | |
1047 | BITREGION_START is bitpos of the first bitfield in this region. | |
1048 | BITREGION_END is the bitpos of the ending bitfield in this region. | |
1049 | These two fields are 0, if the C++ memory model does not apply, | |
1050 | or we are not interested in keeping track of bitfield regions. | |
1051 | ||
ee45a32d EB |
1052 | FIELDMODE is the machine-mode of the FIELD_DECL node for this field. |
1053 | ||
1054 | If REVERSE is true, the store is to be done in reverse order. */ | |
6d7db3c5 RS |
1055 | |
1056 | void | |
1057 | store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize, | |
1169e45d AH |
1058 | unsigned HOST_WIDE_INT bitnum, |
1059 | unsigned HOST_WIDE_INT bitregion_start, | |
1060 | unsigned HOST_WIDE_INT bitregion_end, | |
ef4bddc2 | 1061 | machine_mode fieldmode, |
ee45a32d | 1062 | rtx value, bool reverse) |
6d7db3c5 | 1063 | { |
f5d4f18c | 1064 | /* Handle -fstrict-volatile-bitfields in the cases where it applies. */ |
6f4e9cf8 BE |
1065 | if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, fieldmode, |
1066 | bitregion_start, bitregion_end)) | |
f5d4f18c | 1067 | { |
b6dd42a9 BE |
1068 | /* Storing of a full word can be done with a simple store. |
1069 | We know here that the field can be accessed with one single | |
1070 | instruction. For targets that support unaligned memory, | |
1071 | an unaligned access may be necessary. */ | |
53c615a2 | 1072 | if (bitsize == GET_MODE_BITSIZE (fieldmode)) |
f5d4f18c SL |
1073 | { |
1074 | str_rtx = adjust_bitfield_address (str_rtx, fieldmode, | |
1075 | bitnum / BITS_PER_UNIT); | |
ee45a32d EB |
1076 | if (reverse) |
1077 | value = flip_storage_order (fieldmode, value); | |
b6dd42a9 | 1078 | gcc_assert (bitnum % BITS_PER_UNIT == 0); |
f5d4f18c SL |
1079 | emit_move_insn (str_rtx, value); |
1080 | } | |
1081 | else | |
ebb99f96 | 1082 | { |
53c615a2 BE |
1083 | rtx temp; |
1084 | ||
ebb99f96 BE |
1085 | str_rtx = narrow_bit_field_mem (str_rtx, fieldmode, bitsize, bitnum, |
1086 | &bitnum); | |
b6dd42a9 | 1087 | gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (fieldmode)); |
53c615a2 BE |
1088 | temp = copy_to_reg (str_rtx); |
1089 | if (!store_bit_field_1 (temp, bitsize, bitnum, 0, 0, | |
ee45a32d | 1090 | fieldmode, value, reverse, true)) |
53c615a2 BE |
1091 | gcc_unreachable (); |
1092 | ||
1093 | emit_move_insn (str_rtx, temp); | |
ebb99f96 BE |
1094 | } |
1095 | ||
f5d4f18c SL |
1096 | return; |
1097 | } | |
1098 | ||
1169e45d AH |
1099 | /* Under the C++0x memory model, we must not touch bits outside the |
1100 | bit region. Adjust the address to start at the beginning of the | |
1101 | bit region. */ | |
a59b038c | 1102 | if (MEM_P (str_rtx) && bitregion_start > 0) |
1169e45d | 1103 | { |
ef4bddc2 | 1104 | machine_mode bestmode; |
ee88e690 | 1105 | HOST_WIDE_INT offset, size; |
1169e45d | 1106 | |
a59b038c EB |
1107 | gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0); |
1108 | ||
1169e45d AH |
1109 | offset = bitregion_start / BITS_PER_UNIT; |
1110 | bitnum -= bitregion_start; | |
ee88e690 | 1111 | size = (bitnum + bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT; |
1169e45d AH |
1112 | bitregion_end -= bitregion_start; |
1113 | bitregion_start = 0; | |
1114 | bestmode = get_best_mode (bitsize, bitnum, | |
1115 | bitregion_start, bitregion_end, | |
fcdd52b7 | 1116 | MEM_ALIGN (str_rtx), VOIDmode, |
1169e45d | 1117 | MEM_VOLATILE_P (str_rtx)); |
ee88e690 | 1118 | str_rtx = adjust_bitfield_address_size (str_rtx, bestmode, offset, size); |
1169e45d AH |
1119 | } |
1120 | ||
1121 | if (!store_bit_field_1 (str_rtx, bitsize, bitnum, | |
1122 | bitregion_start, bitregion_end, | |
ee45a32d | 1123 | fieldmode, value, reverse, true)) |
6d7db3c5 | 1124 | gcc_unreachable (); |
44037a66 TG |
1125 | } |
1126 | \f | |
bebf0797 | 1127 | /* Use shifts and boolean operations to store VALUE into a bit field of |
ee45a32d EB |
1128 | width BITSIZE in OP0, starting at bit BITNUM. |
1129 | ||
1130 | If REVERSE is true, the store is to be done in reverse order. */ | |
44037a66 TG |
1131 | |
1132 | static void | |
bebf0797 RS |
1133 | store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize, |
1134 | unsigned HOST_WIDE_INT bitnum, | |
1169e45d AH |
1135 | unsigned HOST_WIDE_INT bitregion_start, |
1136 | unsigned HOST_WIDE_INT bitregion_end, | |
ee45a32d | 1137 | rtx value, bool reverse) |
44037a66 | 1138 | { |
44037a66 TG |
1139 | /* There is a case not handled here: |
1140 | a structure with a known alignment of just a halfword | |
1141 | and a field split across two aligned halfwords within the structure. | |
1142 | Or likewise a structure with a known alignment of just a byte | |
1143 | and a field split across two bytes. | |
1144 | Such cases are not supposed to be able to occur. */ | |
1145 | ||
bebf0797 | 1146 | if (MEM_P (op0)) |
44037a66 | 1147 | { |
ef4bddc2 | 1148 | machine_mode mode = GET_MODE (op0); |
053a35af | 1149 | if (GET_MODE_BITSIZE (mode) == 0 |
0fb7aeda KH |
1150 | || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode)) |
1151 | mode = word_mode; | |
f5d4f18c SL |
1152 | mode = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end, |
1153 | MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0)); | |
44037a66 TG |
1154 | |
1155 | if (mode == VOIDmode) | |
1156 | { | |
1157 | /* The only way this should occur is if the field spans word | |
1158 | boundaries. */ | |
bebf0797 | 1159 | store_split_bit_field (op0, bitsize, bitnum, bitregion_start, |
ee45a32d | 1160 | bitregion_end, value, reverse); |
44037a66 TG |
1161 | return; |
1162 | } | |
1163 | ||
26f8b976 | 1164 | op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum); |
44037a66 TG |
1165 | } |
1166 | ||
ee45a32d | 1167 | store_fixed_bit_field_1 (op0, bitsize, bitnum, value, reverse); |
ebb99f96 BE |
1168 | } |
1169 | ||
1170 | /* Helper function for store_fixed_bit_field, stores | |
1171 | the bit field always using the MODE of OP0. */ | |
1172 | ||
1173 | static void | |
1174 | store_fixed_bit_field_1 (rtx op0, unsigned HOST_WIDE_INT bitsize, | |
548cfdc2 | 1175 | unsigned HOST_WIDE_INT bitnum, |
ee45a32d | 1176 | rtx value, bool reverse) |
ebb99f96 | 1177 | { |
ef4bddc2 | 1178 | machine_mode mode; |
ebb99f96 BE |
1179 | rtx temp; |
1180 | int all_zero = 0; | |
1181 | int all_one = 0; | |
1182 | ||
44037a66 | 1183 | mode = GET_MODE (op0); |
bebf0797 | 1184 | gcc_assert (SCALAR_INT_MODE_P (mode)); |
44037a66 | 1185 | |
bebf0797 RS |
1186 | /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode) |
1187 | for invalid input, such as f5 from gcc.dg/pr48335-2.c. */ | |
44037a66 | 1188 | |
ee45a32d | 1189 | if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN) |
bebf0797 RS |
1190 | /* BITNUM is the distance between our msb |
1191 | and that of the containing datum. | |
1192 | Convert it to the distance from the lsb. */ | |
1193 | bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum; | |
44037a66 | 1194 | |
bebf0797 | 1195 | /* Now BITNUM is always the distance between our lsb |
44037a66 TG |
1196 | and that of OP0. */ |
1197 | ||
bebf0797 | 1198 | /* Shift VALUE left by BITNUM bits. If VALUE is not constant, |
44037a66 TG |
1199 | we must first convert its mode to MODE. */ |
1200 | ||
481683e1 | 1201 | if (CONST_INT_P (value)) |
44037a66 | 1202 | { |
e507a433 | 1203 | unsigned HOST_WIDE_INT v = UINTVAL (value); |
44037a66 | 1204 | |
b1ec3c92 | 1205 | if (bitsize < HOST_BITS_PER_WIDE_INT) |
fecfbfa4 | 1206 | v &= (HOST_WIDE_INT_1U << bitsize) - 1; |
44037a66 TG |
1207 | |
1208 | if (v == 0) | |
1209 | all_zero = 1; | |
b1ec3c92 | 1210 | else if ((bitsize < HOST_BITS_PER_WIDE_INT |
fecfbfa4 | 1211 | && v == (HOST_WIDE_INT_1U << bitsize) - 1) |
e507a433 | 1212 | || (bitsize == HOST_BITS_PER_WIDE_INT |
fecfbfa4 | 1213 | && v == HOST_WIDE_INT_M1U)) |
44037a66 TG |
1214 | all_one = 1; |
1215 | ||
088c5368 | 1216 | value = lshift_value (mode, v, bitnum); |
44037a66 TG |
1217 | } |
1218 | else | |
1219 | { | |
1220 | int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize | |
bebf0797 | 1221 | && bitnum + bitsize != GET_MODE_BITSIZE (mode)); |
44037a66 TG |
1222 | |
1223 | if (GET_MODE (value) != mode) | |
86cfb27a | 1224 | value = convert_to_mode (mode, value, 1); |
44037a66 TG |
1225 | |
1226 | if (must_and) | |
1227 | value = expand_binop (mode, and_optab, value, | |
1228 | mask_rtx (mode, 0, bitsize, 0), | |
b1ec3c92 | 1229 | NULL_RTX, 1, OPTAB_LIB_WIDEN); |
bebf0797 | 1230 | if (bitnum > 0) |
44037a66 | 1231 | value = expand_shift (LSHIFT_EXPR, mode, value, |
bebf0797 | 1232 | bitnum, NULL_RTX, 1); |
44037a66 TG |
1233 | } |
1234 | ||
ee45a32d EB |
1235 | if (reverse) |
1236 | value = flip_storage_order (mode, value); | |
1237 | ||
44037a66 TG |
1238 | /* Now clear the chosen bits in OP0, |
1239 | except that if VALUE is -1 we need not bother. */ | |
c505fc06 RS |
1240 | /* We keep the intermediates in registers to allow CSE to combine |
1241 | consecutive bitfield assignments. */ | |
44037a66 | 1242 | |
c505fc06 | 1243 | temp = force_reg (mode, op0); |
44037a66 TG |
1244 | |
1245 | if (! all_one) | |
1246 | { | |
ee45a32d EB |
1247 | rtx mask = mask_rtx (mode, bitnum, bitsize, 1); |
1248 | if (reverse) | |
1249 | mask = flip_storage_order (mode, mask); | |
1250 | temp = expand_binop (mode, and_optab, temp, mask, | |
c505fc06 RS |
1251 | NULL_RTX, 1, OPTAB_LIB_WIDEN); |
1252 | temp = force_reg (mode, temp); | |
44037a66 | 1253 | } |
44037a66 TG |
1254 | |
1255 | /* Now logical-or VALUE into OP0, unless it is zero. */ | |
1256 | ||
1257 | if (! all_zero) | |
c505fc06 RS |
1258 | { |
1259 | temp = expand_binop (mode, ior_optab, temp, value, | |
1260 | NULL_RTX, 1, OPTAB_LIB_WIDEN); | |
1261 | temp = force_reg (mode, temp); | |
1262 | } | |
1263 | ||
44037a66 | 1264 | if (op0 != temp) |
4679504c UB |
1265 | { |
1266 | op0 = copy_rtx (op0); | |
1267 | emit_move_insn (op0, temp); | |
1268 | } | |
44037a66 TG |
1269 | } |
1270 | \f | |
06c94bce | 1271 | /* Store a bit field that is split across multiple accessible memory objects. |
44037a66 | 1272 | |
06c94bce | 1273 | OP0 is the REG, SUBREG or MEM rtx for the first of the objects. |
44037a66 TG |
1274 | BITSIZE is the field width; BITPOS the position of its first bit |
1275 | (within the word). | |
06c94bce | 1276 | VALUE is the value to store. |
06c94bce | 1277 | |
ee45a32d EB |
1278 | If REVERSE is true, the store is to be done in reverse order. |
1279 | ||
06c94bce | 1280 | This does not yet handle fields wider than BITS_PER_WORD. */ |
44037a66 TG |
1281 | |
1282 | static void | |
502b8322 | 1283 | store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize, |
1169e45d AH |
1284 | unsigned HOST_WIDE_INT bitpos, |
1285 | unsigned HOST_WIDE_INT bitregion_start, | |
1286 | unsigned HOST_WIDE_INT bitregion_end, | |
ee45a32d | 1287 | rtx value, bool reverse) |
44037a66 | 1288 | { |
ee45a32d | 1289 | unsigned int unit, total_bits, bitsdone = 0; |
4ee16841 | 1290 | |
0eb61c19 DE |
1291 | /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that |
1292 | much at a time. */ | |
f8cfc6aa | 1293 | if (REG_P (op0) || GET_CODE (op0) == SUBREG) |
4ee16841 DE |
1294 | unit = BITS_PER_WORD; |
1295 | else | |
04050c69 | 1296 | unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD); |
e54d80d0 | 1297 | |
ebb99f96 BE |
1298 | /* If OP0 is a memory with a mode, then UNIT must not be larger than |
1299 | OP0's mode as well. Otherwise, store_fixed_bit_field will call us | |
1300 | again, and we will mutually recurse forever. */ | |
1301 | if (MEM_P (op0) && GET_MODE_BITSIZE (GET_MODE (op0)) > 0) | |
1302 | unit = MIN (unit, GET_MODE_BITSIZE (GET_MODE (op0))); | |
1303 | ||
3d709ff0 RS |
1304 | /* If VALUE is a constant other than a CONST_INT, get it into a register in |
1305 | WORD_MODE. If we can do this using gen_lowpart_common, do so. Note | |
1306 | that VALUE might be a floating-point constant. */ | |
481683e1 | 1307 | if (CONSTANT_P (value) && !CONST_INT_P (value)) |
3d709ff0 RS |
1308 | { |
1309 | rtx word = gen_lowpart_common (word_mode, value); | |
1310 | ||
bc8a0e39 | 1311 | if (word && (value != word)) |
3d709ff0 RS |
1312 | value = word; |
1313 | else | |
1314 | value = gen_lowpart_common (word_mode, | |
d01bc862 DE |
1315 | force_reg (GET_MODE (value) != VOIDmode |
1316 | ? GET_MODE (value) | |
1317 | : word_mode, value)); | |
3d709ff0 | 1318 | } |
44037a66 | 1319 | |
ee45a32d EB |
1320 | total_bits = GET_MODE_BITSIZE (GET_MODE (value)); |
1321 | ||
06c94bce | 1322 | while (bitsdone < bitsize) |
44037a66 | 1323 | { |
770ae6cc | 1324 | unsigned HOST_WIDE_INT thissize; |
770ae6cc RK |
1325 | unsigned HOST_WIDE_INT thispos; |
1326 | unsigned HOST_WIDE_INT offset; | |
ee45a32d | 1327 | rtx part, word; |
44037a66 | 1328 | |
06c94bce RS |
1329 | offset = (bitpos + bitsdone) / unit; |
1330 | thispos = (bitpos + bitsdone) % unit; | |
44037a66 | 1331 | |
f1cc9589 | 1332 | /* When region of bytes we can touch is restricted, decrease |
bd3647bf JJ |
1333 | UNIT close to the end of the region as needed. If op0 is a REG |
1334 | or SUBREG of REG, don't do this, as there can't be data races | |
1335 | on a register and we can expand shorter code in some cases. */ | |
f1cc9589 JJ |
1336 | if (bitregion_end |
1337 | && unit > BITS_PER_UNIT | |
bd3647bf JJ |
1338 | && bitpos + bitsdone - thispos + unit > bitregion_end + 1 |
1339 | && !REG_P (op0) | |
1340 | && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0)))) | |
f1cc9589 JJ |
1341 | { |
1342 | unit = unit / 2; | |
1343 | continue; | |
1344 | } | |
1345 | ||
0eb61c19 DE |
1346 | /* THISSIZE must not overrun a word boundary. Otherwise, |
1347 | store_fixed_bit_field will call us again, and we will mutually | |
1348 | recurse forever. */ | |
1349 | thissize = MIN (bitsize - bitsdone, BITS_PER_WORD); | |
1350 | thissize = MIN (thissize, unit - thispos); | |
44037a66 | 1351 | |
ee45a32d | 1352 | if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN) |
f76b9db2 ILT |
1353 | { |
1354 | /* Fetch successively less significant portions. */ | |
481683e1 | 1355 | if (CONST_INT_P (value)) |
f76b9db2 ILT |
1356 | part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value)) |
1357 | >> (bitsize - bitsdone - thissize)) | |
fecfbfa4 | 1358 | & ((HOST_WIDE_INT_1 << thissize) - 1)); |
ee45a32d EB |
1359 | /* Likewise, but the source is little-endian. */ |
1360 | else if (reverse) | |
1361 | part = extract_fixed_bit_field (word_mode, value, thissize, | |
1362 | bitsize - bitsdone - thissize, | |
1363 | NULL_RTX, 1, false); | |
f76b9db2 | 1364 | else |
b8ab7fc8 RS |
1365 | { |
1366 | int total_bits = GET_MODE_BITSIZE (GET_MODE (value)); | |
1367 | /* The args are chosen so that the last part includes the | |
1368 | lsb. Give extract_bit_field the value it needs (with | |
1369 | endianness compensation) to fetch the piece we want. */ | |
1370 | part = extract_fixed_bit_field (word_mode, value, thissize, | |
1371 | total_bits - bitsize + bitsdone, | |
ee45a32d | 1372 | NULL_RTX, 1, false); |
b8ab7fc8 | 1373 | } |
f76b9db2 | 1374 | } |
06c94bce | 1375 | else |
f76b9db2 ILT |
1376 | { |
1377 | /* Fetch successively more significant portions. */ | |
481683e1 | 1378 | if (CONST_INT_P (value)) |
f76b9db2 ILT |
1379 | part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value)) |
1380 | >> bitsdone) | |
fecfbfa4 | 1381 | & ((HOST_WIDE_INT_1 << thissize) - 1)); |
ee45a32d EB |
1382 | /* Likewise, but the source is big-endian. */ |
1383 | else if (reverse) | |
1384 | part = extract_fixed_bit_field (word_mode, value, thissize, | |
1385 | total_bits - bitsdone - thissize, | |
1386 | NULL_RTX, 1, false); | |
f76b9db2 | 1387 | else |
b8ab7fc8 | 1388 | part = extract_fixed_bit_field (word_mode, value, thissize, |
ee45a32d | 1389 | bitsdone, NULL_RTX, 1, false); |
f76b9db2 | 1390 | } |
44037a66 | 1391 | |
867a0126 RS |
1392 | /* If OP0 is a register, then handle OFFSET here. */ |
1393 | if (SUBREG_P (op0) || REG_P (op0)) | |
06c94bce | 1394 | { |
ef4bddc2 | 1395 | machine_mode op0_mode = GET_MODE (op0); |
19228b93 JJ |
1396 | if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD) |
1397 | word = offset ? const0_rtx : op0; | |
1398 | else | |
bd3647bf JJ |
1399 | word = operand_subword_force (op0, offset * unit / BITS_PER_WORD, |
1400 | GET_MODE (op0)); | |
1401 | offset &= BITS_PER_WORD / unit - 1; | |
06c94bce RS |
1402 | } |
1403 | else | |
1404 | word = op0; | |
44037a66 | 1405 | |
bebf0797 | 1406 | /* OFFSET is in UNITs, and UNIT is in bits. If WORD is const0_rtx, |
19228b93 JJ |
1407 | it is just an out-of-bounds access. Ignore it. */ |
1408 | if (word != const0_rtx) | |
bebf0797 | 1409 | store_fixed_bit_field (word, thissize, offset * unit + thispos, |
ee45a32d EB |
1410 | bitregion_start, bitregion_end, part, |
1411 | reverse); | |
06c94bce RS |
1412 | bitsdone += thissize; |
1413 | } | |
44037a66 TG |
1414 | } |
1415 | \f | |
6d7db3c5 RS |
1416 | /* A subroutine of extract_bit_field_1 that converts return value X |
1417 | to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments | |
1418 | to extract_bit_field. */ | |
44037a66 | 1419 | |
6d7db3c5 | 1420 | static rtx |
ef4bddc2 RS |
1421 | convert_extracted_bit_field (rtx x, machine_mode mode, |
1422 | machine_mode tmode, bool unsignedp) | |
6d7db3c5 RS |
1423 | { |
1424 | if (GET_MODE (x) == tmode || GET_MODE (x) == mode) | |
1425 | return x; | |
44037a66 | 1426 | |
6d7db3c5 RS |
1427 | /* If the x mode is not a scalar integral, first convert to the |
1428 | integer mode of that size and then access it as a floating-point | |
1429 | value via a SUBREG. */ | |
1430 | if (!SCALAR_INT_MODE_P (tmode)) | |
1431 | { | |
ef4bddc2 | 1432 | machine_mode smode; |
44037a66 | 1433 | |
6d7db3c5 RS |
1434 | smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0); |
1435 | x = convert_to_mode (smode, x, unsignedp); | |
1436 | x = force_reg (smode, x); | |
1437 | return gen_lowpart (tmode, x); | |
1438 | } | |
44037a66 | 1439 | |
6d7db3c5 RS |
1440 | return convert_to_mode (tmode, x, unsignedp); |
1441 | } | |
1442 | ||
a20556e4 RS |
1443 | /* Try to use an ext(z)v pattern to extract a field from OP0. |
1444 | Return the extracted value on success, otherwise return null. | |
1445 | EXT_MODE is the mode of the extraction and the other arguments | |
1446 | are as for extract_bit_field. */ | |
1447 | ||
1448 | static rtx | |
fcdd52b7 RS |
1449 | extract_bit_field_using_extv (const extraction_insn *extv, rtx op0, |
1450 | unsigned HOST_WIDE_INT bitsize, | |
a20556e4 RS |
1451 | unsigned HOST_WIDE_INT bitnum, |
1452 | int unsignedp, rtx target, | |
ef4bddc2 | 1453 | machine_mode mode, machine_mode tmode) |
a20556e4 RS |
1454 | { |
1455 | struct expand_operand ops[4]; | |
1456 | rtx spec_target = target; | |
1457 | rtx spec_target_subreg = 0; | |
ef4bddc2 | 1458 | machine_mode ext_mode = extv->field_mode; |
a20556e4 RS |
1459 | unsigned unit = GET_MODE_BITSIZE (ext_mode); |
1460 | ||
1461 | if (bitsize == 0 || unit < bitsize) | |
1462 | return NULL_RTX; | |
1463 | ||
1464 | if (MEM_P (op0)) | |
26f8b976 | 1465 | /* Get a reference to the first byte of the field. */ |
fcdd52b7 RS |
1466 | op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum, |
1467 | &bitnum); | |
a20556e4 RS |
1468 | else |
1469 | { | |
1470 | /* Convert from counting within OP0 to counting in EXT_MODE. */ | |
1471 | if (BYTES_BIG_ENDIAN) | |
1472 | bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0)); | |
1473 | ||
1474 | /* If op0 is a register, we need it in EXT_MODE to make it | |
1475 | acceptable to the format of ext(z)v. */ | |
1476 | if (GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode) | |
1477 | return NULL_RTX; | |
1478 | if (REG_P (op0) && GET_MODE (op0) != ext_mode) | |
1479 | op0 = gen_lowpart_SUBREG (ext_mode, op0); | |
1480 | } | |
1481 | ||
1482 | /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count | |
1483 | "backwards" from the size of the unit we are extracting from. | |
1484 | Otherwise, we count bits from the most significant on a | |
1485 | BYTES/BITS_BIG_ENDIAN machine. */ | |
1486 | ||
1487 | if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN) | |
1488 | bitnum = unit - bitsize - bitnum; | |
1489 | ||
1490 | if (target == 0) | |
1491 | target = spec_target = gen_reg_rtx (tmode); | |
1492 | ||
1493 | if (GET_MODE (target) != ext_mode) | |
1494 | { | |
1495 | /* Don't use LHS paradoxical subreg if explicit truncation is needed | |
1496 | between the mode of the extraction (word_mode) and the target | |
1497 | mode. Instead, create a temporary and use convert_move to set | |
1498 | the target. */ | |
1499 | if (REG_P (target) | |
1500 | && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode)) | |
1501 | { | |
1502 | target = gen_lowpart (ext_mode, target); | |
1503 | if (GET_MODE_PRECISION (ext_mode) | |
1504 | > GET_MODE_PRECISION (GET_MODE (spec_target))) | |
1505 | spec_target_subreg = target; | |
1506 | } | |
1507 | else | |
1508 | target = gen_reg_rtx (ext_mode); | |
1509 | } | |
1510 | ||
1511 | create_output_operand (&ops[0], target, ext_mode); | |
1512 | create_fixed_operand (&ops[1], op0); | |
1513 | create_integer_operand (&ops[2], bitsize); | |
1514 | create_integer_operand (&ops[3], bitnum); | |
fcdd52b7 | 1515 | if (maybe_expand_insn (extv->icode, 4, ops)) |
a20556e4 RS |
1516 | { |
1517 | target = ops[0].value; | |
1518 | if (target == spec_target) | |
1519 | return target; | |
1520 | if (target == spec_target_subreg) | |
1521 | return spec_target; | |
1522 | return convert_extracted_bit_field (target, mode, tmode, unsignedp); | |
1523 | } | |
1524 | return NULL_RTX; | |
1525 | } | |
1526 | ||
6d7db3c5 RS |
1527 | /* A subroutine of extract_bit_field, with the same arguments. |
1528 | If FALLBACK_P is true, fall back to extract_fixed_bit_field | |
1529 | if we can find no other means of implementing the operation. | |
1530 | if FALLBACK_P is false, return NULL instead. */ | |
1531 | ||
1532 | static rtx | |
1533 | extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize, | |
c6285bd7 | 1534 | unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target, |
ef4bddc2 | 1535 | machine_mode mode, machine_mode tmode, |
f96bf49a | 1536 | bool reverse, bool fallback_p, rtx *alt_rtl) |
44037a66 | 1537 | { |
b3694847 | 1538 | rtx op0 = str_rtx; |
ef4bddc2 RS |
1539 | machine_mode int_mode; |
1540 | machine_mode mode1; | |
44037a66 | 1541 | |
44037a66 TG |
1542 | if (tmode == VOIDmode) |
1543 | tmode = mode; | |
6ca6193b | 1544 | |
44037a66 TG |
1545 | while (GET_CODE (op0) == SUBREG) |
1546 | { | |
2c58f7dd | 1547 | bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT; |
44037a66 TG |
1548 | op0 = SUBREG_REG (op0); |
1549 | } | |
77295dec | 1550 | |
2c58f7dd | 1551 | /* If we have an out-of-bounds access to a register, just return an |
647eea9d | 1552 | uninitialized register of the required mode. This can occur if the |
2c58f7dd RS |
1553 | source code contains an out-of-bounds access to a small array. */ |
1554 | if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0))) | |
1555 | return gen_reg_rtx (tmode); | |
1556 | ||
f8cfc6aa | 1557 | if (REG_P (op0) |
aac280fb DD |
1558 | && mode == GET_MODE (op0) |
1559 | && bitnum == 0 | |
0b69c29f | 1560 | && bitsize == GET_MODE_BITSIZE (GET_MODE (op0))) |
aac280fb | 1561 | { |
ee45a32d EB |
1562 | if (reverse) |
1563 | op0 = flip_storage_order (mode, op0); | |
0b69c29f | 1564 | /* We're trying to extract a full register from itself. */ |
aac280fb DD |
1565 | return op0; |
1566 | } | |
1567 | ||
ff03930a JJ |
1568 | /* First try to check for vector from vector extractions. */ |
1569 | if (VECTOR_MODE_P (GET_MODE (op0)) | |
1570 | && !MEM_P (op0) | |
1571 | && VECTOR_MODE_P (tmode) | |
1572 | && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (tmode)) | |
1573 | { | |
1574 | machine_mode new_mode = GET_MODE (op0); | |
1575 | if (GET_MODE_INNER (new_mode) != GET_MODE_INNER (tmode)) | |
1576 | { | |
1577 | new_mode = mode_for_vector (GET_MODE_INNER (tmode), | |
1578 | GET_MODE_BITSIZE (GET_MODE (op0)) | |
1579 | / GET_MODE_UNIT_BITSIZE (tmode)); | |
1580 | if (!VECTOR_MODE_P (new_mode) | |
1581 | || GET_MODE_SIZE (new_mode) != GET_MODE_SIZE (GET_MODE (op0)) | |
1582 | || GET_MODE_INNER (new_mode) != GET_MODE_INNER (tmode) | |
1583 | || !targetm.vector_mode_supported_p (new_mode)) | |
1584 | new_mode = VOIDmode; | |
1585 | } | |
1586 | if (new_mode != VOIDmode | |
1587 | && (convert_optab_handler (vec_extract_optab, new_mode, tmode) | |
1588 | != CODE_FOR_nothing) | |
1589 | && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (tmode) | |
1590 | == bitnum / GET_MODE_BITSIZE (tmode))) | |
1591 | { | |
1592 | struct expand_operand ops[3]; | |
1593 | machine_mode outermode = new_mode; | |
1594 | machine_mode innermode = tmode; | |
1595 | enum insn_code icode | |
1596 | = convert_optab_handler (vec_extract_optab, outermode, innermode); | |
1597 | unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode); | |
1598 | ||
1599 | if (new_mode != GET_MODE (op0)) | |
1600 | op0 = gen_lowpart (new_mode, op0); | |
1601 | create_output_operand (&ops[0], target, innermode); | |
1602 | ops[0].target = 1; | |
1603 | create_input_operand (&ops[1], op0, outermode); | |
1604 | create_integer_operand (&ops[2], pos); | |
1605 | if (maybe_expand_insn (icode, 3, ops)) | |
1606 | { | |
1607 | if (alt_rtl && ops[0].target) | |
1608 | *alt_rtl = target; | |
1609 | target = ops[0].value; | |
1610 | if (GET_MODE (target) != mode) | |
1611 | return gen_lowpart (tmode, target); | |
1612 | return target; | |
1613 | } | |
1614 | } | |
1615 | } | |
1616 | ||
0890b981 AP |
1617 | /* See if we can get a better vector mode before extracting. */ |
1618 | if (VECTOR_MODE_P (GET_MODE (op0)) | |
1619 | && !MEM_P (op0) | |
1620 | && GET_MODE_INNER (GET_MODE (op0)) != tmode) | |
1621 | { | |
ef4bddc2 | 1622 | machine_mode new_mode; |
0890b981 AP |
1623 | |
1624 | if (GET_MODE_CLASS (tmode) == MODE_FLOAT) | |
1625 | new_mode = MIN_MODE_VECTOR_FLOAT; | |
325217ed CF |
1626 | else if (GET_MODE_CLASS (tmode) == MODE_FRACT) |
1627 | new_mode = MIN_MODE_VECTOR_FRACT; | |
1628 | else if (GET_MODE_CLASS (tmode) == MODE_UFRACT) | |
1629 | new_mode = MIN_MODE_VECTOR_UFRACT; | |
1630 | else if (GET_MODE_CLASS (tmode) == MODE_ACCUM) | |
1631 | new_mode = MIN_MODE_VECTOR_ACCUM; | |
1632 | else if (GET_MODE_CLASS (tmode) == MODE_UACCUM) | |
1633 | new_mode = MIN_MODE_VECTOR_UACCUM; | |
0890b981 AP |
1634 | else |
1635 | new_mode = MIN_MODE_VECTOR_INT; | |
1636 | ||
c94843d2 | 1637 | FOR_EACH_MODE_FROM (new_mode, new_mode) |
b147c5b9 | 1638 | if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0)) |
ab2b55c1 | 1639 | && GET_MODE_UNIT_SIZE (new_mode) == GET_MODE_SIZE (tmode) |
0890b981 AP |
1640 | && targetm.vector_mode_supported_p (new_mode)) |
1641 | break; | |
1642 | if (new_mode != VOIDmode) | |
1643 | op0 = gen_lowpart (new_mode, op0); | |
1644 | } | |
1645 | ||
997404de JH |
1646 | /* Use vec_extract patterns for extracting parts of vectors whenever |
1647 | available. */ | |
1648 | if (VECTOR_MODE_P (GET_MODE (op0)) | |
3c0cb5de | 1649 | && !MEM_P (op0) |
ff03930a JJ |
1650 | && (convert_optab_handler (vec_extract_optab, GET_MODE (op0), |
1651 | GET_MODE_INNER (GET_MODE (op0))) | |
1652 | != CODE_FOR_nothing) | |
6c825cd4 DS |
1653 | && ((bitnum + bitsize - 1) / GET_MODE_UNIT_BITSIZE (GET_MODE (op0)) |
1654 | == bitnum / GET_MODE_UNIT_BITSIZE (GET_MODE (op0)))) | |
997404de | 1655 | { |
a5c7d693 | 1656 | struct expand_operand ops[3]; |
ef4bddc2 RS |
1657 | machine_mode outermode = GET_MODE (op0); |
1658 | machine_mode innermode = GET_MODE_INNER (outermode); | |
ff03930a JJ |
1659 | enum insn_code icode |
1660 | = convert_optab_handler (vec_extract_optab, outermode, innermode); | |
b42271d6 | 1661 | unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode); |
997404de | 1662 | |
a5c7d693 | 1663 | create_output_operand (&ops[0], target, innermode); |
f96bf49a | 1664 | ops[0].target = 1; |
a5c7d693 RS |
1665 | create_input_operand (&ops[1], op0, outermode); |
1666 | create_integer_operand (&ops[2], pos); | |
1667 | if (maybe_expand_insn (icode, 3, ops)) | |
997404de | 1668 | { |
f96bf49a JW |
1669 | if (alt_rtl && ops[0].target) |
1670 | *alt_rtl = target; | |
a5c7d693 RS |
1671 | target = ops[0].value; |
1672 | if (GET_MODE (target) != mode) | |
1673 | return gen_lowpart (tmode, target); | |
1674 | return target; | |
997404de JH |
1675 | } |
1676 | } | |
1677 | ||
d006aa54 RH |
1678 | /* Make sure we are playing with integral modes. Pun with subregs |
1679 | if we aren't. */ | |
1680 | { | |
ef4bddc2 | 1681 | machine_mode imode = int_mode_for_mode (GET_MODE (op0)); |
d006aa54 RH |
1682 | if (imode != GET_MODE (op0)) |
1683 | { | |
a6d2976a | 1684 | if (MEM_P (op0)) |
e98fc6de | 1685 | op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0)); |
7d293b58 | 1686 | else if (imode != BLKmode) |
a6d2976a | 1687 | { |
a6d2976a | 1688 | op0 = gen_lowpart (imode, op0); |
360e3535 | 1689 | |
a6d2976a JDA |
1690 | /* If we got a SUBREG, force it into a register since we |
1691 | aren't going to be able to do another SUBREG on it. */ | |
1692 | if (GET_CODE (op0) == SUBREG) | |
1693 | op0 = force_reg (imode, op0); | |
1694 | } | |
7d293b58 JJ |
1695 | else |
1696 | { | |
e98fc6de RS |
1697 | HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (op0)); |
1698 | rtx mem = assign_stack_temp (GET_MODE (op0), size); | |
7d293b58 | 1699 | emit_move_insn (mem, op0); |
e98fc6de | 1700 | op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size); |
7d293b58 | 1701 | } |
d006aa54 RH |
1702 | } |
1703 | } | |
1704 | ||
6ca6193b JDA |
1705 | /* ??? We currently assume TARGET is at least as big as BITSIZE. |
1706 | If that's wrong, the solution is to test for it and set TARGET to 0 | |
1707 | if needed. */ | |
e98f90d3 | 1708 | |
f5d4f18c SL |
1709 | /* Get the mode of the field to use for atomic access or subreg |
1710 | conversion. */ | |
b8ab7fc8 RS |
1711 | mode1 = mode; |
1712 | if (SCALAR_INT_MODE_P (tmode)) | |
44037a66 | 1713 | { |
ef4bddc2 | 1714 | machine_mode try_mode = mode_for_size (bitsize, |
b8ab7fc8 RS |
1715 | GET_MODE_CLASS (tmode), 0); |
1716 | if (try_mode != BLKmode) | |
1717 | mode1 = try_mode; | |
1718 | } | |
1719 | gcc_assert (mode1 != BLKmode); | |
1720 | ||
1721 | /* Extraction of a full MODE1 value can be done with a subreg as long | |
1722 | as the least significant bit of the value is the least significant | |
1723 | bit of either OP0 or a word of OP0. */ | |
1724 | if (!MEM_P (op0) | |
ee45a32d | 1725 | && !reverse |
b8ab7fc8 RS |
1726 | && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0)) |
1727 | && bitsize == GET_MODE_BITSIZE (mode1) | |
1728 | && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0))) | |
1729 | { | |
1730 | rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0), | |
1731 | bitnum / BITS_PER_UNIT); | |
1732 | if (sub) | |
1733 | return convert_extracted_bit_field (sub, mode, tmode, unsignedp); | |
1734 | } | |
1735 | ||
1736 | /* Extraction of a full MODE1 value can be done with a load as long as | |
1737 | the field is on a byte boundary and is sufficiently aligned. */ | |
00efe3ea | 1738 | if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1)) |
b8ab7fc8 RS |
1739 | { |
1740 | op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT); | |
ee45a32d EB |
1741 | if (reverse) |
1742 | op0 = flip_storage_order (mode1, op0); | |
b8ab7fc8 | 1743 | return convert_extracted_bit_field (op0, mode, tmode, unsignedp); |
44037a66 | 1744 | } |
b8ab7fc8 | 1745 | |
44037a66 | 1746 | /* Handle fields bigger than a word. */ |
c410d49e | 1747 | |
44037a66 TG |
1748 | if (bitsize > BITS_PER_WORD) |
1749 | { | |
1750 | /* Here we transfer the words of the field | |
1751 | in the order least significant first. | |
1752 | This is because the most significant word is the one which may | |
1753 | be less than full. */ | |
1754 | ||
ee45a32d | 1755 | const bool backwards = WORDS_BIG_ENDIAN; |
770ae6cc RK |
1756 | unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD; |
1757 | unsigned int i; | |
f3f6fb16 | 1758 | rtx_insn *last; |
44037a66 | 1759 | |
02972eaf | 1760 | if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target)) |
44037a66 TG |
1761 | target = gen_reg_rtx (mode); |
1762 | ||
7d21a61e DD |
1763 | /* In case we're about to clobber a base register or something |
1764 | (see gcc.c-torture/execute/20040625-1.c). */ | |
1765 | if (reg_mentioned_p (target, str_rtx)) | |
1766 | target = gen_reg_rtx (mode); | |
1767 | ||
34ea783b | 1768 | /* Indicate for flow that the entire target reg is being set. */ |
c41c1387 | 1769 | emit_clobber (target); |
34ea783b | 1770 | |
5ef0b50d | 1771 | last = get_last_insn (); |
44037a66 TG |
1772 | for (i = 0; i < nwords; i++) |
1773 | { | |
1774 | /* If I is 0, use the low-order word in both field and target; | |
1775 | if I is 1, use the next to lowest word; and so on. */ | |
77295dec | 1776 | /* Word number in TARGET to use. */ |
770ae6cc | 1777 | unsigned int wordnum |
0cd9e9ee | 1778 | = (backwards |
770ae6cc RK |
1779 | ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1 |
1780 | : i); | |
77295dec | 1781 | /* Offset from start of field in OP0. */ |
ee45a32d | 1782 | unsigned int bit_offset = (backwards ^ reverse |
0cd9e9ee EB |
1783 | ? MAX ((int) bitsize - ((int) i + 1) |
1784 | * BITS_PER_WORD, | |
1785 | 0) | |
770ae6cc | 1786 | : (int) i * BITS_PER_WORD); |
44037a66 TG |
1787 | rtx target_part = operand_subword (target, wordnum, 1, VOIDmode); |
1788 | rtx result_part | |
5ef0b50d EB |
1789 | = extract_bit_field_1 (op0, MIN (BITS_PER_WORD, |
1790 | bitsize - i * BITS_PER_WORD), | |
c6285bd7 | 1791 | bitnum + bit_offset, 1, target_part, |
f96bf49a | 1792 | mode, word_mode, reverse, fallback_p, NULL); |
44037a66 | 1793 | |
5b0264cb | 1794 | gcc_assert (target_part); |
5ef0b50d EB |
1795 | if (!result_part) |
1796 | { | |
1797 | delete_insns_since (last); | |
1798 | return NULL; | |
1799 | } | |
44037a66 TG |
1800 | |
1801 | if (result_part != target_part) | |
1802 | emit_move_insn (target_part, result_part); | |
1803 | } | |
1804 | ||
5f57dff0 | 1805 | if (unsignedp) |
77295dec DE |
1806 | { |
1807 | /* Unless we've filled TARGET, the upper regs in a multi-reg value | |
1808 | need to be zero'd out. */ | |
1809 | if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD) | |
1810 | { | |
770ae6cc | 1811 | unsigned int i, total_words; |
77295dec DE |
1812 | |
1813 | total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD; | |
1814 | for (i = nwords; i < total_words; i++) | |
04050c69 RK |
1815 | emit_move_insn |
1816 | (operand_subword (target, | |
0cd9e9ee | 1817 | backwards ? total_words - i - 1 : i, |
04050c69 RK |
1818 | 1, VOIDmode), |
1819 | const0_rtx); | |
77295dec DE |
1820 | } |
1821 | return target; | |
1822 | } | |
1823 | ||
5f57dff0 JW |
1824 | /* Signed bit field: sign-extend with two arithmetic shifts. */ |
1825 | target = expand_shift (LSHIFT_EXPR, mode, target, | |
eb6c3df1 | 1826 | GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0); |
5f57dff0 | 1827 | return expand_shift (RSHIFT_EXPR, mode, target, |
eb6c3df1 | 1828 | GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0); |
44037a66 | 1829 | } |
c410d49e | 1830 | |
b8ab7fc8 RS |
1831 | /* If OP0 is a multi-word register, narrow it to the affected word. |
1832 | If the region spans two words, defer to extract_split_bit_field. */ | |
1833 | if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD) | |
44037a66 | 1834 | { |
867a0126 | 1835 | if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD) |
470032d7 | 1836 | { |
b8ab7fc8 RS |
1837 | if (!fallback_p) |
1838 | return NULL_RTX; | |
ee45a32d EB |
1839 | target = extract_split_bit_field (op0, bitsize, bitnum, unsignedp, |
1840 | reverse); | |
b8ab7fc8 | 1841 | return convert_extracted_bit_field (target, mode, tmode, unsignedp); |
470032d7 | 1842 | } |
867a0126 RS |
1843 | op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0), |
1844 | bitnum / BITS_PER_WORD * UNITS_PER_WORD); | |
1845 | bitnum %= BITS_PER_WORD; | |
44037a66 | 1846 | } |
44037a66 | 1847 | |
b8ab7fc8 RS |
1848 | /* From here on we know the desired field is smaller than a word. |
1849 | If OP0 is a register, it too fits within a word. */ | |
fcdd52b7 RS |
1850 | enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv; |
1851 | extraction_insn extv; | |
1852 | if (!MEM_P (op0) | |
ee45a32d | 1853 | && !reverse |
c0a8a3e6 RS |
1854 | /* ??? We could limit the structure size to the part of OP0 that |
1855 | contains the field, with appropriate checks for endianness | |
1856 | and TRULY_NOOP_TRUNCATION. */ | |
1857 | && get_best_reg_extraction_insn (&extv, pattern, | |
1858 | GET_MODE_BITSIZE (GET_MODE (op0)), | |
fcdd52b7 | 1859 | tmode)) |
44037a66 | 1860 | { |
fcdd52b7 | 1861 | rtx result = extract_bit_field_using_extv (&extv, op0, bitsize, bitnum, |
a20556e4 | 1862 | unsignedp, target, mode, |
fcdd52b7 | 1863 | tmode); |
a20556e4 RS |
1864 | if (result) |
1865 | return result; | |
44037a66 | 1866 | } |
f76b9db2 | 1867 | |
6d7db3c5 RS |
1868 | /* If OP0 is a memory, try copying it to a register and seeing if a |
1869 | cheap register alternative is available. */ | |
ee45a32d | 1870 | if (MEM_P (op0) & !reverse) |
6d7db3c5 | 1871 | { |
f5d4f18c SL |
1872 | if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum, |
1873 | tmode)) | |
17a73ba0 | 1874 | { |
fcdd52b7 RS |
1875 | rtx result = extract_bit_field_using_extv (&extv, op0, bitsize, |
1876 | bitnum, unsignedp, | |
1877 | target, mode, | |
1878 | tmode); | |
17a73ba0 RS |
1879 | if (result) |
1880 | return result; | |
1881 | } | |
1882 | ||
f3f6fb16 | 1883 | rtx_insn *last = get_last_insn (); |
f76b9db2 | 1884 | |
fcdd52b7 RS |
1885 | /* Try loading part of OP0 into a register and extracting the |
1886 | bitfield from that. */ | |
1887 | unsigned HOST_WIDE_INT bitpos; | |
1888 | rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum, | |
1889 | 0, 0, tmode, &bitpos); | |
1890 | if (xop0) | |
6d7db3c5 | 1891 | { |
fcdd52b7 RS |
1892 | xop0 = copy_to_reg (xop0); |
1893 | rtx result = extract_bit_field_1 (xop0, bitsize, bitpos, | |
c6285bd7 | 1894 | unsignedp, target, |
f96bf49a | 1895 | mode, tmode, reverse, false, NULL); |
fcdd52b7 RS |
1896 | if (result) |
1897 | return result; | |
1898 | delete_insns_since (last); | |
c410d49e | 1899 | } |
44037a66 | 1900 | } |
562fc702 | 1901 | |
6d7db3c5 RS |
1902 | if (!fallback_p) |
1903 | return NULL; | |
1904 | ||
b8ab7fc8 RS |
1905 | /* Find a correspondingly-sized integer field, so we can apply |
1906 | shifts and masks to it. */ | |
1907 | int_mode = int_mode_for_mode (tmode); | |
1908 | if (int_mode == BLKmode) | |
1909 | int_mode = int_mode_for_mode (mode); | |
1910 | /* Should probably push op0 out to memory and then do a load. */ | |
1911 | gcc_assert (int_mode != BLKmode); | |
1912 | ||
ee45a32d EB |
1913 | target = extract_fixed_bit_field (int_mode, op0, bitsize, bitnum, target, |
1914 | unsignedp, reverse); | |
1915 | ||
1916 | /* Complex values must be reversed piecewise, so we need to undo the global | |
1917 | reversal, convert to the complex mode and reverse again. */ | |
1918 | if (reverse && COMPLEX_MODE_P (tmode)) | |
1919 | { | |
1920 | target = flip_storage_order (int_mode, target); | |
1921 | target = convert_extracted_bit_field (target, mode, tmode, unsignedp); | |
1922 | target = flip_storage_order (tmode, target); | |
1923 | } | |
1924 | else | |
1925 | target = convert_extracted_bit_field (target, mode, tmode, unsignedp); | |
1926 | ||
1927 | return target; | |
6d7db3c5 RS |
1928 | } |
1929 | ||
1930 | /* Generate code to extract a byte-field from STR_RTX | |
1931 | containing BITSIZE bits, starting at BITNUM, | |
1932 | and put it in TARGET if possible (if TARGET is nonzero). | |
1933 | Regardless of TARGET, we return the rtx for where the value is placed. | |
1934 | ||
1935 | STR_RTX is the structure containing the byte (a REG or MEM). | |
1936 | UNSIGNEDP is nonzero if this is an unsigned bit field. | |
1937 | MODE is the natural mode of the field value once extracted. | |
1938 | TMODE is the mode the caller would like the value to have; | |
1939 | but the value may be returned with type MODE instead. | |
1940 | ||
ee45a32d EB |
1941 | If REVERSE is true, the extraction is to be done in reverse order. |
1942 | ||
6d7db3c5 RS |
1943 | If a TARGET is specified and we can store in it at no extra cost, |
1944 | we do so, and return TARGET. | |
1945 | Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred | |
1946 | if they are equally easy. */ | |
1947 | ||
1948 | rtx | |
1949 | extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize, | |
c6285bd7 | 1950 | unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target, |
f96bf49a JW |
1951 | machine_mode mode, machine_mode tmode, bool reverse, |
1952 | rtx *alt_rtl) | |
6d7db3c5 | 1953 | { |
ef4bddc2 | 1954 | machine_mode mode1; |
f5d4f18c SL |
1955 | |
1956 | /* Handle -fstrict-volatile-bitfields in the cases where it applies. */ | |
1957 | if (GET_MODE_BITSIZE (GET_MODE (str_rtx)) > 0) | |
1958 | mode1 = GET_MODE (str_rtx); | |
1959 | else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0) | |
1960 | mode1 = GET_MODE (target); | |
1961 | else | |
1962 | mode1 = tmode; | |
1963 | ||
6f4e9cf8 | 1964 | if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, mode1, 0, 0)) |
f5d4f18c | 1965 | { |
b6dd42a9 BE |
1966 | /* Extraction of a full MODE1 value can be done with a simple load. |
1967 | We know here that the field can be accessed with one single | |
1968 | instruction. For targets that support unaligned memory, | |
1969 | an unaligned access may be necessary. */ | |
1970 | if (bitsize == GET_MODE_BITSIZE (mode1)) | |
6f4e9cf8 | 1971 | { |
53c615a2 BE |
1972 | rtx result = adjust_bitfield_address (str_rtx, mode1, |
1973 | bitnum / BITS_PER_UNIT); | |
ee45a32d EB |
1974 | if (reverse) |
1975 | result = flip_storage_order (mode1, result); | |
b6dd42a9 | 1976 | gcc_assert (bitnum % BITS_PER_UNIT == 0); |
53c615a2 | 1977 | return convert_extracted_bit_field (result, mode, tmode, unsignedp); |
6f4e9cf8 BE |
1978 | } |
1979 | ||
53c615a2 BE |
1980 | str_rtx = narrow_bit_field_mem (str_rtx, mode1, bitsize, bitnum, |
1981 | &bitnum); | |
b6dd42a9 | 1982 | gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (mode1)); |
53c615a2 | 1983 | str_rtx = copy_to_reg (str_rtx); |
f5d4f18c | 1984 | } |
53c615a2 | 1985 | |
c6285bd7 | 1986 | return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp, |
f96bf49a | 1987 | target, mode, tmode, reverse, true, alt_rtl); |
44037a66 TG |
1988 | } |
1989 | \f | |
b8ab7fc8 RS |
1990 | /* Use shifts and boolean operations to extract a field of BITSIZE bits |
1991 | from bit BITNUM of OP0. | |
44037a66 TG |
1992 | |
1993 | UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value). | |
ee45a32d EB |
1994 | If REVERSE is true, the extraction is to be done in reverse order. |
1995 | ||
44037a66 TG |
1996 | If TARGET is nonzero, attempts to store the value there |
1997 | and return TARGET, but this is not guaranteed. | |
04050c69 | 1998 | If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */ |
44037a66 TG |
1999 | |
2000 | static rtx | |
ef4bddc2 | 2001 | extract_fixed_bit_field (machine_mode tmode, rtx op0, |
502b8322 | 2002 | unsigned HOST_WIDE_INT bitsize, |
b8ab7fc8 | 2003 | unsigned HOST_WIDE_INT bitnum, rtx target, |
ee45a32d | 2004 | int unsignedp, bool reverse) |
44037a66 | 2005 | { |
b8ab7fc8 | 2006 | if (MEM_P (op0)) |
44037a66 | 2007 | { |
ef4bddc2 | 2008 | machine_mode mode |
548cfdc2 EB |
2009 | = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0), word_mode, |
2010 | MEM_VOLATILE_P (op0)); | |
44037a66 TG |
2011 | |
2012 | if (mode == VOIDmode) | |
2013 | /* The only way this should occur is if the field spans word | |
2014 | boundaries. */ | |
ee45a32d EB |
2015 | return extract_split_bit_field (op0, bitsize, bitnum, unsignedp, |
2016 | reverse); | |
44037a66 | 2017 | |
f5d4f18c | 2018 | op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum); |
44037a66 TG |
2019 | } |
2020 | ||
6f4e9cf8 | 2021 | return extract_fixed_bit_field_1 (tmode, op0, bitsize, bitnum, |
ee45a32d | 2022 | target, unsignedp, reverse); |
6f4e9cf8 BE |
2023 | } |
2024 | ||
2025 | /* Helper function for extract_fixed_bit_field, extracts | |
2026 | the bit field always using the MODE of OP0. */ | |
2027 | ||
2028 | static rtx | |
ef4bddc2 | 2029 | extract_fixed_bit_field_1 (machine_mode tmode, rtx op0, |
6f4e9cf8 BE |
2030 | unsigned HOST_WIDE_INT bitsize, |
2031 | unsigned HOST_WIDE_INT bitnum, rtx target, | |
ee45a32d | 2032 | int unsignedp, bool reverse) |
6f4e9cf8 | 2033 | { |
ef4bddc2 | 2034 | machine_mode mode = GET_MODE (op0); |
b8ab7fc8 RS |
2035 | gcc_assert (SCALAR_INT_MODE_P (mode)); |
2036 | ||
2037 | /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode) | |
2038 | for invalid input, such as extract equivalent of f5 from | |
2039 | gcc.dg/pr48335-2.c. */ | |
37811a73 | 2040 | |
ee45a32d | 2041 | if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN) |
b8ab7fc8 | 2042 | /* BITNUM is the distance between our msb and that of OP0. |
04050c69 | 2043 | Convert it to the distance from the lsb. */ |
b8ab7fc8 | 2044 | bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum; |
44037a66 | 2045 | |
b8ab7fc8 | 2046 | /* Now BITNUM is always the distance between the field's lsb and that of OP0. |
44037a66 | 2047 | We have reduced the big-endian case to the little-endian case. */ |
ee45a32d EB |
2048 | if (reverse) |
2049 | op0 = flip_storage_order (mode, op0); | |
44037a66 TG |
2050 | |
2051 | if (unsignedp) | |
2052 | { | |
b8ab7fc8 | 2053 | if (bitnum) |
44037a66 TG |
2054 | { |
2055 | /* If the field does not already start at the lsb, | |
2056 | shift it so it does. */ | |
44037a66 | 2057 | /* Maybe propagate the target for the shift. */ |
f8cfc6aa | 2058 | rtx subtarget = (target != 0 && REG_P (target) ? target : 0); |
6399c0ab SB |
2059 | if (tmode != mode) |
2060 | subtarget = 0; | |
b8ab7fc8 | 2061 | op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1); |
44037a66 TG |
2062 | } |
2063 | /* Convert the value to the desired mode. */ | |
2064 | if (mode != tmode) | |
2065 | op0 = convert_to_mode (tmode, op0, 1); | |
2066 | ||
2067 | /* Unless the msb of the field used to be the msb when we shifted, | |
2068 | mask out the upper bits. */ | |
2069 | ||
b8ab7fc8 | 2070 | if (GET_MODE_BITSIZE (mode) != bitnum + bitsize) |
44037a66 TG |
2071 | return expand_binop (GET_MODE (op0), and_optab, op0, |
2072 | mask_rtx (GET_MODE (op0), 0, bitsize, 0), | |
2073 | target, 1, OPTAB_LIB_WIDEN); | |
2074 | return op0; | |
2075 | } | |
2076 | ||
2077 | /* To extract a signed bit-field, first shift its msb to the msb of the word, | |
2078 | then arithmetic-shift its lsb to the lsb of the word. */ | |
2079 | op0 = force_reg (mode, op0); | |
44037a66 TG |
2080 | |
2081 | /* Find the narrowest integer mode that contains the field. */ | |
2082 | ||
c94843d2 | 2083 | FOR_EACH_MODE_IN_CLASS (mode, MODE_INT) |
b8ab7fc8 | 2084 | if (GET_MODE_BITSIZE (mode) >= bitsize + bitnum) |
44037a66 TG |
2085 | { |
2086 | op0 = convert_to_mode (mode, op0, 0); | |
2087 | break; | |
2088 | } | |
2089 | ||
ccb1b17b JJ |
2090 | if (mode != tmode) |
2091 | target = 0; | |
2092 | ||
b8ab7fc8 | 2093 | if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum)) |
44037a66 | 2094 | { |
b8ab7fc8 | 2095 | int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum); |
44037a66 | 2096 | /* Maybe propagate the target for the shift. */ |
f8cfc6aa | 2097 | rtx subtarget = (target != 0 && REG_P (target) ? target : 0); |
44037a66 TG |
2098 | op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1); |
2099 | } | |
2100 | ||
2101 | return expand_shift (RSHIFT_EXPR, mode, op0, | |
eb6c3df1 | 2102 | GET_MODE_BITSIZE (mode) - bitsize, target, 0); |
44037a66 | 2103 | } |
44037a66 TG |
2104 | |
2105 | /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value | |
088c5368 | 2106 | VALUE << BITPOS. */ |
44037a66 TG |
2107 | |
2108 | static rtx | |
ef4bddc2 | 2109 | lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value, |
088c5368 | 2110 | int bitpos) |
44037a66 | 2111 | { |
807e902e | 2112 | return immed_wide_int_const (wi::lshift (value, bitpos), mode); |
44037a66 TG |
2113 | } |
2114 | \f | |
2115 | /* Extract a bit field that is split across two words | |
2116 | and return an RTX for the result. | |
2117 | ||
2118 | OP0 is the REG, SUBREG or MEM rtx for the first of the two words. | |
2119 | BITSIZE is the field width; BITPOS, position of its first bit, in the word. | |
ee45a32d EB |
2120 | UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend. |
2121 | ||
2122 | If REVERSE is true, the extraction is to be done in reverse order. */ | |
44037a66 TG |
2123 | |
2124 | static rtx | |
502b8322 | 2125 | extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize, |
ee45a32d EB |
2126 | unsigned HOST_WIDE_INT bitpos, int unsignedp, |
2127 | bool reverse) | |
44037a66 | 2128 | { |
770ae6cc RK |
2129 | unsigned int unit; |
2130 | unsigned int bitsdone = 0; | |
c16ddde3 | 2131 | rtx result = NULL_RTX; |
06c94bce | 2132 | int first = 1; |
44037a66 | 2133 | |
4ee16841 DE |
2134 | /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that |
2135 | much at a time. */ | |
f8cfc6aa | 2136 | if (REG_P (op0) || GET_CODE (op0) == SUBREG) |
4ee16841 DE |
2137 | unit = BITS_PER_WORD; |
2138 | else | |
609023ff | 2139 | unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD); |
4ee16841 | 2140 | |
06c94bce RS |
2141 | while (bitsdone < bitsize) |
2142 | { | |
770ae6cc | 2143 | unsigned HOST_WIDE_INT thissize; |
06c94bce | 2144 | rtx part, word; |
770ae6cc RK |
2145 | unsigned HOST_WIDE_INT thispos; |
2146 | unsigned HOST_WIDE_INT offset; | |
06c94bce RS |
2147 | |
2148 | offset = (bitpos + bitsdone) / unit; | |
2149 | thispos = (bitpos + bitsdone) % unit; | |
2150 | ||
0eb61c19 DE |
2151 | /* THISSIZE must not overrun a word boundary. Otherwise, |
2152 | extract_fixed_bit_field will call us again, and we will mutually | |
2153 | recurse forever. */ | |
2154 | thissize = MIN (bitsize - bitsdone, BITS_PER_WORD); | |
2155 | thissize = MIN (thissize, unit - thispos); | |
06c94bce | 2156 | |
867a0126 RS |
2157 | /* If OP0 is a register, then handle OFFSET here. */ |
2158 | if (SUBREG_P (op0) || REG_P (op0)) | |
06c94bce RS |
2159 | { |
2160 | word = operand_subword_force (op0, offset, GET_MODE (op0)); | |
2161 | offset = 0; | |
2162 | } | |
2163 | else | |
2164 | word = op0; | |
2165 | ||
06c94bce | 2166 | /* Extract the parts in bit-counting order, |
0eb61c19 | 2167 | whose meaning is determined by BYTES_PER_UNIT. |
b8ab7fc8 RS |
2168 | OFFSET is in UNITs, and UNIT is in bits. */ |
2169 | part = extract_fixed_bit_field (word_mode, word, thissize, | |
ee45a32d | 2170 | offset * unit + thispos, 0, 1, reverse); |
06c94bce | 2171 | bitsdone += thissize; |
44037a66 | 2172 | |
06c94bce | 2173 | /* Shift this part into place for the result. */ |
ee45a32d | 2174 | if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN) |
f76b9db2 ILT |
2175 | { |
2176 | if (bitsize != bitsdone) | |
2177 | part = expand_shift (LSHIFT_EXPR, word_mode, part, | |
eb6c3df1 | 2178 | bitsize - bitsdone, 0, 1); |
f76b9db2 ILT |
2179 | } |
2180 | else | |
2181 | { | |
2182 | if (bitsdone != thissize) | |
2183 | part = expand_shift (LSHIFT_EXPR, word_mode, part, | |
eb6c3df1 | 2184 | bitsdone - thissize, 0, 1); |
f76b9db2 | 2185 | } |
44037a66 | 2186 | |
06c94bce RS |
2187 | if (first) |
2188 | result = part; | |
2189 | else | |
2190 | /* Combine the parts with bitwise or. This works | |
2191 | because we extracted each part as an unsigned bit field. */ | |
2192 | result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1, | |
2193 | OPTAB_LIB_WIDEN); | |
2194 | ||
2195 | first = 0; | |
2196 | } | |
44037a66 TG |
2197 | |
2198 | /* Unsigned bit field: we are done. */ | |
2199 | if (unsignedp) | |
2200 | return result; | |
2201 | /* Signed bit field: sign-extend with two arithmetic shifts. */ | |
2202 | result = expand_shift (LSHIFT_EXPR, word_mode, result, | |
eb6c3df1 | 2203 | BITS_PER_WORD - bitsize, NULL_RTX, 0); |
44037a66 | 2204 | return expand_shift (RSHIFT_EXPR, word_mode, result, |
eb6c3df1 | 2205 | BITS_PER_WORD - bitsize, NULL_RTX, 0); |
44037a66 TG |
2206 | } |
2207 | \f | |
18b526e8 RS |
2208 | /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving |
2209 | the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than | |
2210 | MODE, fill the upper bits with zeros. Fail if the layout of either | |
2211 | mode is unknown (as for CC modes) or if the extraction would involve | |
2212 | unprofitable mode punning. Return the value on success, otherwise | |
2213 | return null. | |
2214 | ||
2215 | This is different from gen_lowpart* in these respects: | |
2216 | ||
2217 | - the returned value must always be considered an rvalue | |
2218 | ||
2219 | - when MODE is wider than SRC_MODE, the extraction involves | |
2220 | a zero extension | |
2221 | ||
2222 | - when MODE is smaller than SRC_MODE, the extraction involves | |
2223 | a truncation (and is thus subject to TRULY_NOOP_TRUNCATION). | |
2224 | ||
2225 | In other words, this routine performs a computation, whereas the | |
2226 | gen_lowpart* routines are conceptually lvalue or rvalue subreg | |
2227 | operations. */ | |
2228 | ||
2229 | rtx | |
ef4bddc2 | 2230 | extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src) |
18b526e8 | 2231 | { |
ef4bddc2 | 2232 | machine_mode int_mode, src_int_mode; |
18b526e8 RS |
2233 | |
2234 | if (mode == src_mode) | |
2235 | return src; | |
2236 | ||
2237 | if (CONSTANT_P (src)) | |
d898d29b JJ |
2238 | { |
2239 | /* simplify_gen_subreg can't be used here, as if simplify_subreg | |
2240 | fails, it will happily create (subreg (symbol_ref)) or similar | |
2241 | invalid SUBREGs. */ | |
2242 | unsigned int byte = subreg_lowpart_offset (mode, src_mode); | |
2243 | rtx ret = simplify_subreg (mode, src, src_mode, byte); | |
2244 | if (ret) | |
2245 | return ret; | |
2246 | ||
2247 | if (GET_MODE (src) == VOIDmode | |
2248 | || !validate_subreg (mode, src_mode, src, byte)) | |
2249 | return NULL_RTX; | |
2250 | ||
2251 | src = force_reg (GET_MODE (src), src); | |
2252 | return gen_rtx_SUBREG (mode, src, byte); | |
2253 | } | |
18b526e8 RS |
2254 | |
2255 | if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC) | |
2256 | return NULL_RTX; | |
2257 | ||
2258 | if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode) | |
2259 | && MODES_TIEABLE_P (mode, src_mode)) | |
2260 | { | |
2261 | rtx x = gen_lowpart_common (mode, src); | |
2262 | if (x) | |
2263 | return x; | |
2264 | } | |
2265 | ||
2266 | src_int_mode = int_mode_for_mode (src_mode); | |
2267 | int_mode = int_mode_for_mode (mode); | |
2268 | if (src_int_mode == BLKmode || int_mode == BLKmode) | |
2269 | return NULL_RTX; | |
2270 | ||
2271 | if (!MODES_TIEABLE_P (src_int_mode, src_mode)) | |
2272 | return NULL_RTX; | |
2273 | if (!MODES_TIEABLE_P (int_mode, mode)) | |
2274 | return NULL_RTX; | |
2275 | ||
2276 | src = gen_lowpart (src_int_mode, src); | |
2277 | src = convert_modes (int_mode, src_int_mode, src, true); | |
2278 | src = gen_lowpart (mode, src); | |
2279 | return src; | |
2280 | } | |
2281 | \f | |
44037a66 TG |
2282 | /* Add INC into TARGET. */ |
2283 | ||
2284 | void | |
502b8322 | 2285 | expand_inc (rtx target, rtx inc) |
44037a66 TG |
2286 | { |
2287 | rtx value = expand_binop (GET_MODE (target), add_optab, | |
2288 | target, inc, | |
2289 | target, 0, OPTAB_LIB_WIDEN); | |
2290 | if (value != target) | |
2291 | emit_move_insn (target, value); | |
2292 | } | |
2293 | ||
2294 | /* Subtract DEC from TARGET. */ | |
2295 | ||
2296 | void | |
502b8322 | 2297 | expand_dec (rtx target, rtx dec) |
44037a66 TG |
2298 | { |
2299 | rtx value = expand_binop (GET_MODE (target), sub_optab, | |
2300 | target, dec, | |
2301 | target, 0, OPTAB_LIB_WIDEN); | |
2302 | if (value != target) | |
2303 | emit_move_insn (target, value); | |
2304 | } | |
2305 | \f | |
2306 | /* Output a shift instruction for expression code CODE, | |
2307 | with SHIFTED being the rtx for the value to shift, | |
86529a49 | 2308 | and AMOUNT the rtx for the amount to shift by. |
44037a66 TG |
2309 | Store the result in the rtx TARGET, if that is convenient. |
2310 | If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic. | |
ea000c3f EB |
2311 | Return the rtx for where the value is. |
2312 | If that cannot be done, abort the compilation unless MAY_FAIL is true, | |
2313 | in which case 0 is returned. */ | |
44037a66 | 2314 | |
86529a49 | 2315 | static rtx |
ef4bddc2 | 2316 | expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted, |
ea000c3f | 2317 | rtx amount, rtx target, int unsignedp, bool may_fail = false) |
44037a66 | 2318 | { |
b3694847 SS |
2319 | rtx op1, temp = 0; |
2320 | int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR); | |
2321 | int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR); | |
71d46ca5 MM |
2322 | optab lshift_optab = ashl_optab; |
2323 | optab rshift_arith_optab = ashr_optab; | |
2324 | optab rshift_uns_optab = lshr_optab; | |
2325 | optab lrotate_optab = rotl_optab; | |
2326 | optab rrotate_optab = rotr_optab; | |
ef4bddc2 RS |
2327 | machine_mode op1_mode; |
2328 | machine_mode scalar_mode = mode; | |
d858f359 | 2329 | int attempt; |
f40751dd | 2330 | bool speed = optimize_insn_for_speed_p (); |
44037a66 | 2331 | |
a4ee446d JJ |
2332 | if (VECTOR_MODE_P (mode)) |
2333 | scalar_mode = GET_MODE_INNER (mode); | |
86529a49 | 2334 | op1 = amount; |
71d46ca5 MM |
2335 | op1_mode = GET_MODE (op1); |
2336 | ||
2337 | /* Determine whether the shift/rotate amount is a vector, or scalar. If the | |
2338 | shift amount is a vector, use the vector/vector shift patterns. */ | |
2339 | if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode)) | |
2340 | { | |
2341 | lshift_optab = vashl_optab; | |
2342 | rshift_arith_optab = vashr_optab; | |
2343 | rshift_uns_optab = vlshr_optab; | |
2344 | lrotate_optab = vrotl_optab; | |
2345 | rrotate_optab = vrotr_optab; | |
2346 | } | |
2347 | ||
44037a66 TG |
2348 | /* Previously detected shift-counts computed by NEGATE_EXPR |
2349 | and shifted in the other direction; but that does not work | |
2350 | on all machines. */ | |
2351 | ||
166cdf4a RH |
2352 | if (SHIFT_COUNT_TRUNCATED) |
2353 | { | |
481683e1 | 2354 | if (CONST_INT_P (op1) |
0fb7aeda | 2355 | && ((unsigned HOST_WIDE_INT) INTVAL (op1) >= |
a4ee446d | 2356 | (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode))) |
0fb7aeda | 2357 | op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1) |
a4ee446d | 2358 | % GET_MODE_BITSIZE (scalar_mode)); |
166cdf4a | 2359 | else if (GET_CODE (op1) == SUBREG |
c1cb09ad | 2360 | && subreg_lowpart_p (op1) |
7afe2801 DM |
2361 | && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1))) |
2362 | && SCALAR_INT_MODE_P (GET_MODE (op1))) | |
166cdf4a RH |
2363 | op1 = SUBREG_REG (op1); |
2364 | } | |
2ab0a5c4 | 2365 | |
75776c6d JJ |
2366 | /* Canonicalize rotates by constant amount. If op1 is bitsize / 2, |
2367 | prefer left rotation, if op1 is from bitsize / 2 + 1 to | |
2368 | bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1 | |
2369 | amount instead. */ | |
2370 | if (rotate | |
2371 | && CONST_INT_P (op1) | |
a4ee446d JJ |
2372 | && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left, |
2373 | GET_MODE_BITSIZE (scalar_mode) - 1)) | |
75776c6d | 2374 | { |
a4ee446d | 2375 | op1 = GEN_INT (GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1)); |
75776c6d JJ |
2376 | left = !left; |
2377 | code = left ? LROTATE_EXPR : RROTATE_EXPR; | |
2378 | } | |
2379 | ||
a25efea0 TP |
2380 | /* Rotation of 16bit values by 8 bits is effectively equivalent to a bswaphi. |
2381 | Note that this is not the case for bigger values. For instance a rotation | |
2382 | of 0x01020304 by 16 bits gives 0x03040102 which is different from | |
2383 | 0x04030201 (bswapsi). */ | |
2384 | if (rotate | |
2385 | && CONST_INT_P (op1) | |
2386 | && INTVAL (op1) == BITS_PER_UNIT | |
2387 | && GET_MODE_SIZE (scalar_mode) == 2 | |
2388 | && optab_handler (bswap_optab, HImode) != CODE_FOR_nothing) | |
2389 | return expand_unop (HImode, bswap_optab, shifted, NULL_RTX, | |
2390 | unsignedp); | |
2391 | ||
44037a66 TG |
2392 | if (op1 == const0_rtx) |
2393 | return shifted; | |
2394 | ||
15bad393 RS |
2395 | /* Check whether its cheaper to implement a left shift by a constant |
2396 | bit count by a sequence of additions. */ | |
2397 | if (code == LSHIFT_EXPR | |
481683e1 | 2398 | && CONST_INT_P (op1) |
15bad393 | 2399 | && INTVAL (op1) > 0 |
a4ee446d | 2400 | && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode) |
cb2eb96f | 2401 | && INTVAL (op1) < MAX_BITS_PER_WORD |
5322d07e NF |
2402 | && (shift_cost (speed, mode, INTVAL (op1)) |
2403 | > INTVAL (op1) * add_cost (speed, mode)) | |
2404 | && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST) | |
15bad393 RS |
2405 | { |
2406 | int i; | |
2407 | for (i = 0; i < INTVAL (op1); i++) | |
2408 | { | |
2409 | temp = force_reg (mode, shifted); | |
2410 | shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX, | |
2411 | unsignedp, OPTAB_LIB_WIDEN); | |
2412 | } | |
2413 | return shifted; | |
2414 | } | |
2415 | ||
d858f359 | 2416 | for (attempt = 0; temp == 0 && attempt < 3; attempt++) |
44037a66 TG |
2417 | { |
2418 | enum optab_methods methods; | |
2419 | ||
d858f359 | 2420 | if (attempt == 0) |
44037a66 | 2421 | methods = OPTAB_DIRECT; |
d858f359 | 2422 | else if (attempt == 1) |
44037a66 TG |
2423 | methods = OPTAB_WIDEN; |
2424 | else | |
2425 | methods = OPTAB_LIB_WIDEN; | |
2426 | ||
2427 | if (rotate) | |
2428 | { | |
2429 | /* Widening does not work for rotation. */ | |
2430 | if (methods == OPTAB_WIDEN) | |
2431 | continue; | |
2432 | else if (methods == OPTAB_LIB_WIDEN) | |
cbec710e | 2433 | { |
39e71615 | 2434 | /* If we have been unable to open-code this by a rotation, |
cbec710e | 2435 | do it as the IOR of two shifts. I.e., to rotate A |
ae6fa899 JJ |
2436 | by N bits, compute |
2437 | (A << N) | ((unsigned) A >> ((-N) & (C - 1))) | |
cbec710e RK |
2438 | where C is the bitsize of A. |
2439 | ||
2440 | It is theoretically possible that the target machine might | |
2441 | not be able to perform either shift and hence we would | |
2442 | be making two libcalls rather than just the one for the | |
2443 | shift (similarly if IOR could not be done). We will allow | |
2444 | this extremely unlikely lossage to avoid complicating the | |
2445 | code below. */ | |
2446 | ||
39e71615 | 2447 | rtx subtarget = target == shifted ? 0 : target; |
86529a49 | 2448 | rtx new_amount, other_amount; |
39e71615 | 2449 | rtx temp1; |
86529a49 RG |
2450 | |
2451 | new_amount = op1; | |
ae6fa899 JJ |
2452 | if (op1 == const0_rtx) |
2453 | return shifted; | |
2454 | else if (CONST_INT_P (op1)) | |
a4ee446d | 2455 | other_amount = GEN_INT (GET_MODE_BITSIZE (scalar_mode) |
5c049507 RG |
2456 | - INTVAL (op1)); |
2457 | else | |
ae6fa899 JJ |
2458 | { |
2459 | other_amount | |
2460 | = simplify_gen_unary (NEG, GET_MODE (op1), | |
2461 | op1, GET_MODE (op1)); | |
a4ee446d | 2462 | HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1; |
ae6fa899 | 2463 | other_amount |
69a59f0f RS |
2464 | = simplify_gen_binary (AND, GET_MODE (op1), other_amount, |
2465 | gen_int_mode (mask, GET_MODE (op1))); | |
ae6fa899 | 2466 | } |
39e71615 RK |
2467 | |
2468 | shifted = force_reg (mode, shifted); | |
2469 | ||
86529a49 RG |
2470 | temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR, |
2471 | mode, shifted, new_amount, 0, 1); | |
2472 | temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR, | |
2473 | mode, shifted, other_amount, | |
2474 | subtarget, 1); | |
39e71615 RK |
2475 | return expand_binop (mode, ior_optab, temp, temp1, target, |
2476 | unsignedp, methods); | |
cbec710e | 2477 | } |
44037a66 TG |
2478 | |
2479 | temp = expand_binop (mode, | |
71d46ca5 | 2480 | left ? lrotate_optab : rrotate_optab, |
44037a66 TG |
2481 | shifted, op1, target, unsignedp, methods); |
2482 | } | |
2483 | else if (unsignedp) | |
a34958c9 | 2484 | temp = expand_binop (mode, |
71d46ca5 | 2485 | left ? lshift_optab : rshift_uns_optab, |
a34958c9 | 2486 | shifted, op1, target, unsignedp, methods); |
44037a66 TG |
2487 | |
2488 | /* Do arithmetic shifts. | |
2489 | Also, if we are going to widen the operand, we can just as well | |
2490 | use an arithmetic right-shift instead of a logical one. */ | |
2491 | if (temp == 0 && ! rotate | |
2492 | && (! unsignedp || (! left && methods == OPTAB_WIDEN))) | |
2493 | { | |
2494 | enum optab_methods methods1 = methods; | |
2495 | ||
2496 | /* If trying to widen a log shift to an arithmetic shift, | |
2497 | don't accept an arithmetic shift of the same size. */ | |
2498 | if (unsignedp) | |
2499 | methods1 = OPTAB_MUST_WIDEN; | |
2500 | ||
2501 | /* Arithmetic shift */ | |
2502 | ||
2503 | temp = expand_binop (mode, | |
71d46ca5 | 2504 | left ? lshift_optab : rshift_arith_optab, |
44037a66 TG |
2505 | shifted, op1, target, unsignedp, methods1); |
2506 | } | |
2507 | ||
711a5e64 | 2508 | /* We used to try extzv here for logical right shifts, but that was |
c410d49e | 2509 | only useful for one machine, the VAX, and caused poor code |
711a5e64 RK |
2510 | generation there for lshrdi3, so the code was deleted and a |
2511 | define_expand for lshrsi3 was added to vax.md. */ | |
44037a66 TG |
2512 | } |
2513 | ||
ea000c3f | 2514 | gcc_assert (temp != NULL_RTX || may_fail); |
44037a66 TG |
2515 | return temp; |
2516 | } | |
eb6c3df1 RG |
2517 | |
2518 | /* Output a shift instruction for expression code CODE, | |
2519 | with SHIFTED being the rtx for the value to shift, | |
2520 | and AMOUNT the amount to shift by. | |
2521 | Store the result in the rtx TARGET, if that is convenient. | |
2522 | If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic. | |
2523 | Return the rtx for where the value is. */ | |
2524 | ||
2525 | rtx | |
ef4bddc2 | 2526 | expand_shift (enum tree_code code, machine_mode mode, rtx shifted, |
eb6c3df1 RG |
2527 | int amount, rtx target, int unsignedp) |
2528 | { | |
86529a49 RG |
2529 | return expand_shift_1 (code, mode, |
2530 | shifted, GEN_INT (amount), target, unsignedp); | |
2531 | } | |
2532 | ||
ea000c3f EB |
2533 | /* Likewise, but return 0 if that cannot be done. */ |
2534 | ||
2535 | static rtx | |
2536 | maybe_expand_shift (enum tree_code code, machine_mode mode, rtx shifted, | |
2537 | int amount, rtx target, int unsignedp) | |
2538 | { | |
2539 | return expand_shift_1 (code, mode, | |
2540 | shifted, GEN_INT (amount), target, unsignedp, true); | |
2541 | } | |
2542 | ||
86529a49 RG |
2543 | /* Output a shift instruction for expression code CODE, |
2544 | with SHIFTED being the rtx for the value to shift, | |
2545 | and AMOUNT the tree for the amount to shift by. | |
2546 | Store the result in the rtx TARGET, if that is convenient. | |
2547 | If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic. | |
2548 | Return the rtx for where the value is. */ | |
2549 | ||
2550 | rtx | |
ef4bddc2 | 2551 | expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted, |
86529a49 RG |
2552 | tree amount, rtx target, int unsignedp) |
2553 | { | |
2554 | return expand_shift_1 (code, mode, | |
2555 | shifted, expand_normal (amount), target, unsignedp); | |
eb6c3df1 | 2556 | } |
86529a49 | 2557 | |
44037a66 | 2558 | \f |
41c64ac0 | 2559 | static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT, |
ef4bddc2 | 2560 | const struct mult_cost *, machine_mode mode); |
ef4bddc2 | 2561 | static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx, |
8efc8980 | 2562 | const struct algorithm *, enum mult_variant); |
502b8322 | 2563 | static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int); |
ef4bddc2 RS |
2564 | static rtx extract_high_half (machine_mode, rtx); |
2565 | static rtx expmed_mult_highpart (machine_mode, rtx, rtx, rtx, int, int); | |
2566 | static rtx expmed_mult_highpart_optab (machine_mode, rtx, rtx, rtx, | |
8efc8980 | 2567 | int, int); |
44037a66 | 2568 | /* Compute and return the best algorithm for multiplying by T. |
7963ac37 RK |
2569 | The algorithm must cost less than cost_limit |
2570 | If retval.cost >= COST_LIMIT, no algorithm was found and all | |
41c64ac0 RS |
2571 | other field of the returned struct are undefined. |
2572 | MODE is the machine mode of the multiplication. */ | |
44037a66 | 2573 | |
819126a6 | 2574 | static void |
502b8322 | 2575 | synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t, |
ef4bddc2 | 2576 | const struct mult_cost *cost_limit, machine_mode mode) |
44037a66 | 2577 | { |
b2fb324c | 2578 | int m; |
52786026 | 2579 | struct algorithm *alg_in, *best_alg; |
26276705 RS |
2580 | struct mult_cost best_cost; |
2581 | struct mult_cost new_limit; | |
2582 | int op_cost, op_latency; | |
ef268d34 | 2583 | unsigned HOST_WIDE_INT orig_t = t; |
b2fb324c | 2584 | unsigned HOST_WIDE_INT q; |
84ddb681 | 2585 | int maxm, hash_index; |
7b13ee6b KH |
2586 | bool cache_hit = false; |
2587 | enum alg_code cache_alg = alg_zero; | |
f40751dd | 2588 | bool speed = optimize_insn_for_speed_p (); |
ef4bddc2 | 2589 | machine_mode imode; |
5322d07e | 2590 | struct alg_hash_entry *entry_ptr; |
44037a66 | 2591 | |
7963ac37 RK |
2592 | /* Indicate that no algorithm is yet found. If no algorithm |
2593 | is found, this value will be returned and indicate failure. */ | |
26276705 | 2594 | alg_out->cost.cost = cost_limit->cost + 1; |
3ab0f290 | 2595 | alg_out->cost.latency = cost_limit->latency + 1; |
44037a66 | 2596 | |
26276705 RS |
2597 | if (cost_limit->cost < 0 |
2598 | || (cost_limit->cost == 0 && cost_limit->latency <= 0)) | |
819126a6 | 2599 | return; |
44037a66 | 2600 | |
84ddb681 RH |
2601 | /* Be prepared for vector modes. */ |
2602 | imode = GET_MODE_INNER (mode); | |
84ddb681 RH |
2603 | |
2604 | maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode)); | |
2605 | ||
0792ab19 | 2606 | /* Restrict the bits of "t" to the multiplication's mode. */ |
84ddb681 | 2607 | t &= GET_MODE_MASK (imode); |
0792ab19 | 2608 | |
b385aeda RK |
2609 | /* t == 1 can be done in zero cost. */ |
2610 | if (t == 1) | |
b2fb324c | 2611 | { |
819126a6 | 2612 | alg_out->ops = 1; |
26276705 RS |
2613 | alg_out->cost.cost = 0; |
2614 | alg_out->cost.latency = 0; | |
819126a6 RK |
2615 | alg_out->op[0] = alg_m; |
2616 | return; | |
b2fb324c RK |
2617 | } |
2618 | ||
b385aeda RK |
2619 | /* t == 0 sometimes has a cost. If it does and it exceeds our limit, |
2620 | fail now. */ | |
819126a6 | 2621 | if (t == 0) |
b385aeda | 2622 | { |
5322d07e | 2623 | if (MULT_COST_LESS (cost_limit, zero_cost (speed))) |
819126a6 | 2624 | return; |
b385aeda RK |
2625 | else |
2626 | { | |
819126a6 | 2627 | alg_out->ops = 1; |
5322d07e NF |
2628 | alg_out->cost.cost = zero_cost (speed); |
2629 | alg_out->cost.latency = zero_cost (speed); | |
819126a6 RK |
2630 | alg_out->op[0] = alg_zero; |
2631 | return; | |
b385aeda RK |
2632 | } |
2633 | } | |
2634 | ||
52786026 RK |
2635 | /* We'll be needing a couple extra algorithm structures now. */ |
2636 | ||
1b4572a8 KG |
2637 | alg_in = XALLOCA (struct algorithm); |
2638 | best_alg = XALLOCA (struct algorithm); | |
26276705 | 2639 | best_cost = *cost_limit; |
52786026 | 2640 | |
7b13ee6b | 2641 | /* Compute the hash index. */ |
f40751dd | 2642 | hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES; |
7b13ee6b KH |
2643 | |
2644 | /* See if we already know what to do for T. */ | |
5322d07e NF |
2645 | entry_ptr = alg_hash_entry_ptr (hash_index); |
2646 | if (entry_ptr->t == t | |
5322d07e NF |
2647 | && entry_ptr->mode == mode |
2648 | && entry_ptr->speed == speed | |
2649 | && entry_ptr->alg != alg_unknown) | |
7b13ee6b | 2650 | { |
5322d07e | 2651 | cache_alg = entry_ptr->alg; |
0178027c KH |
2652 | |
2653 | if (cache_alg == alg_impossible) | |
7b13ee6b | 2654 | { |
0178027c | 2655 | /* The cache tells us that it's impossible to synthesize |
5322d07e NF |
2656 | multiplication by T within entry_ptr->cost. */ |
2657 | if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit)) | |
0178027c KH |
2658 | /* COST_LIMIT is at least as restrictive as the one |
2659 | recorded in the hash table, in which case we have no | |
2660 | hope of synthesizing a multiplication. Just | |
2661 | return. */ | |
2662 | return; | |
2663 | ||
2664 | /* If we get here, COST_LIMIT is less restrictive than the | |
2665 | one recorded in the hash table, so we may be able to | |
2666 | synthesize a multiplication. Proceed as if we didn't | |
2667 | have the cache entry. */ | |
2668 | } | |
2669 | else | |
2670 | { | |
5322d07e | 2671 | if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost)) |
0178027c KH |
2672 | /* The cached algorithm shows that this multiplication |
2673 | requires more cost than COST_LIMIT. Just return. This | |
2674 | way, we don't clobber this cache entry with | |
2675 | alg_impossible but retain useful information. */ | |
2676 | return; | |
7b13ee6b | 2677 | |
0178027c KH |
2678 | cache_hit = true; |
2679 | ||
2680 | switch (cache_alg) | |
2681 | { | |
2682 | case alg_shift: | |
2683 | goto do_alg_shift; | |
7b13ee6b | 2684 | |
0178027c KH |
2685 | case alg_add_t_m2: |
2686 | case alg_sub_t_m2: | |
2687 | goto do_alg_addsub_t_m2; | |
7b13ee6b | 2688 | |
0178027c KH |
2689 | case alg_add_factor: |
2690 | case alg_sub_factor: | |
2691 | goto do_alg_addsub_factor; | |
7b13ee6b | 2692 | |
0178027c KH |
2693 | case alg_add_t2_m: |
2694 | goto do_alg_add_t2_m; | |
7b13ee6b | 2695 | |
0178027c KH |
2696 | case alg_sub_t2_m: |
2697 | goto do_alg_sub_t2_m; | |
2698 | ||
2699 | default: | |
2700 | gcc_unreachable (); | |
2701 | } | |
7b13ee6b KH |
2702 | } |
2703 | } | |
2704 | ||
b385aeda RK |
2705 | /* If we have a group of zero bits at the low-order part of T, try |
2706 | multiplying by the remaining bits and then doing a shift. */ | |
2707 | ||
b2fb324c | 2708 | if ((t & 1) == 0) |
44037a66 | 2709 | { |
7b13ee6b | 2710 | do_alg_shift: |
146ec50f | 2711 | m = ctz_or_zero (t); /* m = number of low zero bits */ |
0792ab19 | 2712 | if (m < maxm) |
44037a66 | 2713 | { |
02a65aef | 2714 | q = t >> m; |
15bad393 RS |
2715 | /* The function expand_shift will choose between a shift and |
2716 | a sequence of additions, so the observed cost is given as | |
5322d07e NF |
2717 | MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */ |
2718 | op_cost = m * add_cost (speed, mode); | |
2719 | if (shift_cost (speed, mode, m) < op_cost) | |
2720 | op_cost = shift_cost (speed, mode, m); | |
26276705 RS |
2721 | new_limit.cost = best_cost.cost - op_cost; |
2722 | new_limit.latency = best_cost.latency - op_cost; | |
2723 | synth_mult (alg_in, q, &new_limit, mode); | |
2724 | ||
2725 | alg_in->cost.cost += op_cost; | |
2726 | alg_in->cost.latency += op_cost; | |
2727 | if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) | |
02a65aef | 2728 | { |
26276705 | 2729 | best_cost = alg_in->cost; |
076701b6 | 2730 | std::swap (alg_in, best_alg); |
02a65aef R |
2731 | best_alg->log[best_alg->ops] = m; |
2732 | best_alg->op[best_alg->ops] = alg_shift; | |
02a65aef | 2733 | } |
ddc2690a KH |
2734 | |
2735 | /* See if treating ORIG_T as a signed number yields a better | |
2736 | sequence. Try this sequence only for a negative ORIG_T | |
2737 | as it would be useless for a non-negative ORIG_T. */ | |
2738 | if ((HOST_WIDE_INT) orig_t < 0) | |
2739 | { | |
2740 | /* Shift ORIG_T as follows because a right shift of a | |
2741 | negative-valued signed type is implementation | |
2742 | defined. */ | |
2743 | q = ~(~orig_t >> m); | |
2744 | /* The function expand_shift will choose between a shift | |
2745 | and a sequence of additions, so the observed cost is | |
5322d07e NF |
2746 | given as MIN (m * add_cost(speed, mode), |
2747 | shift_cost(speed, mode, m)). */ | |
2748 | op_cost = m * add_cost (speed, mode); | |
2749 | if (shift_cost (speed, mode, m) < op_cost) | |
2750 | op_cost = shift_cost (speed, mode, m); | |
ddc2690a KH |
2751 | new_limit.cost = best_cost.cost - op_cost; |
2752 | new_limit.latency = best_cost.latency - op_cost; | |
2753 | synth_mult (alg_in, q, &new_limit, mode); | |
2754 | ||
2755 | alg_in->cost.cost += op_cost; | |
2756 | alg_in->cost.latency += op_cost; | |
2757 | if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) | |
2758 | { | |
ddc2690a | 2759 | best_cost = alg_in->cost; |
076701b6 | 2760 | std::swap (alg_in, best_alg); |
ddc2690a KH |
2761 | best_alg->log[best_alg->ops] = m; |
2762 | best_alg->op[best_alg->ops] = alg_shift; | |
2763 | } | |
2764 | } | |
819126a6 | 2765 | } |
7b13ee6b KH |
2766 | if (cache_hit) |
2767 | goto done; | |
819126a6 RK |
2768 | } |
2769 | ||
2770 | /* If we have an odd number, add or subtract one. */ | |
2771 | if ((t & 1) != 0) | |
2772 | { | |
2773 | unsigned HOST_WIDE_INT w; | |
2774 | ||
7b13ee6b | 2775 | do_alg_addsub_t_m2: |
819126a6 RK |
2776 | for (w = 1; (w & t) != 0; w <<= 1) |
2777 | ; | |
31031edd | 2778 | /* If T was -1, then W will be zero after the loop. This is another |
c410d49e | 2779 | case where T ends with ...111. Handling this with (T + 1) and |
31031edd JL |
2780 | subtract 1 produces slightly better code and results in algorithm |
2781 | selection much faster than treating it like the ...0111 case | |
2782 | below. */ | |
2783 | if (w == 0 | |
2784 | || (w > 2 | |
2785 | /* Reject the case where t is 3. | |
2786 | Thus we prefer addition in that case. */ | |
2787 | && t != 3)) | |
819126a6 | 2788 | { |
c61928d0 | 2789 | /* T ends with ...111. Multiply by (T + 1) and subtract T. */ |
819126a6 | 2790 | |
5322d07e | 2791 | op_cost = add_cost (speed, mode); |
26276705 RS |
2792 | new_limit.cost = best_cost.cost - op_cost; |
2793 | new_limit.latency = best_cost.latency - op_cost; | |
2794 | synth_mult (alg_in, t + 1, &new_limit, mode); | |
b2fb324c | 2795 | |
26276705 RS |
2796 | alg_in->cost.cost += op_cost; |
2797 | alg_in->cost.latency += op_cost; | |
2798 | if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) | |
44037a66 | 2799 | { |
26276705 | 2800 | best_cost = alg_in->cost; |
076701b6 | 2801 | std::swap (alg_in, best_alg); |
819126a6 RK |
2802 | best_alg->log[best_alg->ops] = 0; |
2803 | best_alg->op[best_alg->ops] = alg_sub_t_m2; | |
44037a66 | 2804 | } |
44037a66 | 2805 | } |
819126a6 RK |
2806 | else |
2807 | { | |
c61928d0 | 2808 | /* T ends with ...01 or ...011. Multiply by (T - 1) and add T. */ |
44037a66 | 2809 | |
5322d07e | 2810 | op_cost = add_cost (speed, mode); |
26276705 RS |
2811 | new_limit.cost = best_cost.cost - op_cost; |
2812 | new_limit.latency = best_cost.latency - op_cost; | |
2813 | synth_mult (alg_in, t - 1, &new_limit, mode); | |
819126a6 | 2814 | |
26276705 RS |
2815 | alg_in->cost.cost += op_cost; |
2816 | alg_in->cost.latency += op_cost; | |
2817 | if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) | |
819126a6 | 2818 | { |
26276705 | 2819 | best_cost = alg_in->cost; |
076701b6 | 2820 | std::swap (alg_in, best_alg); |
819126a6 RK |
2821 | best_alg->log[best_alg->ops] = 0; |
2822 | best_alg->op[best_alg->ops] = alg_add_t_m2; | |
819126a6 RK |
2823 | } |
2824 | } | |
ef268d34 KH |
2825 | |
2826 | /* We may be able to calculate a * -7, a * -15, a * -31, etc | |
2827 | quickly with a - a * n for some appropriate constant n. */ | |
2828 | m = exact_log2 (-orig_t + 1); | |
2829 | if (m >= 0 && m < maxm) | |
2830 | { | |
35430ca0 KT |
2831 | op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m); |
2832 | /* If the target has a cheap shift-and-subtract insn use | |
2833 | that in preference to a shift insn followed by a sub insn. | |
2834 | Assume that the shift-and-sub is "atomic" with a latency | |
2835 | equal to it's cost, otherwise assume that on superscalar | |
2836 | hardware the shift may be executed concurrently with the | |
2837 | earlier steps in the algorithm. */ | |
2838 | if (shiftsub1_cost (speed, mode, m) <= op_cost) | |
2839 | { | |
2840 | op_cost = shiftsub1_cost (speed, mode, m); | |
2841 | op_latency = op_cost; | |
2842 | } | |
2843 | else | |
2844 | op_latency = add_cost (speed, mode); | |
2845 | ||
ef268d34 | 2846 | new_limit.cost = best_cost.cost - op_cost; |
35430ca0 | 2847 | new_limit.latency = best_cost.latency - op_latency; |
84ddb681 RH |
2848 | synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m, |
2849 | &new_limit, mode); | |
ef268d34 KH |
2850 | |
2851 | alg_in->cost.cost += op_cost; | |
35430ca0 | 2852 | alg_in->cost.latency += op_latency; |
ef268d34 KH |
2853 | if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) |
2854 | { | |
ef268d34 | 2855 | best_cost = alg_in->cost; |
076701b6 | 2856 | std::swap (alg_in, best_alg); |
ef268d34 KH |
2857 | best_alg->log[best_alg->ops] = m; |
2858 | best_alg->op[best_alg->ops] = alg_sub_t_m2; | |
2859 | } | |
2860 | } | |
2861 | ||
7b13ee6b KH |
2862 | if (cache_hit) |
2863 | goto done; | |
819126a6 | 2864 | } |
63610db9 | 2865 | |
44037a66 | 2866 | /* Look for factors of t of the form |
7963ac37 | 2867 | t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)). |
44037a66 | 2868 | If we find such a factor, we can multiply by t using an algorithm that |
7963ac37 | 2869 | multiplies by q, shift the result by m and add/subtract it to itself. |
44037a66 | 2870 | |
7963ac37 RK |
2871 | We search for large factors first and loop down, even if large factors |
2872 | are less probable than small; if we find a large factor we will find a | |
2873 | good sequence quickly, and therefore be able to prune (by decreasing | |
2874 | COST_LIMIT) the search. */ | |
2875 | ||
7b13ee6b | 2876 | do_alg_addsub_factor: |
7963ac37 | 2877 | for (m = floor_log2 (t - 1); m >= 2; m--) |
44037a66 | 2878 | { |
7963ac37 | 2879 | unsigned HOST_WIDE_INT d; |
44037a66 | 2880 | |
fecfbfa4 | 2881 | d = (HOST_WIDE_INT_1U << m) + 1; |
7b13ee6b KH |
2882 | if (t % d == 0 && t > d && m < maxm |
2883 | && (!cache_hit || cache_alg == alg_add_factor)) | |
44037a66 | 2884 | { |
5322d07e | 2885 | op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m); |
35430ca0 KT |
2886 | if (shiftadd_cost (speed, mode, m) <= op_cost) |
2887 | op_cost = shiftadd_cost (speed, mode, m); | |
2888 | ||
2889 | op_latency = op_cost; | |
2890 | ||
26276705 RS |
2891 | |
2892 | new_limit.cost = best_cost.cost - op_cost; | |
2893 | new_limit.latency = best_cost.latency - op_latency; | |
2894 | synth_mult (alg_in, t / d, &new_limit, mode); | |
44037a66 | 2895 | |
26276705 RS |
2896 | alg_in->cost.cost += op_cost; |
2897 | alg_in->cost.latency += op_latency; | |
2898 | if (alg_in->cost.latency < op_cost) | |
2899 | alg_in->cost.latency = op_cost; | |
2900 | if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) | |
44037a66 | 2901 | { |
26276705 | 2902 | best_cost = alg_in->cost; |
076701b6 | 2903 | std::swap (alg_in, best_alg); |
b385aeda | 2904 | best_alg->log[best_alg->ops] = m; |
819126a6 | 2905 | best_alg->op[best_alg->ops] = alg_add_factor; |
44037a66 | 2906 | } |
c0b262c1 TG |
2907 | /* Other factors will have been taken care of in the recursion. */ |
2908 | break; | |
44037a66 TG |
2909 | } |
2910 | ||
fecfbfa4 | 2911 | d = (HOST_WIDE_INT_1U << m) - 1; |
7b13ee6b KH |
2912 | if (t % d == 0 && t > d && m < maxm |
2913 | && (!cache_hit || cache_alg == alg_sub_factor)) | |
44037a66 | 2914 | { |
5322d07e | 2915 | op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m); |
35430ca0 KT |
2916 | if (shiftsub0_cost (speed, mode, m) <= op_cost) |
2917 | op_cost = shiftsub0_cost (speed, mode, m); | |
2918 | ||
2919 | op_latency = op_cost; | |
26276705 RS |
2920 | |
2921 | new_limit.cost = best_cost.cost - op_cost; | |
417c735c | 2922 | new_limit.latency = best_cost.latency - op_latency; |
26276705 | 2923 | synth_mult (alg_in, t / d, &new_limit, mode); |
44037a66 | 2924 | |
26276705 RS |
2925 | alg_in->cost.cost += op_cost; |
2926 | alg_in->cost.latency += op_latency; | |
2927 | if (alg_in->cost.latency < op_cost) | |
2928 | alg_in->cost.latency = op_cost; | |
2929 | if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) | |
44037a66 | 2930 | { |
26276705 | 2931 | best_cost = alg_in->cost; |
076701b6 | 2932 | std::swap (alg_in, best_alg); |
b385aeda | 2933 | best_alg->log[best_alg->ops] = m; |
819126a6 | 2934 | best_alg->op[best_alg->ops] = alg_sub_factor; |
44037a66 | 2935 | } |
c0b262c1 | 2936 | break; |
44037a66 TG |
2937 | } |
2938 | } | |
7b13ee6b KH |
2939 | if (cache_hit) |
2940 | goto done; | |
44037a66 | 2941 | |
7963ac37 RK |
2942 | /* Try shift-and-add (load effective address) instructions, |
2943 | i.e. do a*3, a*5, a*9. */ | |
2944 | if ((t & 1) != 0) | |
2945 | { | |
7b13ee6b | 2946 | do_alg_add_t2_m: |
7963ac37 | 2947 | q = t - 1; |
146ec50f JM |
2948 | m = ctz_hwi (q); |
2949 | if (q && m < maxm) | |
b385aeda | 2950 | { |
5322d07e | 2951 | op_cost = shiftadd_cost (speed, mode, m); |
26276705 RS |
2952 | new_limit.cost = best_cost.cost - op_cost; |
2953 | new_limit.latency = best_cost.latency - op_cost; | |
2954 | synth_mult (alg_in, (t - 1) >> m, &new_limit, mode); | |
2955 | ||
2956 | alg_in->cost.cost += op_cost; | |
2957 | alg_in->cost.latency += op_cost; | |
2958 | if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) | |
5eebe2eb | 2959 | { |
26276705 | 2960 | best_cost = alg_in->cost; |
076701b6 | 2961 | std::swap (alg_in, best_alg); |
5eebe2eb | 2962 | best_alg->log[best_alg->ops] = m; |
819126a6 | 2963 | best_alg->op[best_alg->ops] = alg_add_t2_m; |
5eebe2eb | 2964 | } |
7963ac37 | 2965 | } |
7b13ee6b KH |
2966 | if (cache_hit) |
2967 | goto done; | |
44037a66 | 2968 | |
7b13ee6b | 2969 | do_alg_sub_t2_m: |
7963ac37 | 2970 | q = t + 1; |
146ec50f JM |
2971 | m = ctz_hwi (q); |
2972 | if (q && m < maxm) | |
b385aeda | 2973 | { |
5322d07e | 2974 | op_cost = shiftsub0_cost (speed, mode, m); |
26276705 RS |
2975 | new_limit.cost = best_cost.cost - op_cost; |
2976 | new_limit.latency = best_cost.latency - op_cost; | |
2977 | synth_mult (alg_in, (t + 1) >> m, &new_limit, mode); | |
2978 | ||
2979 | alg_in->cost.cost += op_cost; | |
2980 | alg_in->cost.latency += op_cost; | |
2981 | if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) | |
5eebe2eb | 2982 | { |
26276705 | 2983 | best_cost = alg_in->cost; |
076701b6 | 2984 | std::swap (alg_in, best_alg); |
5eebe2eb | 2985 | best_alg->log[best_alg->ops] = m; |
819126a6 | 2986 | best_alg->op[best_alg->ops] = alg_sub_t2_m; |
5eebe2eb | 2987 | } |
7963ac37 | 2988 | } |
7b13ee6b KH |
2989 | if (cache_hit) |
2990 | goto done; | |
7963ac37 | 2991 | } |
44037a66 | 2992 | |
7b13ee6b | 2993 | done: |
3ab0f290 DJ |
2994 | /* If best_cost has not decreased, we have not found any algorithm. */ |
2995 | if (!CHEAPER_MULT_COST (&best_cost, cost_limit)) | |
0178027c KH |
2996 | { |
2997 | /* We failed to find an algorithm. Record alg_impossible for | |
2998 | this case (that is, <T, MODE, COST_LIMIT>) so that next time | |
2999 | we are asked to find an algorithm for T within the same or | |
3000 | lower COST_LIMIT, we can immediately return to the | |
3001 | caller. */ | |
5322d07e NF |
3002 | entry_ptr->t = t; |
3003 | entry_ptr->mode = mode; | |
3004 | entry_ptr->speed = speed; | |
3005 | entry_ptr->alg = alg_impossible; | |
3006 | entry_ptr->cost = *cost_limit; | |
0178027c KH |
3007 | return; |
3008 | } | |
3ab0f290 | 3009 | |
7b13ee6b KH |
3010 | /* Cache the result. */ |
3011 | if (!cache_hit) | |
3012 | { | |
5322d07e NF |
3013 | entry_ptr->t = t; |
3014 | entry_ptr->mode = mode; | |
3015 | entry_ptr->speed = speed; | |
3016 | entry_ptr->alg = best_alg->op[best_alg->ops]; | |
3017 | entry_ptr->cost.cost = best_cost.cost; | |
3018 | entry_ptr->cost.latency = best_cost.latency; | |
7b13ee6b KH |
3019 | } |
3020 | ||
52786026 RK |
3021 | /* If we are getting a too long sequence for `struct algorithm' |
3022 | to record, make this search fail. */ | |
3023 | if (best_alg->ops == MAX_BITS_PER_WORD) | |
3024 | return; | |
3025 | ||
819126a6 RK |
3026 | /* Copy the algorithm from temporary space to the space at alg_out. |
3027 | We avoid using structure assignment because the majority of | |
3028 | best_alg is normally undefined, and this is a critical function. */ | |
3029 | alg_out->ops = best_alg->ops + 1; | |
26276705 | 3030 | alg_out->cost = best_cost; |
4e135bdd KG |
3031 | memcpy (alg_out->op, best_alg->op, |
3032 | alg_out->ops * sizeof *alg_out->op); | |
3033 | memcpy (alg_out->log, best_alg->log, | |
3034 | alg_out->ops * sizeof *alg_out->log); | |
44037a66 TG |
3035 | } |
3036 | \f | |
/* Find the cheapest way of multiplying a value of mode MODE by VAL.
   Try three variations:

       - a shift/add sequence based on VAL itself
       - a shift/add sequence based on -VAL, followed by a negation
       - a shift/add sequence based on VAL - 1, followed by an addition.

   Return true if the cheapest of these cost less than MULT_COST,
   describing the algorithm in *ALG and final fixup in *VARIANT.  */

bool
choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
		     struct algorithm *alg, enum mult_variant *variant,
		     int mult_cost)
{
  struct algorithm alg2;
  struct mult_cost limit;
  int op_cost;
  bool speed = optimize_insn_for_speed_p ();

  /* Fail quickly for impossible bounds.  */
  if (mult_cost < 0)
    return false;

  /* Ensure that mult_cost provides a reasonable upper bound.
     Any constant multiplication can be performed with less
     than 2 * bits additions.  */
  op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
  if (mult_cost > op_cost)
    mult_cost = op_cost;

  /* Variation 1: synthesize VAL directly.  */
  *variant = basic_variant;
  limit.cost = mult_cost;
  limit.latency = mult_cost;
  synth_mult (alg, val, &limit, mode);

  /* Variation 2: synthesize -VAL and negate the result afterwards.
     This works only if the inverted value actually fits in an
     `unsigned int' */
  if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
    {
      op_cost = neg_cost (speed, mode);
      /* Budget for the recursive search: the better of the algorithm
	 found so far and the caller's limit, minus the fixup cost.  */
      if (MULT_COST_LESS (&alg->cost, mult_cost))
	{
	  limit.cost = alg->cost.cost - op_cost;
	  limit.latency = alg->cost.latency - op_cost;
	}
      else
	{
	  limit.cost = mult_cost - op_cost;
	  limit.latency = mult_cost - op_cost;
	}

      synth_mult (&alg2, -val, &limit, mode);
      alg2.cost.cost += op_cost;
      alg2.cost.latency += op_cost;
      if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
	*alg = alg2, *variant = negate_variant;
    }

  /* Variation 3: synthesize VAL - 1 and add OP0 once at the end.
     This proves very useful for division-by-constant.  */
  op_cost = add_cost (speed, mode);
  if (MULT_COST_LESS (&alg->cost, mult_cost))
    {
      limit.cost = alg->cost.cost - op_cost;
      limit.latency = alg->cost.latency - op_cost;
    }
  else
    {
      limit.cost = mult_cost - op_cost;
      limit.latency = mult_cost - op_cost;
    }

  synth_mult (&alg2, val - 1, &limit, mode);
  alg2.cost.cost += op_cost;
  alg2.cost.latency += op_cost;
  if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
    *alg = alg2, *variant = add_variant;

  return MULT_COST_LESS (&alg->cost, mult_cost);
}
3117 | ||
/* A subroutine of expand_mult, used for constant multiplications.
   Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
   convenient.  Use the shift/add sequence described by ALG and apply
   the final fixup specified by VARIANT.  */

static rtx
expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
		   rtx target, const struct algorithm *alg,
		   enum mult_variant variant)
{
  /* Running value of the constant multiplied so far; checked against
     VAL at the end to catch algorithm bugs.  */
  unsigned HOST_WIDE_INT val_so_far;
  rtx_insn *insn;
  rtx accum, tem;
  int opno;
  machine_mode nmode;

  /* Avoid referencing memory over and over and invalid sharing
     on SUBREGs.  */
  op0 = force_reg (mode, op0);

  /* ACCUM starts out either as OP0 or as a zero, depending on
     the first operation.  */

  if (alg->op[0] == alg_zero)
    {
      accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
      val_so_far = 0;
    }
  else if (alg->op[0] == alg_m)
    {
      accum = copy_to_mode_reg (mode, op0);
      val_so_far = 1;
    }
  else
    gcc_unreachable ();

  /* Replay the remaining steps of the algorithm, emitting RTL and
     tracking the constant accumulated so far.  */
  for (opno = 1; opno < alg->ops; opno++)
    {
      int log = alg->log[opno];
      /* When not optimizing, reuse ACCUM as scratch/destination so we
	 don't create extra pseudos.  */
      rtx shift_subtarget = optimize ? 0 : accum;
      rtx add_target
	= (opno == alg->ops - 1 && target != 0 && variant != add_variant
	   && !optimize)
	  ? target : 0;
      rtx accum_target = optimize ? 0 : accum;
      rtx accum_inner;

      switch (alg->op[opno])
	{
	case alg_shift:
	  tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
	  /* REG_EQUAL note will be attached to the following insn.  */
	  emit_move_insn (accum, tem);
	  val_so_far <<= log;
	  break;

	case alg_add_t_m2:
	  tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
	  accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
				 add_target ? add_target : accum_target);
	  val_so_far += HOST_WIDE_INT_1U << log;
	  break;

	case alg_sub_t_m2:
	  tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
	  accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
				 add_target ? add_target : accum_target);
	  val_so_far -= HOST_WIDE_INT_1U << log;
	  break;

	case alg_add_t2_m:
	  accum = expand_shift (LSHIFT_EXPR, mode, accum,
				log, shift_subtarget, 0);
	  accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
				 add_target ? add_target : accum_target);
	  val_so_far = (val_so_far << log) + 1;
	  break;

	case alg_sub_t2_m:
	  accum = expand_shift (LSHIFT_EXPR, mode, accum,
				log, shift_subtarget, 0);
	  accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
				 add_target ? add_target : accum_target);
	  val_so_far = (val_so_far << log) - 1;
	  break;

	case alg_add_factor:
	  tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
	  accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
				 add_target ? add_target : accum_target);
	  val_so_far += val_so_far << log;
	  break;

	case alg_sub_factor:
	  tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
	  accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
				 (add_target
				  ? add_target : (optimize ? 0 : tem)));
	  val_so_far = (val_so_far << log) - val_so_far;
	  break;

	default:
	  gcc_unreachable ();
	}

      if (SCALAR_INT_MODE_P (mode))
	{
	  /* Write a REG_EQUAL note on the last insn so that we can cse
	     multiplication sequences.  Note that if ACCUM is a SUBREG,
	     we've set the inner register and must properly indicate that.  */
	  tem = op0, nmode = mode;
	  accum_inner = accum;
	  if (GET_CODE (accum) == SUBREG)
	    {
	      accum_inner = SUBREG_REG (accum);
	      nmode = GET_MODE (accum_inner);
	      tem = gen_lowpart (nmode, op0);
	    }

	  insn = get_last_insn ();
	  set_dst_reg_note (insn, REG_EQUAL,
			    gen_rtx_MULT (nmode, tem,
					  gen_int_mode (val_so_far, nmode)),
			    accum_inner);
	}
    }

  /* Apply the final fixup chosen by choose_mult_variant.  */
  if (variant == negate_variant)
    {
      val_so_far = -val_so_far;
      accum = expand_unop (mode, neg_optab, accum, target, 0);
    }
  else if (variant == add_variant)
    {
      val_so_far = val_so_far + 1;
      accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
    }

  /* Compare only the bits of val and val_so_far that are significant
     in the result mode, to avoid sign-/zero-extension confusion.  */
  nmode = GET_MODE_INNER (mode);
  val &= GET_MODE_MASK (nmode);
  val_so_far &= GET_MODE_MASK (nmode);
  gcc_assert (val == (HOST_WIDE_INT) val_so_far);

  return accum;
}
3265 | ||
/* Perform a multiplication and return an rtx for the result.
   MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
   TARGET is a suggestion for where to store the result (an rtx).

   We check specially for a constant integer as OP1.
   If you want this check for OP0 as well, then before calling
   you should swap the two operands if OP0 would be constant.  */

rtx
expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
	     int unsignedp)
{
  enum mult_variant variant;
  struct algorithm algorithm;
  rtx scalar_op1;
  int max_cost;
  bool speed = optimize_insn_for_speed_p ();
  /* -ftrapv signed multiplication must use the trapping optab and
     cannot be synthesized from shifts and adds.  */
  bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;

  if (CONSTANT_P (op0))
    std::swap (op0, op1);

  /* For vectors, there are several simplifications that can be made if
     all elements of the vector constant are identical.  */
  scalar_op1 = unwrap_const_vec_duplicate (op1);

  if (INTEGRAL_MODE_P (mode))
    {
      rtx fake_reg;
      HOST_WIDE_INT coeff;
      bool is_neg;
      int mode_bitsize;

      /* Trivial constants: x*0, x*1 and x*-1.  */
      if (op1 == CONST0_RTX (mode))
	return op1;
      if (op1 == CONST1_RTX (mode))
	return op0;
      if (op1 == CONSTM1_RTX (mode))
	return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
			    op0, target, 0);

      if (do_trapv)
	goto skip_synth;

      /* If mode is integer vector mode, check if the backend supports
	 vector lshift (by scalar or vector) at all.  If not, we can't use
	 synthetized multiply.  */
      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
	  && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
	  && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
	goto skip_synth;

      /* These are the operations that are potentially turned into
	 a sequence of shifts and additions.  */
      mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);

      /* synth_mult does an `unsigned int' multiply.  As long as the mode is
	 less than or equal in size to `unsigned int' this doesn't matter.
	 If the mode is larger than `unsigned int', then synth_mult works
	 only if the constant value exactly fits in an `unsigned int' without
	 any truncation.  This means that multiplying by negative values does
	 not work; results are off by 2^32 on a 32 bit machine.  */
      if (CONST_INT_P (scalar_op1))
	{
	  coeff = INTVAL (scalar_op1);
	  is_neg = coeff < 0;
	}
#if TARGET_SUPPORTS_WIDE_INT
      else if (CONST_WIDE_INT_P (scalar_op1))
#else
      else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
#endif
	{
	  int shift = wi::exact_log2 (rtx_mode_t (scalar_op1, mode));
	  /* Perfect power of 2 (other than 1, which is handled above).  */
	  if (shift > 0)
	    return expand_shift (LSHIFT_EXPR, mode, op0,
				 shift, target, unsignedp);
	  else
	    goto skip_synth;
	}
      else
	goto skip_synth;

      /* We used to test optimize here, on the grounds that it's better to
	 produce a smaller program when -O is not used.  But this causes
	 such a terrible slowdown sometimes that it seems better to always
	 use synth_mult.  */

      /* Special case powers of two.  */
      if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
	  && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
	return expand_shift (LSHIFT_EXPR, mode, op0,
			     floor_log2 (coeff), target, unsignedp);

      /* Placeholder register used only to price a generic multiply insn
	 via set_src_cost below.  */
      fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);

      /* Attempt to handle multiplication of DImode values by negative
	 coefficients, by performing the multiplication by a positive
	 multiplier and then inverting the result.  */
      if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
	{
	  /* Its safe to use -coeff even for INT_MIN, as the
	     result is interpreted as an unsigned coefficient.
	     Exclude cost of op0 from max_cost to match the cost
	     calculation of the synth_mult.  */
	  coeff = -(unsigned HOST_WIDE_INT) coeff;
	  max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
				    mode, speed)
		      - neg_cost (speed, mode));
	  if (max_cost <= 0)
	    goto skip_synth;

	  /* Special case powers of two.  */
	  if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
	    {
	      rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
				       floor_log2 (coeff), target, unsignedp);
	      return expand_unop (mode, neg_optab, temp, target, 0);
	    }

	  if (choose_mult_variant (mode, coeff, &algorithm, &variant,
				   max_cost))
	    {
	      rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
					    &algorithm, variant);
	      return expand_unop (mode, neg_optab, temp, target, 0);
	    }
	  goto skip_synth;
	}

      /* Exclude cost of op0 from max_cost to match the cost
	 calculation of the synth_mult.  */
      max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), mode, speed);
      if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
	return expand_mult_const (mode, op0, coeff, target,
				  &algorithm, variant);
    }
 skip_synth:

  /* Expand x*2.0 as x+x.  */
  if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1)
      && real_equal (CONST_DOUBLE_REAL_VALUE (scalar_op1), &dconst2))
    {
      op0 = force_reg (GET_MODE (op0), op0);
      return expand_binop (mode, add_optab, op0, op0,
			   target, unsignedp, OPTAB_LIB_WIDEN);
    }

  /* This used to use umul_optab if unsigned, but for non-widening multiply
     there is no difference between signed and unsigned.  */
  op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
		      op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
  gcc_assert (op0);
  return op0;
}
5b58b39b | 3422 | |
6dd8f4bb BS |
3423 | /* Return a cost estimate for multiplying a register by the given |
3424 | COEFFicient in the given MODE and SPEED. */ | |
3425 | ||
3426 | int | |
ef4bddc2 | 3427 | mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed) |
6dd8f4bb BS |
3428 | { |
3429 | int max_cost; | |
3430 | struct algorithm algorithm; | |
3431 | enum mult_variant variant; | |
3432 | ||
3433 | rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1); | |
e548c9df AM |
3434 | max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg), |
3435 | mode, speed); | |
6dd8f4bb BS |
3436 | if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost)) |
3437 | return algorithm.cost.cost; | |
3438 | else | |
3439 | return max_cost; | |
3440 | } | |
3441 | ||
/* Perform a widening multiplication and return an rtx for the result.
   MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
   TARGET is a suggestion for where to store the result (an rtx).
   THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
   or smul_widen_optab.

   We check specially for a constant integer as OP1, comparing the
   cost of a widening multiply against the cost of a sequence of shifts
   and adds.  */

rtx
expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
		      int unsignedp, optab this_optab)
{
  bool speed = optimize_insn_for_speed_p ();
  rtx cop1;

  /* The synth_mult path applies only when OP1 is a CONST_INT whose
     value, converted to the wide MODE, is still a CONST_INT and is
     either non-negative or fully representable in a HOST_WIDE_INT.  */
  if (CONST_INT_P (op1)
      && GET_MODE (op0) != VOIDmode
      && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
				this_optab == umul_widen_optab))
      && CONST_INT_P (cop1)
      && (INTVAL (cop1) >= 0
	  || HWI_COMPUTABLE_MODE_P (mode)))
    {
      HOST_WIDE_INT coeff = INTVAL (cop1);
      int max_cost;
      enum mult_variant variant;
      struct algorithm algorithm;

      if (coeff == 0)
	return CONST0_RTX (mode);

      /* Special case powers of two.  */
      if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
	{
	  op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
	  return expand_shift (LSHIFT_EXPR, mode, op0,
			       floor_log2 (coeff), target, unsignedp);
	}

      /* Exclude cost of op0 from max_cost to match the cost
	 calculation of the synth_mult.  */
      max_cost = mul_widen_cost (speed, mode);
      if (choose_mult_variant (mode, coeff, &algorithm, &variant,
			       max_cost))
	{
	  op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
	  return expand_mult_const (mode, op0, coeff, target,
				    &algorithm, variant);
	}
    }
  /* Fall back to the widening multiply instruction / libcall.  */
  return expand_binop (mode, this_optab, op0, op1, target,
		       unsignedp, OPTAB_LIB_WIDEN);
}
44037a66 | 3497 | \f |
/* Choose a minimal N + 1 bit approximation to 1/D that can be used to
   replace division by D, and put the least significant N bits of the result
   in *MULTIPLIER_PTR and return the most significant bit.

   The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
   needed precision is in PRECISION (should be <= N).

   PRECISION should be as small as possible so this function can choose
   multiplier more freely.

   The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
   is to be used for a final right shift is placed in *POST_SHIFT_PTR.

   Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
   where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */

unsigned HOST_WIDE_INT
choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
		   unsigned HOST_WIDE_INT *multiplier_ptr,
		   int *post_shift_ptr, int *lgup_ptr)
{
  int lgup, post_shift;
  int pow, pow2;

  /* lgup = ceil(log2(divisor)); */
  lgup = ceil_log2 (d);

  gcc_assert (lgup <= n);

  pow = n + lgup;
  pow2 = n + lgup - precision;

  /* mlow = 2^(N + lgup)/d */
  wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
  wide_int mlow = wi::udiv_trunc (val, d);

  /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
  val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
  wide_int mhigh = wi::udiv_trunc (val, d);

  /* If precision == N, then mlow, mhigh exceed 2^N
     (but they do not exceed 2^(N+1)).  */

  /* Reduce to lowest terms.  While halving keeps mlow and mhigh
     distinct, drop the common low-order bit and decrease the final
     right shift accordingly.  */
  for (post_shift = lgup; post_shift > 0; post_shift--)
    {
      unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
						       HOST_BITS_PER_WIDE_INT);
      unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
						       HOST_BITS_PER_WIDE_INT);
      if (ml_lo >= mh_lo)
	break;

      mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
      mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
    }

  *post_shift_ptr = post_shift;
  *lgup_ptr = lgup;
  if (n < HOST_BITS_PER_WIDE_INT)
    {
      /* Return the top (N+1st) bit; the low N bits go to the caller.  */
      unsigned HOST_WIDE_INT mask = (HOST_WIDE_INT_1U << n) - 1;
      *multiplier_ptr = mhigh.to_uhwi () & mask;
      return mhigh.to_uhwi () >= mask;
    }
  else
    {
      *multiplier_ptr = mhigh.to_uhwi ();
      return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
    }
}
3569 | ||
3570 | /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is | |
3571 | congruent to 1 (mod 2**N). */ | |
3572 | ||
3573 | static unsigned HOST_WIDE_INT | |
502b8322 | 3574 | invert_mod2n (unsigned HOST_WIDE_INT x, int n) |
55c2d311 | 3575 | { |
0f41302f | 3576 | /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */ |
55c2d311 TG |
3577 | |
3578 | /* The algorithm notes that the choice y = x satisfies | |
3579 | x*y == 1 mod 2^3, since x is assumed odd. | |
3580 | Each iteration doubles the number of bits of significance in y. */ | |
3581 | ||
3582 | unsigned HOST_WIDE_INT mask; | |
3583 | unsigned HOST_WIDE_INT y = x; | |
3584 | int nbit = 3; | |
3585 | ||
3586 | mask = (n == HOST_BITS_PER_WIDE_INT | |
dd4786fe | 3587 | ? HOST_WIDE_INT_M1U |
fecfbfa4 | 3588 | : (HOST_WIDE_INT_1U << n) - 1); |
55c2d311 TG |
3589 | |
3590 | while (nbit < n) | |
3591 | { | |
3592 | y = y * (2 - x*y) & mask; /* Modulo 2^N */ | |
3593 | nbit *= 2; | |
3594 | } | |
3595 | return y; | |
3596 | } | |
3597 | ||
3598 | /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness | |
3599 | flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the | |
3600 | product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product | |
3601 | to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to | |
3602 | become signed. | |
3603 | ||
3604 | The result is put in TARGET if that is convenient. | |
3605 | ||
3606 | MODE is the mode of operation. */ | |
3607 | ||
3608 | rtx | |
ef4bddc2 | 3609 | expand_mult_highpart_adjust (machine_mode mode, rtx adj_operand, rtx op0, |
502b8322 | 3610 | rtx op1, rtx target, int unsignedp) |
55c2d311 TG |
3611 | { |
3612 | rtx tem; | |
3613 | enum rtx_code adj_code = unsignedp ? PLUS : MINUS; | |
3614 | ||
3615 | tem = expand_shift (RSHIFT_EXPR, mode, op0, | |
eb6c3df1 | 3616 | GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0); |
22273300 | 3617 | tem = expand_and (mode, tem, op1, NULL_RTX); |
38a448ca RH |
3618 | adj_operand |
3619 | = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem), | |
3620 | adj_operand); | |
55c2d311 TG |
3621 | |
3622 | tem = expand_shift (RSHIFT_EXPR, mode, op1, | |
eb6c3df1 | 3623 | GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0); |
22273300 | 3624 | tem = expand_and (mode, tem, op0, NULL_RTX); |
38a448ca RH |
3625 | target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem), |
3626 | target); | |
55c2d311 TG |
3627 | |
3628 | return target; | |
3629 | } | |
3630 | ||
00f07b86 | 3631 | /* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */ |
55c2d311 | 3632 | |
8efc8980 | 3633 | static rtx |
ef4bddc2 | 3634 | extract_high_half (machine_mode mode, rtx op) |
8efc8980 | 3635 | { |
ef4bddc2 | 3636 | machine_mode wider_mode; |
55c2d311 | 3637 | |
8efc8980 RS |
3638 | if (mode == word_mode) |
3639 | return gen_highpart (mode, op); | |
71af73bb | 3640 | |
15ed7b52 JG |
3641 | gcc_assert (!SCALAR_FLOAT_MODE_P (mode)); |
3642 | ||
490d0f6c | 3643 | wider_mode = GET_MODE_WIDER_MODE (mode).require (); |
8efc8980 | 3644 | op = expand_shift (RSHIFT_EXPR, wider_mode, op, |
eb6c3df1 | 3645 | GET_MODE_BITSIZE (mode), 0, 1); |
8efc8980 RS |
3646 | return convert_modes (mode, wider_mode, op, 0); |
3647 | } | |
55c2d311 | 3648 | |
/* Like expmed_mult_highpart, but only consider using a multiplication
   optab.  OP1 is an rtx for the constant operand.

   Five strategies are tried in order, each gated by a cost estimate
   against MAX_COST; the first one whose expansion succeeds wins:
     1. direct highpart multiply of the requested signedness;
     2. direct highpart multiply of the opposite signedness, plus a
	post-adjustment (expand_mult_highpart_adjust);
     3. widening multiply of the requested signedness, taking the
	high half;
     4. plain multiply in the wider mode on widened operands, taking
	the high half;
     5. widening multiply of the opposite signedness, high half, plus
	the post-adjustment.
   Returns 0 if none fits within MAX_COST or can be expanded.  */

static rtx
expmed_mult_highpart_optab (machine_mode mode, rtx op0, rtx op1,
			    rtx target, int unsignedp, int max_cost)
{
  /* OP1 reduced to MODE; the highpart/widening optabs want the
     narrow-mode constant.  */
  rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
  machine_mode wider_mode;
  optab moptab;
  rtx tem;
  int size;
  bool speed = optimize_insn_for_speed_p ();

  gcc_assert (!SCALAR_FLOAT_MODE_P (mode));

  wider_mode = GET_MODE_WIDER_MODE (mode).require ();
  size = GET_MODE_BITSIZE (mode);

  /* Firstly, try using a multiplication insn that only generates the needed
     high part of the product, and in the sign flavor of unsignedp.  */
  if (mul_highpart_cost (speed, mode) < max_cost)
    {
      moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
      tem = expand_binop (mode, moptab, op0, narrow_op1, target,
			  unsignedp, OPTAB_DIRECT);
      if (tem)
	return tem;
    }

  /* Secondly, same as above, but use sign flavor opposite of unsignedp.
     Need to adjust the result after the multiplication.  The budget
     accounts for the adjustment: two sign-mask shifts and up to four
     add/sub operations.  */
  if (size - 1 < BITS_PER_WORD
      && (mul_highpart_cost (speed, mode)
	  + 2 * shift_cost (speed, mode, size-1)
	  + 4 * add_cost (speed, mode) < max_cost))
    {
      moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
      tem = expand_binop (mode, moptab, op0, narrow_op1, target,
			  unsignedp, OPTAB_DIRECT);
      if (tem)
	/* We used the wrong signedness.  Adjust the result.  */
	return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
					    tem, unsignedp);
    }

  /* Try widening multiplication.  */
  moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
  if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
      && mul_widen_cost (speed, wider_mode) < max_cost)
    {
      tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
			  unsignedp, OPTAB_WIDEN);
      if (tem)
	return extract_high_half (mode, tem);
    }

  /* Try widening the mode and perform a non-widening multiplication.  */
  if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
      && size - 1 < BITS_PER_WORD
      && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
	  < max_cost))
    {
      rtx_insn *insns;
      rtx wop0, wop1;

      /* We need to widen the operands, for example to ensure the
	 constant multiplier is correctly sign or zero extended.
	 Use a sequence to clean-up any instructions emitted by
	 the conversions if things don't work out.  */
      start_sequence ();
      wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
      wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
      tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
			  unsignedp, OPTAB_WIDEN);
      insns = get_insns ();
      end_sequence ();

      if (tem)
	{
	  /* Only commit the conversion/multiply sequence on success.  */
	  emit_insn (insns);
	  return extract_high_half (mode, tem);
	}
    }

  /* Try widening multiplication of opposite signedness, and adjust.  */
  moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
  if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
      && size - 1 < BITS_PER_WORD
      && (mul_widen_cost (speed, wider_mode)
	  + 2 * shift_cost (speed, mode, size-1)
	  + 4 * add_cost (speed, mode) < max_cost))
    {
      tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
			  NULL_RTX, ! unsignedp, OPTAB_WIDEN);
      if (tem != 0)
	{
	  tem = extract_high_half (mode, tem);
	  /* We used the wrong signedness.  Adjust the result.  */
	  return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
					      target, unsignedp);
	}
    }

  return 0;
}
71af73bb | 3755 | |
/* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
   putting the high half of the result in TARGET if that is convenient,
   and return where the result is.  If the operation cannot be performed,
   0 is returned.

   MODE is the mode of operation and result.

   UNSIGNEDP nonzero means unsigned multiply.

   MAX_COST is the total allowed cost for the expanded RTL.

   Compares a shift/add synthesis (choose_mult_variant /
   expand_mult_const in the wider mode, then extracting the high half)
   against the multiplication-optab strategies of
   expmed_mult_highpart_optab, and emits whichever is estimated
   cheaper.  */

static rtx
expmed_mult_highpart (machine_mode mode, rtx op0, rtx op1,
		      rtx target, int unsignedp, int max_cost)
{
  machine_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
  unsigned HOST_WIDE_INT cnst1;
  int extra_cost;
  bool sign_adjust = false;
  enum mult_variant variant;
  struct algorithm alg;
  rtx tem;
  bool speed = optimize_insn_for_speed_p ();

  gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
  /* We can't support modes wider than HOST_BITS_PER_INT.  */
  gcc_assert (HWI_COMPUTABLE_MODE_P (mode));

  cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);

  /* We can't optimize modes wider than BITS_PER_WORD.
     ??? We might be able to perform double-word arithmetic if
     mode == word_mode, however all the cost calculations in
     synth_mult etc. assume single-word operations.  */
  if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
    return expmed_mult_highpart_optab (mode, op0, op1, target,
				       unsignedp, max_cost);

  /* The shift/add route always needs the final high-half extraction
     shift; charge for it up front.  */
  extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);

  /* Check whether we try to multiply by a negative constant.  */
  if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
    {
      /* The synthesis treats CNST1 as unsigned; a trailing subtraction
	 of OP0 corrects the signed high part (see below).  */
      sign_adjust = true;
      extra_cost += add_cost (speed, mode);
    }

  /* See whether shift/add multiplication is cheap enough.  */
  if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
			   max_cost - extra_cost))
    {
      /* See whether the specialized multiplication optabs are
	 cheaper than the shift/add version.  */
      tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
					alg.cost.cost + extra_cost);
      if (tem)
	return tem;

      tem = convert_to_mode (wider_mode, op0, unsignedp);
      tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
      tem = extract_high_half (mode, tem);

      /* Adjust result for signedness.  */
      if (sign_adjust)
	tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);

      return tem;
    }
  /* Shift/add synthesis was too expensive; fall back to the optab
     strategies with the full budget.  */
  return expmed_mult_highpart_optab (mode, op0, op1, target,
				     unsignedp, max_cost);
}
0b55e932 RS |
3827 | |
3828 | ||
/* Expand signed modulus of OP0 by a power of two D in mode MODE.

   When branches are expensive, emits a branch-free sequence built
   around a sign mask (all-ones when OP0 < 0, produced by
   emit_store_flag).  Otherwise emits a masked AND followed by a
   conditional fix-up for negative operands, using one branch.  */

static rtx
expand_smod_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
{
  rtx result, temp, shift;
  rtx_code_label *label;
  int logd;
  int prec = GET_MODE_PRECISION (mode);

  logd = floor_log2 (d);
  result = gen_reg_rtx (mode);

  /* Avoid conditional branches when they're expensive.  */
  if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
      && optimize_insn_for_speed_p ())
    {
      /* signmask = -1 if OP0 < 0, else 0.  */
      rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
				      mode, 0, -1);
      if (signmask)
	{
	  /* Low bits of the modulus: D - 1.  */
	  HOST_WIDE_INT masklow = (HOST_WIDE_INT_1 << logd) - 1;
	  signmask = force_reg (mode, signmask);
	  shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);

	  /* Use the rtx_cost of a LSHIFTRT instruction to determine
	     which instruction sequence to use.  If logical right shifts
	     are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
	     use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */

	  temp = gen_rtx_LSHIFTRT (mode, result, shift);
	  if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
	      || (set_src_cost (temp, mode, optimize_insn_for_speed_p ())
		  > COSTS_N_INSNS (2)))
	    {
	      /* ((op0 ^ mask) - mask) & masklow, then undo:
		 conditional negate before and after the AND.  */
	      temp = expand_binop (mode, xor_optab, op0, signmask,
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      temp = expand_binop (mode, sub_optab, temp, signmask,
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      temp = expand_binop (mode, and_optab, temp,
				   gen_int_mode (masklow, mode),
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      temp = expand_binop (mode, xor_optab, temp, signmask,
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      temp = expand_binop (mode, sub_optab, temp, signmask,
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	    }
	  else
	    {
	      /* Shrink the sign mask to the modulus' low bits, then
		 bias, mask, and un-bias.  */
	      signmask = expand_binop (mode, lshr_optab, signmask, shift,
				       NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      signmask = force_reg (mode, signmask);

	      temp = expand_binop (mode, add_optab, op0, signmask,
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      temp = expand_binop (mode, and_optab, temp,
				   gen_int_mode (masklow, mode),
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      temp = expand_binop (mode, sub_optab, temp, signmask,
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	    }
	  return temp;
	}
    }

  /* Mask contains the mode's signbit and the significant bits of the
     modulus.  By including the signbit in the operation, many targets
     can avoid an explicit compare operation in the following comparison
     against zero.  */
  wide_int mask = wi::mask (logd, false, prec);
  mask = wi::set_bit (mask, prec - 1);

  temp = expand_binop (mode, and_optab, op0,
		       immed_wide_int_const (mask, mode),
		       result, 1, OPTAB_LIB_WIDEN);
  if (temp != result)
    emit_move_insn (result, temp);

  /* Nonnegative results need no fix-up; skip over it.  */
  label = gen_label_rtx ();
  do_cmp_and_jump (result, const0_rtx, GE, mode, label);

  /* Negative case: ((result - 1) | ~(D - 1)) + 1 sign-extends the
     masked remainder back into range.  */
  temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
		       0, OPTAB_LIB_WIDEN);

  mask = wi::mask (logd, true, prec);
  temp = expand_binop (mode, ior_optab, temp,
		       immed_wide_int_const (mask, mode),
		       result, 1, OPTAB_LIB_WIDEN);
  temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
		       0, OPTAB_LIB_WIDEN);
  if (temp != result)
    emit_move_insn (result, temp);
  emit_label (label);
  return result;
}
39cab019 RS |
3924 | |
/* Expand signed division of OP0 by a power of two D in mode MODE.
   This routine is only called for positive values of D.

   Four strategies, chosen by branch cost and available insns:
     1. D == 2: add the sign bit (via emit_store_flag) then arithmetic
	shift right;
     2. conditional move: "t = op0 < 0 ? op0 + (D - 1) : op0", then
	arithmetic shift right;
     3. branch-free bias: build (D - 1) from a sign mask, add, then
	arithmetic shift right;
     4. fallback with a branch: skip the (D - 1) increment when
	OP0 >= 0, then arithmetic shift right.  */

static rtx
expand_sdiv_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
{
  rtx temp;
  rtx_code_label *label;
  int logd;

  logd = floor_log2 (d);

  if (d == 2
      && BRANCH_COST (optimize_insn_for_speed_p (),
		      false) >= 1)
    {
      /* temp = (op0 < 0), i.e. the bias needed for D == 2.  */
      temp = gen_reg_rtx (mode);
      temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
      temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
			   0, OPTAB_LIB_WIDEN);
      return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
    }

  if (HAVE_conditional_move
      && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
    {
      rtx temp2;

      /* Build in a sequence so we can discard it if the conditional
	 move cannot be emitted.  */
      start_sequence ();
      temp2 = copy_to_mode_reg (mode, op0);
      temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
			   NULL_RTX, 0, OPTAB_LIB_WIDEN);
      temp = force_reg (mode, temp);

      /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
      temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
				     mode, temp, temp2, mode, 0);
      if (temp2)
	{
	  rtx_insn *seq = get_insns ();
	  end_sequence ();
	  emit_insn (seq);
	  return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
	}
      end_sequence ();
    }

  if (BRANCH_COST (optimize_insn_for_speed_p (),
		   false) >= 2)
    {
      int ushift = GET_MODE_BITSIZE (mode) - logd;

      /* temp = -1 if op0 < 0, else 0.  */
      temp = gen_reg_rtx (mode);
      temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
      /* Reduce the sign mask to the bias (D - 1), by AND or by a
	 logical shift, whichever is cheaper on this target.  */
      if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
	  || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
	     > COSTS_N_INSNS (1))
	temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode),
			     NULL_RTX, 0, OPTAB_LIB_WIDEN);
      else
	temp = expand_shift (RSHIFT_EXPR, mode, temp,
			     ushift, NULL_RTX, 1);
      temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
			   0, OPTAB_LIB_WIDEN);
      return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
    }

  /* Cheap-branch fallback: bias only when OP0 is negative.  */
  label = gen_label_rtx ();
  temp = copy_to_mode_reg (mode, op0);
  do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
  expand_inc (temp, gen_int_mode (d - 1, mode));
  emit_label (label);
  return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
}
55c2d311 | 3999 | \f |
44037a66 TG |
4000 | /* Emit the code to divide OP0 by OP1, putting the result in TARGET |
4001 | if that is convenient, and returning where the result is. | |
4002 | You may request either the quotient or the remainder as the result; | |
4003 | specify REM_FLAG nonzero to get the remainder. | |
4004 | ||
4005 | CODE is the expression code for which kind of division this is; | |
4006 | it controls how rounding is done. MODE is the machine mode to use. | |
4007 | UNSIGNEDP nonzero means do unsigned division. */ | |
4008 | ||
4009 | /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI | |
4010 | and then correct it by or'ing in missing high bits | |
4011 | if result of ANDI is nonzero. | |
4012 | For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result. | |
4013 | This could optimize to a bfexts instruction. | |
4014 | But C doesn't use these operations, so their optimizations are | |
4015 | left for later. */ | |
5353610b R |
4016 | /* ??? For modulo, we don't actually need the highpart of the first product, |
4017 | the low part will do nicely. And for small divisors, the second multiply | |
4018 | can also be a low-part only multiply or even be completely left out. | |
4019 | E.g. to calculate the remainder of a division by 3 with a 32 bit | |
4020 | multiply, multiply with 0x55555556 and extract the upper two bits; | |
4021 | the result is exact for inputs up to 0x1fffffff. | |
4022 | The input range can be reduced by using cross-sum rules. | |
4023 | For odd divisors >= 3, the following table gives right shift counts | |
09da1532 | 4024 | so that if a number is shifted by an integer multiple of the given |
5353610b R |
4025 | amount, the remainder stays the same: |
4026 | 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20, | |
4027 | 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0, | |
4028 | 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0, | |
4029 | 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33, | |
4030 | 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12 | |
4031 | ||
4032 | Cross-sum rules for even numbers can be derived by leaving as many bits | |
4033 | to the right alone as the divisor has zeros to the right. | |
4034 | E.g. if x is an unsigned 32 bit number: | |
4035 | (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28 | |
4036 | */ | |
44037a66 TG |
4037 | |
4038 | rtx | |
ef4bddc2 | 4039 | expand_divmod (int rem_flag, enum tree_code code, machine_mode mode, |
502b8322 | 4040 | rtx op0, rtx op1, rtx target, int unsignedp) |
44037a66 | 4041 | { |
ef4bddc2 | 4042 | machine_mode compute_mode; |
b3694847 | 4043 | rtx tquotient; |
55c2d311 | 4044 | rtx quotient = 0, remainder = 0; |
f3f6fb16 | 4045 | rtx_insn *last; |
2c414fba | 4046 | int size; |
f3f6fb16 | 4047 | rtx_insn *insn; |
44037a66 | 4048 | optab optab1, optab2; |
1c4a429a | 4049 | int op1_is_constant, op1_is_pow2 = 0; |
71af73bb | 4050 | int max_cost, extra_cost; |
9ec36da5 | 4051 | static HOST_WIDE_INT last_div_const = 0; |
f40751dd | 4052 | bool speed = optimize_insn_for_speed_p (); |
55c2d311 | 4053 | |
481683e1 | 4054 | op1_is_constant = CONST_INT_P (op1); |
1c4a429a JH |
4055 | if (op1_is_constant) |
4056 | { | |
76a7314d JJ |
4057 | wide_int ext_op1 = rtx_mode_t (op1, mode); |
4058 | op1_is_pow2 = (wi::popcount (ext_op1) == 1 | |
4059 | || (! unsignedp | |
4060 | && wi::popcount (wi::neg (ext_op1)) == 1)); | |
1c4a429a | 4061 | } |
55c2d311 TG |
4062 | |
4063 | /* | |
4064 | This is the structure of expand_divmod: | |
4065 | ||
4066 | First comes code to fix up the operands so we can perform the operations | |
4067 | correctly and efficiently. | |
4068 | ||
4069 | Second comes a switch statement with code specific for each rounding mode. | |
4070 | For some special operands this code emits all RTL for the desired | |
69f61901 | 4071 | operation, for other cases, it generates only a quotient and stores it in |
55c2d311 TG |
4072 | QUOTIENT. The case for trunc division/remainder might leave quotient = 0, |
4073 | to indicate that it has not done anything. | |
4074 | ||
69f61901 RK |
4075 | Last comes code that finishes the operation. If QUOTIENT is set and |
4076 | REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If | |
4077 | QUOTIENT is not set, it is computed using trunc rounding. | |
44037a66 | 4078 | |
55c2d311 TG |
4079 | We try to generate special code for division and remainder when OP1 is a |
4080 | constant. If |OP1| = 2**n we can use shifts and some other fast | |
4081 | operations. For other values of OP1, we compute a carefully selected | |
4082 | fixed-point approximation m = 1/OP1, and generate code that multiplies OP0 | |
4083 | by m. | |
4084 | ||
4085 | In all cases but EXACT_DIV_EXPR, this multiplication requires the upper | |
4086 | half of the product. Different strategies for generating the product are | |
00f07b86 | 4087 | implemented in expmed_mult_highpart. |
55c2d311 TG |
4088 | |
4089 | If what we actually want is the remainder, we generate that by another | |
4090 | by-constant multiplication and a subtraction. */ | |
4091 | ||
4092 | /* We shouldn't be called with OP1 == const1_rtx, but some of the | |
3d32ffd1 TW |
4093 | code below will malfunction if we are, so check here and handle |
4094 | the special case if so. */ | |
4095 | if (op1 == const1_rtx) | |
4096 | return rem_flag ? const0_rtx : op0; | |
4097 | ||
91ce572a CC |
4098 | /* When dividing by -1, we could get an overflow. |
4099 | negv_optab can handle overflows. */ | |
4100 | if (! unsignedp && op1 == constm1_rtx) | |
4101 | { | |
4102 | if (rem_flag) | |
0fb7aeda | 4103 | return const0_rtx; |
c3284718 | 4104 | return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT |
0fb7aeda | 4105 | ? negv_optab : neg_optab, op0, target, 0); |
91ce572a CC |
4106 | } |
4107 | ||
bc1c7e93 RK |
4108 | if (target |
4109 | /* Don't use the function value register as a target | |
4110 | since we have to read it as well as write it, | |
4111 | and function-inlining gets confused by this. */ | |
4112 | && ((REG_P (target) && REG_FUNCTION_VALUE_P (target)) | |
4113 | /* Don't clobber an operand while doing a multi-step calculation. */ | |
515dfc7a | 4114 | || ((rem_flag || op1_is_constant) |
bc1c7e93 | 4115 | && (reg_mentioned_p (target, op0) |
3c0cb5de | 4116 | || (MEM_P (op0) && MEM_P (target)))) |
bc1c7e93 | 4117 | || reg_mentioned_p (target, op1) |
3c0cb5de | 4118 | || (MEM_P (op1) && MEM_P (target)))) |
44037a66 TG |
4119 | target = 0; |
4120 | ||
44037a66 TG |
4121 | /* Get the mode in which to perform this computation. Normally it will |
4122 | be MODE, but sometimes we can't do the desired operation in MODE. | |
4123 | If so, pick a wider mode in which we can do the operation. Convert | |
4124 | to that mode at the start to avoid repeated conversions. | |
4125 | ||
4126 | First see what operations we need. These depend on the expression | |
4127 | we are evaluating. (We assume that divxx3 insns exist under the | |
4128 | same conditions that modxx3 insns and that these insns don't normally | |
4129 | fail. If these assumptions are not correct, we may generate less | |
4130 | efficient code in some cases.) | |
4131 | ||
4132 | Then see if we find a mode in which we can open-code that operation | |
4133 | (either a division, modulus, or shift). Finally, check for the smallest | |
4134 | mode for which we can do the operation with a library call. */ | |
4135 | ||
55c2d311 | 4136 | /* We might want to refine this now that we have division-by-constant |
00f07b86 | 4137 | optimization. Since expmed_mult_highpart tries so many variants, it is |
55c2d311 TG |
4138 | not straightforward to generalize this. Maybe we should make an array |
4139 | of possible modes in init_expmed? Save this for GCC 2.7. */ | |
4140 | ||
76a7314d | 4141 | optab1 = (op1_is_pow2 |
556a56ac | 4142 | ? (unsignedp ? lshr_optab : ashr_optab) |
44037a66 | 4143 | : (unsignedp ? udiv_optab : sdiv_optab)); |
76a7314d | 4144 | optab2 = (op1_is_pow2 ? optab1 |
556a56ac | 4145 | : (unsignedp ? udivmod_optab : sdivmod_optab)); |
44037a66 | 4146 | |
c94843d2 | 4147 | FOR_EACH_MODE_FROM (compute_mode, mode) |
947131ba RS |
4148 | if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing |
4149 | || optab_handler (optab2, compute_mode) != CODE_FOR_nothing) | |
44037a66 TG |
4150 | break; |
4151 | ||
4152 | if (compute_mode == VOIDmode) | |
c94843d2 | 4153 | FOR_EACH_MODE_FROM (compute_mode, mode) |
8a33f100 JH |
4154 | if (optab_libfunc (optab1, compute_mode) |
4155 | || optab_libfunc (optab2, compute_mode)) | |
44037a66 TG |
4156 | break; |
4157 | ||
535a42b1 NS |
4158 | /* If we still couldn't find a mode, use MODE, but expand_binop will |
4159 | probably die. */ | |
44037a66 TG |
4160 | if (compute_mode == VOIDmode) |
4161 | compute_mode = mode; | |
4162 | ||
55c2d311 TG |
4163 | if (target && GET_MODE (target) == compute_mode) |
4164 | tquotient = target; | |
4165 | else | |
4166 | tquotient = gen_reg_rtx (compute_mode); | |
2c414fba | 4167 | |
55c2d311 TG |
4168 | size = GET_MODE_BITSIZE (compute_mode); |
4169 | #if 0 | |
4170 | /* It should be possible to restrict the precision to GET_MODE_BITSIZE | |
71af73bb TG |
4171 | (mode), and thereby get better code when OP1 is a constant. Do that |
4172 | later. It will require going over all usages of SIZE below. */ | |
55c2d311 TG |
4173 | size = GET_MODE_BITSIZE (mode); |
4174 | #endif | |
bc1c7e93 | 4175 | |
9ec36da5 JL |
4176 | /* Only deduct something for a REM if the last divide done was |
4177 | for a different constant. Then set the constant of the last | |
4178 | divide. */ | |
5322d07e NF |
4179 | max_cost = (unsignedp |
4180 | ? udiv_cost (speed, compute_mode) | |
4181 | : sdiv_cost (speed, compute_mode)); | |
a28b2ac6 RS |
4182 | if (rem_flag && ! (last_div_const != 0 && op1_is_constant |
4183 | && INTVAL (op1) == last_div_const)) | |
5322d07e NF |
4184 | max_cost -= (mul_cost (speed, compute_mode) |
4185 | + add_cost (speed, compute_mode)); | |
9ec36da5 JL |
4186 | |
4187 | last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0; | |
71af73bb | 4188 | |
55c2d311 | 4189 | /* Now convert to the best mode to use. */ |
44037a66 TG |
4190 | if (compute_mode != mode) |
4191 | { | |
55c2d311 | 4192 | op0 = convert_modes (compute_mode, mode, op0, unsignedp); |
81722fa9 | 4193 | op1 = convert_modes (compute_mode, mode, op1, unsignedp); |
e13a25d5 | 4194 | |
e9a25f70 JL |
4195 | /* convert_modes may have placed op1 into a register, so we |
4196 | must recompute the following. */ | |
481683e1 | 4197 | op1_is_constant = CONST_INT_P (op1); |
76a7314d JJ |
4198 | if (op1_is_constant) |
4199 | { | |
4200 | wide_int ext_op1 = rtx_mode_t (op1, compute_mode); | |
4201 | op1_is_pow2 = (wi::popcount (ext_op1) == 1 | |
4202 | || (! unsignedp | |
4203 | && wi::popcount (wi::neg (ext_op1)) == 1)); | |
4204 | } | |
4205 | else | |
4206 | op1_is_pow2 = 0; | |
44037a66 TG |
4207 | } |
4208 | ||
55c2d311 | 4209 | /* If one of the operands is a volatile MEM, copy it into a register. */ |
c2a47e48 | 4210 | |
3c0cb5de | 4211 | if (MEM_P (op0) && MEM_VOLATILE_P (op0)) |
55c2d311 | 4212 | op0 = force_reg (compute_mode, op0); |
3c0cb5de | 4213 | if (MEM_P (op1) && MEM_VOLATILE_P (op1)) |
c2a47e48 RK |
4214 | op1 = force_reg (compute_mode, op1); |
4215 | ||
ab0b6581 TG |
4216 | /* If we need the remainder or if OP1 is constant, we need to |
4217 | put OP0 in a register in case it has any queued subexpressions. */ | |
4218 | if (rem_flag || op1_is_constant) | |
4219 | op0 = force_reg (compute_mode, op0); | |
bc1c7e93 | 4220 | |
55c2d311 | 4221 | last = get_last_insn (); |
44037a66 | 4222 | |
9faa82d8 | 4223 | /* Promote floor rounding to trunc rounding for unsigned operations. */ |
55c2d311 | 4224 | if (unsignedp) |
44037a66 | 4225 | { |
55c2d311 TG |
4226 | if (code == FLOOR_DIV_EXPR) |
4227 | code = TRUNC_DIV_EXPR; | |
4228 | if (code == FLOOR_MOD_EXPR) | |
4229 | code = TRUNC_MOD_EXPR; | |
db7cafb0 JL |
4230 | if (code == EXACT_DIV_EXPR && op1_is_pow2) |
4231 | code = TRUNC_DIV_EXPR; | |
55c2d311 | 4232 | } |
bc1c7e93 | 4233 | |
55c2d311 TG |
4234 | if (op1 != const0_rtx) |
4235 | switch (code) | |
4236 | { | |
4237 | case TRUNC_MOD_EXPR: | |
4238 | case TRUNC_DIV_EXPR: | |
34f016ed | 4239 | if (op1_is_constant) |
55c2d311 | 4240 | { |
d8f1376c | 4241 | if (unsignedp) |
55c2d311 | 4242 | { |
079c527f | 4243 | unsigned HOST_WIDE_INT mh, ml; |
55c2d311 TG |
4244 | int pre_shift, post_shift; |
4245 | int dummy; | |
76a7314d JJ |
4246 | wide_int wd = rtx_mode_t (op1, compute_mode); |
4247 | unsigned HOST_WIDE_INT d = wd.to_uhwi (); | |
55c2d311 | 4248 | |
76a7314d | 4249 | if (wi::popcount (wd) == 1) |
55c2d311 TG |
4250 | { |
4251 | pre_shift = floor_log2 (d); | |
4252 | if (rem_flag) | |
4253 | { | |
2f1cd2eb | 4254 | unsigned HOST_WIDE_INT mask |
fecfbfa4 | 4255 | = (HOST_WIDE_INT_1U << pre_shift) - 1; |
db3cf6fb MS |
4256 | remainder |
4257 | = expand_binop (compute_mode, and_optab, op0, | |
2f1cd2eb | 4258 | gen_int_mode (mask, compute_mode), |
db3cf6fb MS |
4259 | remainder, 1, |
4260 | OPTAB_LIB_WIDEN); | |
55c2d311 | 4261 | if (remainder) |
c8dbc8ca | 4262 | return gen_lowpart (mode, remainder); |
55c2d311 TG |
4263 | } |
4264 | quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0, | |
eb6c3df1 | 4265 | pre_shift, tquotient, 1); |
55c2d311 | 4266 | } |
34f016ed | 4267 | else if (size <= HOST_BITS_PER_WIDE_INT) |
55c2d311 | 4268 | { |
fecfbfa4 | 4269 | if (d >= (HOST_WIDE_INT_1U << (size - 1))) |
55c2d311 | 4270 | { |
dc1d6150 TG |
4271 | /* Most significant bit of divisor is set; emit an scc |
4272 | insn. */ | |
b45f0e58 PB |
4273 | quotient = emit_store_flag_force (tquotient, GEU, op0, op1, |
4274 | compute_mode, 1, 1); | |
55c2d311 TG |
4275 | } |
4276 | else | |
4277 | { | |
dc1d6150 TG |
4278 | /* Find a suitable multiplier and right shift count |
4279 | instead of multiplying with D. */ | |
4280 | ||
4281 | mh = choose_multiplier (d, size, size, | |
4282 | &ml, &post_shift, &dummy); | |
4283 | ||
4284 | /* If the suggested multiplier is more than SIZE bits, | |
4285 | we can do better for even divisors, using an | |
4286 | initial right shift. */ | |
4287 | if (mh != 0 && (d & 1) == 0) | |
4288 | { | |
146ec50f | 4289 | pre_shift = ctz_or_zero (d); |
dc1d6150 TG |
4290 | mh = choose_multiplier (d >> pre_shift, size, |
4291 | size - pre_shift, | |
4292 | &ml, &post_shift, &dummy); | |
5b0264cb | 4293 | gcc_assert (!mh); |
dc1d6150 TG |
4294 | } |
4295 | else | |
4296 | pre_shift = 0; | |
4297 | ||
4298 | if (mh != 0) | |
4299 | { | |
4300 | rtx t1, t2, t3, t4; | |
4301 | ||
02a65aef R |
4302 | if (post_shift - 1 >= BITS_PER_WORD) |
4303 | goto fail1; | |
4304 | ||
965703ed | 4305 | extra_cost |
5322d07e NF |
4306 | = (shift_cost (speed, compute_mode, post_shift - 1) |
4307 | + shift_cost (speed, compute_mode, 1) | |
4308 | + 2 * add_cost (speed, compute_mode)); | |
2f1cd2eb RS |
4309 | t1 = expmed_mult_highpart |
4310 | (compute_mode, op0, | |
4311 | gen_int_mode (ml, compute_mode), | |
4312 | NULL_RTX, 1, max_cost - extra_cost); | |
dc1d6150 TG |
4313 | if (t1 == 0) |
4314 | goto fail1; | |
38a448ca RH |
4315 | t2 = force_operand (gen_rtx_MINUS (compute_mode, |
4316 | op0, t1), | |
dc1d6150 | 4317 | NULL_RTX); |
eb6c3df1 RG |
4318 | t3 = expand_shift (RSHIFT_EXPR, compute_mode, |
4319 | t2, 1, NULL_RTX, 1); | |
38a448ca RH |
4320 | t4 = force_operand (gen_rtx_PLUS (compute_mode, |
4321 | t1, t3), | |
dc1d6150 | 4322 | NULL_RTX); |
4a90aeeb NS |
4323 | quotient = expand_shift |
4324 | (RSHIFT_EXPR, compute_mode, t4, | |
eb6c3df1 | 4325 | post_shift - 1, tquotient, 1); |
dc1d6150 TG |
4326 | } |
4327 | else | |
4328 | { | |
4329 | rtx t1, t2; | |
4330 | ||
02a65aef R |
4331 | if (pre_shift >= BITS_PER_WORD |
4332 | || post_shift >= BITS_PER_WORD) | |
4333 | goto fail1; | |
4334 | ||
4a90aeeb NS |
4335 | t1 = expand_shift |
4336 | (RSHIFT_EXPR, compute_mode, op0, | |
eb6c3df1 | 4337 | pre_shift, NULL_RTX, 1); |
965703ed | 4338 | extra_cost |
5322d07e NF |
4339 | = (shift_cost (speed, compute_mode, pre_shift) |
4340 | + shift_cost (speed, compute_mode, post_shift)); | |
2f1cd2eb RS |
4341 | t2 = expmed_mult_highpart |
4342 | (compute_mode, t1, | |
4343 | gen_int_mode (ml, compute_mode), | |
4344 | NULL_RTX, 1, max_cost - extra_cost); | |
dc1d6150 TG |
4345 | if (t2 == 0) |
4346 | goto fail1; | |
4a90aeeb NS |
4347 | quotient = expand_shift |
4348 | (RSHIFT_EXPR, compute_mode, t2, | |
eb6c3df1 | 4349 | post_shift, tquotient, 1); |
dc1d6150 | 4350 | } |
55c2d311 TG |
4351 | } |
4352 | } | |
34f016ed TG |
4353 | else /* Too wide mode to use tricky code */ |
4354 | break; | |
55c2d311 TG |
4355 | |
4356 | insn = get_last_insn (); | |
7543f918 JR |
4357 | if (insn != last) |
4358 | set_dst_reg_note (insn, REG_EQUAL, | |
4359 | gen_rtx_UDIV (compute_mode, op0, op1), | |
4360 | quotient); | |
55c2d311 TG |
4361 | } |
4362 | else /* TRUNC_DIV, signed */ | |
4363 | { | |
4364 | unsigned HOST_WIDE_INT ml; | |
4365 | int lgup, post_shift; | |
e71c0aa7 | 4366 | rtx mlr; |
55c2d311 | 4367 | HOST_WIDE_INT d = INTVAL (op1); |
e4c9f3c2 ILT |
4368 | unsigned HOST_WIDE_INT abs_d; |
4369 | ||
093253be ILT |
4370 | /* Since d might be INT_MIN, we have to cast to |
4371 | unsigned HOST_WIDE_INT before negating to avoid | |
4372 | undefined signed overflow. */ | |
6d9c91e9 ILT |
4373 | abs_d = (d >= 0 |
4374 | ? (unsigned HOST_WIDE_INT) d | |
4375 | : - (unsigned HOST_WIDE_INT) d); | |
55c2d311 TG |
4376 | |
4377 | /* n rem d = n rem -d */ | |
4378 | if (rem_flag && d < 0) | |
4379 | { | |
4380 | d = abs_d; | |
2496c7bd | 4381 | op1 = gen_int_mode (abs_d, compute_mode); |
55c2d311 TG |
4382 | } |
4383 | ||
4384 | if (d == 1) | |
4385 | quotient = op0; | |
4386 | else if (d == -1) | |
4387 | quotient = expand_unop (compute_mode, neg_optab, op0, | |
4388 | tquotient, 0); | |
76a7314d | 4389 | else if (size <= HOST_BITS_PER_WIDE_INT |
fecfbfa4 | 4390 | && abs_d == HOST_WIDE_INT_1U << (size - 1)) |
f737b132 RK |
4391 | { |
4392 | /* This case is not handled correctly below. */ | |
4393 | quotient = emit_store_flag (tquotient, EQ, op0, op1, | |
4394 | compute_mode, 1, 1); | |
4395 | if (quotient == 0) | |
4396 | goto fail1; | |
4397 | } | |
55c2d311 | 4398 | else if (EXACT_POWER_OF_2_OR_ZERO_P (d) |
76a7314d | 4399 | && (size <= HOST_BITS_PER_WIDE_INT || d >= 0) |
5322d07e NF |
4400 | && (rem_flag |
4401 | ? smod_pow2_cheap (speed, compute_mode) | |
4402 | : sdiv_pow2_cheap (speed, compute_mode)) | |
0b55e932 RS |
4403 | /* We assume that cheap metric is true if the |
4404 | optab has an expander for this mode. */ | |
166cdb08 JH |
4405 | && ((optab_handler ((rem_flag ? smod_optab |
4406 | : sdiv_optab), | |
947131ba | 4407 | compute_mode) |
a8c7e72d | 4408 | != CODE_FOR_nothing) |
947131ba RS |
4409 | || (optab_handler (sdivmod_optab, |
4410 | compute_mode) | |
4411 | != CODE_FOR_nothing))) | |
55c2d311 | 4412 | ; |
76a7314d JJ |
4413 | else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d) |
4414 | && (size <= HOST_BITS_PER_WIDE_INT | |
4415 | || abs_d != (unsigned HOST_WIDE_INT) d)) | |
55c2d311 | 4416 | { |
0b55e932 RS |
4417 | if (rem_flag) |
4418 | { | |
4419 | remainder = expand_smod_pow2 (compute_mode, op0, d); | |
4420 | if (remainder) | |
4421 | return gen_lowpart (mode, remainder); | |
4422 | } | |
3d520aaf | 4423 | |
5322d07e | 4424 | if (sdiv_pow2_cheap (speed, compute_mode) |
947131ba | 4425 | && ((optab_handler (sdiv_optab, compute_mode) |
3d520aaf | 4426 | != CODE_FOR_nothing) |
947131ba | 4427 | || (optab_handler (sdivmod_optab, compute_mode) |
3d520aaf DE |
4428 | != CODE_FOR_nothing))) |
4429 | quotient = expand_divmod (0, TRUNC_DIV_EXPR, | |
4430 | compute_mode, op0, | |
4431 | gen_int_mode (abs_d, | |
4432 | compute_mode), | |
4433 | NULL_RTX, 0); | |
4434 | else | |
4435 | quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d); | |
55c2d311 | 4436 | |
0b55e932 RS |
4437 | /* We have computed OP0 / abs(OP1). If OP1 is negative, |
4438 | negate the quotient. */ | |
55c2d311 TG |
4439 | if (d < 0) |
4440 | { | |
4441 | insn = get_last_insn (); | |
4e430df8 | 4442 | if (insn != last |
fecfbfa4 | 4443 | && abs_d < (HOST_WIDE_INT_1U |
c8e7fe58 | 4444 | << (HOST_BITS_PER_WIDE_INT - 1))) |
7543f918 JR |
4445 | set_dst_reg_note (insn, REG_EQUAL, |
4446 | gen_rtx_DIV (compute_mode, op0, | |
6d26322f JR |
4447 | gen_int_mode |
4448 | (abs_d, | |
4449 | compute_mode)), | |
7543f918 | 4450 | quotient); |
55c2d311 TG |
4451 | |
4452 | quotient = expand_unop (compute_mode, neg_optab, | |
4453 | quotient, quotient, 0); | |
4454 | } | |
4455 | } | |
34f016ed | 4456 | else if (size <= HOST_BITS_PER_WIDE_INT) |
55c2d311 TG |
4457 | { |
4458 | choose_multiplier (abs_d, size, size - 1, | |
079c527f | 4459 | &ml, &post_shift, &lgup); |
fecfbfa4 | 4460 | if (ml < HOST_WIDE_INT_1U << (size - 1)) |
55c2d311 TG |
4461 | { |
4462 | rtx t1, t2, t3; | |
4463 | ||
02a65aef R |
4464 | if (post_shift >= BITS_PER_WORD |
4465 | || size - 1 >= BITS_PER_WORD) | |
4466 | goto fail1; | |
4467 | ||
5322d07e NF |
4468 | extra_cost = (shift_cost (speed, compute_mode, post_shift) |
4469 | + shift_cost (speed, compute_mode, size - 1) | |
4470 | + add_cost (speed, compute_mode)); | |
2f1cd2eb RS |
4471 | t1 = expmed_mult_highpart |
4472 | (compute_mode, op0, gen_int_mode (ml, compute_mode), | |
4473 | NULL_RTX, 0, max_cost - extra_cost); | |
55c2d311 TG |
4474 | if (t1 == 0) |
4475 | goto fail1; | |
4a90aeeb NS |
4476 | t2 = expand_shift |
4477 | (RSHIFT_EXPR, compute_mode, t1, | |
eb6c3df1 | 4478 | post_shift, NULL_RTX, 0); |
4a90aeeb NS |
4479 | t3 = expand_shift |
4480 | (RSHIFT_EXPR, compute_mode, op0, | |
eb6c3df1 | 4481 | size - 1, NULL_RTX, 0); |
55c2d311 | 4482 | if (d < 0) |
c5c76735 JL |
4483 | quotient |
4484 | = force_operand (gen_rtx_MINUS (compute_mode, | |
4485 | t3, t2), | |
4486 | tquotient); | |
55c2d311 | 4487 | else |
c5c76735 JL |
4488 | quotient |
4489 | = force_operand (gen_rtx_MINUS (compute_mode, | |
4490 | t2, t3), | |
4491 | tquotient); | |
55c2d311 TG |
4492 | } |
4493 | else | |
4494 | { | |
4495 | rtx t1, t2, t3, t4; | |
4496 | ||
02a65aef R |
4497 | if (post_shift >= BITS_PER_WORD |
4498 | || size - 1 >= BITS_PER_WORD) | |
4499 | goto fail1; | |
4500 | ||
dd4786fe | 4501 | ml |= HOST_WIDE_INT_M1U << (size - 1); |
e71c0aa7 | 4502 | mlr = gen_int_mode (ml, compute_mode); |
5322d07e NF |
4503 | extra_cost = (shift_cost (speed, compute_mode, post_shift) |
4504 | + shift_cost (speed, compute_mode, size - 1) | |
4505 | + 2 * add_cost (speed, compute_mode)); | |
00f07b86 | 4506 | t1 = expmed_mult_highpart (compute_mode, op0, mlr, |
71af73bb TG |
4507 | NULL_RTX, 0, |
4508 | max_cost - extra_cost); | |
55c2d311 TG |
4509 | if (t1 == 0) |
4510 | goto fail1; | |
c5c76735 JL |
4511 | t2 = force_operand (gen_rtx_PLUS (compute_mode, |
4512 | t1, op0), | |
55c2d311 | 4513 | NULL_RTX); |
4a90aeeb NS |
4514 | t3 = expand_shift |
4515 | (RSHIFT_EXPR, compute_mode, t2, | |
eb6c3df1 | 4516 | post_shift, NULL_RTX, 0); |
4a90aeeb NS |
4517 | t4 = expand_shift |
4518 | (RSHIFT_EXPR, compute_mode, op0, | |
eb6c3df1 | 4519 | size - 1, NULL_RTX, 0); |
55c2d311 | 4520 | if (d < 0) |
c5c76735 JL |
4521 | quotient |
4522 | = force_operand (gen_rtx_MINUS (compute_mode, | |
4523 | t4, t3), | |
4524 | tquotient); | |
55c2d311 | 4525 | else |
c5c76735 JL |
4526 | quotient |
4527 | = force_operand (gen_rtx_MINUS (compute_mode, | |
4528 | t3, t4), | |
4529 | tquotient); | |
55c2d311 TG |
4530 | } |
4531 | } | |
34f016ed TG |
4532 | else /* Too wide mode to use tricky code */ |
4533 | break; | |
55c2d311 | 4534 | |
4e430df8 | 4535 | insn = get_last_insn (); |
7543f918 JR |
4536 | if (insn != last) |
4537 | set_dst_reg_note (insn, REG_EQUAL, | |
4538 | gen_rtx_DIV (compute_mode, op0, op1), | |
4539 | quotient); | |
55c2d311 TG |
4540 | } |
4541 | break; | |
4542 | } | |
4543 | fail1: | |
4544 | delete_insns_since (last); | |
4545 | break; | |
44037a66 | 4546 | |
55c2d311 TG |
4547 | case FLOOR_DIV_EXPR: |
4548 | case FLOOR_MOD_EXPR: | |
4549 | /* We will come here only for signed operations. */ | |
76a7314d | 4550 | if (op1_is_constant && size <= HOST_BITS_PER_WIDE_INT) |
55c2d311 | 4551 | { |
079c527f | 4552 | unsigned HOST_WIDE_INT mh, ml; |
55c2d311 TG |
4553 | int pre_shift, lgup, post_shift; |
4554 | HOST_WIDE_INT d = INTVAL (op1); | |
4555 | ||
4556 | if (d > 0) | |
4557 | { | |
4558 | /* We could just as easily deal with negative constants here, | |
4559 | but it does not seem worth the trouble for GCC 2.6. */ | |
4560 | if (EXACT_POWER_OF_2_OR_ZERO_P (d)) | |
4561 | { | |
4562 | pre_shift = floor_log2 (d); | |
4563 | if (rem_flag) | |
4564 | { | |
2f1cd2eb | 4565 | unsigned HOST_WIDE_INT mask |
fecfbfa4 | 4566 | = (HOST_WIDE_INT_1U << pre_shift) - 1; |
2f1cd2eb RS |
4567 | remainder = expand_binop |
4568 | (compute_mode, and_optab, op0, | |
4569 | gen_int_mode (mask, compute_mode), | |
4570 | remainder, 0, OPTAB_LIB_WIDEN); | |
55c2d311 | 4571 | if (remainder) |
c8dbc8ca | 4572 | return gen_lowpart (mode, remainder); |
55c2d311 | 4573 | } |
4a90aeeb NS |
4574 | quotient = expand_shift |
4575 | (RSHIFT_EXPR, compute_mode, op0, | |
eb6c3df1 | 4576 | pre_shift, tquotient, 0); |
55c2d311 TG |
4577 | } |
4578 | else | |
4579 | { | |
4580 | rtx t1, t2, t3, t4; | |
4581 | ||
4582 | mh = choose_multiplier (d, size, size - 1, | |
4583 | &ml, &post_shift, &lgup); | |
5b0264cb | 4584 | gcc_assert (!mh); |
55c2d311 | 4585 | |
02a65aef R |
4586 | if (post_shift < BITS_PER_WORD |
4587 | && size - 1 < BITS_PER_WORD) | |
55c2d311 | 4588 | { |
4a90aeeb NS |
4589 | t1 = expand_shift |
4590 | (RSHIFT_EXPR, compute_mode, op0, | |
eb6c3df1 | 4591 | size - 1, NULL_RTX, 0); |
02a65aef R |
4592 | t2 = expand_binop (compute_mode, xor_optab, op0, t1, |
4593 | NULL_RTX, 0, OPTAB_WIDEN); | |
5322d07e NF |
4594 | extra_cost = (shift_cost (speed, compute_mode, post_shift) |
4595 | + shift_cost (speed, compute_mode, size - 1) | |
4596 | + 2 * add_cost (speed, compute_mode)); | |
2f1cd2eb RS |
4597 | t3 = expmed_mult_highpart |
4598 | (compute_mode, t2, gen_int_mode (ml, compute_mode), | |
4599 | NULL_RTX, 1, max_cost - extra_cost); | |
02a65aef R |
4600 | if (t3 != 0) |
4601 | { | |
4a90aeeb NS |
4602 | t4 = expand_shift |
4603 | (RSHIFT_EXPR, compute_mode, t3, | |
eb6c3df1 | 4604 | post_shift, NULL_RTX, 1); |
02a65aef R |
4605 | quotient = expand_binop (compute_mode, xor_optab, |
4606 | t4, t1, tquotient, 0, | |
4607 | OPTAB_WIDEN); | |
4608 | } | |
55c2d311 TG |
4609 | } |
4610 | } | |
4611 | } | |
4612 | else | |
4613 | { | |
4614 | rtx nsign, t1, t2, t3, t4; | |
38a448ca RH |
4615 | t1 = force_operand (gen_rtx_PLUS (compute_mode, |
4616 | op0, constm1_rtx), NULL_RTX); | |
55c2d311 TG |
4617 | t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX, |
4618 | 0, OPTAB_WIDEN); | |
76a7314d JJ |
4619 | nsign = expand_shift (RSHIFT_EXPR, compute_mode, t2, |
4620 | size - 1, NULL_RTX, 0); | |
38a448ca | 4621 | t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign), |
55c2d311 TG |
4622 | NULL_RTX); |
4623 | t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1, | |
4624 | NULL_RTX, 0); | |
4625 | if (t4) | |
4626 | { | |
4627 | rtx t5; | |
4628 | t5 = expand_unop (compute_mode, one_cmpl_optab, nsign, | |
4629 | NULL_RTX, 0); | |
38a448ca RH |
4630 | quotient = force_operand (gen_rtx_PLUS (compute_mode, |
4631 | t4, t5), | |
55c2d311 TG |
4632 | tquotient); |
4633 | } | |
4634 | } | |
4635 | } | |
4636 | ||
4637 | if (quotient != 0) | |
4638 | break; | |
4639 | delete_insns_since (last); | |
4640 | ||
4641 | /* Try using an instruction that produces both the quotient and | |
4642 | remainder, using truncation. We can easily compensate the quotient | |
4643 | or remainder to get floor rounding, once we have the remainder. | |
4644 | Notice that we compute also the final remainder value here, | |
4645 | and return the result right away. */ | |
a45cf58c | 4646 | if (target == 0 || GET_MODE (target) != compute_mode) |
55c2d311 | 4647 | target = gen_reg_rtx (compute_mode); |
668443c9 | 4648 | |
55c2d311 TG |
4649 | if (rem_flag) |
4650 | { | |
668443c9 | 4651 | remainder |
f8cfc6aa | 4652 | = REG_P (target) ? target : gen_reg_rtx (compute_mode); |
55c2d311 TG |
4653 | quotient = gen_reg_rtx (compute_mode); |
4654 | } | |
4655 | else | |
4656 | { | |
668443c9 | 4657 | quotient |
f8cfc6aa | 4658 | = REG_P (target) ? target : gen_reg_rtx (compute_mode); |
55c2d311 TG |
4659 | remainder = gen_reg_rtx (compute_mode); |
4660 | } | |
4661 | ||
4662 | if (expand_twoval_binop (sdivmod_optab, op0, op1, | |
4663 | quotient, remainder, 0)) | |
4664 | { | |
4665 | /* This could be computed with a branch-less sequence. | |
4666 | Save that for later. */ | |
4667 | rtx tem; | |
f3f6fb16 | 4668 | rtx_code_label *label = gen_label_rtx (); |
f5963e61 | 4669 | do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label); |
55c2d311 TG |
4670 | tem = expand_binop (compute_mode, xor_optab, op0, op1, |
4671 | NULL_RTX, 0, OPTAB_WIDEN); | |
f5963e61 | 4672 | do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label); |
55c2d311 TG |
4673 | expand_dec (quotient, const1_rtx); |
4674 | expand_inc (remainder, op1); | |
4675 | emit_label (label); | |
c8dbc8ca | 4676 | return gen_lowpart (mode, rem_flag ? remainder : quotient); |
55c2d311 TG |
4677 | } |
4678 | ||
4679 | /* No luck with division elimination or divmod. Have to do it | |
4680 | by conditionally adjusting op0 *and* the result. */ | |
44037a66 | 4681 | { |
f3f6fb16 | 4682 | rtx_code_label *label1, *label2, *label3, *label4, *label5; |
55c2d311 TG |
4683 | rtx adjusted_op0; |
4684 | rtx tem; | |
4685 | ||
4686 | quotient = gen_reg_rtx (compute_mode); | |
4687 | adjusted_op0 = copy_to_mode_reg (compute_mode, op0); | |
4688 | label1 = gen_label_rtx (); | |
4689 | label2 = gen_label_rtx (); | |
4690 | label3 = gen_label_rtx (); | |
4691 | label4 = gen_label_rtx (); | |
4692 | label5 = gen_label_rtx (); | |
f5963e61 JL |
4693 | do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2); |
4694 | do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1); | |
55c2d311 TG |
4695 | tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1, |
4696 | quotient, 0, OPTAB_LIB_WIDEN); | |
4697 | if (tem != quotient) | |
4698 | emit_move_insn (quotient, tem); | |
ec4a505f | 4699 | emit_jump_insn (targetm.gen_jump (label5)); |
55c2d311 TG |
4700 | emit_barrier (); |
4701 | emit_label (label1); | |
44037a66 | 4702 | expand_inc (adjusted_op0, const1_rtx); |
ec4a505f | 4703 | emit_jump_insn (targetm.gen_jump (label4)); |
55c2d311 TG |
4704 | emit_barrier (); |
4705 | emit_label (label2); | |
f5963e61 | 4706 | do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3); |
55c2d311 TG |
4707 | tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1, |
4708 | quotient, 0, OPTAB_LIB_WIDEN); | |
4709 | if (tem != quotient) | |
4710 | emit_move_insn (quotient, tem); | |
ec4a505f | 4711 | emit_jump_insn (targetm.gen_jump (label5)); |
55c2d311 TG |
4712 | emit_barrier (); |
4713 | emit_label (label3); | |
4714 | expand_dec (adjusted_op0, const1_rtx); | |
4715 | emit_label (label4); | |
4716 | tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1, | |
4717 | quotient, 0, OPTAB_LIB_WIDEN); | |
4718 | if (tem != quotient) | |
4719 | emit_move_insn (quotient, tem); | |
4720 | expand_dec (quotient, const1_rtx); | |
4721 | emit_label (label5); | |
44037a66 | 4722 | } |
55c2d311 | 4723 | break; |
44037a66 | 4724 | |
55c2d311 TG |
4725 | case CEIL_DIV_EXPR: |
4726 | case CEIL_MOD_EXPR: | |
4727 | if (unsignedp) | |
4728 | { | |
76a7314d JJ |
4729 | if (op1_is_constant |
4730 | && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)) | |
4731 | && (size <= HOST_BITS_PER_WIDE_INT | |
4732 | || INTVAL (op1) >= 0)) | |
9176af2f TG |
4733 | { |
4734 | rtx t1, t2, t3; | |
4735 | unsigned HOST_WIDE_INT d = INTVAL (op1); | |
4736 | t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0, | |
eb6c3df1 | 4737 | floor_log2 (d), tquotient, 1); |
9176af2f | 4738 | t2 = expand_binop (compute_mode, and_optab, op0, |
2f1cd2eb | 4739 | gen_int_mode (d - 1, compute_mode), |
9176af2f TG |
4740 | NULL_RTX, 1, OPTAB_LIB_WIDEN); |
4741 | t3 = gen_reg_rtx (compute_mode); | |
4742 | t3 = emit_store_flag (t3, NE, t2, const0_rtx, | |
4743 | compute_mode, 1, 1); | |
412381d9 TG |
4744 | if (t3 == 0) |
4745 | { | |
f3f6fb16 | 4746 | rtx_code_label *lab; |
412381d9 | 4747 | lab = gen_label_rtx (); |
f5963e61 | 4748 | do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab); |
412381d9 TG |
4749 | expand_inc (t1, const1_rtx); |
4750 | emit_label (lab); | |
4751 | quotient = t1; | |
4752 | } | |
4753 | else | |
38a448ca RH |
4754 | quotient = force_operand (gen_rtx_PLUS (compute_mode, |
4755 | t1, t3), | |
412381d9 | 4756 | tquotient); |
9176af2f TG |
4757 | break; |
4758 | } | |
55c2d311 TG |
4759 | |
4760 | /* Try using an instruction that produces both the quotient and | |
4761 | remainder, using truncation. We can easily compensate the | |
4762 | quotient or remainder to get ceiling rounding, once we have the | |
4763 | remainder. Notice that we compute also the final remainder | |
4764 | value here, and return the result right away. */ | |
a45cf58c | 4765 | if (target == 0 || GET_MODE (target) != compute_mode) |
55c2d311 | 4766 | target = gen_reg_rtx (compute_mode); |
668443c9 | 4767 | |
55c2d311 TG |
4768 | if (rem_flag) |
4769 | { | |
f8cfc6aa | 4770 | remainder = (REG_P (target) |
668443c9 | 4771 | ? target : gen_reg_rtx (compute_mode)); |
55c2d311 TG |
4772 | quotient = gen_reg_rtx (compute_mode); |
4773 | } | |
4774 | else | |
4775 | { | |
f8cfc6aa | 4776 | quotient = (REG_P (target) |
668443c9 | 4777 | ? target : gen_reg_rtx (compute_mode)); |
55c2d311 TG |
4778 | remainder = gen_reg_rtx (compute_mode); |
4779 | } | |
4780 | ||
4781 | if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, | |
4782 | remainder, 1)) | |
4783 | { | |
4784 | /* This could be computed with a branch-less sequence. | |
4785 | Save that for later. */ | |
f3f6fb16 | 4786 | rtx_code_label *label = gen_label_rtx (); |
f5963e61 JL |
4787 | do_cmp_and_jump (remainder, const0_rtx, EQ, |
4788 | compute_mode, label); | |
55c2d311 TG |
4789 | expand_inc (quotient, const1_rtx); |
4790 | expand_dec (remainder, op1); | |
4791 | emit_label (label); | |
c8dbc8ca | 4792 | return gen_lowpart (mode, rem_flag ? remainder : quotient); |
55c2d311 TG |
4793 | } |
4794 | ||
4795 | /* No luck with division elimination or divmod. Have to do it | |
4796 | by conditionally adjusting op0 *and* the result. */ | |
44037a66 | 4797 | { |
f3f6fb16 | 4798 | rtx_code_label *label1, *label2; |
55c2d311 TG |
4799 | rtx adjusted_op0, tem; |
4800 | ||
4801 | quotient = gen_reg_rtx (compute_mode); | |
4802 | adjusted_op0 = copy_to_mode_reg (compute_mode, op0); | |
4803 | label1 = gen_label_rtx (); | |
4804 | label2 = gen_label_rtx (); | |
f5963e61 JL |
4805 | do_cmp_and_jump (adjusted_op0, const0_rtx, NE, |
4806 | compute_mode, label1); | |
55c2d311 | 4807 | emit_move_insn (quotient, const0_rtx); |
ec4a505f | 4808 | emit_jump_insn (targetm.gen_jump (label2)); |
55c2d311 TG |
4809 | emit_barrier (); |
4810 | emit_label (label1); | |
4811 | expand_dec (adjusted_op0, const1_rtx); | |
4812 | tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1, | |
4813 | quotient, 1, OPTAB_LIB_WIDEN); | |
4814 | if (tem != quotient) | |
4815 | emit_move_insn (quotient, tem); | |
4816 | expand_inc (quotient, const1_rtx); | |
4817 | emit_label (label2); | |
44037a66 | 4818 | } |
55c2d311 TG |
4819 | } |
4820 | else /* signed */ | |
4821 | { | |
73f27728 RK |
4822 | if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)) |
4823 | && INTVAL (op1) >= 0) | |
4824 | { | |
4825 | /* This is extremely similar to the code for the unsigned case | |
4826 | above. For 2.7 we should merge these variants, but for | |
4827 | 2.6.1 I don't want to touch the code for unsigned since that | |
4828 | get used in C. The signed case will only be used by other | |
4829 | languages (Ada). */ | |
4830 | ||
4831 | rtx t1, t2, t3; | |
4832 | unsigned HOST_WIDE_INT d = INTVAL (op1); | |
4833 | t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0, | |
eb6c3df1 | 4834 | floor_log2 (d), tquotient, 0); |
73f27728 | 4835 | t2 = expand_binop (compute_mode, and_optab, op0, |
2f1cd2eb | 4836 | gen_int_mode (d - 1, compute_mode), |
73f27728 RK |
4837 | NULL_RTX, 1, OPTAB_LIB_WIDEN); |
4838 | t3 = gen_reg_rtx (compute_mode); | |
4839 | t3 = emit_store_flag (t3, NE, t2, const0_rtx, | |
4840 | compute_mode, 1, 1); | |
4841 | if (t3 == 0) | |
4842 | { | |
f3f6fb16 | 4843 | rtx_code_label *lab; |
73f27728 | 4844 | lab = gen_label_rtx (); |
f5963e61 | 4845 | do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab); |
73f27728 RK |
4846 | expand_inc (t1, const1_rtx); |
4847 | emit_label (lab); | |
4848 | quotient = t1; | |
4849 | } | |
4850 | else | |
38a448ca RH |
4851 | quotient = force_operand (gen_rtx_PLUS (compute_mode, |
4852 | t1, t3), | |
73f27728 RK |
4853 | tquotient); |
4854 | break; | |
4855 | } | |
4856 | ||
55c2d311 TG |
4857 | /* Try using an instruction that produces both the quotient and |
4858 | remainder, using truncation. We can easily compensate the | |
4859 | quotient or remainder to get ceiling rounding, once we have the | |
4860 | remainder. Notice that we compute also the final remainder | |
4861 | value here, and return the result right away. */ | |
a45cf58c | 4862 | if (target == 0 || GET_MODE (target) != compute_mode) |
55c2d311 TG |
4863 | target = gen_reg_rtx (compute_mode); |
4864 | if (rem_flag) | |
4865 | { | |
f8cfc6aa | 4866 | remainder= (REG_P (target) |
668443c9 | 4867 | ? target : gen_reg_rtx (compute_mode)); |
55c2d311 TG |
4868 | quotient = gen_reg_rtx (compute_mode); |
4869 | } | |
4870 | else | |
4871 | { | |
f8cfc6aa | 4872 | quotient = (REG_P (target) |
668443c9 | 4873 | ? target : gen_reg_rtx (compute_mode)); |
55c2d311 TG |
4874 | remainder = gen_reg_rtx (compute_mode); |
4875 | } | |
4876 | ||
4877 | if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, | |
4878 | remainder, 0)) | |
4879 | { | |
4880 | /* This could be computed with a branch-less sequence. | |
4881 | Save that for later. */ | |
4882 | rtx tem; | |
f3f6fb16 | 4883 | rtx_code_label *label = gen_label_rtx (); |
f5963e61 JL |
4884 | do_cmp_and_jump (remainder, const0_rtx, EQ, |
4885 | compute_mode, label); | |
55c2d311 TG |
4886 | tem = expand_binop (compute_mode, xor_optab, op0, op1, |
4887 | NULL_RTX, 0, OPTAB_WIDEN); | |
f5963e61 | 4888 | do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label); |
55c2d311 TG |
4889 | expand_inc (quotient, const1_rtx); |
4890 | expand_dec (remainder, op1); | |
4891 | emit_label (label); | |
c8dbc8ca | 4892 | return gen_lowpart (mode, rem_flag ? remainder : quotient); |
55c2d311 TG |
4893 | } |
4894 | ||
4895 | /* No luck with division elimination or divmod. Have to do it | |
4896 | by conditionally adjusting op0 *and* the result. */ | |
44037a66 | 4897 | { |
f3f6fb16 | 4898 | rtx_code_label *label1, *label2, *label3, *label4, *label5; |
55c2d311 TG |
4899 | rtx adjusted_op0; |
4900 | rtx tem; | |
4901 | ||
4902 | quotient = gen_reg_rtx (compute_mode); | |
4903 | adjusted_op0 = copy_to_mode_reg (compute_mode, op0); | |
4904 | label1 = gen_label_rtx (); | |
4905 | label2 = gen_label_rtx (); | |
4906 | label3 = gen_label_rtx (); | |
4907 | label4 = gen_label_rtx (); | |
4908 | label5 = gen_label_rtx (); | |
f5963e61 JL |
4909 | do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2); |
4910 | do_cmp_and_jump (adjusted_op0, const0_rtx, GT, | |
4911 | compute_mode, label1); | |
55c2d311 TG |
4912 | tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1, |
4913 | quotient, 0, OPTAB_LIB_WIDEN); | |
4914 | if (tem != quotient) | |
4915 | emit_move_insn (quotient, tem); | |
ec4a505f | 4916 | emit_jump_insn (targetm.gen_jump (label5)); |
55c2d311 TG |
4917 | emit_barrier (); |
4918 | emit_label (label1); | |
4919 | expand_dec (adjusted_op0, const1_rtx); | |
ec4a505f | 4920 | emit_jump_insn (targetm.gen_jump (label4)); |
55c2d311 TG |
4921 | emit_barrier (); |
4922 | emit_label (label2); | |
f5963e61 JL |
4923 | do_cmp_and_jump (adjusted_op0, const0_rtx, LT, |
4924 | compute_mode, label3); | |
55c2d311 TG |
4925 | tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1, |
4926 | quotient, 0, OPTAB_LIB_WIDEN); | |
4927 | if (tem != quotient) | |
4928 | emit_move_insn (quotient, tem); | |
ec4a505f | 4929 | emit_jump_insn (targetm.gen_jump (label5)); |
55c2d311 TG |
4930 | emit_barrier (); |
4931 | emit_label (label3); | |
4932 | expand_inc (adjusted_op0, const1_rtx); | |
4933 | emit_label (label4); | |
4934 | tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1, | |
4935 | quotient, 0, OPTAB_LIB_WIDEN); | |
4936 | if (tem != quotient) | |
4937 | emit_move_insn (quotient, tem); | |
4938 | expand_inc (quotient, const1_rtx); | |
4939 | emit_label (label5); | |
44037a66 | 4940 | } |
55c2d311 TG |
4941 | } |
4942 | break; | |
bc1c7e93 | 4943 | |
55c2d311 | 4944 | case EXACT_DIV_EXPR: |
76a7314d | 4945 | if (op1_is_constant && size <= HOST_BITS_PER_WIDE_INT) |
55c2d311 TG |
4946 | { |
4947 | HOST_WIDE_INT d = INTVAL (op1); | |
4948 | unsigned HOST_WIDE_INT ml; | |
91ce572a | 4949 | int pre_shift; |
55c2d311 TG |
4950 | rtx t1; |
4951 | ||
146ec50f | 4952 | pre_shift = ctz_or_zero (d); |
91ce572a CC |
4953 | ml = invert_mod2n (d >> pre_shift, size); |
4954 | t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0, | |
eb6c3df1 | 4955 | pre_shift, NULL_RTX, unsignedp); |
69107307 | 4956 | quotient = expand_mult (compute_mode, t1, |
2496c7bd | 4957 | gen_int_mode (ml, compute_mode), |
31ff3e0b | 4958 | NULL_RTX, 1); |
55c2d311 TG |
4959 | |
4960 | insn = get_last_insn (); | |
7543f918 JR |
4961 | set_dst_reg_note (insn, REG_EQUAL, |
4962 | gen_rtx_fmt_ee (unsignedp ? UDIV : DIV, | |
4963 | compute_mode, op0, op1), | |
4964 | quotient); | |
55c2d311 TG |
4965 | } |
4966 | break; | |
4967 | ||
4968 | case ROUND_DIV_EXPR: | |
4969 | case ROUND_MOD_EXPR: | |
69f61901 RK |
4970 | if (unsignedp) |
4971 | { | |
4972 | rtx tem; | |
f3f6fb16 | 4973 | rtx_code_label *label; |
69f61901 RK |
4974 | label = gen_label_rtx (); |
4975 | quotient = gen_reg_rtx (compute_mode); | |
4976 | remainder = gen_reg_rtx (compute_mode); | |
4977 | if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0) | |
4978 | { | |
4979 | rtx tem; | |
4980 | quotient = expand_binop (compute_mode, udiv_optab, op0, op1, | |
4981 | quotient, 1, OPTAB_LIB_WIDEN); | |
4982 | tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1); | |
4983 | remainder = expand_binop (compute_mode, sub_optab, op0, tem, | |
4984 | remainder, 1, OPTAB_LIB_WIDEN); | |
4985 | } | |
0a81f074 | 4986 | tem = plus_constant (compute_mode, op1, -1); |
eb6c3df1 | 4987 | tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1); |
f5963e61 | 4988 | do_cmp_and_jump (remainder, tem, LEU, compute_mode, label); |
69f61901 RK |
4989 | expand_inc (quotient, const1_rtx); |
4990 | expand_dec (remainder, op1); | |
4991 | emit_label (label); | |
4992 | } | |
4993 | else | |
4994 | { | |
4995 | rtx abs_rem, abs_op1, tem, mask; | |
f3f6fb16 | 4996 | rtx_code_label *label; |
69f61901 RK |
4997 | label = gen_label_rtx (); |
4998 | quotient = gen_reg_rtx (compute_mode); | |
4999 | remainder = gen_reg_rtx (compute_mode); | |
5000 | if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0) | |
5001 | { | |
5002 | rtx tem; | |
5003 | quotient = expand_binop (compute_mode, sdiv_optab, op0, op1, | |
5004 | quotient, 0, OPTAB_LIB_WIDEN); | |
5005 | tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0); | |
5006 | remainder = expand_binop (compute_mode, sub_optab, op0, tem, | |
5007 | remainder, 0, OPTAB_LIB_WIDEN); | |
5008 | } | |
91ce572a CC |
5009 | abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0); |
5010 | abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0); | |
69f61901 | 5011 | tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem, |
eb6c3df1 | 5012 | 1, NULL_RTX, 1); |
f5963e61 | 5013 | do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label); |
69f61901 RK |
5014 | tem = expand_binop (compute_mode, xor_optab, op0, op1, |
5015 | NULL_RTX, 0, OPTAB_WIDEN); | |
5016 | mask = expand_shift (RSHIFT_EXPR, compute_mode, tem, | |
eb6c3df1 | 5017 | size - 1, NULL_RTX, 0); |
69f61901 RK |
5018 | tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx, |
5019 | NULL_RTX, 0, OPTAB_WIDEN); | |
5020 | tem = expand_binop (compute_mode, sub_optab, tem, mask, | |
5021 | NULL_RTX, 0, OPTAB_WIDEN); | |
5022 | expand_inc (quotient, tem); | |
5023 | tem = expand_binop (compute_mode, xor_optab, mask, op1, | |
5024 | NULL_RTX, 0, OPTAB_WIDEN); | |
5025 | tem = expand_binop (compute_mode, sub_optab, tem, mask, | |
5026 | NULL_RTX, 0, OPTAB_WIDEN); | |
5027 | expand_dec (remainder, tem); | |
5028 | emit_label (label); | |
5029 | } | |
5030 | return gen_lowpart (mode, rem_flag ? remainder : quotient); | |
c410d49e | 5031 | |
e9a25f70 | 5032 | default: |
5b0264cb | 5033 | gcc_unreachable (); |
55c2d311 | 5034 | } |
44037a66 | 5035 | |
55c2d311 | 5036 | if (quotient == 0) |
44037a66 | 5037 | { |
a45cf58c RK |
5038 | if (target && GET_MODE (target) != compute_mode) |
5039 | target = 0; | |
5040 | ||
55c2d311 | 5041 | if (rem_flag) |
44037a66 | 5042 | { |
32fdf36b | 5043 | /* Try to produce the remainder without producing the quotient. |
d6a7951f | 5044 | If we seem to have a divmod pattern that does not require widening, |
b20b352b | 5045 | don't try widening here. We should really have a WIDEN argument |
32fdf36b TG |
5046 | to expand_twoval_binop, since what we'd really like to do here is |
5047 | 1) try a mod insn in compute_mode | |
5048 | 2) try a divmod insn in compute_mode | |
5049 | 3) try a div insn in compute_mode and multiply-subtract to get | |
5050 | remainder | |
5051 | 4) try the same things with widening allowed. */ | |
5052 | remainder | |
5053 | = sign_expand_binop (compute_mode, umod_optab, smod_optab, | |
5054 | op0, op1, target, | |
5055 | unsignedp, | |
947131ba | 5056 | ((optab_handler (optab2, compute_mode) |
32fdf36b TG |
5057 | != CODE_FOR_nothing) |
5058 | ? OPTAB_DIRECT : OPTAB_WIDEN)); | |
55c2d311 | 5059 | if (remainder == 0) |
44037a66 TG |
5060 | { |
5061 | /* No luck there. Can we do remainder and divide at once | |
5062 | without a library call? */ | |
55c2d311 TG |
5063 | remainder = gen_reg_rtx (compute_mode); |
5064 | if (! expand_twoval_binop ((unsignedp | |
5065 | ? udivmod_optab | |
5066 | : sdivmod_optab), | |
5067 | op0, op1, | |
5068 | NULL_RTX, remainder, unsignedp)) | |
5069 | remainder = 0; | |
44037a66 | 5070 | } |
55c2d311 TG |
5071 | |
5072 | if (remainder) | |
5073 | return gen_lowpart (mode, remainder); | |
44037a66 | 5074 | } |
44037a66 | 5075 | |
dc38b292 RK |
5076 | /* Produce the quotient. Try a quotient insn, but not a library call. |
5077 | If we have a divmod in this mode, use it in preference to widening | |
5078 | the div (for this test we assume it will not fail). Note that optab2 | |
5079 | is set to the one of the two optabs that the call below will use. */ | |
5080 | quotient | |
5081 | = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab, | |
5082 | op0, op1, rem_flag ? NULL_RTX : target, | |
5083 | unsignedp, | |
947131ba | 5084 | ((optab_handler (optab2, compute_mode) |
dc38b292 RK |
5085 | != CODE_FOR_nothing) |
5086 | ? OPTAB_DIRECT : OPTAB_WIDEN)); | |
5087 | ||
55c2d311 | 5088 | if (quotient == 0) |
44037a66 TG |
5089 | { |
5090 | /* No luck there. Try a quotient-and-remainder insn, | |
5091 | keeping the quotient alone. */ | |
55c2d311 | 5092 | quotient = gen_reg_rtx (compute_mode); |
44037a66 | 5093 | if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab, |
55c2d311 TG |
5094 | op0, op1, |
5095 | quotient, NULL_RTX, unsignedp)) | |
5096 | { | |
5097 | quotient = 0; | |
5098 | if (! rem_flag) | |
5099 | /* Still no luck. If we are not computing the remainder, | |
5100 | use a library call for the quotient. */ | |
5101 | quotient = sign_expand_binop (compute_mode, | |
5102 | udiv_optab, sdiv_optab, | |
5103 | op0, op1, target, | |
5104 | unsignedp, OPTAB_LIB_WIDEN); | |
5105 | } | |
44037a66 | 5106 | } |
44037a66 TG |
5107 | } |
5108 | ||
44037a66 TG |
5109 | if (rem_flag) |
5110 | { | |
a45cf58c RK |
5111 | if (target && GET_MODE (target) != compute_mode) |
5112 | target = 0; | |
5113 | ||
55c2d311 | 5114 | if (quotient == 0) |
b3f8d95d MM |
5115 | { |
5116 | /* No divide instruction either. Use library for remainder. */ | |
5117 | remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab, | |
5118 | op0, op1, target, | |
5119 | unsignedp, OPTAB_LIB_WIDEN); | |
5120 | /* No remainder function. Try a quotient-and-remainder | |
5121 | function, keeping the remainder. */ | |
5122 | if (!remainder) | |
5123 | { | |
5124 | remainder = gen_reg_rtx (compute_mode); | |
b8698a0f | 5125 | if (!expand_twoval_binop_libfunc |
b3f8d95d MM |
5126 | (unsignedp ? udivmod_optab : sdivmod_optab, |
5127 | op0, op1, | |
5128 | NULL_RTX, remainder, | |
5129 | unsignedp ? UMOD : MOD)) | |
5130 | remainder = NULL_RTX; | |
5131 | } | |
5132 | } | |
44037a66 TG |
5133 | else |
5134 | { | |
5135 | /* We divided. Now finish doing X - Y * (X / Y). */ | |
55c2d311 TG |
5136 | remainder = expand_mult (compute_mode, quotient, op1, |
5137 | NULL_RTX, unsignedp); | |
5138 | remainder = expand_binop (compute_mode, sub_optab, op0, | |
5139 | remainder, target, unsignedp, | |
5140 | OPTAB_LIB_WIDEN); | |
44037a66 TG |
5141 | } |
5142 | } | |
5143 | ||
55c2d311 | 5144 | return gen_lowpart (mode, rem_flag ? remainder : quotient); |
44037a66 TG |
5145 | } |
5146 | \f | |
/* Return a tree node with data type TYPE, describing the value of X.
   Usually this is a VAR_DECL, if there is no obvious better choice.
   X may be an expression, however we only support those expressions
   generated by loop.c.

   The conversion recurses on the operands of arithmetic RTXs, so a
   whole expression tree generated from RTL can be rebuilt as GENERIC.  */

tree
make_tree (tree type, rtx x)
{
  tree t;

  switch (GET_CODE (x))
    {
    case CONST_INT:
    case CONST_WIDE_INT:
      /* Integer constants map directly to an INTEGER_CST of TYPE.  */
      t = wide_int_to_tree (type, rtx_mode_t (x, TYPE_MODE (type)));
      return t;

    case CONST_DOUBLE:
      STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
      /* A VOIDmode CONST_DOUBLE is a double-word integer constant on
	 targets without CONST_WIDE_INT support; otherwise it is a
	 floating-point constant.  */
      if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
	t = wide_int_to_tree (type,
			      wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
						    HOST_BITS_PER_WIDE_INT * 2));
      else
	t = build_real (type, *CONST_DOUBLE_REAL_VALUE (x));

      return t;

    case CONST_VECTOR:
      {
	int units = CONST_VECTOR_NUNITS (x);
	tree itype = TREE_TYPE (type);
	tree *elts;
	int i;

	/* Build a tree with vector elements, converting each RTL
	   element recursively to the vector's element type.  */
	elts = XALLOCAVEC (tree, units);
	for (i = units - 1; i >= 0; --i)
	  {
	    rtx elt = CONST_VECTOR_ELT (x, i);
	    elts[i] = make_tree (itype, elt);
	  }

	return build_vector (type, elts);
      }

    case PLUS:
      return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
			  make_tree (type, XEXP (x, 1)));

    case MINUS:
      return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
			  make_tree (type, XEXP (x, 1)));

    case NEG:
      return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));

    case MULT:
      return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
			  make_tree (type, XEXP (x, 1)));

    case ASHIFT:
      return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
			  make_tree (type, XEXP (x, 1)));

    case LSHIFTRT:
      /* RTL shift codes encode signedness; trees encode it in the type.
	 A logical right shift is a RSHIFT_EXPR on the unsigned variant
	 of TYPE.  */
      t = unsigned_type_for (type);
      return fold_convert (type, build2 (RSHIFT_EXPR, t,
					 make_tree (t, XEXP (x, 0)),
					 make_tree (type, XEXP (x, 1))));

    case ASHIFTRT:
      /* An arithmetic right shift is a RSHIFT_EXPR on the signed
	 variant of TYPE.  */
      t = signed_type_for (type);
      return fold_convert (type, build2 (RSHIFT_EXPR, t,
					 make_tree (t, XEXP (x, 0)),
					 make_tree (type, XEXP (x, 1))));

    case DIV:
      /* DIV is signed division for integers; real division keeps TYPE.  */
      if (TREE_CODE (type) != REAL_TYPE)
	t = signed_type_for (type);
      else
	t = type;

      return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
					 make_tree (t, XEXP (x, 0)),
					 make_tree (t, XEXP (x, 1))));
    case UDIV:
      t = unsigned_type_for (type);
      return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
					 make_tree (t, XEXP (x, 0)),
					 make_tree (t, XEXP (x, 1))));

    case SIGN_EXTEND:
    case ZERO_EXTEND:
      /* Rebuild the extension as a conversion from the narrower type
	 with the signedness the RTX code implies.  */
      t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
					  GET_CODE (x) == ZERO_EXTEND);
      return fold_convert (type, make_tree (t, XEXP (x, 0)));

    case CONST:
      return make_tree (type, XEXP (x, 0));

    case SYMBOL_REF:
      t = SYMBOL_REF_DECL (x);
      if (t)
	return fold_convert (type, build_fold_addr_expr (t));
      /* fall through.  */

    default:
      /* No direct translation: wrap X in an anonymous VAR_DECL whose
	 DECL_RTL is X itself.  */
      t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);

      /* If TYPE is a POINTER_TYPE, we might need to convert X from
	 address mode to pointer mode.  */
      if (POINTER_TYPE_P (type))
	x = convert_memory_address_addr_space
	      (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));

      /* Note that we do *not* use SET_DECL_RTL here, because we do not
	 want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
      t->decl_with_rtl.rtl = x;

      return t;
    }
}
44037a66 TG |
5270 | \f |
5271 | /* Compute the logical-and of OP0 and OP1, storing it in TARGET | |
5272 | and returning TARGET. | |
5273 | ||
5274 | If TARGET is 0, a pseudo-register or constant is returned. */ | |
5275 | ||
5276 | rtx | |
ef4bddc2 | 5277 | expand_and (machine_mode mode, rtx op0, rtx op1, rtx target) |
44037a66 | 5278 | { |
22273300 | 5279 | rtx tem = 0; |
44037a66 | 5280 | |
22273300 JJ |
5281 | if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode) |
5282 | tem = simplify_binary_operation (AND, mode, op0, op1); | |
5283 | if (tem == 0) | |
44037a66 | 5284 | tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN); |
44037a66 TG |
5285 | |
5286 | if (target == 0) | |
5287 | target = tem; | |
5288 | else if (tem != target) | |
5289 | emit_move_insn (target, tem); | |
5290 | return target; | |
5291 | } | |
495499da | 5292 | |
/* Helper function for emit_store_flag.  Try to emit a cstore pattern
   ICODE computing comparison CODE of X and Y (compared in COMPARE_MODE,
   originally in MODE), then normalize the raw result per NORMALIZEP
   into TARGET_MODE.  Returns the result rtx, or NULL_RTX on failure
   (in which case any emitted insns are deleted).  */
rtx
emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
	     machine_mode mode, machine_mode compare_mode,
	     int unsignedp, rtx x, rtx y, int normalizep,
	     machine_mode target_mode)
{
  struct expand_operand ops[4];
  rtx op0, comparison, subtarget;
  rtx_insn *last;
  machine_mode result_mode = targetm.cstore_mode (icode);

  /* Remember the insn stream position so we can roll back on failure.  */
  last = get_last_insn ();
  x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
  y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
  if (!x || !y)
    {
      delete_insns_since (last);
      return NULL_RTX;
    }

  if (target_mode == VOIDmode)
    target_mode = result_mode;
  if (!target)
    target = gen_reg_rtx (target_mode);

  comparison = gen_rtx_fmt_ee (code, result_mode, x, y);

  /* cstore patterns take (output, comparison, op-x, op-y).  */
  create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
  create_fixed_operand (&ops[1], comparison);
  create_fixed_operand (&ops[2], x);
  create_fixed_operand (&ops[3], y);
  if (!maybe_expand_insn (icode, 4, ops))
    {
      delete_insns_since (last);
      return NULL_RTX;
    }
  subtarget = ops[0].value;

  /* If we are converting to a wider mode, first convert to
     TARGET_MODE, then normalize.  This produces better combining
     opportunities on machines that have a SIGN_EXTRACT when we are
     testing a single bit.  This mostly benefits the 68k.

     If STORE_FLAG_VALUE does not have the sign bit set when
     interpreted in MODE, we can do this conversion as unsigned, which
     is usually more efficient.  */
  if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
    {
      convert_move (target, subtarget,
		    val_signbit_known_clear_p (result_mode,
					       STORE_FLAG_VALUE));
      op0 = target;
      result_mode = target_mode;
    }
  else
    op0 = subtarget;

  /* If we want to keep subexpressions around, don't reuse our last
     target.  */
  if (optimize)
    subtarget = 0;

  /* Now normalize to the proper value in MODE.  Sometimes we don't
     have to do anything.  */
  if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
    ;
  /* STORE_FLAG_VALUE might be the most negative number, so write
     the comparison this way to avoid a compiler-time warning.  */
  else if (- normalizep == STORE_FLAG_VALUE)
    op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);

  /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
     it hard to use a value of just the sign bit due to ANSI integer
     constant typing rules.  */
  else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
    op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
			GET_MODE_BITSIZE (result_mode) - 1, subtarget,
			normalizep == 1);
  else
    {
      /* Any other STORE_FLAG_VALUE we can normalize must be odd, so
	 masking with 1 and optionally negating gives the 0/1 or 0/-1
	 result.  */
      gcc_assert (STORE_FLAG_VALUE & 1);

      op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
      if (normalizep == -1)
	op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
    }

  /* If we were converting to a smaller mode, do the conversion now.  */
  if (target_mode != result_mode)
    {
      convert_move (target, op0, 0);
      return target;
    }
  else
    return op0;
}
5390 | ||
44037a66 | 5391 | |
ef12ae45 PB |
/* A subroutine of emit_store_flag only including "tricks" that do not
   need a recursive call.  These are kept separate to avoid infinite
   loops.  */

static rtx
emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
		   machine_mode mode, int unsignedp, int normalizep,
		   machine_mode target_mode)
{
  rtx subtarget;
  enum insn_code icode;
  machine_mode compare_mode;
  enum mode_class mclass;
  enum rtx_code scode;

  if (unsignedp)
    code = unsigned_condition (code);
  /* SCODE is the comparison with the operands swapped, used for the
     reversed-operand retry on floating-point modes below.  */
  scode = swap_condition (code);

  /* If one operand is constant, make it the second one.  Only do this
     if the other operand is not constant as well.  */

  if (swap_commutative_operands_p (op0, op1))
    {
      std::swap (op0, op1);
      code = swap_condition (code);
    }

  if (mode == VOIDmode)
    mode = GET_MODE (op0);

  /* For some comparisons with 1 and -1, we can convert this to
     comparisons with zero.  This will often produce more opportunities for
     store-flag insns.  */

  switch (code)
    {
    case LT:
      if (op1 == const1_rtx)
	op1 = const0_rtx, code = LE;
      break;
    case LE:
      if (op1 == constm1_rtx)
	op1 = const0_rtx, code = LT;
      break;
    case GE:
      if (op1 == const1_rtx)
	op1 = const0_rtx, code = GT;
      break;
    case GT:
      if (op1 == constm1_rtx)
	op1 = const0_rtx, code = GE;
      break;
    case GEU:
      if (op1 == const1_rtx)
	op1 = const0_rtx, code = NE;
      break;
    case LTU:
      if (op1 == const1_rtx)
	op1 = const0_rtx, code = EQ;
      break;
    default:
      break;
    }

  /* If we are comparing a double-word integer with zero or -1, we can
     convert the comparison into one involving a single word.  */
  if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
      && GET_MODE_CLASS (mode) == MODE_INT
      && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
    {
      rtx tem;
      if ((code == EQ || code == NE)
	  && (op1 == const0_rtx || op1 == constm1_rtx))
	{
	  rtx op00, op01;

	  /* Do a logical OR or AND of the two words and compare the
	     result.  */
	  op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
	  op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
	  tem = expand_binop (word_mode,
			      op1 == const0_rtx ? ior_optab : and_optab,
			      op00, op01, NULL_RTX, unsignedp,
			      OPTAB_DIRECT);

	  if (tem != 0)
	    tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
				   unsignedp, normalizep);
	}
      else if ((code == LT || code == GE) && op1 == const0_rtx)
	{
	  rtx op0h;

	  /* If testing the sign bit, can just test on high word.  */
	  op0h = simplify_gen_subreg (word_mode, op0, mode,
				      subreg_highpart_offset (word_mode,
							      mode));
	  tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
				 unsignedp, normalizep);
	}
      else
	tem = NULL_RTX;

      if (tem)
	{
	  if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
	    return tem;
	  if (!target)
	    target = gen_reg_rtx (target_mode);

	  /* Extend the word-mode result to TARGET_MODE, sign-extending
	     only when the flag value could have its sign bit set.  */
	  convert_move (target, tem,
			!val_signbit_known_set_p (word_mode,
						  (normalizep ? normalizep
						   : STORE_FLAG_VALUE)));
	  return target;
	}
    }

  /* If this is A < 0 or A >= 0, we can do this by taking the ones
     complement of A (for GE) and shifting the sign bit to the low bit.  */
  if (op1 == const0_rtx && (code == LT || code == GE)
      && GET_MODE_CLASS (mode) == MODE_INT
      && (normalizep || STORE_FLAG_VALUE == 1
	  || val_signbit_p (mode, STORE_FLAG_VALUE)))
    {
      subtarget = target;

      if (!target)
	target_mode = mode;

      /* If the result is to be wider than OP0, it is best to convert it
	 first.  If it is to be narrower, it is *incorrect* to convert it
	 first.  */
      else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
	{
	  op0 = convert_modes (target_mode, mode, op0, 0);
	  mode = target_mode;
	}

      if (target_mode != mode)
	subtarget = 0;

      if (code == GE)
	op0 = expand_unop (mode, one_cmpl_optab, op0,
			   ((STORE_FLAG_VALUE == 1 || normalizep)
			    ? 0 : subtarget), 0);

      if (STORE_FLAG_VALUE == 1 || normalizep)
	/* If we are supposed to produce a 0/1 value, we want to do
	   a logical shift from the sign bit to the low-order bit; for
	   a -1/0 value, we do an arithmetic shift.  */
	op0 = expand_shift (RSHIFT_EXPR, mode, op0,
			    GET_MODE_BITSIZE (mode) - 1,
			    subtarget, normalizep != -1);

      if (mode != target_mode)
	op0 = convert_modes (target_mode, mode, op0, 0);

      return op0;
    }

  /* Look for a cstore pattern, starting in MODE and widening from
     there; for MODE_CC comparisons the pattern is always keyed on
     CCmode.  */
  mclass = GET_MODE_CLASS (mode);
  FOR_EACH_MODE_FROM (compare_mode, mode)
    {
      machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
      icode = optab_handler (cstore_optab, optab_mode);
      if (icode != CODE_FOR_nothing)
	{
	  do_pending_stack_adjust ();
	  rtx tem = emit_cstore (target, icode, code, mode, compare_mode,
				 unsignedp, op0, op1, normalizep, target_mode);
	  if (tem)
	    return tem;

	  /* For floating-point comparisons, also try the swapped
	     comparison with the operands reversed.  */
	  if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	    {
	      tem = emit_cstore (target, icode, scode, mode, compare_mode,
				 unsignedp, op1, op0, normalizep, target_mode);
	      if (tem)
		return tem;
	    }
	  break;
	}
    }

  return 0;
}
5580 | ||
8afacf2c RS |
5581 | /* Subroutine of emit_store_flag that handles cases in which the operands |
5582 | are scalar integers. SUBTARGET is the target to use for temporary | |
5583 | operations and TRUEVAL is the value to store when the condition is | |
5584 | true. All other arguments are as for emit_store_flag. */ | |
ef12ae45 PB |
5585 | |
5586 | rtx | |
8afacf2c RS |
5587 | emit_store_flag_int (rtx target, rtx subtarget, enum rtx_code code, rtx op0, |
5588 | rtx op1, machine_mode mode, int unsignedp, | |
5589 | int normalizep, rtx trueval) | |
ef12ae45 | 5590 | { |
ef4bddc2 | 5591 | machine_mode target_mode = target ? GET_MODE (target) : VOIDmode; |
8afacf2c RS |
5592 | rtx_insn *last = get_last_insn (); |
5593 | rtx tem; | |
495499da PB |
5594 | |
5595 | /* If this is an equality comparison of integers, we can try to exclusive-or | |
44037a66 TG |
5596 | (or subtract) the two operands and use a recursive call to try the |
5597 | comparison with zero. Don't do any of these cases if branches are | |
5598 | very cheap. */ | |
5599 | ||
495499da | 5600 | if ((code == EQ || code == NE) && op1 != const0_rtx) |
44037a66 TG |
5601 | { |
5602 | tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1, | |
5603 | OPTAB_WIDEN); | |
5604 | ||
5605 | if (tem == 0) | |
5606 | tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1, | |
5607 | OPTAB_WIDEN); | |
5608 | if (tem != 0) | |
a22fb74c AK |
5609 | tem = emit_store_flag (target, code, tem, const0_rtx, |
5610 | mode, unsignedp, normalizep); | |
495499da PB |
5611 | if (tem != 0) |
5612 | return tem; | |
5613 | ||
5614 | delete_insns_since (last); | |
5615 | } | |
5616 | ||
5617 | /* For integer comparisons, try the reverse comparison. However, for | |
5618 | small X and if we'd have anyway to extend, implementing "X != 0" | |
5619 | as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */ | |
8afacf2c | 5620 | rtx_code rcode = reverse_condition (code); |
495499da | 5621 | if (can_compare_p (rcode, mode, ccp_store_flag) |
947131ba | 5622 | && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing |
495499da PB |
5623 | && code == NE |
5624 | && GET_MODE_SIZE (mode) < UNITS_PER_WORD | |
5625 | && op1 == const0_rtx)) | |
5626 | { | |
533d4b99 PB |
5627 | int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1) |
5628 | || (STORE_FLAG_VALUE == -1 && normalizep == 1)); | |
5629 | ||
495499da | 5630 | /* Again, for the reverse comparison, use either an addition or a XOR. */ |
533d4b99 | 5631 | if (want_add |
e548c9df | 5632 | && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1, |
533d4b99 | 5633 | optimize_insn_for_speed_p ()) == 0) |
495499da | 5634 | { |
ef12ae45 | 5635 | tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0, |
92355a9c | 5636 | STORE_FLAG_VALUE, target_mode); |
495499da | 5637 | if (tem != 0) |
8afacf2c | 5638 | tem = expand_binop (target_mode, add_optab, tem, |
2f1cd2eb RS |
5639 | gen_int_mode (normalizep, target_mode), |
5640 | target, 0, OPTAB_WIDEN); | |
495499da | 5641 | } |
533d4b99 | 5642 | else if (!want_add |
e548c9df | 5643 | && rtx_cost (trueval, mode, XOR, 1, |
533d4b99 | 5644 | optimize_insn_for_speed_p ()) == 0) |
495499da | 5645 | { |
ef12ae45 | 5646 | tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0, |
92355a9c | 5647 | normalizep, target_mode); |
495499da | 5648 | if (tem != 0) |
8afacf2c | 5649 | tem = expand_binop (target_mode, xor_optab, tem, trueval, target, |
495499da PB |
5650 | INTVAL (trueval) >= 0, OPTAB_WIDEN); |
5651 | } | |
5652 | ||
5653 | if (tem != 0) | |
5654 | return tem; | |
5655 | delete_insns_since (last); | |
44037a66 TG |
5656 | } |
5657 | ||
c410d49e | 5658 | /* Some other cases we can do are EQ, NE, LE, and GT comparisons with |
44037a66 TG |
5659 | the constant zero. Reject all other comparisons at this point. Only |
5660 | do LE and GT if branches are expensive since they are expensive on | |
5661 | 2-operand machines. */ | |
5662 | ||
495499da | 5663 | if (op1 != const0_rtx |
44037a66 | 5664 | || (code != EQ && code != NE |
3a4fd356 JH |
5665 | && (BRANCH_COST (optimize_insn_for_speed_p (), |
5666 | false) <= 1 || (code != LE && code != GT)))) | |
44037a66 TG |
5667 | return 0; |
5668 | ||
44037a66 TG |
5669 | /* Try to put the result of the comparison in the sign bit. Assume we can't |
5670 | do the necessary operation below. */ | |
5671 | ||
5672 | tem = 0; | |
5673 | ||
5674 | /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has | |
5675 | the sign bit set. */ | |
5676 | ||
5677 | if (code == LE) | |
5678 | { | |
5679 | /* This is destructive, so SUBTARGET can't be OP0. */ | |
5680 | if (rtx_equal_p (subtarget, op0)) | |
5681 | subtarget = 0; | |
5682 | ||
5683 | tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0, | |
5684 | OPTAB_WIDEN); | |
5685 | if (tem) | |
5686 | tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0, | |
5687 | OPTAB_WIDEN); | |
5688 | } | |
5689 | ||
5690 | /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the | |
5691 | number of bits in the mode of OP0, minus one. */ | |
5692 | ||
5693 | if (code == GT) | |
5694 | { | |
5695 | if (rtx_equal_p (subtarget, op0)) | |
5696 | subtarget = 0; | |
5697 | ||
ea000c3f EB |
5698 | tem = maybe_expand_shift (RSHIFT_EXPR, mode, op0, |
5699 | GET_MODE_BITSIZE (mode) - 1, | |
5700 | subtarget, 0); | |
5701 | if (tem) | |
5702 | tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0, | |
5703 | OPTAB_WIDEN); | |
44037a66 | 5704 | } |
c410d49e | 5705 | |
44037a66 TG |
5706 | if (code == EQ || code == NE) |
5707 | { | |
5708 | /* For EQ or NE, one way to do the comparison is to apply an operation | |
cc2902df | 5709 | that converts the operand into a positive number if it is nonzero |
44037a66 TG |
5710 | or zero if it was originally zero. Then, for EQ, we subtract 1 and |
5711 | for NE we negate. This puts the result in the sign bit. Then we | |
c410d49e | 5712 | normalize with a shift, if needed. |
44037a66 TG |
5713 | |
5714 | Two operations that can do the above actions are ABS and FFS, so try | |
5715 | them. If that doesn't work, and MODE is smaller than a full word, | |
36d747f6 | 5716 | we can use zero-extension to the wider mode (an unsigned conversion) |
44037a66 TG |
5717 | as the operation. */ |
5718 | ||
c410d49e EC |
5719 | /* Note that ABS doesn't yield a positive number for INT_MIN, but |
5720 | that is compensated by the subsequent overflow when subtracting | |
30f7a378 | 5721 | one / negating. */ |
91ce572a | 5722 | |
947131ba | 5723 | if (optab_handler (abs_optab, mode) != CODE_FOR_nothing) |
44037a66 | 5724 | tem = expand_unop (mode, abs_optab, op0, subtarget, 1); |
947131ba | 5725 | else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing) |
44037a66 TG |
5726 | tem = expand_unop (mode, ffs_optab, op0, subtarget, 1); |
5727 | else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD) | |
5728 | { | |
c2ec26b8 | 5729 | tem = convert_modes (word_mode, mode, op0, 1); |
81722fa9 | 5730 | mode = word_mode; |
44037a66 TG |
5731 | } |
5732 | ||
5733 | if (tem != 0) | |
5734 | { | |
5735 | if (code == EQ) | |
5736 | tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget, | |
5737 | 0, OPTAB_WIDEN); | |
5738 | else | |
5739 | tem = expand_unop (mode, neg_optab, tem, subtarget, 0); | |
5740 | } | |
5741 | ||
5742 | /* If we couldn't do it that way, for NE we can "or" the two's complement | |
5743 | of the value with itself. For EQ, we take the one's complement of | |
5744 | that "or", which is an extra insn, so we only handle EQ if branches | |
5745 | are expensive. */ | |
5746 | ||
3a4fd356 JH |
5747 | if (tem == 0 |
5748 | && (code == NE | |
5749 | || BRANCH_COST (optimize_insn_for_speed_p (), | |
8afacf2c | 5750 | false) > 1)) |
44037a66 | 5751 | { |
36d747f6 RS |
5752 | if (rtx_equal_p (subtarget, op0)) |
5753 | subtarget = 0; | |
5754 | ||
44037a66 TG |
5755 | tem = expand_unop (mode, neg_optab, op0, subtarget, 0); |
5756 | tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0, | |
5757 | OPTAB_WIDEN); | |
5758 | ||
5759 | if (tem && code == EQ) | |
5760 | tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0); | |
5761 | } | |
5762 | } | |
5763 | ||
5764 | if (tem && normalizep) | |
ea000c3f EB |
5765 | tem = maybe_expand_shift (RSHIFT_EXPR, mode, tem, |
5766 | GET_MODE_BITSIZE (mode) - 1, | |
5767 | subtarget, normalizep == 1); | |
44037a66 | 5768 | |
91e66235 | 5769 | if (tem) |
44037a66 | 5770 | { |
495499da | 5771 | if (!target) |
8afacf2c | 5772 | ; |
495499da | 5773 | else if (GET_MODE (tem) != target_mode) |
91e66235 MM |
5774 | { |
5775 | convert_move (target, tem, 0); | |
5776 | tem = target; | |
5777 | } | |
5778 | else if (!subtarget) | |
5779 | { | |
5780 | emit_move_insn (target, tem); | |
5781 | tem = target; | |
5782 | } | |
44037a66 | 5783 | } |
91e66235 | 5784 | else |
44037a66 TG |
5785 | delete_insns_since (last); |
5786 | ||
5787 | return tem; | |
5788 | } | |
04a8ee2f | 5789 | |
8afacf2c RS |
5790 | /* Emit a store-flags instruction for comparison CODE on OP0 and OP1 |
5791 | and storing in TARGET. Normally return TARGET. | |
5792 | Return 0 if that cannot be done. | |
5793 | ||
5794 | MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If | |
5795 | it is VOIDmode, they cannot both be CONST_INT. | |
5796 | ||
5797 | UNSIGNEDP is for the case where we have to widen the operands | |
5798 | to perform the operation. It says to use zero-extension. | |
5799 | ||
5800 | NORMALIZEP is 1 if we should convert the result to be either zero | |
5801 | or one. Normalize is -1 if we should convert the result to be | |
5802 | either zero or -1. If NORMALIZEP is zero, the result will be left | |
5803 | "raw" out of the scc insn. */ | |
5804 | ||
5805 | rtx | |
5806 | emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1, | |
5807 | machine_mode mode, int unsignedp, int normalizep) | |
5808 | { | |
5809 | machine_mode target_mode = target ? GET_MODE (target) : VOIDmode; | |
5810 | enum rtx_code rcode; | |
5811 | rtx subtarget; | |
5812 | rtx tem, trueval; | |
5813 | rtx_insn *last; | |
5814 | ||
5815 | /* If we compare constants, we shouldn't use a store-flag operation, | |
5816 | but a constant load. We can get there via the vanilla route that | |
5817 | usually generates a compare-branch sequence, but will in this case | |
5818 | fold the comparison to a constant, and thus elide the branch. */ | |
5819 | if (CONSTANT_P (op0) && CONSTANT_P (op1)) | |
5820 | return NULL_RTX; | |
5821 | ||
5822 | tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep, | |
5823 | target_mode); | |
5824 | if (tem) | |
5825 | return tem; | |
5826 | ||
5827 | /* If we reached here, we can't do this with a scc insn, however there | |
5828 | are some comparisons that can be done in other ways. Don't do any | |
5829 | of these cases if branches are very cheap. */ | |
5830 | if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0) | |
5831 | return 0; | |
5832 | ||
5833 | /* See what we need to return. We can only return a 1, -1, or the | |
5834 | sign bit. */ | |
5835 | ||
5836 | if (normalizep == 0) | |
5837 | { | |
5838 | if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1) | |
5839 | normalizep = STORE_FLAG_VALUE; | |
5840 | ||
5841 | else if (val_signbit_p (mode, STORE_FLAG_VALUE)) | |
5842 | ; | |
5843 | else | |
5844 | return 0; | |
5845 | } | |
5846 | ||
5847 | last = get_last_insn (); | |
5848 | ||
5849 | /* If optimizing, use different pseudo registers for each insn, instead | |
5850 | of reusing the same pseudo. This leads to better CSE, but slows | |
5851 | down the compiler, since there are more pseudos. */ | |
5852 | subtarget = (!optimize | |
5853 | && (target_mode == mode)) ? target : NULL_RTX; | |
5854 | trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE); | |
5855 | ||
5856 | /* For floating-point comparisons, try the reverse comparison or try | |
5857 | changing the "orderedness" of the comparison. */ | |
5858 | if (GET_MODE_CLASS (mode) == MODE_FLOAT) | |
5859 | { | |
5860 | enum rtx_code first_code; | |
5861 | bool and_them; | |
5862 | ||
5863 | rcode = reverse_condition_maybe_unordered (code); | |
5864 | if (can_compare_p (rcode, mode, ccp_store_flag) | |
5865 | && (code == ORDERED || code == UNORDERED | |
5866 | || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ)) | |
5867 | || (! HONOR_SNANS (mode) && (code == EQ || code == NE)))) | |
5868 | { | |
5869 | int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1) | |
5870 | || (STORE_FLAG_VALUE == -1 && normalizep == 1)); | |
5871 | ||
5872 | /* For the reverse comparison, use either an addition or a XOR. */ | |
5873 | if (want_add | |
5874 | && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1, | |
5875 | optimize_insn_for_speed_p ()) == 0) | |
5876 | { | |
5877 | tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0, | |
5878 | STORE_FLAG_VALUE, target_mode); | |
5879 | if (tem) | |
5880 | return expand_binop (target_mode, add_optab, tem, | |
5881 | gen_int_mode (normalizep, target_mode), | |
5882 | target, 0, OPTAB_WIDEN); | |
5883 | } | |
5884 | else if (!want_add | |
5885 | && rtx_cost (trueval, mode, XOR, 1, | |
5886 | optimize_insn_for_speed_p ()) == 0) | |
5887 | { | |
5888 | tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0, | |
5889 | normalizep, target_mode); | |
5890 | if (tem) | |
5891 | return expand_binop (target_mode, xor_optab, tem, trueval, | |
5892 | target, INTVAL (trueval) >= 0, | |
5893 | OPTAB_WIDEN); | |
5894 | } | |
5895 | } | |
5896 | ||
5897 | delete_insns_since (last); | |
5898 | ||
5899 | /* Cannot split ORDERED and UNORDERED, only try the above trick. */ | |
5900 | if (code == ORDERED || code == UNORDERED) | |
5901 | return 0; | |
5902 | ||
5903 | and_them = split_comparison (code, mode, &first_code, &code); | |
5904 | ||
5905 | /* If there are no NaNs, the first comparison should always fall through. | |
5906 | Effectively change the comparison to the other one. */ | |
5907 | if (!HONOR_NANS (mode)) | |
5908 | { | |
5909 | gcc_assert (first_code == (and_them ? ORDERED : UNORDERED)); | |
5910 | return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep, | |
5911 | target_mode); | |
5912 | } | |
5913 | ||
5914 | if (!HAVE_conditional_move) | |
5915 | return 0; | |
5916 | ||
5917 | /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a | |
5918 | conditional move. */ | |
5919 | tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0, | |
5920 | normalizep, target_mode); | |
5921 | if (tem == 0) | |
5922 | return 0; | |
5923 | ||
5924 | if (and_them) | |
5925 | tem = emit_conditional_move (target, code, op0, op1, mode, | |
5926 | tem, const0_rtx, GET_MODE (tem), 0); | |
5927 | else | |
5928 | tem = emit_conditional_move (target, code, op0, op1, mode, | |
5929 | trueval, tem, GET_MODE (tem), 0); | |
5930 | ||
5931 | if (tem == 0) | |
5932 | delete_insns_since (last); | |
5933 | return tem; | |
5934 | } | |
5935 | ||
5936 | /* The remaining tricks only apply to integer comparisons. */ | |
5937 | ||
5938 | if (GET_MODE_CLASS (mode) == MODE_INT) | |
5939 | return emit_store_flag_int (target, subtarget, code, op0, op1, mode, | |
5940 | unsignedp, normalizep, trueval); | |
5941 | ||
5942 | return 0; | |
5943 | } | |
5944 | ||
04a8ee2f TG |
5945 | /* Like emit_store_flag, but always succeeds. */ |
5946 | ||
5947 | rtx | |
502b8322 | 5948 | emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1, |
ef4bddc2 | 5949 | machine_mode mode, int unsignedp, int normalizep) |
04a8ee2f | 5950 | { |
f3f6fb16 DM |
5951 | rtx tem; |
5952 | rtx_code_label *label; | |
495499da | 5953 | rtx trueval, falseval; |
04a8ee2f TG |
5954 | |
5955 | /* First see if emit_store_flag can do the job. */ | |
5956 | tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep); | |
5957 | if (tem != 0) | |
5958 | return tem; | |
5959 | ||
495499da PB |
5960 | if (!target) |
5961 | target = gen_reg_rtx (word_mode); | |
04a8ee2f | 5962 | |
495499da PB |
5963 | /* If this failed, we have to do this with set/compare/jump/set code. |
5964 | For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */ | |
5965 | trueval = normalizep ? GEN_INT (normalizep) : const1_rtx; | |
b8698a0f | 5966 | if (code == NE |
495499da PB |
5967 | && GET_MODE_CLASS (mode) == MODE_INT |
5968 | && REG_P (target) | |
5969 | && op0 == target | |
5970 | && op1 == const0_rtx) | |
5971 | { | |
5972 | label = gen_label_rtx (); | |
1476d1bd | 5973 | do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, mode, |
357067f2 JH |
5974 | NULL_RTX, NULL, label, |
5975 | profile_probability::uninitialized ()); | |
495499da PB |
5976 | emit_move_insn (target, trueval); |
5977 | emit_label (label); | |
5978 | return target; | |
5979 | } | |
04a8ee2f | 5980 | |
f8cfc6aa | 5981 | if (!REG_P (target) |
04a8ee2f TG |
5982 | || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1)) |
5983 | target = gen_reg_rtx (GET_MODE (target)); | |
5984 | ||
495499da PB |
5985 | /* Jump in the right direction if the target cannot implement CODE |
5986 | but can jump on its reverse condition. */ | |
5987 | falseval = const0_rtx; | |
5988 | if (! can_compare_p (code, mode, ccp_jump) | |
5989 | && (! FLOAT_MODE_P (mode) | |
5990 | || code == ORDERED || code == UNORDERED | |
5991 | || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ)) | |
5992 | || (! HONOR_SNANS (mode) && (code == EQ || code == NE)))) | |
5993 | { | |
5994 | enum rtx_code rcode; | |
5995 | if (FLOAT_MODE_P (mode)) | |
5996 | rcode = reverse_condition_maybe_unordered (code); | |
5997 | else | |
5998 | rcode = reverse_condition (code); | |
5999 | ||
6000 | /* Canonicalize to UNORDERED for the libcall. */ | |
6001 | if (can_compare_p (rcode, mode, ccp_jump) | |
6002 | || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump))) | |
6003 | { | |
6004 | falseval = trueval; | |
6005 | trueval = const0_rtx; | |
6006 | code = rcode; | |
6007 | } | |
6008 | } | |
6009 | ||
6010 | emit_move_insn (target, trueval); | |
04a8ee2f | 6011 | label = gen_label_rtx (); |
1476d1bd | 6012 | do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, NULL, |
357067f2 | 6013 | label, profile_probability::uninitialized ()); |
04a8ee2f | 6014 | |
495499da | 6015 | emit_move_insn (target, falseval); |
44037a66 TG |
6016 | emit_label (label); |
6017 | ||
6018 | return target; | |
6019 | } | |
f5963e61 JL |
6020 | \f |
6021 | /* Perform possibly multi-word comparison and conditional jump to LABEL | |
feb04780 RS |
6022 | if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE. This is |
6023 | now a thin wrapper around do_compare_rtx_and_jump. */ | |
f5963e61 JL |
6024 | |
6025 | static void | |
ef4bddc2 | 6026 | do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode, |
f3f6fb16 | 6027 | rtx_code_label *label) |
f5963e61 | 6028 | { |
feb04780 | 6029 | int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU); |
1476d1bd | 6030 | do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode, NULL_RTX, |
357067f2 | 6031 | NULL, label, profile_probability::uninitialized ()); |
f5963e61 | 6032 | } |